pixeltable 0.2.1__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- {pixeltable-0.2.1 → pixeltable-0.2.3}/PKG-INFO +9 -7
- {pixeltable-0.2.1 → pixeltable-0.2.3}/README.md +3 -3
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/dataframe.py +1 -1
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/env.py +16 -14
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exec/cache_prefetch_node.py +14 -11
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exprs/data_row.py +14 -6
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/iterators/document.py +1 -1
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/store.py +15 -10
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/tests/conftest.py +3 -1
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/tests/test_component_view.py +6 -8
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/tests/test_dataframe.py +7 -1
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/tests/test_document.py +3 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/tests/test_exprs.py +2 -1
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/tests/test_functions.py +5 -4
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/tests/test_table.py +6 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/tests/test_video.py +2 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/tests/utils.py +15 -4
- pixeltable-0.2.3/pixeltable/tool/create_test_video.py +81 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/type_system.py +1 -1
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pyproject.toml +6 -7
- {pixeltable-0.2.1 → pixeltable-0.2.3}/LICENSE +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/__init__.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/catalog/__init__.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/catalog/catalog.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/catalog/column.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/catalog/dir.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/catalog/globals.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/catalog/insertable_table.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/catalog/named_function.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/catalog/path.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/catalog/path_dict.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/catalog/schema_object.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/catalog/table.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/catalog/table_version.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/catalog/table_version_path.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/catalog/view.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/client.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exceptions.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exec/__init__.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exec/aggregation_node.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exec/component_iteration_node.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exec/data_row_batch.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exec/exec_context.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exec/exec_node.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exec/expr_eval_node.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exec/in_memory_data_node.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exec/media_validation_node.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exec/sql_scan_node.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exprs/__init__.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exprs/arithmetic_expr.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exprs/array_slice.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exprs/column_property_ref.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exprs/column_ref.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exprs/comparison.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exprs/compound_predicate.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exprs/expr.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exprs/expr_set.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exprs/function_call.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exprs/globals.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exprs/image_member_access.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exprs/image_similarity_predicate.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exprs/inline_array.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exprs/inline_dict.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exprs/is_null.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exprs/json_mapper.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exprs/json_path.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exprs/literal.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exprs/object_ref.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exprs/predicate.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exprs/row_builder.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exprs/rowid_ref.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exprs/type_cast.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/exprs/variable.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/func/__init__.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/func/aggregate_function.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/func/batched_function.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/func/callable_function.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/func/expr_template_function.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/func/function.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/func/function_registry.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/func/globals.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/func/nos_function.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/func/signature.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/func/udf.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/functions/__init__.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/functions/eval.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/functions/fireworks.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/functions/huggingface.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/functions/image.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/functions/openai.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/functions/pil/image.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/functions/string.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/functions/together.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/functions/util.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/functions/video.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/iterators/__init__.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/iterators/base.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/iterators/video.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/metadata/__init__.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/metadata/converters/convert_10.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/metadata/schema.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/plan.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/tests/test_audio.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/tests/test_catalog.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/tests/test_client.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/tests/test_dirs.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/tests/test_function.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/tests/test_migration.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/tests/test_nos.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/tests/test_snapshot.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/tests/test_transactional_directory.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/tests/test_types.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/tests/test_view.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/tool/create_test_db_dump.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/utils/__init__.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/utils/clip.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/utils/coco.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/utils/documents.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/utils/filecache.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/utils/help.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/utils/media_store.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/utils/parquet.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/utils/pytorch.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/utils/s3.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/utils/sql.py +0 -0
- {pixeltable-0.2.1 → pixeltable-0.2.3}/pixeltable/utils/transactional_directory.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: pixeltable
|
|
3
|
-
Version: 0.2.1
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: Pixeltable: The Multimodal AI Data Plane
|
|
5
5
|
Author: Marcel Kornacker
|
|
6
6
|
Author-email: marcelk@gmail.com
|
|
@@ -11,19 +11,21 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.11
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.12
|
|
13
13
|
Requires-Dist: av (>=10.0.0)
|
|
14
|
+
Requires-Dist: beautifulsoup4 (>=4.0.0,<5.0.0)
|
|
14
15
|
Requires-Dist: cloudpickle (>=2.2.1,<3.0.0)
|
|
15
16
|
Requires-Dist: jinja2 (>=3.1.3,<4.0.0)
|
|
16
17
|
Requires-Dist: jmespath (>=1.0.1,<2.0.0)
|
|
17
|
-
Requires-Dist: numpy (>=1.
|
|
18
|
+
Requires-Dist: numpy (>=1.26,<2.0)
|
|
18
19
|
Requires-Dist: opencv-python-headless (>=4.7.0.68,<5.0.0.0)
|
|
19
|
-
Requires-Dist: pandas (>=
|
|
20
|
-
Requires-Dist: pgserver (==0.0.
|
|
20
|
+
Requires-Dist: pandas (>=2.0,<3.0)
|
|
21
|
+
Requires-Dist: pgserver (==0.0.9)
|
|
21
22
|
Requires-Dist: pgvector (>=0.2.1,<0.3.0)
|
|
22
23
|
Requires-Dist: pillow (>=9.4.0,<10.0.0)
|
|
23
24
|
Requires-Dist: psutil (>=5.9.5,<6.0.0)
|
|
24
25
|
Requires-Dist: psycopg2-binary (>=2.9.5,<3.0.0)
|
|
25
26
|
Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
|
|
26
27
|
Requires-Dist: regex (>=2022.10.31,<2023.0.0)
|
|
28
|
+
Requires-Dist: requests (>=2.31.0,<3.0.0)
|
|
27
29
|
Requires-Dist: sqlalchemy-utils (>=0.41.1,<0.42.0)
|
|
28
30
|
Requires-Dist: sqlalchemy[mypy] (>=2.0.23,<3.0.0)
|
|
29
31
|
Requires-Dist: tqdm (>=4.64.1,<5.0.0)
|
|
@@ -45,16 +47,16 @@ data plumbing.
|
|
|
45
47
|
It brings together data storage, versioning, and indexing with orchestration and model
|
|
46
48
|
versioning under a declarative table interface, with transformations, model inference,
|
|
47
49
|
and custom logic represented as computed columns.
|
|
48
|
-
|
|
50
|
+
|
|
49
51
|
## Quick Start
|
|
50
52
|
|
|
51
53
|
If you just want to play around with Pixeltable to see what it's capable of, the easiest way is to run
|
|
52
54
|
the Pixeltable Basics tutorial in colab:
|
|
53
55
|
|
|
54
|
-
<a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/docs/tutorials/pixeltable-basics.ipynb">
|
|
56
|
+
<a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/tutorials/pixeltable-basics.ipynb">
|
|
55
57
|
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
|
|
56
58
|
</a>
|
|
57
|
-
|
|
59
|
+
|
|
58
60
|
## Installation
|
|
59
61
|
|
|
60
62
|
Pixeltable works with Python 3.9, 3.10, or 3.11 running on Linux or MacOS.
|
|
@@ -14,16 +14,16 @@ data plumbing.
|
|
|
14
14
|
It brings together data storage, versioning, and indexing with orchestration and model
|
|
15
15
|
versioning under a declarative table interface, with transformations, model inference,
|
|
16
16
|
and custom logic represented as computed columns.
|
|
17
|
-
|
|
17
|
+
|
|
18
18
|
## Quick Start
|
|
19
19
|
|
|
20
20
|
If you just want to play around with Pixeltable to see what it's capable of, the easiest way is to run
|
|
21
21
|
the Pixeltable Basics tutorial in colab:
|
|
22
22
|
|
|
23
|
-
<a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/docs/tutorials/pixeltable-basics.ipynb">
|
|
23
|
+
<a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/tutorials/pixeltable-basics.ipynb">
|
|
24
24
|
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
|
|
25
25
|
</a>
|
|
26
|
-
|
|
26
|
+
|
|
27
27
|
## Installation
|
|
28
28
|
|
|
29
29
|
Pixeltable works with Python 3.9, 3.10, or 3.11 running on Linux or MacOS.
|
|
@@ -39,7 +39,7 @@ def _format_img(img: object) -> str:
|
|
|
39
39
|
with io.BytesIO() as buffer:
|
|
40
40
|
img.save(buffer, 'jpeg')
|
|
41
41
|
img_base64 = base64.b64encode(buffer.getvalue()).decode()
|
|
42
|
-
return f'<div style
|
|
42
|
+
return f'<div style="width:200px;"><img src="data:image/jpeg;base64,{img_base64}" width="200" /></div>'
|
|
43
43
|
|
|
44
44
|
def _create_source_tag(file_path: str) -> str:
|
|
45
45
|
abs_path = Path(file_path)
|
|
@@ -92,7 +92,7 @@ class Env:
|
|
|
92
92
|
def db_url(self) -> str:
|
|
93
93
|
assert self._db_url is not None
|
|
94
94
|
return self._db_url
|
|
95
|
-
|
|
95
|
+
|
|
96
96
|
@property
|
|
97
97
|
def http_address(self) -> str:
|
|
98
98
|
assert self._http_address is not None
|
|
@@ -142,7 +142,7 @@ class Env:
|
|
|
142
142
|
def set_up(self, echo: bool = False, reinit_db: bool = False) -> None:
|
|
143
143
|
if self._initialized:
|
|
144
144
|
return
|
|
145
|
-
|
|
145
|
+
|
|
146
146
|
self._initialized = True
|
|
147
147
|
home = Path(os.environ.get('PIXELTABLE_HOME', str(Path.home() / '.pixeltable')))
|
|
148
148
|
assert self._home is None or self._home == home
|
|
@@ -153,7 +153,6 @@ class Env:
|
|
|
153
153
|
self._dataset_cache_dir = self._home / 'dataset_cache'
|
|
154
154
|
self._log_dir = self._home / 'logs'
|
|
155
155
|
self._tmp_dir = self._home / 'tmp'
|
|
156
|
-
self._pgdata_dir = Path(os.environ.get('PIXELTABLE_PGDATA', str(self._home / 'pgdata')))
|
|
157
156
|
|
|
158
157
|
# Read in the config
|
|
159
158
|
if os.path.isfile(self._config_file):
|
|
@@ -204,8 +203,9 @@ class Env:
|
|
|
204
203
|
os.remove(path)
|
|
205
204
|
|
|
206
205
|
self._db_name = os.environ.get('PIXELTABLE_DB', 'pixeltable')
|
|
206
|
+
self._pgdata_dir = Path(os.environ.get('PIXELTABLE_PGDATA', str(self._home / 'pgdata')))
|
|
207
207
|
|
|
208
|
-
# cleanup_mode=None will leave db on for debugging purposes
|
|
208
|
+
# in pgserver.get_server(): cleanup_mode=None will leave db on for debugging purposes
|
|
209
209
|
self._db_server = pgserver.get_server(self._pgdata_dir, cleanup_mode=None)
|
|
210
210
|
self._db_url = self._db_server.get_uri(database=self._db_name)
|
|
211
211
|
|
|
@@ -257,16 +257,17 @@ class Env:
|
|
|
257
257
|
_ = create_nos_modules()
|
|
258
258
|
|
|
259
259
|
def _create_openai_client(self) -> None:
|
|
260
|
+
if not self.is_installed_package('openai'):
|
|
261
|
+
raise excs.Error('OpenAI client not initialized (cannot find package `openai`: `pip install openai`?)')
|
|
262
|
+
import openai
|
|
260
263
|
if 'openai' in self._config and 'api_key' in self._config['openai']:
|
|
261
264
|
api_key = self._config['openai']['api_key']
|
|
262
265
|
else:
|
|
263
266
|
api_key = os.environ.get('OPENAI_API_KEY')
|
|
264
267
|
if api_key is None or api_key == '':
|
|
265
|
-
|
|
266
|
-
return
|
|
267
|
-
import openai
|
|
268
|
-
self._logger.info('Initializing OpenAI client.')
|
|
268
|
+
raise excs.Error('OpenAI client not initialized (no API key configured).')
|
|
269
269
|
self._openai_client = openai.OpenAI(api_key=api_key)
|
|
270
|
+
self._logger.info('Initialized OpenAI client.')
|
|
270
271
|
|
|
271
272
|
def _create_together_client(self) -> None:
|
|
272
273
|
if 'together' in self._config and 'api_key' in self._config['together']:
|
|
@@ -285,15 +286,15 @@ class Env:
|
|
|
285
286
|
"""
|
|
286
287
|
The http server root is the file system root.
|
|
287
288
|
eg: /home/media/foo.mp4 is located at http://127.0.0.1:{port}/home/media/foo.mp4
|
|
288
|
-
This arrangement enables serving media hosted within _home,
|
|
289
|
+
This arrangement enables serving media hosted within _home,
|
|
289
290
|
as well as external media inserted into pixeltable or produced by pixeltable.
|
|
290
291
|
The port is chosen dynamically to prevent conflicts.
|
|
291
|
-
"""
|
|
292
|
+
"""
|
|
292
293
|
# Port 0 means OS picks one for us.
|
|
293
294
|
address = ("127.0.0.1", 0)
|
|
294
295
|
class FixedRootHandler(http.server.SimpleHTTPRequestHandler):
|
|
295
296
|
def __init__(self, *args, **kwargs):
|
|
296
|
-
super().__init__(*args, directory='/', **kwargs)
|
|
297
|
+
super().__init__(*args, directory='/', **kwargs)
|
|
297
298
|
self._httpd = socketserver.TCPServer(address, FixedRootHandler)
|
|
298
299
|
port = self._httpd.server_address[1]
|
|
299
300
|
self._http_address = f'http://127.0.0.1:{port}'
|
|
@@ -330,8 +331,6 @@ class Env:
|
|
|
330
331
|
self._spacy_nlp = spacy.load('en_core_web_sm')
|
|
331
332
|
check('tiktoken')
|
|
332
333
|
check('openai')
|
|
333
|
-
if self.is_installed_package('openai'):
|
|
334
|
-
self._create_openai_client()
|
|
335
334
|
check('together')
|
|
336
335
|
if self.is_installed_package('together'):
|
|
337
336
|
self._create_together_client()
|
|
@@ -401,7 +400,10 @@ class Env:
|
|
|
401
400
|
return self._nos_client
|
|
402
401
|
|
|
403
402
|
@property
|
|
404
|
-
def openai_client(self) ->
|
|
403
|
+
def openai_client(self) -> 'openai.OpenAI':
|
|
404
|
+
if self._openai_client is None:
|
|
405
|
+
self._create_openai_client()
|
|
406
|
+
assert self._openai_client is not None
|
|
405
407
|
return self._openai_client
|
|
406
408
|
|
|
407
409
|
@property
|
|
@@ -1,19 +1,21 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
|
|
2
|
+
|
|
3
|
+
import concurrent.futures
|
|
4
|
+
import logging
|
|
3
5
|
import threading
|
|
6
|
+
import urllib.parse
|
|
7
|
+
import urllib.request
|
|
4
8
|
from collections import defaultdict
|
|
5
|
-
from uuid import UUID
|
|
6
|
-
import concurrent
|
|
7
|
-
import logging
|
|
8
|
-
import urllib
|
|
9
9
|
from pathlib import Path
|
|
10
|
+
from typing import List, Optional, Any, Tuple, Dict
|
|
11
|
+
from uuid import UUID
|
|
10
12
|
|
|
11
|
-
from .data_row_batch import DataRowBatch
|
|
12
|
-
from .exec_node import ExecNode
|
|
13
|
-
import pixeltable.exprs as exprs
|
|
14
|
-
from pixeltable.utils.filecache import FileCache
|
|
15
13
|
import pixeltable.env as env
|
|
16
14
|
import pixeltable.exceptions as excs
|
|
15
|
+
import pixeltable.exprs as exprs
|
|
16
|
+
from pixeltable.utils.filecache import FileCache
|
|
17
|
+
from .data_row_batch import DataRowBatch
|
|
18
|
+
from .exec_node import ExecNode
|
|
17
19
|
|
|
18
20
|
_logger = logging.getLogger('pixeltable')
|
|
19
21
|
|
|
@@ -81,7 +83,9 @@ class CachePrefetchNode(ExecNode):
|
|
|
81
83
|
"""Fetches a remote URL into Env.tmp_dir and returns its path"""
|
|
82
84
|
url = row.file_urls[slot_idx]
|
|
83
85
|
parsed = urllib.parse.urlparse(url)
|
|
84
|
-
|
|
86
|
+
# Use len(parsed.scheme) > 1 here to ensure we're not being passed
|
|
87
|
+
# a Windows filename
|
|
88
|
+
assert len(parsed.scheme) > 1 and parsed.scheme != 'file'
|
|
85
89
|
# preserve the file extension, if there is one
|
|
86
90
|
extension = ''
|
|
87
91
|
if parsed.path != '':
|
|
@@ -95,7 +99,6 @@ class CachePrefetchNode(ExecNode):
|
|
|
95
99
|
if self.boto_client is None:
|
|
96
100
|
self.boto_client = get_client()
|
|
97
101
|
self.boto_client.download_file(parsed.netloc, parsed.path.lstrip('/'), str(tmp_path))
|
|
98
|
-
return tmp_path
|
|
99
102
|
elif parsed.scheme == 'http' or parsed.scheme == 'https':
|
|
100
103
|
with urllib.request.urlopen(url) as resp, open(tmp_path, 'wb') as f:
|
|
101
104
|
data = resp.read()
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
|
|
2
|
+
|
|
3
3
|
import io
|
|
4
|
-
import urllib
|
|
4
|
+
import urllib.parse
|
|
5
|
+
import urllib.request
|
|
6
|
+
from typing import Optional, List, Any, Tuple
|
|
5
7
|
|
|
6
8
|
import PIL
|
|
7
9
|
import numpy as np
|
|
@@ -104,6 +106,7 @@ class DataRow:
|
|
|
104
106
|
assert self.file_paths[index] is not None
|
|
105
107
|
if self.vals[index] is None:
|
|
106
108
|
self.vals[index] = PIL.Image.open(self.file_paths[index])
|
|
109
|
+
self.vals[index].load()
|
|
107
110
|
|
|
108
111
|
return self.vals[index]
|
|
109
112
|
|
|
@@ -137,14 +140,19 @@ class DataRow:
|
|
|
137
140
|
if (idx in self.img_slot_idxs or idx in self.media_slot_idxs) and isinstance(val, str):
|
|
138
141
|
# this is either a local file path or a URL
|
|
139
142
|
parsed = urllib.parse.urlparse(val)
|
|
140
|
-
if
|
|
143
|
+
# Determine if this is a local file or a remote URL. If the scheme length is <= 1,
|
|
144
|
+
# we assume it's a local file. (This is because a Windows path will be interpreted
|
|
145
|
+
# by urllib as a URL with scheme equal to the drive letter.)
|
|
146
|
+
if len(parsed.scheme) <= 1 or parsed.scheme == 'file':
|
|
141
147
|
# local file path
|
|
142
148
|
assert self.file_urls[idx] is None and self.file_paths[idx] is None
|
|
143
|
-
if parsed.scheme
|
|
144
|
-
self.file_urls[idx] = urllib.parse.urljoin('file:', urllib.request.pathname2url(
|
|
149
|
+
if len(parsed.scheme) <= 1:
|
|
150
|
+
self.file_urls[idx] = urllib.parse.urljoin('file:', urllib.request.pathname2url(val))
|
|
151
|
+
self.file_paths[idx] = val
|
|
145
152
|
else:
|
|
146
153
|
self.file_urls[idx] = val
|
|
147
|
-
|
|
154
|
+
# Wrap the path in a url2pathname() call to ensure proper handling on Windows.
|
|
155
|
+
self.file_paths[idx] = urllib.parse.unquote(urllib.request.url2pathname(parsed.path))
|
|
148
156
|
else:
|
|
149
157
|
# URL
|
|
150
158
|
assert self.file_urls[idx] is None
|
|
@@ -61,7 +61,7 @@ class DocumentSplitter(ComponentIterator):
|
|
|
61
61
|
import bs4
|
|
62
62
|
if html_skip_tags is None:
|
|
63
63
|
html_skip_tags = ['nav']
|
|
64
|
-
with open(document, 'r') as fh:
|
|
64
|
+
with open(document, 'r', encoding='utf8') as fh:
|
|
65
65
|
s = fh.read()
|
|
66
66
|
self._doc_handle = get_document_handle(s)
|
|
67
67
|
assert self._doc_handle is not None
|
|
@@ -1,24 +1,26 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import abc
|
|
4
|
+
import logging
|
|
3
5
|
import os
|
|
4
6
|
import sys
|
|
7
|
+
import urllib.parse
|
|
8
|
+
import urllib.request
|
|
5
9
|
import warnings
|
|
6
10
|
from typing import Optional, Dict, Any, List, Tuple, Set
|
|
7
|
-
|
|
8
|
-
import urllib
|
|
11
|
+
|
|
9
12
|
import sqlalchemy as sql
|
|
10
13
|
from tqdm import tqdm, TqdmWarning
|
|
11
|
-
import abc
|
|
12
14
|
|
|
13
15
|
import pixeltable.catalog as catalog
|
|
16
|
+
import pixeltable.env as env
|
|
17
|
+
from pixeltable import exprs
|
|
18
|
+
import pixeltable.exceptions as excs
|
|
19
|
+
from pixeltable.exec import ExecNode
|
|
14
20
|
from pixeltable.metadata import schema
|
|
15
21
|
from pixeltable.type_system import StringType
|
|
16
|
-
from pixeltable.exec import ExecNode
|
|
17
|
-
from pixeltable import exprs
|
|
18
|
-
from pixeltable.utils.sql import log_stmt, log_explain
|
|
19
|
-
import pixeltable.env as env
|
|
20
22
|
from pixeltable.utils.media_store import MediaStore
|
|
21
|
-
|
|
23
|
+
from pixeltable.utils.sql import log_stmt, log_explain
|
|
22
24
|
|
|
23
25
|
_logger = logging.getLogger('pixeltable')
|
|
24
26
|
|
|
@@ -121,10 +123,13 @@ class StoreBase:
|
|
|
121
123
|
if file_url is None:
|
|
122
124
|
return None
|
|
123
125
|
parsed = urllib.parse.urlparse(file_url)
|
|
124
|
-
|
|
126
|
+
# We should never be passed a local file path here. The "len > 1" ensures that Windows
|
|
127
|
+
# file paths aren't mistaken for URLs with a single-character scheme.
|
|
128
|
+
assert len(parsed.scheme) > 1
|
|
129
|
+
if parsed.scheme != 'file':
|
|
125
130
|
# remote url
|
|
126
131
|
return file_url
|
|
127
|
-
file_path = urllib.parse.unquote(parsed.path)
|
|
132
|
+
file_path = urllib.parse.unquote(urllib.request.url2pathname(parsed.path))
|
|
128
133
|
if not file_path.startswith(pxt_tmp_dir):
|
|
129
134
|
# not a tmp file
|
|
130
135
|
return file_url
|
|
@@ -23,7 +23,7 @@ def init_env(tmp_path_factory) -> None:
|
|
|
23
23
|
from pixeltable.env import Env
|
|
24
24
|
# set the relevant env vars for Client() to connect to the test db
|
|
25
25
|
|
|
26
|
-
shared_home = pathlib.Path(os.environ.get('PIXELTABLE_HOME', '
|
|
26
|
+
shared_home = pathlib.Path(os.environ.get('PIXELTABLE_HOME', str(pathlib.Path.home() / '.pixeltable')))
|
|
27
27
|
home_dir = str(tmp_path_factory.mktemp('base') / '.pixeltable')
|
|
28
28
|
os.environ['PIXELTABLE_HOME'] = home_dir
|
|
29
29
|
os.environ['PIXELTABLE_CONFIG'] = str(shared_home / 'config.yaml')
|
|
@@ -31,6 +31,8 @@ def init_env(tmp_path_factory) -> None:
|
|
|
31
31
|
os.environ['PIXELTABLE_DB'] = test_db
|
|
32
32
|
os.environ['PIXELTABLE_PGDATA'] = str(shared_home / 'pgdata')
|
|
33
33
|
|
|
34
|
+
# ensure this home dir exists
|
|
35
|
+
shared_home.mkdir(parents=True, exist_ok=True)
|
|
34
36
|
# this also runs create_all()
|
|
35
37
|
Env.get().set_up(echo=True)
|
|
36
38
|
yield
|
|
@@ -9,10 +9,9 @@ import pixeltable as pxt
|
|
|
9
9
|
from pixeltable import exceptions as excs
|
|
10
10
|
from pixeltable.iterators import ComponentIterator
|
|
11
11
|
from pixeltable.iterators.video import FrameIterator
|
|
12
|
-
from pixeltable.tests.utils import assert_resultset_eq,
|
|
12
|
+
from pixeltable.tests.utils import assert_resultset_eq, get_test_video_files
|
|
13
13
|
from pixeltable.type_system import IntType, VideoType, JsonType
|
|
14
14
|
|
|
15
|
-
|
|
16
15
|
class ConstantImgIterator(ComponentIterator):
|
|
17
16
|
"""Component iterator that generates a fixed number of all-black 1280x720 images."""
|
|
18
17
|
def __init__(self, video: str, num_frames: int = 10):
|
|
@@ -59,14 +58,13 @@ class ConstantImgIterator(ComponentIterator):
|
|
|
59
58
|
return
|
|
60
59
|
self.next_frame_idx = pos
|
|
61
60
|
|
|
62
|
-
|
|
63
61
|
class TestComponentView:
|
|
64
62
|
def test_basic(self, test_client: pxt.Client) -> None:
|
|
65
63
|
cl = test_client
|
|
66
64
|
# create video table
|
|
67
65
|
schema = {'video': VideoType(), 'angle': IntType(), 'other_angle': IntType()}
|
|
68
66
|
video_t = cl.create_table('video_tbl', schema)
|
|
69
|
-
video_filepaths =
|
|
67
|
+
video_filepaths = get_test_video_files()
|
|
70
68
|
|
|
71
69
|
# cannot add 'pos' column
|
|
72
70
|
with pytest.raises(excs.Error) as excinfo:
|
|
@@ -124,7 +122,7 @@ class TestComponentView:
|
|
|
124
122
|
cl = test_client
|
|
125
123
|
# create video table
|
|
126
124
|
video_t = cl.create_table('video_tbl', {'video': VideoType()})
|
|
127
|
-
video_filepaths =
|
|
125
|
+
video_filepaths = get_test_video_files()
|
|
128
126
|
# create frame view
|
|
129
127
|
args = {'video': video_t.video, 'fps': 1}
|
|
130
128
|
view_t = cl.create_view('test_view', video_t, iterator_class=FrameIterator, iterator_args=args)
|
|
@@ -153,7 +151,7 @@ class TestComponentView:
|
|
|
153
151
|
'test_view', video_t, schema={'annotation': JsonType(nullable=True)},
|
|
154
152
|
iterator_class=FrameIterator, iterator_args=args)
|
|
155
153
|
|
|
156
|
-
video_filepaths =
|
|
154
|
+
video_filepaths = get_test_video_files()
|
|
157
155
|
rows = [{'video': p} for p in video_filepaths]
|
|
158
156
|
status = video_t.insert(rows)
|
|
159
157
|
assert status.num_excs == 0
|
|
@@ -206,7 +204,7 @@ class TestComponentView:
|
|
|
206
204
|
|
|
207
205
|
# create video table
|
|
208
206
|
video_t = cl.create_table(base_path, {'video': VideoType(), 'margin': IntType()})
|
|
209
|
-
video_filepaths =
|
|
207
|
+
video_filepaths = get_test_video_files()
|
|
210
208
|
rows = [{'video': path, 'margin': i * 10} for i, path in enumerate(video_filepaths)]
|
|
211
209
|
status = video_t.insert(rows)
|
|
212
210
|
assert status.num_rows == len(rows)
|
|
@@ -277,7 +275,7 @@ class TestComponentView:
|
|
|
277
275
|
# create video table
|
|
278
276
|
schema = {'video': VideoType(), 'int1': IntType(), 'int2': IntType()}
|
|
279
277
|
video_t = cl.create_table('video_tbl', schema)
|
|
280
|
-
video_filepaths =
|
|
278
|
+
video_filepaths = get_test_video_files()
|
|
281
279
|
|
|
282
280
|
# create first view
|
|
283
281
|
args = {'video': video_t.video}
|
|
@@ -7,7 +7,6 @@ import bs4
|
|
|
7
7
|
import numpy as np
|
|
8
8
|
import pytest
|
|
9
9
|
import requests
|
|
10
|
-
from pycocotools.coco import COCO
|
|
11
10
|
|
|
12
11
|
import pixeltable as pxt
|
|
13
12
|
from pixeltable import catalog
|
|
@@ -184,6 +183,8 @@ class TestDataFrame:
|
|
|
184
183
|
res = t.select(1.0).where(t.c2 < 10).collect()
|
|
185
184
|
assert res[res.column_names()[0]] == [1.0] * 10
|
|
186
185
|
|
|
186
|
+
# TODO This test doesn't work on Windows due to reliance on the structure of file URLs
|
|
187
|
+
@pytest.mark.skip('Test is not portable')
|
|
187
188
|
def test_html_media_url(self, test_client: pxt.Client) -> None:
|
|
188
189
|
tab = test_client.create_table('test_html_repr', {'video': pxt.VideoType(), 'audio': pxt.AudioType()})
|
|
189
190
|
status = tab.insert(video=get_video_files()[0], audio=get_audio_files()[0])
|
|
@@ -208,6 +209,7 @@ class TestDataFrame:
|
|
|
208
209
|
def test_to_pytorch_dataset(self, all_datatypes_tbl: catalog.Table):
|
|
209
210
|
""" tests all types are handled correctly in this conversion
|
|
210
211
|
"""
|
|
212
|
+
skip_test_if_not_installed('torch')
|
|
211
213
|
import torch
|
|
212
214
|
|
|
213
215
|
t = all_datatypes_tbl
|
|
@@ -238,6 +240,7 @@ class TestDataFrame:
|
|
|
238
240
|
def test_to_pytorch_image_format(self, all_datatypes_tbl: catalog.Table) -> None:
|
|
239
241
|
""" tests the image_format parameter is honored
|
|
240
242
|
"""
|
|
243
|
+
skip_test_if_not_installed('torch')
|
|
241
244
|
import torch
|
|
242
245
|
import torchvision.transforms as T
|
|
243
246
|
|
|
@@ -295,6 +298,7 @@ class TestDataFrame:
|
|
|
295
298
|
1. compatibility with multiprocessing
|
|
296
299
|
2. compatibility of all types with default collate_fn
|
|
297
300
|
"""
|
|
301
|
+
skip_test_if_not_installed('torch')
|
|
298
302
|
import torch.utils.data
|
|
299
303
|
@pxt.udf(param_types=[pxt.JsonType()], return_type=pxt.JsonType())
|
|
300
304
|
def restrict_json_for_default_collate(obj):
|
|
@@ -352,6 +356,7 @@ class TestDataFrame:
|
|
|
352
356
|
2. adding a row to the table invalidates the cached version
|
|
353
357
|
3. changing the select list invalidates the cached version
|
|
354
358
|
"""
|
|
359
|
+
skip_test_if_not_installed('torch')
|
|
355
360
|
t = all_datatypes_tbl
|
|
356
361
|
|
|
357
362
|
t.drop_column('c_video') # null value video column triggers internal assertions in DataRow
|
|
@@ -383,6 +388,7 @@ class TestDataFrame:
|
|
|
383
388
|
|
|
384
389
|
def test_to_coco(self, test_client: pxt.Client) -> None:
|
|
385
390
|
skip_test_if_not_installed('nos')
|
|
391
|
+
from pycocotools.coco import COCO
|
|
386
392
|
cl = test_client
|
|
387
393
|
base_t = cl.create_table('videos', {'video': pxt.VideoType()})
|
|
388
394
|
args = {'video': base_t.video, 'fps': 1}
|
|
@@ -8,6 +8,7 @@ import pytest
|
|
|
8
8
|
import pixeltable as pxt
|
|
9
9
|
from pixeltable.iterators.document import DocumentSplitter
|
|
10
10
|
from pixeltable.tests.utils import get_documents, get_video_files, get_audio_files, get_image_files
|
|
11
|
+
from pixeltable.tests.utils import skip_test_if_not_installed
|
|
11
12
|
from pixeltable.type_system import DocumentType
|
|
12
13
|
|
|
13
14
|
|
|
@@ -34,6 +35,7 @@ class TestDocument:
|
|
|
34
35
|
assert status.num_excs == len(file_paths)
|
|
35
36
|
|
|
36
37
|
def test_doc_splitter(self, test_client: pxt.Client) -> None:
|
|
38
|
+
skip_test_if_not_installed('tiktoken')
|
|
37
39
|
file_paths = self.valid_doc_paths()
|
|
38
40
|
cl = test_client
|
|
39
41
|
doc_t = cl.create_table('docs', {'doc': DocumentType()})
|
|
@@ -88,6 +90,7 @@ class TestDocument:
|
|
|
88
90
|
cl.drop_table('chunks')
|
|
89
91
|
|
|
90
92
|
def test_doc_splitter_headings(self, test_client: pxt.Client) -> None:
|
|
93
|
+
skip_test_if_not_installed('spacy')
|
|
91
94
|
file_paths = self.valid_doc_paths()
|
|
92
95
|
cl = test_client
|
|
93
96
|
doc_t = cl.create_table('docs', {'doc': DocumentType()})
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import urllib.parse
|
|
3
|
+
import urllib.request
|
|
3
4
|
from typing import List, Dict
|
|
4
5
|
|
|
5
6
|
import pytest
|
|
@@ -174,7 +175,7 @@ class TestExprs:
|
|
|
174
175
|
res = img_t.select(img_t.img.fileurl).show(0).to_pandas()
|
|
175
176
|
stored_urls = set(res.iloc[:, 0])
|
|
176
177
|
assert len(stored_urls) == len(res)
|
|
177
|
-
all_urls
|
|
178
|
+
all_urls = set(urllib.parse.urljoin('file:', urllib.request.pathname2url(path)) for path in get_image_files())
|
|
178
179
|
assert stored_urls <= all_urls
|
|
179
180
|
|
|
180
181
|
# localpath
|
|
@@ -5,6 +5,7 @@ import pytest
|
|
|
5
5
|
import pixeltable as pxt
|
|
6
6
|
from pixeltable import catalog
|
|
7
7
|
from pixeltable.env import Env
|
|
8
|
+
import pixeltable.exceptions as excs
|
|
8
9
|
from pixeltable.functions.pil.image import blend
|
|
9
10
|
from pixeltable.iterators import FrameIterator
|
|
10
11
|
from pixeltable.tests.utils import get_video_files, skip_test_if_not_installed, get_sentences, get_image_files
|
|
@@ -67,8 +68,6 @@ class TestFunctions:
|
|
|
67
68
|
def test_openai(self, test_client: pxt.Client) -> None:
|
|
68
69
|
skip_test_if_not_installed('openai')
|
|
69
70
|
TestFunctions.skip_test_if_no_openai_client()
|
|
70
|
-
if Env.get().openai_client is None:
|
|
71
|
-
pytest.skip(f'OpenAI client does not exist (missing API key?).')
|
|
72
71
|
cl = test_client
|
|
73
72
|
t = cl.create_table('test_tbl', {'input': StringType()})
|
|
74
73
|
from pixeltable.functions.openai import chat_completions, embeddings, moderations
|
|
@@ -110,8 +109,10 @@ class TestFunctions:
|
|
|
110
109
|
|
|
111
110
|
@staticmethod
|
|
112
111
|
def skip_test_if_no_openai_client() -> None:
|
|
113
|
-
|
|
114
|
-
|
|
112
|
+
try:
|
|
113
|
+
_ = Env.get().openai_client
|
|
114
|
+
except excs.Error as exc:
|
|
115
|
+
pytest.skip(str(exc))
|
|
115
116
|
|
|
116
117
|
def test_together(self, test_client: pxt.Client) -> None:
|
|
117
118
|
skip_test_if_not_installed('together')
|
|
@@ -18,6 +18,7 @@ from pixeltable.iterators import FrameIterator
|
|
|
18
18
|
from pixeltable.tests.utils import \
|
|
19
19
|
make_tbl, create_table_data, read_data_file, get_video_files, get_audio_files, get_image_files, get_documents, \
|
|
20
20
|
assert_resultset_eq
|
|
21
|
+
from pixeltable.tests.utils import skip_test_if_not_installed
|
|
21
22
|
from pixeltable.type_system import \
|
|
22
23
|
StringType, IntType, FloatType, TimestampType, ImageType, VideoType, JsonType, BoolType, ArrayType, AudioType, \
|
|
23
24
|
DocumentType
|
|
@@ -296,6 +297,7 @@ class TestTable:
|
|
|
296
297
|
self.check_bad_media(test_client, rows, DocumentType(nullable=True))
|
|
297
298
|
|
|
298
299
|
def test_validate_external_url(self, test_client: pxt.Client) -> None:
|
|
300
|
+
skip_test_if_not_installed('boto3')
|
|
299
301
|
rows = [
|
|
300
302
|
{'media': 's3://open-images-dataset/validation/doesnotexist.jpg', 'is_bad_media': True},
|
|
301
303
|
{'media': 'https://archive.random.org/download?file=2024-01-28.bin', 'is_bad_media': True}, # 403 error
|
|
@@ -315,6 +317,7 @@ class TestTable:
|
|
|
315
317
|
self.check_bad_media(test_client, rows, VideoType(nullable=True))
|
|
316
318
|
|
|
317
319
|
def test_create_s3_image_table(self, test_client: pxt.Client) -> None:
|
|
320
|
+
skip_test_if_not_installed('boto3')
|
|
318
321
|
cl = test_client
|
|
319
322
|
tbl = cl.create_table('test', {'img': ImageType(nullable=False)})
|
|
320
323
|
# this is needed because Client.reset_catalog() doesn't call TableVersion.drop(), which would
|
|
@@ -371,6 +374,7 @@ class TestTable:
|
|
|
371
374
|
assert cache_stats.total_size == 0
|
|
372
375
|
|
|
373
376
|
def test_video_url(self, test_client: pxt.Client) -> None:
|
|
377
|
+
skip_test_if_not_installed('boto3')
|
|
374
378
|
cl = test_client
|
|
375
379
|
schema = {
|
|
376
380
|
'payload': IntType(nullable=False),
|
|
@@ -390,6 +394,7 @@ class TestTable:
|
|
|
390
394
|
cap.release()
|
|
391
395
|
|
|
392
396
|
def test_create_video_table(self, test_client: pxt.Client) -> None:
|
|
397
|
+
skip_test_if_not_installed('boto3')
|
|
393
398
|
cl = test_client
|
|
394
399
|
tbl = cl.create_table(
|
|
395
400
|
'test_tbl',
|
|
@@ -529,6 +534,7 @@ class TestTable:
|
|
|
529
534
|
assert 'expected ndarray((2, 3)' in str(exc_info.value)
|
|
530
535
|
|
|
531
536
|
def test_query(self, test_client: pxt.Client) -> None:
|
|
537
|
+
skip_test_if_not_installed('boto3')
|
|
532
538
|
cl = test_client
|
|
533
539
|
col_names = ['c1', 'c2', 'c3', 'c4', 'c5']
|
|
534
540
|
t = make_tbl(cl, 'test', col_names)
|
|
@@ -8,6 +8,7 @@ from pixeltable import catalog
|
|
|
8
8
|
from pixeltable import exceptions as excs
|
|
9
9
|
from pixeltable.iterators import FrameIterator
|
|
10
10
|
from pixeltable.tests.utils import get_video_files
|
|
11
|
+
from pixeltable.tests.utils import skip_test_if_not_installed
|
|
11
12
|
from pixeltable.type_system import VideoType, ImageType
|
|
12
13
|
from pixeltable.utils.media_store import MediaStore
|
|
13
14
|
|
|
@@ -61,6 +62,7 @@ class TestVideo:
|
|
|
61
62
|
assert MediaStore.count(view.get_id()) == view.count()
|
|
62
63
|
|
|
63
64
|
def test_query(self, test_client: pxt.client) -> None:
|
|
65
|
+
skip_test_if_not_installed('boto3')
|
|
64
66
|
video_filepaths = get_video_files()
|
|
65
67
|
cl = test_client
|
|
66
68
|
base_t, view_t = self.create_tbls(cl)
|
|
@@ -18,6 +18,8 @@ from pixeltable.type_system import \
|
|
|
18
18
|
ColumnType, StringType, IntType, FloatType, ArrayType, BoolType, TimestampType, JsonType, ImageType, VideoType
|
|
19
19
|
|
|
20
20
|
|
|
21
|
+
|
|
22
|
+
|
|
21
23
|
def make_default_type(t: ColumnType.Type) -> ColumnType:
|
|
22
24
|
if t == ColumnType.Type.STRING:
|
|
23
25
|
return StringType()
|
|
@@ -223,19 +225,28 @@ def read_data_file(dir_name: str, file_name: str, path_col_names: Optional[List[
|
|
|
223
225
|
df[col_name] = df.apply(lambda r: str(abs_path / r[col_name]), axis=1)
|
|
224
226
|
return df.to_dict(orient='records')
|
|
225
227
|
|
|
226
|
-
def get_video_files(include_bad_video=False) -> List[str]:
|
|
228
|
+
def get_video_files(include_bad_video: bool = False) -> List[str]:
|
|
227
229
|
tests_dir = os.path.dirname(__file__) # search with respect to tests/ dir
|
|
228
230
|
glob_result = glob.glob(f'{tests_dir}/**/videos/*', recursive=True)
|
|
229
231
|
if not include_bad_video:
|
|
230
232
|
glob_result = [f for f in glob_result if 'bad_video' not in f]
|
|
233
|
+
|
|
234
|
+
half_res = [f for f in glob_result if 'half_res' in f or 'bad_video' in f]
|
|
235
|
+
return half_res
|
|
236
|
+
|
|
237
|
+
def get_test_video_files() -> List[str]:
|
|
238
|
+
tests_dir = os.path.dirname(__file__) # search with respect to tests/ dir
|
|
239
|
+
glob_result = glob.glob(f'{tests_dir}/**/test_videos/*', recursive=True)
|
|
231
240
|
return glob_result
|
|
232
241
|
|
|
233
|
-
def get_image_files() -> List[str]:
|
|
242
|
+
def get_image_files(include_bad_image: bool = False) -> List[str]:
|
|
234
243
|
tests_dir = os.path.dirname(__file__) # search with respect to tests/ dir
|
|
235
244
|
glob_result = glob.glob(f'{tests_dir}/**/imagenette2-160/*', recursive=True)
|
|
245
|
+
if not include_bad_image:
|
|
246
|
+
glob_result = [f for f in glob_result if 'bad_image' not in f]
|
|
236
247
|
return glob_result
|
|
237
248
|
|
|
238
|
-
def get_audio_files(include_bad_audio=False) -> List[str]:
|
|
249
|
+
def get_audio_files(include_bad_audio: bool = False) -> List[str]:
|
|
239
250
|
tests_dir = os.path.dirname(__file__)
|
|
240
251
|
glob_result = glob.glob(f'{tests_dir}/**/audio/*', recursive=True)
|
|
241
252
|
if not include_bad_audio:
|
|
@@ -250,7 +261,7 @@ def get_documents() -> List[str]:
|
|
|
250
261
|
def get_sentences(n: int = 100) -> List[str]:
|
|
251
262
|
tests_dir = os.path.dirname(__file__)
|
|
252
263
|
path = glob.glob(f'{tests_dir}/**/jeopardy.json', recursive=True)[0]
|
|
253
|
-
with open(path, 'r') as f:
|
|
264
|
+
with open(path, 'r', encoding='utf8') as f:
|
|
254
265
|
questions_list = json.load(f)
|
|
255
266
|
# this dataset contains \' around the questions
|
|
256
267
|
return [q['question'].replace("'", '') for q in questions_list[:n]]
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import av
|
|
2
|
+
import PIL.Image
|
|
3
|
+
import PIL.ImageDraw
|
|
4
|
+
import PIL.ImageFont
|
|
5
|
+
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Optional
|
|
8
|
+
import tempfile
|
|
9
|
+
import math
|
|
10
|
+
|
|
11
|
+
def create_test_video(
|
|
12
|
+
frame_count: int,
|
|
13
|
+
frame_rate: float = 1.0,
|
|
14
|
+
frame_width: int = 224,
|
|
15
|
+
aspect_ratio: str = '16:9',
|
|
16
|
+
frame_height: Optional[int] = None,
|
|
17
|
+
output_path: Optional[Path] = None,
|
|
18
|
+
font_file: str = '/Library/Fonts/Arial Unicode.ttf',
|
|
19
|
+
) -> Path:
|
|
20
|
+
"""
|
|
21
|
+
Creates an .mp4 video file such as the ones in /tests/data/test_videos
|
|
22
|
+
The video contains a frame number in each frame (for visual sanity check).
|
|
23
|
+
|
|
24
|
+
Args:
|
|
25
|
+
frame_count: Number of frames to create
|
|
26
|
+
frame_rate: Frame rate of the video
|
|
27
|
+
frame_width (int): Width in pixels of the video frame. Note: cost of decoding increases dramatically
|
|
28
|
+
with frame width * frame height.
|
|
29
|
+
aspect_ratio: Aspect ratio (width/height) of the video frames string of form 'width:height'
|
|
30
|
+
frame_height: Height of the video frame, if given, aspect_ratio is ignored
|
|
31
|
+
output_path: Path to save the video file
|
|
32
|
+
font_file: Path to the font file used for text.
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
if output_path is None:
|
|
36
|
+
output_path = Path(tempfile.NamedTemporaryFile(suffix='.mp4', delete=False).name)
|
|
37
|
+
|
|
38
|
+
parts = [int(p) for p in aspect_ratio.split(':')]
|
|
39
|
+
assert len(parts) == 2
|
|
40
|
+
aspect_ratio = parts[0] / parts[1]
|
|
41
|
+
|
|
42
|
+
if frame_height is None:
|
|
43
|
+
frame_height = math.ceil(frame_width / aspect_ratio)
|
|
44
|
+
|
|
45
|
+
frame_size = (frame_width, frame_height)
|
|
46
|
+
|
|
47
|
+
font_size = min(frame_height, frame_width) // 4
|
|
48
|
+
font = PIL.ImageFont.truetype(font=font_file, size=font_size)
|
|
49
|
+
font_fill = 0xFFFFFF # white
|
|
50
|
+
frame_color = 0xFFFFFF - font_fill # black
|
|
51
|
+
# Create a video container
|
|
52
|
+
container = av.open(str(output_path), mode='w')
|
|
53
|
+
|
|
54
|
+
# Add a video stream
|
|
55
|
+
stream = container.add_stream('h264', rate=frame_rate)
|
|
56
|
+
stream.width, stream.height = frame_size
|
|
57
|
+
stream.pix_fmt = 'yuv420p'
|
|
58
|
+
|
|
59
|
+
for frame_number in range(frame_count):
|
|
60
|
+
# Create an image with a number in it
|
|
61
|
+
image = PIL.Image.new('RGB', frame_size, color=frame_color)
|
|
62
|
+
draw = PIL.ImageDraw.Draw(image)
|
|
63
|
+
# Optionally, add a font here if you have one
|
|
64
|
+
text = str(frame_number)
|
|
65
|
+
_, _, text_width, text_height = draw.textbbox((0, 0), text, font=font)
|
|
66
|
+
text_position = ((frame_size[0] - text_width) // 2, (frame_size[1] - text_height) // 2)
|
|
67
|
+
draw.text(text_position, text, font=font, fill=font_fill)
|
|
68
|
+
|
|
69
|
+
# Convert the PIL image to an AVFrame
|
|
70
|
+
frame = av.VideoFrame.from_image(image)
|
|
71
|
+
|
|
72
|
+
# Encode and write the frame
|
|
73
|
+
for packet in stream.encode(frame):
|
|
74
|
+
container.mux(packet)
|
|
75
|
+
|
|
76
|
+
# Flush and close the stream
|
|
77
|
+
for packet in stream.encode():
|
|
78
|
+
container.mux(packet)
|
|
79
|
+
|
|
80
|
+
container.close()
|
|
81
|
+
return output_path
|
|
@@ -911,7 +911,7 @@ class DocumentType(ColumnType):
|
|
|
911
911
|
def validate_media(self, val: Any) -> None:
|
|
912
912
|
assert isinstance(val, str)
|
|
913
913
|
from pixeltable.utils.documents import get_document_handle
|
|
914
|
-
with open(val, 'r') as fh:
|
|
914
|
+
with open(val, 'r', encoding='utf8') as fh:
|
|
915
915
|
try:
|
|
916
916
|
s = fh.read()
|
|
917
917
|
dh = get_document_handle(s)
|
|
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
|
|
|
4
4
|
|
|
5
5
|
[tool.poetry]
|
|
6
6
|
name = "pixeltable"
|
|
7
|
-
version = "0.2.1"
|
|
7
|
+
version = "0.2.3"
|
|
8
8
|
description = "Pixeltable: The Multimodal AI Data Plane"
|
|
9
9
|
authors = ["Marcel Kornacker <marcelk@gmail.com>"]
|
|
10
10
|
readme = "README.md"
|
|
@@ -17,8 +17,8 @@ exclude = [
|
|
|
17
17
|
|
|
18
18
|
[tool.poetry.dependencies]
|
|
19
19
|
python = ">=3.9,<4.0"
|
|
20
|
-
numpy = "^1.
|
|
21
|
-
pandas = "
|
|
20
|
+
numpy = "^1.26"
|
|
21
|
+
pandas = ">=2.0,<3.0"
|
|
22
22
|
pillow = "^9.4.0"
|
|
23
23
|
opencv-python-headless = "^4.7.0.68"
|
|
24
24
|
tqdm = "^4.64.1"
|
|
@@ -31,9 +31,11 @@ sqlalchemy = {extras = ["mypy"], version = "^2.0.23"}
|
|
|
31
31
|
sqlalchemy-utils = "^0.41.1"
|
|
32
32
|
pgvector = "^0.2.1"
|
|
33
33
|
av = ">=10.0.0"
|
|
34
|
+
beautifulsoup4 = "^4.0.0"
|
|
35
|
+
requests = "^2.31.0"
|
|
34
36
|
pyyaml = "^6.0.1"
|
|
35
37
|
jinja2 = "^3.1.3"
|
|
36
|
-
pgserver = "0.0.
|
|
38
|
+
pgserver = "0.0.9"
|
|
37
39
|
|
|
38
40
|
[tool.poetry.group.dev]
|
|
39
41
|
optional = true
|
|
@@ -61,8 +63,6 @@ mkdocs-jupyter = "^0.24"
|
|
|
61
63
|
pycocotools = "^2.0.7"
|
|
62
64
|
ipykernel = "^6.27.1"
|
|
63
65
|
nbmake = "^1.4.6"
|
|
64
|
-
bs4 = "^0.0.2"
|
|
65
|
-
requests = "^2.31.0"
|
|
66
66
|
# packages required by various optional pieces of the codebase
|
|
67
67
|
torch = "^2.2"
|
|
68
68
|
torchvision = "^0.17"
|
|
@@ -74,7 +74,6 @@ boto3 = "^1.17"
|
|
|
74
74
|
spacy = "^3.0"
|
|
75
75
|
en-core-web-sm = {url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl"}
|
|
76
76
|
tiktoken = ">=0.3"
|
|
77
|
-
beautifulsoup4 = "^4.0.0"
|
|
78
77
|
sentence-transformers = "^2.0.0"
|
|
79
78
|
transformers = "^4.20"
|
|
80
79
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|