pixeltable 0.3.0__tar.gz → 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- {pixeltable-0.3.0 → pixeltable-0.3.2}/PKG-INFO +4 -1
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/__version__.py +2 -2
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/catalog/insertable_table.py +3 -3
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/catalog/table.py +2 -2
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/catalog/table_version.py +3 -2
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/catalog/view.py +1 -1
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/dataframe.py +52 -27
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/env.py +109 -4
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exec/__init__.py +1 -1
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exec/aggregation_node.py +3 -3
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exec/cache_prefetch_node.py +13 -7
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exec/component_iteration_node.py +3 -9
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exec/data_row_batch.py +17 -5
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exec/exec_node.py +32 -12
- pixeltable-0.3.2/pixeltable/exec/expr_eval/__init__.py +1 -0
- pixeltable-0.3.2/pixeltable/exec/expr_eval/evaluators.py +240 -0
- pixeltable-0.3.2/pixeltable/exec/expr_eval/expr_eval_node.py +408 -0
- pixeltable-0.3.2/pixeltable/exec/expr_eval/globals.py +113 -0
- pixeltable-0.3.2/pixeltable/exec/expr_eval/row_buffer.py +76 -0
- pixeltable-0.3.2/pixeltable/exec/expr_eval/schedulers.py +240 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exec/in_memory_data_node.py +2 -2
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exec/row_update_node.py +14 -14
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exec/sql_node.py +2 -2
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exprs/column_ref.py +5 -1
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exprs/data_row.py +50 -40
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exprs/expr.py +57 -12
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exprs/function_call.py +54 -19
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exprs/inline_expr.py +12 -21
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exprs/literal.py +25 -8
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exprs/row_builder.py +25 -2
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/func/aggregate_function.py +4 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/func/callable_function.py +54 -4
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/func/expr_template_function.py +5 -1
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/func/function.py +48 -7
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/func/query_template_function.py +16 -7
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/func/udf.py +7 -1
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/functions/__init__.py +1 -1
- pixeltable-0.3.2/pixeltable/functions/anthropic.py +230 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/functions/gemini.py +2 -6
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/functions/openai.py +219 -28
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/globals.py +2 -3
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/io/hf_datasets.py +1 -1
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/io/label_studio.py +5 -5
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/io/parquet.py +1 -1
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/metadata/__init__.py +2 -1
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/plan.py +24 -9
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/store.py +6 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/type_system.py +73 -36
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/utils/arrow.py +3 -8
- pixeltable-0.3.2/pixeltable/utils/console_output.py +41 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/utils/filecache.py +1 -1
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pyproject.toml +8 -4
- pixeltable-0.3.0/pixeltable/exec/expr_eval_node.py +0 -232
- pixeltable-0.3.0/pixeltable/functions/anthropic.py +0 -154
- {pixeltable-0.3.0 → pixeltable-0.3.2}/LICENSE +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/README.md +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/__init__.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/catalog/__init__.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/catalog/catalog.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/catalog/column.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/catalog/dir.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/catalog/globals.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/catalog/named_function.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/catalog/path.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/catalog/path_dict.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/catalog/schema_object.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/catalog/table_version_path.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exceptions.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exec/exec_context.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exprs/__init__.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exprs/arithmetic_expr.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exprs/array_slice.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exprs/column_property_ref.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exprs/comparison.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exprs/compound_predicate.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exprs/expr_dict.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exprs/expr_set.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exprs/globals.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exprs/in_predicate.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exprs/is_null.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exprs/json_mapper.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exprs/json_path.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exprs/method_ref.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exprs/object_ref.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exprs/rowid_ref.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exprs/similarity_expr.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exprs/sql_element_cache.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exprs/type_cast.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/exprs/variable.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/ext/__init__.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/ext/functions/__init__.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/ext/functions/whisperx.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/ext/functions/yolox.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/func/__init__.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/func/function_registry.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/func/globals.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/func/signature.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/func/tools.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/functions/audio.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/functions/fireworks.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/functions/globals.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/functions/huggingface.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/functions/image.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/functions/json.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/functions/llama_cpp.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/functions/math.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/functions/mistralai.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/functions/ollama.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/functions/replicate.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/functions/string.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/functions/timestamp.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/functions/together.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/functions/util.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/functions/video.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/functions/vision.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/functions/whisper.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/index/__init__.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/index/base.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/index/btree.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/index/embedding_index.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/io/__init__.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/io/external_store.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/io/fiftyone.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/io/globals.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/io/pandas.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/iterators/__init__.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/iterators/base.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/iterators/document.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/iterators/image.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/iterators/string.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/iterators/video.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/metadata/converters/convert_10.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/metadata/converters/convert_12.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/metadata/converters/convert_13.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/metadata/converters/convert_14.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/metadata/converters/convert_15.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/metadata/converters/convert_16.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/metadata/converters/convert_17.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/metadata/converters/convert_18.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/metadata/converters/convert_19.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/metadata/converters/convert_20.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/metadata/converters/convert_21.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/metadata/converters/convert_22.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/metadata/converters/convert_23.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/metadata/converters/convert_24.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/metadata/converters/convert_25.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/metadata/converters/util.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/metadata/notes.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/metadata/schema.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/py.typed +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/utils/__init__.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/utils/coco.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/utils/code.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/utils/description_helper.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/utils/documents.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/utils/formatter.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/utils/http_server.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/utils/media_store.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/utils/pytorch.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/utils/s3.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/utils/sql.py +0 -0
- {pixeltable-0.3.0 → pixeltable-0.3.2}/pixeltable/utils/transactional_directory.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: pixeltable
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.3.2
|
|
4
4
|
Summary: AI Data Infrastructure: Declarative, Multimodal, and Incremental
|
|
5
5
|
Home-page: https://pixeltable.com/
|
|
6
6
|
License: Apache-2.0
|
|
@@ -27,11 +27,14 @@ Requires-Dist: av (>=10.0.0)
|
|
|
27
27
|
Requires-Dist: beautifulsoup4 (>=4.0.0,<5.0.0)
|
|
28
28
|
Requires-Dist: cloudpickle (>=2.2.1,<3.0.0)
|
|
29
29
|
Requires-Dist: ftfy (>=6.2.0,<7.0.0)
|
|
30
|
+
Requires-Dist: httpcore (>=1.0.3)
|
|
31
|
+
Requires-Dist: httpx (>=0.27)
|
|
30
32
|
Requires-Dist: jinja2 (>=3.1.3,<4.0.0)
|
|
31
33
|
Requires-Dist: jmespath (>=1.0.1,<2.0.0)
|
|
32
34
|
Requires-Dist: jsonschema (>=4.1.0)
|
|
33
35
|
Requires-Dist: lxml (>=5.0)
|
|
34
36
|
Requires-Dist: more-itertools (>=10.2,<11.0)
|
|
37
|
+
Requires-Dist: nest_asyncio (>=1.5)
|
|
35
38
|
Requires-Dist: numpy (>=1.25,<2.0)
|
|
36
39
|
Requires-Dist: pandas (>=2.0,<3.0)
|
|
37
40
|
Requires-Dist: pgvector (>=0.2.1,<0.3.0)
|
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
# These version placeholders will be replaced during build.
|
|
2
|
-
__version__ = "0.3.0"
|
|
3
|
-
__version_tuple__ = (0, 3, 0)
|
|
2
|
+
__version__ = "0.3.2"
|
|
3
|
+
__version_tuple__ = (0, 3, 2)
|
|
@@ -68,7 +68,7 @@ class InsertableTable(Table):
|
|
|
68
68
|
cat.tbls[tbl._id] = tbl
|
|
69
69
|
|
|
70
70
|
_logger.info(f'Created table `{name}`, id={tbl_version.id}')
|
|
71
|
-
|
|
71
|
+
Env.get().console_logger.info(f'Created table `{name}`.')
|
|
72
72
|
return tbl
|
|
73
73
|
|
|
74
74
|
def get_metadata(self) -> dict[str, Any]:
|
|
@@ -134,7 +134,7 @@ class InsertableTable(Table):
|
|
|
134
134
|
f'Inserted {status.num_rows} row{"" if status.num_rows == 1 else "s"} '
|
|
135
135
|
f'with {status.num_excs} error{"" if status.num_excs == 1 else "s"}{cols_with_excs_str}.'
|
|
136
136
|
)
|
|
137
|
-
|
|
137
|
+
Env.get().console_logger.info(msg)
|
|
138
138
|
_logger.info(f'InsertableTable {self._name}: {msg}')
|
|
139
139
|
FileCache.get().emit_eviction_warnings()
|
|
140
140
|
return status
|
|
@@ -164,7 +164,7 @@ class InsertableTable(Table):
|
|
|
164
164
|
row[col_name] = checked_val
|
|
165
165
|
except TypeError as e:
|
|
166
166
|
msg = str(e)
|
|
167
|
-
raise excs.Error(f'Error in column {col.name}: {msg[0].lower() + msg[1:]}\nRow: {row}')
|
|
167
|
+
raise excs.Error(f'Error in column {col.name}: {msg[0].lower() + msg[1:]}\nRow: {row}') from e
|
|
168
168
|
|
|
169
169
|
def delete(self, where: Optional['pxt.exprs.Expr'] = None) -> UpdateStatus:
|
|
170
170
|
"""Delete rows in this table.
|
|
@@ -1345,7 +1345,7 @@ class Table(SchemaObject):
|
|
|
1345
1345
|
raise excs.Error(f'Table `{self._name}` already has an external store with that name: {store.name}')
|
|
1346
1346
|
_logger.info(f'Linking external store `{store.name}` to table `{self._name}`')
|
|
1347
1347
|
self._tbl_version.link_external_store(store)
|
|
1348
|
-
|
|
1348
|
+
env.Env.get().console_logger.info(f'Linked external store `{store.name}` to table `{self._name}`.')
|
|
1349
1349
|
|
|
1350
1350
|
def unlink_external_stores(
|
|
1351
1351
|
self,
|
|
@@ -1381,7 +1381,7 @@ class Table(SchemaObject):
|
|
|
1381
1381
|
|
|
1382
1382
|
for store in stores:
|
|
1383
1383
|
self._tbl_version.unlink_external_store(store, delete_external_data=delete_external_data)
|
|
1384
|
-
|
|
1384
|
+
env.Env.get().console_logger.info(f'Unlinked external store from table `{self._name}`: {store}')
|
|
1385
1385
|
|
|
1386
1386
|
def sync(
|
|
1387
1387
|
self,
|
|
@@ -532,7 +532,7 @@ class TableVersion:
|
|
|
532
532
|
f'Added {status.num_rows} column value{"" if status.num_rows == 1 else "s"} '
|
|
533
533
|
f'with {status.num_excs} error{"" if status.num_excs == 1 else "s"}.'
|
|
534
534
|
)
|
|
535
|
-
|
|
535
|
+
Env.get().console_logger.info(msg)
|
|
536
536
|
_logger.info(f'Columns {[col.name for col in cols]}: {msg}')
|
|
537
537
|
return status
|
|
538
538
|
|
|
@@ -734,7 +734,8 @@ class TableVersion:
|
|
|
734
734
|
if conn is None:
|
|
735
735
|
with Env.get().engine.begin() as conn:
|
|
736
736
|
return self._insert(
|
|
737
|
-
plan, conn, time.time(), print_stats=print_stats, rowids=rowids(),
|
|
737
|
+
plan, conn, time.time(), print_stats=print_stats, rowids=rowids(),
|
|
738
|
+
abort_on_exc=fail_on_exception)
|
|
738
739
|
else:
|
|
739
740
|
return self._insert(
|
|
740
741
|
plan, conn, time.time(), print_stats=print_stats, rowids=rowids(), abort_on_exc=fail_on_exception)
|
|
@@ -156,7 +156,7 @@ class View(Table):
|
|
|
156
156
|
plan, num_values_per_row = Planner.create_view_load_plan(view._tbl_version_path)
|
|
157
157
|
num_rows, num_excs, cols_with_excs = tbl_version.store_tbl.insert_rows(
|
|
158
158
|
plan, session.connection(), v_min=tbl_version.version)
|
|
159
|
-
|
|
159
|
+
Env.get().console_logger.info(f'Created view `{name}` with {num_rows} rows, {num_excs} exceptions.')
|
|
160
160
|
|
|
161
161
|
session.commit()
|
|
162
162
|
cat = Catalog.get()
|
|
@@ -8,18 +8,15 @@ import json
|
|
|
8
8
|
import logging
|
|
9
9
|
import traceback
|
|
10
10
|
from pathlib import Path
|
|
11
|
-
from typing import TYPE_CHECKING, Any, Callable, Hashable, Iterator, Optional, Sequence, Union,
|
|
11
|
+
from typing import TYPE_CHECKING, Any, Callable, Hashable, Iterator, Optional, Sequence, Union, AsyncIterator, NoReturn
|
|
12
12
|
|
|
13
|
+
import numpy as np
|
|
13
14
|
import pandas as pd
|
|
14
|
-
import pandas.io.formats.style
|
|
15
15
|
import sqlalchemy as sql
|
|
16
16
|
|
|
17
|
-
import pixeltable.catalog as catalog
|
|
18
17
|
import pixeltable.exceptions as excs
|
|
19
|
-
import pixeltable.exprs as exprs
|
|
20
18
|
import pixeltable.type_system as ts
|
|
21
|
-
from pixeltable import exec
|
|
22
|
-
from pixeltable import plan
|
|
19
|
+
from pixeltable import catalog, exec, exprs, plan
|
|
23
20
|
from pixeltable.catalog import is_valid_identifier
|
|
24
21
|
from pixeltable.catalog.globals import UpdateStatus
|
|
25
22
|
from pixeltable.env import Env
|
|
@@ -29,6 +26,7 @@ from pixeltable.utils.formatter import Formatter
|
|
|
29
26
|
|
|
30
27
|
if TYPE_CHECKING:
|
|
31
28
|
import torch
|
|
29
|
+
import torch.utils.data
|
|
32
30
|
|
|
33
31
|
__all__ = ['DataFrame']
|
|
34
32
|
|
|
@@ -268,6 +266,20 @@ class DataFrame:
|
|
|
268
266
|
else:
|
|
269
267
|
yield from exec_plan(conn)
|
|
270
268
|
|
|
269
|
+
async def _aexec(self, conn: sql.engine.Connection) -> AsyncIterator[exprs.DataRow]:
|
|
270
|
+
"""Run the query and return rows as a generator.
|
|
271
|
+
This function must not modify the state of the DataFrame, otherwise it breaks dataset caching.
|
|
272
|
+
"""
|
|
273
|
+
plan = self._create_query_plan()
|
|
274
|
+
plan.ctx.set_conn(conn)
|
|
275
|
+
plan.open()
|
|
276
|
+
try:
|
|
277
|
+
async for row_batch in plan:
|
|
278
|
+
for row in row_batch:
|
|
279
|
+
yield row
|
|
280
|
+
finally:
|
|
281
|
+
plan.close()
|
|
282
|
+
|
|
271
283
|
def _create_query_plan(self) -> exec.ExecNode:
|
|
272
284
|
# construct a group-by clause if we're grouping by a table
|
|
273
285
|
group_by_clause: Optional[list[exprs.Expr]] = None
|
|
@@ -392,26 +404,29 @@ class DataFrame:
|
|
|
392
404
|
group_by_clause=group_by_clause, grouping_tbl=self.grouping_tbl,
|
|
393
405
|
order_by_clause=order_by_clause, limit=self.limit_val)
|
|
394
406
|
|
|
407
|
+
def _raise_expr_eval_err(self, e: excs.ExprEvalError) -> NoReturn:
|
|
408
|
+
msg = f'In row {e.row_num} the {e.expr_msg} encountered exception ' f'{type(e.exc).__name__}:\n{str(e.exc)}'
|
|
409
|
+
if len(e.input_vals) > 0:
|
|
410
|
+
input_msgs = [
|
|
411
|
+
f"'{d}' = {d.col_type.print_value(e.input_vals[i])}" for i, d in enumerate(e.expr.dependencies())
|
|
412
|
+
]
|
|
413
|
+
msg += f'\nwith {", ".join(input_msgs)}'
|
|
414
|
+
assert e.exc_tb is not None
|
|
415
|
+
stack_trace = traceback.format_tb(e.exc_tb)
|
|
416
|
+
if len(stack_trace) > 2:
|
|
417
|
+
# append a stack trace if the exception happened in user code
|
|
418
|
+
# (frame 0 is ExprEvaluator and frame 1 is some expr's eval()
|
|
419
|
+
nl = '\n'
|
|
420
|
+
# [-1:0:-1]: leave out entry 0 and reverse order, so that the most recent frame is at the top
|
|
421
|
+
msg += f'\nStack:\n{nl.join(stack_trace[-1:1:-1])}'
|
|
422
|
+
raise excs.Error(msg)
|
|
423
|
+
|
|
395
424
|
def _output_row_iterator(self, conn: Optional[sql.engine.Connection] = None) -> Iterator[list]:
|
|
396
425
|
try:
|
|
397
426
|
for data_row in self._exec(conn):
|
|
398
427
|
yield [data_row[e.slot_idx] for e in self._select_list_exprs]
|
|
399
428
|
except excs.ExprEvalError as e:
|
|
400
|
-
|
|
401
|
-
if len(e.input_vals) > 0:
|
|
402
|
-
input_msgs = [
|
|
403
|
-
f"'{d}' = {d.col_type.print_value(e.input_vals[i])}" for i, d in enumerate(e.expr.dependencies())
|
|
404
|
-
]
|
|
405
|
-
msg += f'\nwith {", ".join(input_msgs)}'
|
|
406
|
-
assert e.exc_tb is not None
|
|
407
|
-
stack_trace = traceback.format_tb(e.exc_tb)
|
|
408
|
-
if len(stack_trace) > 2:
|
|
409
|
-
# append a stack trace if the exception happened in user code
|
|
410
|
-
# (frame 0 is ExprEvaluator and frame 1 is some expr's eval()
|
|
411
|
-
nl = '\n'
|
|
412
|
-
# [-1:0:-1]: leave out entry 0 and reverse order, so that the most recent frame is at the top
|
|
413
|
-
msg += f'\nStack:\n{nl.join(stack_trace[-1:1:-1])}'
|
|
414
|
-
raise excs.Error(msg)
|
|
429
|
+
self._raise_expr_eval_err(e)
|
|
415
430
|
except sql.exc.DBAPIError as e:
|
|
416
431
|
raise excs.Error(f'Error during SQL execution:\n{e}')
|
|
417
432
|
|
|
@@ -421,6 +436,18 @@ class DataFrame:
|
|
|
421
436
|
def _collect(self, conn: Optional[sql.engine.Connection] = None) -> DataFrameResultSet:
|
|
422
437
|
return DataFrameResultSet(list(self._output_row_iterator(conn)), self.schema)
|
|
423
438
|
|
|
439
|
+
async def _acollect(self, conn: sql.engine.Connection) -> DataFrameResultSet:
|
|
440
|
+
try:
|
|
441
|
+
result = [
|
|
442
|
+
[row[e.slot_idx] for e in self._select_list_exprs]
|
|
443
|
+
async for row in self._aexec(conn)
|
|
444
|
+
]
|
|
445
|
+
return DataFrameResultSet(result, self.schema)
|
|
446
|
+
except excs.ExprEvalError as e:
|
|
447
|
+
self._raise_expr_eval_err(e)
|
|
448
|
+
except sql.exc.DBAPIError as e:
|
|
449
|
+
raise excs.Error(f'Error during SQL execution:\n{e}')
|
|
450
|
+
|
|
424
451
|
def count(self) -> int:
|
|
425
452
|
"""Return the number of rows in the DataFrame.
|
|
426
453
|
|
|
@@ -540,10 +567,10 @@ class DataFrame:
|
|
|
540
567
|
for raw_expr, name in base_list:
|
|
541
568
|
if isinstance(raw_expr, exprs.Expr):
|
|
542
569
|
select_list.append((raw_expr, name))
|
|
543
|
-
elif isinstance(raw_expr, dict):
|
|
544
|
-
select_list.append((exprs.
|
|
545
|
-
elif isinstance(raw_expr,
|
|
546
|
-
select_list.append((exprs.
|
|
570
|
+
elif isinstance(raw_expr, (dict, list, tuple)):
|
|
571
|
+
select_list.append((exprs.Expr.from_object(raw_expr), name))
|
|
572
|
+
elif isinstance(raw_expr, np.ndarray):
|
|
573
|
+
select_list.append((exprs.Expr.from_array(raw_expr), name))
|
|
547
574
|
else:
|
|
548
575
|
select_list.append((exprs.Literal(raw_expr), name))
|
|
549
576
|
expr = select_list[-1][0]
|
|
@@ -1031,8 +1058,6 @@ class DataFrame:
|
|
|
1031
1058
|
else:
|
|
1032
1059
|
return write_coco_dataset(self, dest_path)
|
|
1033
1060
|
|
|
1034
|
-
# TODO Factor this out into a separate module.
|
|
1035
|
-
# The return type is unresolvable, but torch can't be imported since it's an optional dependency.
|
|
1036
1061
|
def to_pytorch_dataset(self, image_format: str = 'pt') -> 'torch.utils.data.IterableDataset':
|
|
1037
1062
|
"""
|
|
1038
1063
|
Convert the dataframe to a pytorch IterableDataset suitable for parallel loading
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
from abc import abstractmethod
|
|
3
4
|
import datetime
|
|
4
5
|
import glob
|
|
5
6
|
import http.server
|
|
@@ -15,8 +16,9 @@ import sys
|
|
|
15
16
|
import threading
|
|
16
17
|
import uuid
|
|
17
18
|
import warnings
|
|
18
|
-
from dataclasses import dataclass
|
|
19
|
+
from dataclasses import dataclass, field
|
|
19
20
|
from pathlib import Path
|
|
21
|
+
from sys import stdout
|
|
20
22
|
from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar
|
|
21
23
|
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
|
|
22
24
|
|
|
@@ -27,12 +29,18 @@ from tqdm import TqdmWarning
|
|
|
27
29
|
|
|
28
30
|
import pixeltable.exceptions as excs
|
|
29
31
|
from pixeltable import metadata
|
|
32
|
+
from pixeltable.utils.console_output import ConsoleLogger, ConsoleMessageFilter, ConsoleOutputHandler, map_level
|
|
30
33
|
from pixeltable.utils.http_server import make_server
|
|
31
34
|
|
|
32
35
|
if TYPE_CHECKING:
|
|
33
36
|
import spacy
|
|
34
37
|
|
|
35
38
|
|
|
39
|
+
_logger = logging.getLogger('pixeltable')
|
|
40
|
+
|
|
41
|
+
T = TypeVar('T')
|
|
42
|
+
|
|
43
|
+
|
|
36
44
|
class Env:
|
|
37
45
|
"""
|
|
38
46
|
Store for runtime globals.
|
|
@@ -61,6 +69,7 @@ class Env:
|
|
|
61
69
|
_httpd: Optional[http.server.HTTPServer]
|
|
62
70
|
_http_address: Optional[str]
|
|
63
71
|
_logger: logging.Logger
|
|
72
|
+
_console_logger: ConsoleLogger
|
|
64
73
|
_default_log_level: int
|
|
65
74
|
_logfilename: Optional[str]
|
|
66
75
|
_log_to_stdout: bool
|
|
@@ -70,6 +79,8 @@ class Env:
|
|
|
70
79
|
_stdout_handler: logging.StreamHandler
|
|
71
80
|
_initialized: bool
|
|
72
81
|
|
|
82
|
+
_resource_pool_info: dict[str, Any]
|
|
83
|
+
|
|
73
84
|
@classmethod
|
|
74
85
|
def get(cls) -> Env:
|
|
75
86
|
if cls._instance is None:
|
|
@@ -84,6 +95,8 @@ class Env:
|
|
|
84
95
|
cls._instance = env
|
|
85
96
|
|
|
86
97
|
def __init__(self):
|
|
98
|
+
assert self._instance is None, 'Env is a singleton; use Env.get() to access the instance'
|
|
99
|
+
|
|
87
100
|
self._home = None
|
|
88
101
|
self._media_dir = None # computed media files
|
|
89
102
|
self._file_cache_dir = None # cached media files with external URL
|
|
@@ -121,6 +134,8 @@ class Env:
|
|
|
121
134
|
self._stdout_handler.setFormatter(logging.Formatter(self._log_fmt_str))
|
|
122
135
|
self._initialized = False
|
|
123
136
|
|
|
137
|
+
self._resource_pool_info = {}
|
|
138
|
+
|
|
124
139
|
@property
|
|
125
140
|
def config(self) -> Config:
|
|
126
141
|
assert self._config is not None
|
|
@@ -221,6 +236,10 @@ class Env:
|
|
|
221
236
|
else:
|
|
222
237
|
return False
|
|
223
238
|
|
|
239
|
+
@property
|
|
240
|
+
def console_logger(self) -> ConsoleLogger:
|
|
241
|
+
return self._console_logger
|
|
242
|
+
|
|
224
243
|
def _set_up(self, echo: bool = False, reinit_db: bool = False) -> None:
|
|
225
244
|
if self._initialized:
|
|
226
245
|
return
|
|
@@ -278,6 +297,14 @@ class Env:
|
|
|
278
297
|
warnings.simplefilter('ignore', category=UserWarning)
|
|
279
298
|
warnings.simplefilter('ignore', category=FutureWarning)
|
|
280
299
|
|
|
300
|
+
# Set verbose level for user visible console messages
|
|
301
|
+
verbosity = map_level(self._config.get_int_value('verbosity'))
|
|
302
|
+
stdout_handler = ConsoleOutputHandler(stream=stdout)
|
|
303
|
+
stdout_handler.setLevel(verbosity)
|
|
304
|
+
stdout_handler.addFilter(ConsoleMessageFilter())
|
|
305
|
+
self._logger.addHandler(stdout_handler)
|
|
306
|
+
self._console_logger = ConsoleLogger(self._logger)
|
|
307
|
+
|
|
281
308
|
# configure _logger to log to a file
|
|
282
309
|
self._logfilename = datetime.datetime.now().strftime('%Y%m%d_%H%M%S') + '.log'
|
|
283
310
|
fh = logging.FileHandler(self._log_dir / self._logfilename, mode='w')
|
|
@@ -351,7 +378,7 @@ class Env:
|
|
|
351
378
|
schema.base_metadata.create_all(self._sa_engine)
|
|
352
379
|
metadata.create_system_info(self._sa_engine)
|
|
353
380
|
|
|
354
|
-
|
|
381
|
+
self.console_logger.info(f'Connected to Pixeltable database at: {self.db_url}')
|
|
355
382
|
|
|
356
383
|
# we now have a home directory and db; start other services
|
|
357
384
|
self._set_up_runtime()
|
|
@@ -609,6 +636,16 @@ class Env:
|
|
|
609
636
|
def create_tmp_path(self, extension: str = '') -> Path:
|
|
610
637
|
return self._tmp_dir / f'{uuid.uuid4()}{extension}'
|
|
611
638
|
|
|
639
|
+
|
|
640
|
+
#def get_resource_pool_info(self, pool_id: str, pool_info_cls: Optional[Type[T]]) -> T:
|
|
641
|
+
def get_resource_pool_info(self, pool_id: str, make_pool_info: Optional[Callable[[], T]] = None) -> T:
|
|
642
|
+
"""Returns the info object for the given id, creating it if necessary."""
|
|
643
|
+
info = self._resource_pool_info.get(pool_id)
|
|
644
|
+
if info is None and make_pool_info is not None:
|
|
645
|
+
info = make_pool_info()
|
|
646
|
+
self._resource_pool_info[pool_id] = info
|
|
647
|
+
return info
|
|
648
|
+
|
|
612
649
|
@property
|
|
613
650
|
def home(self) -> Path:
|
|
614
651
|
assert self._home is not None
|
|
@@ -686,8 +723,6 @@ class Config:
|
|
|
686
723
|
"""
|
|
687
724
|
__config: dict[str, Any]
|
|
688
725
|
|
|
689
|
-
T = TypeVar('T')
|
|
690
|
-
|
|
691
726
|
@classmethod
|
|
692
727
|
def from_file(cls, path: Path) -> Config:
|
|
693
728
|
"""
|
|
@@ -767,3 +802,73 @@ class PackageInfo:
|
|
|
767
802
|
is_installed: bool
|
|
768
803
|
library_name: str # pypi library name (may be different from package name)
|
|
769
804
|
version: Optional[list[int]] = None # installed version, as a list of components (such as [3,0,2] for "3.0.2")
|
|
805
|
+
|
|
806
|
+
|
|
807
|
+
TIME_FORMAT = '%H:%M.%S %f'
|
|
808
|
+
|
|
809
|
+
|
|
810
|
+
@dataclass
|
|
811
|
+
class RateLimitsInfo:
|
|
812
|
+
"""
|
|
813
|
+
Abstract base class for resource pools made up of rate limits for different resources.
|
|
814
|
+
|
|
815
|
+
Rate limits and currently remaining resources are periodically reported via record().
|
|
816
|
+
|
|
817
|
+
Subclasses provide operational customization via:
|
|
818
|
+
- get_retry_delay()
|
|
819
|
+
- get_request_resources(self, ...) -> dict[str, int]
|
|
820
|
+
with parameters that are a subset of those of the udf that creates the subclass's instance
|
|
821
|
+
"""
|
|
822
|
+
|
|
823
|
+
# get_request_resources:
|
|
824
|
+
# - Returns estimated resources needed for a specific request (ie, a single udf call) as a dict (key: resource name)
|
|
825
|
+
# - parameters are a subset of those of the udf
|
|
826
|
+
# - this is not a class method because the signature depends on the instantiating udf
|
|
827
|
+
get_request_resources: Callable[..., dict[str, int]]
|
|
828
|
+
|
|
829
|
+
resource_limits: dict[str, RateLimitInfo] = field(default_factory=dict)
|
|
830
|
+
|
|
831
|
+
def is_initialized(self) -> bool:
|
|
832
|
+
return len(self.resource_limits) > 0
|
|
833
|
+
|
|
834
|
+
def reset(self) -> None:
|
|
835
|
+
self.resource_limits.clear()
|
|
836
|
+
|
|
837
|
+
def record(self, **kwargs) -> None:
|
|
838
|
+
now = datetime.datetime.now(tz=datetime.timezone.utc)
|
|
839
|
+
if len(self.resource_limits) == 0:
|
|
840
|
+
self.resource_limits = {k: RateLimitInfo(k, now, *v) for k, v in kwargs.items() if v is not None}
|
|
841
|
+
# TODO: remove
|
|
842
|
+
for info in self.resource_limits.values():
|
|
843
|
+
_logger.debug(f'Init {info.resource} rate limit: rem={info.remaining} reset={info.reset_at.strftime(TIME_FORMAT)} delta={(info.reset_at - now).total_seconds()}')
|
|
844
|
+
else:
|
|
845
|
+
for k, v in kwargs.items():
|
|
846
|
+
if v is not None:
|
|
847
|
+
self.resource_limits[k].update(now, *v)
|
|
848
|
+
|
|
849
|
+
@abstractmethod
|
|
850
|
+
def get_retry_delay(self, exc: Exception) -> Optional[float]:
|
|
851
|
+
"""Returns number of seconds to wait before retry, or None if not retryable"""
|
|
852
|
+
pass
|
|
853
|
+
|
|
854
|
+
|
|
855
|
+
@dataclass
|
|
856
|
+
class RateLimitInfo:
|
|
857
|
+
"""Container for rate limit-related information for a single resource."""
|
|
858
|
+
resource: str
|
|
859
|
+
recorded_at: datetime.datetime
|
|
860
|
+
limit: int
|
|
861
|
+
remaining: int
|
|
862
|
+
reset_at: datetime.datetime
|
|
863
|
+
|
|
864
|
+
def update(self, recorded_at: datetime.datetime, limit: int, remaining: int, reset_at: datetime.datetime) -> None:
|
|
865
|
+
# we always update everything, even though responses may come back out-of-order: we can't use reset_at to
|
|
866
|
+
# determine order, because it doesn't increase monotonically (the reset duration shortens as output_tokens
|
|
867
|
+
# are freed up - going from max to actual)
|
|
868
|
+
self.recorded_at = recorded_at
|
|
869
|
+
self.limit = limit
|
|
870
|
+
self.remaining = remaining
|
|
871
|
+
reset_delta = reset_at - self.reset_at
|
|
872
|
+
self.reset_at = reset_at
|
|
873
|
+
# TODO: remove
|
|
874
|
+
_logger.debug(f'Update {self.resource} rate limit: rem={self.remaining} reset={self.reset_at.strftime(TIME_FORMAT)} reset_delta={reset_delta.total_seconds()} recorded_delta={(self.reset_at - recorded_at).total_seconds()}')
|
|
@@ -4,7 +4,7 @@ from .component_iteration_node import ComponentIterationNode
|
|
|
4
4
|
from .data_row_batch import DataRowBatch
|
|
5
5
|
from .exec_context import ExecContext
|
|
6
6
|
from .exec_node import ExecNode
|
|
7
|
-
from .expr_eval_node import ExprEvalNode
|
|
8
7
|
from .in_memory_data_node import InMemoryDataNode
|
|
9
8
|
from .row_update_node import RowUpdateNode
|
|
10
9
|
from .sql_node import SqlLookupNode, SqlScanNode, SqlAggregationNode, SqlNode, SqlJoinNode
|
|
10
|
+
from .expr_eval import ExprEvalNode
|
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
import sys
|
|
5
|
-
from typing import Any, Iterable, Iterator, Optional, cast
|
|
5
|
+
from typing import Any, Iterable, Iterator, Optional, cast, AsyncIterator
|
|
6
6
|
|
|
7
7
|
import pixeltable.catalog as catalog
|
|
8
8
|
import pixeltable.exceptions as excs
|
|
@@ -60,11 +60,11 @@ class AggregationNode(ExecNode):
|
|
|
60
60
|
input_vals = [row[d.slot_idx] for d in fn_call.dependencies()]
|
|
61
61
|
raise excs.ExprEvalError(fn_call, expr_msg, e, exc_tb, input_vals, row_num)
|
|
62
62
|
|
|
63
|
-
def
|
|
63
|
+
async def __aiter__(self) -> AsyncIterator[DataRowBatch]:
|
|
64
64
|
prev_row: Optional[exprs.DataRow] = None
|
|
65
65
|
current_group: Optional[list[Any]] = None # the values of the group-by exprs
|
|
66
66
|
num_input_rows = 0
|
|
67
|
-
for row_batch in self.input:
|
|
67
|
+
async for row_batch in self.input:
|
|
68
68
|
num_input_rows += len(row_batch)
|
|
69
69
|
for row in row_batch:
|
|
70
70
|
group = [row[e.slot_idx] for e in self.group_by] if self.group_by is not None else None
|
|
@@ -9,7 +9,7 @@ import urllib.request
|
|
|
9
9
|
from collections import deque
|
|
10
10
|
from concurrent import futures
|
|
11
11
|
from pathlib import Path
|
|
12
|
-
from typing import Optional, Any, Iterator
|
|
12
|
+
from typing import Optional, Any, Iterator, AsyncIterator
|
|
13
13
|
from uuid import UUID
|
|
14
14
|
|
|
15
15
|
import pixeltable.env as env
|
|
@@ -79,12 +79,12 @@ class CachePrefetchNode(ExecNode):
|
|
|
79
79
|
self.input_finished = False
|
|
80
80
|
self.row_idx = itertools.count() if retain_input_order else itertools.repeat(None)
|
|
81
81
|
|
|
82
|
-
def
|
|
83
|
-
input_iter =
|
|
82
|
+
async def __aiter__(self) -> AsyncIterator[DataRowBatch]:
|
|
83
|
+
input_iter = self.input.__aiter__()
|
|
84
84
|
with futures.ThreadPoolExecutor(max_workers=self.NUM_EXECUTOR_THREADS) as executor:
|
|
85
85
|
# we create enough in-flight requests to fill the first batch
|
|
86
86
|
while not self.input_finished and self.__num_pending_rows() < self.BATCH_SIZE:
|
|
87
|
-
self.__submit_input_batch(input_iter, executor)
|
|
87
|
+
await self.__submit_input_batch(input_iter, executor)
|
|
88
88
|
|
|
89
89
|
while True:
|
|
90
90
|
# try to assemble a full batch of output rows
|
|
@@ -93,7 +93,7 @@ class CachePrefetchNode(ExecNode):
|
|
|
93
93
|
|
|
94
94
|
# try to create enough in-flight requests to fill the next batch
|
|
95
95
|
while not self.input_finished and self.__num_pending_rows() < self.BATCH_SIZE:
|
|
96
|
-
self.__submit_input_batch(input_iter, executor)
|
|
96
|
+
await self.__submit_input_batch(input_iter, executor)
|
|
97
97
|
|
|
98
98
|
if len(self.ready_rows) > 0:
|
|
99
99
|
# create DataRowBatch from the first BATCH_SIZE ready rows
|
|
@@ -163,9 +163,15 @@ class CachePrefetchNode(ExecNode):
|
|
|
163
163
|
self.__add_ready_row(row, state.idx)
|
|
164
164
|
_logger.debug(f'row {state.idx} is ready (ready_batch_size={self.__ready_prefix_len()})')
|
|
165
165
|
|
|
166
|
-
def __submit_input_batch(
|
|
166
|
+
async def __submit_input_batch(
|
|
167
|
+
self, input: AsyncIterator[DataRowBatch], executor: futures.ThreadPoolExecutor
|
|
168
|
+
) -> None:
|
|
167
169
|
assert not self.input_finished
|
|
168
|
-
input_batch
|
|
170
|
+
input_batch: Optional[DataRowBatch]
|
|
171
|
+
try:
|
|
172
|
+
input_batch = await input.__anext__()
|
|
173
|
+
except StopAsyncIteration:
|
|
174
|
+
input_batch = None
|
|
169
175
|
if input_batch is None:
|
|
170
176
|
self.input_finished = True
|
|
171
177
|
return
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import inspect
|
|
2
|
-
from typing import Iterator, Optional
|
|
2
|
+
from typing import Iterator, Optional, AsyncIterator
|
|
3
3
|
|
|
4
4
|
import pixeltable.catalog as catalog
|
|
5
5
|
import pixeltable.exceptions as excs
|
|
@@ -37,11 +37,10 @@ class ComponentIterationNode(ExecNode):
|
|
|
37
37
|
e.col.name: e.slot_idx for e in self.row_builder.unique_exprs
|
|
38
38
|
if isinstance(e, exprs.ColumnRef) and e.col.name in self.iterator_output_fields
|
|
39
39
|
}
|
|
40
|
-
self.__output: Optional[Iterator[DataRowBatch]] = None
|
|
41
40
|
|
|
42
|
-
def
|
|
41
|
+
async def __aiter__(self) -> AsyncIterator[DataRowBatch]:
|
|
43
42
|
output_batch = DataRowBatch(self.view, self.row_builder)
|
|
44
|
-
for input_batch in self.input:
|
|
43
|
+
async for input_batch in self.input:
|
|
45
44
|
for input_row in input_batch:
|
|
46
45
|
self.row_builder.eval(input_row, self.iterator_args_ctx)
|
|
47
46
|
iterator_args = input_row[self.iterator_args.slot_idx]
|
|
@@ -93,8 +92,3 @@ class ComponentIterationNode(ExecNode):
|
|
|
93
92
|
raise excs.Error(
|
|
94
93
|
f'Invalid output of {self.view.iterator_cls.__name__}: '
|
|
95
94
|
f'missing fields {", ".join(missing_fields)}')
|
|
96
|
-
|
|
97
|
-
def __next__(self) -> DataRowBatch:
|
|
98
|
-
if self.__output is None:
|
|
99
|
-
self.__output = self.__output_batches()
|
|
100
|
-
return next(self.__output)
|
|
@@ -21,7 +21,14 @@ class DataRowBatch:
|
|
|
21
21
|
array_slot_idxs: list[int]
|
|
22
22
|
rows: list[exprs.DataRow]
|
|
23
23
|
|
|
24
|
-
def __init__(
|
|
24
|
+
def __init__(
|
|
25
|
+
self, tbl: Optional[catalog.TableVersion], row_builder: exprs.RowBuilder, num_rows: Optional[int] = None,
|
|
26
|
+
rows: Optional[list[exprs.DataRow]] = None
|
|
27
|
+
):
|
|
28
|
+
"""
|
|
29
|
+
Requires either num_rows or rows to be specified, but not both.
|
|
30
|
+
"""
|
|
31
|
+
assert num_rows is None or rows is None
|
|
25
32
|
self.tbl = tbl
|
|
26
33
|
self.row_builder = row_builder
|
|
27
34
|
self.img_slot_idxs = [e.slot_idx for e in row_builder.unique_exprs if e.col_type.is_image_type()]
|
|
@@ -31,10 +38,15 @@ class DataRowBatch:
|
|
|
31
38
|
if e.col_type.is_media_type() and not e.col_type.is_image_type()
|
|
32
39
|
]
|
|
33
40
|
self.array_slot_idxs = [e.slot_idx for e in row_builder.unique_exprs if e.col_type.is_array_type()]
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
41
|
+
if rows is not None:
|
|
42
|
+
self.rows = rows
|
|
43
|
+
else:
|
|
44
|
+
if num_rows is None:
|
|
45
|
+
num_rows = 0
|
|
46
|
+
self.rows = [
|
|
47
|
+
exprs.DataRow(row_builder.num_materialized, self.img_slot_idxs, self.media_slot_idxs, self.array_slot_idxs)
|
|
48
|
+
for _ in range(num_rows)
|
|
49
|
+
]
|
|
38
50
|
|
|
39
51
|
def add_row(self, row: Optional[exprs.DataRow] = None) -> exprs.DataRow:
|
|
40
52
|
if row is None:
|