pixeltable 0.3.14__tar.gz → 0.3.15__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- {pixeltable-0.3.14 → pixeltable-0.3.15}/PKG-INFO +1 -1
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/__version__.py +2 -2
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/catalog/table_version.py +2 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/catalog/table_version_path.py +0 -7
- pixeltable-0.3.15/pixeltable/functions/gemini.py +226 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/functions/math.py +63 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/io/parquet.py +2 -2
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/share/packager.py +183 -6
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/store.py +12 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pyproject.toml +1 -1
- pixeltable-0.3.14/pixeltable/functions/gemini.py +0 -93
- {pixeltable-0.3.14 → pixeltable-0.3.15}/LICENSE +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/README.md +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/__init__.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/catalog/__init__.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/catalog/catalog.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/catalog/column.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/catalog/dir.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/catalog/globals.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/catalog/insertable_table.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/catalog/named_function.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/catalog/path.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/catalog/schema_object.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/catalog/table.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/catalog/table_version_handle.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/catalog/view.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/config.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/dataframe.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/env.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exceptions.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exec/__init__.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exec/aggregation_node.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exec/cache_prefetch_node.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exec/component_iteration_node.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exec/data_row_batch.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exec/exec_context.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exec/exec_node.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exec/expr_eval/__init__.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exec/expr_eval/evaluators.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exec/expr_eval/expr_eval_node.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exec/expr_eval/globals.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exec/expr_eval/row_buffer.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exec/expr_eval/schedulers.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exec/in_memory_data_node.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exec/row_update_node.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exec/sql_node.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exprs/__init__.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exprs/arithmetic_expr.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exprs/array_slice.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exprs/column_property_ref.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exprs/column_ref.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exprs/comparison.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exprs/compound_predicate.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exprs/data_row.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exprs/expr.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exprs/expr_dict.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exprs/expr_set.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exprs/function_call.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exprs/globals.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exprs/in_predicate.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exprs/inline_expr.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exprs/is_null.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exprs/json_mapper.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exprs/json_path.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exprs/literal.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exprs/method_ref.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exprs/object_ref.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exprs/row_builder.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exprs/rowid_ref.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exprs/similarity_expr.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exprs/sql_element_cache.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exprs/string_op.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exprs/type_cast.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/exprs/variable.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/ext/__init__.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/ext/functions/__init__.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/ext/functions/whisperx.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/ext/functions/yolox.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/func/__init__.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/func/aggregate_function.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/func/callable_function.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/func/expr_template_function.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/func/function.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/func/function_registry.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/func/globals.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/func/query_template_function.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/func/signature.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/func/tools.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/func/udf.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/functions/__init__.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/functions/anthropic.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/functions/audio.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/functions/bedrock.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/functions/date.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/functions/deepseek.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/functions/fireworks.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/functions/globals.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/functions/huggingface.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/functions/image.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/functions/json.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/functions/llama_cpp.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/functions/mistralai.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/functions/ollama.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/functions/openai.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/functions/replicate.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/functions/string.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/functions/timestamp.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/functions/together.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/functions/util.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/functions/video.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/functions/vision.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/functions/whisper.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/globals.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/index/__init__.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/index/base.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/index/btree.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/index/embedding_index.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/io/__init__.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/io/datarows.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/io/external_store.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/io/fiftyone.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/io/globals.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/io/hf_datasets.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/io/label_studio.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/io/pandas.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/io/table_data_conduit.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/io/utils.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/iterators/__init__.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/iterators/audio.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/iterators/base.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/iterators/document.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/iterators/image.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/iterators/string.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/iterators/video.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/metadata/__init__.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/metadata/converters/convert_10.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/metadata/converters/convert_12.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/metadata/converters/convert_13.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/metadata/converters/convert_14.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/metadata/converters/convert_15.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/metadata/converters/convert_16.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/metadata/converters/convert_17.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/metadata/converters/convert_18.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/metadata/converters/convert_19.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/metadata/converters/convert_20.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/metadata/converters/convert_21.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/metadata/converters/convert_22.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/metadata/converters/convert_23.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/metadata/converters/convert_24.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/metadata/converters/convert_25.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/metadata/converters/convert_26.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/metadata/converters/convert_27.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/metadata/converters/convert_28.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/metadata/converters/convert_29.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/metadata/converters/convert_30.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/metadata/converters/convert_31.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/metadata/converters/convert_32.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/metadata/converters/convert_33.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/metadata/converters/convert_34.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/metadata/converters/util.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/metadata/notes.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/metadata/schema.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/plan.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/py.typed +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/share/__init__.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/share/publish.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/type_system.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/utils/__init__.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/utils/arrow.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/utils/coco.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/utils/code.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/utils/console_output.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/utils/coroutine.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/utils/dbms.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/utils/description_helper.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/utils/documents.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/utils/exception_handler.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/utils/filecache.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/utils/formatter.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/utils/http_server.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/utils/iceberg.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/utils/media_store.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/utils/pytorch.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/utils/s3.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/utils/sql.py +0 -0
- {pixeltable-0.3.14 → pixeltable-0.3.15}/pixeltable/utils/transactional_directory.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: pixeltable
|
|
3
|
-
Version: 0.3.14
|
|
3
|
+
Version: 0.3.15
|
|
4
4
|
Summary: AI Data Infrastructure: Declarative, Multimodal, and Incremental
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
Keywords: data-science,machine-learning,database,ai,computer-vision,chatbot,ml,artificial-intelligence,feature-engineering,multimodal,mlops,feature-store,vector-database,llm,genai
|
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
# These version placeholders will be replaced during build.
|
|
2
|
-
__version__ = '0.3.14'
|
|
3
|
-
__version_tuple__ = (0, 3, 14)
|
|
2
|
+
__version__ = '0.3.15'
|
|
3
|
+
__version_tuple__ = (0, 3, 15)
|
|
@@ -324,6 +324,7 @@ class TableVersion:
|
|
|
324
324
|
@classmethod
|
|
325
325
|
def create_replica(cls, md: schema.FullTableMd) -> TableVersion:
|
|
326
326
|
tbl_id = UUID(md.tbl_md.tbl_id)
|
|
327
|
+
_logger.info(f'Creating replica table version {tbl_id}:{md.version_md.version}.')
|
|
327
328
|
view_md = md.tbl_md.view_md
|
|
328
329
|
base_path = pxt.catalog.TableVersionPath.from_md(view_md.base_versions) if view_md is not None else None
|
|
329
330
|
base = base_path.tbl_version if base_path is not None else None
|
|
@@ -331,6 +332,7 @@ class TableVersion:
|
|
|
331
332
|
tbl_id, md.tbl_md, md.version_md.version, md.schema_version_md, [], base_path=base_path, base=base
|
|
332
333
|
)
|
|
333
334
|
tbl_version.store_tbl.create()
|
|
335
|
+
tbl_version.store_tbl.ensure_columns_exist(col for col in tbl_version.cols if col.is_stored)
|
|
334
336
|
return tbl_version
|
|
335
337
|
|
|
336
338
|
def drop(self) -> None:
|
|
@@ -98,13 +98,6 @@ class TableVersionPath:
|
|
|
98
98
|
return None
|
|
99
99
|
return self.base.find_tbl_version(id)
|
|
100
100
|
|
|
101
|
-
@property
|
|
102
|
-
def ancestor_paths(self) -> list[TableVersionPath]:
|
|
103
|
-
if self.base is None:
|
|
104
|
-
return [self]
|
|
105
|
-
else:
|
|
106
|
-
return [self, *self.base.ancestor_paths]
|
|
107
|
-
|
|
108
101
|
def columns(self) -> list[Column]:
|
|
109
102
|
"""Return all user columns visible in this tbl version path, including columns from bases"""
|
|
110
103
|
result = list(self.tbl_version.get().cols_by_name.values())
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
|
|
3
|
+
that wrap various endpoints from the Google Gemini API. In order to use them, you must
|
|
4
|
+
first `pip install google-genai` and configure your Gemini credentials, as described in
|
|
5
|
+
the [Working with Gemini](https://pixeltable.readme.io/docs/working-with-gemini) tutorial.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import asyncio
|
|
9
|
+
import io
|
|
10
|
+
import tempfile
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import TYPE_CHECKING, Optional
|
|
13
|
+
|
|
14
|
+
import PIL.Image
|
|
15
|
+
|
|
16
|
+
import pixeltable as pxt
|
|
17
|
+
from pixeltable import env, exceptions as excs, exprs
|
|
18
|
+
|
|
19
|
+
if TYPE_CHECKING:
|
|
20
|
+
from google import genai
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@env.register_client('gemini')
|
|
24
|
+
def _(api_key: str) -> 'genai.client.Client':
|
|
25
|
+
from google import genai
|
|
26
|
+
|
|
27
|
+
return genai.client.Client(api_key=api_key)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _genai_client() -> 'genai.client.Client':
|
|
31
|
+
return env.Env.get().get_client('gemini')
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@pxt.udf(resource_pool='request-rate:gemini')
|
|
35
|
+
async def generate_content(
|
|
36
|
+
contents: str, *, model: str, config: Optional[dict] = None, tools: Optional[list[dict]] = None
|
|
37
|
+
) -> dict:
|
|
38
|
+
"""
|
|
39
|
+
Generate content from the specified model. For additional details, see:
|
|
40
|
+
<https://ai.google.dev/gemini-api/docs/text-generation>
|
|
41
|
+
|
|
42
|
+
Request throttling:
|
|
43
|
+
Applies the rate limit set in the config (section `gemini`, key `rate_limit`). If no rate
|
|
44
|
+
limit is configured, uses a default of 600 RPM.
|
|
45
|
+
|
|
46
|
+
__Requirements:__
|
|
47
|
+
|
|
48
|
+
- `pip install google-genai`
|
|
49
|
+
|
|
50
|
+
Args:
|
|
51
|
+
contents: The input content to generate from.
|
|
52
|
+
model: The name of the model to use.
|
|
53
|
+
config: Configuration for generation, corresponding to keyword arguments of
|
|
54
|
+
`genai.types.GenerateContentConfig`. For details on the parameters, see:
|
|
55
|
+
<https://googleapis.github.io/python-genai/genai.html#module-genai.types>
|
|
56
|
+
tools: Optional list of Pixeltable tools to use. It is also possible to specify tools manually via the
|
|
57
|
+
`config.tools` parameter, but at most one of `config.tools` or `tools` may be used.
|
|
58
|
+
|
|
59
|
+
Returns:
|
|
60
|
+
A dictionary containing the response and other metadata.
|
|
61
|
+
|
|
62
|
+
Examples:
|
|
63
|
+
Add a computed column that applies the model `gemini-2.0-flash`
|
|
64
|
+
to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
|
|
65
|
+
|
|
66
|
+
>>> tbl.add_computed_column(response=generate_content(tbl.prompt, model='gemini-2.0-flash'))
|
|
67
|
+
"""
|
|
68
|
+
env.Env.get().require_package('google.genai')
|
|
69
|
+
from google.genai import types
|
|
70
|
+
|
|
71
|
+
config_: types.GenerateContentConfig
|
|
72
|
+
if config is None and tools is None:
|
|
73
|
+
config_ = None
|
|
74
|
+
else:
|
|
75
|
+
if config is None:
|
|
76
|
+
config_ = types.GenerateContentConfig()
|
|
77
|
+
else:
|
|
78
|
+
config_ = types.GenerateContentConfig(**config)
|
|
79
|
+
if tools is not None:
|
|
80
|
+
gemini_tools = [__convert_pxt_tool(tool) for tool in tools]
|
|
81
|
+
config_.tools = [types.Tool(function_declarations=gemini_tools)]
|
|
82
|
+
|
|
83
|
+
response = await _genai_client().aio.models.generate_content(model=model, contents=contents, config=config_)
|
|
84
|
+
return response.model_dump()
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def __convert_pxt_tool(pxt_tool: dict) -> dict:
|
|
88
|
+
return {
|
|
89
|
+
'name': pxt_tool['name'],
|
|
90
|
+
'description': pxt_tool['description'],
|
|
91
|
+
'parameters': {
|
|
92
|
+
'type': 'object',
|
|
93
|
+
'properties': pxt_tool['parameters']['properties'],
|
|
94
|
+
'required': pxt_tool['required'],
|
|
95
|
+
},
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def invoke_tools(tools: pxt.func.Tools, response: exprs.Expr) -> exprs.InlineDict:
|
|
100
|
+
"""Converts an OpenAI response dict to Pixeltable tool invocation format and calls `tools._invoke()`."""
|
|
101
|
+
return tools._invoke(_gemini_response_to_pxt_tool_calls(response))
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
@pxt.udf
|
|
105
|
+
def _gemini_response_to_pxt_tool_calls(response: dict) -> Optional[dict]:
|
|
106
|
+
print(response)
|
|
107
|
+
pxt_tool_calls: dict[str, list[dict]] = {}
|
|
108
|
+
for part in response['candidates'][0]['content']['parts']:
|
|
109
|
+
tool_call = part.get('function_call')
|
|
110
|
+
if tool_call is not None:
|
|
111
|
+
tool_name = tool_call['name']
|
|
112
|
+
if tool_name not in pxt_tool_calls:
|
|
113
|
+
pxt_tool_calls[tool_name] = []
|
|
114
|
+
pxt_tool_calls[tool_name].append({'args': tool_call['args']})
|
|
115
|
+
if len(pxt_tool_calls) == 0:
|
|
116
|
+
return None
|
|
117
|
+
return pxt_tool_calls
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@generate_content.resource_pool
|
|
121
|
+
def _(model: str) -> str:
|
|
122
|
+
return f'request-rate:gemini:{model}'
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
@pxt.udf(resource_pool='request-rate:imagen')
|
|
126
|
+
async def generate_images(prompt: str, *, model: str, config: Optional[dict] = None) -> PIL.Image.Image:
|
|
127
|
+
"""
|
|
128
|
+
Generates images based on a text description and configuration. For additional details, see:
|
|
129
|
+
<https://ai.google.dev/gemini-api/docs/image-generation>
|
|
130
|
+
|
|
131
|
+
__Requirements:__
|
|
132
|
+
|
|
133
|
+
- `pip install google-genai`
|
|
134
|
+
|
|
135
|
+
Args:
|
|
136
|
+
prompt: A text description of the images to generate.
|
|
137
|
+
model: The model to use.
|
|
138
|
+
config: Configuration for generation, corresponding to keyword arguments of
|
|
139
|
+
`genai.types.GenerateImagesConfig`. For details on the parameters, see:
|
|
140
|
+
<https://googleapis.github.io/python-genai/genai.html#module-genai.types>
|
|
141
|
+
|
|
142
|
+
Returns:
|
|
143
|
+
The generated image.
|
|
144
|
+
|
|
145
|
+
Examples:
|
|
146
|
+
Add a computed column that applies the model `imagen-3.0-generate-002`
|
|
147
|
+
to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
|
|
148
|
+
|
|
149
|
+
>>> tbl.add_computed_column(response=generate_images(tbl.prompt, model='imagen-3.0-generate-002'))
|
|
150
|
+
"""
|
|
151
|
+
env.Env.get().require_package('google.genai')
|
|
152
|
+
from google.genai.types import GenerateImagesConfig
|
|
153
|
+
|
|
154
|
+
config_ = GenerateImagesConfig(**config) if config else None
|
|
155
|
+
response = await _genai_client().aio.models.generate_images(model=model, prompt=prompt, config=config_)
|
|
156
|
+
return response.generated_images[0].image._pil_image
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
@generate_images.resource_pool
|
|
160
|
+
def _(model: str) -> str:
|
|
161
|
+
return f'request-rate:imagen:{model}'
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
@pxt.udf(resource_pool='request-rate:veo')
|
|
165
|
+
async def generate_videos(
|
|
166
|
+
prompt: Optional[str] = None, image: Optional[PIL.Image.Image] = None, *, model: str, config: Optional[dict] = None
|
|
167
|
+
) -> pxt.Video:
|
|
168
|
+
"""
|
|
169
|
+
Generates videos based on a text description and configuration. For additional details, see:
|
|
170
|
+
<https://ai.google.dev/gemini-api/docs/video-generation>
|
|
171
|
+
|
|
172
|
+
__Requirements:__
|
|
173
|
+
|
|
174
|
+
- `pip install google-genai`
|
|
175
|
+
|
|
176
|
+
Args:
|
|
177
|
+
prompt: A text description of the videos to generate.
|
|
178
|
+
image: An optional image to use as the first frame of the video. At least one of `prompt` or `image` must be
|
|
179
|
+
provided. (It is ok to specify both.)
|
|
180
|
+
model: The model to use.
|
|
181
|
+
config: Configuration for generation, corresponding to keyword arguments of
|
|
182
|
+
`genai.types.GenerateVideosConfig`. For details on the parameters, see:
|
|
183
|
+
<https://googleapis.github.io/python-genai/genai.html#module-genai.types>
|
|
184
|
+
|
|
185
|
+
Returns:
|
|
186
|
+
The generated video.
|
|
187
|
+
|
|
188
|
+
Examples:
|
|
189
|
+
Add a computed column that applies the model `veo-2.0-generate-001`
|
|
190
|
+
to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
|
|
191
|
+
|
|
192
|
+
>>> tbl.add_computed_column(response=generate_videos(tbl.prompt, model='veo-2.0-generate-001'))
|
|
193
|
+
"""
|
|
194
|
+
env.Env.get().require_package('google.genai')
|
|
195
|
+
from google.genai import types
|
|
196
|
+
|
|
197
|
+
if prompt is None and image is None:
|
|
198
|
+
raise excs.Error('At least one of `prompt` or `image` must be provided.')
|
|
199
|
+
|
|
200
|
+
image_: Optional[types.Image] = None
|
|
201
|
+
if image is not None:
|
|
202
|
+
with io.BytesIO() as buffer:
|
|
203
|
+
image.save(buffer, format='jpeg')
|
|
204
|
+
image_ = types.Image(image_bytes=buffer.getvalue(), mime_type='image/jpeg')
|
|
205
|
+
|
|
206
|
+
config_ = types.GenerateVideosConfig(**config) if config else None
|
|
207
|
+
operation = await _genai_client().aio.models.generate_videos(
|
|
208
|
+
model=model, prompt=prompt, image=image_, config=config_
|
|
209
|
+
)
|
|
210
|
+
while not operation.done:
|
|
211
|
+
await asyncio.sleep(3)
|
|
212
|
+
operation = await _genai_client().aio.operations.get(operation)
|
|
213
|
+
|
|
214
|
+
video = operation.response.generated_videos[0]
|
|
215
|
+
|
|
216
|
+
video_bytes = await _genai_client().aio.files.download(file=video.video) # type: ignore[arg-type]
|
|
217
|
+
assert video_bytes is not None
|
|
218
|
+
|
|
219
|
+
_, output_filename = tempfile.mkstemp(suffix='.mp4', dir=str(env.Env.get().tmp_dir))
|
|
220
|
+
Path(output_filename).write_bytes(video_bytes)
|
|
221
|
+
return output_filename
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
@generate_videos.resource_pool
|
|
225
|
+
def _(model: str) -> str:
|
|
226
|
+
return f'request-rate:veo:{model}'
|
|
@@ -100,6 +100,69 @@ def _(self: sql.ColumnElement, digits: Optional[sql.ColumnElement] = None) -> sq
|
|
|
100
100
|
return sql.func.round(sql.cast(self, sql.Numeric), sql.cast(digits, sql.Integer))
|
|
101
101
|
|
|
102
102
|
|
|
103
|
+
@pxt.udf(is_method=True)
|
|
104
|
+
def pow(self: int, other: int) -> float:
|
|
105
|
+
"""
|
|
106
|
+
Raise `self` to the power of `other`.
|
|
107
|
+
|
|
108
|
+
Equivalent to Python [`self ** other`](https://docs.python.org/3/library/functions.html#pow).
|
|
109
|
+
"""
|
|
110
|
+
return self**other
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
@pow.to_sql
|
|
114
|
+
def _(self: sql.ColumnElement, other: sql.ColumnElement) -> sql.ColumnElement:
|
|
115
|
+
return sql.func.pow(self, other)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
@pxt.udf(is_method=True)
|
|
119
|
+
def bitwise_and(self: int, other: int) -> int:
|
|
120
|
+
"""
|
|
121
|
+
Bitwise AND of two integers.
|
|
122
|
+
|
|
123
|
+
Equivalent to Python
|
|
124
|
+
[`self & other`](https://docs.python.org/3/library/stdtypes.html#bitwise-operations-on-integer-types).
|
|
125
|
+
"""
|
|
126
|
+
return self & other
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
@bitwise_and.to_sql
|
|
130
|
+
def _(self: sql.ColumnElement, other: sql.ColumnElement) -> sql.ColumnElement:
|
|
131
|
+
return self.bitwise_and(other)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
@pxt.udf(is_method=True)
|
|
135
|
+
def bitwise_or(self: int, other: int) -> int:
|
|
136
|
+
"""
|
|
137
|
+
Bitwise OR of two integers.
|
|
138
|
+
|
|
139
|
+
Equivalent to Python
|
|
140
|
+
[`self | other`](https://docs.python.org/3/library/stdtypes.html#bitwise-operations-on-integer-types).
|
|
141
|
+
"""
|
|
142
|
+
return self | other
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
@bitwise_or.to_sql
|
|
146
|
+
def _(self: sql.ColumnElement, other: sql.ColumnElement) -> sql.ColumnElement:
|
|
147
|
+
return self.bitwise_or(other)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
@pxt.udf(is_method=True)
|
|
151
|
+
def bitwise_xor(self: int, other: int) -> int:
|
|
152
|
+
"""
|
|
153
|
+
Bitwise XOR of two integers.
|
|
154
|
+
|
|
155
|
+
Equivalent to Python
|
|
156
|
+
[`self ^ other`](https://docs.python.org/3/library/stdtypes.html#bitwise-operations-on-integer-types).
|
|
157
|
+
"""
|
|
158
|
+
return self ^ other
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
@bitwise_xor.to_sql
|
|
162
|
+
def _(self: sql.ColumnElement, other: sql.ColumnElement) -> sql.ColumnElement:
|
|
163
|
+
return self.bitwise_xor(other)
|
|
164
|
+
|
|
165
|
+
|
|
103
166
|
__all__ = local_public_names(__name__)
|
|
104
167
|
|
|
105
168
|
|
|
@@ -112,11 +112,11 @@ def export_parquet(
|
|
|
112
112
|
length = len(val)
|
|
113
113
|
elif col_type.is_string_type():
|
|
114
114
|
length = len(val)
|
|
115
|
-
elif col_type.is_video_type():
|
|
115
|
+
elif col_type.is_video_type() or col_type.is_audio_type():
|
|
116
116
|
if data_row.file_paths is not None and data_row.file_paths[e.slot_idx] is not None:
|
|
117
117
|
val = data_row.file_paths[e.slot_idx]
|
|
118
118
|
else:
|
|
119
|
-
raise excs.Error(f'unknown video type {type(val)}')
|
|
119
|
+
raise excs.Error(f'unknown audio/video type {type(val)}')
|
|
120
120
|
length = len(val)
|
|
121
121
|
elif col_type.is_json_type():
|
|
122
122
|
val = json.dumps(val)
|
|
@@ -17,6 +17,7 @@ import pixeltable as pxt
|
|
|
17
17
|
from pixeltable import catalog, exceptions as excs, metadata
|
|
18
18
|
from pixeltable.env import Env
|
|
19
19
|
from pixeltable.metadata import schema
|
|
20
|
+
from pixeltable.utils import sha256sum
|
|
20
21
|
from pixeltable.utils.media_store import MediaStore
|
|
21
22
|
|
|
22
23
|
_logger = logging.getLogger('pixeltable')
|
|
@@ -88,7 +89,7 @@ class TablePackager:
|
|
|
88
89
|
assert any(tv.id == base.id for base in self.table._tbl_version_path.get_tbl_versions())
|
|
89
90
|
sql_types = {col.name: col.type for col in tv.store_tbl.sa_tbl.columns}
|
|
90
91
|
media_cols: set[str] = set()
|
|
91
|
-
for col in tv.
|
|
92
|
+
for col in tv.cols:
|
|
92
93
|
if col.is_stored and col.col_type.is_media_type():
|
|
93
94
|
media_cols.add(col.store_name())
|
|
94
95
|
|
|
@@ -182,7 +183,12 @@ class TablePackager:
|
|
|
182
183
|
path = Path(urllib.parse.unquote(urllib.request.url2pathname(parsed_url.path)))
|
|
183
184
|
if path not in self.media_files:
|
|
184
185
|
# Create a new entry in the `media_files` dict so that we can copy the file into the tarball later.
|
|
185
|
-
|
|
186
|
+
# We name the media files in the archive by their SHA256 hash. This ensures that we can properly
|
|
187
|
+
# deduplicate and validate them later.
|
|
188
|
+
# If we get a collision, it's not a problem; it just means we have two identical files (which will
|
|
189
|
+
# be conveniently deduplicated in the bundle).
|
|
190
|
+
sha = sha256sum(path)
|
|
191
|
+
dest_name = f'{sha}{path.suffix}'
|
|
186
192
|
self.media_files[path] = dest_name
|
|
187
193
|
return f'pxtmedia://{self.media_files[path]}'
|
|
188
194
|
# For any type of URL other than a local file, just return the URL as-is.
|
|
@@ -276,11 +282,182 @@ class TableRestorer:
|
|
|
276
282
|
tbl_id = uuid.UUID(tbl_md.tbl_md.tbl_id)
|
|
277
283
|
parquet_dir = bundle_path / 'tables' / f'tbl_{tbl_id.hex}'
|
|
278
284
|
parquet_table = pq.read_table(str(parquet_dir))
|
|
279
|
-
|
|
280
|
-
|
|
285
|
+
replica_version = tv.version
|
|
286
|
+
|
|
287
|
+
conn = Env.get().conn
|
|
288
|
+
store_sa_tbl = tv.store_tbl.sa_tbl
|
|
289
|
+
store_sa_tbl_name = tv.store_tbl._storage_name()
|
|
290
|
+
|
|
291
|
+
# Sometimes we are importing a table that has never been seen before. Other times, however, we are importing
|
|
292
|
+
# an existing replica table, and the table version and/or row selection differs from what was imported
|
|
293
|
+
# previously. Care must be taken to ensure that the new data is merged with existing data in a way that
|
|
294
|
+
# yields an internally consistent version history for each row.
|
|
295
|
+
|
|
296
|
+
# The overall strategy is this:
|
|
297
|
+
# 1. Import the parquet data into a temporary table;
|
|
298
|
+
# 2. "rectify" the v_max values in both the temporary table and the existing table (more on this below);
|
|
299
|
+
# 3. Delete any row instances from the temporary table that are already present in the existing table;
|
|
300
|
+
# 4. Copy the remaining rows from the temporary table into the existing table.
|
|
301
|
+
|
|
302
|
+
# Create a temporary table for the initial data load, containing columns for all columns present in the
|
|
303
|
+
# parquet table. The parquet columns have identical names to those in the store table, so we can use the
|
|
304
|
+
# store table schema to get their SQL types (which are not necessarily derivable from their Parquet types,
|
|
305
|
+
# e.g., pa.string() may hold either VARCHAR or serialized JSONB).
|
|
306
|
+
temp_cols: dict[str, sql.Column] = {}
|
|
307
|
+
for field in parquet_table.schema:
|
|
308
|
+
assert field.name in store_sa_tbl.columns
|
|
309
|
+
col_type = store_sa_tbl.columns[field.name].type
|
|
310
|
+
temp_cols[field.name] = sql.Column(field.name, col_type)
|
|
311
|
+
temp_sa_tbl_name = f'temp_{uuid.uuid4().hex}'
|
|
312
|
+
_logger.debug(f'Creating temporary table: {temp_sa_tbl_name}')
|
|
313
|
+
temp_md = sql.MetaData()
|
|
314
|
+
temp_sa_tbl = sql.Table(temp_sa_tbl_name, temp_md, *temp_cols.values(), prefixes=['TEMPORARY'])
|
|
315
|
+
temp_sa_tbl.create(conn)
|
|
316
|
+
|
|
317
|
+
# Populate the temporary table with data from the Parquet file.
|
|
318
|
+
_logger.debug(f'Loading {parquet_table.num_rows} row(s) into temporary table: {temp_sa_tbl_name}')
|
|
319
|
+
for batch in parquet_table.to_batches(max_chunksize=10_000):
|
|
281
320
|
pydict = batch.to_pydict()
|
|
282
321
|
rows = self.__from_pa_pydict(tv, pydict)
|
|
283
|
-
|
|
322
|
+
conn.execute(sql.insert(temp_sa_tbl), rows)
|
|
323
|
+
|
|
324
|
+
# Each row version is identified uniquely by its pk, a tuple (row_id, pos_0, pos_1, ..., pos_k, v_min).
|
|
325
|
+
# Conversely, v_max is not part of the primary key, but is simply a bookkeeping device.
|
|
326
|
+
# In an original table, v_max is always equal to the v_min of the succeeding row instance with the same
|
|
327
|
+
# row id, or MAX_VERSION if no such row instance exists. But in the replica, we need to be careful, since
|
|
328
|
+
# we might see only a subset of the original table's versions, and we might see them out of order.
|
|
329
|
+
|
|
330
|
+
# We'll adjust the v_max values according to the principle of "latest provable v_max":
|
|
331
|
+
# they will always correspond to the latest version for which we can prove the row instance was alive. This
|
|
332
|
+
# will enable us to maintain consistency of the v_max values if additional table versions are later imported,
|
|
333
|
+
# regardless of the order in which they are seen. It also means that replica tables (unlike original tables)
|
|
334
|
+
# may have gaps in their row version histories, but this is fine; the gaps simply correspond to table versions
|
|
335
|
+
# that have never been observed.
|
|
336
|
+
|
|
337
|
+
pk_predicates = [col == temp_cols[col.name] for col in tv.store_tbl.pk_columns()]
|
|
338
|
+
pk_clause = sql.and_(*pk_predicates)
|
|
339
|
+
|
|
340
|
+
# If the same pk exists in both the temporary table and the existing table, then the corresponding row data
|
|
341
|
+
# must be identical; the rows can differ only in their v_max value. As a sanity check, we go through the
|
|
342
|
+
# motions of verifying this; a failure implies data corruption in either the replica being imported or in a
|
|
343
|
+
# previously imported replica.
|
|
344
|
+
|
|
345
|
+
system_col_names = {col.name for col in tv.store_tbl.system_columns()}
|
|
346
|
+
media_col_names = {col.store_name() for col in tv.cols if col.col_type.is_media_type() and col.is_stored}
|
|
347
|
+
value_store_cols = [
|
|
348
|
+
store_sa_tbl.c[col_name]
|
|
349
|
+
for col_name in temp_cols
|
|
350
|
+
if col_name not in system_col_names and col_name not in media_col_names
|
|
351
|
+
]
|
|
352
|
+
value_temp_cols = [
|
|
353
|
+
col
|
|
354
|
+
for col_name, col in temp_cols.items()
|
|
355
|
+
if col_name not in system_col_names and col_name not in media_col_names
|
|
356
|
+
]
|
|
357
|
+
mismatch_predicates = [store_col != temp_col for store_col, temp_col in zip(value_store_cols, value_temp_cols)]
|
|
358
|
+
mismatch_clause = sql.or_(*mismatch_predicates)
|
|
359
|
+
|
|
360
|
+
# This query looks for rows that have matching primary keys (rowid + pos_k + v_min), but differ in at least
|
|
361
|
+
# one value column. Pseudo-SQL:
|
|
362
|
+
#
|
|
363
|
+
# SELECT store_tbl.col_0, ..., store_tbl.col_n, temp_tbl.col_0, ..., temp_tbl.col_n
|
|
364
|
+
# FROM store_tbl, temp_tbl
|
|
365
|
+
# WHERE store_tbl.rowid = temp_tbl.rowid
|
|
366
|
+
# AND store_tbl.pos_0 = temp_tbl.pos_0
|
|
367
|
+
# AND ... AND store_tbl.pos_k = temp_tbl.pos_k
|
|
368
|
+
# AND store_tbl.v_min = temp_tbl.v_min
|
|
369
|
+
# AND (
|
|
370
|
+
# store_tbl.col_0 != temp_tbl.col_0
|
|
371
|
+
# OR store_tbl.col_1 != temp_tbl.col_1
|
|
372
|
+
# OR ... OR store_tbl.col_n != temp_tbl.col_n
|
|
373
|
+
# )
|
|
374
|
+
#
|
|
375
|
+
# The value column comparisons (store_tbl.col_0 != temp_tbl.col_0, etc.) will always be false for rows where
|
|
376
|
+
# either column is NULL; this is what we want, since it may indicate a column that is present in one version
|
|
377
|
+
# but not the other.
|
|
378
|
+
q = sql.select(*value_store_cols, *value_temp_cols).where(pk_clause).where(mismatch_clause)
|
|
379
|
+
_logger.debug(q.compile())
|
|
380
|
+
result = conn.execute(q)
|
|
381
|
+
if result.rowcount > 0:
|
|
382
|
+
_logger.debug(
|
|
383
|
+
f'Data corruption error between {temp_sa_tbl_name!r} and {store_sa_tbl_name!r}: '
|
|
384
|
+
f'{result.rowcount} inconsistent row(s).'
|
|
385
|
+
)
|
|
386
|
+
row = result.first()
|
|
387
|
+
_logger.debug('Example mismatch:')
|
|
388
|
+
_logger.debug(f'{store_sa_tbl_name}: {row[: len(value_store_cols)]}')
|
|
389
|
+
_logger.debug(f'{temp_sa_tbl_name}: {row[len(value_store_cols) :]}')
|
|
390
|
+
raise excs.Error(
|
|
391
|
+
'Data corruption error: the replica data are inconsistent with data retrieved from a previous replica.'
|
|
392
|
+
)
|
|
393
|
+
_logger.debug(f'Verified data integrity between {store_sa_tbl_name!r} and {temp_sa_tbl_name!r}.')
|
|
394
|
+
|
|
395
|
+
# Now rectify the v_max values in the temporary table.
|
|
396
|
+
# If a row instance has a concrete v_max value, then we know it's genuine: it's the unique and immutable
|
|
397
|
+
# version when the row was deleted. (This can only happen if later versions of the base table already
|
|
398
|
+
# existed at the time this replica was published.)
|
|
399
|
+
# But if a row instance has a v_max value of MAX_VERSION, then we don't know anything about its future.
|
|
400
|
+
# It might live indefinitely, or it might be deleted as early as version `n + 1`. Following the principle
|
|
401
|
+
# of "latest provable v_max", we simply set v_max equal to `n + 1`.
|
|
402
|
+
q = (
|
|
403
|
+
temp_sa_tbl.update()
|
|
404
|
+
.values(v_max=(replica_version + 1))
|
|
405
|
+
.where(temp_sa_tbl.c.v_max == schema.Table.MAX_VERSION)
|
|
406
|
+
)
|
|
407
|
+
_logger.debug(q.compile())
|
|
408
|
+
result = conn.execute(q)
|
|
409
|
+
_logger.debug(f'Rectified {result.rowcount} row(s) in {temp_sa_tbl_name!r}.')
|
|
410
|
+
|
|
411
|
+
# Now rectify the v_max values in the existing table. This is done by simply taking the later of the two v_max
|
|
412
|
+
# values (the existing one and the new one) for each row instance, following the "latest provable v_max"
|
|
413
|
+
# principle. Obviously we only need to do this for rows that exist in both tables (it's a simple join).
|
|
414
|
+
q = (
|
|
415
|
+
store_sa_tbl.update()
|
|
416
|
+
.values(v_max=sql.func.greatest(store_sa_tbl.c.v_max, temp_sa_tbl.c.v_max))
|
|
417
|
+
.where(pk_clause)
|
|
418
|
+
)
|
|
419
|
+
_logger.debug(q.compile())
|
|
420
|
+
result = conn.execute(q)
|
|
421
|
+
_logger.debug(f'Rectified {result.rowcount} row(s) in {store_sa_tbl_name!r}.')
|
|
422
|
+
|
|
423
|
+
# Now we need to update rows in the existing table that are also present in the temporary table. This is to
|
|
424
|
+
# account for the scenario where the temporary table has columns that are not present in the existing table.
|
|
425
|
+
# (We can't simply replace the rows with their versions in the temporary table, because the converse scenario
|
|
426
|
+
# might also occur; there may be columns in the existing table that are not present in the temporary table.)
|
|
427
|
+
value_update_clauses: dict[str, sql.ColumnElement] = {}
|
|
428
|
+
for temp_col in temp_cols.values():
|
|
429
|
+
if temp_col.name not in system_col_names:
|
|
430
|
+
store_col = store_sa_tbl.c[temp_col.name]
|
|
431
|
+
# Prefer the value from the existing table, substituting the value from the temporary table if it's
|
|
432
|
+
# NULL. This works in all cases (including media columns, where we prefer the existing media file).
|
|
433
|
+
clause = sql.case((store_col == None, temp_col), else_=store_col)
|
|
434
|
+
value_update_clauses[temp_col.name] = clause
|
|
435
|
+
if len(value_update_clauses) > 0:
|
|
436
|
+
q = store_sa_tbl.update().values(**value_update_clauses).where(pk_clause)
|
|
437
|
+
_logger.debug(q.compile())
|
|
438
|
+
result = conn.execute(q)
|
|
439
|
+
_logger.debug(
|
|
440
|
+
f'Merged values from {temp_sa_tbl_name!r} into {store_sa_tbl_name!r} for {result.rowcount} row(s).'
|
|
441
|
+
)
|
|
442
|
+
|
|
443
|
+
# Now drop any rows from the temporary table that are also present in the existing table.
|
|
444
|
+
# The v_max values have been rectified, data has been merged into NULL cells, and all other row values have
|
|
445
|
+
# been verified identical.
|
|
446
|
+
# TODO: Delete any media files that were orphaned by this operation (they're necessarily duplicates of media
|
|
447
|
+
# files that are already present in the existing table).
|
|
448
|
+
q = temp_sa_tbl.delete().where(pk_clause)
|
|
449
|
+
_logger.debug(q.compile())
|
|
450
|
+
result = conn.execute(q)
|
|
451
|
+
_logger.debug(f'Deleted {result.rowcount} row(s) from {temp_sa_tbl_name!r}.')
|
|
452
|
+
|
|
453
|
+
# Finally, copy the remaining data (consisting entirely of new row instances) from the temporary table into
|
|
454
|
+
# the actual table.
|
|
455
|
+
q = store_sa_tbl.insert().from_select(
|
|
456
|
+
[store_sa_tbl.c[col_name] for col_name in temp_cols], sql.select(*temp_cols.values())
|
|
457
|
+
)
|
|
458
|
+
_logger.debug(q.compile())
|
|
459
|
+
result = conn.execute(q)
|
|
460
|
+
_logger.debug(f'Inserted {result.rowcount} row(s) from {temp_sa_tbl_name!r} into {store_sa_tbl_name!r}.')
|
|
284
461
|
|
|
285
462
|
def __from_pa_pydict(self, tv: catalog.TableVersion, pydict: dict[str, Any]) -> list[dict[str, Any]]:
|
|
286
463
|
# Data conversions from pyarrow to Pixeltable
|
|
@@ -289,7 +466,7 @@ class TableRestorer:
|
|
|
289
466
|
assert col_name in tv.store_tbl.sa_tbl.columns
|
|
290
467
|
sql_types[col_name] = tv.store_tbl.sa_tbl.columns[col_name].type
|
|
291
468
|
media_col_ids: dict[str, int] = {}
|
|
292
|
-
for col in tv.
|
|
469
|
+
for col in tv.cols:
|
|
293
470
|
if col.is_stored and col.col_type.is_media_type():
|
|
294
471
|
media_col_ids[col.store_name()] = col.id
|
|
295
472
|
|
|
@@ -54,6 +54,9 @@ class StoreBase:
|
|
|
54
54
|
self.base = tbl_version.base.get().store_tbl if tbl_version.base is not None else None
|
|
55
55
|
self.create_sa_tbl()
|
|
56
56
|
|
|
57
|
+
def system_columns(self) -> list[sql.Column]:
    """Return a new list holding the primary-key columns followed by the `v_max` column."""
    system_cols = list(self._pk_cols)
    system_cols.append(self.v_max_col)
    return system_cols
|
|
59
|
+
|
|
57
60
|
def pk_columns(self) -> list[sql.Column]:
    """Return the store table's primary-key columns (the internal list itself, not a copy)."""
    pk_cols = self._pk_cols
    return pk_cols
|
|
59
62
|
|
|
@@ -215,6 +218,15 @@ class StoreBase:
|
|
|
215
218
|
log_stmt(_logger, stmt)
|
|
216
219
|
Env.get().conn.execute(stmt)
|
|
217
220
|
|
|
221
|
+
def ensure_columns_exist(self, cols: Iterable[catalog.Column]) -> None:
    """Add to the store table any of `cols` that it does not already contain.

    Looks up the column names registered in `information_schema.columns` under this table's
    storage name, then calls `add_column()` for every column in `cols` whose store name is not
    among them.

    Args:
        cols: the columns that must be present in the store table.
    """
    conn = Env.get().conn
    # Pass the table name as a bound parameter instead of interpolating its Python repr() into the
    # statement: repr() quoting is not SQL quoting (it switches to double quotes, i.e. an SQL
    # identifier, as soon as the name contains a single quote).
    stmt = sql.text('SELECT column_name FROM information_schema.columns WHERE table_name = :table_name')
    result = conn.execute(stmt, {'table_name': self._storage_name()})
    existing_cols = {row[0] for row in result}
    for col in cols:
        if col.store_name() not in existing_cols:
            self.add_column(col)
|
|
229
|
+
|
|
218
230
|
def load_column(
|
|
219
231
|
self, col: catalog.Column, exec_plan: ExecNode, value_expr_slot_idx: int, on_error: Literal['abort', 'ignore']
|
|
220
232
|
) -> int:
|