pixeltable 0.4.0rc1__tar.gz → 0.4.0rc2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/PKG-INFO +2 -1
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/__version__.py +2 -2
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/catalog/catalog.py +4 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/catalog/table.py +16 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/catalog/table_version.py +17 -2
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/catalog/view.py +24 -1
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/dataframe.py +185 -9
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/env.py +2 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exec/__init__.py +1 -1
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exec/expr_eval/evaluators.py +4 -1
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exec/sql_node.py +152 -12
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exprs/data_row.py +5 -3
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exprs/expr.py +7 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exprs/literal.py +2 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/func/tools.py +1 -1
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/functions/gemini.py +0 -1
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/globals.py +5 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/metadata/__init__.py +11 -2
- pixeltable-0.4.0rc2/pixeltable/metadata/converters/convert_36.py +38 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/metadata/notes.py +1 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/metadata/schema.py +3 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/plan.py +217 -10
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/share/packager.py +115 -6
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/utils/formatter.py +64 -42
- pixeltable-0.4.0rc2/pixeltable/utils/sample.py +25 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pyproject.toml +2 -1
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/LICENSE +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/README.md +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/__init__.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/catalog/__init__.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/catalog/column.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/catalog/dir.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/catalog/globals.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/catalog/insertable_table.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/catalog/named_function.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/catalog/path.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/catalog/schema_object.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/catalog/table_version_handle.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/catalog/table_version_path.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/config.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exceptions.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exec/aggregation_node.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exec/cache_prefetch_node.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exec/component_iteration_node.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exec/data_row_batch.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exec/exec_context.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exec/exec_node.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exec/expr_eval/__init__.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exec/expr_eval/expr_eval_node.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exec/expr_eval/globals.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exec/expr_eval/row_buffer.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exec/expr_eval/schedulers.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exec/in_memory_data_node.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exec/row_update_node.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exprs/__init__.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exprs/arithmetic_expr.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exprs/array_slice.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exprs/column_property_ref.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exprs/column_ref.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exprs/comparison.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exprs/compound_predicate.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exprs/expr_dict.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exprs/expr_set.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exprs/function_call.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exprs/globals.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exprs/in_predicate.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exprs/inline_expr.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exprs/is_null.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exprs/json_mapper.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exprs/json_path.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exprs/method_ref.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exprs/object_ref.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exprs/row_builder.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exprs/rowid_ref.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exprs/similarity_expr.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exprs/sql_element_cache.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exprs/string_op.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exprs/type_cast.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/exprs/variable.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/ext/__init__.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/ext/functions/__init__.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/ext/functions/whisperx.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/ext/functions/yolox.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/func/__init__.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/func/aggregate_function.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/func/callable_function.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/func/expr_template_function.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/func/function.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/func/function_registry.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/func/globals.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/func/query_template_function.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/func/signature.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/func/udf.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/functions/__init__.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/functions/anthropic.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/functions/audio.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/functions/bedrock.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/functions/date.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/functions/deepseek.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/functions/fireworks.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/functions/globals.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/functions/huggingface.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/functions/image.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/functions/json.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/functions/llama_cpp.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/functions/math.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/functions/mistralai.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/functions/ollama.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/functions/openai.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/functions/replicate.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/functions/string.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/functions/timestamp.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/functions/together.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/functions/util.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/functions/video.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/functions/vision.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/functions/whisper.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/index/__init__.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/index/base.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/index/btree.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/index/embedding_index.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/io/__init__.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/io/datarows.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/io/external_store.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/io/fiftyone.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/io/globals.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/io/hf_datasets.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/io/label_studio.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/io/pandas.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/io/parquet.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/io/table_data_conduit.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/io/utils.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/iterators/__init__.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/iterators/audio.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/iterators/base.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/iterators/document.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/iterators/image.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/iterators/string.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/iterators/video.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/metadata/converters/convert_10.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/metadata/converters/convert_12.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/metadata/converters/convert_13.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/metadata/converters/convert_14.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/metadata/converters/convert_15.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/metadata/converters/convert_16.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/metadata/converters/convert_17.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/metadata/converters/convert_18.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/metadata/converters/convert_19.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/metadata/converters/convert_20.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/metadata/converters/convert_21.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/metadata/converters/convert_22.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/metadata/converters/convert_23.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/metadata/converters/convert_24.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/metadata/converters/convert_25.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/metadata/converters/convert_26.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/metadata/converters/convert_27.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/metadata/converters/convert_28.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/metadata/converters/convert_29.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/metadata/converters/convert_30.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/metadata/converters/convert_31.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/metadata/converters/convert_32.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/metadata/converters/convert_33.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/metadata/converters/convert_34.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/metadata/converters/convert_35.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/metadata/converters/util.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/py.typed +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/share/__init__.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/share/publish.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/store.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/type_system.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/utils/__init__.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/utils/arrow.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/utils/coco.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/utils/code.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/utils/console_output.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/utils/coroutine.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/utils/dbms.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/utils/description_helper.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/utils/documents.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/utils/exception_handler.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/utils/filecache.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/utils/http_server.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/utils/iceberg.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/utils/media_store.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/utils/pytorch.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/utils/s3.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/utils/sql.py +0 -0
- {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc2}/pixeltable/utils/transactional_directory.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: pixeltable
|
|
3
|
-
Version: 0.4.0rc1
|
|
3
|
+
Version: 0.4.0rc2
|
|
4
4
|
Summary: AI Data Infrastructure: Declarative, Multimodal, and Incremental
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
Keywords: data-science,machine-learning,database,ai,computer-vision,chatbot,ml,artificial-intelligence,feature-engineering,multimodal,mlops,feature-store,vector-database,llm,genai
|
|
@@ -36,6 +36,7 @@ Requires-Dist: numpy (>=1.25)
|
|
|
36
36
|
Requires-Dist: pandas (>=2.0,<3.0)
|
|
37
37
|
Requires-Dist: pgvector (>=0.2.1)
|
|
38
38
|
Requires-Dist: pillow (>=9.3.0)
|
|
39
|
+
Requires-Dist: pillow-heif (>=0.15.0)
|
|
39
40
|
Requires-Dist: pixeltable-pgserver (==0.3.1)
|
|
40
41
|
Requires-Dist: psutil (>=5.9.5)
|
|
41
42
|
Requires-Dist: psycopg[binary] (>=3.1.18)
|
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
# These version placeholders will be replaced during build.
|
|
2
|
-
__version__ = '0.4.0rc1'
|
|
3
|
-
__version_tuple__ = (0, 4, "0rc1")
|
|
2
|
+
__version__ = '0.4.0rc2'
|
|
3
|
+
__version_tuple__ = (0, 4, "0rc2")
|
|
@@ -17,6 +17,8 @@ from pixeltable.env import Env
|
|
|
17
17
|
from pixeltable.iterators import ComponentIterator
|
|
18
18
|
from pixeltable.metadata import schema
|
|
19
19
|
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from pixeltable.plan import SampleClause
|
|
20
22
|
from .dir import Dir
|
|
21
23
|
from .globals import IfExistsParam, IfNotExistsParam, MediaValidation
|
|
22
24
|
from .insertable_table import InsertableTable
|
|
@@ -526,6 +528,7 @@ class Catalog:
|
|
|
526
528
|
base: TableVersionPath,
|
|
527
529
|
select_list: Optional[list[tuple[exprs.Expr, Optional[str]]]],
|
|
528
530
|
where: Optional[exprs.Expr],
|
|
531
|
+
sample_clause: Optional['SampleClause'],
|
|
529
532
|
additional_columns: Optional[dict[str, Any]],
|
|
530
533
|
is_snapshot: bool,
|
|
531
534
|
iterator: Optional[tuple[type[ComponentIterator], dict[str, Any]]],
|
|
@@ -554,6 +557,7 @@ class Catalog:
|
|
|
554
557
|
select_list=select_list,
|
|
555
558
|
additional_columns=additional_columns,
|
|
556
559
|
predicate=where,
|
|
560
|
+
sample_clause=sample_clause,
|
|
557
561
|
is_snapshot=is_snapshot,
|
|
558
562
|
iterator_cls=iterator_class,
|
|
559
563
|
iterator_args=iterator_args,
|
|
@@ -240,6 +240,22 @@ class Table(SchemaObject):
|
|
|
240
240
|
def limit(self, n: int) -> 'pxt.DataFrame':
|
|
241
241
|
return self._df().limit(n)
|
|
242
242
|
|
|
243
|
+
def sample(
|
|
244
|
+
self,
|
|
245
|
+
n: Optional[int] = None,
|
|
246
|
+
n_per_stratum: Optional[int] = None,
|
|
247
|
+
fraction: Optional[float] = None,
|
|
248
|
+
seed: Optional[int] = None,
|
|
249
|
+
stratify_by: Any = None,
|
|
250
|
+
) -> pxt.DataFrame:
|
|
251
|
+
"""Choose a shuffled sample of rows
|
|
252
|
+
|
|
253
|
+
See [`DataFrame.sample`][pixeltable.DataFrame.sample] for more details.
|
|
254
|
+
"""
|
|
255
|
+
return self._df().sample(
|
|
256
|
+
n=n, n_per_stratum=n_per_stratum, fraction=fraction, seed=seed, stratify_by=stratify_by
|
|
257
|
+
)
|
|
258
|
+
|
|
243
259
|
def collect(self) -> 'pxt.dataframe.DataFrameResultSet':
|
|
244
260
|
"""Return rows from this table."""
|
|
245
261
|
return self._df().collect()
|
|
@@ -23,6 +23,10 @@ from pixeltable.utils.exception_handler import run_cleanup_on_exception
|
|
|
23
23
|
from pixeltable.utils.filecache import FileCache
|
|
24
24
|
from pixeltable.utils.media_store import MediaStore
|
|
25
25
|
|
|
26
|
+
if TYPE_CHECKING:
|
|
27
|
+
from pixeltable.plan import SampleClause
|
|
28
|
+
|
|
29
|
+
|
|
26
30
|
from ..func.globals import resolve_symbol
|
|
27
31
|
from .column import Column
|
|
28
32
|
from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, MediaValidation, UpdateStatus, is_valid_identifier
|
|
@@ -66,6 +70,8 @@ class TableVersion:
|
|
|
66
70
|
path: Optional[pxt.catalog.TableVersionPath] # only set for live tables; needed to resolve computed cols
|
|
67
71
|
base: Optional[TableVersionHandle] # only set for views
|
|
68
72
|
predicate: Optional[exprs.Expr]
|
|
73
|
+
sample_clause: Optional['SampleClause']
|
|
74
|
+
|
|
69
75
|
iterator_cls: Optional[type[ComponentIterator]]
|
|
70
76
|
iterator_args: Optional[exprs.InlineDict]
|
|
71
77
|
num_iterator_cols: int
|
|
@@ -132,9 +138,12 @@ class TableVersion:
|
|
|
132
138
|
|
|
133
139
|
# view-specific initialization
|
|
134
140
|
from pixeltable import exprs
|
|
141
|
+
from pixeltable.plan import SampleClause
|
|
135
142
|
|
|
136
143
|
predicate_dict = None if self.view_md is None or self.view_md.predicate is None else self.view_md.predicate
|
|
137
144
|
self.predicate = exprs.Expr.from_dict(predicate_dict) if predicate_dict is not None else None
|
|
145
|
+
sample_dict = None if self.view_md is None or self.view_md.sample_clause is None else self.view_md.sample_clause
|
|
146
|
+
self.sample_clause = SampleClause.from_dict(sample_dict) if sample_dict is not None else None
|
|
138
147
|
|
|
139
148
|
# component view-specific initialization
|
|
140
149
|
self.iterator_cls = None
|
|
@@ -269,7 +278,13 @@ class TableVersion:
|
|
|
269
278
|
|
|
270
279
|
# if this is purely a snapshot (it doesn't require any additional storage for columns and it doesn't have a
|
|
271
280
|
# predicate to apply at runtime), we don't create a physical table and simply use the base's table version path
|
|
272
|
-
if
|
|
281
|
+
if (
|
|
282
|
+
view_md is not None
|
|
283
|
+
and view_md.is_snapshot
|
|
284
|
+
and view_md.predicate is None
|
|
285
|
+
and view_md.sample_clause is None
|
|
286
|
+
and len(cols) == 0
|
|
287
|
+
):
|
|
273
288
|
session.add(tbl_record)
|
|
274
289
|
session.add(tbl_version_record)
|
|
275
290
|
session.add(schema_version_record)
|
|
@@ -906,7 +921,7 @@ class TableVersion:
|
|
|
906
921
|
result.num_excs = num_excs
|
|
907
922
|
result.num_computed_values += exec_plan.ctx.num_computed_exprs * num_rows
|
|
908
923
|
result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
|
|
909
|
-
self._write_md(new_version=True, new_version_ts=
|
|
924
|
+
self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
|
|
910
925
|
|
|
911
926
|
# update views
|
|
912
927
|
for view in self.mutable_views:
|
|
@@ -12,6 +12,10 @@ from pixeltable import catalog, exprs, func
|
|
|
12
12
|
from pixeltable.env import Env
|
|
13
13
|
from pixeltable.iterators import ComponentIterator
|
|
14
14
|
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from pixeltable.plan import SampleClause
|
|
17
|
+
|
|
18
|
+
|
|
15
19
|
from .column import Column
|
|
16
20
|
from .globals import _POS_COLUMN_NAME, MediaValidation, UpdateStatus
|
|
17
21
|
from .table import Table
|
|
@@ -66,6 +70,7 @@ class View(Table):
|
|
|
66
70
|
select_list: Optional[list[tuple[exprs.Expr, Optional[str]]]],
|
|
67
71
|
additional_columns: dict[str, Any],
|
|
68
72
|
predicate: Optional['exprs.Expr'],
|
|
73
|
+
sample_clause: Optional['SampleClause'],
|
|
69
74
|
is_snapshot: bool,
|
|
70
75
|
num_retained_versions: int,
|
|
71
76
|
comment: str,
|
|
@@ -73,6 +78,8 @@ class View(Table):
|
|
|
73
78
|
iterator_cls: Optional[type[ComponentIterator]],
|
|
74
79
|
iterator_args: Optional[dict],
|
|
75
80
|
) -> View:
|
|
81
|
+
from pixeltable.plan import SampleClause
|
|
82
|
+
|
|
76
83
|
# Convert select_list to more additional_columns if present
|
|
77
84
|
include_base_columns: bool = select_list is None
|
|
78
85
|
select_list_columns: List[Column] = []
|
|
@@ -84,12 +91,23 @@ class View(Table):
|
|
|
84
91
|
columns = select_list_columns + columns_from_additional_columns
|
|
85
92
|
cls._verify_schema(columns)
|
|
86
93
|
|
|
87
|
-
# verify that
|
|
94
|
+
# verify that filters can be evaluated in the context of the base
|
|
88
95
|
if predicate is not None:
|
|
89
96
|
if not predicate.is_bound_by([base]):
|
|
90
97
|
raise excs.Error(f'Filter cannot be computed in the context of the base {base.tbl_name()}')
|
|
91
98
|
# create a copy that we can modify and store
|
|
92
99
|
predicate = predicate.copy()
|
|
100
|
+
if sample_clause is not None:
|
|
101
|
+
# make sure that the sample clause can be computed in the context of the base
|
|
102
|
+
if sample_clause.stratify_exprs is not None and not all(
|
|
103
|
+
stratify_expr.is_bound_by([base]) for stratify_expr in sample_clause.stratify_exprs
|
|
104
|
+
):
|
|
105
|
+
raise excs.Error(f'Sample clause cannot be computed in the context of the base {base.tbl_name()}')
|
|
106
|
+
# create a copy that we can modify and store
|
|
107
|
+
sc = sample_clause
|
|
108
|
+
sample_clause = SampleClause(
|
|
109
|
+
sc.version, sc.n, sc.n_per_stratum, sc.fraction, sc.seed, sc.stratify_exprs.copy()
|
|
110
|
+
)
|
|
93
111
|
|
|
94
112
|
# same for value exprs
|
|
95
113
|
for col in columns:
|
|
@@ -160,6 +178,8 @@ class View(Table):
|
|
|
160
178
|
# if this is a snapshot, we need to retarget all exprs to the snapshot tbl versions
|
|
161
179
|
if is_snapshot:
|
|
162
180
|
predicate = predicate.retarget(base_version_path) if predicate is not None else None
|
|
181
|
+
if sample_clause is not None:
|
|
182
|
+
exprs.Expr.retarget_list(sample_clause.stratify_exprs, base_version_path)
|
|
163
183
|
iterator_args_expr = (
|
|
164
184
|
iterator_args_expr.retarget(base_version_path) if iterator_args_expr is not None else None
|
|
165
185
|
)
|
|
@@ -171,6 +191,7 @@ class View(Table):
|
|
|
171
191
|
is_snapshot=is_snapshot,
|
|
172
192
|
include_base_columns=include_base_columns,
|
|
173
193
|
predicate=predicate.as_dict() if predicate is not None else None,
|
|
194
|
+
sample_clause=sample_clause.as_dict() if sample_clause is not None else None,
|
|
174
195
|
base_versions=base_version_path.as_md(),
|
|
175
196
|
iterator_class_fqn=iterator_class_fqn,
|
|
176
197
|
iterator_args=iterator_args_expr.as_dict() if iterator_args_expr is not None else None,
|
|
@@ -306,4 +327,6 @@ class View(Table):
|
|
|
306
327
|
|
|
307
328
|
if self._tbl_version.get().predicate is not None:
|
|
308
329
|
result.append(f'\nWhere: {self._tbl_version.get().predicate!s}')
|
|
330
|
+
if self._tbl_version.get().sample_clause is not None:
|
|
331
|
+
result.append(f'\nSample: {self._tbl_version.get().sample_clause!s}')
|
|
309
332
|
return ''.join(result)
|
|
@@ -17,6 +17,7 @@ from pixeltable import catalog, exceptions as excs, exec, exprs, plan, type_syst
|
|
|
17
17
|
from pixeltable.catalog import Catalog, is_valid_identifier
|
|
18
18
|
from pixeltable.catalog.globals import UpdateStatus
|
|
19
19
|
from pixeltable.env import Env
|
|
20
|
+
from pixeltable.plan import Planner, SampleClause
|
|
20
21
|
from pixeltable.type_system import ColumnType
|
|
21
22
|
from pixeltable.utils.description_helper import DescriptionHelper
|
|
22
23
|
from pixeltable.utils.formatter import Formatter
|
|
@@ -139,6 +140,7 @@ class DataFrame:
|
|
|
139
140
|
grouping_tbl: Optional[catalog.TableVersion]
|
|
140
141
|
order_by_clause: Optional[list[tuple[exprs.Expr, bool]]]
|
|
141
142
|
limit_val: Optional[exprs.Expr]
|
|
143
|
+
sample_clause: Optional[SampleClause]
|
|
142
144
|
|
|
143
145
|
def __init__(
|
|
144
146
|
self,
|
|
@@ -149,6 +151,7 @@ class DataFrame:
|
|
|
149
151
|
grouping_tbl: Optional[catalog.TableVersion] = None,
|
|
150
152
|
order_by_clause: Optional[list[tuple[exprs.Expr, bool]]] = None, # list[(expr, asc)]
|
|
151
153
|
limit: Optional[exprs.Expr] = None,
|
|
154
|
+
sample_clause: Optional[SampleClause] = None,
|
|
152
155
|
):
|
|
153
156
|
self._from_clause = from_clause
|
|
154
157
|
|
|
@@ -168,6 +171,7 @@ class DataFrame:
|
|
|
168
171
|
self.grouping_tbl = grouping_tbl
|
|
169
172
|
self.order_by_clause = copy.deepcopy(order_by_clause)
|
|
170
173
|
self.limit_val = limit
|
|
174
|
+
self.sample_clause = sample_clause
|
|
171
175
|
|
|
172
176
|
@classmethod
|
|
173
177
|
def _normalize_select_list(
|
|
@@ -210,8 +214,7 @@ class DataFrame:
|
|
|
210
214
|
|
|
211
215
|
@property
|
|
212
216
|
def _first_tbl(self) -> catalog.TableVersionPath:
|
|
213
|
-
|
|
214
|
-
return self._from_clause.tbls[0]
|
|
217
|
+
return self._from_clause._first_tbl
|
|
215
218
|
|
|
216
219
|
def _vars(self) -> dict[str, exprs.Variable]:
|
|
217
220
|
"""
|
|
@@ -236,6 +239,36 @@ class DataFrame:
|
|
|
236
239
|
raise excs.Error(f'Multiple definitions of parameter {var.name}')
|
|
237
240
|
return unique_vars
|
|
238
241
|
|
|
242
|
+
@classmethod
|
|
243
|
+
def _convert_param_to_typed_expr(
|
|
244
|
+
cls, v: Any, required_type: ts.ColumnType, required: bool, name: str, range: Optional[tuple[Any, Any]] = None
|
|
245
|
+
) -> Optional[exprs.Expr]:
|
|
246
|
+
if v is None:
|
|
247
|
+
if required:
|
|
248
|
+
raise excs.Error(f'{name!r} parameter must be present')
|
|
249
|
+
return v
|
|
250
|
+
v_expr = exprs.Expr.from_object(v)
|
|
251
|
+
if not v_expr.col_type.matches(required_type):
|
|
252
|
+
raise excs.Error(f'{name!r} parameter must be of type {required_type!r}, instead of {v_expr.col_type}')
|
|
253
|
+
if range is not None:
|
|
254
|
+
if not isinstance(v_expr, exprs.Literal):
|
|
255
|
+
raise excs.Error(f'{name!r} parameter must be a constant, not {v_expr}')
|
|
256
|
+
if range[0] is not None and not (v_expr.val >= range[0]):
|
|
257
|
+
raise excs.Error(f'{name!r} parameter must be >= {range[0]}')
|
|
258
|
+
if range[1] is not None and not (v_expr.val <= range[1]):
|
|
259
|
+
raise excs.Error(f'{name!r} parameter must be <= {range[1]}')
|
|
260
|
+
return v_expr
|
|
261
|
+
|
|
262
|
+
@classmethod
|
|
263
|
+
def validate_constant_type_range(
|
|
264
|
+
cls, v: Any, required_type: ts.ColumnType, required: bool, name: str, range: Optional[tuple[Any, Any]] = None
|
|
265
|
+
) -> Any:
|
|
266
|
+
"""Validate that the given named parameter is a constant of the required type and within the specified range."""
|
|
267
|
+
v_expr = cls._convert_param_to_typed_expr(v, required_type, required, name, range)
|
|
268
|
+
if v_expr is None:
|
|
269
|
+
return None
|
|
270
|
+
return v_expr.val
|
|
271
|
+
|
|
239
272
|
def parameters(self) -> dict[str, ColumnType]:
|
|
240
273
|
"""Return a dict mapping parameter name to parameter type.
|
|
241
274
|
|
|
@@ -280,7 +313,7 @@ class DataFrame:
|
|
|
280
313
|
num_rowid_cols = len(self.grouping_tbl.store_tbl.rowid_columns())
|
|
281
314
|
# the grouping table must be a base of self.tbl
|
|
282
315
|
assert num_rowid_cols <= len(self._first_tbl.tbl_version.get().store_tbl.rowid_columns())
|
|
283
|
-
group_by_clause =
|
|
316
|
+
group_by_clause = self.__rowid_columns(num_rowid_cols)
|
|
284
317
|
elif self.group_by_clause is not None:
|
|
285
318
|
group_by_clause = self.group_by_clause
|
|
286
319
|
|
|
@@ -292,14 +325,21 @@ class DataFrame:
|
|
|
292
325
|
self._select_list_exprs,
|
|
293
326
|
where_clause=self.where_clause,
|
|
294
327
|
group_by_clause=group_by_clause,
|
|
295
|
-
order_by_clause=self.order_by_clause
|
|
328
|
+
order_by_clause=self.order_by_clause,
|
|
296
329
|
limit=self.limit_val,
|
|
330
|
+
sample_clause=self.sample_clause,
|
|
297
331
|
)
|
|
298
332
|
|
|
333
|
+
def __rowid_columns(self, num_rowid_cols: Optional[int] = None) -> list[exprs.Expr]:
|
|
334
|
+
"""Return list of RowidRef for the given number of associated rowids"""
|
|
335
|
+
return Planner.rowid_columns(self._first_tbl.tbl_version, num_rowid_cols)
|
|
336
|
+
|
|
299
337
|
def _has_joins(self) -> bool:
|
|
300
338
|
return len(self._from_clause.join_clauses) > 0
|
|
301
339
|
|
|
302
340
|
def show(self, n: int = 20) -> DataFrameResultSet:
|
|
341
|
+
if self.sample_clause is not None:
|
|
342
|
+
raise excs.Error('show() cannot be used with sample()')
|
|
303
343
|
assert n is not None
|
|
304
344
|
return self.limit(n).collect()
|
|
305
345
|
|
|
@@ -322,6 +362,8 @@ class DataFrame:
|
|
|
322
362
|
raise excs.Error('head() cannot be used with order_by()')
|
|
323
363
|
if self._has_joins():
|
|
324
364
|
raise excs.Error('head() not supported for joins')
|
|
365
|
+
if self.sample_clause is not None:
|
|
366
|
+
raise excs.Error('head() cannot be used with sample()')
|
|
325
367
|
if self.group_by_clause is not None:
|
|
326
368
|
raise excs.Error('head() cannot be used with group_by()')
|
|
327
369
|
num_rowid_cols = len(self._first_tbl.tbl_version.get().store_tbl.rowid_columns())
|
|
@@ -347,6 +389,8 @@ class DataFrame:
|
|
|
347
389
|
raise excs.Error('tail() cannot be used with order_by()')
|
|
348
390
|
if self._has_joins():
|
|
349
391
|
raise excs.Error('tail() not supported for joins')
|
|
392
|
+
if self.sample_clause is not None:
|
|
393
|
+
raise excs.Error('tail() cannot be used with sample()')
|
|
350
394
|
if self.group_by_clause is not None:
|
|
351
395
|
raise excs.Error('tail() cannot be used with group_by()')
|
|
352
396
|
num_rowid_cols = len(self._first_tbl.tbl_version.get().store_tbl.rowid_columns())
|
|
@@ -510,6 +554,9 @@ class DataFrame:
|
|
|
510
554
|
if self.limit_val is not None:
|
|
511
555
|
heading_vals.append('Limit')
|
|
512
556
|
info_vals.append(self.limit_val.display_str(inline=False))
|
|
557
|
+
if self.sample_clause is not None:
|
|
558
|
+
heading_vals.append('Sample')
|
|
559
|
+
info_vals.append(self.sample_clause.display_str(inline=False))
|
|
513
560
|
assert len(heading_vals) == len(info_vals)
|
|
514
561
|
return pd.DataFrame(info_vals, index=heading_vals)
|
|
515
562
|
|
|
@@ -644,6 +691,8 @@ class DataFrame:
|
|
|
644
691
|
"""
|
|
645
692
|
if self.where_clause is not None:
|
|
646
693
|
raise excs.Error('Where clause already specified')
|
|
694
|
+
if self.sample_clause is not None:
|
|
695
|
+
raise excs.Error('where cannot be used after sample()')
|
|
647
696
|
if not isinstance(pred, exprs.Expr):
|
|
648
697
|
raise excs.Error(f'Where() requires a Pixeltable expression, but instead got {type(pred)}')
|
|
649
698
|
if not pred.col_type.is_bool_type():
|
|
@@ -771,6 +820,8 @@ class DataFrame:
|
|
|
771
820
|
|
|
772
821
|
>>> df = t.join(d, on=(t.d1 == d.pk1) & (t.d2 == d.pk2), how='left')
|
|
773
822
|
"""
|
|
823
|
+
if self.sample_clause is not None:
|
|
824
|
+
raise excs.Error('join() cannot be used with sample()')
|
|
774
825
|
join_pred: Optional[exprs.Expr]
|
|
775
826
|
if how == 'cross':
|
|
776
827
|
if on is not None:
|
|
@@ -838,6 +889,9 @@ class DataFrame:
|
|
|
838
889
|
"""
|
|
839
890
|
if self.group_by_clause is not None:
|
|
840
891
|
raise excs.Error('Group-by already specified')
|
|
892
|
+
if self.sample_clause is not None:
|
|
893
|
+
raise excs.Error('group_by() cannot be used with sample()')
|
|
894
|
+
|
|
841
895
|
grouping_tbl: Optional[catalog.TableVersion] = None
|
|
842
896
|
group_by_clause: Optional[list[exprs.Expr]] = None
|
|
843
897
|
for item in grouping_items:
|
|
@@ -921,6 +975,8 @@ class DataFrame:
|
|
|
921
975
|
|
|
922
976
|
>>> df = book.order_by(t.price, asc=False).order_by(t.pages)
|
|
923
977
|
"""
|
|
978
|
+
if self.sample_clause is not None:
|
|
979
|
+
raise excs.Error('order_by() cannot be used with sample()')
|
|
924
980
|
for e in expr_list:
|
|
925
981
|
if not isinstance(e, exprs.Expr):
|
|
926
982
|
raise excs.Error(f'Invalid expression in order_by(): {e}')
|
|
@@ -945,10 +1001,10 @@ class DataFrame:
|
|
|
945
1001
|
Returns:
|
|
946
1002
|
A new DataFrame with the specified limited rows.
|
|
947
1003
|
"""
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
1004
|
+
if self.sample_clause is not None:
|
|
1005
|
+
raise excs.Error('limit() cannot be used with sample()')
|
|
1006
|
+
|
|
1007
|
+
limit_expr = self._convert_param_to_typed_expr(n, ts.IntType(nullable=False), True, 'limit()')
|
|
952
1008
|
return DataFrame(
|
|
953
1009
|
from_clause=self._from_clause,
|
|
954
1010
|
select_list=self.select_list,
|
|
@@ -956,7 +1012,124 @@ class DataFrame:
|
|
|
956
1012
|
group_by_clause=self.group_by_clause,
|
|
957
1013
|
grouping_tbl=self.grouping_tbl,
|
|
958
1014
|
order_by_clause=self.order_by_clause,
|
|
959
|
-
limit=
|
|
1015
|
+
limit=limit_expr,
|
|
1016
|
+
)
|
|
1017
|
+
|
|
1018
|
+
def sample(
|
|
1019
|
+
self,
|
|
1020
|
+
n: Optional[int] = None,
|
|
1021
|
+
n_per_stratum: Optional[int] = None,
|
|
1022
|
+
fraction: Optional[float] = None,
|
|
1023
|
+
seed: Optional[int] = None,
|
|
1024
|
+
stratify_by: Any = None,
|
|
1025
|
+
) -> DataFrame:
|
|
1026
|
+
"""
|
|
1027
|
+
Return a new DataFrame specifying a sample of rows from the DataFrame, considered in a shuffled order.
|
|
1028
|
+
|
|
1029
|
+
The size of the sample can be specified in three ways:
|
|
1030
|
+
|
|
1031
|
+
- `n`: the total number of rows to produce as a sample
|
|
1032
|
+
- `n_per_stratum`: the number of rows to produce per stratum as a sample
|
|
1033
|
+
- `fraction`: the fraction of available rows to produce as a sample
|
|
1034
|
+
|
|
1035
|
+
The sample can be stratified by one or more columns, which means that the sample will
|
|
1036
|
+
be selected from each stratum separately.
|
|
1037
|
+
|
|
1038
|
+
The data is shuffled before creating the sample.
|
|
1039
|
+
|
|
1040
|
+
Args:
|
|
1041
|
+
n: Total number of rows to produce as a sample.
|
|
1042
|
+
n_per_stratum: Number of rows to produce per stratum as a sample. This parameter is only valid if
|
|
1043
|
+
`stratify_by` is specified. Only one of `n` or `n_per_stratum` can be specified.
|
|
1044
|
+
fraction: Fraction of available rows to produce as a sample. This parameter is not usable with `n` or
|
|
1045
|
+
`n_per_stratum`. The fraction must be between 0.0 and 1.0.
|
|
1046
|
+
seed: Random seed for reproducible shuffling
|
|
1047
|
+
stratify_by: If specified, the sample will be stratified by these values.
|
|
1048
|
+
|
|
1049
|
+
Returns:
|
|
1050
|
+
A new DataFrame which specifies the sampled rows
|
|
1051
|
+
|
|
1052
|
+
Examples:
|
|
1053
|
+
Given the Table `person` containing the field 'age', we can create samples of the table in various ways:
|
|
1054
|
+
|
|
1055
|
+
Sample 100 rows from the above Table:
|
|
1056
|
+
|
|
1057
|
+
>>> df = person.sample(n=100)
|
|
1058
|
+
|
|
1059
|
+
Sample 10% of the rows from the above Table:
|
|
1060
|
+
|
|
1061
|
+
>>> df = person.sample(fraction=0.1)
|
|
1062
|
+
|
|
1063
|
+
Sample 10% of the rows from the above Table, stratified by the column 'age':
|
|
1064
|
+
|
|
1065
|
+
>>> df = person.sample(fraction=0.1, stratify_by=person.age)
|
|
1066
|
+
|
|
1067
|
+
Equal allocation sampling: Sample 2 rows from each age present in the above Table:
|
|
1068
|
+
|
|
1069
|
+
>>> df = person.sample(n_per_stratum=2, stratify_by=person.age)
|
|
1070
|
+
|
|
1071
|
+
Sampling is compatible with the where clause, so we can also sample from a filtered DataFrame:
|
|
1072
|
+
|
|
1073
|
+
>>> df = person.where(person.age > 30).sample(n=100)
|
|
1074
|
+
"""
|
|
1075
|
+
# Check context of usage
|
|
1076
|
+
if self.sample_clause is not None:
|
|
1077
|
+
raise excs.Error('sample() cannot be used with sample()')
|
|
1078
|
+
if self.group_by_clause is not None:
|
|
1079
|
+
raise excs.Error('sample() cannot be used with group_by()')
|
|
1080
|
+
if self.order_by_clause is not None:
|
|
1081
|
+
raise excs.Error('sample() cannot be used with order_by()')
|
|
1082
|
+
if self.limit_val is not None:
|
|
1083
|
+
raise excs.Error('sample() cannot be used with limit()')
|
|
1084
|
+
if self._has_joins():
|
|
1085
|
+
raise excs.Error('sample() cannot be used with join()')
|
|
1086
|
+
|
|
1087
|
+
# Check parameter combinations
|
|
1088
|
+
if (n is not None) + (n_per_stratum is not None) + (fraction is not None) != 1:
|
|
1089
|
+
raise excs.Error('Exactly one of `n`, `n_per_stratum`, or `fraction` must be specified.')
|
|
1090
|
+
if n_per_stratum is not None and stratify_by is None:
|
|
1091
|
+
raise excs.Error('Must specify `stratify_by` to use `n_per_stratum`')
|
|
1092
|
+
|
|
1093
|
+
# Check parameter types and values
|
|
1094
|
+
n = self.validate_constant_type_range(n, ts.IntType(nullable=False), False, 'n', (1, None))
|
|
1095
|
+
n_per_stratum = self.validate_constant_type_range(
|
|
1096
|
+
n_per_stratum, ts.IntType(nullable=False), False, 'n_per_stratum', (1, None)
|
|
1097
|
+
)
|
|
1098
|
+
fraction = self.validate_constant_type_range(
|
|
1099
|
+
fraction, ts.FloatType(nullable=False), False, 'fraction', (0.0, 1.0)
|
|
1100
|
+
)
|
|
1101
|
+
seed = self.validate_constant_type_range(seed, ts.IntType(nullable=False), False, 'seed')
|
|
1102
|
+
|
|
1103
|
+
# analyze stratify list
|
|
1104
|
+
stratify_exprs: list[exprs.Expr] = []
|
|
1105
|
+
if stratify_by is not None:
|
|
1106
|
+
if isinstance(stratify_by, exprs.Expr):
|
|
1107
|
+
stratify_by = [stratify_by]
|
|
1108
|
+
if not isinstance(stratify_by, (list, tuple)):
|
|
1109
|
+
raise excs.Error('`stratify_by` must be a list of scalar expressions')
|
|
1110
|
+
for expr in stratify_by:
|
|
1111
|
+
if expr is None or not isinstance(expr, exprs.Expr):
|
|
1112
|
+
raise excs.Error(f'Invalid expression: {expr}')
|
|
1113
|
+
if not expr.col_type.is_scalar_type():
|
|
1114
|
+
raise excs.Error(f'Invalid type: expression must be a scalar type (not {expr.col_type})')
|
|
1115
|
+
if not expr.is_bound_by(self._from_clause.tbls):
|
|
1116
|
+
raise excs.Error(
|
|
1117
|
+
f"Expression '{expr}' cannot be evaluated in the context of this query's tables "
|
|
1118
|
+
f'({",".join(tbl.tbl_name() for tbl in self._from_clause.tbls)})'
|
|
1119
|
+
)
|
|
1120
|
+
stratify_exprs.append(expr)
|
|
1121
|
+
|
|
1122
|
+
sample_clause = SampleClause(None, n, n_per_stratum, fraction, seed, stratify_exprs)
|
|
1123
|
+
|
|
1124
|
+
return DataFrame(
|
|
1125
|
+
from_clause=self._from_clause,
|
|
1126
|
+
select_list=self.select_list,
|
|
1127
|
+
where_clause=self.where_clause,
|
|
1128
|
+
group_by_clause=self.group_by_clause,
|
|
1129
|
+
grouping_tbl=self.grouping_tbl,
|
|
1130
|
+
order_by_clause=self.order_by_clause,
|
|
1131
|
+
limit=self.limit_val,
|
|
1132
|
+
sample_clause=sample_clause,
|
|
960
1133
|
)
|
|
961
1134
|
|
|
962
1135
|
def update(self, value_spec: dict[str, Any], cascade: bool = True) -> UpdateStatus:
|
|
@@ -1055,6 +1228,7 @@ class DataFrame:
|
|
|
1055
1228
|
if self.order_by_clause is not None
|
|
1056
1229
|
else None,
|
|
1057
1230
|
'limit_val': self.limit_val.as_dict() if self.limit_val is not None else None,
|
|
1231
|
+
'sample_clause': self.sample_clause.as_dict() if self.sample_clause is not None else None,
|
|
1058
1232
|
}
|
|
1059
1233
|
return d
|
|
1060
1234
|
|
|
@@ -1081,6 +1255,7 @@ class DataFrame:
|
|
|
1081
1255
|
else None
|
|
1082
1256
|
)
|
|
1083
1257
|
limit_val = exprs.Expr.from_dict(d['limit_val']) if d['limit_val'] is not None else None
|
|
1258
|
+
sample_clause = SampleClause.from_dict(d['sample_clause']) if d['sample_clause'] is not None else None
|
|
1084
1259
|
|
|
1085
1260
|
return DataFrame(
|
|
1086
1261
|
from_clause=from_clause,
|
|
@@ -1090,6 +1265,7 @@ class DataFrame:
|
|
|
1090
1265
|
grouping_tbl=grouping_tbl,
|
|
1091
1266
|
order_by_clause=order_by_clause,
|
|
1092
1267
|
limit=limit_val,
|
|
1268
|
+
sample_clause=sample_clause,
|
|
1093
1269
|
)
|
|
1094
1270
|
|
|
1095
1271
|
def _hash_result_set(self) -> str:
|
|
@@ -25,6 +25,7 @@ from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
|
|
|
25
25
|
|
|
26
26
|
import pixeltable_pgserver
|
|
27
27
|
import sqlalchemy as sql
|
|
28
|
+
from pillow_heif import register_heif_opener # type: ignore[import-untyped]
|
|
28
29
|
from tqdm import TqdmWarning
|
|
29
30
|
|
|
30
31
|
from pixeltable import exceptions as excs
|
|
@@ -598,6 +599,7 @@ class Env:
|
|
|
598
599
|
|
|
599
600
|
def _set_up_runtime(self) -> None:
|
|
600
601
|
"""Check for and start runtime services"""
|
|
602
|
+
register_heif_opener()
|
|
601
603
|
self._start_web_server()
|
|
602
604
|
self.__register_packages()
|
|
603
605
|
|
|
@@ -9,4 +9,4 @@ from .exec_node import ExecNode
|
|
|
9
9
|
from .expr_eval import ExprEvalNode
|
|
10
10
|
from .in_memory_data_node import InMemoryDataNode
|
|
11
11
|
from .row_update_node import RowUpdateNode
|
|
12
|
-
from .sql_node import SqlAggregationNode, SqlJoinNode, SqlLookupNode, SqlNode, SqlScanNode
|
|
12
|
+
from .sql_node import SqlAggregationNode, SqlJoinNode, SqlLookupNode, SqlNode, SqlSampleNode, SqlScanNode
|
|
@@ -317,7 +317,10 @@ class JsonMapperDispatcher(Evaluator):
|
|
|
317
317
|
for _ in src
|
|
318
318
|
]
|
|
319
319
|
for nested_row, anchor_val in zip(nested_rows, src):
|
|
320
|
-
|
|
320
|
+
# It's possible that self.scope_anchor.slot_idx is None; this corresponds to the case where the
|
|
321
|
+
# mapper expression doesn't actually contain references to RELATIVE_PATH_ROOT.
|
|
322
|
+
if self.scope_anchor.slot_idx is not None:
|
|
323
|
+
nested_row[self.scope_anchor.slot_idx] = anchor_val
|
|
321
324
|
for slot_idx_, nested_slot_idx in self.external_slot_map.items():
|
|
322
325
|
nested_row[nested_slot_idx] = row[slot_idx_]
|
|
323
326
|
self.nested_exec_ctx.init_rows(nested_rows)
|