pixeltable 0.2.28__tar.gz → 0.2.29__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- {pixeltable-0.2.28 → pixeltable-0.2.29}/PKG-INFO +3 -2
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/__version__.py +2 -2
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/catalog/__init__.py +1 -1
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/catalog/dir.py +6 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/catalog/globals.py +13 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/catalog/named_function.py +4 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/catalog/path_dict.py +37 -11
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/catalog/schema_object.py +6 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/catalog/table.py +22 -5
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/catalog/table_version.py +22 -8
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/dataframe.py +201 -3
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/env.py +9 -3
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exec/expr_eval_node.py +1 -1
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exec/sql_node.py +2 -2
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exprs/function_call.py +134 -24
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exprs/inline_expr.py +22 -2
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exprs/row_builder.py +1 -1
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exprs/similarity_expr.py +9 -2
- pixeltable-0.2.29/pixeltable/func/aggregate_function.py +286 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/func/callable_function.py +49 -13
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/func/expr_template_function.py +55 -24
- pixeltable-0.2.29/pixeltable/func/function.py +370 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/func/function_registry.py +2 -1
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/func/query_template_function.py +11 -6
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/func/signature.py +64 -7
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/func/udf.py +57 -35
- pixeltable-0.2.29/pixeltable/functions/globals.py +164 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/functions/json.py +3 -8
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/functions/ollama.py +4 -4
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/functions/timestamp.py +1 -1
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/functions/video.py +2 -8
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/functions/vision.py +1 -1
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/globals.py +218 -59
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/index/embedding_index.py +44 -24
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/metadata/__init__.py +1 -1
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/metadata/converters/convert_16.py +2 -1
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/metadata/converters/convert_17.py +2 -1
- pixeltable-0.2.29/pixeltable/metadata/converters/convert_23.py +35 -0
- pixeltable-0.2.29/pixeltable/metadata/converters/convert_24.py +47 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/metadata/converters/util.py +4 -2
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/metadata/notes.py +2 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/metadata/schema.py +1 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/tool/create_test_db_dump.py +11 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/tool/doc_plugins/griffe.py +4 -3
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/type_system.py +180 -45
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pyproject.toml +4 -2
- pixeltable-0.2.28/pixeltable/func/aggregate_function.py +0 -206
- pixeltable-0.2.28/pixeltable/func/function.py +0 -209
- pixeltable-0.2.28/pixeltable/functions/globals.py +0 -144
- {pixeltable-0.2.28 → pixeltable-0.2.29}/LICENSE +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/README.md +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/__init__.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/catalog/catalog.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/catalog/column.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/catalog/insertable_table.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/catalog/path.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/catalog/table_version_path.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/catalog/view.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exceptions.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exec/__init__.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exec/aggregation_node.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exec/cache_prefetch_node.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exec/component_iteration_node.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exec/data_row_batch.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exec/exec_context.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exec/exec_node.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exec/in_memory_data_node.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exec/row_update_node.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exprs/__init__.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exprs/arithmetic_expr.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exprs/array_slice.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exprs/column_property_ref.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exprs/column_ref.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exprs/comparison.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exprs/compound_predicate.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exprs/data_row.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exprs/expr.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exprs/expr_dict.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exprs/expr_set.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exprs/globals.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exprs/in_predicate.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exprs/is_null.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exprs/json_mapper.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exprs/json_path.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exprs/literal.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exprs/method_ref.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exprs/object_ref.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exprs/rowid_ref.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exprs/sql_element_cache.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exprs/type_cast.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/exprs/variable.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/ext/__init__.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/ext/functions/__init__.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/ext/functions/whisperx.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/ext/functions/yolox.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/func/__init__.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/func/globals.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/functions/__init__.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/functions/anthropic.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/functions/audio.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/functions/fireworks.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/functions/gemini.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/functions/huggingface.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/functions/image.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/functions/llama_cpp.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/functions/mistralai.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/functions/openai.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/functions/replicate.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/functions/string.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/functions/together.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/functions/util.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/functions/whisper.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/index/__init__.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/index/base.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/index/btree.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/io/__init__.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/io/external_store.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/io/fiftyone.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/io/globals.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/io/hf_datasets.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/io/label_studio.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/io/pandas.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/io/parquet.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/iterators/__init__.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/iterators/base.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/iterators/document.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/iterators/image.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/iterators/string.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/iterators/video.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/metadata/converters/convert_10.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/metadata/converters/convert_12.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/metadata/converters/convert_13.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/metadata/converters/convert_14.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/metadata/converters/convert_15.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/metadata/converters/convert_18.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/metadata/converters/convert_19.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/metadata/converters/convert_20.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/metadata/converters/convert_21.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/metadata/converters/convert_22.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/plan.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/py.typed +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/store.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/tool/create_test_video.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/tool/doc_plugins/mkdocstrings.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/tool/embed_udf.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/tool/mypy_plugin.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/utils/__init__.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/utils/arrow.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/utils/coco.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/utils/code.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/utils/description_helper.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/utils/documents.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/utils/filecache.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/utils/formatter.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/utils/http_server.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/utils/media_store.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/utils/pytorch.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/utils/s3.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/utils/sql.py +0 -0
- {pixeltable-0.2.28 → pixeltable-0.2.29}/pixeltable/utils/transactional_directory.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: pixeltable
|
|
3
|
-
Version: 0.2.28
|
|
3
|
+
Version: 0.2.29
|
|
4
4
|
Summary: AI Data Infrastructure: Declarative, Multimodal, and Incremental
|
|
5
5
|
Home-page: https://pixeltable.com/
|
|
6
6
|
License: Apache-2.0
|
|
@@ -29,13 +29,14 @@ Requires-Dist: cloudpickle (>=2.2.1,<3.0.0)
|
|
|
29
29
|
Requires-Dist: ftfy (>=6.2.0,<7.0.0)
|
|
30
30
|
Requires-Dist: jinja2 (>=3.1.3,<4.0.0)
|
|
31
31
|
Requires-Dist: jmespath (>=1.0.1,<2.0.0)
|
|
32
|
+
Requires-Dist: jsonschema (>=4.1.0)
|
|
32
33
|
Requires-Dist: lxml (>=5.0)
|
|
33
34
|
Requires-Dist: more-itertools (>=10.2,<11.0)
|
|
34
35
|
Requires-Dist: numpy (>=1.25,<2.0)
|
|
35
36
|
Requires-Dist: pandas (>=2.0,<3.0)
|
|
36
37
|
Requires-Dist: pgvector (>=0.2.1,<0.3.0)
|
|
37
38
|
Requires-Dist: pillow (>=9.3.0)
|
|
38
|
-
Requires-Dist: pixeltable-pgserver (==0.2.
|
|
39
|
+
Requires-Dist: pixeltable-pgserver (==0.2.9)
|
|
39
40
|
Requires-Dist: psutil (>=5.9.5,<6.0.0)
|
|
40
41
|
Requires-Dist: psycopg[binary] (>=3.1.18)
|
|
41
42
|
Requires-Dist: puremagic (>=1.20)
|
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
# These version placeholders will be replaced during build.
|
|
2
|
-
__version__ = "0.2.28"
|
|
3
|
-
__version_tuple__ = (0, 2, 28)
|
|
2
|
+
__version__ = "0.2.29"
|
|
3
|
+
__version_tuple__ = (0, 2, 29)
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from .catalog import Catalog
|
|
2
2
|
from .column import Column
|
|
3
3
|
from .dir import Dir
|
|
4
|
-
from .globals import UpdateStatus, is_valid_identifier, is_valid_path, MediaValidation
|
|
4
|
+
from .globals import UpdateStatus, is_valid_identifier, is_valid_path, MediaValidation, IfExistsParam
|
|
5
5
|
from .insertable_table import InsertableTable
|
|
6
6
|
from .named_function import NamedFunction
|
|
7
7
|
from .path import Path
|
|
@@ -21,6 +21,12 @@ class Dir(SchemaObject):
|
|
|
21
21
|
def _display_name(cls) -> str:
|
|
22
22
|
return 'directory'
|
|
23
23
|
|
|
24
|
+
@property
|
|
25
|
+
def _has_dependents(self) -> bool:
|
|
26
|
+
""" Returns True if this directory has any children. """
|
|
27
|
+
from pixeltable.catalog import Catalog, Path
|
|
28
|
+
return len(Catalog.get().paths.get_children(Path(self._path), child_type=None, recursive=False)) > 0
|
|
29
|
+
|
|
24
30
|
def _move(self, new_name: str, new_dir_id: UUID) -> None:
|
|
25
31
|
super()._move(new_name, new_dir_id)
|
|
26
32
|
with Env.get().engine.begin() as conn:
|
|
@@ -51,6 +51,19 @@ class MediaValidation(enum.Enum):
|
|
|
51
51
|
val_strs = ', '.join(f'{s.lower()!r}' for s in cls.__members__.keys())
|
|
52
52
|
raise excs.Error(f'{error_prefix} must be one of: [{val_strs}]')
|
|
53
53
|
|
|
54
|
+
class IfExistsParam(enum.Enum):
|
|
55
|
+
ERROR = 0
|
|
56
|
+
IGNORE = 1
|
|
57
|
+
REPLACE = 2
|
|
58
|
+
REPLACE_FORCE = 3
|
|
59
|
+
|
|
60
|
+
@classmethod
|
|
61
|
+
def validated(cls, param_val: str, param_name: str) -> IfExistsParam:
|
|
62
|
+
try:
|
|
63
|
+
return cls[param_val.upper()]
|
|
64
|
+
except KeyError:
|
|
65
|
+
val_strs = ', '.join(f'{s.lower()!r}' for s in cls.__members__.keys())
|
|
66
|
+
raise excs.Error(f'{param_name} must be one of: [{val_strs}]')
|
|
54
67
|
|
|
55
68
|
def is_valid_identifier(name: str) -> bool:
|
|
56
69
|
return name.isidentifier() and not name.startswith('_')
|
|
@@ -25,6 +25,10 @@ class NamedFunction(SchemaObject):
|
|
|
25
25
|
def _display_name(cls) -> str:
|
|
26
26
|
return 'function'
|
|
27
27
|
|
|
28
|
+
@property
|
|
29
|
+
def _has_dependents(self) -> bool:
|
|
30
|
+
return False
|
|
31
|
+
|
|
28
32
|
def _move(self, new_name: str, new_dir_id: UUID) -> None:
|
|
29
33
|
super()._move(new_name, new_dir_id)
|
|
30
34
|
with Env.get().engine.begin() as conn:
|
|
@@ -51,11 +51,41 @@ class PathDict:
|
|
|
51
51
|
record_dir(dir)
|
|
52
52
|
|
|
53
53
|
def _resolve_path(self, path: Path) -> SchemaObject:
|
|
54
|
+
"""Resolve the path to a SchemaObject.
|
|
55
|
+
|
|
56
|
+
Args:
|
|
57
|
+
path: path to resolve
|
|
58
|
+
|
|
59
|
+
Returns:
|
|
60
|
+
SchemaObject at the path.
|
|
61
|
+
|
|
62
|
+
Raises:
|
|
63
|
+
Error if path is invalid or does not exist.
|
|
64
|
+
"""
|
|
65
|
+
schema_obj = self.get_object(path)
|
|
66
|
+
if schema_obj is None:
|
|
67
|
+
raise excs.Error(f"No such path: {str(path)}")
|
|
68
|
+
return schema_obj
|
|
69
|
+
|
|
70
|
+
def get_object(self, path: Path) -> Optional[SchemaObject]:
|
|
71
|
+
"""Get the object at the given path, if any.
|
|
72
|
+
|
|
73
|
+
Args:
|
|
74
|
+
path: path to object
|
|
75
|
+
|
|
76
|
+
Returns:
|
|
77
|
+
SchemaObject at the path if it exists, None otherwise.
|
|
78
|
+
|
|
79
|
+
Raises:
|
|
80
|
+
Error if path is invalid.
|
|
81
|
+
"""
|
|
54
82
|
if path.is_root:
|
|
55
83
|
return self.root_dir
|
|
56
84
|
dir = self.root_dir
|
|
57
85
|
for i, component in enumerate(path.components):
|
|
58
86
|
if component not in self.dir_contents[dir._id]:
|
|
87
|
+
if i == len(path.components) - 1:
|
|
88
|
+
return None
|
|
59
89
|
raise excs.Error(f'No such path: {".".join(path.components[:i + 1])}')
|
|
60
90
|
schema_obj = self.dir_contents[dir._id][component]
|
|
61
91
|
if i < len(path.components) - 1:
|
|
@@ -112,19 +142,15 @@ class PathDict:
|
|
|
112
142
|
Error if path is invalid or object at path has wrong type
|
|
113
143
|
"""
|
|
114
144
|
# check for existence
|
|
145
|
+
obj = self.get_object(path)
|
|
115
146
|
if expected is not None:
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
f'{str(path)} needs to be a {expected._display_name()} but is a {type(schema_obj)._display_name()}')
|
|
120
|
-
if expected is None:
|
|
121
|
-
parent_obj = self._resolve_path(path.parent)
|
|
122
|
-
if not isinstance(parent_obj, Dir):
|
|
147
|
+
if obj is None:
|
|
148
|
+
raise excs.Error(f"No such path: {str(path)}")
|
|
149
|
+
if not isinstance(obj, expected):
|
|
123
150
|
raise excs.Error(
|
|
124
|
-
f'{str(path
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
raise excs.Error(f"{type(obj)._display_name()} '{str(path)}' already exists")
|
|
151
|
+
f'{str(path)} needs to be a {expected._display_name()} but is a {type(obj)._display_name()}')
|
|
152
|
+
if expected is None and obj is not None:
|
|
153
|
+
raise excs.Error(f"{type(obj)._display_name()} '{str(path)}' already exists")
|
|
128
154
|
|
|
129
155
|
def get_children(self, parent: Path, child_type: Optional[type[SchemaObject]], recursive: bool) -> list[Path]:
|
|
130
156
|
dir = self._resolve_path(parent)
|
|
@@ -66,7 +66,13 @@ class SchemaObject:
|
|
|
66
66
|
"""
|
|
67
67
|
pass
|
|
68
68
|
|
|
69
|
+
@property
|
|
70
|
+
@abstractmethod
|
|
71
|
+
def _has_dependents(self) -> bool:
|
|
72
|
+
"""Returns True if this object has dependents (e.g., children, views)"""
|
|
73
|
+
|
|
69
74
|
def _move(self, new_name: str, new_dir_id: UUID) -> None:
|
|
70
75
|
"""Subclasses need to override this to make the change persistent"""
|
|
71
76
|
self.__name = new_name
|
|
72
77
|
self.__dir_id = new_dir_id
|
|
78
|
+
|
|
@@ -63,6 +63,11 @@ class Table(SchemaObject):
|
|
|
63
63
|
return self._queries[name]
|
|
64
64
|
raise AttributeError(f'Table {self.__table._name!r} has no query with that name: {name!r}')
|
|
65
65
|
|
|
66
|
+
@property
|
|
67
|
+
def _has_dependents(self) -> bool:
|
|
68
|
+
"""Returns True if this table has any dependent views, or snapshots."""
|
|
69
|
+
return len(self._get_views(recursive=False)) > 0
|
|
70
|
+
|
|
66
71
|
def _move(self, new_name: str, new_dir_id: UUID) -> None:
|
|
67
72
|
self._check_is_dropped()
|
|
68
73
|
super()._move(new_name, new_dir_id)
|
|
@@ -184,26 +189,38 @@ class Table(SchemaObject):
|
|
|
184
189
|
return self.__query_scope
|
|
185
190
|
|
|
186
191
|
def select(self, *items: Any, **named_items: Any) -> 'pxt.DataFrame':
|
|
187
|
-
"""
|
|
192
|
+
""" Select columns or expressions from this table.
|
|
193
|
+
|
|
194
|
+
See [`DataFrame.select`][pixeltable.DataFrame.select] for more details.
|
|
195
|
+
"""
|
|
188
196
|
return self._df().select(*items, **named_items)
|
|
189
197
|
|
|
190
198
|
def where(self, pred: 'exprs.Expr') -> 'pxt.DataFrame':
|
|
191
|
-
"""
|
|
199
|
+
"""Filter rows from this table based on the expression.
|
|
200
|
+
|
|
201
|
+
See [`DataFrame.where`][pixeltable.DataFrame.where] for more details.
|
|
202
|
+
"""
|
|
192
203
|
return self._df().where(pred)
|
|
193
204
|
|
|
194
205
|
def join(
|
|
195
206
|
self, other: 'Table', *, on: Optional['exprs.Expr'] = None,
|
|
196
207
|
how: 'pixeltable.plan.JoinType.LiteralType' = 'inner'
|
|
197
208
|
) -> 'pxt.DataFrame':
|
|
198
|
-
"""
|
|
209
|
+
"""Join this table with another table."""
|
|
199
210
|
return self._df().join(other, on=on, how=how)
|
|
200
211
|
|
|
201
212
|
def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> 'pxt.DataFrame':
|
|
202
|
-
"""
|
|
213
|
+
"""Order the rows of this table based on the expression.
|
|
214
|
+
|
|
215
|
+
See [`DataFrame.order_by`][pixeltable.DataFrame.order_by] for more details.
|
|
216
|
+
"""
|
|
203
217
|
return self._df().order_by(*items, asc=asc)
|
|
204
218
|
|
|
205
219
|
def group_by(self, *items: 'exprs.Expr') -> 'pxt.DataFrame':
|
|
206
|
-
"""
|
|
220
|
+
"""Group the rows of this table based on the expression.
|
|
221
|
+
|
|
222
|
+
See [`DataFrame.group_by`][pixeltable.DataFrame.group_by] for more details.
|
|
223
|
+
"""
|
|
207
224
|
return self._df().group_by(*items)
|
|
208
225
|
|
|
209
226
|
def limit(self, n: int) -> 'pxt.DataFrame':
|
|
@@ -9,6 +9,7 @@ import uuid
|
|
|
9
9
|
from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Optional
|
|
10
10
|
from uuid import UUID
|
|
11
11
|
|
|
12
|
+
import jsonschema.exceptions
|
|
12
13
|
import sqlalchemy as sql
|
|
13
14
|
import sqlalchemy.orm as orm
|
|
14
15
|
|
|
@@ -173,6 +174,14 @@ class TableVersion:
|
|
|
173
174
|
def __hash__(self) -> int:
|
|
174
175
|
return hash(self.id)
|
|
175
176
|
|
|
177
|
+
def _get_column(self, tbl_id: UUID, col_id: int) -> Column:
|
|
178
|
+
if self.id == tbl_id:
|
|
179
|
+
return self.cols_by_id[col_id]
|
|
180
|
+
else:
|
|
181
|
+
if self.base is None:
|
|
182
|
+
raise excs.Error(f'Unknown table id: {tbl_id}')
|
|
183
|
+
return self.base._get_column(tbl_id, col_id)
|
|
184
|
+
|
|
176
185
|
def create_snapshot_copy(self) -> TableVersion:
|
|
177
186
|
"""Create a snapshot copy of this TableVersion"""
|
|
178
187
|
assert not self.is_snapshot
|
|
@@ -335,7 +344,7 @@ class TableVersion:
|
|
|
335
344
|
# instantiate index object
|
|
336
345
|
cls_name = md.class_fqn.rsplit('.', 1)[-1]
|
|
337
346
|
cls = getattr(index_module, cls_name)
|
|
338
|
-
idx_col = self.
|
|
347
|
+
idx_col = self._get_column(UUID(md.indexed_col_tbl_id), md.indexed_col_id)
|
|
339
348
|
idx = cls.from_dict(idx_col, md.init_args)
|
|
340
349
|
|
|
341
350
|
# fix up the sa column type of the index value and undo columns
|
|
@@ -457,7 +466,8 @@ class TableVersion:
|
|
|
457
466
|
idx_cls = type(idx)
|
|
458
467
|
idx_md = schema.IndexMd(
|
|
459
468
|
id=idx_id, name=idx_name,
|
|
460
|
-
indexed_col_id=col.id,
|
|
469
|
+
indexed_col_id=col.id, indexed_col_tbl_id=str(col.tbl.id),
|
|
470
|
+
index_val_col_id=val_col.id, index_val_undo_col_id=undo_col.id,
|
|
461
471
|
schema_version_add=self.schema_version, schema_version_drop=None,
|
|
462
472
|
class_fqn=idx_cls.__module__ + '.' + idx_cls.__name__, init_args=idx.as_dict())
|
|
463
473
|
idx_info = self.IndexInfo(id=idx_id, name=idx_name, idx=idx, col=col, val_col=val_col, undo_col=undo_col)
|
|
@@ -485,7 +495,10 @@ class TableVersion:
|
|
|
485
495
|
idx_md.schema_version_drop = self.schema_version
|
|
486
496
|
assert idx_md.name in self.idxs_by_name
|
|
487
497
|
idx_info = self.idxs_by_name[idx_md.name]
|
|
498
|
+
# remove this index entry from the active indexes (in memory)
|
|
499
|
+
# and the index metadata (in persistent table metadata)
|
|
488
500
|
del self.idxs_by_name[idx_md.name]
|
|
501
|
+
del self.idx_md[idx_id]
|
|
489
502
|
|
|
490
503
|
with Env.get().engine.begin() as conn:
|
|
491
504
|
self._drop_columns([idx_info.val_col, idx_info.undo_col])
|
|
@@ -819,7 +832,7 @@ class TableVersion:
|
|
|
819
832
|
if error_if_not_exists:
|
|
820
833
|
raise excs.Error(f'batch_update(): {len(unmatched_rows)} row(s) not found')
|
|
821
834
|
if insert_if_not_exists:
|
|
822
|
-
insert_status = self.insert(unmatched_rows, None, print_stats=False, fail_on_exception=False)
|
|
835
|
+
insert_status = self.insert(unmatched_rows, None, conn=conn, print_stats=False, fail_on_exception=False)
|
|
823
836
|
result += insert_status
|
|
824
837
|
return result
|
|
825
838
|
|
|
@@ -846,10 +859,11 @@ class TableVersion:
|
|
|
846
859
|
raise excs.Error(f'Column {col_name} is a primary key column and cannot be updated')
|
|
847
860
|
|
|
848
861
|
# make sure that the value is compatible with the column type
|
|
862
|
+
value_expr: exprs.Expr
|
|
849
863
|
try:
|
|
850
864
|
# check if this is a literal
|
|
851
|
-
value_expr
|
|
852
|
-
except TypeError:
|
|
865
|
+
value_expr = exprs.Literal(val, col_type=col.col_type)
|
|
866
|
+
except (TypeError, jsonschema.exceptions.ValidationError):
|
|
853
867
|
if not allow_exprs:
|
|
854
868
|
raise excs.Error(
|
|
855
869
|
f'Column {col_name}: value {val!r} is not a valid literal for this column '
|
|
@@ -858,11 +872,11 @@ class TableVersion:
|
|
|
858
872
|
value_expr = exprs.Expr.from_object(val)
|
|
859
873
|
if value_expr is None:
|
|
860
874
|
raise excs.Error(f'Column {col_name}: value {val!r} is not a recognized literal or expression')
|
|
861
|
-
if not col.col_type.
|
|
862
|
-
raise excs.Error(
|
|
875
|
+
if not col.col_type.is_supertype_of(value_expr.col_type, ignore_nullable=True):
|
|
876
|
+
raise excs.Error(
|
|
863
877
|
f'Type of value {val!r} ({value_expr.col_type}) is not compatible with the type of column '
|
|
864
878
|
f'{col_name} ({col.col_type})'
|
|
865
|
-
)
|
|
879
|
+
)
|
|
866
880
|
update_targets[col] = value_expr
|
|
867
881
|
|
|
868
882
|
return update_targets
|
|
@@ -300,6 +300,20 @@ class DataFrame:
|
|
|
300
300
|
return self.limit(n).collect()
|
|
301
301
|
|
|
302
302
|
def head(self, n: int = 10) -> DataFrameResultSet:
|
|
303
|
+
"""Return the first n rows of the DataFrame, in insertion order of the underlying Table.
|
|
304
|
+
|
|
305
|
+
head() is not supported for joins.
|
|
306
|
+
|
|
307
|
+
Args:
|
|
308
|
+
n: Number of rows to select. Default is 10.
|
|
309
|
+
|
|
310
|
+
Returns:
|
|
311
|
+
A DataFrameResultSet with the first n rows of the DataFrame.
|
|
312
|
+
|
|
313
|
+
Raises:
|
|
314
|
+
Error: If the DataFrame is the result of a join or
|
|
315
|
+
if the DataFrame has an order_by clause.
|
|
316
|
+
"""
|
|
303
317
|
if self.order_by_clause is not None:
|
|
304
318
|
raise excs.Error(f'head() cannot be used with order_by()')
|
|
305
319
|
if self._has_joins():
|
|
@@ -309,6 +323,20 @@ class DataFrame:
|
|
|
309
323
|
return self.order_by(*order_by_clause, asc=True).limit(n).collect()
|
|
310
324
|
|
|
311
325
|
def tail(self, n: int = 10) -> DataFrameResultSet:
|
|
326
|
+
"""Return the last n rows of the DataFrame, in insertion order of the underlying Table.
|
|
327
|
+
|
|
328
|
+
tail() is not supported for joins.
|
|
329
|
+
|
|
330
|
+
Args:
|
|
331
|
+
n: Number of rows to select. Default is 10.
|
|
332
|
+
|
|
333
|
+
Returns:
|
|
334
|
+
A DataFrameResultSet with the last n rows of the DataFrame.
|
|
335
|
+
|
|
336
|
+
Raises:
|
|
337
|
+
Error: If the DataFrame is the result of a join or
|
|
338
|
+
if the DataFrame has an order_by clause.
|
|
339
|
+
"""
|
|
312
340
|
if self.order_by_clause is not None:
|
|
313
341
|
raise excs.Error(f'tail() cannot be used with order_by()')
|
|
314
342
|
if self._has_joins():
|
|
@@ -394,6 +422,11 @@ class DataFrame:
|
|
|
394
422
|
return DataFrameResultSet(list(self._output_row_iterator(conn)), self.schema)
|
|
395
423
|
|
|
396
424
|
def count(self) -> int:
|
|
425
|
+
"""Return the number of rows in the DataFrame.
|
|
426
|
+
|
|
427
|
+
Returns:
|
|
428
|
+
The number of rows in the DataFrame.
|
|
429
|
+
"""
|
|
397
430
|
from pixeltable.plan import Planner
|
|
398
431
|
|
|
399
432
|
stmt = Planner.create_count_stmt(self._first_tbl, self.where_clause)
|
|
@@ -463,6 +496,36 @@ class DataFrame:
|
|
|
463
496
|
return self._descriptors().to_html()
|
|
464
497
|
|
|
465
498
|
def select(self, *items: Any, **named_items: Any) -> DataFrame:
|
|
499
|
+
""" Select columns or expressions from the DataFrame.
|
|
500
|
+
|
|
501
|
+
Args:
|
|
502
|
+
items: expressions to be selected
|
|
503
|
+
named_items: named expressions to be selected
|
|
504
|
+
|
|
505
|
+
Returns:
|
|
506
|
+
A new DataFrame with the specified select list.
|
|
507
|
+
|
|
508
|
+
Raises:
|
|
509
|
+
Error: If the select list is already specified,
|
|
510
|
+
or if any of the specified expressions are invalid,
|
|
511
|
+
or refer to tables not in the DataFrame.
|
|
512
|
+
|
|
513
|
+
Examples:
|
|
514
|
+
Given the DataFrame person from a table t with all its columns and rows:
|
|
515
|
+
|
|
516
|
+
>>> person = t.select()
|
|
517
|
+
|
|
518
|
+
Select the columns 'name' and 'age' (referenced in table t) from the DataFrame person:
|
|
519
|
+
|
|
520
|
+
>>> df = person.select(t.name, t.age)
|
|
521
|
+
|
|
522
|
+
Select the columns 'name' (referenced in table t) from the DataFrame person,
|
|
523
|
+
and a named column 'is_adult' from the expression `age >= 18` where 'age' is
|
|
524
|
+
another column in table t:
|
|
525
|
+
|
|
526
|
+
>>> df = person.select(t.name, is_adult=(t.age >= 18))
|
|
527
|
+
|
|
528
|
+
"""
|
|
466
529
|
if self.select_list is not None:
|
|
467
530
|
raise excs.Error(f'Select list already specified')
|
|
468
531
|
for name, _ in named_items.items():
|
|
@@ -512,6 +575,29 @@ class DataFrame:
|
|
|
512
575
|
)
|
|
513
576
|
|
|
514
577
|
def where(self, pred: exprs.Expr) -> DataFrame:
|
|
578
|
+
"""Filter rows based on a predicate.
|
|
579
|
+
|
|
580
|
+
Args:
|
|
581
|
+
pred: the predicate to filter rows
|
|
582
|
+
|
|
583
|
+
Returns:
|
|
584
|
+
A new DataFrame with the specified predicates replacing the where-clause.
|
|
585
|
+
|
|
586
|
+
Raises:
|
|
587
|
+
Error: If the predicate is not a Pixeltable expression,
|
|
588
|
+
or if it does not return a boolean value,
|
|
589
|
+
or refers to tables not in the DataFrame.
|
|
590
|
+
|
|
591
|
+
Examples:
|
|
592
|
+
Given the DataFrame person from a table t with all its columns and rows:
|
|
593
|
+
|
|
594
|
+
>>> person = t.select()
|
|
595
|
+
|
|
596
|
+
Filter the above DataFrame person to only include rows where the column 'age'
|
|
597
|
+
(referenced in table t) is greater than 30:
|
|
598
|
+
|
|
599
|
+
>>> df = person.where(t.age > 30)
|
|
600
|
+
"""
|
|
515
601
|
if not isinstance(pred, exprs.Expr):
|
|
516
602
|
raise excs.Error(f'Where() requires a Pixeltable expression, but instead got {type(pred)}')
|
|
517
603
|
if not pred.col_type.is_bool_type():
|
|
@@ -662,11 +748,45 @@ class DataFrame:
|
|
|
662
748
|
)
|
|
663
749
|
|
|
664
750
|
def group_by(self, *grouping_items: Any) -> DataFrame:
|
|
665
|
-
"""
|
|
666
|
-
|
|
751
|
+
""" Add a group-by clause to this DataFrame.
|
|
752
|
+
|
|
667
753
|
Variants:
|
|
668
754
|
- group_by(<base table>): group a component view by their respective base table rows
|
|
669
755
|
- group_by(<expr>, ...): group by the given expressions
|
|
756
|
+
|
|
757
|
+
Note that grouping will be applied to the rows and take effect when
|
|
758
|
+
used with an aggregation function like sum(), count() etc.
|
|
759
|
+
|
|
760
|
+
Args:
|
|
761
|
+
grouping_items: expressions to group by
|
|
762
|
+
|
|
763
|
+
Returns:
|
|
764
|
+
A new DataFrame with the specified group-by clause.
|
|
765
|
+
|
|
766
|
+
Raises:
|
|
767
|
+
Error: If the group-by clause is already specified,
|
|
768
|
+
or if the specified expression is invalid,
|
|
769
|
+
or if it refers to tables not in the DataFrame,
|
|
770
|
+
or if the DataFrame is a result of a join.
|
|
771
|
+
|
|
772
|
+
Examples:
|
|
773
|
+
Given the DataFrame book from a table t with all its columns and rows:
|
|
774
|
+
|
|
775
|
+
>>> book = t.select()
|
|
776
|
+
|
|
777
|
+
Group the above DataFrame book by the 'genre' column (referenced in table t):
|
|
778
|
+
|
|
779
|
+
>>> df = book.group_by(t.genre)
|
|
780
|
+
|
|
781
|
+
Use the above DataFrame df grouped by genre to count the number of
|
|
782
|
+
books for each 'genre':
|
|
783
|
+
|
|
784
|
+
>>> df = book.group_by(t.genre).select(t.genre, count=count(t.genre)).show()
|
|
785
|
+
|
|
786
|
+
Use the above DataFrame df grouped by genre to compute the total price of
|
|
787
|
+
books for each 'genre':
|
|
788
|
+
|
|
789
|
+
>>> df = book.group_by(t.genre).select(t.genre, total=sum(t.price)).show()
|
|
670
790
|
"""
|
|
671
791
|
if self.group_by_clause is not None:
|
|
672
792
|
raise excs.Error(f'Group-by already specified')
|
|
@@ -699,6 +819,35 @@ class DataFrame:
|
|
|
699
819
|
)
|
|
700
820
|
|
|
701
821
|
def order_by(self, *expr_list: exprs.Expr, asc: bool = True) -> DataFrame:
|
|
822
|
+
""" Add an order-by clause to this DataFrame.
|
|
823
|
+
|
|
824
|
+
Args:
|
|
825
|
+
expr_list: expressions to order by
|
|
826
|
+
asc: whether to order in ascending order (True) or descending order (False).
|
|
827
|
+
Default is True.
|
|
828
|
+
|
|
829
|
+
Returns:
|
|
830
|
+
A new DataFrame with the specified order-by clause.
|
|
831
|
+
|
|
832
|
+
Raises:
|
|
833
|
+
Error: If the order-by clause is already specified,
|
|
834
|
+
or if the specified expression is invalid,
|
|
835
|
+
or if it refers to tables not in the DataFrame.
|
|
836
|
+
|
|
837
|
+
Examples:
|
|
838
|
+
Given the DataFrame book from a table t with all its columns and rows:
|
|
839
|
+
|
|
840
|
+
>>> book = t.select()
|
|
841
|
+
|
|
842
|
+
Order the above DataFrame book by two columns (price, pages) in descending order:
|
|
843
|
+
|
|
844
|
+
>>> df = book.order_by(t.price, t.pages, asc=False)
|
|
845
|
+
|
|
846
|
+
Order the above DataFrame book by price in descending order, but order the pages
|
|
847
|
+
in ascending order:
|
|
848
|
+
|
|
849
|
+
>>> df = book.order_by(t.price, asc=False).order_by(t.pages)
|
|
850
|
+
"""
|
|
702
851
|
for e in expr_list:
|
|
703
852
|
if not isinstance(e, exprs.Expr):
|
|
704
853
|
raise excs.Error(f'Invalid expression in order_by(): {e}')
|
|
@@ -715,6 +864,14 @@ class DataFrame:
|
|
|
715
864
|
)
|
|
716
865
|
|
|
717
866
|
def limit(self, n: int) -> DataFrame:
|
|
867
|
+
""" Limit the number of rows in the DataFrame.
|
|
868
|
+
|
|
869
|
+
Args:
|
|
870
|
+
n: Number of rows to select.
|
|
871
|
+
|
|
872
|
+
Returns:
|
|
873
|
+
A new DataFrame with the specified limited rows.
|
|
874
|
+
"""
|
|
718
875
|
# TODO: allow n to be a Variable that can be substituted in bind()
|
|
719
876
|
assert n is not None and isinstance(n, int)
|
|
720
877
|
return DataFrame(
|
|
@@ -728,17 +885,58 @@ class DataFrame:
|
|
|
728
885
|
)
|
|
729
886
|
|
|
730
887
|
def update(self, value_spec: dict[str, Any], cascade: bool = True) -> UpdateStatus:
|
|
888
|
+
""" Update rows in the underlying table of the DataFrame.
|
|
889
|
+
|
|
890
|
+
Update rows in the table with the specified value_spec.
|
|
891
|
+
|
|
892
|
+
Args:
|
|
893
|
+
value_spec: a dict of column names to update and the new value to update it to.
|
|
894
|
+
cascade: if True, also update all computed columns that transitively depend
|
|
895
|
+
on the updated columns, including within views. Default is True.
|
|
896
|
+
|
|
897
|
+
Returns:
|
|
898
|
+
UpdateStatus: the status of the update operation.
|
|
899
|
+
|
|
900
|
+
Example:
|
|
901
|
+
Given the DataFrame person from a table t with all its columns and rows:
|
|
902
|
+
|
|
903
|
+
>>> person = t.select()
|
|
904
|
+
|
|
905
|
+
Via the above DataFrame person, update the column 'city' to 'Oakland' and 'state' to 'CA' in the table t:
|
|
906
|
+
|
|
907
|
+
>>> df = person.update({'city': 'Oakland', 'state': 'CA'})
|
|
908
|
+
|
|
909
|
+
Via the above DataFrame person, update the column 'age' to 30 for any rows where 'year' is 2014 in the table t:
|
|
910
|
+
|
|
911
|
+
>>> df = person.where(t.year == 2014).update({'age': 30})
|
|
912
|
+
"""
|
|
731
913
|
self._validate_mutable('update')
|
|
732
914
|
return self._first_tbl.tbl_version.update(value_spec, where=self.where_clause, cascade=cascade)
|
|
733
915
|
|
|
734
916
|
def delete(self) -> UpdateStatus:
|
|
917
|
+
""" Delete rows from the underlying table of the DataFrame.
|
|
918
|
+
|
|
919
|
+
The delete operation is only allowed for DataFrames on base tables.
|
|
920
|
+
|
|
921
|
+
Returns:
|
|
922
|
+
UpdateStatus: the status of the delete operation.
|
|
923
|
+
|
|
924
|
+
Example:
|
|
925
|
+
Given the DataFrame person from a table t with all its columns and rows:
|
|
926
|
+
|
|
927
|
+
>>> person = t.select()
|
|
928
|
+
|
|
929
|
+
Via the above DataFrame person, delete all rows from the table t where the column 'age' is less than 18:
|
|
930
|
+
|
|
931
|
+
>>> df = person.where(t.age < 18).delete()
|
|
932
|
+
"""
|
|
735
933
|
self._validate_mutable('delete')
|
|
736
934
|
if not self._first_tbl.is_insertable():
|
|
737
935
|
raise excs.Error(f'Cannot delete from view')
|
|
738
936
|
return self._first_tbl.tbl_version.delete(where=self.where_clause)
|
|
739
937
|
|
|
740
938
|
def _validate_mutable(self, op_name: str) -> None:
|
|
741
|
-
"""Tests whether this
|
|
939
|
+
"""Tests whether this DataFrame can be mutated (such as by an update operation)."""
|
|
742
940
|
if self.group_by_clause is not None or self.grouping_tbl is not None:
|
|
743
941
|
raise excs.Error(f'Cannot use `{op_name}` after `group_by`')
|
|
744
942
|
if self.order_by_clause is not None:
|
|
@@ -8,6 +8,7 @@ import importlib.util
|
|
|
8
8
|
import inspect
|
|
9
9
|
import logging
|
|
10
10
|
import os
|
|
11
|
+
import platform
|
|
11
12
|
import shutil
|
|
12
13
|
import subprocess
|
|
13
14
|
import sys
|
|
@@ -311,8 +312,12 @@ class Env:
|
|
|
311
312
|
self._db_name = os.environ.get('PIXELTABLE_DB', 'pixeltable')
|
|
312
313
|
self._pgdata_dir = Path(os.environ.get('PIXELTABLE_PGDATA', str(self._home / 'pgdata')))
|
|
313
314
|
|
|
314
|
-
#
|
|
315
|
-
|
|
315
|
+
# cleanup_mode=None will leave the postgres process running after Python exits
|
|
316
|
+
# cleanup_mode='stop' will terminate the postgres process when Python exits
|
|
317
|
+
# On Windows, we need cleanup_mode='stop' because child processes are killed automatically when the parent
|
|
318
|
+
# process (such as Terminal or VSCode) exits, potentially leaving it in an unusable state.
|
|
319
|
+
cleanup_mode = 'stop' if platform.system() == 'Windows' else None
|
|
320
|
+
self._db_server = pixeltable_pgserver.get_server(self._pgdata_dir, cleanup_mode=cleanup_mode)
|
|
316
321
|
self._db_url = self._db_server.get_uri(database=self._db_name, driver='psycopg')
|
|
317
322
|
|
|
318
323
|
tz_name = self.config.get_string_value('time_zone')
|
|
@@ -357,7 +362,7 @@ class Env:
|
|
|
357
362
|
self.db_url,
|
|
358
363
|
echo=echo,
|
|
359
364
|
future=True,
|
|
360
|
-
isolation_level='
|
|
365
|
+
isolation_level='REPEATABLE READ',
|
|
361
366
|
connect_args=connect_args,
|
|
362
367
|
)
|
|
363
368
|
self._logger.info(f'Created SQLAlchemy engine at: {self.db_url}')
|
|
@@ -506,6 +511,7 @@ class Env:
|
|
|
506
511
|
self.__register_package('openai')
|
|
507
512
|
self.__register_package('openpyxl')
|
|
508
513
|
self.__register_package('pyarrow')
|
|
514
|
+
self.__register_package('pydantic')
|
|
509
515
|
self.__register_package('replicate')
|
|
510
516
|
self.__register_package('sentencepiece')
|
|
511
517
|
self.__register_package('sentence_transformers', library_name='sentence-transformers')
|
|
@@ -208,7 +208,7 @@ class ExprEvalNode(ExecNode):
|
|
|
208
208
|
}
|
|
209
209
|
start_ts = time.perf_counter()
|
|
210
210
|
assert isinstance(fn_call.fn, CallableFunction)
|
|
211
|
-
result_batch = fn_call.fn.exec_batch(
|
|
211
|
+
result_batch = fn_call.fn.exec_batch(call_args, call_kwargs)
|
|
212
212
|
self.ctx.profile.eval_time[fn_call.slot_idx] += time.perf_counter() - start_ts
|
|
213
213
|
self.ctx.profile.eval_count[fn_call.slot_idx] += num_ext_batch_rows
|
|
214
214
|
|
|
@@ -262,7 +262,7 @@ class SqlNode(ExecNode):
|
|
|
262
262
|
explain_str = '\n'.join([str(row) for row in explain_result])
|
|
263
263
|
_logger.debug(f'SqlScanNode explain:\n{explain_str}')
|
|
264
264
|
except Exception as e:
|
|
265
|
-
_logger.warning(f'EXPLAIN failed')
|
|
265
|
+
_logger.warning(f'EXPLAIN failed with error: {e}')
|
|
266
266
|
|
|
267
267
|
def __iter__(self) -> Iterator[DataRowBatch]:
|
|
268
268
|
# run the query; do this here rather than in _open(), exceptions are only expected during iteration
|
|
@@ -468,4 +468,4 @@ class SqlJoinNode(SqlNode):
|
|
|
468
468
|
stmt = stmt.join(
|
|
469
469
|
self.input_ctes[i + 1], onclause=on_clause, isouter=is_outer,
|
|
470
470
|
full=join_clause == plan.JoinType.FULL_OUTER)
|
|
471
|
-
return stmt
|
|
471
|
+
return stmt
|