pixeltable 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +2 -2
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +2 -1
- pixeltable/catalog/catalog.py +370 -93
- pixeltable/catalog/column.py +6 -4
- pixeltable/catalog/dir.py +5 -5
- pixeltable/catalog/globals.py +14 -16
- pixeltable/catalog/insertable_table.py +6 -8
- pixeltable/catalog/path.py +14 -7
- pixeltable/catalog/table.py +72 -62
- pixeltable/catalog/table_version.py +137 -107
- pixeltable/catalog/table_version_handle.py +3 -0
- pixeltable/catalog/table_version_path.py +1 -1
- pixeltable/catalog/view.py +10 -14
- pixeltable/dataframe.py +5 -3
- pixeltable/env.py +108 -42
- pixeltable/exec/__init__.py +2 -0
- pixeltable/exec/aggregation_node.py +6 -8
- pixeltable/exec/cache_prefetch_node.py +4 -7
- pixeltable/exec/component_iteration_node.py +1 -3
- pixeltable/exec/data_row_batch.py +1 -2
- pixeltable/exec/exec_context.py +1 -1
- pixeltable/exec/exec_node.py +1 -2
- pixeltable/exec/expr_eval/__init__.py +2 -0
- pixeltable/exec/expr_eval/evaluators.py +137 -20
- pixeltable/exec/expr_eval/expr_eval_node.py +43 -64
- pixeltable/exec/expr_eval/globals.py +68 -7
- pixeltable/exec/expr_eval/schedulers.py +25 -23
- pixeltable/exec/in_memory_data_node.py +8 -6
- pixeltable/exec/row_update_node.py +3 -4
- pixeltable/exec/sql_node.py +16 -18
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/column_property_ref.py +1 -1
- pixeltable/exprs/column_ref.py +3 -3
- pixeltable/exprs/compound_predicate.py +1 -1
- pixeltable/exprs/data_row.py +17 -1
- pixeltable/exprs/expr.py +12 -12
- pixeltable/exprs/function_call.py +34 -2
- pixeltable/exprs/json_mapper.py +95 -48
- pixeltable/exprs/json_path.py +4 -9
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +33 -6
- pixeltable/exprs/similarity_expr.py +1 -1
- pixeltable/exprs/sql_element_cache.py +1 -1
- pixeltable/exprs/string_op.py +2 -2
- pixeltable/ext/__init__.py +1 -1
- pixeltable/ext/functions/__init__.py +1 -1
- pixeltable/ext/functions/whisperx.py +1 -1
- pixeltable/ext/functions/yolox.py +1 -1
- pixeltable/func/__init__.py +1 -1
- pixeltable/func/aggregate_function.py +2 -2
- pixeltable/func/callable_function.py +3 -6
- pixeltable/func/expr_template_function.py +24 -4
- pixeltable/func/function.py +7 -9
- pixeltable/func/function_registry.py +1 -1
- pixeltable/func/query_template_function.py +87 -4
- pixeltable/func/signature.py +1 -1
- pixeltable/func/tools.py +1 -1
- pixeltable/func/udf.py +2 -2
- pixeltable/functions/__init__.py +1 -1
- pixeltable/functions/anthropic.py +2 -2
- pixeltable/functions/audio.py +1 -1
- pixeltable/functions/deepseek.py +1 -1
- pixeltable/functions/fireworks.py +1 -1
- pixeltable/functions/globals.py +6 -6
- pixeltable/functions/huggingface.py +1 -1
- pixeltable/functions/image.py +1 -1
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +1 -1
- pixeltable/functions/math.py +1 -1
- pixeltable/functions/mistralai.py +1 -1
- pixeltable/functions/ollama.py +1 -1
- pixeltable/functions/openai.py +2 -2
- pixeltable/functions/replicate.py +1 -1
- pixeltable/functions/string.py +1 -1
- pixeltable/functions/timestamp.py +1 -1
- pixeltable/functions/together.py +1 -1
- pixeltable/functions/util.py +1 -1
- pixeltable/functions/video.py +2 -2
- pixeltable/functions/vision.py +2 -2
- pixeltable/globals.py +7 -2
- pixeltable/index/embedding_index.py +12 -1
- pixeltable/io/__init__.py +5 -3
- pixeltable/io/fiftyone.py +6 -7
- pixeltable/io/label_studio.py +21 -20
- pixeltable/io/pandas.py +6 -5
- pixeltable/iterators/__init__.py +1 -1
- pixeltable/metadata/__init__.py +6 -4
- pixeltable/metadata/converters/convert_24.py +3 -3
- pixeltable/metadata/converters/convert_25.py +1 -1
- pixeltable/metadata/converters/convert_29.py +1 -1
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/notes.py +3 -0
- pixeltable/metadata/schema.py +26 -1
- pixeltable/plan.py +2 -3
- pixeltable/share/packager.py +8 -24
- pixeltable/share/publish.py +20 -9
- pixeltable/store.py +9 -6
- pixeltable/type_system.py +19 -7
- pixeltable/utils/console_output.py +3 -2
- pixeltable/utils/coroutine.py +3 -3
- pixeltable/utils/dbms.py +66 -0
- pixeltable/utils/documents.py +61 -67
- pixeltable/utils/exception_handler.py +59 -0
- pixeltable/utils/filecache.py +1 -1
- pixeltable/utils/http_server.py +3 -2
- pixeltable/utils/pytorch.py +1 -1
- pixeltable/utils/sql.py +1 -1
- pixeltable-0.3.12.dist-info/METADATA +436 -0
- pixeltable-0.3.12.dist-info/RECORD +183 -0
- pixeltable/catalog/path_dict.py +0 -169
- pixeltable-0.3.10.dist-info/METADATA +0 -382
- pixeltable-0.3.10.dist-info/RECORD +0 -179
- {pixeltable-0.3.10.dist-info → pixeltable-0.3.12.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.10.dist-info → pixeltable-0.3.12.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.10.dist-info → pixeltable-0.3.12.dist-info}/entry_points.txt +0 -0
pixeltable/io/label_studio.py
CHANGED
|
@@ -5,16 +5,14 @@ import os
|
|
|
5
5
|
from dataclasses import dataclass
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
from typing import Any, Iterator, Literal, Optional, cast
|
|
8
|
-
from xml.etree import ElementTree
|
|
8
|
+
from xml.etree import ElementTree as ET
|
|
9
9
|
|
|
10
10
|
import label_studio_sdk # type: ignore[import-untyped]
|
|
11
11
|
import PIL.Image
|
|
12
12
|
from requests.exceptions import HTTPError
|
|
13
13
|
|
|
14
14
|
import pixeltable as pxt
|
|
15
|
-
import
|
|
16
|
-
import pixeltable.exceptions as excs
|
|
17
|
-
from pixeltable import Column, Table
|
|
15
|
+
from pixeltable import Column, Table, env, exceptions as excs
|
|
18
16
|
from pixeltable.config import Config
|
|
19
17
|
from pixeltable.exprs import ColumnRef, DataRow, Expr
|
|
20
18
|
from pixeltable.io.external_store import Project, SyncStatus
|
|
@@ -140,7 +138,8 @@ class LabelStudioProject(Project):
|
|
|
140
138
|
page += 1
|
|
141
139
|
if unknown_task_count > 0:
|
|
142
140
|
_logger.warning(
|
|
143
|
-
f'Skipped {unknown_task_count} unrecognized task(s) when syncing
|
|
141
|
+
f'Skipped {unknown_task_count} unrecognized task(s) when syncing '
|
|
142
|
+
f'Label Studio project {self.project_title!r}.'
|
|
144
143
|
)
|
|
145
144
|
|
|
146
145
|
def __update_tasks(self, t: Table, existing_tasks: dict[tuple, dict]) -> SyncStatus:
|
|
@@ -174,11 +173,11 @@ class LabelStudioProject(Project):
|
|
|
174
173
|
# Send media to Label Studio by HTTP post.
|
|
175
174
|
assert len(t_data_cols) == 1 # This was verified when the project was set up
|
|
176
175
|
return self.__update_tasks_by_post(t, existing_tasks, t_data_cols[0], t_rl_cols, rl_info)
|
|
177
|
-
elif self.media_import_method
|
|
176
|
+
elif self.media_import_method in ('file', 'url'):
|
|
178
177
|
# Send media to Label Studio by file reference (local file or URL).
|
|
179
178
|
return self.__update_tasks_by_files(t, existing_tasks, t_data_cols, t_rl_cols, rl_info)
|
|
180
179
|
else:
|
|
181
|
-
|
|
180
|
+
raise AssertionError()
|
|
182
181
|
|
|
183
182
|
def __update_tasks_by_post(
|
|
184
183
|
self,
|
|
@@ -227,7 +226,7 @@ class LabelStudioProject(Project):
|
|
|
227
226
|
)
|
|
228
227
|
for i in range(len(coco_annotations))
|
|
229
228
|
]
|
|
230
|
-
_logger.debug(
|
|
229
|
+
_logger.debug('`predictions`: {%s}', predictions)
|
|
231
230
|
self.project.create_predictions(predictions)
|
|
232
231
|
tasks_created += 1
|
|
233
232
|
|
|
@@ -358,7 +357,7 @@ class LabelStudioProject(Project):
|
|
|
358
357
|
def __localpath_to_lspath(cls, localpath: str) -> str:
|
|
359
358
|
# Transform the local path into Label Studio's bespoke path format.
|
|
360
359
|
relpath = Path(localpath).relative_to(Config.get().home)
|
|
361
|
-
return f'/data/local-files/?d={
|
|
360
|
+
return f'/data/local-files/?d={relpath}'
|
|
362
361
|
|
|
363
362
|
def __delete_stale_tasks(
|
|
364
363
|
self, existing_tasks: dict[tuple, dict], row_ids_in_pxt: set[tuple], tasks_created: int
|
|
@@ -405,7 +404,8 @@ class LabelStudioProject(Project):
|
|
|
405
404
|
updates = [{'_rowid': rowid, local_annotations_col.name: ann} for rowid, ann in annotations.items()]
|
|
406
405
|
if len(updates) > 0:
|
|
407
406
|
_logger.info(
|
|
408
|
-
f'Updating table
|
|
407
|
+
f'Updating table {t._name!r}, column {local_annotations_col.name!r} '
|
|
408
|
+
f'with {len(updates)} total annotations.'
|
|
409
409
|
)
|
|
410
410
|
# batch_update currently doesn't propagate from views to base tables. As a workaround, we call
|
|
411
411
|
# batch_update on the actual ancestor table that holds the annotations column.
|
|
@@ -451,7 +451,7 @@ class LabelStudioProject(Project):
|
|
|
451
451
|
Parses a Label Studio XML config, extracting the names and Pixeltable types of
|
|
452
452
|
all input variables.
|
|
453
453
|
"""
|
|
454
|
-
root:
|
|
454
|
+
root: ET.Element = ET.fromstring(xml_config)
|
|
455
455
|
if root.tag.lower() != 'view':
|
|
456
456
|
raise excs.Error('Root of Label Studio config must be a `View`')
|
|
457
457
|
config = _LabelStudioConfig(
|
|
@@ -461,7 +461,7 @@ class LabelStudioProject(Project):
|
|
|
461
461
|
return config
|
|
462
462
|
|
|
463
463
|
@classmethod
|
|
464
|
-
def __parse_data_keys_config(cls, root:
|
|
464
|
+
def __parse_data_keys_config(cls, root: ET.Element) -> dict[str, '_DataKey']:
|
|
465
465
|
"""Parses the data keys from a Label Studio XML config."""
|
|
466
466
|
config: dict[str, '_DataKey'] = {}
|
|
467
467
|
for element in root:
|
|
@@ -477,7 +477,7 @@ class LabelStudioProject(Project):
|
|
|
477
477
|
return config
|
|
478
478
|
|
|
479
479
|
@classmethod
|
|
480
|
-
def __parse_rectangle_labels_config(cls, root:
|
|
480
|
+
def __parse_rectangle_labels_config(cls, root: ET.Element) -> dict[str, '_RectangleLabel']:
|
|
481
481
|
"""Parses the RectangleLabels from a Label Studio XML config."""
|
|
482
482
|
config: dict[str, '_RectangleLabel'] = {}
|
|
483
483
|
for element in root:
|
|
@@ -534,7 +534,7 @@ class LabelStudioProject(Project):
|
|
|
534
534
|
_label_studio_client().delete_project(self.project_id)
|
|
535
535
|
env.Env.get().console_logger.info(f'Deleted Label Studio project: {title}')
|
|
536
536
|
|
|
537
|
-
def __eq__(self, other) -> bool:
|
|
537
|
+
def __eq__(self, other: object) -> bool:
|
|
538
538
|
return isinstance(other, LabelStudioProject) and self.project_id == other.project_id
|
|
539
539
|
|
|
540
540
|
def __hash__(self) -> int:
|
|
@@ -576,7 +576,7 @@ class LabelStudioProject(Project):
|
|
|
576
576
|
local_annotations_column = ANNOTATIONS_COLUMN
|
|
577
577
|
else:
|
|
578
578
|
local_annotations_column = next(k for k, v in col_mapping.items() if v == ANNOTATIONS_COLUMN)
|
|
579
|
-
if local_annotations_column not in t._schema
|
|
579
|
+
if local_annotations_column not in t._schema:
|
|
580
580
|
t.add_columns({local_annotations_column: pxt.JsonType(nullable=True)})
|
|
581
581
|
|
|
582
582
|
resolved_col_mapping = cls.validate_columns(
|
|
@@ -591,9 +591,9 @@ class LabelStudioProject(Project):
|
|
|
591
591
|
if media_import_method != 'url':
|
|
592
592
|
raise excs.Error("`s3_configuration` is only valid when `media_import_method == 'url'`")
|
|
593
593
|
s3_configuration = copy.copy(s3_configuration)
|
|
594
|
-
if
|
|
594
|
+
if 'bucket' not in s3_configuration:
|
|
595
595
|
raise excs.Error('`s3_configuration` must contain a `bucket` field')
|
|
596
|
-
if
|
|
596
|
+
if 'title' not in s3_configuration:
|
|
597
597
|
s3_configuration['title'] = 'Pixeltable-S3-Import-Storage'
|
|
598
598
|
if (
|
|
599
599
|
'aws_access_key_id' not in s3_configuration
|
|
@@ -633,7 +633,8 @@ class LabelStudioProject(Project):
|
|
|
633
633
|
raise excs.Error(
|
|
634
634
|
'`media_import_method` is set to `file`, but your Label Studio server is not configured '
|
|
635
635
|
'for local file storage.\nPlease set the `LABEL_STUDIO_LOCAL_FILES_SERVING_ENABLED` '
|
|
636
|
-
'environment variable to `true` in the environment where your Label Studio server
|
|
636
|
+
'environment variable to `true` in the environment where your Label Studio server '
|
|
637
|
+
'is running.'
|
|
637
638
|
) from exc
|
|
638
639
|
raise # Handle any other exception type normally
|
|
639
640
|
|
|
@@ -663,7 +664,7 @@ class _LabelStudioConfig:
|
|
|
663
664
|
rectangle_labels: dict[str, _RectangleLabel]
|
|
664
665
|
|
|
665
666
|
def validate(self) -> None:
|
|
666
|
-
data_key_names =
|
|
667
|
+
data_key_names = {key.name for key in self.data_keys.values() if key.name is not None}
|
|
667
668
|
for name, rl in self.rectangle_labels.items():
|
|
668
669
|
if rl.to_name not in data_key_names:
|
|
669
670
|
raise excs.Error(
|
|
@@ -674,7 +675,7 @@ class _LabelStudioConfig:
|
|
|
674
675
|
@property
|
|
675
676
|
def export_columns(self) -> dict[str, pxt.ColumnType]:
|
|
676
677
|
data_key_cols = {key_id: key_info.column_type for key_id, key_info in self.data_keys.items()}
|
|
677
|
-
rl_cols = {name: pxt.JsonType() for name in self.rectangle_labels
|
|
678
|
+
rl_cols = {name: pxt.JsonType() for name in self.rectangle_labels}
|
|
678
679
|
return {**data_key_cols, **rl_cols}
|
|
679
680
|
|
|
680
681
|
|
pixeltable/io/pandas.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import os
|
|
1
2
|
from typing import Any, Optional, Union
|
|
2
3
|
|
|
3
4
|
import numpy as np
|
|
@@ -52,12 +53,12 @@ def import_pandas(
|
|
|
52
53
|
|
|
53
54
|
def import_csv(
|
|
54
55
|
tbl_name: str,
|
|
55
|
-
filepath_or_buffer,
|
|
56
|
+
filepath_or_buffer: Union[str, os.PathLike],
|
|
56
57
|
schema_overrides: Optional[dict[str, Any]] = None,
|
|
57
58
|
primary_key: Optional[Union[str, list[str]]] = None,
|
|
58
59
|
num_retained_versions: int = 10,
|
|
59
60
|
comment: str = '',
|
|
60
|
-
**kwargs,
|
|
61
|
+
**kwargs: Any,
|
|
61
62
|
) -> pxt.Table:
|
|
62
63
|
"""
|
|
63
64
|
Creates a new base table from a csv file. This is a convenience method and is equivalent
|
|
@@ -81,13 +82,13 @@ def import_csv(
|
|
|
81
82
|
|
|
82
83
|
def import_excel(
|
|
83
84
|
tbl_name: str,
|
|
84
|
-
io,
|
|
85
|
-
|
|
85
|
+
io: Union[str, os.PathLike],
|
|
86
|
+
*,
|
|
86
87
|
schema_overrides: Optional[dict[str, Any]] = None,
|
|
87
88
|
primary_key: Optional[Union[str, list[str]]] = None,
|
|
88
89
|
num_retained_versions: int = 10,
|
|
89
90
|
comment: str = '',
|
|
90
|
-
**kwargs,
|
|
91
|
+
**kwargs: Any,
|
|
91
92
|
) -> pxt.Table:
|
|
92
93
|
"""
|
|
93
94
|
Creates a new base table from an Excel (.xlsx) file. This is a convenience method and is
|
pixeltable/iterators/__init__.py
CHANGED
pixeltable/metadata/__init__.py
CHANGED
|
@@ -16,7 +16,7 @@ _console_logger = ConsoleLogger(logging.getLogger('pixeltable'))
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
# current version of the metadata; this is incremented whenever the metadata schema changes
|
|
19
|
-
VERSION =
|
|
19
|
+
VERSION = 34
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
def create_system_info(engine: sql.engine.Engine) -> None:
|
|
@@ -24,9 +24,11 @@ def create_system_info(engine: sql.engine.Engine) -> None:
|
|
|
24
24
|
system_md = SystemInfoMd(schema_version=VERSION)
|
|
25
25
|
record = SystemInfo(md=dataclasses.asdict(system_md))
|
|
26
26
|
with orm.Session(engine, future=True) as session:
|
|
27
|
-
|
|
28
|
-
session.
|
|
29
|
-
|
|
27
|
+
# Write system metadata only once for idempotency
|
|
28
|
+
if session.query(SystemInfo).count() == 0:
|
|
29
|
+
session.add(record)
|
|
30
|
+
session.flush()
|
|
31
|
+
session.commit()
|
|
30
32
|
|
|
31
33
|
|
|
32
34
|
# conversion functions for upgrading the metadata schema from one version to the following
|
|
pixeltable/metadata/converters/convert_24.py
CHANGED
|
@@ -19,11 +19,11 @@ def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], A
|
|
|
19
19
|
isinstance(v, dict)
|
|
20
20
|
and '_classpath' in v
|
|
21
21
|
and v['_classpath']
|
|
22
|
-
in
|
|
22
|
+
in (
|
|
23
23
|
'pixeltable.func.callable_function.CallableFunction',
|
|
24
24
|
'pixeltable.func.aggregate_function.AggregateFunction',
|
|
25
25
|
'pixeltable.func.expr_template_function.ExprTemplateFunction',
|
|
26
|
-
|
|
26
|
+
)
|
|
27
27
|
):
|
|
28
28
|
if 'path' in v:
|
|
29
29
|
assert 'signature' not in v
|
|
@@ -50,6 +50,6 @@ def __substitute_path(path: str) -> str:
|
|
|
50
50
|
# versions, it's necessary to resolve the function symbol to get the signature. The following
|
|
51
51
|
# adjustment is necessary for function names that are stored in db artifacts of version < 25, but
|
|
52
52
|
# have changed in some version > 25.
|
|
53
|
-
if path in
|
|
53
|
+
if path in ('pixeltable.functions.huggingface.clip_text', 'pixeltable.functions.huggingface.clip_image'):
|
|
54
54
|
return 'pixeltable.functions.huggingface.clip'
|
|
55
55
|
return path
|
|
pixeltable/metadata/converters/convert_25.py
CHANGED
|
@@ -13,7 +13,7 @@ def _(engine: sql.engine.Engine) -> None:
|
|
|
13
13
|
|
|
14
14
|
def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
|
|
15
15
|
if k == 'path' and (
|
|
16
|
-
v in
|
|
16
|
+
v in ('pixeltable.functions.huggingface.clip_text', 'pixeltable.functions.huggingface.clip_image')
|
|
17
17
|
):
|
|
18
18
|
return 'path', 'pixeltable.functions.huggingface.clip'
|
|
19
19
|
return None
|
|
pixeltable/metadata/converters/convert_29.py
CHANGED
|
@@ -80,7 +80,7 @@ def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], A
|
|
|
80
80
|
rolled_kwargs = kwargs.pop(param['name'])
|
|
81
81
|
|
|
82
82
|
if rolled_args is not None:
|
|
83
|
-
assert rolled_args['_classname'] in
|
|
83
|
+
assert rolled_args['_classname'] in ('InlineArray', 'InlineList')
|
|
84
84
|
new_args.extend(rolled_args['components'])
|
|
85
85
|
if rolled_kwargs is not None:
|
|
86
86
|
assert rolled_kwargs['_classname'] == 'InlineDict'
|
|
pixeltable/metadata/converters/convert_31.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import sqlalchemy as sql
|
|
2
|
+
|
|
3
|
+
from pixeltable.metadata import register_converter
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@register_converter(version=31)
|
|
7
|
+
def _(engine: sql.engine.Engine) -> None:
|
|
8
|
+
# Add a column "lock_dummy: int8" to the dirs table in the store
|
|
9
|
+
# This column is the target of an UPDATE operation to synchronize directory operations
|
|
10
|
+
with engine.begin() as conn:
|
|
11
|
+
conn.execute(sql.text('ALTER TABLE dirs ADD COLUMN lock_dummy int8'))
|
|
pixeltable/metadata/converters/convert_32.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from uuid import UUID
|
|
2
|
+
|
|
3
|
+
import sqlalchemy as sql
|
|
4
|
+
|
|
5
|
+
from pixeltable.metadata import register_converter
|
|
6
|
+
from pixeltable.metadata.converters.util import convert_table_md
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@register_converter(version=32)
|
|
10
|
+
def _(engine: sql.engine.Engine) -> None:
|
|
11
|
+
convert_table_md(engine, table_md_updater=__update_table_md)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def __update_table_md(table_md: dict, table_id: UUID) -> None:
|
|
15
|
+
table_md['is_replica'] = False
|
|
pixeltable/metadata/converters/convert_33.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from uuid import UUID
|
|
2
|
+
|
|
3
|
+
import sqlalchemy as sql
|
|
4
|
+
|
|
5
|
+
from pixeltable.metadata import register_converter
|
|
6
|
+
from pixeltable.metadata.converters.util import convert_table_md
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@register_converter(version=33)
|
|
10
|
+
def _(engine: sql.engine.Engine) -> None:
|
|
11
|
+
convert_table_md(engine, table_md_updater=__update_table_md)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def __update_table_md(table_md: dict, table_id: UUID) -> None:
|
|
15
|
+
"""Set default value of 'is_pk' field in column metadata to False"""
|
|
16
|
+
for col_md in table_md['column_md'].values():
|
|
17
|
+
col_md['is_pk'] = False if col_md['is_pk'] is None else col_md['is_pk']
|
pixeltable/metadata/notes.py
CHANGED
|
@@ -2,6 +2,9 @@
|
|
|
2
2
|
# rather than as a comment, so that the existence of a description can be enforced by
|
|
3
3
|
# the unit tests when new versions are added.
|
|
4
4
|
VERSION_NOTES = {
|
|
5
|
+
34: 'Set default value for is_pk field in column metadata to False',
|
|
6
|
+
33: 'Add is_replica field to table metadata',
|
|
7
|
+
32: 'Add the lock_dummy BIGINT column to the dirs table',
|
|
5
8
|
31: 'Add table ids to metadata structs',
|
|
6
9
|
30: 'Store default values and constant arguments as literals',
|
|
7
10
|
29: 'Add user and additional_md fields to metadata structs',
|
pixeltable/metadata/schema.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import dataclasses
|
|
2
2
|
import typing
|
|
3
3
|
import uuid
|
|
4
|
-
from typing import Any, Optional, TypeVar, Union, get_type_hints
|
|
4
|
+
from typing import Any, NamedTuple, Optional, TypeVar, Union, get_type_hints
|
|
5
5
|
|
|
6
6
|
import sqlalchemy as sql
|
|
7
7
|
from sqlalchemy import BigInteger, ForeignKey, Integer, LargeBinary, orm
|
|
@@ -84,6 +84,8 @@ class Dir(Base):
|
|
|
84
84
|
)
|
|
85
85
|
parent_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=True)
|
|
86
86
|
md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False) # DirMd
|
|
87
|
+
# This field is updated to synchronize database operations across multiple sessions
|
|
88
|
+
lock_dummy: orm.Mapped[int] = orm.mapped_column(BigInteger, nullable=True)
|
|
87
89
|
|
|
88
90
|
|
|
89
91
|
@dataclasses.dataclass
|
|
@@ -155,6 +157,7 @@ class ViewMd:
|
|
|
155
157
|
class TableMd:
|
|
156
158
|
tbl_id: str # uuid.UUID
|
|
157
159
|
name: str
|
|
160
|
+
is_replica: bool
|
|
158
161
|
|
|
159
162
|
user: Optional[str]
|
|
160
163
|
|
|
@@ -286,3 +289,25 @@ class Function(Base):
|
|
|
286
289
|
dir_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=True)
|
|
287
290
|
md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False) # FunctionMd
|
|
288
291
|
binary_obj: orm.Mapped[Optional[bytes]] = orm.mapped_column(LargeBinary, nullable=True)
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
class FullTableMd(NamedTuple):
|
|
295
|
+
tbl_md: TableMd
|
|
296
|
+
version_md: TableVersionMd
|
|
297
|
+
schema_version_md: TableSchemaVersionMd
|
|
298
|
+
|
|
299
|
+
def as_dict(self) -> dict[str, Any]:
|
|
300
|
+
return {
|
|
301
|
+
'table_id': self.tbl_md.tbl_id,
|
|
302
|
+
'table_md': dataclasses.asdict(self.tbl_md),
|
|
303
|
+
'table_version_md': dataclasses.asdict(self.version_md),
|
|
304
|
+
'table_schema_version_md': dataclasses.asdict(self.schema_version_md),
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
@classmethod
|
|
308
|
+
def from_dict(cls, data_dict: dict[str, Any]) -> 'FullTableMd':
|
|
309
|
+
return FullTableMd(
|
|
310
|
+
tbl_md=md_from_dict(TableMd, data_dict['table_md']),
|
|
311
|
+
version_md=md_from_dict(TableVersionMd, data_dict['table_version_md']),
|
|
312
|
+
schema_version_md=md_from_dict(TableSchemaVersionMd, data_dict['table_schema_version_md']),
|
|
313
|
+
)
|
pixeltable/plan.py
CHANGED
|
@@ -768,8 +768,7 @@ class Planner:
|
|
|
768
768
|
# - select list subexprs that aren't aggregates
|
|
769
769
|
# - join clause subexprs
|
|
770
770
|
# - subexprs of Where clause conjuncts that can't be run in SQL
|
|
771
|
-
# - all grouping exprs
|
|
772
|
-
# run in Python)
|
|
771
|
+
# - all grouping exprs
|
|
773
772
|
candidates = list(
|
|
774
773
|
exprs.Expr.list_subexprs(
|
|
775
774
|
analyzer.select_list,
|
|
@@ -784,7 +783,7 @@ class Planner:
|
|
|
784
783
|
candidates.extend(
|
|
785
784
|
exprs.Expr.subexprs(analyzer.filter, filter=sql_elements.contains, traverse_matches=False)
|
|
786
785
|
)
|
|
787
|
-
if
|
|
786
|
+
if analyzer.group_by_clause is not None:
|
|
788
787
|
candidates.extend(
|
|
789
788
|
exprs.Expr.list_subexprs(analyzer.group_by_clause, filter=sql_elements.contains, traverse_matches=False)
|
|
790
789
|
)
|
pixeltable/share/packager.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import dataclasses
|
|
2
1
|
import io
|
|
3
2
|
import json
|
|
4
3
|
import logging
|
|
@@ -6,7 +5,6 @@ import tarfile
|
|
|
6
5
|
import urllib.parse
|
|
7
6
|
import urllib.request
|
|
8
7
|
import uuid
|
|
9
|
-
from datetime import datetime
|
|
10
8
|
from pathlib import Path
|
|
11
9
|
from typing import Any, Iterator, Optional
|
|
12
10
|
|
|
@@ -58,28 +56,14 @@ class TablePackager:
|
|
|
58
56
|
self.tmp_dir = Path(Env.get().create_tmp_path())
|
|
59
57
|
self.media_files = {}
|
|
60
58
|
|
|
61
|
-
#
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
'
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
# These are temporary; will replace with a better solution once the concurrency
|
|
70
|
-
# changes to catalog have been merged
|
|
71
|
-
'table_md': dataclasses.asdict(t._tbl_version.get()._create_tbl_md()),
|
|
72
|
-
'table_version_md': dataclasses.asdict(
|
|
73
|
-
t._tbl_version.get()._create_version_md(datetime.now().timestamp())
|
|
74
|
-
),
|
|
75
|
-
'table_schema_version_md': dataclasses.asdict(
|
|
76
|
-
t._tbl_version.get()._create_schema_version_md(0)
|
|
77
|
-
),
|
|
78
|
-
}
|
|
79
|
-
for t in (table, *table._bases)
|
|
80
|
-
]
|
|
81
|
-
},
|
|
82
|
-
}
|
|
59
|
+
# Load metadata
|
|
60
|
+
with Env.get().begin_xact():
|
|
61
|
+
tbl_md = catalog.Catalog.get().load_replica_md(table)
|
|
62
|
+
self.md = {
|
|
63
|
+
'pxt_version': pxt.__version__,
|
|
64
|
+
'pxt_md_version': metadata.VERSION,
|
|
65
|
+
'md': {'tables': [md.as_dict() for md in tbl_md]},
|
|
66
|
+
}
|
|
83
67
|
if additional_md is not None:
|
|
84
68
|
self.md.update(additional_md)
|
|
85
69
|
|
pixeltable/share/publish.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import os
|
|
2
1
|
import sys
|
|
3
2
|
import urllib.parse
|
|
4
3
|
import urllib.request
|
|
@@ -10,22 +9,22 @@ from tqdm import tqdm
|
|
|
10
9
|
import pixeltable as pxt
|
|
11
10
|
from pixeltable import exceptions as excs
|
|
12
11
|
from pixeltable.env import Env
|
|
12
|
+
from pixeltable.metadata.schema import FullTableMd
|
|
13
13
|
from pixeltable.utils import sha256sum
|
|
14
14
|
|
|
15
15
|
from .packager import TablePackager
|
|
16
16
|
|
|
17
17
|
# These URLs are abstracted out for now, but will be replaced with actual (hard-coded) URLs once the
|
|
18
18
|
# pixeltable.com URLs are available.
|
|
19
|
-
|
|
20
|
-
|
|
19
|
+
|
|
20
|
+
PIXELTABLE_API_URL = 'https://internal-api.pixeltable.com'
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
def publish_snapshot(dest_tbl_uri: str, src_tbl: pxt.Table) -> str:
|
|
24
24
|
packager = TablePackager(src_tbl, additional_md={'table_uri': dest_tbl_uri})
|
|
25
|
-
request_json = packager.md
|
|
26
|
-
headers_json = {'X-api-key': Env.get().pxt_api_key}
|
|
27
|
-
|
|
28
|
-
response = requests.post(_PUBLISH_URL, json=request_json, headers=headers_json)
|
|
25
|
+
request_json = packager.md | {'operation_type': 'publish_snapshot'}
|
|
26
|
+
headers_json = {'X-api-key': Env.get().pxt_api_key, 'Content-Type': 'application/json'}
|
|
27
|
+
response = requests.post(PIXELTABLE_API_URL, json=request_json, headers=headers_json)
|
|
29
28
|
if response.status_code != 200:
|
|
30
29
|
raise excs.Error(f'Error publishing snapshot: {response.text}')
|
|
31
30
|
response_json = response.json()
|
|
@@ -47,14 +46,14 @@ def publish_snapshot(dest_tbl_uri: str, src_tbl: pxt.Table) -> str:
|
|
|
47
46
|
Env.get().console_logger.info('Finalizing snapshot ...')
|
|
48
47
|
|
|
49
48
|
finalize_request_json = {
|
|
49
|
+
'operation_type': 'finalize_snapshot',
|
|
50
50
|
'upload_id': upload_id,
|
|
51
51
|
'datafile': bundle.name,
|
|
52
52
|
'size': bundle.stat().st_size,
|
|
53
53
|
'sha256': sha256sum(bundle), # Generate our own SHA for independent verification
|
|
54
54
|
}
|
|
55
|
-
|
|
56
55
|
# TODO: Use Pydantic for validation
|
|
57
|
-
finalize_response = requests.post(
|
|
56
|
+
finalize_response = requests.post(PIXELTABLE_API_URL, json=finalize_request_json, headers=headers_json)
|
|
58
57
|
if finalize_response.status_code != 200:
|
|
59
58
|
raise excs.Error(f'Error finalizing snapshot: {finalize_response.text}')
|
|
60
59
|
finalize_response_json = finalize_response.json()
|
|
@@ -66,6 +65,18 @@ def publish_snapshot(dest_tbl_uri: str, src_tbl: pxt.Table) -> str:
|
|
|
66
65
|
return confirmed_tbl_uri
|
|
67
66
|
|
|
68
67
|
|
|
68
|
+
def clone_snapshot(dest_tbl_uri: str) -> list[FullTableMd]:
|
|
69
|
+
headers_json = {'X-api-key': Env.get().pxt_api_key, 'Content-Type': 'application/json'}
|
|
70
|
+
clone_request_json = {'operation_type': 'clone_snapshot', 'table_uri': dest_tbl_uri}
|
|
71
|
+
response = requests.post(PIXELTABLE_API_URL, json=clone_request_json, headers=headers_json)
|
|
72
|
+
if response.status_code != 200:
|
|
73
|
+
raise excs.Error(f'Error cloning snapshot: {response.text}')
|
|
74
|
+
response_json = response.json()
|
|
75
|
+
if not isinstance(response_json, dict) or 'table_uri' not in response_json:
|
|
76
|
+
raise excs.Error(f'Unexpected response from server.\n{response_json}')
|
|
77
|
+
return [FullTableMd.from_dict(t) for t in response_json['md']['tables']]
|
|
78
|
+
|
|
79
|
+
|
|
69
80
|
def _upload_bundle_to_s3(bundle: Path, parsed_location: urllib.parse.ParseResult) -> None:
|
|
70
81
|
from pixeltable.utils.s3 import get_client
|
|
71
82
|
|
pixeltable/store.py
CHANGED
|
@@ -16,6 +16,7 @@ from pixeltable import catalog, exceptions as excs, exprs
|
|
|
16
16
|
from pixeltable.env import Env
|
|
17
17
|
from pixeltable.exec import ExecNode
|
|
18
18
|
from pixeltable.metadata import schema
|
|
19
|
+
from pixeltable.utils.exception_handler import run_cleanup
|
|
19
20
|
from pixeltable.utils.media_store import MediaStore
|
|
20
21
|
from pixeltable.utils.sql import log_explain, log_stmt
|
|
21
22
|
|
|
@@ -99,9 +100,9 @@ class StoreBase:
|
|
|
99
100
|
|
|
100
101
|
# v_min/v_max indices: speeds up base table scans needed to propagate a base table insert or delete
|
|
101
102
|
idx_name = f'vmin_idx_{self.tbl_version.id.hex}'
|
|
102
|
-
idxs.append(sql.Index(idx_name, self.v_min_col, postgresql_using=
|
|
103
|
+
idxs.append(sql.Index(idx_name, self.v_min_col, postgresql_using=Env.get().dbms.version_index_type))
|
|
103
104
|
idx_name = f'vmax_idx_{self.tbl_version.id.hex}'
|
|
104
|
-
idxs.append(sql.Index(idx_name, self.v_max_col, postgresql_using=
|
|
105
|
+
idxs.append(sql.Index(idx_name, self.v_max_col, postgresql_using=Env.get().dbms.version_index_type))
|
|
105
106
|
|
|
106
107
|
self.sa_tbl = sql.Table(self._storage_name(), self.sa_md, *all_cols, *idxs)
|
|
107
108
|
|
|
@@ -232,7 +233,6 @@ class StoreBase:
|
|
|
232
233
|
assert col.tbl.id == self.tbl_version.id
|
|
233
234
|
num_excs = 0
|
|
234
235
|
num_rows = 0
|
|
235
|
-
|
|
236
236
|
# create temp table to store output of exec_plan, with the same primary key as the store table
|
|
237
237
|
tmp_name = f'temp_{self._storage_name()}'
|
|
238
238
|
tmp_pk_cols = [sql.Column(col.name, col.type, primary_key=True) for col in self.pk_columns()]
|
|
@@ -301,10 +301,13 @@ class StoreBase:
|
|
|
301
301
|
)
|
|
302
302
|
log_explain(_logger, update_stmt, conn)
|
|
303
303
|
conn.execute(update_stmt)
|
|
304
|
-
|
|
305
304
|
finally:
|
|
306
|
-
|
|
307
|
-
|
|
305
|
+
|
|
306
|
+
def remove_tmp_tbl() -> None:
|
|
307
|
+
self.sa_md.remove(tmp_tbl)
|
|
308
|
+
tmp_tbl.drop(bind=conn)
|
|
309
|
+
|
|
310
|
+
run_cleanup(remove_tmp_tbl, raise_error=True)
|
|
308
311
|
return num_excs
|
|
309
312
|
|
|
310
313
|
def insert_rows(
|
pixeltable/type_system.py
CHANGED
|
@@ -512,7 +512,7 @@ class StringType(ColumnType):
|
|
|
512
512
|
def __init__(self, nullable: bool = False):
|
|
513
513
|
super().__init__(self.Type.STRING, nullable=nullable)
|
|
514
514
|
|
|
515
|
-
def has_supertype(self):
|
|
515
|
+
def has_supertype(self) -> bool:
|
|
516
516
|
return not self.nullable
|
|
517
517
|
|
|
518
518
|
@classmethod
|
|
@@ -602,7 +602,7 @@ class TimestampType(ColumnType):
|
|
|
602
602
|
def __init__(self, nullable: bool = False):
|
|
603
603
|
super().__init__(self.Type.TIMESTAMP, nullable=nullable)
|
|
604
604
|
|
|
605
|
-
def has_supertype(self):
|
|
605
|
+
def has_supertype(self) -> bool:
|
|
606
606
|
return not self.nullable
|
|
607
607
|
|
|
608
608
|
@classmethod
|
|
@@ -768,7 +768,7 @@ class JsonType(ColumnType):
|
|
|
768
768
|
a_type = a.get('type')
|
|
769
769
|
b_type = b.get('type')
|
|
770
770
|
|
|
771
|
-
if a_type in
|
|
771
|
+
if a_type in ('string', 'integer', 'number', 'boolean', 'object', 'array') and a_type == b_type:
|
|
772
772
|
# a and b both have the same type designation, but are not identical. This can happen if
|
|
773
773
|
# (for example) they have validators or other attributes that differ. In this case, we
|
|
774
774
|
# generalize to {'type': t}, where t is their shared type, with no other qualifications.
|
|
@@ -1170,6 +1170,20 @@ class DocumentType(ColumnType):
|
|
|
1170
1170
|
XML = 3
|
|
1171
1171
|
TXT = 4
|
|
1172
1172
|
|
|
1173
|
+
@classmethod
|
|
1174
|
+
def from_extension(cls, ext: str) -> Optional['DocumentType.DocumentFormat']:
|
|
1175
|
+
if ext in ('.htm', '.html'):
|
|
1176
|
+
return cls.HTML
|
|
1177
|
+
if ext == '.md':
|
|
1178
|
+
return cls.MD
|
|
1179
|
+
if ext == '.pdf':
|
|
1180
|
+
return cls.PDF
|
|
1181
|
+
if ext == '.xml':
|
|
1182
|
+
return cls.XML
|
|
1183
|
+
if ext == '.txt':
|
|
1184
|
+
return cls.TXT
|
|
1185
|
+
return None
|
|
1186
|
+
|
|
1173
1187
|
def __init__(self, nullable: bool = False, doc_formats: Optional[str] = None):
|
|
1174
1188
|
super().__init__(self.Type.DOCUMENT, nullable=nullable)
|
|
1175
1189
|
self.doc_formats = doc_formats
|
|
@@ -1203,9 +1217,7 @@ class DocumentType(ColumnType):
|
|
|
1203
1217
|
assert isinstance(val, str)
|
|
1204
1218
|
from pixeltable.utils.documents import get_document_handle
|
|
1205
1219
|
|
|
1206
|
-
|
|
1207
|
-
if dh is None:
|
|
1208
|
-
raise excs.Error(f'Not a recognized document format: {val}')
|
|
1220
|
+
_ = get_document_handle(val)
|
|
1209
1221
|
|
|
1210
1222
|
|
|
1211
1223
|
T = typing.TypeVar('T')
|
|
@@ -1240,7 +1252,7 @@ class _PxtType:
|
|
|
1240
1252
|
`ColumnType`.
|
|
1241
1253
|
"""
|
|
1242
1254
|
|
|
1243
|
-
def __init__(self):
|
|
1255
|
+
def __init__(self) -> None:
|
|
1244
1256
|
raise TypeError(f'Type `{type(self)}` cannot be instantiated.')
|
|
1245
1257
|
|
|
1246
1258
|
@classmethod
|
|
pixeltable/utils/console_output.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
from typing import TextIO
|
|
2
3
|
|
|
3
4
|
|
|
4
5
|
def map_level(verbosity: int) -> int:
|
|
@@ -22,10 +23,10 @@ def map_level(verbosity: int) -> int:
|
|
|
22
23
|
|
|
23
24
|
|
|
24
25
|
class ConsoleOutputHandler(logging.StreamHandler):
|
|
25
|
-
def __init__(self, stream):
|
|
26
|
+
def __init__(self, stream: TextIO):
|
|
26
27
|
super().__init__(stream)
|
|
27
28
|
|
|
28
|
-
def emit(self, record):
|
|
29
|
+
def emit(self, record: logging.LogRecord) -> None:
|
|
29
30
|
if record.msg.endswith('\n'):
|
|
30
31
|
self.stream.write(record.msg)
|
|
31
32
|
else:
|