pixeltable-0.3.5-py3-none-any.whl → pixeltable-0.3.7-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +5 -3
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -0
- pixeltable/catalog/catalog.py +335 -128
- pixeltable/catalog/column.py +21 -5
- pixeltable/catalog/dir.py +19 -6
- pixeltable/catalog/insertable_table.py +34 -37
- pixeltable/catalog/named_function.py +0 -4
- pixeltable/catalog/schema_object.py +28 -42
- pixeltable/catalog/table.py +195 -158
- pixeltable/catalog/table_version.py +187 -232
- pixeltable/catalog/table_version_handle.py +50 -0
- pixeltable/catalog/table_version_path.py +49 -33
- pixeltable/catalog/view.py +56 -96
- pixeltable/config.py +103 -0
- pixeltable/dataframe.py +90 -90
- pixeltable/env.py +98 -168
- pixeltable/exec/aggregation_node.py +5 -4
- pixeltable/exec/cache_prefetch_node.py +1 -1
- pixeltable/exec/component_iteration_node.py +13 -9
- pixeltable/exec/data_row_batch.py +3 -3
- pixeltable/exec/exec_context.py +0 -4
- pixeltable/exec/exec_node.py +3 -2
- pixeltable/exec/expr_eval/schedulers.py +2 -1
- pixeltable/exec/in_memory_data_node.py +9 -4
- pixeltable/exec/row_update_node.py +1 -2
- pixeltable/exec/sql_node.py +20 -16
- pixeltable/exprs/column_ref.py +9 -9
- pixeltable/exprs/comparison.py +1 -1
- pixeltable/exprs/data_row.py +4 -4
- pixeltable/exprs/expr.py +20 -5
- pixeltable/exprs/function_call.py +98 -58
- pixeltable/exprs/json_mapper.py +25 -8
- pixeltable/exprs/json_path.py +6 -5
- pixeltable/exprs/object_ref.py +16 -5
- pixeltable/exprs/row_builder.py +15 -15
- pixeltable/exprs/rowid_ref.py +21 -7
- pixeltable/func/__init__.py +1 -1
- pixeltable/func/function.py +38 -6
- pixeltable/func/query_template_function.py +3 -6
- pixeltable/func/tools.py +26 -26
- pixeltable/func/udf.py +1 -1
- pixeltable/functions/__init__.py +2 -0
- pixeltable/functions/anthropic.py +9 -3
- pixeltable/functions/fireworks.py +7 -4
- pixeltable/functions/globals.py +4 -5
- pixeltable/functions/huggingface.py +1 -5
- pixeltable/functions/image.py +17 -7
- pixeltable/functions/llama_cpp.py +1 -1
- pixeltable/functions/mistralai.py +1 -1
- pixeltable/functions/ollama.py +4 -4
- pixeltable/functions/openai.py +26 -23
- pixeltable/functions/string.py +23 -30
- pixeltable/functions/timestamp.py +11 -6
- pixeltable/functions/together.py +14 -12
- pixeltable/functions/util.py +1 -1
- pixeltable/functions/video.py +5 -4
- pixeltable/functions/vision.py +6 -9
- pixeltable/functions/whisper.py +3 -3
- pixeltable/globals.py +246 -260
- pixeltable/index/__init__.py +2 -0
- pixeltable/index/base.py +1 -1
- pixeltable/index/btree.py +3 -1
- pixeltable/index/embedding_index.py +11 -5
- pixeltable/io/external_store.py +11 -12
- pixeltable/io/label_studio.py +4 -3
- pixeltable/io/parquet.py +57 -56
- pixeltable/iterators/__init__.py +4 -2
- pixeltable/iterators/audio.py +11 -11
- pixeltable/iterators/document.py +10 -10
- pixeltable/iterators/string.py +1 -2
- pixeltable/iterators/video.py +14 -15
- pixeltable/metadata/__init__.py +9 -5
- pixeltable/metadata/converters/convert_10.py +0 -1
- pixeltable/metadata/converters/convert_15.py +0 -2
- pixeltable/metadata/converters/convert_23.py +0 -2
- pixeltable/metadata/converters/convert_24.py +3 -3
- pixeltable/metadata/converters/convert_25.py +1 -1
- pixeltable/metadata/converters/convert_27.py +0 -2
- pixeltable/metadata/converters/convert_28.py +0 -2
- pixeltable/metadata/converters/convert_29.py +7 -8
- pixeltable/metadata/converters/util.py +7 -7
- pixeltable/metadata/schema.py +27 -19
- pixeltable/plan.py +68 -40
- pixeltable/share/packager.py +12 -9
- pixeltable/store.py +37 -38
- pixeltable/type_system.py +41 -28
- pixeltable/utils/filecache.py +2 -1
- {pixeltable-0.3.5.dist-info → pixeltable-0.3.7.dist-info}/METADATA +1 -1
- pixeltable-0.3.7.dist-info/RECORD +174 -0
- pixeltable-0.3.5.dist-info/RECORD +0 -172
- {pixeltable-0.3.5.dist-info → pixeltable-0.3.7.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.5.dist-info → pixeltable-0.3.7.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.5.dist-info → pixeltable-0.3.7.dist-info}/entry_points.txt +0 -0
pixeltable/catalog/dir.py
CHANGED
|
@@ -18,16 +18,29 @@ class Dir(SchemaObject):
|
|
|
18
18
|
def __init__(self, id: UUID, parent_id: UUID, name: str):
|
|
19
19
|
super().__init__(id, name, parent_id)
|
|
20
20
|
|
|
21
|
+
@classmethod
|
|
22
|
+
def _create(cls, parent_id: UUID, name: str) -> Dir:
|
|
23
|
+
session = Env.get().session
|
|
24
|
+
assert session is not None
|
|
25
|
+
dir_md = schema.DirMd(name=name, user=None, additional_md={})
|
|
26
|
+
dir_record = schema.Dir(parent_id=parent_id, md=dataclasses.asdict(dir_md))
|
|
27
|
+
session.add(dir_record)
|
|
28
|
+
session.flush()
|
|
29
|
+
assert dir_record.id is not None
|
|
30
|
+
assert isinstance(dir_record.id, UUID)
|
|
31
|
+
dir = cls(dir_record.id, parent_id, name)
|
|
32
|
+
return dir
|
|
33
|
+
|
|
21
34
|
@classmethod
|
|
22
35
|
def _display_name(cls) -> str:
|
|
23
36
|
return 'directory'
|
|
24
37
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
return
|
|
38
|
+
def _path(self) -> str:
|
|
39
|
+
"""Returns the path to this schema object."""
|
|
40
|
+
if self._dir_id is None:
|
|
41
|
+
# we're the root dir
|
|
42
|
+
return ''
|
|
43
|
+
return super()._path()
|
|
31
44
|
|
|
32
45
|
def _move(self, new_name: str, new_dir_id: UUID) -> None:
|
|
33
46
|
super()._move(new_name, new_dir_id)
|
|
@@ -4,18 +4,16 @@ import logging
|
|
|
4
4
|
from typing import Any, Iterable, Literal, Optional, overload
|
|
5
5
|
from uuid import UUID
|
|
6
6
|
|
|
7
|
-
import sqlalchemy.orm as orm
|
|
8
|
-
|
|
9
7
|
import pixeltable as pxt
|
|
10
8
|
import pixeltable.type_system as ts
|
|
11
9
|
from pixeltable import exceptions as excs
|
|
12
10
|
from pixeltable.env import Env
|
|
13
11
|
from pixeltable.utils.filecache import FileCache
|
|
14
12
|
|
|
15
|
-
from .catalog import Catalog
|
|
16
13
|
from .globals import MediaValidation, UpdateStatus
|
|
17
14
|
from .table import Table
|
|
18
15
|
from .table_version import TableVersion
|
|
16
|
+
from .table_version_handle import TableVersionHandle
|
|
19
17
|
from .table_version_path import TableVersionPath
|
|
20
18
|
|
|
21
19
|
_logger = logging.getLogger('pixeltable')
|
|
@@ -24,15 +22,14 @@ _logger = logging.getLogger('pixeltable')
|
|
|
24
22
|
class InsertableTable(Table):
|
|
25
23
|
"""A `Table` that allows inserting and deleting rows."""
|
|
26
24
|
|
|
27
|
-
def __init__(self, dir_id: UUID, tbl_version:
|
|
25
|
+
def __init__(self, dir_id: UUID, tbl_version: TableVersionHandle):
|
|
28
26
|
tbl_version_path = TableVersionPath(tbl_version)
|
|
29
|
-
super().__init__(tbl_version.id, dir_id, tbl_version.name, tbl_version_path)
|
|
27
|
+
super().__init__(tbl_version.id, dir_id, tbl_version.get().name, tbl_version_path)
|
|
30
28
|
|
|
31
29
|
@classmethod
|
|
32
30
|
def _display_name(cls) -> str:
|
|
33
31
|
return 'table'
|
|
34
32
|
|
|
35
|
-
# MODULE-LOCAL, NOT PUBLIC
|
|
36
33
|
@classmethod
|
|
37
34
|
def _create(
|
|
38
35
|
cls,
|
|
@@ -56,33 +53,29 @@ class InsertableTable(Table):
|
|
|
56
53
|
raise excs.Error(f'Primary key column {pk_col} cannot be nullable')
|
|
57
54
|
col.is_pk = True
|
|
58
55
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
_logger.info(f'Created table `{name}`, id={tbl_version.id}')
|
|
84
|
-
Env.get().console_logger.info(f'Created table `{name}`.')
|
|
85
|
-
return tbl
|
|
56
|
+
_, tbl_version = TableVersion.create(
|
|
57
|
+
dir_id,
|
|
58
|
+
name,
|
|
59
|
+
columns,
|
|
60
|
+
num_retained_versions=num_retained_versions,
|
|
61
|
+
comment=comment,
|
|
62
|
+
media_validation=media_validation,
|
|
63
|
+
)
|
|
64
|
+
tbl = cls(dir_id, TableVersionHandle.create(tbl_version))
|
|
65
|
+
# TODO We need to commit before doing the insertion, in order to avoid a primary key (version) collision
|
|
66
|
+
# when the table metadata gets updated. Once we have a notion of user-defined transactions in
|
|
67
|
+
# Pixeltable, we can wrap the create/insert in a transaction to avoid this.
|
|
68
|
+
session = Env.get().session
|
|
69
|
+
session.commit()
|
|
70
|
+
if df is not None:
|
|
71
|
+
# A DataFrame was provided, so insert its contents into the table
|
|
72
|
+
# (using the same DB session as the table creation)
|
|
73
|
+
tbl_version.insert(None, df, fail_on_exception=True)
|
|
74
|
+
session.commit()
|
|
75
|
+
|
|
76
|
+
_logger.info(f'Created table `{name}`, id={tbl_version.id}')
|
|
77
|
+
Env.get().console_logger.info(f'Created table `{name}`.')
|
|
78
|
+
return tbl
|
|
86
79
|
|
|
87
80
|
def get_metadata(self) -> dict[str, Any]:
|
|
88
81
|
md = super().get_metadata()
|
|
@@ -131,7 +124,10 @@ class InsertableTable(Table):
|
|
|
131
124
|
if not isinstance(row, dict):
|
|
132
125
|
raise excs.Error('rows must be a list of dictionaries')
|
|
133
126
|
self._validate_input_rows(rows)
|
|
134
|
-
|
|
127
|
+
with Env.get().begin_xact():
|
|
128
|
+
status = self._tbl_version.get().insert(
|
|
129
|
+
rows, None, print_stats=print_stats, fail_on_exception=fail_on_exception
|
|
130
|
+
)
|
|
135
131
|
|
|
136
132
|
if status.num_excs == 0:
|
|
137
133
|
cols_with_excs_str = ''
|
|
@@ -152,8 +148,8 @@ class InsertableTable(Table):
|
|
|
152
148
|
def _validate_input_rows(self, rows: list[dict[str, Any]]) -> None:
|
|
153
149
|
"""Verify that the input rows match the table schema"""
|
|
154
150
|
valid_col_names = set(self._schema.keys())
|
|
155
|
-
reqd_col_names = set(self._tbl_version_path.tbl_version.get_required_col_names())
|
|
156
|
-
computed_col_names = set(self._tbl_version_path.tbl_version.get_computed_col_names())
|
|
151
|
+
reqd_col_names = set(self._tbl_version_path.tbl_version.get().get_required_col_names())
|
|
152
|
+
computed_col_names = set(self._tbl_version_path.tbl_version.get().get_computed_col_names())
|
|
157
153
|
for row in rows:
|
|
158
154
|
assert isinstance(row, dict)
|
|
159
155
|
col_names = set(row.keys())
|
|
@@ -191,4 +187,5 @@ class InsertableTable(Table):
|
|
|
191
187
|
|
|
192
188
|
>>> tbl.delete(tbl.a > 5)
|
|
193
189
|
"""
|
|
194
|
-
|
|
190
|
+
with Env.get().begin_xact():
|
|
191
|
+
return self._tbl_version.get().delete(where=where)
|
|
@@ -27,10 +27,6 @@ class NamedFunction(SchemaObject):
|
|
|
27
27
|
def _display_name(cls) -> str:
|
|
28
28
|
return 'function'
|
|
29
29
|
|
|
30
|
-
@property
|
|
31
|
-
def _has_dependents(self) -> bool:
|
|
32
|
-
return False
|
|
33
|
-
|
|
34
30
|
def _move(self, new_name: str, new_dir_id: UUID) -> None:
|
|
35
31
|
super()._move(new_name, new_dir_id)
|
|
36
32
|
with Env.get().engine.begin() as conn:
|
|
@@ -2,6 +2,8 @@ from abc import abstractmethod
|
|
|
2
2
|
from typing import TYPE_CHECKING, Any, Optional
|
|
3
3
|
from uuid import UUID
|
|
4
4
|
|
|
5
|
+
import pixeltable.env as env
|
|
6
|
+
|
|
5
7
|
if TYPE_CHECKING:
|
|
6
8
|
from pixeltable import catalog
|
|
7
9
|
|
|
@@ -12,53 +14,42 @@ class SchemaObject:
|
|
|
12
14
|
Each object has an id, a name and a parent directory.
|
|
13
15
|
"""
|
|
14
16
|
|
|
17
|
+
_id: UUID
|
|
18
|
+
_name: str
|
|
19
|
+
_dir_id: Optional[UUID]
|
|
20
|
+
|
|
15
21
|
def __init__(self, obj_id: UUID, name: str, dir_id: Optional[UUID]):
|
|
16
22
|
# make these private so they don't collide with column names (id and name are fairly common)
|
|
17
|
-
self.
|
|
18
|
-
self.
|
|
19
|
-
self.
|
|
20
|
-
|
|
21
|
-
@property
|
|
22
|
-
def _id(self) -> UUID:
|
|
23
|
-
return self.__id
|
|
24
|
-
|
|
25
|
-
@property
|
|
26
|
-
def _name(self) -> str:
|
|
27
|
-
return self.__name
|
|
23
|
+
self._id = obj_id
|
|
24
|
+
self._name = name
|
|
25
|
+
self._dir_id = dir_id
|
|
28
26
|
|
|
29
|
-
@property
|
|
30
|
-
def _dir_id(self) -> Optional[UUID]:
|
|
31
|
-
return self.__dir_id
|
|
32
|
-
|
|
33
|
-
@property
|
|
34
27
|
def _parent(self) -> Optional['catalog.Dir']:
|
|
35
28
|
"""Returns the parent directory of this schema object."""
|
|
36
|
-
from
|
|
29
|
+
from .catalog import Catalog
|
|
37
30
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
return dir
|
|
31
|
+
with env.Env.get().begin_xact():
|
|
32
|
+
if self._dir_id is None:
|
|
33
|
+
return None
|
|
34
|
+
return Catalog.get().get_dir(self._dir_id)
|
|
43
35
|
|
|
44
|
-
@property
|
|
45
36
|
def _path(self) -> str:
|
|
46
37
|
"""Returns the path to this schema object."""
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
38
|
+
with env.Env.get().begin_xact():
|
|
39
|
+
from .catalog import Catalog
|
|
40
|
+
|
|
41
|
+
cat = Catalog.get()
|
|
42
|
+
dir_path = cat.get_dir_path(self._dir_id)
|
|
43
|
+
if dir_path == '':
|
|
44
|
+
# Either this is the root directory, with empty path, or its parent is the
|
|
45
|
+
# root directory. Either way, we return just the name.
|
|
46
|
+
return self._name
|
|
47
|
+
else:
|
|
48
|
+
return f'{dir_path}.{self._name}'
|
|
54
49
|
|
|
55
50
|
def get_metadata(self) -> dict[str, Any]:
|
|
56
51
|
"""Returns metadata associated with this schema object."""
|
|
57
|
-
return {
|
|
58
|
-
'name': self._name,
|
|
59
|
-
'path': self._path,
|
|
60
|
-
'parent': self._parent._path if self._parent is not None else None,
|
|
61
|
-
}
|
|
52
|
+
return {'name': self._name, 'path': self._path()}
|
|
62
53
|
|
|
63
54
|
@classmethod
|
|
64
55
|
@abstractmethod
|
|
@@ -68,12 +59,7 @@ class SchemaObject:
|
|
|
68
59
|
"""
|
|
69
60
|
pass
|
|
70
61
|
|
|
71
|
-
@property
|
|
72
|
-
@abstractmethod
|
|
73
|
-
def _has_dependents(self) -> bool:
|
|
74
|
-
"""Returns True if this object has dependents (e.g., children, views)"""
|
|
75
|
-
|
|
76
62
|
def _move(self, new_name: str, new_dir_id: UUID) -> None:
|
|
77
63
|
"""Subclasses need to override this to make the change persistent"""
|
|
78
|
-
self.
|
|
79
|
-
self.
|
|
64
|
+
self._name = new_name
|
|
65
|
+
self._dir_id = new_dir_id
|