pixeltable 0.2.15__py3-none-any.whl → 0.2.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/column.py +3 -0
- pixeltable/catalog/dir.py +1 -1
- pixeltable/catalog/globals.py +15 -6
- pixeltable/catalog/insertable_table.py +23 -8
- pixeltable/catalog/named_function.py +1 -1
- pixeltable/catalog/path_dict.py +4 -4
- pixeltable/catalog/schema_object.py +30 -18
- pixeltable/catalog/table.py +84 -99
- pixeltable/catalog/table_version.py +35 -24
- pixeltable/catalog/table_version_path.py +2 -2
- pixeltable/catalog/view.py +15 -8
- pixeltable/dataframe.py +56 -56
- pixeltable/env.py +7 -5
- pixeltable/exec/__init__.py +3 -3
- pixeltable/exec/aggregation_node.py +3 -3
- pixeltable/exec/expr_eval_node.py +3 -3
- pixeltable/exec/in_memory_data_node.py +4 -4
- pixeltable/exec/sql_node.py +4 -1
- pixeltable/exprs/array_slice.py +3 -4
- pixeltable/exprs/column_ref.py +20 -4
- pixeltable/exprs/comparison.py +11 -6
- pixeltable/exprs/data_row.py +3 -0
- pixeltable/exprs/expr.py +51 -23
- pixeltable/exprs/function_call.py +8 -1
- pixeltable/exprs/inline_array.py +2 -2
- pixeltable/exprs/json_path.py +36 -20
- pixeltable/exprs/row_builder.py +4 -4
- pixeltable/exprs/rowid_ref.py +1 -1
- pixeltable/functions/__init__.py +1 -2
- pixeltable/functions/anthropic.py +97 -0
- pixeltable/functions/audio.py +32 -0
- pixeltable/functions/fireworks.py +1 -1
- pixeltable/functions/huggingface.py +4 -4
- pixeltable/functions/image.py +1 -1
- pixeltable/functions/together.py +1 -1
- pixeltable/functions/video.py +5 -1
- pixeltable/functions/vision.py +2 -6
- pixeltable/globals.py +57 -28
- pixeltable/io/external_store.py +4 -4
- pixeltable/io/globals.py +12 -13
- pixeltable/io/label_studio.py +6 -6
- pixeltable/io/pandas.py +27 -12
- pixeltable/io/parquet.py +14 -14
- pixeltable/iterators/document.py +7 -7
- pixeltable/plan.py +58 -29
- pixeltable/store.py +32 -31
- pixeltable/tool/create_test_db_dump.py +12 -6
- pixeltable/type_system.py +89 -97
- pixeltable/utils/pytorch.py +12 -10
- {pixeltable-0.2.15.dist-info → pixeltable-0.2.17.dist-info}/METADATA +10 -10
- {pixeltable-0.2.15.dist-info → pixeltable-0.2.17.dist-info}/RECORD +55 -53
- {pixeltable-0.2.15.dist-info → pixeltable-0.2.17.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.15.dist-info → pixeltable-0.2.17.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.15.dist-info → pixeltable-0.2.17.dist-info}/entry_points.txt +0 -0
pixeltable/__version__.py
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
# These version placeholders will be replaced during build.
|
|
2
|
-
__version__ = "0.2.15"
|
|
3
|
-
__version_tuple__ = (0, 2, 15)
|
|
2
|
+
__version__ = "0.2.17"
|
|
3
|
+
__version_tuple__ = (0, 2, 17)
|
pixeltable/catalog/column.py
CHANGED
|
@@ -196,6 +196,9 @@ class Column:
|
|
|
196
196
|
def __str__(self) -> str:
|
|
197
197
|
return f'{self.name}: {self.col_type}'
|
|
198
198
|
|
|
199
|
+
def __repr__(self) -> str:
|
|
200
|
+
return f'Column({self.id!r}, {self.name!r}, tbl={self.tbl.name!r})'
|
|
201
|
+
|
|
199
202
|
def __hash__(self) -> int:
|
|
200
203
|
# TODO(aaron-siegel): This and __eq__ do not capture the table version. We need to rethink the Column
|
|
201
204
|
# abstraction (perhaps separating out the version-dependent properties into a different abstraction).
|
pixeltable/catalog/dir.py
CHANGED
pixeltable/catalog/globals.py
CHANGED
|
@@ -1,14 +1,18 @@
|
|
|
1
|
-
from typing import List
|
|
2
1
|
import dataclasses
|
|
2
|
+
import itertools
|
|
3
3
|
import logging
|
|
4
|
-
|
|
4
|
+
from typing import Optional
|
|
5
5
|
|
|
6
6
|
_logger = logging.getLogger('pixeltable')
|
|
7
7
|
|
|
8
8
|
# name of the position column in a component view
|
|
9
|
-
|
|
9
|
+
_POS_COLUMN_NAME = 'pos'
|
|
10
10
|
_ROWID_COLUMN_NAME = '_rowid'
|
|
11
11
|
|
|
12
|
+
# Set of symbols that are predefined in the `InsertableTable` class (and are therefore not allowed as column names).
|
|
13
|
+
# This will be populated lazily to avoid circular imports.
|
|
14
|
+
_PREDEF_SYMBOLS: Optional[set[str]] = None
|
|
15
|
+
|
|
12
16
|
|
|
13
17
|
@dataclasses.dataclass
|
|
14
18
|
class UpdateStatus:
|
|
@@ -16,8 +20,8 @@ class UpdateStatus:
|
|
|
16
20
|
# TODO: disambiguate what this means: # of slots computed or # of columns computed?
|
|
17
21
|
num_computed_values: int = 0
|
|
18
22
|
num_excs: int = 0
|
|
19
|
-
updated_cols: List[str] = dataclasses.field(default_factory=list)
|
|
20
|
-
cols_with_excs: List[str] = dataclasses.field(default_factory=list)
|
|
23
|
+
updated_cols: list[str] = dataclasses.field(default_factory=list)
|
|
24
|
+
cols_with_excs: list[str] = dataclasses.field(default_factory=list)
|
|
21
25
|
|
|
22
26
|
def __iadd__(self, other: 'UpdateStatus') -> 'UpdateStatus':
|
|
23
27
|
self.num_rows += other.num_rows
|
|
@@ -40,4 +44,9 @@ def is_valid_path(path: str, empty_is_valid : bool) -> bool:
|
|
|
40
44
|
return True
|
|
41
45
|
|
|
42
46
|
def is_system_column_name(name: str) -> bool:
|
|
43
|
-
|
|
47
|
+
from pixeltable.catalog import InsertableTable, View
|
|
48
|
+
|
|
49
|
+
global _PREDEF_SYMBOLS
|
|
50
|
+
if _PREDEF_SYMBOLS is None:
|
|
51
|
+
_PREDEF_SYMBOLS = set(itertools.chain(dir(InsertableTable), dir(View)))
|
|
52
|
+
return name == _POS_COLUMN_NAME or name in _PREDEF_SYMBOLS
|
pixeltable/catalog/insertable_table.py
CHANGED
|
@@ -1,15 +1,16 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
-
from typing import
|
|
4
|
+
from typing import Any, Dict, Iterable, List, Optional, overload
|
|
5
5
|
from uuid import UUID
|
|
6
6
|
|
|
7
7
|
import sqlalchemy.orm as orm
|
|
8
8
|
|
|
9
|
-
import pixeltable
|
|
9
|
+
import pixeltable as pxt
|
|
10
10
|
import pixeltable.type_system as ts
|
|
11
11
|
from pixeltable import exceptions as excs
|
|
12
12
|
from pixeltable.env import Env
|
|
13
|
+
|
|
13
14
|
from .catalog import Catalog
|
|
14
15
|
from .globals import UpdateStatus
|
|
15
16
|
from .table import Table
|
|
@@ -27,13 +28,13 @@ class InsertableTable(Table):
|
|
|
27
28
|
super().__init__(tbl_version.id, dir_id, tbl_version.name, tbl_version_path)
|
|
28
29
|
|
|
29
30
|
@classmethod
|
|
30
|
-
def
|
|
31
|
+
def _display_name(cls) -> str:
|
|
31
32
|
return 'table'
|
|
32
33
|
|
|
33
34
|
# MODULE-LOCAL, NOT PUBLIC
|
|
34
35
|
@classmethod
|
|
35
|
-
def
|
|
36
|
-
cls, dir_id: UUID, name: str, schema:
|
|
36
|
+
def _create(
|
|
37
|
+
cls, dir_id: UUID, name: str, schema: dict[str, ts.ColumnType], df: Optional[pxt.DataFrame], primary_key: List[str],
|
|
37
38
|
num_retained_versions: int, comment: str
|
|
38
39
|
) -> InsertableTable:
|
|
39
40
|
columns = cls._create_columns(schema)
|
|
@@ -50,6 +51,14 @@ class InsertableTable(Table):
|
|
|
50
51
|
with orm.Session(Env.get().engine, future=True) as session:
|
|
51
52
|
_, tbl_version = TableVersion.create(session, dir_id, name, columns, num_retained_versions, comment)
|
|
52
53
|
tbl = cls(dir_id, tbl_version)
|
|
54
|
+
# TODO We need to commit before doing the insertion, in order to avoid a primary key (version) collision
|
|
55
|
+
# when the table metadata gets updated. Once we have a notion of user-defined transactions in
|
|
56
|
+
# Pixeltable, we can wrap the create/insert in a transaction to avoid this.
|
|
57
|
+
session.commit()
|
|
58
|
+
if df is not None:
|
|
59
|
+
# A DataFrame was provided, so insert its contents into the table
|
|
60
|
+
# (using the same DB session as the table creation)
|
|
61
|
+
tbl_version.insert(None, df, conn=session.connection(), fail_on_exception=True)
|
|
53
62
|
session.commit()
|
|
54
63
|
cat = Catalog.get()
|
|
55
64
|
cat.tbl_dependents[tbl._id] = []
|
|
@@ -59,6 +68,12 @@ class InsertableTable(Table):
|
|
|
59
68
|
print(f'Created table `{name}`.')
|
|
60
69
|
return tbl
|
|
61
70
|
|
|
71
|
+
def get_metadata(self) -> dict[str, Any]:
|
|
72
|
+
md = super().get_metadata()
|
|
73
|
+
md['is_view'] = False
|
|
74
|
+
md['is_snapshot'] = False
|
|
75
|
+
return md
|
|
76
|
+
|
|
62
77
|
@overload
|
|
63
78
|
def insert(
|
|
64
79
|
self, rows: Iterable[Dict[str, Any]], /, *, print_stats: bool = False, fail_on_exception: bool = True
|
|
@@ -86,7 +101,7 @@ class InsertableTable(Table):
|
|
|
86
101
|
if not isinstance(row, dict):
|
|
87
102
|
raise excs.Error('rows must be a list of dictionaries')
|
|
88
103
|
self._validate_input_rows(rows)
|
|
89
|
-
result = self._tbl_version.insert(rows, print_stats=print_stats, fail_on_exception=fail_on_exception)
|
|
104
|
+
result = self._tbl_version.insert(rows, None, print_stats=print_stats, fail_on_exception=fail_on_exception)
|
|
90
105
|
|
|
91
106
|
if result.num_excs == 0:
|
|
92
107
|
cols_with_excs_str = ''
|
|
@@ -104,7 +119,7 @@ class InsertableTable(Table):
|
|
|
104
119
|
|
|
105
120
|
def _validate_input_rows(self, rows: List[Dict[str, Any]]) -> None:
|
|
106
121
|
"""Verify that the input rows match the table schema"""
|
|
107
|
-
valid_col_names = set(self.
|
|
122
|
+
valid_col_names = set(self._schema.keys())
|
|
108
123
|
reqd_col_names = set(self._tbl_version_path.tbl_version.get_required_col_names())
|
|
109
124
|
computed_col_names = set(self._tbl_version_path.tbl_version.get_computed_col_names())
|
|
110
125
|
for row in rows:
|
|
@@ -129,7 +144,7 @@ class InsertableTable(Table):
|
|
|
129
144
|
msg = str(e)
|
|
130
145
|
raise excs.Error(f'Error in column {col.name}: {msg[0].lower() + msg[1:]}\nRow: {row}')
|
|
131
146
|
|
|
132
|
-
def delete(self, where: Optional['
|
|
147
|
+
def delete(self, where: Optional['pxt.exprs.Expr'] = None) -> UpdateStatus:
|
|
133
148
|
"""Delete rows in this table.
|
|
134
149
|
|
|
135
150
|
Args:
|
pixeltable/catalog/path_dict.py
CHANGED
|
@@ -114,20 +114,20 @@ class PathDict:
|
|
|
114
114
|
schema_obj = self._resolve_path(path)
|
|
115
115
|
if not isinstance(schema_obj, expected):
|
|
116
116
|
raise excs.Error(
|
|
117
|
-
f'{str(path)} needs to be a {expected.
|
|
117
|
+
f'{str(path)} needs to be a {expected._display_name()} but is a {type(schema_obj)._display_name()}')
|
|
118
118
|
if expected is None:
|
|
119
119
|
parent_obj = self._resolve_path(path.parent)
|
|
120
120
|
if not isinstance(parent_obj, Dir):
|
|
121
121
|
raise excs.Error(
|
|
122
|
-
f'{str(path.parent)} is a {type(parent_obj).
|
|
122
|
+
f'{str(path.parent)} is a {type(parent_obj)._display_name()}, not a {Dir._display_name()}')
|
|
123
123
|
if path.name in self.dir_contents[parent_obj._id]:
|
|
124
124
|
obj = self.dir_contents[parent_obj._id][path.name]
|
|
125
|
-
raise excs.Error(f"{type(obj).
|
|
125
|
+
raise excs.Error(f"{type(obj)._display_name()} '{str(path)}' already exists")
|
|
126
126
|
|
|
127
127
|
def get_children(self, parent: Path, child_type: Optional[Type[SchemaObject]], recursive: bool) -> List[Path]:
|
|
128
128
|
dir = self._resolve_path(parent)
|
|
129
129
|
if not isinstance(dir, Dir):
|
|
130
|
-
raise excs.Error(f'{str(parent)} is a {type(dir).
|
|
130
|
+
raise excs.Error(f'{str(parent)} is a {type(dir)._display_name()}, not a directory')
|
|
131
131
|
matches = [
|
|
132
132
|
obj for obj in self.dir_contents[dir._id].values() if child_type is None or isinstance(obj, child_type)
|
|
133
133
|
]
|
pixeltable/catalog/schema_object.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from abc import abstractmethod
|
|
2
|
-
from typing import TYPE_CHECKING, Optional
|
|
2
|
+
from typing import TYPE_CHECKING, Any, Optional
|
|
3
3
|
from uuid import UUID
|
|
4
4
|
|
|
5
5
|
if TYPE_CHECKING:
|
|
@@ -13,20 +13,24 @@ class SchemaObject:
|
|
|
13
13
|
"""
|
|
14
14
|
def __init__(self, obj_id: UUID, name: str, dir_id: Optional[UUID]):
|
|
15
15
|
# make these private so they don't collide with column names (id and name are fairly common)
|
|
16
|
-
self._id = obj_id
|
|
17
|
-
self._name = name
|
|
18
|
-
self._dir_id = dir_id
|
|
16
|
+
self.__id = obj_id
|
|
17
|
+
self.__name = name
|
|
18
|
+
self.__dir_id = dir_id
|
|
19
19
|
|
|
20
|
-
|
|
21
|
-
|
|
20
|
+
@property
|
|
21
|
+
def _id(self) -> UUID:
|
|
22
|
+
return self.__id
|
|
23
|
+
|
|
24
|
+
@property
|
|
25
|
+
def _name(self) -> str:
|
|
26
|
+
return self.__name
|
|
22
27
|
|
|
23
28
|
@property
|
|
24
|
-
def
|
|
25
|
-
|
|
26
|
-
return self._name
|
|
29
|
+
def _dir_id(self) -> Optional[UUID]:
|
|
30
|
+
return self.__dir_id
|
|
27
31
|
|
|
28
32
|
@property
|
|
29
|
-
def
|
|
33
|
+
def _parent(self) -> Optional['catalog.Dir']:
|
|
30
34
|
"""Returns the parent directory of this schema object."""
|
|
31
35
|
from pixeltable import catalog
|
|
32
36
|
if self._dir_id is None:
|
|
@@ -36,19 +40,27 @@ class SchemaObject:
|
|
|
36
40
|
return dir
|
|
37
41
|
|
|
38
42
|
@property
|
|
39
|
-
def
|
|
43
|
+
def _path(self) -> str:
|
|
40
44
|
"""Returns the path to this schema object."""
|
|
41
|
-
parent = self.
|
|
42
|
-
if parent is None or parent.
|
|
45
|
+
parent = self._parent
|
|
46
|
+
if parent is None or parent._parent is None:
|
|
43
47
|
# Either this is the root directory, with empty path, or its parent is the
|
|
44
48
|
# root directory. Either way, we return just the name.
|
|
45
|
-
return self.
|
|
49
|
+
return self._name
|
|
46
50
|
else:
|
|
47
|
-
return f'{parent.
|
|
51
|
+
return f'{parent._path}.{self._name}'
|
|
52
|
+
|
|
53
|
+
def get_metadata(self) -> dict[str, Any]:
|
|
54
|
+
"""Returns metadata associated with this schema object."""
|
|
55
|
+
return {
|
|
56
|
+
'name': self._name,
|
|
57
|
+
'path': self._path,
|
|
58
|
+
'parent': self._parent._path if self._parent is not None else None,
|
|
59
|
+
}
|
|
48
60
|
|
|
49
61
|
@classmethod
|
|
50
62
|
@abstractmethod
|
|
51
|
-
def
|
|
63
|
+
def _display_name(cls) -> str:
|
|
52
64
|
"""
|
|
53
65
|
Return name displayed in error messages.
|
|
54
66
|
"""
|
|
@@ -56,5 +68,5 @@ class SchemaObject:
|
|
|
56
68
|
|
|
57
69
|
def _move(self, new_name: str, new_dir_id: UUID) -> None:
|
|
58
70
|
"""Subclasses need to override this to make the change persistent"""
|
|
59
|
-
self._name = new_name
|
|
60
|
-
self._dir_id = new_dir_id
|
|
71
|
+
self.__name = new_name
|
|
72
|
+
self.__dir_id = new_dir_id
|