pixeltable 0.2.21__py3-none-any.whl → 0.2.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +2 -2
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -1
- pixeltable/catalog/column.py +41 -29
- pixeltable/catalog/globals.py +18 -0
- pixeltable/catalog/insertable_table.py +30 -10
- pixeltable/catalog/table.py +198 -86
- pixeltable/catalog/table_version.py +47 -53
- pixeltable/catalog/table_version_path.py +2 -2
- pixeltable/catalog/view.py +17 -18
- pixeltable/dataframe.py +27 -36
- pixeltable/env.py +7 -0
- pixeltable/exec/__init__.py +0 -1
- pixeltable/exec/aggregation_node.py +6 -3
- pixeltable/exec/cache_prefetch_node.py +189 -43
- pixeltable/exec/data_row_batch.py +5 -22
- pixeltable/exec/exec_context.py +2 -2
- pixeltable/exec/exec_node.py +3 -2
- pixeltable/exec/expr_eval_node.py +23 -16
- pixeltable/exec/in_memory_data_node.py +6 -3
- pixeltable/exec/sql_node.py +24 -25
- pixeltable/exprs/arithmetic_expr.py +12 -5
- pixeltable/exprs/array_slice.py +7 -7
- pixeltable/exprs/column_property_ref.py +37 -10
- pixeltable/exprs/column_ref.py +97 -14
- pixeltable/exprs/comparison.py +10 -5
- pixeltable/exprs/compound_predicate.py +8 -7
- pixeltable/exprs/data_row.py +27 -18
- pixeltable/exprs/expr.py +53 -52
- pixeltable/exprs/expr_set.py +5 -0
- pixeltable/exprs/function_call.py +32 -16
- pixeltable/exprs/globals.py +4 -1
- pixeltable/exprs/in_predicate.py +8 -7
- pixeltable/exprs/inline_expr.py +4 -4
- pixeltable/exprs/is_null.py +4 -4
- pixeltable/exprs/json_mapper.py +11 -12
- pixeltable/exprs/json_path.py +6 -11
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +5 -4
- pixeltable/exprs/object_ref.py +2 -1
- pixeltable/exprs/row_builder.py +88 -36
- pixeltable/exprs/rowid_ref.py +12 -11
- pixeltable/exprs/similarity_expr.py +12 -7
- pixeltable/exprs/sql_element_cache.py +7 -5
- pixeltable/exprs/type_cast.py +8 -6
- pixeltable/exprs/variable.py +5 -4
- pixeltable/func/aggregate_function.py +9 -9
- pixeltable/func/expr_template_function.py +6 -5
- pixeltable/func/function.py +11 -10
- pixeltable/func/udf.py +6 -11
- pixeltable/functions/__init__.py +2 -2
- pixeltable/functions/globals.py +5 -7
- pixeltable/functions/huggingface.py +155 -45
- pixeltable/functions/llama_cpp.py +107 -0
- pixeltable/functions/mistralai.py +1 -1
- pixeltable/functions/ollama.py +147 -0
- pixeltable/functions/openai.py +1 -1
- pixeltable/functions/replicate.py +72 -0
- pixeltable/functions/string.py +9 -0
- pixeltable/functions/together.py +1 -1
- pixeltable/functions/util.py +5 -2
- pixeltable/globals.py +67 -26
- pixeltable/index/btree.py +16 -3
- pixeltable/index/embedding_index.py +4 -4
- pixeltable/io/__init__.py +1 -2
- pixeltable/io/fiftyone.py +178 -0
- pixeltable/io/globals.py +96 -2
- pixeltable/iterators/base.py +3 -2
- pixeltable/iterators/document.py +1 -1
- pixeltable/iterators/video.py +120 -63
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_21.py +34 -0
- pixeltable/metadata/converters/util.py +45 -4
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +8 -0
- pixeltable/plan.py +17 -15
- pixeltable/py.typed +0 -0
- pixeltable/store.py +7 -2
- pixeltable/tool/create_test_db_dump.py +1 -1
- pixeltable/tool/create_test_video.py +1 -1
- pixeltable/tool/embed_udf.py +1 -1
- pixeltable/tool/mypy_plugin.py +28 -5
- pixeltable/type_system.py +100 -36
- pixeltable/utils/coco.py +5 -5
- pixeltable/utils/documents.py +15 -1
- pixeltable/utils/formatter.py +12 -13
- pixeltable/utils/s3.py +6 -3
- {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/METADATA +158 -49
- pixeltable-0.2.23.dist-info/RECORD +153 -0
- pixeltable/exec/media_validation_node.py +0 -43
- pixeltable-0.2.21.dist-info/RECORD +0 -148
- {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/entry_points.txt +0 -0
pixeltable/__init__.py
CHANGED
|
@@ -3,8 +3,8 @@ from .dataframe import DataFrame
|
|
|
3
3
|
from .exceptions import Error
|
|
4
4
|
from .exprs import RELATIVE_PATH_ROOT
|
|
5
5
|
from .func import Aggregator, Function, expr_udf, uda, udf
|
|
6
|
-
from .globals import (array, configure_logging, create_dir, create_table, create_view, drop_dir,
|
|
7
|
-
init, list_dirs, list_functions, list_tables, move)
|
|
6
|
+
from .globals import (array, configure_logging, create_dir, create_snapshot, create_table, create_view, drop_dir,
|
|
7
|
+
drop_table, get_table, init, list_dirs, list_functions, list_tables, move)
|
|
8
8
|
from .type_system import (Array, ArrayType, Audio, AudioType, Bool, BoolType, ColumnType, Document, DocumentType, Float,
|
|
9
9
|
FloatType, Image, ImageType, Int, IntType, Json, JsonType, Required, String, StringType,
|
|
10
10
|
Timestamp, TimestampType, Video, VideoType)
|
pixeltable/__version__.py
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
# These version placeholders will be replaced during build.
|
|
2
|
-
__version__ = "0.2.21"
|
|
3
|
-
__version_tuple__ = (0, 2, 21)
|
|
2
|
+
__version__ = "0.2.23"
|
|
3
|
+
__version_tuple__ = (0, 2, 23)
|
pixeltable/catalog/__init__.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from .catalog import Catalog
|
|
2
2
|
from .column import Column
|
|
3
3
|
from .dir import Dir
|
|
4
|
-
from .globals import UpdateStatus, is_valid_identifier, is_valid_path
|
|
4
|
+
from .globals import UpdateStatus, is_valid_identifier, is_valid_path, MediaValidation
|
|
5
5
|
from .insertable_table import InsertableTable
|
|
6
6
|
from .named_function import NamedFunction
|
|
7
7
|
from .path import Path
|
pixeltable/catalog/column.py
CHANGED
|
@@ -1,31 +1,49 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
-
from typing import TYPE_CHECKING, Any,
|
|
4
|
+
from typing import TYPE_CHECKING, Any, Optional
|
|
5
5
|
|
|
6
6
|
import sqlalchemy as sql
|
|
7
7
|
|
|
8
8
|
import pixeltable.exceptions as excs
|
|
9
9
|
import pixeltable.type_system as ts
|
|
10
10
|
from pixeltable import exprs
|
|
11
|
-
|
|
12
|
-
from .globals import is_valid_identifier
|
|
11
|
+
from .globals import is_valid_identifier, MediaValidation
|
|
13
12
|
|
|
14
13
|
if TYPE_CHECKING:
|
|
15
14
|
from .table_version import TableVersion
|
|
16
15
|
|
|
17
16
|
_logger = logging.getLogger('pixeltable')
|
|
18
17
|
|
|
18
|
+
|
|
19
19
|
class Column:
|
|
20
20
|
"""Representation of a column in the schema of a Table/DataFrame.
|
|
21
21
|
|
|
22
22
|
A Column contains all the metadata necessary for executing queries and updates against a particular version of a
|
|
23
23
|
table/view.
|
|
24
24
|
"""
|
|
25
|
+
name: str
|
|
26
|
+
id: Optional[int]
|
|
27
|
+
col_type: ts.ColumnType
|
|
28
|
+
stored: bool
|
|
29
|
+
is_pk: bool
|
|
30
|
+
_media_validation: Optional[MediaValidation] # if not set, TableVersion.media_validation applies
|
|
31
|
+
schema_version_add: Optional[int]
|
|
32
|
+
schema_version_drop: Optional[int]
|
|
33
|
+
_records_errors: Optional[bool]
|
|
34
|
+
sa_col: Optional[sql.schema.Column]
|
|
35
|
+
sa_col_type: Optional[sql.sqltypes.TypeEngine]
|
|
36
|
+
sa_errormsg_col: Optional[sql.schema.Column]
|
|
37
|
+
sa_errortype_col: Optional[sql.schema.Column]
|
|
38
|
+
_value_expr: Optional[exprs.Expr]
|
|
39
|
+
value_expr_dict: Optional[dict[str, Any]]
|
|
40
|
+
dependent_cols: set[Column]
|
|
41
|
+
tbl: Optional[TableVersion]
|
|
42
|
+
|
|
25
43
|
def __init__(
|
|
26
44
|
self, name: Optional[str], col_type: Optional[ts.ColumnType] = None,
|
|
27
|
-
computed_with: Optional[
|
|
28
|
-
is_pk: bool = False, stored: bool = True,
|
|
45
|
+
computed_with: Optional[exprs.Expr] = None,
|
|
46
|
+
is_pk: bool = False, stored: bool = True, media_validation: Optional[MediaValidation] = None,
|
|
29
47
|
col_id: Optional[int] = None, schema_version_add: Optional[int] = None,
|
|
30
48
|
schema_version_drop: Optional[int] = None, sa_col_type: Optional[sql.sqltypes.TypeEngine] = None,
|
|
31
49
|
records_errors: Optional[bool] = None, value_expr_dict: Optional[dict[str, Any]] = None,
|
|
@@ -35,7 +53,7 @@ class Column:
|
|
|
35
53
|
Args:
|
|
36
54
|
name: column name; None for system columns (eg, index columns)
|
|
37
55
|
col_type: column type; can be None if the type can be derived from ``computed_with``
|
|
38
|
-
computed_with:
|
|
56
|
+
computed_with: an Expr that computes the column value
|
|
39
57
|
is_pk: if True, this column is part of the primary key
|
|
40
58
|
stored: determines whether a computed column is present in the stored table or recomputed on demand
|
|
41
59
|
col_id: column ID (only used internally)
|
|
@@ -45,11 +63,6 @@ class Column:
|
|
|
45
63
|
col_type is None
|
|
46
64
|
- when loaded from md store: ``computed_with`` is set and col_type is set
|
|
47
65
|
|
|
48
|
-
``computed_with`` is a Callable:
|
|
49
|
-
- the callable's parameter names must correspond to existing columns in the table for which this Column
|
|
50
|
-
is being used
|
|
51
|
-
- ``col_type`` needs to be set to the callable's return type
|
|
52
|
-
|
|
53
66
|
``stored`` (only valid for computed image columns):
|
|
54
67
|
- if True: the column is present in the stored table
|
|
55
68
|
- if False: the column is not present in the stored table and recomputed during a query
|
|
@@ -62,21 +75,13 @@ class Column:
|
|
|
62
75
|
raise excs.Error(f'Column `{name}`: col_type is required if computed_with is not specified')
|
|
63
76
|
|
|
64
77
|
self._value_expr: Optional[exprs.Expr] = None
|
|
65
|
-
self.compute_func: Optional[Callable] = None
|
|
66
78
|
self.value_expr_dict = value_expr_dict
|
|
67
79
|
if computed_with is not None:
|
|
68
80
|
value_expr = exprs.Expr.from_object(computed_with)
|
|
69
81
|
if value_expr is None:
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
f'Column {name}: computed_with needs to be either a Pixeltable expression or a Callable, '
|
|
74
|
-
f'but it is a {type(computed_with)}')
|
|
75
|
-
if col_type is None:
|
|
76
|
-
raise excs.Error(f'Column {name}: col_type is required if computed_with is a Callable')
|
|
77
|
-
# we need to turn the computed_with function into an Expr, but this requires resolving
|
|
78
|
-
# column name references and for that we need to wait until we're assigned to a Table
|
|
79
|
-
self.compute_func = computed_with
|
|
82
|
+
raise excs.Error(
|
|
83
|
+
f'Column {name}: computed_with needs to be a valid Pixeltable expression, '
|
|
84
|
+
f'but it is a {type(computed_with)}')
|
|
80
85
|
else:
|
|
81
86
|
self._value_expr = value_expr.copy()
|
|
82
87
|
self.col_type = self._value_expr.col_type
|
|
@@ -86,24 +91,24 @@ class Column:
|
|
|
86
91
|
assert self.col_type is not None
|
|
87
92
|
|
|
88
93
|
self.stored = stored
|
|
89
|
-
self.dependent_cols: set[Column] = set() # cols with value_exprs that reference us; set by TableVersion
|
|
94
|
+
self.dependent_cols = set() # cols with value_exprs that reference us; set by TableVersion
|
|
90
95
|
self.id = col_id
|
|
91
96
|
self.is_pk = is_pk
|
|
97
|
+
self._media_validation = media_validation
|
|
92
98
|
self.schema_version_add = schema_version_add
|
|
93
99
|
self.schema_version_drop = schema_version_drop
|
|
94
100
|
|
|
95
101
|
self._records_errors = records_errors
|
|
96
102
|
|
|
97
103
|
# column in the stored table for the values of this Column
|
|
98
|
-
self.sa_col: Optional[sql.schema.Column] = None
|
|
104
|
+
self.sa_col = None
|
|
99
105
|
self.sa_col_type = sa_col_type
|
|
100
106
|
|
|
101
107
|
# computed cols also have storage columns for the exception string and type
|
|
102
|
-
self.sa_errormsg_col: Optional[sql.schema.Column] = None
|
|
103
|
-
self.sa_errortype_col: Optional[sql.schema.Column] = None
|
|
108
|
+
self.sa_errormsg_col = None
|
|
109
|
+
self.sa_errortype_col = None
|
|
104
110
|
|
|
105
|
-
|
|
106
|
-
self.tbl: Optional[TableVersion] = None # set by owning TableVersion
|
|
111
|
+
self.tbl = None # set by owning TableVersion
|
|
107
112
|
|
|
108
113
|
@property
|
|
109
114
|
def value_expr(self) -> Optional[exprs.Expr]:
|
|
@@ -139,7 +144,7 @@ class Column:
|
|
|
139
144
|
|
|
140
145
|
@property
|
|
141
146
|
def is_computed(self) -> bool:
|
|
142
|
-
return self.compute_func is not None or self._value_expr is not None or self.value_expr_dict is not None
|
|
147
|
+
return self._value_expr is not None or self.value_expr_dict is not None
|
|
143
148
|
|
|
144
149
|
@property
|
|
145
150
|
def is_stored(self) -> bool:
|
|
@@ -160,6 +165,13 @@ class Column:
|
|
|
160
165
|
assert self.tbl is not None
|
|
161
166
|
return f'{self.tbl.name}.{self.name}'
|
|
162
167
|
|
|
168
|
+
@property
|
|
169
|
+
def media_validation(self) -> MediaValidation:
|
|
170
|
+
if self._media_validation is not None:
|
|
171
|
+
return self._media_validation
|
|
172
|
+
assert self.tbl is not None
|
|
173
|
+
return self.tbl.media_validation
|
|
174
|
+
|
|
163
175
|
def source(self) -> None:
|
|
164
176
|
"""
|
|
165
177
|
If this is a computed col and the top-level expr is a function call, print the source, if possible.
|
pixeltable/catalog/globals.py
CHANGED
|
@@ -1,8 +1,12 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
1
2
|
import dataclasses
|
|
3
|
+
import enum
|
|
2
4
|
import itertools
|
|
3
5
|
import logging
|
|
4
6
|
from typing import Optional
|
|
5
7
|
|
|
8
|
+
import pixeltable.exceptions as excs
|
|
9
|
+
|
|
6
10
|
_logger = logging.getLogger('pixeltable')
|
|
7
11
|
|
|
8
12
|
# name of the position column in a component view
|
|
@@ -34,6 +38,20 @@ class UpdateStatus:
|
|
|
34
38
|
self.cols_with_excs = list(dict.fromkeys(self.cols_with_excs + other.cols_with_excs))
|
|
35
39
|
return self
|
|
36
40
|
|
|
41
|
+
|
|
42
|
+
class MediaValidation(enum.Enum):
|
|
43
|
+
ON_READ = 0
|
|
44
|
+
ON_WRITE = 1
|
|
45
|
+
|
|
46
|
+
@classmethod
|
|
47
|
+
def validated(cls, name: str, error_prefix: str) -> MediaValidation:
|
|
48
|
+
try:
|
|
49
|
+
return cls[name.upper()]
|
|
50
|
+
except KeyError:
|
|
51
|
+
val_strs = ', '.join(f'{s.lower()!r}' for s in cls.__members__.keys())
|
|
52
|
+
raise excs.Error(f'{error_prefix} must be one of: [{val_strs}]')
|
|
53
|
+
|
|
54
|
+
|
|
37
55
|
def is_valid_identifier(name: str) -> bool:
|
|
38
56
|
return name.isidentifier() and not name.startswith('_')
|
|
39
57
|
|
pixeltable/catalog/insertable_table.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
-
from typing import Any,
|
|
4
|
+
from typing import Any, Iterable, Literal, Optional, overload
|
|
5
5
|
from uuid import UUID
|
|
6
6
|
|
|
7
7
|
import sqlalchemy.orm as orm
|
|
@@ -13,7 +13,7 @@ from pixeltable.env import Env
|
|
|
13
13
|
from pixeltable.utils.filecache import FileCache
|
|
14
14
|
|
|
15
15
|
from .catalog import Catalog
|
|
16
|
-
from .globals import UpdateStatus
|
|
16
|
+
from .globals import MediaValidation, UpdateStatus
|
|
17
17
|
from .table import Table
|
|
18
18
|
from .table_version import TableVersion
|
|
19
19
|
from .table_version_path import TableVersionPath
|
|
@@ -35,8 +35,8 @@ class InsertableTable(Table):
|
|
|
35
35
|
# MODULE-LOCAL, NOT PUBLIC
|
|
36
36
|
@classmethod
|
|
37
37
|
def _create(
|
|
38
|
-
|
|
39
|
-
|
|
38
|
+
cls, dir_id: UUID, name: str, schema: dict[str, ts.ColumnType], df: Optional[pxt.DataFrame],
|
|
39
|
+
primary_key: list[str], num_retained_versions: int, comment: str, media_validation: MediaValidation
|
|
40
40
|
) -> InsertableTable:
|
|
41
41
|
columns = cls._create_columns(schema)
|
|
42
42
|
cls._verify_schema(columns)
|
|
@@ -50,7 +50,9 @@ class InsertableTable(Table):
|
|
|
50
50
|
col.is_pk = True
|
|
51
51
|
|
|
52
52
|
with orm.Session(Env.get().engine, future=True) as session:
|
|
53
|
-
_, tbl_version = TableVersion.create(
|
|
53
|
+
_, tbl_version = TableVersion.create(
|
|
54
|
+
session, dir_id, name, columns, num_retained_versions=num_retained_versions, comment=comment,
|
|
55
|
+
media_validation=media_validation)
|
|
54
56
|
tbl = cls(dir_id, tbl_version)
|
|
55
57
|
# TODO We need to commit before doing the insertion, in order to avoid a primary key (version) collision
|
|
56
58
|
# when the table metadata gets updated. Once we have a notion of user-defined transactions in
|
|
@@ -77,15 +79,31 @@ class InsertableTable(Table):
|
|
|
77
79
|
|
|
78
80
|
@overload
|
|
79
81
|
def insert(
|
|
80
|
-
|
|
82
|
+
self,
|
|
83
|
+
rows: Iterable[dict[str, Any]],
|
|
84
|
+
/,
|
|
85
|
+
*,
|
|
86
|
+
print_stats: bool = False,
|
|
87
|
+
on_error: Literal['abort', 'ignore'] = 'abort'
|
|
81
88
|
) -> UpdateStatus: ...
|
|
82
89
|
|
|
83
90
|
@overload
|
|
84
|
-
def insert(
|
|
91
|
+
def insert(
|
|
92
|
+
self,
|
|
93
|
+
*,
|
|
94
|
+
print_stats: bool = False,
|
|
95
|
+
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
96
|
+
**kwargs: Any
|
|
97
|
+
) -> UpdateStatus: ...
|
|
85
98
|
|
|
86
99
|
def insert( # type: ignore[misc]
|
|
87
|
-
|
|
88
|
-
|
|
100
|
+
self,
|
|
101
|
+
rows: Optional[Iterable[dict[str, Any]]] = None,
|
|
102
|
+
/,
|
|
103
|
+
*,
|
|
104
|
+
print_stats: bool = False,
|
|
105
|
+
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
106
|
+
**kwargs: Any
|
|
89
107
|
) -> UpdateStatus:
|
|
90
108
|
if rows is None:
|
|
91
109
|
rows = [kwargs]
|
|
@@ -94,6 +112,8 @@ class InsertableTable(Table):
|
|
|
94
112
|
if len(kwargs) > 0:
|
|
95
113
|
raise excs.Error('`kwargs` cannot be specified unless `rows is None`.')
|
|
96
114
|
|
|
115
|
+
fail_on_exception = on_error == 'abort'
|
|
116
|
+
|
|
97
117
|
if not isinstance(rows, list):
|
|
98
118
|
raise excs.Error('rows must be a list of dictionaries')
|
|
99
119
|
if len(rows) == 0:
|
|
@@ -119,7 +139,7 @@ class InsertableTable(Table):
|
|
|
119
139
|
FileCache.get().emit_eviction_warnings()
|
|
120
140
|
return status
|
|
121
141
|
|
|
122
|
-
def _validate_input_rows(self, rows:
|
|
142
|
+
def _validate_input_rows(self, rows: list[dict[str, Any]]) -> None:
|
|
123
143
|
"""Verify that the input rows match the table schema"""
|
|
124
144
|
valid_col_names = set(self._schema.keys())
|
|
125
145
|
reqd_col_names = set(self._tbl_version_path.tbl_version.get_required_col_names())
|