pixeltable 0.3.9__py3-none-any.whl → 0.3.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of pixeltable might be problematic.
- pixeltable/__init__.py +1 -2
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/column.py +5 -0
- pixeltable/catalog/globals.py +16 -0
- pixeltable/catalog/insertable_table.py +82 -41
- pixeltable/catalog/table.py +78 -55
- pixeltable/catalog/table_version.py +18 -3
- pixeltable/catalog/view.py +9 -2
- pixeltable/env.py +1 -1
- pixeltable/exec/exec_node.py +1 -1
- pixeltable/exprs/__init__.py +2 -1
- pixeltable/exprs/arithmetic_expr.py +2 -0
- pixeltable/exprs/column_ref.py +36 -0
- pixeltable/exprs/expr.py +39 -9
- pixeltable/exprs/globals.py +12 -0
- pixeltable/exprs/json_mapper.py +1 -1
- pixeltable/exprs/json_path.py +0 -6
- pixeltable/exprs/similarity_expr.py +5 -20
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/ext/functions/yolox.py +21 -64
- pixeltable/func/tools.py +2 -2
- pixeltable/functions/__init__.py +1 -1
- pixeltable/functions/globals.py +16 -5
- pixeltable/globals.py +85 -33
- pixeltable/io/__init__.py +3 -2
- pixeltable/io/datarows.py +138 -0
- pixeltable/io/external_store.py +8 -5
- pixeltable/io/globals.py +7 -160
- pixeltable/io/hf_datasets.py +21 -98
- pixeltable/io/pandas.py +29 -43
- pixeltable/io/parquet.py +17 -42
- pixeltable/io/table_data_conduit.py +569 -0
- pixeltable/io/utils.py +6 -21
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_30.py +50 -0
- pixeltable/metadata/converters/util.py +26 -1
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +3 -0
- pixeltable/utils/arrow.py +32 -7
- {pixeltable-0.3.9.dist-info → pixeltable-0.3.10.dist-info}/METADATA +1 -1
- {pixeltable-0.3.9.dist-info → pixeltable-0.3.10.dist-info}/RECORD +44 -40
- {pixeltable-0.3.9.dist-info → pixeltable-0.3.10.dist-info}/WHEEL +1 -1
- {pixeltable-0.3.9.dist-info → pixeltable-0.3.10.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.9.dist-info → pixeltable-0.3.10.dist-info}/entry_points.txt +0 -0
pixeltable/__init__.py
CHANGED
@@ -3,8 +3,7 @@
 from .__version__ import __version__, __version_tuple__
 from .catalog import Column, InsertableTable, Table, UpdateStatus, View
 from .dataframe import DataFrame
-from .exceptions import Error, PixeltableWarning
-from .exprs import RELATIVE_PATH_ROOT
+from .exceptions import Error, ExprEvalError, PixeltableWarning
 from .func import Aggregator, Function, expr_udf, query, uda, udf
 from .globals import (
     array,
pixeltable/__version__.py
CHANGED
@@ -1,3 +1,3 @@
 # These version placeholders will be replaced during build.
-__version__ = '0.3.9'
-__version_tuple__ = (0, 3, 9)
+__version__ = '0.3.10'
+__version_tuple__ = (0, 3, 10)
pixeltable/catalog/column.py
CHANGED
@@ -202,6 +202,11 @@ class Column:
         assert self.tbl is not None
         return self.tbl.get().media_validation

+    @property
+    def is_required_for_insert(self) -> bool:
+        """Returns True if column is required when inserting rows."""
+        return not self.col_type.nullable and not self.is_computed
+
     def source(self) -> None:
         """
         If this is a computed col and the top-level expr is a function call, print the source, if possible.
pixeltable/catalog/globals.py
CHANGED
@@ -40,6 +40,22 @@ class UpdateStatus:
         self.cols_with_excs = list(dict.fromkeys(self.cols_with_excs + other.cols_with_excs))
         return self

+    @property
+    def insert_msg(self) -> str:
+        """Return a message describing the results of an insert operation."""
+        if self.num_excs == 0:
+            cols_with_excs_str = ''
+        else:
+            cols_with_excs_str = (
+                f' across {len(self.cols_with_excs)} column{"" if len(self.cols_with_excs) == 1 else "s"}'
+            )
+            cols_with_excs_str += f' ({", ".join(self.cols_with_excs)})'
+        msg = (
+            f'Inserted {self.num_rows} row{"" if self.num_rows == 1 else "s"} '
+            f'with {self.num_excs} error{"" if self.num_excs == 1 else "s"}{cols_with_excs_str}.'
+        )
+        return msg
+

 class MediaValidation(enum.Enum):
     ON_READ = 0
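The new `insert_msg` property centralizes the summary line that `InsertableTable.insert()` used to format inline (the matching removal appears in the `insertable_table.py` hunk below). As a standalone sketch of the resulting message format — the free function here is illustrative only; the real property reads `self.num_rows`, `self.num_excs`, and `self.cols_with_excs`:

```python
# Illustrative re-implementation of the insert_msg formatting shown in the hunk above.
def insert_msg(num_rows: int, num_excs: int, cols_with_excs: list[str]) -> str:
    if num_excs == 0:
        cols_str = ''
    else:
        cols_str = f' across {len(cols_with_excs)} column{"" if len(cols_with_excs) == 1 else "s"}'
        cols_str += f' ({", ".join(cols_with_excs)})'
    return (
        f'Inserted {num_rows} row{"" if num_rows == 1 else "s"} '
        f'with {num_excs} error{"" if num_excs == 1 else "s"}{cols_str}.'
    )

print(insert_msg(2, 0, []))     # Inserted 2 rows with 0 errors.
print(insert_msg(2, 1, ['c']))  # Inserted 2 rows with 1 error across 1 column (c).
```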
pixeltable/catalog/insertable_table.py
CHANGED
@@ -1,7 +1,8 @@
 from __future__ import annotations

+import enum
 import logging
-from typing import Any, Iterable, Literal, Optional, overload
+from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, overload
 from uuid import UUID

 import pixeltable as pxt
@@ -16,9 +17,36 @@ from .table_version import TableVersion
 from .table_version_handle import TableVersionHandle
 from .table_version_path import TableVersionPath

+if TYPE_CHECKING:
+    import datasets  # type: ignore[import-untyped]
+
+    from pixeltable.globals import RowData, TableDataSource
+    from pixeltable.io.table_data_conduit import TableDataConduit
+
 _logger = logging.getLogger('pixeltable')


+class OnErrorParameter(enum.Enum):
+    """Supported values for the on_error parameter"""
+
+    ABORT = 'abort'
+    IGNORE = 'ignore'
+
+    @classmethod
+    def is_valid(cls, v: Any) -> bool:
+        if isinstance(v, str):
+            return v.lower() in [c.value for c in cls]
+        return False
+
+    @classmethod
+    def fail_on_exception(cls, v: Any) -> bool:
+        if not cls.is_valid(v):
+            raise ValueError(f'Invalid value for on_error: {v}')
+        if isinstance(v, str):
+            return v.lower() != cls.IGNORE.value
+        return True
+
+
 class InsertableTable(Table):
     """A `Table` that allows inserting and deleting rows."""

@@ -86,62 +114,75 @@ class InsertableTable(Table):
     @overload
     def insert(
         self,
+        source: Optional[TableDataSource] = None,
         /,
         *,
+        source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
+        schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
         on_error: Literal['abort', 'ignore'] = 'abort',
+        print_stats: bool = False,
+        **kwargs: Any,
     ) -> UpdateStatus: ...

     @overload
     def insert(
-        self, *,
+        self, /, *, on_error: Literal['abort', 'ignore'] = 'abort', print_stats: bool = False, **kwargs: Any
     ) -> UpdateStatus: ...

-    def insert(
+    def insert(
         self,
+        source: Optional[TableDataSource] = None,
         /,
         *,
+        source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
+        schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
         on_error: Literal['abort', 'ignore'] = 'abort',
+        print_stats: bool = False,
         **kwargs: Any,
     ) -> UpdateStatus:
-        if not isinstance(rows, list):
-            raise excs.Error('rows must be a list of dictionaries')
-        if len(rows) == 0:
-            raise excs.Error('rows must not be empty')
-        for row in rows:
-            if not isinstance(row, dict):
-                raise excs.Error('rows must be a list of dictionaries')
-        self._validate_input_rows(rows)
-        with Env.get().begin_xact():
-            status = self._tbl_version.get().insert(
-                rows, None, print_stats=print_stats, fail_on_exception=fail_on_exception
-            )
-
-        if status.num_excs == 0:
-            cols_with_excs_str = ''
-        else:
-            cols_with_excs_str = (
-                f' across {len(status.cols_with_excs)} column{"" if len(status.cols_with_excs) == 1 else "s"}'
-            )
-            cols_with_excs_str += f' ({", ".join(status.cols_with_excs)})'
-        msg = (
-            f'Inserted {status.num_rows} row{"" if status.num_rows == 1 else "s"} '
-            f'with {status.num_excs} error{"" if status.num_excs == 1 else "s"}{cols_with_excs_str}.'
+        from pixeltable.io.table_data_conduit import UnkTableDataConduit
+
+        table = self
+        if source is None:
+            source = [kwargs]
+            kwargs = None
+
+        tds = UnkTableDataConduit(
+            source, source_format=source_format, src_schema_overrides=schema_overrides, extra_fields=kwargs
         )
+        data_source = tds.specialize()
+        if data_source.source_column_map is None:
+            data_source.src_pk = []
+
+        assert isinstance(table, Table)
+        data_source.add_table_info(table)
+        data_source.prepare_for_insert_into_table()
+
+        fail_on_exception = OnErrorParameter.fail_on_exception(on_error)
+        return table.insert_table_data_source(
+            data_source=data_source, fail_on_exception=fail_on_exception, print_stats=print_stats
+        )
+
+    def insert_table_data_source(
+        self, data_source: TableDataConduit, fail_on_exception: bool, print_stats: bool = False
+    ) -> pxt.UpdateStatus:
+        """Insert row batches into this table from a `TableDataConduit`."""
+        from pixeltable.io.table_data_conduit import DFTableDataConduit, TableDataConduit
+
+        status = pxt.UpdateStatus()
+        with Env.get().begin_xact():
+            if isinstance(data_source, DFTableDataConduit):
+                status += self._tbl_version.get().insert(
+                    rows=None, df=data_source.pxt_df, print_stats=print_stats, fail_on_exception=fail_on_exception
+                )
+            else:
+                for row_batch in data_source.valid_row_batch():
+                    status += self._tbl_version.get().insert(
+                        rows=row_batch, df=None, print_stats=print_stats, fail_on_exception=fail_on_exception
+                    )
+
+        Env.get().console_logger.info(status.insert_msg)
+
         FileCache.get().emit_eviction_warnings()
         return status
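Taken together, these changes turn `insert()` into a general data-source front end: a positional `source` (rows, a DataFrame, a file path, etc.) is wrapped in a `TableDataConduit` and inserted in row batches. A usage sketch based on the new signature and the docstring examples in this diff; the table name, column names, and CSV path are hypothetical:

```python
import pixeltable as pxt

tbl = pxt.get_table('my_table')  # assumed to have int columns a, b, c

# Single row via keyword arguments (unchanged call pattern):
tbl.insert(a=3, b=3, c=3)

# Bulk insert from a data source, with a format hint and error tolerance:
status = tbl.insert(
    'path/to/file.csv',
    source_format='csv',
    on_error='ignore',   # failed cells become None instead of aborting the insert
    print_stats=True,
)
print(status.insert_msg)  # e.g. 'Inserted 10 rows with 0 errors.'
```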
pixeltable/catalog/table.py
CHANGED
@@ -8,6 +8,7 @@ from pathlib import Path
 from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, Union, overload

 from typing import _GenericAlias  # type: ignore[attr-defined]  # isort: skip
+from keyword import iskeyword as is_python_keyword
 from uuid import UUID

 import pandas as pd
@@ -42,9 +43,11 @@ from .table_version_handle import TableVersionHandle
 from .table_version_path import TableVersionPath

 if TYPE_CHECKING:
+    import datasets  # type: ignore[import-untyped]
     import torch.utils.data

     import pixeltable.plan
+    from pixeltable.globals import RowData, TableDataSource

 _logger = logging.getLogger('pixeltable')

@@ -720,13 +723,18 @@ class Table(SchemaObject):
             columns.append(column)
         return columns

+    @classmethod
+    def validate_column_name(cls, name: str) -> None:
+        """Check that a name is usable as a pixeltalbe column name"""
+        if is_system_column_name(name) or is_python_keyword(name):
+            raise excs.Error(f'{name!r} is a reserved name in Pixeltable; please choose a different column name.')
+        if not is_valid_identifier(name):
+            raise excs.Error(f'Invalid column name: {name!r}')
+
     @classmethod
     def _verify_column(cls, col: Column) -> None:
         """Check integrity of user-supplied Column and supply defaults"""
-            raise excs.Error(f'{col.name!r} is a reserved name in Pixeltable; please choose a different column name.')
-        if not is_valid_identifier(col.name):
-            raise excs.Error(f'Invalid column name: {col.name!r}')
+        cls.validate_column_name(col.name)
         if col.stored is False and not col.is_computed:
             raise excs.Error(f'Column {col.name!r}: stored={col.stored} only applies to computed columns')
         if col.stored is False and col.has_window_fn_call():
@@ -745,16 +753,6 @@ class Table(SchemaObject):
             cls._verify_column(col)
             column_names.add(col.name)

-    def __check_column_name_exists(self, column_name: str, include_bases: bool = False) -> None:
-        col = self._tbl_version_path.get_column(column_name, include_bases)
-        if col is None:
-            raise excs.Error(f'Column {column_name!r} unknown')
-
-    def __check_column_ref_exists(self, col_ref: ColumnRef, include_bases: bool = False) -> None:
-        exists = self._tbl_version_path.has_column(col_ref.col, include_bases)
-        if not exists:
-            raise excs.Error(f'Unknown column: {col_ref.col.qualified_name}')
-
     def drop_column(self, column: Union[str, ColumnRef], if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
         """Drop a column from the table.

@@ -907,7 +905,7 @@
         Args:
             column: The name of, or reference to, the column to be indexed; must be a `String` or `Image` column.
             idx_name: An optional name for the index. If not specified, a name such as `'idx0'` will be generated
-                automatically. If specified, the name must be unique for this table.
+                automatically. If specified, the name must be unique for this table and a valid pixeltable column name.
             embedding: The UDF to use for the embedding. Must be a UDF that accepts a single argument of type `String`
                 or `Image` (as appropriate for the column being indexed) and returns a fixed-size 1-dimensional
                 array of floats.
@@ -960,13 +958,7 @@
         """
         if self._tbl_version_path.is_snapshot():
             raise excs.Error('Cannot add an index to a snapshot')
-        col
-        if isinstance(column, str):
-            self.__check_column_name_exists(column, include_bases=True)
-            col = self._tbl_version_path.get_column(column, include_bases=True)
-        else:
-            self.__check_column_ref_exists(column, include_bases=True)
-            col = column.col
+        col = self._resolve_column_parameter(column)

         with Env.get().begin_xact():
             if idx_name is not None and idx_name in self._tbl_version.get().idxs_by_name:
@@ -986,6 +978,10 @@
             assert idx_name not in self._tbl_version.get().idxs_by_name
             from pixeltable.index import EmbeddingIndex

+            # idx_name must be a valid pixeltable column name
+            if idx_name is not None:
+                Table.validate_column_name(idx_name)
+
             # create the EmbeddingIndex instance to verify args
             idx = EmbeddingIndex(
                 col, metric=metric, embed=embedding, string_embed=string_embed, image_embed=image_embed
@@ -1049,17 +1045,28 @@
         col: Column = None
         if idx_name is None:
-                self.__check_column_name_exists(column, include_bases=True)
-                col = self._tbl_version_path.get_column(column, include_bases=True)
-            else:
-                self.__check_column_ref_exists(column, include_bases=True)
-                col = column.col
+            col = self._resolve_column_parameter(column)
         assert col is not None

         with Env.get().begin_xact():
             self._drop_index(col=col, idx_name=idx_name, _idx_class=index.EmbeddingIndex, if_not_exists=if_not_exists)

+    def _resolve_column_parameter(self, column: Union[str, ColumnRef]) -> Column:
+        """Resolve a column parameter to a Column object"""
+        col: Column = None
+        if isinstance(column, str):
+            col = self._tbl_version_path.get_column(column, include_bases=True)
+            if col is None:
+                raise excs.Error(f'Column {column!r} unknown')
+        elif isinstance(column, ColumnRef):
+            exists = self._tbl_version_path.has_column(column.col, include_bases=True)
+            if not exists:
+                raise excs.Error(f'Unknown column: {column.col.qualified_name}')
+            col = column.col
+        else:
+            raise excs.Error(f'Invalid column parameter type: {type(column)}')
+        return col
+
     def drop_index(
         self,
         *,
@@ -1115,12 +1122,7 @@
         col: Column = None
         if idx_name is None:
-                self.__check_column_name_exists(column, include_bases=True)
-                col = self._tbl_version_path.get_column(column, include_bases=True)
-            else:
-                self.__check_column_ref_exists(column, include_bases=True)
-                col = column.col
+            col = self._resolve_column_parameter(column)
         assert col is not None

         with Env.get().begin_xact():
@@ -1145,49 +1147,62 @@
                     raise excs.Error(f'Index {idx_name!r} does not exist')
                 assert _if_not_exists == IfNotExistsParam.IGNORE
                 return
+            idx_info = self._tbl_version.get().idxs_by_name[idx_name]
         else:
             if col.tbl.id != self._tbl_version.id:
                 raise excs.Error(
                     f'Column {col.name!r}: cannot drop index from column that belongs to base ({col.tbl.get().name}!r)'
                 )
+            idx_info_list = [info for info in self._tbl_version.get().idxs_by_name.values() if info.col.id == col.id]
             if _idx_class is not None:
-                if len(
+                idx_info_list = [info for info in idx_info_list if isinstance(info.idx, _idx_class)]
+            if len(idx_info_list) == 0:
                 _if_not_exists = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
                 if _if_not_exists == IfNotExistsParam.ERROR:
                     raise excs.Error(f'Column {col.name!r} does not have an index')
                 assert _if_not_exists == IfNotExistsParam.IGNORE
                 return
-            if len(
+            if len(idx_info_list) > 1:
                 raise excs.Error(f"Column {col.name!r} has multiple indices; specify 'idx_name' instead")
+            idx_info = idx_info_list[0]
+
+        # Find out if anything depends on this index
+        dependent_user_cols = [c for c in idx_info.val_col.dependent_cols if c.name is not None]
+        if len(dependent_user_cols) > 0:
+            raise excs.Error(
+                f'Cannot drop index because the following columns depend on it:\n'
+                f'{", ".join(c.name for c in dependent_user_cols)}'
+            )
+        self._tbl_version.get().drop_index(idx_info.id)

     @overload
     def insert(
         self,
+        source: TableDataSource,
         /,
         *,
+        source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
+        schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
         on_error: Literal['abort', 'ignore'] = 'abort',
+        print_stats: bool = False,
+        **kwargs: Any,
     ) -> UpdateStatus: ...

     @overload
     def insert(
-        self, *,
+        self, /, *, on_error: Literal['abort', 'ignore'] = 'abort', print_stats: bool = False, **kwargs: Any
     ) -> UpdateStatus: ...

-    @abc.abstractmethod
+    @abc.abstractmethod
     def insert(
         self,
+        source: Optional[TableDataSource] = None,
         /,
         *,
+        source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
+        schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
         on_error: Literal['abort', 'ignore'] = 'abort',
+        print_stats: bool = False,
         **kwargs: Any,
     ) -> UpdateStatus:
         """Inserts rows into this table. There are two mutually exclusive call patterns:
@@ -1196,11 +1211,12 @@

         ```python
         insert(
+            source: TableSourceDataType,
             /,
             *,
+            on_error: Literal['abort', 'ignore'] = 'abort',
             print_stats: bool = False,
+            **kwargs: Any,
         )```

         To insert just a single row, you can use the more concise syntax:
@@ -1208,23 +1224,25 @@
         ```python
         insert(
             *,
-            print_stats: bool = False,
             on_error: Literal['abort', 'ignore'] = 'abort',
+            print_stats: bool = False,
             **kwargs: Any
         )```

         Args:
-                names to values.
+            source: A data source from which data can be imported.
             kwargs: (if inserting a single row) Keyword-argument pairs representing column names and values.
+                (if inserting multiple rows) Additional keyword arguments are passed to the data source.
+            source_format: A hint about the format of the source data
+            schema_overrides: If specified, then columns in `schema_overrides` will be given the specified types
             on_error: Determines the behavior if an error occurs while evaluating a computed column or detecting an
                 invalid media file (such as a corrupt image) for one of the inserted rows.

                 - If `on_error='abort'`, then an exception will be raised and the rows will not be inserted.
                 - If `on_error='ignore'`, then execution will continue and the rows will be inserted. Any cells
+                    with errors will have a `None` value for that cell, with information about the error stored in the
+                    corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
+            print_stats: If `True`, print statistics about the cost of computed columns.

         Returns:
             An [`UpdateStatus`][pixeltable.UpdateStatus] object containing information about the update.
@@ -1236,6 +1254,7 @@
             - The table has been dropped.
             - One of the rows being inserted does not conform to the table schema.
             - An error occurs during processing of computed columns, and `on_error='ignore'`.
+            - An error occurs while importing data from a source, and `on_error='abort'`.

         Examples:
             Insert two rows into the table `my_table` with three int columns ``a``, ``b``, and ``c``.
@@ -1247,6 +1266,10 @@
             Insert a single row using the alternative syntax:

             >>> tbl.insert(a=3, b=3, c=3)
+
+            Insert rows from a CSV file:
+
+            >>> tbl.insert(source='path/to/file.csv')
         """
         raise NotImplementedError

pixeltable/catalog/table_version.py
CHANGED
@@ -225,7 +225,9 @@ class TableVersion:
         # create schema.Table
         # Column.dependent_cols for existing cols is wrong at this point, but init() will set it correctly
         column_md = cls._create_column_md(cols)
+        tbl_id = uuid.uuid4()
         table_md = schema.TableMd(
+            tbl_id=str(tbl_id),
             name=name,
             user=None,
             current_version=0,
@@ -241,11 +243,12 @@ class TableVersion:
         )
         # create a schema.Table here, we need it to call our c'tor;
         # don't add it to the session yet, we might add index metadata
-        tbl_id = uuid.uuid4()
         tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(table_md))

         # create schema.TableVersion
-        table_version_md = schema.TableVersionMd(
+        table_version_md = schema.TableVersionMd(
+            tbl_id=str(tbl_record.id), created_at=timestamp, version=0, schema_version=0, additional_md={}
+        )
         tbl_version_record = schema.TableVersion(
             tbl_id=tbl_record.id, version=0, md=dataclasses.asdict(table_version_md)
         )
@@ -261,6 +264,7 @@ class TableVersion:
             schema_col_md[col.id] = md

         schema_version_md = schema.TableSchemaVersionMd(
+            tbl_id=str(tbl_record.id),
             schema_version=0,
             preceding_schema_version=None,
             columns=schema_col_md,
@@ -1240,6 +1244,11 @@ class TableVersion:
         """Return all non-system columns"""
         return [c for c in self.cols if c.is_pk]

+    @property
+    def primary_key(self) -> list[str]:
+        """Return the names of the primary key columns"""
+        return [c.name for c in self.cols if c.is_pk]
+
     def get_required_col_names(self) -> list[str]:
         """Return the names of all columns for which values must be specified in insert()"""
         assert not self.is_view
@@ -1306,6 +1315,7 @@ class TableVersion:
     def _create_tbl_md(self) -> schema.TableMd:
         return schema.TableMd(
+            tbl_id=str(self.id),
             name=self.name,
             user=None,
             current_version=self.version,
@@ -1322,7 +1332,11 @@ class TableVersion:
     def _create_version_md(self, timestamp: float) -> schema.TableVersionMd:
         return schema.TableVersionMd(
+            tbl_id=str(self.id),
+            created_at=timestamp,
+            version=self.version,
+            schema_version=self.schema_version,
+            additional_md={},
         )
@@ -1335,6 +1349,7 @@ class TableVersion:
         )
         # preceding_schema_version to be set by the caller
         return schema.TableSchemaVersionMd(
+            tbl_id=str(self.id),
             schema_version=self.schema_version,
             preceding_schema_version=preceding_schema_version,
             columns=column_md,
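The index-related changes in `catalog/table.py` above validate `idx_name` with `Table.validate_column_name()` and route both string and `ColumnRef` column arguments through the new `_resolve_column_parameter()` helper. A usage sketch of the user-facing effect; the table name, column name, and embedding model are hypothetical, and `sentence_transformer.using(...)` is just one way to obtain an embedding UDF:

```python
import pixeltable as pxt
from pixeltable.functions.huggingface import sentence_transformer

tbl = pxt.get_table('my_table')  # assumed to have a String column 'text_col'
embed_fn = sentence_transformer.using(model_id='all-MiniLM-L6-v2')

# idx_name must be unique for this table and a valid pixeltable column name:
tbl.add_embedding_index('text_col', idx_name='text_idx', embedding=embed_fn)

# Reserved names (system column names, Python keywords) are now rejected up front, e.g.:
# tbl.add_embedding_index('text_col', idx_name='class', embedding=embed_fn)  # raises pxt.Error
```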
pixeltable/catalog/view.py
CHANGED
@@ -251,13 +251,20 @@ class View(Table):
         md['is_snapshot'] = self._tbl_version_path.is_snapshot()
         return md

+    if TYPE_CHECKING:
+        import datasets  # type: ignore[import-untyped]
+
+        from pixeltable.globals import RowData, TableDataSource
+
     def insert(
         self,
+        source: Optional[TableDataSource] = None,
         /,
         *,
+        source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
+        schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
         on_error: Literal['abort', 'ignore'] = 'abort',
+        print_stats: bool = False,
         **kwargs: Any,
     ) -> UpdateStatus:
         raise excs.Error(f'{self._display_name()} {self._name!r}: cannot insert into view')
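Although `View.insert()` now carries the same widened signature, views remain read-only and the call still raises immediately. A minimal sketch (view, table, and column names are hypothetical):

```python
import pixeltable as pxt

tbl = pxt.get_table('my_table')
v = pxt.create_view('my_view', tbl.where(tbl.a > 0))

try:
    v.insert(a=1, b=2, c=3)
except pxt.Error as e:
    print(e)  # "... cannot insert into view"
```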
pixeltable/env.py
CHANGED
@@ -567,7 +567,7 @@ class Env:
         self.__register_package('transformers')
         self.__register_package('whisper', library_name='openai-whisper')
         self.__register_package('whisperx')
-        self.__register_package('yolox', library_name='
+        self.__register_package('yolox', library_name='pixeltable-yolox')

     def __register_package(self, package_name: str, library_name: Optional[str] = None) -> None:
         is_installed: bool
pixeltable/exec/exec_node.py
CHANGED
pixeltable/exprs/__init__.py
CHANGED
@@ -16,7 +16,7 @@ from .in_predicate import InPredicate
 from .inline_expr import InlineArray, InlineDict, InlineList
 from .is_null import IsNull
 from .json_mapper import JsonMapper
-from .json_path import
+from .json_path import JsonPath
 from .literal import Literal
 from .method_ref import MethodRef
 from .object_ref import ObjectRef
@@ -24,5 +24,6 @@ from .row_builder import ColumnSlotIdx, ExecProfile, RowBuilder
 from .rowid_ref import RowidRef
 from .similarity_expr import SimilarityExpr
 from .sql_element_cache import SqlElementCache
+from .string_op import StringOp
 from .type_cast import TypeCast
 from .variable import Variable
pixeltable/exprs/arithmetic_expr.py
CHANGED
@@ -19,6 +19,8 @@ class ArithmeticExpr(Expr):
     Allows arithmetic exprs on json paths
     """

+    operator: ArithmeticOperator
+
     def __init__(self, operator: ArithmeticOperator, op1: Expr, op2: Expr):
         if op1.col_type.is_json_type() or op2.col_type.is_json_type() or operator == ArithmeticOperator.DIV:
             # we assume it's a float