pixeltable 0.3.8__py3-none-any.whl → 0.3.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -2
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +509 -103
- pixeltable/catalog/column.py +5 -0
- pixeltable/catalog/dir.py +15 -6
- pixeltable/catalog/globals.py +16 -0
- pixeltable/catalog/insertable_table.py +82 -41
- pixeltable/catalog/path.py +15 -0
- pixeltable/catalog/schema_object.py +7 -12
- pixeltable/catalog/table.py +81 -67
- pixeltable/catalog/table_version.py +23 -7
- pixeltable/catalog/view.py +9 -6
- pixeltable/env.py +15 -9
- pixeltable/exec/exec_node.py +1 -1
- pixeltable/exprs/__init__.py +2 -1
- pixeltable/exprs/arithmetic_expr.py +2 -0
- pixeltable/exprs/column_ref.py +38 -2
- pixeltable/exprs/expr.py +61 -12
- pixeltable/exprs/function_call.py +1 -4
- pixeltable/exprs/globals.py +12 -0
- pixeltable/exprs/json_mapper.py +4 -4
- pixeltable/exprs/json_path.py +10 -11
- pixeltable/exprs/similarity_expr.py +5 -20
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/ext/functions/yolox.py +21 -64
- pixeltable/func/callable_function.py +5 -2
- pixeltable/func/query_template_function.py +6 -18
- pixeltable/func/tools.py +2 -2
- pixeltable/functions/__init__.py +1 -1
- pixeltable/functions/globals.py +16 -5
- pixeltable/globals.py +172 -262
- pixeltable/io/__init__.py +3 -2
- pixeltable/io/datarows.py +138 -0
- pixeltable/io/external_store.py +8 -5
- pixeltable/io/globals.py +7 -160
- pixeltable/io/hf_datasets.py +21 -98
- pixeltable/io/pandas.py +29 -43
- pixeltable/io/parquet.py +17 -42
- pixeltable/io/table_data_conduit.py +569 -0
- pixeltable/io/utils.py +6 -21
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_30.py +50 -0
- pixeltable/metadata/converters/util.py +26 -1
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +3 -0
- pixeltable/utils/arrow.py +32 -7
- pixeltable/utils/coroutine.py +41 -0
- {pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/METADATA +1 -1
- {pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/RECORD +52 -47
- {pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/WHEEL +1 -1
- {pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/entry_points.txt +0 -0
|
@@ -177,10 +177,6 @@ class TableVersion:
|
|
|
177
177
|
# Init external stores (this needs to happen after the schema is created)
|
|
178
178
|
self._init_external_stores(tbl_md)
|
|
179
179
|
|
|
180
|
-
# Force column metadata to load, in order to surface any invalid metadata now (as warnings)
|
|
181
|
-
for col in self.cols_by_id.values():
|
|
182
|
-
_ = col.value_expr
|
|
183
|
-
|
|
184
180
|
def __hash__(self) -> int:
|
|
185
181
|
return hash(self.id)
|
|
186
182
|
|
|
@@ -229,7 +225,9 @@ class TableVersion:
|
|
|
229
225
|
# create schema.Table
|
|
230
226
|
# Column.dependent_cols for existing cols is wrong at this point, but init() will set it correctly
|
|
231
227
|
column_md = cls._create_column_md(cols)
|
|
228
|
+
tbl_id = uuid.uuid4()
|
|
232
229
|
table_md = schema.TableMd(
|
|
230
|
+
tbl_id=str(tbl_id),
|
|
233
231
|
name=name,
|
|
234
232
|
user=None,
|
|
235
233
|
current_version=0,
|
|
@@ -245,11 +243,12 @@ class TableVersion:
|
|
|
245
243
|
)
|
|
246
244
|
# create a schema.Table here, we need it to call our c'tor;
|
|
247
245
|
# don't add it to the session yet, we might add index metadata
|
|
248
|
-
tbl_id = uuid.uuid4()
|
|
249
246
|
tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(table_md))
|
|
250
247
|
|
|
251
248
|
# create schema.TableVersion
|
|
252
|
-
table_version_md = schema.TableVersionMd(
|
|
249
|
+
table_version_md = schema.TableVersionMd(
|
|
250
|
+
tbl_id=str(tbl_record.id), created_at=timestamp, version=0, schema_version=0, additional_md={}
|
|
251
|
+
)
|
|
253
252
|
tbl_version_record = schema.TableVersion(
|
|
254
253
|
tbl_id=tbl_record.id, version=0, md=dataclasses.asdict(table_version_md)
|
|
255
254
|
)
|
|
@@ -265,6 +264,7 @@ class TableVersion:
|
|
|
265
264
|
schema_col_md[col.id] = md
|
|
266
265
|
|
|
267
266
|
schema_version_md = schema.TableSchemaVersionMd(
|
|
267
|
+
tbl_id=str(tbl_record.id),
|
|
268
268
|
schema_version=0,
|
|
269
269
|
preceding_schema_version=None,
|
|
270
270
|
columns=schema_col_md,
|
|
@@ -458,6 +458,11 @@ class TableVersion:
|
|
|
458
458
|
)
|
|
459
459
|
)
|
|
460
460
|
|
|
461
|
+
def ensure_md_loaded(self) -> None:
|
|
462
|
+
"""Ensure that table metadata is loaded."""
|
|
463
|
+
for col in self.cols_by_id.values():
|
|
464
|
+
_ = col.value_expr
|
|
465
|
+
|
|
461
466
|
def _store_idx_name(self, idx_id: int) -> str:
|
|
462
467
|
"""Return name of index in the store, which needs to be globally unique"""
|
|
463
468
|
return f'idx_{self.id.hex}_{idx_id}'
|
|
@@ -1239,6 +1244,11 @@ class TableVersion:
|
|
|
1239
1244
|
"""Return all primary key columns"""
|
|
1240
1245
|
return [c for c in self.cols if c.is_pk]
|
|
1241
1246
|
|
|
1247
|
+
@property
|
|
1248
|
+
def primary_key(self) -> list[str]:
|
|
1249
|
+
"""Return the names of the primary key columns"""
|
|
1250
|
+
return [c.name for c in self.cols if c.is_pk]
|
|
1251
|
+
|
|
1242
1252
|
def get_required_col_names(self) -> list[str]:
|
|
1243
1253
|
"""Return the names of all columns for which values must be specified in insert()"""
|
|
1244
1254
|
assert not self.is_view
|
|
@@ -1305,6 +1315,7 @@ class TableVersion:
|
|
|
1305
1315
|
|
|
1306
1316
|
def _create_tbl_md(self) -> schema.TableMd:
|
|
1307
1317
|
return schema.TableMd(
|
|
1318
|
+
tbl_id=str(self.id),
|
|
1308
1319
|
name=self.name,
|
|
1309
1320
|
user=None,
|
|
1310
1321
|
current_version=self.version,
|
|
@@ -1321,7 +1332,11 @@ class TableVersion:
|
|
|
1321
1332
|
|
|
1322
1333
|
def _create_version_md(self, timestamp: float) -> schema.TableVersionMd:
|
|
1323
1334
|
return schema.TableVersionMd(
|
|
1324
|
-
|
|
1335
|
+
tbl_id=str(self.id),
|
|
1336
|
+
created_at=timestamp,
|
|
1337
|
+
version=self.version,
|
|
1338
|
+
schema_version=self.schema_version,
|
|
1339
|
+
additional_md={},
|
|
1325
1340
|
)
|
|
1326
1341
|
|
|
1327
1342
|
def _create_schema_version_md(self, preceding_schema_version: int) -> schema.TableSchemaVersionMd:
|
|
@@ -1334,6 +1349,7 @@ class TableVersion:
|
|
|
1334
1349
|
)
|
|
1335
1350
|
# preceding_schema_version to be set by the caller
|
|
1336
1351
|
return schema.TableSchemaVersionMd(
|
|
1352
|
+
tbl_id=str(self.id),
|
|
1337
1353
|
schema_version=self.schema_version,
|
|
1338
1354
|
preceding_schema_version=preceding_schema_version,
|
|
1339
1355
|
columns=column_md,
|
pixeltable/catalog/view.py
CHANGED
|
@@ -237,15 +237,11 @@ class View(Table):
|
|
|
237
237
|
)
|
|
238
238
|
|
|
239
239
|
def _drop(self) -> None:
|
|
240
|
-
cat = catalog.Catalog.get()
|
|
241
240
|
if self._snapshot_only:
|
|
242
241
|
# there is no TableVersion to drop
|
|
243
242
|
self._check_is_dropped()
|
|
244
243
|
self.is_dropped = True
|
|
245
244
|
TableVersion.delete_md(self._id)
|
|
246
|
-
# update catalog
|
|
247
|
-
cat = catalog.Catalog.get()
|
|
248
|
-
cat.remove_tbl(self._id)
|
|
249
245
|
else:
|
|
250
246
|
super()._drop()
|
|
251
247
|
|
|
@@ -255,13 +251,20 @@ class View(Table):
|
|
|
255
251
|
md['is_snapshot'] = self._tbl_version_path.is_snapshot()
|
|
256
252
|
return md
|
|
257
253
|
|
|
254
|
+
if TYPE_CHECKING:
|
|
255
|
+
import datasets # type: ignore[import-untyped]
|
|
256
|
+
|
|
257
|
+
from pixeltable.globals import RowData, TableDataSource
|
|
258
|
+
|
|
258
259
|
def insert(
|
|
259
260
|
self,
|
|
260
|
-
|
|
261
|
+
source: Optional[TableDataSource] = None,
|
|
261
262
|
/,
|
|
262
263
|
*,
|
|
263
|
-
|
|
264
|
+
source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
|
|
265
|
+
schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
|
|
264
266
|
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
267
|
+
print_stats: bool = False,
|
|
265
268
|
**kwargs: Any,
|
|
266
269
|
) -> UpdateStatus:
|
|
267
270
|
raise excs.Error(f'{self._display_name()} {self._name!r}: cannot insert into view')
|
pixeltable/env.py
CHANGED
|
@@ -170,19 +170,25 @@ class Env:
|
|
|
170
170
|
assert self._current_session is not None
|
|
171
171
|
return self._current_session
|
|
172
172
|
|
|
173
|
+
def in_xact(self) -> bool:
|
|
174
|
+
return self._current_conn is not None
|
|
175
|
+
|
|
173
176
|
@contextmanager
|
|
174
177
|
def begin_xact(self) -> Iterator[sql.Connection]:
|
|
175
178
|
"""Return a context manager that yields a connection to the database. Idempotent."""
|
|
176
179
|
if self._current_conn is None:
|
|
177
180
|
assert self._current_session is None
|
|
178
|
-
|
|
179
|
-
self.
|
|
180
|
-
|
|
181
|
-
|
|
181
|
+
try:
|
|
182
|
+
with self.engine.begin() as conn, sql.orm.Session(conn) as session:
|
|
183
|
+
# TODO: remove print() once we're done with debugging the concurrent update behavior
|
|
184
|
+
# print(f'{datetime.datetime.now()}: start xact')
|
|
185
|
+
self._current_conn = conn
|
|
186
|
+
self._current_session = session
|
|
182
187
|
yield conn
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
188
|
+
finally:
|
|
189
|
+
self._current_session = None
|
|
190
|
+
self._current_conn = None
|
|
191
|
+
# print(f'{datetime.datetime.now()}: end xact')
|
|
186
192
|
else:
|
|
187
193
|
assert self._current_session is not None
|
|
188
194
|
yield self._current_conn
|
|
@@ -391,7 +397,7 @@ class Env:
|
|
|
391
397
|
def _create_engine(self, time_zone_name: Optional[str], echo: bool = False) -> None:
|
|
392
398
|
connect_args = {} if time_zone_name is None else {'options': f'-c timezone={time_zone_name}'}
|
|
393
399
|
self._sa_engine = sql.create_engine(
|
|
394
|
-
self.db_url, echo=echo,
|
|
400
|
+
self.db_url, echo=echo, isolation_level='REPEATABLE READ', connect_args=connect_args
|
|
395
401
|
)
|
|
396
402
|
self._logger.info(f'Created SQLAlchemy engine at: {self.db_url}')
|
|
397
403
|
with self.engine.begin() as conn:
|
|
@@ -561,7 +567,7 @@ class Env:
|
|
|
561
567
|
self.__register_package('transformers')
|
|
562
568
|
self.__register_package('whisper', library_name='openai-whisper')
|
|
563
569
|
self.__register_package('whisperx')
|
|
564
|
-
self.__register_package('yolox', library_name='
|
|
570
|
+
self.__register_package('yolox', library_name='pixeltable-yolox')
|
|
565
571
|
|
|
566
572
|
def __register_package(self, package_name: str, library_name: Optional[str] = None) -> None:
|
|
567
573
|
is_installed: bool
|
pixeltable/exec/exec_node.py
CHANGED
pixeltable/exprs/__init__.py
CHANGED
|
@@ -16,7 +16,7 @@ from .in_predicate import InPredicate
|
|
|
16
16
|
from .inline_expr import InlineArray, InlineDict, InlineList
|
|
17
17
|
from .is_null import IsNull
|
|
18
18
|
from .json_mapper import JsonMapper
|
|
19
|
-
from .json_path import
|
|
19
|
+
from .json_path import JsonPath
|
|
20
20
|
from .literal import Literal
|
|
21
21
|
from .method_ref import MethodRef
|
|
22
22
|
from .object_ref import ObjectRef
|
|
@@ -24,5 +24,6 @@ from .row_builder import ColumnSlotIdx, ExecProfile, RowBuilder
|
|
|
24
24
|
from .rowid_ref import RowidRef
|
|
25
25
|
from .similarity_expr import SimilarityExpr
|
|
26
26
|
from .sql_element_cache import SqlElementCache
|
|
27
|
+
from .string_op import StringOp
|
|
27
28
|
from .type_cast import TypeCast
|
|
28
29
|
from .variable import Variable
|
|
@@ -19,6 +19,8 @@ class ArithmeticExpr(Expr):
|
|
|
19
19
|
Allows arithmetic exprs on json paths
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
|
+
operator: ArithmeticOperator
|
|
23
|
+
|
|
22
24
|
def __init__(self, operator: ArithmeticOperator, op1: Expr, op2: Expr):
|
|
23
25
|
if op1.col_type.is_json_type() or op2.col_type.is_json_type() or operator == ArithmeticOperator.DIV:
|
|
24
26
|
# we assume it's a float
|
pixeltable/exprs/column_ref.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import copy
|
|
3
4
|
from typing import Any, Optional, Sequence
|
|
4
5
|
from uuid import UUID
|
|
5
6
|
|
|
@@ -125,11 +126,46 @@ class ColumnRef(Expr):
|
|
|
125
126
|
|
|
126
127
|
return super().__getattr__(name)
|
|
127
128
|
|
|
129
|
+
@classmethod
|
|
130
|
+
def find_embedding_index(
|
|
131
|
+
cls, col: catalog.Column, idx_name: Optional[str], method_name: str
|
|
132
|
+
) -> dict[str, catalog.TableVersion.IndexInfo]:
|
|
133
|
+
"""Return IndexInfo for a column, with an optional given name"""
|
|
134
|
+
# determine index to use
|
|
135
|
+
idx_info_dict = col.get_idx_info()
|
|
136
|
+
from pixeltable import index
|
|
137
|
+
|
|
138
|
+
embedding_idx_info = {
|
|
139
|
+
info: value for info, value in idx_info_dict.items() if isinstance(value.idx, index.EmbeddingIndex)
|
|
140
|
+
}
|
|
141
|
+
if len(embedding_idx_info) == 0:
|
|
142
|
+
raise excs.Error(f'No indices found for {method_name!r} on column {col.name!r}')
|
|
143
|
+
if idx_name is not None and idx_name not in embedding_idx_info:
|
|
144
|
+
raise excs.Error(f'Index {idx_name!r} not found for {method_name!r} on column {col.name!r}')
|
|
145
|
+
if len(embedding_idx_info) > 1:
|
|
146
|
+
if idx_name is None:
|
|
147
|
+
raise excs.Error(
|
|
148
|
+
f'Column {col.name!r} has multiple indices; use the index name to disambiguate: '
|
|
149
|
+
f'`{method_name}(..., idx=<index_name>)`'
|
|
150
|
+
)
|
|
151
|
+
idx_info = {idx_name: embedding_idx_info[idx_name]}
|
|
152
|
+
else:
|
|
153
|
+
idx_info = embedding_idx_info
|
|
154
|
+
return idx_info
|
|
155
|
+
|
|
128
156
|
def similarity(self, item: Any, *, idx: Optional[str] = None) -> Expr:
|
|
129
157
|
from .similarity_expr import SimilarityExpr
|
|
130
158
|
|
|
131
159
|
return SimilarityExpr(self, item, idx_name=idx)
|
|
132
160
|
|
|
161
|
+
def embedding(self, *, idx: Optional[str] = None) -> ColumnRef:
|
|
162
|
+
idx_info = ColumnRef.find_embedding_index(self.col, idx, 'embedding')
|
|
163
|
+
assert len(idx_info) == 1
|
|
164
|
+
col = copy.copy(next(iter(idx_info.values())).val_col)
|
|
165
|
+
col.name = f'{self.col.name}_embedding_{idx if idx is not None else ""}'
|
|
166
|
+
col.create_sa_cols()
|
|
167
|
+
return ColumnRef(col)
|
|
168
|
+
|
|
133
169
|
def default_column_name(self) -> Optional[str]:
|
|
134
170
|
return str(self)
|
|
135
171
|
|
|
@@ -137,7 +173,7 @@ class ColumnRef(Expr):
|
|
|
137
173
|
return self.col == other.col and self.perform_validation == other.perform_validation
|
|
138
174
|
|
|
139
175
|
def _df(self) -> 'pxt.dataframe.DataFrame':
|
|
140
|
-
tbl = catalog.Catalog.get().
|
|
176
|
+
tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl.id)
|
|
141
177
|
return tbl.select(self)
|
|
142
178
|
|
|
143
179
|
def show(self, *args, **kwargs) -> 'pxt.dataframe.DataFrameResultSet':
|
|
@@ -165,7 +201,7 @@ class ColumnRef(Expr):
|
|
|
165
201
|
return self._descriptors().to_html()
|
|
166
202
|
|
|
167
203
|
def _descriptors(self) -> DescriptionHelper:
|
|
168
|
-
tbl = catalog.Catalog.get().
|
|
204
|
+
tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl.id)
|
|
169
205
|
helper = DescriptionHelper()
|
|
170
206
|
helper.append(f'Column\n{self.col.name!r}\n(of table {tbl._path()!r})')
|
|
171
207
|
helper.append(tbl._col_descriptor([self.col.name]))
|
pixeltable/exprs/expr.py
CHANGED
|
@@ -17,7 +17,7 @@ from typing_extensions import Self, _AnnotatedAlias
|
|
|
17
17
|
from pixeltable import catalog, exceptions as excs, func, type_system as ts
|
|
18
18
|
|
|
19
19
|
from .data_row import DataRow
|
|
20
|
-
from .globals import ArithmeticOperator, ComparisonOperator, LiteralPythonTypes, LogicalOperator
|
|
20
|
+
from .globals import ArithmeticOperator, ComparisonOperator, LiteralPythonTypes, LogicalOperator, StringOperator
|
|
21
21
|
|
|
22
22
|
if TYPE_CHECKING:
|
|
23
23
|
from pixeltable import exprs
|
|
@@ -90,14 +90,29 @@ class Expr(abc.ABC):
|
|
|
90
90
|
result = c_scope
|
|
91
91
|
return result
|
|
92
92
|
|
|
93
|
-
def bind_rel_paths(self
|
|
93
|
+
def bind_rel_paths(self) -> None:
|
|
94
94
|
"""
|
|
95
95
|
Binds relative JsonPaths to mapper.
|
|
96
96
|
This needs to be done in a separate phase after __init__(), because RelativeJsonPath()(-1) cannot be resolved
|
|
97
97
|
by the immediately containing JsonMapper during initialization.
|
|
98
98
|
"""
|
|
99
|
+
self._bind_rel_paths()
|
|
100
|
+
assert not self._has_relative_path, self._expr_tree()
|
|
101
|
+
|
|
102
|
+
def _bind_rel_paths(self, mapper: Optional['exprs.JsonMapper'] = None) -> None:
|
|
103
|
+
for c in self.components:
|
|
104
|
+
c._bind_rel_paths(mapper)
|
|
105
|
+
|
|
106
|
+
def _expr_tree(self) -> str:
|
|
107
|
+
"""Returns a string representation of this expression as a multi-line tree. Useful for debugging."""
|
|
108
|
+
buf: list[str] = []
|
|
109
|
+
self._expr_tree_r(0, buf)
|
|
110
|
+
return '\n'.join(buf)
|
|
111
|
+
|
|
112
|
+
def _expr_tree_r(self, indent: int, buf: list[str]) -> None:
|
|
113
|
+
buf.append(f'{" " * indent}{type(self).__name__}: {self}'.replace('\n', '\\n'))
|
|
99
114
|
for c in self.components:
|
|
100
|
-
c.
|
|
115
|
+
c._expr_tree_r(indent + 2, buf)
|
|
101
116
|
|
|
102
117
|
def default_column_name(self) -> Optional[str]:
|
|
103
118
|
"""
|
|
@@ -355,6 +370,10 @@ class Expr(abc.ABC):
|
|
|
355
370
|
except StopIteration:
|
|
356
371
|
return False
|
|
357
372
|
|
|
373
|
+
@property
|
|
374
|
+
def _has_relative_path(self) -> bool:
|
|
375
|
+
return any(c._has_relative_path for c in self.components)
|
|
376
|
+
|
|
358
377
|
def tbl_ids(self) -> set[UUID]:
|
|
359
378
|
"""Returns table ids referenced by this expr."""
|
|
360
379
|
from .column_ref import ColumnRef
|
|
@@ -514,7 +533,7 @@ class Expr(abc.ABC):
|
|
|
514
533
|
|
|
515
534
|
@classmethod
|
|
516
535
|
def _from_dict(cls, d: dict, components: list[Expr]) -> Self:
|
|
517
|
-
raise AssertionError('not implemented')
|
|
536
|
+
raise AssertionError(f'not implemented: {cls.__name__}')
|
|
518
537
|
|
|
519
538
|
def isin(self, value_set: Any) -> 'exprs.InPredicate':
|
|
520
539
|
from .in_predicate import InPredicate
|
|
@@ -586,10 +605,6 @@ class Expr(abc.ABC):
|
|
|
586
605
|
# Return the `MethodRef` object itself; it requires arguments to become a `FunctionCall`
|
|
587
606
|
return method_ref
|
|
588
607
|
|
|
589
|
-
def __rshift__(self, other: object) -> 'exprs.Expr':
|
|
590
|
-
# Implemented here for type-checking purposes
|
|
591
|
-
raise excs.Error('The `>>` operator can only be applied to Json expressions')
|
|
592
|
-
|
|
593
608
|
def __bool__(self) -> bool:
|
|
594
609
|
raise TypeError(
|
|
595
610
|
f'Pixeltable expressions cannot be used in conjunction with Python boolean operators (and/or/not)\n{self!r}'
|
|
@@ -639,13 +654,17 @@ class Expr(abc.ABC):
|
|
|
639
654
|
def __neg__(self) -> 'exprs.ArithmeticExpr':
|
|
640
655
|
return self._make_arithmetic_expr(ArithmeticOperator.MUL, -1)
|
|
641
656
|
|
|
642
|
-
def __add__(self, other: object) ->
|
|
657
|
+
def __add__(self, other: object) -> Union[exprs.ArithmeticExpr, exprs.StringOp]:
|
|
658
|
+
if isinstance(self, str) or (isinstance(self, Expr) and self.col_type.is_string_type()):
|
|
659
|
+
return self._make_string_expr(StringOperator.CONCAT, other)
|
|
643
660
|
return self._make_arithmetic_expr(ArithmeticOperator.ADD, other)
|
|
644
661
|
|
|
645
662
|
def __sub__(self, other: object) -> 'exprs.ArithmeticExpr':
|
|
646
663
|
return self._make_arithmetic_expr(ArithmeticOperator.SUB, other)
|
|
647
664
|
|
|
648
|
-
def __mul__(self, other: object) -> 'exprs.ArithmeticExpr':
|
|
665
|
+
def __mul__(self, other: object) -> Union['exprs.ArithmeticExpr', 'exprs.StringOp']:
|
|
666
|
+
if isinstance(self, str) or (isinstance(self, Expr) and self.col_type.is_string_type()):
|
|
667
|
+
return self._make_string_expr(StringOperator.REPEAT, other)
|
|
649
668
|
return self._make_arithmetic_expr(ArithmeticOperator.MUL, other)
|
|
650
669
|
|
|
651
670
|
def __truediv__(self, other: object) -> 'exprs.ArithmeticExpr':
|
|
@@ -657,13 +676,17 @@ class Expr(abc.ABC):
|
|
|
657
676
|
def __floordiv__(self, other: object) -> 'exprs.ArithmeticExpr':
|
|
658
677
|
return self._make_arithmetic_expr(ArithmeticOperator.FLOORDIV, other)
|
|
659
678
|
|
|
660
|
-
def __radd__(self, other: object) -> 'exprs.ArithmeticExpr':
|
|
679
|
+
def __radd__(self, other: object) -> Union['exprs.ArithmeticExpr', 'exprs.StringOp']:
|
|
680
|
+
if isinstance(other, str) or (isinstance(other, Expr) and other.col_type.is_string_type()):
|
|
681
|
+
return self._rmake_string_expr(StringOperator.CONCAT, other)
|
|
661
682
|
return self._rmake_arithmetic_expr(ArithmeticOperator.ADD, other)
|
|
662
683
|
|
|
663
684
|
def __rsub__(self, other: object) -> 'exprs.ArithmeticExpr':
|
|
664
685
|
return self._rmake_arithmetic_expr(ArithmeticOperator.SUB, other)
|
|
665
686
|
|
|
666
|
-
def __rmul__(self, other: object) -> 'exprs.ArithmeticExpr':
|
|
687
|
+
def __rmul__(self, other: object) -> Union['exprs.ArithmeticExpr', 'exprs.StringOp']:
|
|
688
|
+
if isinstance(other, str) or (isinstance(other, Expr) and other.col_type.is_string_type()):
|
|
689
|
+
return self._rmake_string_expr(StringOperator.REPEAT, other)
|
|
667
690
|
return self._rmake_arithmetic_expr(ArithmeticOperator.MUL, other)
|
|
668
691
|
|
|
669
692
|
def __rtruediv__(self, other: object) -> 'exprs.ArithmeticExpr':
|
|
@@ -675,6 +698,32 @@ class Expr(abc.ABC):
|
|
|
675
698
|
def __rfloordiv__(self, other: object) -> 'exprs.ArithmeticExpr':
|
|
676
699
|
return self._rmake_arithmetic_expr(ArithmeticOperator.FLOORDIV, other)
|
|
677
700
|
|
|
701
|
+
def _make_string_expr(self, op: StringOperator, other: object) -> 'exprs.StringOp':
|
|
702
|
+
"""
|
|
703
|
+
Make left-handed version of string expression.
|
|
704
|
+
"""
|
|
705
|
+
from .literal import Literal
|
|
706
|
+
from .string_op import StringOp
|
|
707
|
+
|
|
708
|
+
if isinstance(other, Expr):
|
|
709
|
+
return StringOp(op, self, other)
|
|
710
|
+
if isinstance(other, typing.get_args(LiteralPythonTypes)):
|
|
711
|
+
return StringOp(op, self, Literal(other))
|
|
712
|
+
raise TypeError(f'Other must be Expr or literal: {type(other)}')
|
|
713
|
+
|
|
714
|
+
def _rmake_string_expr(self, op: StringOperator, other: object) -> 'exprs.StringOp':
|
|
715
|
+
"""
|
|
716
|
+
Right-handed version of _make_string_expr. other must be a literal; if it were an Expr,
|
|
717
|
+
the operation would have already been evaluated in its left-handed form.
|
|
718
|
+
"""
|
|
719
|
+
from .literal import Literal
|
|
720
|
+
from .string_op import StringOp
|
|
721
|
+
|
|
722
|
+
assert not isinstance(other, Expr) # Else the left-handed form would have evaluated first
|
|
723
|
+
if isinstance(other, typing.get_args(LiteralPythonTypes)):
|
|
724
|
+
return StringOp(op, Literal(other), self)
|
|
725
|
+
raise TypeError(f'Other must be Expr or literal: {type(other)}')
|
|
726
|
+
|
|
678
727
|
def _make_arithmetic_expr(self, op: ArithmeticOperator, other: object) -> 'exprs.ArithmeticExpr':
|
|
679
728
|
"""
|
|
680
729
|
other: Union[Expr, LiteralPythonTypes]
|
|
@@ -360,10 +360,7 @@ class FunctionCall(Expr):
|
|
|
360
360
|
return
|
|
361
361
|
args, kwargs = args_kwargs
|
|
362
362
|
|
|
363
|
-
if
|
|
364
|
-
# optimization: avoid additional level of indirection we'd get from calling Function.exec()
|
|
365
|
-
data_row[self.slot_idx] = self.fn.py_fn(*args, **kwargs)
|
|
366
|
-
elif self.is_window_fn_call:
|
|
363
|
+
if self.is_window_fn_call:
|
|
367
364
|
assert isinstance(self.fn, func.AggregateFunction)
|
|
368
365
|
agg_cls = self.fn.agg_class
|
|
369
366
|
if self.has_group_by():
|
pixeltable/exprs/globals.py
CHANGED
|
@@ -87,3 +87,15 @@ class ArithmeticOperator(enum.Enum):
|
|
|
87
87
|
if self == self.FLOORDIV:
|
|
88
88
|
return '//'
|
|
89
89
|
raise AssertionError()
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class StringOperator(enum.Enum):
|
|
93
|
+
CONCAT = 0
|
|
94
|
+
REPEAT = 1
|
|
95
|
+
|
|
96
|
+
def __str__(self) -> str:
|
|
97
|
+
if self == self.CONCAT:
|
|
98
|
+
return '+'
|
|
99
|
+
if self == self.REPEAT:
|
|
100
|
+
return '*'
|
|
101
|
+
raise AssertionError()
|
pixeltable/exprs/json_mapper.py
CHANGED
|
@@ -48,9 +48,9 @@ class JsonMapper(Expr):
|
|
|
48
48
|
scope_anchor = ObjectRef(self.target_expr_scope, self)
|
|
49
49
|
self.components.append(scope_anchor)
|
|
50
50
|
|
|
51
|
-
def
|
|
52
|
-
self._src_expr.
|
|
53
|
-
self._target_expr.
|
|
51
|
+
def _bind_rel_paths(self, mapper: Optional[JsonMapper] = None) -> None:
|
|
52
|
+
self._src_expr._bind_rel_paths(mapper)
|
|
53
|
+
self._target_expr._bind_rel_paths(self)
|
|
54
54
|
self.parent_mapper = mapper
|
|
55
55
|
parent_scope = _GLOBAL_SCOPE if mapper is None else mapper.target_expr_scope
|
|
56
56
|
self.target_expr_scope.parent = parent_scope
|
|
@@ -86,7 +86,7 @@ class JsonMapper(Expr):
|
|
|
86
86
|
return self._src_expr.equals(other._src_expr) and self._target_expr.equals(other._target_expr)
|
|
87
87
|
|
|
88
88
|
def __repr__(self) -> str:
|
|
89
|
-
return f'{self._src_expr}
|
|
89
|
+
return f'map({self._src_expr}, lambda R: {self._target_expr})'
|
|
90
90
|
|
|
91
91
|
@property
|
|
92
92
|
def _src_expr(self) -> Expr:
|
pixeltable/exprs/json_path.py
CHANGED
|
@@ -80,11 +80,16 @@ class JsonPath(Expr):
|
|
|
80
80
|
def is_relative_path(self) -> bool:
|
|
81
81
|
return self._anchor is None
|
|
82
82
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
83
|
+
@property
|
|
84
|
+
def _has_relative_path(self) -> bool:
|
|
85
|
+
return self.is_relative_path() or super()._has_relative_path
|
|
86
|
+
|
|
87
|
+
def _bind_rel_paths(self, mapper: Optional['JsonMapper'] = None) -> None:
|
|
88
|
+
if self.is_relative_path():
|
|
89
|
+
# TODO: take scope_idx into account
|
|
90
|
+
self.set_anchor(mapper.scope_anchor)
|
|
91
|
+
else:
|
|
92
|
+
self._anchor._bind_rel_paths(mapper)
|
|
88
93
|
|
|
89
94
|
def __call__(self, *args: object, **kwargs: object) -> 'JsonPath':
|
|
90
95
|
"""
|
|
@@ -105,12 +110,6 @@ class JsonPath(Expr):
|
|
|
105
110
|
return JsonPath(self._anchor, [*self.path_elements, index])
|
|
106
111
|
raise excs.Error(f'Invalid json list index: {index}')
|
|
107
112
|
|
|
108
|
-
def __rshift__(self, other: object) -> 'JsonMapper':
|
|
109
|
-
rhs_expr = Expr.from_object(other)
|
|
110
|
-
if rhs_expr is None:
|
|
111
|
-
raise excs.Error(f'>> requires an expression on the right-hand side, found {type(other)}')
|
|
112
|
-
return JsonMapper(self, rhs_expr)
|
|
113
|
-
|
|
114
113
|
def default_column_name(self) -> Optional[str]:
|
|
115
114
|
anchor_name = self._anchor.default_column_name() if self._anchor is not None else ''
|
|
116
115
|
ret_name = f'{anchor_name}.{self._json_path()}'
|
|
@@ -23,26 +23,12 @@ class SimilarityExpr(Expr):
|
|
|
23
23
|
|
|
24
24
|
self.components = [col_ref, item_expr]
|
|
25
25
|
|
|
26
|
-
# determine index to use
|
|
27
|
-
idx_info = col_ref.col.get_idx_info()
|
|
28
26
|
from pixeltable import index
|
|
29
27
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
raise excs.Error(f'No index found for column {col_ref.col!r}')
|
|
35
|
-
if idx_name is not None and idx_name not in embedding_idx_info:
|
|
36
|
-
raise excs.Error(f'Index {idx_name!r} not found for column {col_ref.col.name!r}')
|
|
37
|
-
if len(embedding_idx_info) > 1:
|
|
38
|
-
if idx_name is None:
|
|
39
|
-
raise excs.Error(
|
|
40
|
-
f'Column {col_ref.col.name!r} has multiple indices; use the index name to disambiguate: '
|
|
41
|
-
f'`{col_ref.col.name}.similarity(..., idx=<name>)`'
|
|
42
|
-
)
|
|
43
|
-
self.idx_info = embedding_idx_info[idx_name]
|
|
44
|
-
else:
|
|
45
|
-
self.idx_info = next(iter(embedding_idx_info.values()))
|
|
28
|
+
# determine index to use
|
|
29
|
+
idx_dict = ColumnRef.find_embedding_index(col_ref.col, idx_name, 'similarity')
|
|
30
|
+
assert len(idx_dict) == 1
|
|
31
|
+
self.idx_info = next(iter(idx_dict.values()))
|
|
46
32
|
idx = self.idx_info.idx
|
|
47
33
|
assert isinstance(idx, index.EmbeddingIndex)
|
|
48
34
|
|
|
@@ -86,8 +72,7 @@ class SimilarityExpr(Expr):
|
|
|
86
72
|
return self.idx_info.idx.order_by_clause(self.idx_info.val_col, item, is_asc)
|
|
87
73
|
|
|
88
74
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
89
|
-
|
|
90
|
-
raise AssertionError()
|
|
75
|
+
raise excs.Error('similarity(): cannot be used in a computed column')
|
|
91
76
|
|
|
92
77
|
def _as_dict(self) -> dict:
|
|
93
78
|
return {'idx_name': self.idx_info.name, **super()._as_dict()}
|