pixeltable 0.3.8__py3-none-any.whl → 0.3.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (52)
  1. pixeltable/__init__.py +1 -2
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/catalog.py +509 -103
  4. pixeltable/catalog/column.py +5 -0
  5. pixeltable/catalog/dir.py +15 -6
  6. pixeltable/catalog/globals.py +16 -0
  7. pixeltable/catalog/insertable_table.py +82 -41
  8. pixeltable/catalog/path.py +15 -0
  9. pixeltable/catalog/schema_object.py +7 -12
  10. pixeltable/catalog/table.py +81 -67
  11. pixeltable/catalog/table_version.py +23 -7
  12. pixeltable/catalog/view.py +9 -6
  13. pixeltable/env.py +15 -9
  14. pixeltable/exec/exec_node.py +1 -1
  15. pixeltable/exprs/__init__.py +2 -1
  16. pixeltable/exprs/arithmetic_expr.py +2 -0
  17. pixeltable/exprs/column_ref.py +38 -2
  18. pixeltable/exprs/expr.py +61 -12
  19. pixeltable/exprs/function_call.py +1 -4
  20. pixeltable/exprs/globals.py +12 -0
  21. pixeltable/exprs/json_mapper.py +4 -4
  22. pixeltable/exprs/json_path.py +10 -11
  23. pixeltable/exprs/similarity_expr.py +5 -20
  24. pixeltable/exprs/string_op.py +107 -0
  25. pixeltable/ext/functions/yolox.py +21 -64
  26. pixeltable/func/callable_function.py +5 -2
  27. pixeltable/func/query_template_function.py +6 -18
  28. pixeltable/func/tools.py +2 -2
  29. pixeltable/functions/__init__.py +1 -1
  30. pixeltable/functions/globals.py +16 -5
  31. pixeltable/globals.py +172 -262
  32. pixeltable/io/__init__.py +3 -2
  33. pixeltable/io/datarows.py +138 -0
  34. pixeltable/io/external_store.py +8 -5
  35. pixeltable/io/globals.py +7 -160
  36. pixeltable/io/hf_datasets.py +21 -98
  37. pixeltable/io/pandas.py +29 -43
  38. pixeltable/io/parquet.py +17 -42
  39. pixeltable/io/table_data_conduit.py +569 -0
  40. pixeltable/io/utils.py +6 -21
  41. pixeltable/metadata/__init__.py +1 -1
  42. pixeltable/metadata/converters/convert_30.py +50 -0
  43. pixeltable/metadata/converters/util.py +26 -1
  44. pixeltable/metadata/notes.py +1 -0
  45. pixeltable/metadata/schema.py +3 -0
  46. pixeltable/utils/arrow.py +32 -7
  47. pixeltable/utils/coroutine.py +41 -0
  48. {pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/METADATA +1 -1
  49. {pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/RECORD +52 -47
  50. {pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/WHEEL +1 -1
  51. {pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/LICENSE +0 -0
  52. {pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/entry_points.txt +0 -0
@@ -177,10 +177,6 @@ class TableVersion:
177
177
  # Init external stores (this needs to happen after the schema is created)
178
178
  self._init_external_stores(tbl_md)
179
179
 
180
- # Force column metadata to load, in order to surface any invalid metadata now (as warnings)
181
- for col in self.cols_by_id.values():
182
- _ = col.value_expr
183
-
184
180
  def __hash__(self) -> int:
185
181
  return hash(self.id)
186
182
 
@@ -229,7 +225,9 @@ class TableVersion:
229
225
  # create schema.Table
230
226
  # Column.dependent_cols for existing cols is wrong at this point, but init() will set it correctly
231
227
  column_md = cls._create_column_md(cols)
228
+ tbl_id = uuid.uuid4()
232
229
  table_md = schema.TableMd(
230
+ tbl_id=str(tbl_id),
233
231
  name=name,
234
232
  user=None,
235
233
  current_version=0,
@@ -245,11 +243,12 @@ class TableVersion:
245
243
  )
246
244
  # create a schema.Table here, we need it to call our c'tor;
247
245
  # don't add it to the session yet, we might add index metadata
248
- tbl_id = uuid.uuid4()
249
246
  tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(table_md))
250
247
 
251
248
  # create schema.TableVersion
252
- table_version_md = schema.TableVersionMd(created_at=timestamp, version=0, schema_version=0, additional_md={})
249
+ table_version_md = schema.TableVersionMd(
250
+ tbl_id=str(tbl_record.id), created_at=timestamp, version=0, schema_version=0, additional_md={}
251
+ )
253
252
  tbl_version_record = schema.TableVersion(
254
253
  tbl_id=tbl_record.id, version=0, md=dataclasses.asdict(table_version_md)
255
254
  )
@@ -265,6 +264,7 @@ class TableVersion:
265
264
  schema_col_md[col.id] = md
266
265
 
267
266
  schema_version_md = schema.TableSchemaVersionMd(
267
+ tbl_id=str(tbl_record.id),
268
268
  schema_version=0,
269
269
  preceding_schema_version=None,
270
270
  columns=schema_col_md,
@@ -458,6 +458,11 @@ class TableVersion:
458
458
  )
459
459
  )
460
460
 
461
+ def ensure_md_loaded(self) -> None:
462
+ """Ensure that table metadata is loaded."""
463
+ for col in self.cols_by_id.values():
464
+ _ = col.value_expr
465
+
461
466
  def _store_idx_name(self, idx_id: int) -> str:
462
467
  """Return name of index in the store, which needs to be globally unique"""
463
468
  return f'idx_{self.id.hex}_{idx_id}'
@@ -1239,6 +1244,11 @@ class TableVersion:
1239
1244
  """Return all primary key columns"""
1240
1245
  return [c for c in self.cols if c.is_pk]
1241
1246
 
1247
+ @property
1248
+ def primary_key(self) -> list[str]:
1249
+ """Return the names of the primary key columns"""
1250
+ return [c.name for c in self.cols if c.is_pk]
1251
+
1242
1252
  def get_required_col_names(self) -> list[str]:
1243
1253
  """Return the names of all columns for which values must be specified in insert()"""
1244
1254
  assert not self.is_view
@@ -1305,6 +1315,7 @@ class TableVersion:
1305
1315
 
1306
1316
  def _create_tbl_md(self) -> schema.TableMd:
1307
1317
  return schema.TableMd(
1318
+ tbl_id=str(self.id),
1308
1319
  name=self.name,
1309
1320
  user=None,
1310
1321
  current_version=self.version,
@@ -1321,7 +1332,11 @@ class TableVersion:
1321
1332
 
1322
1333
  def _create_version_md(self, timestamp: float) -> schema.TableVersionMd:
1323
1334
  return schema.TableVersionMd(
1324
- created_at=timestamp, version=self.version, schema_version=self.schema_version, additional_md={}
1335
+ tbl_id=str(self.id),
1336
+ created_at=timestamp,
1337
+ version=self.version,
1338
+ schema_version=self.schema_version,
1339
+ additional_md={},
1325
1340
  )
1326
1341
 
1327
1342
  def _create_schema_version_md(self, preceding_schema_version: int) -> schema.TableSchemaVersionMd:
@@ -1334,6 +1349,7 @@ class TableVersion:
1334
1349
  )
1335
1350
  # preceding_schema_version to be set by the caller
1336
1351
  return schema.TableSchemaVersionMd(
1352
+ tbl_id=str(self.id),
1337
1353
  schema_version=self.schema_version,
1338
1354
  preceding_schema_version=preceding_schema_version,
1339
1355
  columns=column_md,
@@ -237,15 +237,11 @@ class View(Table):
237
237
  )
238
238
 
239
239
  def _drop(self) -> None:
240
- cat = catalog.Catalog.get()
241
240
  if self._snapshot_only:
242
241
  # there is no TableVersion to drop
243
242
  self._check_is_dropped()
244
243
  self.is_dropped = True
245
244
  TableVersion.delete_md(self._id)
246
- # update catalog
247
- cat = catalog.Catalog.get()
248
- cat.remove_tbl(self._id)
249
245
  else:
250
246
  super()._drop()
251
247
 
@@ -255,13 +251,20 @@ class View(Table):
255
251
  md['is_snapshot'] = self._tbl_version_path.is_snapshot()
256
252
  return md
257
253
 
254
+ if TYPE_CHECKING:
255
+ import datasets # type: ignore[import-untyped]
256
+
257
+ from pixeltable.globals import RowData, TableDataSource
258
+
258
259
  def insert(
259
260
  self,
260
- rows: Optional[Iterable[dict[str, Any]]] = None,
261
+ source: Optional[TableDataSource] = None,
261
262
  /,
262
263
  *,
263
- print_stats: bool = False,
264
+ source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
265
+ schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
264
266
  on_error: Literal['abort', 'ignore'] = 'abort',
267
+ print_stats: bool = False,
265
268
  **kwargs: Any,
266
269
  ) -> UpdateStatus:
267
270
  raise excs.Error(f'{self._display_name()} {self._name!r}: cannot insert into view')
pixeltable/env.py CHANGED
@@ -170,19 +170,25 @@ class Env:
170
170
  assert self._current_session is not None
171
171
  return self._current_session
172
172
 
173
+ def in_xact(self) -> bool:
174
+ return self._current_conn is not None
175
+
173
176
  @contextmanager
174
177
  def begin_xact(self) -> Iterator[sql.Connection]:
175
178
  """Return a context manager that yields a connection to the database. Idempotent."""
176
179
  if self._current_conn is None:
177
180
  assert self._current_session is None
178
- with self.engine.begin() as conn, sql.orm.Session(conn) as session:
179
- self._current_conn = conn
180
- self._current_session = session
181
- try:
181
+ try:
182
+ with self.engine.begin() as conn, sql.orm.Session(conn) as session:
183
+ # TODO: remove print() once we're done with debugging the concurrent update behavior
184
+ # print(f'{datetime.datetime.now()}: start xact')
185
+ self._current_conn = conn
186
+ self._current_session = session
182
187
  yield conn
183
- finally:
184
- self._current_session = None
185
- self._current_conn = None
188
+ finally:
189
+ self._current_session = None
190
+ self._current_conn = None
191
+ # print(f'{datetime.datetime.now()}: end xact')
186
192
  else:
187
193
  assert self._current_session is not None
188
194
  yield self._current_conn
@@ -391,7 +397,7 @@ class Env:
391
397
  def _create_engine(self, time_zone_name: Optional[str], echo: bool = False) -> None:
392
398
  connect_args = {} if time_zone_name is None else {'options': f'-c timezone={time_zone_name}'}
393
399
  self._sa_engine = sql.create_engine(
394
- self.db_url, echo=echo, future=True, isolation_level='REPEATABLE READ', connect_args=connect_args
400
+ self.db_url, echo=echo, isolation_level='REPEATABLE READ', connect_args=connect_args
395
401
  )
396
402
  self._logger.info(f'Created SQLAlchemy engine at: {self.db_url}')
397
403
  with self.engine.begin() as conn:
@@ -561,7 +567,7 @@ class Env:
561
567
  self.__register_package('transformers')
562
568
  self.__register_package('whisper', library_name='openai-whisper')
563
569
  self.__register_package('whisperx')
564
- self.__register_package('yolox', library_name='git+https://github.com/Megvii-BaseDetection/YOLOX@ac58e0a')
570
+ self.__register_package('yolox', library_name='pixeltable-yolox')
565
571
 
566
572
  def __register_package(self, package_name: str, library_name: Optional[str] = None) -> None:
567
573
  is_installed: bool
@@ -75,7 +75,7 @@ class ExecNode(abc.ABC):
75
75
  loop = asyncio.new_event_loop()
76
76
  asyncio.set_event_loop(loop)
77
77
 
78
- if 'pytest' in sys.modules:
78
+ if _logger.isEnabledFor(logging.DEBUG):
79
79
  loop.set_debug(True)
80
80
 
81
81
  aiter = self.__aiter__()
@@ -16,7 +16,7 @@ from .in_predicate import InPredicate
16
16
  from .inline_expr import InlineArray, InlineDict, InlineList
17
17
  from .is_null import IsNull
18
18
  from .json_mapper import JsonMapper
19
- from .json_path import RELATIVE_PATH_ROOT, JsonPath
19
+ from .json_path import JsonPath
20
20
  from .literal import Literal
21
21
  from .method_ref import MethodRef
22
22
  from .object_ref import ObjectRef
@@ -24,5 +24,6 @@ from .row_builder import ColumnSlotIdx, ExecProfile, RowBuilder
24
24
  from .rowid_ref import RowidRef
25
25
  from .similarity_expr import SimilarityExpr
26
26
  from .sql_element_cache import SqlElementCache
27
+ from .string_op import StringOp
27
28
  from .type_cast import TypeCast
28
29
  from .variable import Variable
@@ -19,6 +19,8 @@ class ArithmeticExpr(Expr):
19
19
  Allows arithmetic exprs on json paths
20
20
  """
21
21
 
22
+ operator: ArithmeticOperator
23
+
22
24
  def __init__(self, operator: ArithmeticOperator, op1: Expr, op2: Expr):
23
25
  if op1.col_type.is_json_type() or op2.col_type.is_json_type() or operator == ArithmeticOperator.DIV:
24
26
  # we assume it's a float
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import copy
3
4
  from typing import Any, Optional, Sequence
4
5
  from uuid import UUID
5
6
 
@@ -125,11 +126,46 @@ class ColumnRef(Expr):
125
126
 
126
127
  return super().__getattr__(name)
127
128
 
129
+ @classmethod
130
+ def find_embedding_index(
131
+ cls, col: catalog.Column, idx_name: Optional[str], method_name: str
132
+ ) -> dict[str, catalog.TableVersion.IndexInfo]:
133
+ """Return IndexInfo for a column, with an optional given name"""
134
+ # determine index to use
135
+ idx_info_dict = col.get_idx_info()
136
+ from pixeltable import index
137
+
138
+ embedding_idx_info = {
139
+ info: value for info, value in idx_info_dict.items() if isinstance(value.idx, index.EmbeddingIndex)
140
+ }
141
+ if len(embedding_idx_info) == 0:
142
+ raise excs.Error(f'No indices found for {method_name!r} on column {col.name!r}')
143
+ if idx_name is not None and idx_name not in embedding_idx_info:
144
+ raise excs.Error(f'Index {idx_name!r} not found for {method_name!r} on column {col.name!r}')
145
+ if len(embedding_idx_info) > 1:
146
+ if idx_name is None:
147
+ raise excs.Error(
148
+ f'Column {col.name!r} has multiple indices; use the index name to disambiguate: '
149
+ f'`{method_name}(..., idx=<index_name>)`'
150
+ )
151
+ idx_info = {idx_name: embedding_idx_info[idx_name]}
152
+ else:
153
+ idx_info = embedding_idx_info
154
+ return idx_info
155
+
128
156
  def similarity(self, item: Any, *, idx: Optional[str] = None) -> Expr:
129
157
  from .similarity_expr import SimilarityExpr
130
158
 
131
159
  return SimilarityExpr(self, item, idx_name=idx)
132
160
 
161
+ def embedding(self, *, idx: Optional[str] = None) -> ColumnRef:
162
+ idx_info = ColumnRef.find_embedding_index(self.col, idx, 'embedding')
163
+ assert len(idx_info) == 1
164
+ col = copy.copy(next(iter(idx_info.values())).val_col)
165
+ col.name = f'{self.col.name}_embedding_{idx if idx is not None else ""}'
166
+ col.create_sa_cols()
167
+ return ColumnRef(col)
168
+
133
169
  def default_column_name(self) -> Optional[str]:
134
170
  return str(self)
135
171
 
@@ -137,7 +173,7 @@ class ColumnRef(Expr):
137
173
  return self.col == other.col and self.perform_validation == other.perform_validation
138
174
 
139
175
  def _df(self) -> 'pxt.dataframe.DataFrame':
140
- tbl = catalog.Catalog.get().get_tbl(self.col.tbl.id)
176
+ tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl.id)
141
177
  return tbl.select(self)
142
178
 
143
179
  def show(self, *args, **kwargs) -> 'pxt.dataframe.DataFrameResultSet':
@@ -165,7 +201,7 @@ class ColumnRef(Expr):
165
201
  return self._descriptors().to_html()
166
202
 
167
203
  def _descriptors(self) -> DescriptionHelper:
168
- tbl = catalog.Catalog.get().get_tbl(self.col.tbl.id)
204
+ tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl.id)
169
205
  helper = DescriptionHelper()
170
206
  helper.append(f'Column\n{self.col.name!r}\n(of table {tbl._path()!r})')
171
207
  helper.append(tbl._col_descriptor([self.col.name]))
pixeltable/exprs/expr.py CHANGED
@@ -17,7 +17,7 @@ from typing_extensions import Self, _AnnotatedAlias
17
17
  from pixeltable import catalog, exceptions as excs, func, type_system as ts
18
18
 
19
19
  from .data_row import DataRow
20
- from .globals import ArithmeticOperator, ComparisonOperator, LiteralPythonTypes, LogicalOperator
20
+ from .globals import ArithmeticOperator, ComparisonOperator, LiteralPythonTypes, LogicalOperator, StringOperator
21
21
 
22
22
  if TYPE_CHECKING:
23
23
  from pixeltable import exprs
@@ -90,14 +90,29 @@ class Expr(abc.ABC):
90
90
  result = c_scope
91
91
  return result
92
92
 
93
- def bind_rel_paths(self, mapper: Optional['exprs.JsonMapper'] = None) -> None:
93
+ def bind_rel_paths(self) -> None:
94
94
  """
95
95
  Binds relative JsonPaths to mapper.
96
96
  This needs to be done in a separate phase after __init__(), because RelativeJsonPath()(-1) cannot be resolved
97
97
  by the immediately containing JsonMapper during initialization.
98
98
  """
99
+ self._bind_rel_paths()
100
+ assert not self._has_relative_path, self._expr_tree()
101
+
102
+ def _bind_rel_paths(self, mapper: Optional['exprs.JsonMapper'] = None) -> None:
103
+ for c in self.components:
104
+ c._bind_rel_paths(mapper)
105
+
106
+ def _expr_tree(self) -> str:
107
+ """Returns a string representation of this expression as a multi-line tree. Useful for debugging."""
108
+ buf: list[str] = []
109
+ self._expr_tree_r(0, buf)
110
+ return '\n'.join(buf)
111
+
112
+ def _expr_tree_r(self, indent: int, buf: list[str]) -> None:
113
+ buf.append(f'{" " * indent}{type(self).__name__}: {self}'.replace('\n', '\\n'))
99
114
  for c in self.components:
100
- c.bind_rel_paths(mapper)
115
+ c._expr_tree_r(indent + 2, buf)
101
116
 
102
117
  def default_column_name(self) -> Optional[str]:
103
118
  """
@@ -355,6 +370,10 @@ class Expr(abc.ABC):
355
370
  except StopIteration:
356
371
  return False
357
372
 
373
+ @property
374
+ def _has_relative_path(self) -> bool:
375
+ return any(c._has_relative_path for c in self.components)
376
+
358
377
  def tbl_ids(self) -> set[UUID]:
359
378
  """Returns table ids referenced by this expr."""
360
379
  from .column_ref import ColumnRef
@@ -514,7 +533,7 @@ class Expr(abc.ABC):
514
533
 
515
534
  @classmethod
516
535
  def _from_dict(cls, d: dict, components: list[Expr]) -> Self:
517
- raise AssertionError('not implemented')
536
+ raise AssertionError(f'not implemented: {cls.__name__}')
518
537
 
519
538
  def isin(self, value_set: Any) -> 'exprs.InPredicate':
520
539
  from .in_predicate import InPredicate
@@ -586,10 +605,6 @@ class Expr(abc.ABC):
586
605
  # Return the `MethodRef` object itself; it requires arguments to become a `FunctionCall`
587
606
  return method_ref
588
607
 
589
- def __rshift__(self, other: object) -> 'exprs.Expr':
590
- # Implemented here for type-checking purposes
591
- raise excs.Error('The `>>` operator can only be applied to Json expressions')
592
-
593
608
  def __bool__(self) -> bool:
594
609
  raise TypeError(
595
610
  f'Pixeltable expressions cannot be used in conjunction with Python boolean operators (and/or/not)\n{self!r}'
@@ -639,13 +654,17 @@ class Expr(abc.ABC):
639
654
  def __neg__(self) -> 'exprs.ArithmeticExpr':
640
655
  return self._make_arithmetic_expr(ArithmeticOperator.MUL, -1)
641
656
 
642
- def __add__(self, other: object) -> 'exprs.ArithmeticExpr':
657
+ def __add__(self, other: object) -> Union[exprs.ArithmeticExpr, exprs.StringOp]:
658
+ if isinstance(self, str) or (isinstance(self, Expr) and self.col_type.is_string_type()):
659
+ return self._make_string_expr(StringOperator.CONCAT, other)
643
660
  return self._make_arithmetic_expr(ArithmeticOperator.ADD, other)
644
661
 
645
662
  def __sub__(self, other: object) -> 'exprs.ArithmeticExpr':
646
663
  return self._make_arithmetic_expr(ArithmeticOperator.SUB, other)
647
664
 
648
- def __mul__(self, other: object) -> 'exprs.ArithmeticExpr':
665
+ def __mul__(self, other: object) -> Union['exprs.ArithmeticExpr', 'exprs.StringOp']:
666
+ if isinstance(self, str) or (isinstance(self, Expr) and self.col_type.is_string_type()):
667
+ return self._make_string_expr(StringOperator.REPEAT, other)
649
668
  return self._make_arithmetic_expr(ArithmeticOperator.MUL, other)
650
669
 
651
670
  def __truediv__(self, other: object) -> 'exprs.ArithmeticExpr':
@@ -657,13 +676,17 @@ class Expr(abc.ABC):
657
676
  def __floordiv__(self, other: object) -> 'exprs.ArithmeticExpr':
658
677
  return self._make_arithmetic_expr(ArithmeticOperator.FLOORDIV, other)
659
678
 
660
- def __radd__(self, other: object) -> 'exprs.ArithmeticExpr':
679
+ def __radd__(self, other: object) -> Union['exprs.ArithmeticExpr', 'exprs.StringOp']:
680
+ if isinstance(other, str) or (isinstance(other, Expr) and other.col_type.is_string_type()):
681
+ return self._rmake_string_expr(StringOperator.CONCAT, other)
661
682
  return self._rmake_arithmetic_expr(ArithmeticOperator.ADD, other)
662
683
 
663
684
  def __rsub__(self, other: object) -> 'exprs.ArithmeticExpr':
664
685
  return self._rmake_arithmetic_expr(ArithmeticOperator.SUB, other)
665
686
 
666
- def __rmul__(self, other: object) -> 'exprs.ArithmeticExpr':
687
+ def __rmul__(self, other: object) -> Union['exprs.ArithmeticExpr', 'exprs.StringOp']:
688
+ if isinstance(other, str) or (isinstance(other, Expr) and other.col_type.is_string_type()):
689
+ return self._rmake_string_expr(StringOperator.REPEAT, other)
667
690
  return self._rmake_arithmetic_expr(ArithmeticOperator.MUL, other)
668
691
 
669
692
  def __rtruediv__(self, other: object) -> 'exprs.ArithmeticExpr':
@@ -675,6 +698,32 @@ class Expr(abc.ABC):
675
698
  def __rfloordiv__(self, other: object) -> 'exprs.ArithmeticExpr':
676
699
  return self._rmake_arithmetic_expr(ArithmeticOperator.FLOORDIV, other)
677
700
 
701
+ def _make_string_expr(self, op: StringOperator, other: object) -> 'exprs.StringOp':
702
+ """
703
+ Make left-handed version of string expression.
704
+ """
705
+ from .literal import Literal
706
+ from .string_op import StringOp
707
+
708
+ if isinstance(other, Expr):
709
+ return StringOp(op, self, other)
710
+ if isinstance(other, typing.get_args(LiteralPythonTypes)):
711
+ return StringOp(op, self, Literal(other))
712
+ raise TypeError(f'Other must be Expr or literal: {type(other)}')
713
+
714
+ def _rmake_string_expr(self, op: StringOperator, other: object) -> 'exprs.StringOp':
715
+ """
716
+ Right-handed version of _make_string_expr. other must be a literal; if it were an Expr,
717
+ the operation would have already been evaluated in its left-handed form.
718
+ """
719
+ from .literal import Literal
720
+ from .string_op import StringOp
721
+
722
+ assert not isinstance(other, Expr) # Else the left-handed form would have evaluated first
723
+ if isinstance(other, typing.get_args(LiteralPythonTypes)):
724
+ return StringOp(op, Literal(other), self)
725
+ raise TypeError(f'Other must be Expr or literal: {type(other)}')
726
+
678
727
  def _make_arithmetic_expr(self, op: ArithmeticOperator, other: object) -> 'exprs.ArithmeticExpr':
679
728
  """
680
729
  other: Union[Expr, LiteralPythonTypes]
@@ -360,10 +360,7 @@ class FunctionCall(Expr):
360
360
  return
361
361
  args, kwargs = args_kwargs
362
362
 
363
- if isinstance(self.fn, func.CallableFunction) and not self.fn.is_batched:
364
- # optimization: avoid additional level of indirection we'd get from calling Function.exec()
365
- data_row[self.slot_idx] = self.fn.py_fn(*args, **kwargs)
366
- elif self.is_window_fn_call:
363
+ if self.is_window_fn_call:
367
364
  assert isinstance(self.fn, func.AggregateFunction)
368
365
  agg_cls = self.fn.agg_class
369
366
  if self.has_group_by():
@@ -87,3 +87,15 @@ class ArithmeticOperator(enum.Enum):
87
87
  if self == self.FLOORDIV:
88
88
  return '//'
89
89
  raise AssertionError()
90
+
91
+
92
+ class StringOperator(enum.Enum):
93
+ CONCAT = 0
94
+ REPEAT = 1
95
+
96
+ def __str__(self) -> str:
97
+ if self == self.CONCAT:
98
+ return '+'
99
+ if self == self.REPEAT:
100
+ return '*'
101
+ raise AssertionError()
@@ -48,9 +48,9 @@ class JsonMapper(Expr):
48
48
  scope_anchor = ObjectRef(self.target_expr_scope, self)
49
49
  self.components.append(scope_anchor)
50
50
 
51
- def bind_rel_paths(self, mapper: Optional[JsonMapper] = None) -> None:
52
- self._src_expr.bind_rel_paths(mapper)
53
- self._target_expr.bind_rel_paths(self)
51
+ def _bind_rel_paths(self, mapper: Optional[JsonMapper] = None) -> None:
52
+ self._src_expr._bind_rel_paths(mapper)
53
+ self._target_expr._bind_rel_paths(self)
54
54
  self.parent_mapper = mapper
55
55
  parent_scope = _GLOBAL_SCOPE if mapper is None else mapper.target_expr_scope
56
56
  self.target_expr_scope.parent = parent_scope
@@ -86,7 +86,7 @@ class JsonMapper(Expr):
86
86
  return self._src_expr.equals(other._src_expr) and self._target_expr.equals(other._target_expr)
87
87
 
88
88
  def __repr__(self) -> str:
89
- return f'{self._src_expr} >> {self._target_expr}'
89
+ return f'map({self._src_expr}, lambda R: {self._target_expr})'
90
90
 
91
91
  @property
92
92
  def _src_expr(self) -> Expr:
@@ -80,11 +80,16 @@ class JsonPath(Expr):
80
80
  def is_relative_path(self) -> bool:
81
81
  return self._anchor is None
82
82
 
83
- def bind_rel_paths(self, mapper: Optional['JsonMapper'] = None) -> None:
84
- if not self.is_relative_path():
85
- return
86
- # TODO: take scope_idx into account
87
- self.set_anchor(mapper.scope_anchor)
83
+ @property
84
+ def _has_relative_path(self) -> bool:
85
+ return self.is_relative_path() or super()._has_relative_path
86
+
87
+ def _bind_rel_paths(self, mapper: Optional['JsonMapper'] = None) -> None:
88
+ if self.is_relative_path():
89
+ # TODO: take scope_idx into account
90
+ self.set_anchor(mapper.scope_anchor)
91
+ else:
92
+ self._anchor._bind_rel_paths(mapper)
88
93
 
89
94
  def __call__(self, *args: object, **kwargs: object) -> 'JsonPath':
90
95
  """
@@ -105,12 +110,6 @@ class JsonPath(Expr):
105
110
  return JsonPath(self._anchor, [*self.path_elements, index])
106
111
  raise excs.Error(f'Invalid json list index: {index}')
107
112
 
108
- def __rshift__(self, other: object) -> 'JsonMapper':
109
- rhs_expr = Expr.from_object(other)
110
- if rhs_expr is None:
111
- raise excs.Error(f'>> requires an expression on the right-hand side, found {type(other)}')
112
- return JsonMapper(self, rhs_expr)
113
-
114
113
  def default_column_name(self) -> Optional[str]:
115
114
  anchor_name = self._anchor.default_column_name() if self._anchor is not None else ''
116
115
  ret_name = f'{anchor_name}.{self._json_path()}'
@@ -23,26 +23,12 @@ class SimilarityExpr(Expr):
23
23
 
24
24
  self.components = [col_ref, item_expr]
25
25
 
26
- # determine index to use
27
- idx_info = col_ref.col.get_idx_info()
28
26
  from pixeltable import index
29
27
 
30
- embedding_idx_info = {
31
- info.name: info for info in idx_info.values() if isinstance(info.idx, index.EmbeddingIndex)
32
- }
33
- if len(embedding_idx_info) == 0:
34
- raise excs.Error(f'No index found for column {col_ref.col!r}')
35
- if idx_name is not None and idx_name not in embedding_idx_info:
36
- raise excs.Error(f'Index {idx_name!r} not found for column {col_ref.col.name!r}')
37
- if len(embedding_idx_info) > 1:
38
- if idx_name is None:
39
- raise excs.Error(
40
- f'Column {col_ref.col.name!r} has multiple indices; use the index name to disambiguate: '
41
- f'`{col_ref.col.name}.similarity(..., idx=<name>)`'
42
- )
43
- self.idx_info = embedding_idx_info[idx_name]
44
- else:
45
- self.idx_info = next(iter(embedding_idx_info.values()))
28
+ # determine index to use
29
+ idx_dict = ColumnRef.find_embedding_index(col_ref.col, idx_name, 'similarity')
30
+ assert len(idx_dict) == 1
31
+ self.idx_info = next(iter(idx_dict.values()))
46
32
  idx = self.idx_info.idx
47
33
  assert isinstance(idx, index.EmbeddingIndex)
48
34
 
@@ -86,8 +72,7 @@ class SimilarityExpr(Expr):
86
72
  return self.idx_info.idx.order_by_clause(self.idx_info.val_col, item, is_asc)
87
73
 
88
74
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
89
- # this should never get called
90
- raise AssertionError()
75
+ raise excs.Error('similarity(): cannot be used in a computed column')
91
76
 
92
77
  def _as_dict(self) -> dict:
93
78
  return {'idx_name': self.idx_info.name, **super()._as_dict()}