pixeltable 0.4.15__py3-none-any.whl → 0.4.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic; see the registry's advisory page for more details.

Files changed (57):
  1. pixeltable/__init__.py +4 -0
  2. pixeltable/catalog/catalog.py +105 -51
  3. pixeltable/catalog/column.py +7 -2
  4. pixeltable/catalog/table.py +1 -0
  5. pixeltable/catalog/table_metadata.py +4 -0
  6. pixeltable/catalog/table_version.py +99 -78
  7. pixeltable/catalog/table_version_handle.py +4 -1
  8. pixeltable/config.py +6 -0
  9. pixeltable/dataframe.py +10 -5
  10. pixeltable/env.py +48 -19
  11. pixeltable/exec/__init__.py +2 -0
  12. pixeltable/exec/cell_materialization_node.py +231 -0
  13. pixeltable/exec/cell_reconstruction_node.py +135 -0
  14. pixeltable/exec/exec_node.py +1 -1
  15. pixeltable/exec/expr_eval/evaluators.py +1 -0
  16. pixeltable/exec/expr_eval/expr_eval_node.py +3 -0
  17. pixeltable/exec/expr_eval/globals.py +2 -0
  18. pixeltable/exec/globals.py +32 -0
  19. pixeltable/exec/object_store_save_node.py +1 -4
  20. pixeltable/exec/row_update_node.py +16 -9
  21. pixeltable/exec/sql_node.py +107 -14
  22. pixeltable/exprs/__init__.py +1 -1
  23. pixeltable/exprs/arithmetic_expr.py +10 -11
  24. pixeltable/exprs/column_property_ref.py +10 -10
  25. pixeltable/exprs/column_ref.py +2 -2
  26. pixeltable/exprs/data_row.py +106 -37
  27. pixeltable/exprs/expr.py +9 -0
  28. pixeltable/exprs/expr_set.py +14 -7
  29. pixeltable/exprs/inline_expr.py +2 -19
  30. pixeltable/exprs/json_path.py +45 -12
  31. pixeltable/exprs/row_builder.py +54 -22
  32. pixeltable/functions/__init__.py +1 -0
  33. pixeltable/functions/bedrock.py +7 -0
  34. pixeltable/functions/deepseek.py +11 -4
  35. pixeltable/functions/llama_cpp.py +7 -0
  36. pixeltable/functions/math.py +1 -1
  37. pixeltable/functions/ollama.py +7 -0
  38. pixeltable/functions/openai.py +4 -4
  39. pixeltable/functions/openrouter.py +143 -0
  40. pixeltable/globals.py +10 -4
  41. pixeltable/io/globals.py +16 -15
  42. pixeltable/io/table_data_conduit.py +46 -21
  43. pixeltable/metadata/__init__.py +1 -1
  44. pixeltable/metadata/converters/convert_40.py +73 -0
  45. pixeltable/metadata/notes.py +1 -0
  46. pixeltable/plan.py +175 -46
  47. pixeltable/store.py +1 -1
  48. pixeltable/type_system.py +5 -3
  49. pixeltable/utils/console_output.py +4 -1
  50. pixeltable/utils/exception_handler.py +5 -28
  51. pixeltable/utils/image.py +7 -0
  52. pixeltable/utils/misc.py +5 -0
  53. {pixeltable-0.4.15.dist-info → pixeltable-0.4.16.dist-info}/METADATA +2 -1
  54. {pixeltable-0.4.15.dist-info → pixeltable-0.4.16.dist-info}/RECORD +57 -50
  55. {pixeltable-0.4.15.dist-info → pixeltable-0.4.16.dist-info}/WHEEL +0 -0
  56. {pixeltable-0.4.15.dist-info → pixeltable-0.4.16.dist-info}/entry_points.txt +0 -0
  57. {pixeltable-0.4.15.dist-info → pixeltable-0.4.16.dist-info}/licenses/LICENSE +0 -0
@@ -11,6 +11,7 @@ from uuid import UUID
11
11
 
12
12
  import jsonschema.exceptions
13
13
  import sqlalchemy as sql
14
+ from sqlalchemy import exc as sql_exc
14
15
 
15
16
  import pixeltable as pxt
16
17
  import pixeltable.exceptions as excs
@@ -21,20 +22,16 @@ from pixeltable.metadata import schema
21
22
  from pixeltable.utils.filecache import FileCache
22
23
  from pixeltable.utils.object_stores import ObjectOps
23
24
 
24
- from .tbl_ops import TableOp
25
-
26
- if TYPE_CHECKING:
27
- from pixeltable.plan import SampleClause
28
-
29
25
  from ..func.globals import resolve_symbol
30
26
  from .column import Column
31
27
  from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, MediaValidation, is_valid_identifier
28
+ from .tbl_ops import TableOp
32
29
  from .update_status import RowCountStats, UpdateStatus
33
30
 
34
31
  if TYPE_CHECKING:
35
32
  from pixeltable import exec, store
36
-
37
- from .table_version_handle import TableVersionHandle
33
+ from pixeltable.catalog.table_version_handle import TableVersionHandle
34
+ from pixeltable.plan import SampleClause
38
35
 
39
36
  _logger = logging.getLogger('pixeltable')
40
37
 
@@ -294,7 +291,14 @@ class TableVersion:
294
291
  cat = pxt.catalog.Catalog.get()
295
292
 
296
293
  tbl_id = UUID(hex=inital_md.tbl_md.tbl_id)
294
+ assert (tbl_id, None) not in cat._tbl_versions
297
295
  tbl_version = cls(tbl_id, inital_md.tbl_md, inital_md.version_md, None, inital_md.schema_version_md, [])
296
+
297
+ @cat.register_undo_action
298
+ def _() -> None:
299
+ if (tbl_id, None) in cat._tbl_versions:
300
+ del cat._tbl_versions[tbl_id, None]
301
+
298
302
  # TODO: break this up, so that Catalog.create_table() registers tbl_version
299
303
  cat._tbl_versions[tbl_id, None] = tbl_version
300
304
  tbl_version.init()
@@ -507,9 +511,7 @@ class TableVersion:
507
511
 
508
512
  def add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
509
513
  # we're creating a new schema version
510
- self.version += 1
511
- self.created_at = time.time()
512
- self.schema_version = self.version
514
+ self.bump_version(bump_schema_version=True)
513
515
  status = self._add_index(col, idx_name, idx)
514
516
  self._write_md(new_version=True, new_schema_version=True)
515
517
  _logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
@@ -620,9 +622,7 @@ class TableVersion:
620
622
  assert idx_id in self._tbl_md.index_md
621
623
 
622
624
  # we're creating a new schema version
623
- self.version += 1
624
- self.created_at = time.time()
625
- self.schema_version = self.version
625
+ self.bump_version(bump_schema_version=True)
626
626
  idx_md = self._tbl_md.index_md[idx_id]
627
627
  idx_md.schema_version_drop = self.schema_version
628
628
  assert idx_md.name in self.idxs_by_name
@@ -651,9 +651,7 @@ class TableVersion:
651
651
  self.next_col_id += 1
652
652
 
653
653
  # we're creating a new schema version
654
- self.version += 1
655
- self.created_at = time.time()
656
- self.schema_version = self.version
654
+ self.bump_version(bump_schema_version=True)
657
655
  index_cols: dict[Column, tuple[index.BtreeIndex, Column, Column]] = {}
658
656
  all_cols: list[Column] = []
659
657
  for col in cols:
@@ -685,7 +683,11 @@ class TableVersion:
685
683
  self, cols: Iterable[Column], print_stats: bool, on_error: Literal['abort', 'ignore']
686
684
  ) -> UpdateStatus:
687
685
  """Add and populate columns within the current transaction"""
686
+ from pixeltable.catalog import Catalog
687
+ from pixeltable.plan import Planner
688
+
688
689
  cols_to_add = list(cols)
690
+
689
691
  row_count = self.store_tbl.count()
690
692
  for col in cols_to_add:
691
693
  assert col.tbl is self
@@ -722,17 +724,19 @@ class TableVersion:
722
724
  continue
723
725
 
724
726
  # populate the column
725
- from pixeltable.plan import Planner
726
-
727
727
  plan = Planner.create_add_column_plan(self.path, col)
728
728
  plan.ctx.num_rows = row_count
729
729
  try:
730
730
  plan.open()
731
731
  try:
732
732
  excs_per_col = self.store_tbl.load_column(col, plan, on_error == 'abort')
733
- except sql.exc.DBAPIError as exc:
734
- # Wrap the DBAPIError in an excs.Error to unify processing in the subsequent except block
735
- raise excs.Error(f'SQL error during execution of computed column `{col.name}`:\n{exc}') from exc
733
+ except sql_exc.DBAPIError as exc:
734
+ Catalog.get().convert_sql_exc(exc, self.id, self.handle, convert_db_excs=True)
735
+ # If it wasn't converted, re-raise as a generic Pixeltable error
736
+ # (this means it's not a known concurrency error; it's something else)
737
+ raise excs.Error(
738
+ f'Unexpected SQL error during execution of computed column {col.name!r}:\n{exc}'
739
+ ) from exc
736
740
  if excs_per_col > 0:
737
741
  cols_with_excs.append(col)
738
742
  num_excs += excs_per_col
@@ -740,7 +744,7 @@ class TableVersion:
740
744
  finally:
741
745
  plan.close()
742
746
 
743
- pxt.catalog.Catalog.get().record_column_dependencies(self)
747
+ Catalog.get().record_column_dependencies(self)
744
748
 
745
749
  if print_stats:
746
750
  plan.ctx.profile.print(num_rows=row_count)
@@ -760,9 +764,7 @@ class TableVersion:
760
764
  assert self.is_mutable
761
765
 
762
766
  # we're creating a new schema version
763
- self.version += 1
764
- self.created_at = time.time()
765
- self.schema_version = self.version
767
+ self.bump_version(bump_schema_version=True)
766
768
 
767
769
  # drop this column and all dependent index columns and indices
768
770
  dropped_cols = [col]
@@ -826,9 +828,7 @@ class TableVersion:
826
828
  self._schema_version_md.columns[col.id].name = new_name
827
829
 
828
830
  # we're creating a new schema version
829
- self.version += 1
830
- self.created_at = time.time()
831
- self.schema_version = self.version
831
+ self.bump_version(bump_schema_version=True)
832
832
 
833
833
  self._write_md(new_version=True, new_schema_version=True)
834
834
  _logger.info(f'Renamed column {old_name} to {new_name} in table {self.name}, new version: {self.version}')
@@ -848,9 +848,7 @@ class TableVersion:
848
848
 
849
849
  def _create_schema_version(self) -> None:
850
850
  # we're creating a new schema version
851
- self.version += 1
852
- self.created_at = time.time()
853
- self.schema_version = self.version
851
+ self.bump_version(bump_schema_version=True)
854
852
  self._write_md(new_version=True, new_schema_version=True)
855
853
  _logger.info(f'[{self.name}] Updating table schema to version: {self.version}')
856
854
 
@@ -897,8 +895,7 @@ class TableVersion:
897
895
  ) -> UpdateStatus:
898
896
  """Insert rows produced by exec_plan and propagate to views"""
899
897
  # we're creating a new version
900
- self.version += 1
901
- self.created_at = timestamp
898
+ self.bump_version(timestamp, bump_schema_version=False)
902
899
  cols_with_excs, row_counts = self.store_tbl.insert_rows(
903
900
  exec_plan, v_min=self.version, rowids=rowids, abort_on_exc=abort_on_exc
904
901
  )
@@ -933,10 +930,11 @@ class TableVersion:
933
930
  cascade: if True, also update all computed columns that transitively depend on the updated columns,
934
931
  including within views.
935
932
  """
936
- assert self.is_mutable
937
-
933
+ from pixeltable.exprs import SqlElementCache
938
934
  from pixeltable.plan import Planner
939
935
 
936
+ assert self.is_mutable
937
+
940
938
  update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True, allow_media=True)
941
939
  if where is not None:
942
940
  if not isinstance(where, exprs.Expr):
@@ -947,7 +945,6 @@ class TableVersion:
947
945
  raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
948
946
 
949
947
  plan, updated_cols, recomputed_cols = Planner.create_update_plan(self.path, update_spec, [], where, cascade)
950
- from pixeltable.exprs import SqlElementCache
951
948
 
952
949
  result = self.propagate_update(
953
950
  plan,
@@ -974,11 +971,11 @@ class TableVersion:
974
971
  batch: one dict per row, each mapping Columns to LiteralExprs representing the new values
975
972
  rowids: if not empty, one tuple per row, each containing the rowid values for the corresponding row in batch
976
973
  """
974
+ from pixeltable.plan import Planner
975
+
977
976
  # if we do lookups of rowids, we must have one for each row in the batch
978
977
  assert len(rowids) == 0 or len(rowids) == len(batch)
979
978
 
980
- from pixeltable.plan import Planner
981
-
982
979
  plan, row_update_node, delete_where_clause, updated_cols, recomputed_cols = Planner.create_batch_update_plan(
983
980
  self.path, batch, rowids, cascade=cascade
984
981
  )
@@ -1050,14 +1047,14 @@ class TableVersion:
1050
1047
  def recompute_columns(
1051
1048
  self, col_names: list[str], where: exprs.Expr | None = None, errors_only: bool = False, cascade: bool = True
1052
1049
  ) -> UpdateStatus:
1050
+ from pixeltable.exprs import CompoundPredicate, SqlElementCache
1051
+ from pixeltable.plan import Planner
1052
+
1053
1053
  assert self.is_mutable
1054
1054
  assert all(name in self.cols_by_name for name in col_names)
1055
1055
  assert len(col_names) > 0
1056
1056
  assert len(col_names) == 1 or not errors_only
1057
1057
 
1058
- from pixeltable.exprs import CompoundPredicate
1059
- from pixeltable.plan import Planner
1060
-
1061
1058
  target_columns = [self.cols_by_name[name] for name in col_names]
1062
1059
  where_clause: Optional[exprs.Expr] = None
1063
1060
  if where is not None:
@@ -1072,7 +1069,6 @@ class TableVersion:
1072
1069
  plan, updated_cols, recomputed_cols = Planner.create_update_plan(
1073
1070
  self.path, update_targets={}, recompute_targets=target_columns, where_clause=where_clause, cascade=cascade
1074
1071
  )
1075
- from pixeltable.exprs import SqlElementCache
1076
1072
 
1077
1073
  result = self.propagate_update(
1078
1074
  plan,
@@ -1096,11 +1092,14 @@ class TableVersion:
1096
1092
  cascade: bool,
1097
1093
  show_progress: bool = True,
1098
1094
  ) -> UpdateStatus:
1095
+ from pixeltable.catalog import Catalog
1096
+ from pixeltable.plan import Planner
1097
+
1098
+ Catalog.get().mark_modified_tvs(self.handle)
1099
1099
  result = UpdateStatus()
1100
1100
  create_new_table_version = plan is not None
1101
1101
  if create_new_table_version:
1102
- self.version += 1
1103
- self.created_at = timestamp
1102
+ self.bump_version(timestamp, bump_schema_version=False)
1104
1103
  cols_with_excs, row_counts = self.store_tbl.insert_rows(
1105
1104
  plan, v_min=self.version, show_progress=show_progress
1106
1105
  )
@@ -1119,8 +1118,6 @@ class TableVersion:
1119
1118
  recomputed_cols = [col for col in recomputed_view_cols if col.tbl.id == view.id]
1120
1119
  plan = None
1121
1120
  if len(recomputed_cols) > 0:
1122
- from pixeltable.plan import Planner
1123
-
1124
1121
  plan = Planner.create_view_update_plan(view.get().path, recompute_targets=recomputed_cols)
1125
1122
  status = view.get().propagate_update(
1126
1123
  plan, None, recomputed_view_cols, base_versions=base_versions, timestamp=timestamp, cascade=True
@@ -1155,6 +1152,10 @@ class TableVersion:
1155
1152
  self, where: Optional[exprs.Expr], base_versions: list[Optional[int]], timestamp: float
1156
1153
  ) -> UpdateStatus:
1157
1154
  """Delete rows in this table and propagate to views"""
1155
+ from pixeltable.catalog import Catalog
1156
+
1157
+ Catalog.get().mark_modified_tvs(self.handle)
1158
+
1158
1159
  # print(f'calling sql_expr()')
1159
1160
  sql_where_clause = where.sql_expr(exprs.SqlElementCache()) if where is not None else None
1160
1161
  # #print(f'sql_where_clause={str(sql_where_clause) if sql_where_clause is not None else None}')
@@ -1171,8 +1172,7 @@ class TableVersion:
1171
1172
  result = UpdateStatus(row_count_stats=row_counts)
1172
1173
  if del_rows > 0:
1173
1174
  # we're creating a new version
1174
- self.version += 1
1175
- self.created_at = timestamp
1175
+ self.bump_version(timestamp, bump_schema_version=False)
1176
1176
  for view in self.mutable_views:
1177
1177
  status = view.get().propagate_delete(
1178
1178
  where=None, base_versions=[self.version, *base_versions], timestamp=timestamp
@@ -1198,6 +1198,8 @@ class TableVersion:
1198
1198
  Doesn't attempt to revert the in-memory metadata, but instead invalidates this TableVersion instance
1199
1199
  and relies on Catalog to reload it
1200
1200
  """
1201
+ from pixeltable.catalog import Catalog
1202
+
1201
1203
  conn = Env.get().conn
1202
1204
  # make sure we don't have a snapshot referencing this version
1203
1205
  # (unclear how to express this with sqlalchemy)
@@ -1217,8 +1219,6 @@ class TableVersion:
1217
1219
  )
1218
1220
  )
1219
1221
 
1220
- # delete newly-added data
1221
- self.delete_media(tbl_version=self.version)
1222
1222
  conn.execute(sql.delete(self.store_tbl.sa_tbl).where(self.store_tbl.sa_tbl.c.v_min == self.version))
1223
1223
 
1224
1224
  # revert new deletions
@@ -1233,6 +1233,8 @@ class TableVersion:
1233
1233
  # revert schema changes:
1234
1234
  # - undo changes to self._tbl_md and write that back
1235
1235
  # - delete newly-added TableVersion/TableSchemaVersion records
1236
+ Catalog.get().mark_modified_tvs(self.handle)
1237
+ old_version = self.version
1236
1238
  if self.version == self.schema_version:
1237
1239
  # physically delete newly-added columns and remove them from the stored md
1238
1240
  added_cols = [col for col in self.cols if col.schema_version_add == self.schema_version]
@@ -1279,7 +1281,8 @@ class TableVersion:
1279
1281
  .where(schema.TableVersion.version == self.version)
1280
1282
  )
1281
1283
 
1282
- self.version -= 1
1284
+ self._tbl_md.current_version = self._version_md.version = self.version - 1
1285
+
1283
1286
  self._write_md(new_version=False, new_schema_version=False)
1284
1287
 
1285
1288
  # propagate to views
@@ -1290,7 +1293,12 @@ class TableVersion:
1290
1293
 
1291
1294
  # force reload on next operation
1292
1295
  self.is_validated = False
1293
- pxt.catalog.Catalog.get().remove_tbl_version(self)
1296
+ Catalog.get().remove_tbl_version(self)
1297
+
1298
+ # delete newly-added data
1299
+ # Do this at the end, after all DB operations have completed.
1300
+ # TODO: The transaction could still fail. Really this should be done via PendingTableOps.
1301
+ self.delete_media(tbl_version=old_version)
1294
1302
  _logger.info(f'TableVersion {self.name}: reverted to version {self.version}')
1295
1303
 
1296
1304
  def _init_external_stores(self) -> None:
@@ -1301,9 +1309,7 @@ class TableVersion:
1301
1309
  self.external_stores[store.name] = store
1302
1310
 
1303
1311
  def link_external_store(self, store: pxt.io.ExternalStore) -> None:
1304
- self.version += 1
1305
- self.created_at = time.time()
1306
- self.schema_version = self.version
1312
+ self.bump_version(bump_schema_version=True)
1307
1313
 
1308
1314
  self.external_stores[store.name] = store
1309
1315
  self._tbl_md.external_stores.append(
@@ -1313,9 +1319,7 @@ class TableVersion:
1313
1319
 
1314
1320
  def unlink_external_store(self, store: pxt.io.ExternalStore) -> None:
1315
1321
  del self.external_stores[store.name]
1316
- self.version += 1
1317
- self.created_at = time.time()
1318
- self.schema_version = self.version
1322
+ self.bump_version(bump_schema_version=True)
1319
1323
  idx = next(i for i, store_md in enumerate(self._tbl_md.external_stores) if store_md['md']['name'] == store.name)
1320
1324
  self._tbl_md.external_stores.pop(idx)
1321
1325
  self._write_md(new_version=True, new_schema_version=True)
@@ -1371,35 +1375,52 @@ class TableVersion:
1371
1375
  # if this is a snapshot instance, we need to ignore current_version
1372
1376
  return self._tbl_md.current_version if self.effective_version is None else self.effective_version
1373
1377
 
1374
- @version.setter
1375
- def version(self, version: int) -> None:
1376
- assert self.effective_version is None
1377
- self._tbl_md.current_version = version
1378
- self._version_md.version = version
1379
-
1380
1378
  @property
1381
1379
  def created_at(self) -> float:
1382
1380
  return self._version_md.created_at
1383
1381
 
1384
- @created_at.setter
1385
- def created_at(self, ts: float) -> None:
1386
- assert self.effective_version is None
1387
- self._version_md.created_at = ts
1388
-
1389
1382
  @property
1390
1383
  def schema_version(self) -> int:
1391
1384
  return self._schema_version_md.schema_version
1392
1385
 
1393
- @schema_version.setter
1394
- def schema_version(self, version: int) -> None:
1386
+ def bump_version(self, timestamp: Optional[float] = None, *, bump_schema_version: bool) -> None:
1387
+ """
1388
+ Increments the table version and adjusts all associated metadata. This will *not* trigger a database action;
1389
+ _write_md() must be called separately to persist the changes.
1390
+
1391
+ Args:
1392
+ timestamp: the creation time for the new version. Can be used to synchronize multiple metadata changes
1393
+ to the same timestamp. If `None`, then defaults to `time.time()`.
1394
+ bump_schema_version: if True, also adjusts the schema version (setting it equal to the new version)
1395
+ and associated metadata.
1396
+ """
1397
+ from pixeltable.catalog import Catalog
1398
+
1395
1399
  assert self.effective_version is None
1396
- self._tbl_md.current_schema_version = version
1397
- self._version_md.schema_version = version
1398
- self._schema_version_md.preceding_schema_version = self._schema_version_md.schema_version
1399
- self._schema_version_md.schema_version = version
1400
+
1401
+ if timestamp is None:
1402
+ timestamp = time.time()
1403
+
1404
+ Catalog.get().mark_modified_tvs(self.handle)
1405
+
1406
+ old_version = self._tbl_md.current_version
1407
+ assert self._version_md.version == old_version
1408
+ new_version = old_version + 1
1409
+ self._tbl_md.current_version = new_version
1410
+ self._version_md.version = new_version
1411
+ self._version_md.created_at = timestamp
1412
+
1413
+ if bump_schema_version:
1414
+ old_schema_version = self._tbl_md.current_schema_version
1415
+ assert self._version_md.schema_version == old_schema_version
1416
+ assert self._schema_version_md.schema_version == old_schema_version
1417
+ self._tbl_md.current_schema_version = new_version
1418
+ self._version_md.schema_version = new_version
1419
+ self._schema_version_md.preceding_schema_version = old_schema_version
1420
+ self._schema_version_md.schema_version = new_version
1400
1421
 
1401
1422
  @property
1402
- def preceding_schema_version(self) -> int:
1423
+ def preceding_schema_version(self) -> Optional[int]:
1403
1424
  return self._schema_version_md.preceding_schema_version
1404
1425
 
1405
1426
  @property
@@ -1531,8 +1552,8 @@ class TableVersion:
1531
1552
 
1532
1553
  @classmethod
1533
1554
  def from_dict(cls, d: dict) -> TableVersion:
1534
- from pixeltable import catalog
1555
+ from pixeltable.catalog import Catalog
1535
1556
 
1536
1557
  id = UUID(d['id'])
1537
1558
  effective_version = d['effective_version']
1538
- return catalog.Catalog.get().get_tbl_version(id, effective_version)
1559
+ return Catalog.get().get_tbl_version(id, effective_version)
@@ -37,6 +37,9 @@ class TableVersionHandle:
37
37
  def __hash__(self) -> int:
38
38
  return hash((self.id, self.effective_version))
39
39
 
40
+ def __repr__(self) -> str:
41
+ return f'TableVersionHandle(id={self.id!r}, effective_version={self.effective_version})'
42
+
40
43
  @property
41
44
  def is_snapshot(self) -> bool:
42
45
  return self.effective_version is not None
@@ -81,7 +84,7 @@ class ColumnHandle:
81
84
  if self.col_id not in self.tbl_version.get().cols_by_id:
82
85
  schema_version_drop = self.tbl_version.get()._tbl_md.column_md[self.col_id].schema_version_drop
83
86
  raise excs.Error(
84
- f'Column has been dropped (no record for column ID {self.col_id} in table '
87
+ f'Column was dropped (no record for column ID {self.col_id} in table '
85
88
  f'{self.tbl_version.get().versioned_name!r}; it was dropped in table version {schema_version_drop})'
86
89
  )
87
90
  return self.tbl_version.get().cols_by_id[self.col_id]
pixeltable/config.py CHANGED
@@ -181,6 +181,12 @@ KNOWN_CONFIG_OPTIONS = {
181
181
  'api_version': 'API version if using Azure OpenAI',
182
182
  'rate_limits': 'Per-model rate limits for OpenAI API requests',
183
183
  },
184
+ 'openrouter': {
185
+ 'api_key': 'OpenRouter API key',
186
+ 'site_url': 'Optional URL for your application (for OpenRouter analytics)',
187
+ 'app_name': 'Optional name for your application (for OpenRouter analytics)',
188
+ 'rate_limit': 'Rate limit for OpenRouter API requests',
189
+ },
184
190
  'replicate': {'api_token': 'Replicate API token'},
185
191
  'together': {
186
192
  'api_key': 'Together API key',
pixeltable/dataframe.py CHANGED
@@ -23,7 +23,7 @@ from typing import (
23
23
 
24
24
  import pandas as pd
25
25
  import pydantic
26
- import sqlalchemy as sql
26
+ import sqlalchemy.exc as sql_exc
27
27
 
28
28
  from pixeltable import catalog, exceptions as excs, exec, exprs, plan, type_system as ts
29
29
  from pixeltable.catalog import Catalog, is_valid_identifier
@@ -186,6 +186,8 @@ class DataFrameResultSet:
186
186
 
187
187
 
188
188
  class DataFrame:
189
+ """Represents a query for retrieving and transforming data from Pixeltable tables."""
190
+
189
191
  _from_clause: plan.FromClause
190
192
  _select_list_exprs: list[exprs.Expr]
191
193
  _schema: dict[str, ts.ColumnType]
@@ -539,20 +541,23 @@ class DataFrame:
539
541
  yield [data_row[e.slot_idx] for e in self._select_list_exprs]
540
542
  except excs.ExprEvalError as e:
541
543
  self._raise_expr_eval_err(e)
542
- except sql.exc.DBAPIError as e:
543
- raise excs.Error(f'Error during SQL execution:\n{e}') from e
544
+ except (sql_exc.DBAPIError, sql_exc.OperationalError, sql_exc.InternalError) as e:
545
+ Catalog.get().convert_sql_exc(e, tbl=(single_tbl.tbl_version if single_tbl is not None else None))
546
+ raise # just re-raise if not converted to a Pixeltable error
544
547
 
545
548
  def collect(self) -> DataFrameResultSet:
546
549
  return DataFrameResultSet(list(self._output_row_iterator()), self.schema)
547
550
 
548
551
  async def _acollect(self) -> DataFrameResultSet:
552
+ single_tbl = self._first_tbl if len(self._from_clause.tbls) == 1 else None
549
553
  try:
550
554
  result = [[row[e.slot_idx] for e in self._select_list_exprs] async for row in self._aexec()]
551
555
  return DataFrameResultSet(result, self.schema)
552
556
  except excs.ExprEvalError as e:
553
557
  self._raise_expr_eval_err(e)
554
- except sql.exc.DBAPIError as e:
555
- raise excs.Error(f'Error during SQL execution:\n{e}') from e
558
+ except (sql_exc.DBAPIError, sql_exc.OperationalError, sql_exc.InternalError) as e:
559
+ Catalog.get().convert_sql_exc(e, tbl=(single_tbl.tbl_version if single_tbl is not None else None))
560
+ raise # just re-raise if not converted to a Pixeltable error
556
561
 
557
562
  def count(self) -> int:
558
563
  """Return the number of rows in the DataFrame.
pixeltable/env.py CHANGED
@@ -27,6 +27,7 @@ from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
27
27
  import nest_asyncio # type: ignore[import-untyped]
28
28
  import pixeltable_pgserver
29
29
  import sqlalchemy as sql
30
+ import tzlocal
30
31
  from pillow_heif import register_heif_opener # type: ignore[import-untyped]
31
32
  from sqlalchemy import orm
32
33
  from tenacity import retry, stop_after_attempt, wait_exponential_jitter
@@ -71,6 +72,7 @@ class Env:
71
72
  _db_server: Optional[pixeltable_pgserver.PostgresServer] # set only when running in local environment
72
73
  _db_url: Optional[str]
73
74
  _default_time_zone: Optional[ZoneInfo]
75
+ _verbosity: int
74
76
 
75
77
  # info about optional packages that are utilized by some parts of the code
76
78
  __optional_packages: dict[str, PackageInfo]
@@ -218,10 +220,18 @@ class Env:
218
220
  """
219
221
  This is not a publicly visible setter; it is only for testing purposes.
220
222
  """
221
- tz_name = None if tz is None else tz.key
223
+ if tz is None:
224
+ tz_name = self._get_tz_name()
225
+ else:
226
+ assert isinstance(tz, ZoneInfo)
227
+ tz_name = tz.key
222
228
  self.engine.dispose()
223
229
  self._create_engine(time_zone_name=tz_name)
224
230
 
231
+ @property
232
+ def verbosity(self) -> int:
233
+ return self._verbosity
234
+
225
235
  @property
226
236
  def conn(self) -> Optional[sql.Connection]:
227
237
  assert self._current_conn is not None
@@ -237,6 +247,11 @@ class Env:
237
247
  assert self._dbms is not None
238
248
  return self._dbms
239
249
 
250
+ @property
251
+ def is_using_cockroachdb(self) -> bool:
252
+ assert self._dbms is not None
253
+ return isinstance(self._dbms, CockroachDbms)
254
+
240
255
  @property
241
256
  def in_xact(self) -> bool:
242
257
  return self._current_conn is not None
@@ -247,7 +262,7 @@ class Env:
247
262
  return self._db_server is not None
248
263
 
249
264
  @contextmanager
250
- def begin_xact(self, for_write: bool = False) -> Iterator[sql.Connection]:
265
+ def begin_xact(self, *, for_write: bool = False) -> Iterator[sql.Connection]:
251
266
  """
252
267
  Call Catalog.begin_xact() instead, unless there is a specific reason to call this directly.
253
268
 
@@ -350,6 +365,26 @@ class Env:
350
365
  def console_logger(self) -> ConsoleLogger:
351
366
  return self._console_logger
352
367
 
368
+ def _get_tz_name(self) -> str:
369
+ """Get the time zone name from the configuration, or the system local time zone if not specified.
370
+
371
+ Returns:
372
+ str: The time zone name.
373
+ """
374
+ tz_name = Config.get().get_string_value('time_zone')
375
+ if tz_name is not None:
376
+ # Validate tzname
377
+ if not isinstance(tz_name, str):
378
+ self._logger.error('Invalid time zone specified in configuration.')
379
+ else:
380
+ try:
381
+ _ = ZoneInfo(tz_name)
382
+ except ZoneInfoNotFoundError:
383
+ self._logger.error(f'Invalid time zone specified in configuration: {tz_name}')
384
+ else:
385
+ tz_name = tzlocal.get_localzone_name()
386
+ return tz_name
387
+
353
388
  def _set_up(self, echo: bool = False, reinit_db: bool = False) -> None:
354
389
  if self._initialized:
355
390
  return
@@ -393,10 +428,12 @@ class Env:
393
428
  warnings.simplefilter('ignore', category=UserWarning)
394
429
  warnings.simplefilter('ignore', category=FutureWarning)
395
430
 
396
- # Set verbose level for user visible console messages
397
- verbosity = map_level(config.get_int_value('verbosity'))
431
+ # Set verbosity level for user visible console messages
432
+ self._verbosity = config.get_int_value('verbosity')
433
+ if self._verbosity is None:
434
+ self._verbosity = 1
398
435
  stdout_handler = ConsoleOutputHandler(stream=stdout)
399
- stdout_handler.setLevel(verbosity)
436
+ stdout_handler.setLevel(map_level(self._verbosity))
400
437
  stdout_handler.addFilter(ConsoleMessageFilter())
401
438
  self._logger.addHandler(stdout_handler)
402
439
  self._console_logger = ConsoleLogger(self._logger)
@@ -430,6 +467,7 @@ class Env:
430
467
  http_logger.propagate = False
431
468
 
432
469
  self.clear_tmp_dir()
470
+ tz_name = self._get_tz_name()
433
471
 
434
472
  # configure pixeltable database
435
473
  self._init_db(config)
@@ -439,22 +477,10 @@ class Env:
439
477
  'Reinitializing pixeltable database is not supported when running in non-local environment'
440
478
  )
441
479
 
442
- tz_name = config.get_string_value('time_zone')
443
- if tz_name is not None:
444
- # Validate tzname
445
- if not isinstance(tz_name, str):
446
- self._logger.error('Invalid time zone specified in configuration.')
447
- else:
448
- try:
449
- _ = ZoneInfo(tz_name)
450
- except ZoneInfoNotFoundError:
451
- self._logger.error(f'Invalid time zone specified in configuration: {tz_name}')
452
-
453
480
  if reinit_db and self._store_db_exists():
454
481
  self._drop_store_db()
455
482
 
456
483
  create_db = not self._store_db_exists()
457
-
458
484
  if create_db:
459
485
  self._logger.info(f'creating database at: {self.db_url}')
460
486
  self._create_store_db()
@@ -534,13 +560,16 @@ class Env:
534
560
  metadata.schema.base_metadata.create_all(self._sa_engine, checkfirst=True)
535
561
  metadata.create_system_info(self._sa_engine)
536
562
 
537
- def _create_engine(self, time_zone_name: Optional[str], echo: bool = False) -> None:
538
- connect_args = {} if time_zone_name is None else {'options': f'-c timezone={time_zone_name}'}
563
+ def _create_engine(self, time_zone_name: str, echo: bool = False) -> None:
564
+ connect_args = {'options': f'-c timezone={time_zone_name}'}
565
+ self._logger.info(f'Creating SQLAlchemy engine with connection arguments: {connect_args}')
539
566
  self._sa_engine = sql.create_engine(
540
567
  self.db_url, echo=echo, isolation_level=self._dbms.transaction_isolation_level, connect_args=connect_args
541
568
  )
542
569
 
543
570
  self._logger.info(f'Created SQLAlchemy engine at: {self.db_url}')
571
+ self._logger.info(f'Engine dialect: {self._sa_engine.dialect.name}')
572
+ self._logger.info(f'Engine driver : {self._sa_engine.dialect.driver}')
544
573
 
545
574
  with self.engine.begin() as conn:
546
575
  tz_name = conn.execute(sql.text('SHOW TIME ZONE')).scalar()
@@ -2,6 +2,8 @@
2
2
 
3
3
  from .aggregation_node import AggregationNode
4
4
  from .cache_prefetch_node import CachePrefetchNode
5
+ from .cell_materialization_node import CellMaterializationNode
6
+ from .cell_reconstruction_node import CellReconstructionNode
5
7
  from .component_iteration_node import ComponentIterationNode
6
8
  from .data_row_batch import DataRowBatch
7
9
  from .exec_context import ExecContext