pixeltable 0.4.14__py3-none-any.whl → 0.4.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic; see the release details below for more information.

Files changed (64)
  1. pixeltable/__init__.py +6 -1
  2. pixeltable/catalog/catalog.py +107 -45
  3. pixeltable/catalog/column.py +7 -2
  4. pixeltable/catalog/table.py +1 -0
  5. pixeltable/catalog/table_metadata.py +5 -0
  6. pixeltable/catalog/table_version.py +100 -106
  7. pixeltable/catalog/table_version_handle.py +4 -1
  8. pixeltable/catalog/update_status.py +12 -0
  9. pixeltable/config.py +6 -0
  10. pixeltable/dataframe.py +11 -5
  11. pixeltable/env.py +52 -19
  12. pixeltable/exec/__init__.py +2 -0
  13. pixeltable/exec/cell_materialization_node.py +231 -0
  14. pixeltable/exec/cell_reconstruction_node.py +135 -0
  15. pixeltable/exec/exec_node.py +1 -1
  16. pixeltable/exec/expr_eval/evaluators.py +1 -0
  17. pixeltable/exec/expr_eval/expr_eval_node.py +14 -0
  18. pixeltable/exec/expr_eval/globals.py +2 -0
  19. pixeltable/exec/globals.py +32 -0
  20. pixeltable/exec/object_store_save_node.py +1 -4
  21. pixeltable/exec/row_update_node.py +16 -9
  22. pixeltable/exec/sql_node.py +107 -14
  23. pixeltable/exprs/__init__.py +1 -1
  24. pixeltable/exprs/arithmetic_expr.py +10 -11
  25. pixeltable/exprs/column_property_ref.py +10 -10
  26. pixeltable/exprs/column_ref.py +2 -2
  27. pixeltable/exprs/data_row.py +106 -37
  28. pixeltable/exprs/expr.py +9 -0
  29. pixeltable/exprs/expr_set.py +14 -7
  30. pixeltable/exprs/inline_expr.py +2 -19
  31. pixeltable/exprs/json_path.py +45 -12
  32. pixeltable/exprs/row_builder.py +54 -22
  33. pixeltable/functions/__init__.py +1 -0
  34. pixeltable/functions/bedrock.py +7 -0
  35. pixeltable/functions/deepseek.py +11 -4
  36. pixeltable/functions/llama_cpp.py +7 -0
  37. pixeltable/functions/math.py +1 -1
  38. pixeltable/functions/ollama.py +7 -0
  39. pixeltable/functions/openai.py +4 -4
  40. pixeltable/functions/openrouter.py +143 -0
  41. pixeltable/functions/video.py +123 -9
  42. pixeltable/functions/whisperx.py +2 -0
  43. pixeltable/functions/yolox.py +2 -0
  44. pixeltable/globals.py +56 -31
  45. pixeltable/io/__init__.py +1 -0
  46. pixeltable/io/globals.py +16 -15
  47. pixeltable/io/table_data_conduit.py +46 -21
  48. pixeltable/iterators/__init__.py +1 -0
  49. pixeltable/metadata/__init__.py +1 -1
  50. pixeltable/metadata/converters/convert_40.py +73 -0
  51. pixeltable/metadata/notes.py +1 -0
  52. pixeltable/plan.py +175 -46
  53. pixeltable/share/publish.py +0 -1
  54. pixeltable/store.py +2 -2
  55. pixeltable/type_system.py +5 -3
  56. pixeltable/utils/console_output.py +4 -1
  57. pixeltable/utils/exception_handler.py +5 -28
  58. pixeltable/utils/image.py +7 -0
  59. pixeltable/utils/misc.py +5 -0
  60. {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/METADATA +2 -1
  61. {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/RECORD +64 -57
  62. {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/WHEEL +0 -0
  63. {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/entry_points.txt +0 -0
  64. {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/licenses/LICENSE +0 -0
@@ -11,6 +11,7 @@ from uuid import UUID
11
11
 
12
12
  import jsonschema.exceptions
13
13
  import sqlalchemy as sql
14
+ from sqlalchemy import exc as sql_exc
14
15
 
15
16
  import pixeltable as pxt
16
17
  import pixeltable.exceptions as excs
@@ -18,24 +19,19 @@ from pixeltable import exprs, index
18
19
  from pixeltable.env import Env
19
20
  from pixeltable.iterators import ComponentIterator
20
21
  from pixeltable.metadata import schema
21
- from pixeltable.utils.exception_handler import run_cleanup_on_exception
22
22
  from pixeltable.utils.filecache import FileCache
23
23
  from pixeltable.utils.object_stores import ObjectOps
24
24
 
25
- from .tbl_ops import TableOp
26
-
27
- if TYPE_CHECKING:
28
- from pixeltable.plan import SampleClause
29
-
30
25
  from ..func.globals import resolve_symbol
31
26
  from .column import Column
32
27
  from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, MediaValidation, is_valid_identifier
28
+ from .tbl_ops import TableOp
33
29
  from .update_status import RowCountStats, UpdateStatus
34
30
 
35
31
  if TYPE_CHECKING:
36
32
  from pixeltable import exec, store
37
-
38
- from .table_version_handle import TableVersionHandle
33
+ from pixeltable.catalog.table_version_handle import TableVersionHandle
34
+ from pixeltable.plan import SampleClause
39
35
 
40
36
  _logger = logging.getLogger('pixeltable')
41
37
 
@@ -295,7 +291,14 @@ class TableVersion:
295
291
  cat = pxt.catalog.Catalog.get()
296
292
 
297
293
  tbl_id = UUID(hex=inital_md.tbl_md.tbl_id)
294
+ assert (tbl_id, None) not in cat._tbl_versions
298
295
  tbl_version = cls(tbl_id, inital_md.tbl_md, inital_md.version_md, None, inital_md.schema_version_md, [])
296
+
297
+ @cat.register_undo_action
298
+ def _() -> None:
299
+ if (tbl_id, None) in cat._tbl_versions:
300
+ del cat._tbl_versions[tbl_id, None]
301
+
299
302
  # TODO: break this up, so that Catalog.create_table() registers tbl_version
300
303
  cat._tbl_versions[tbl_id, None] = tbl_version
301
304
  tbl_version.init()
@@ -508,9 +511,7 @@ class TableVersion:
508
511
 
509
512
  def add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
510
513
  # we're creating a new schema version
511
- self.version += 1
512
- self.created_at = time.time()
513
- self.schema_version = self.version
514
+ self.bump_version(bump_schema_version=True)
514
515
  status = self._add_index(col, idx_name, idx)
515
516
  self._write_md(new_version=True, new_schema_version=True)
516
517
  _logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
@@ -604,18 +605,7 @@ class TableVersion:
604
605
  idx_info = self.IndexInfo(id=idx_id, name=idx_name, idx=idx, col=col, val_col=val_col, undo_col=undo_col)
605
606
  self._tbl_md.index_md[idx_id] = idx_md
606
607
  self.idxs_by_name[idx_name] = idx_info
607
- try:
608
- idx.create_index(self._store_idx_name(idx_id), val_col)
609
- finally:
610
-
611
- def cleanup_index() -> None:
612
- """Delete the newly added in-memory index structure"""
613
- del self.idxs_by_name[idx_name]
614
- del self._tbl_md.index_md[idx_id]
615
- self.next_idx_id = idx_id
616
-
617
- # Run cleanup only if there has been an exception; otherwise, skip cleanup.
618
- run_cleanup_on_exception(cleanup_index)
608
+ idx.create_index(self._store_idx_name(idx_id), val_col)
619
609
 
620
610
  def _add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
621
611
  val_col, undo_vol = self._create_index_columns(idx)
@@ -632,9 +622,7 @@ class TableVersion:
632
622
  assert idx_id in self._tbl_md.index_md
633
623
 
634
624
  # we're creating a new schema version
635
- self.version += 1
636
- self.created_at = time.time()
637
- self.schema_version = self.version
625
+ self.bump_version(bump_schema_version=True)
638
626
  idx_md = self._tbl_md.index_md[idx_id]
639
627
  idx_md.schema_version_drop = self.schema_version
640
628
  assert idx_md.name in self.idxs_by_name
@@ -663,9 +651,7 @@ class TableVersion:
663
651
  self.next_col_id += 1
664
652
 
665
653
  # we're creating a new schema version
666
- self.version += 1
667
- self.created_at = time.time()
668
- self.schema_version = self.version
654
+ self.bump_version(bump_schema_version=True)
669
655
  index_cols: dict[Column, tuple[index.BtreeIndex, Column, Column]] = {}
670
656
  all_cols: list[Column] = []
671
657
  for col in cols:
@@ -697,7 +683,11 @@ class TableVersion:
697
683
  self, cols: Iterable[Column], print_stats: bool, on_error: Literal['abort', 'ignore']
698
684
  ) -> UpdateStatus:
699
685
  """Add and populate columns within the current transaction"""
686
+ from pixeltable.catalog import Catalog
687
+ from pixeltable.plan import Planner
688
+
700
689
  cols_to_add = list(cols)
690
+
701
691
  row_count = self.store_tbl.count()
702
692
  for col in cols_to_add:
703
693
  assert col.tbl is self
@@ -734,40 +724,27 @@ class TableVersion:
734
724
  continue
735
725
 
736
726
  # populate the column
737
- from pixeltable.plan import Planner
738
-
739
727
  plan = Planner.create_add_column_plan(self.path, col)
740
728
  plan.ctx.num_rows = row_count
741
729
  try:
742
730
  plan.open()
743
731
  try:
744
732
  excs_per_col = self.store_tbl.load_column(col, plan, on_error == 'abort')
745
- except sql.exc.DBAPIError as exc:
746
- # Wrap the DBAPIError in an excs.Error to unify processing in the subsequent except block
747
- raise excs.Error(f'SQL error during execution of computed column `{col.name}`:\n{exc}') from exc
733
+ except sql_exc.DBAPIError as exc:
734
+ Catalog.get().convert_sql_exc(exc, self.id, self.handle, convert_db_excs=True)
735
+ # If it wasn't converted, re-raise as a generic Pixeltable error
736
+ # (this means it's not a known concurrency error; it's something else)
737
+ raise excs.Error(
738
+ f'Unexpected SQL error during execution of computed column {col.name!r}:\n{exc}'
739
+ ) from exc
748
740
  if excs_per_col > 0:
749
741
  cols_with_excs.append(col)
750
742
  num_excs += excs_per_col
751
743
  computed_values += plan.ctx.num_computed_exprs * row_count
752
744
  finally:
753
- # Ensure cleanup occurs if an exception or keyboard interruption happens during `load_column()`.
754
- def cleanup_on_error() -> None:
755
- """Delete columns that are added as part of current add_columns operation and re-initialize
756
- the sqlalchemy schema"""
757
- self.cols = [col for col in self.cols if col not in cols_to_add]
758
- for col in cols_to_add:
759
- # remove columns that we already added
760
- if col.id in self.cols_by_id:
761
- del self.cols_by_id[col.id]
762
- if col.name is not None and col.name in self.cols_by_name:
763
- del self.cols_by_name[col.name]
764
- self.store_tbl.create_sa_tbl()
765
-
766
- # Run cleanup only if there has been an exception; otherwise, skip cleanup.
767
- run_cleanup_on_exception(cleanup_on_error)
768
745
  plan.close()
769
746
 
770
- pxt.catalog.Catalog.get().record_column_dependencies(self)
747
+ Catalog.get().record_column_dependencies(self)
771
748
 
772
749
  if print_stats:
773
750
  plan.ctx.profile.print(num_rows=row_count)
@@ -787,9 +764,7 @@ class TableVersion:
787
764
  assert self.is_mutable
788
765
 
789
766
  # we're creating a new schema version
790
- self.version += 1
791
- self.created_at = time.time()
792
- self.schema_version = self.version
767
+ self.bump_version(bump_schema_version=True)
793
768
 
794
769
  # drop this column and all dependent index columns and indices
795
770
  dropped_cols = [col]
@@ -853,9 +828,7 @@ class TableVersion:
853
828
  self._schema_version_md.columns[col.id].name = new_name
854
829
 
855
830
  # we're creating a new schema version
856
- self.version += 1
857
- self.created_at = time.time()
858
- self.schema_version = self.version
831
+ self.bump_version(bump_schema_version=True)
859
832
 
860
833
  self._write_md(new_version=True, new_schema_version=True)
861
834
  _logger.info(f'Renamed column {old_name} to {new_name} in table {self.name}, new version: {self.version}')
@@ -875,9 +848,7 @@ class TableVersion:
875
848
 
876
849
  def _create_schema_version(self) -> None:
877
850
  # we're creating a new schema version
878
- self.version += 1
879
- self.created_at = time.time()
880
- self.schema_version = self.version
851
+ self.bump_version(bump_schema_version=True)
881
852
  self._write_md(new_version=True, new_schema_version=True)
882
853
  _logger.info(f'[{self.name}] Updating table schema to version: {self.version}')
883
854
 
@@ -924,8 +895,7 @@ class TableVersion:
924
895
  ) -> UpdateStatus:
925
896
  """Insert rows produced by exec_plan and propagate to views"""
926
897
  # we're creating a new version
927
- self.version += 1
928
- self.created_at = timestamp
898
+ self.bump_version(timestamp, bump_schema_version=False)
929
899
  cols_with_excs, row_counts = self.store_tbl.insert_rows(
930
900
  exec_plan, v_min=self.version, rowids=rowids, abort_on_exc=abort_on_exc
931
901
  )
@@ -960,10 +930,11 @@ class TableVersion:
960
930
  cascade: if True, also update all computed columns that transitively depend on the updated columns,
961
931
  including within views.
962
932
  """
963
- assert self.is_mutable
964
-
933
+ from pixeltable.exprs import SqlElementCache
965
934
  from pixeltable.plan import Planner
966
935
 
936
+ assert self.is_mutable
937
+
967
938
  update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True, allow_media=True)
968
939
  if where is not None:
969
940
  if not isinstance(where, exprs.Expr):
@@ -974,7 +945,6 @@ class TableVersion:
974
945
  raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
975
946
 
976
947
  plan, updated_cols, recomputed_cols = Planner.create_update_plan(self.path, update_spec, [], where, cascade)
977
- from pixeltable.exprs import SqlElementCache
978
948
 
979
949
  result = self.propagate_update(
980
950
  plan,
@@ -1001,11 +971,11 @@ class TableVersion:
1001
971
  batch: one dict per row, each mapping Columns to LiteralExprs representing the new values
1002
972
  rowids: if not empty, one tuple per row, each containing the rowid values for the corresponding row in batch
1003
973
  """
974
+ from pixeltable.plan import Planner
975
+
1004
976
  # if we do lookups of rowids, we must have one for each row in the batch
1005
977
  assert len(rowids) == 0 or len(rowids) == len(batch)
1006
978
 
1007
- from pixeltable.plan import Planner
1008
-
1009
979
  plan, row_update_node, delete_where_clause, updated_cols, recomputed_cols = Planner.create_batch_update_plan(
1010
980
  self.path, batch, rowids, cascade=cascade
1011
981
  )
@@ -1077,14 +1047,14 @@ class TableVersion:
1077
1047
  def recompute_columns(
1078
1048
  self, col_names: list[str], where: exprs.Expr | None = None, errors_only: bool = False, cascade: bool = True
1079
1049
  ) -> UpdateStatus:
1050
+ from pixeltable.exprs import CompoundPredicate, SqlElementCache
1051
+ from pixeltable.plan import Planner
1052
+
1080
1053
  assert self.is_mutable
1081
1054
  assert all(name in self.cols_by_name for name in col_names)
1082
1055
  assert len(col_names) > 0
1083
1056
  assert len(col_names) == 1 or not errors_only
1084
1057
 
1085
- from pixeltable.exprs import CompoundPredicate
1086
- from pixeltable.plan import Planner
1087
-
1088
1058
  target_columns = [self.cols_by_name[name] for name in col_names]
1089
1059
  where_clause: Optional[exprs.Expr] = None
1090
1060
  if where is not None:
@@ -1099,7 +1069,6 @@ class TableVersion:
1099
1069
  plan, updated_cols, recomputed_cols = Planner.create_update_plan(
1100
1070
  self.path, update_targets={}, recompute_targets=target_columns, where_clause=where_clause, cascade=cascade
1101
1071
  )
1102
- from pixeltable.exprs import SqlElementCache
1103
1072
 
1104
1073
  result = self.propagate_update(
1105
1074
  plan,
@@ -1123,11 +1092,14 @@ class TableVersion:
1123
1092
  cascade: bool,
1124
1093
  show_progress: bool = True,
1125
1094
  ) -> UpdateStatus:
1095
+ from pixeltable.catalog import Catalog
1096
+ from pixeltable.plan import Planner
1097
+
1098
+ Catalog.get().mark_modified_tvs(self.handle)
1126
1099
  result = UpdateStatus()
1127
1100
  create_new_table_version = plan is not None
1128
1101
  if create_new_table_version:
1129
- self.version += 1
1130
- self.created_at = timestamp
1102
+ self.bump_version(timestamp, bump_schema_version=False)
1131
1103
  cols_with_excs, row_counts = self.store_tbl.insert_rows(
1132
1104
  plan, v_min=self.version, show_progress=show_progress
1133
1105
  )
@@ -1146,8 +1118,6 @@ class TableVersion:
1146
1118
  recomputed_cols = [col for col in recomputed_view_cols if col.tbl.id == view.id]
1147
1119
  plan = None
1148
1120
  if len(recomputed_cols) > 0:
1149
- from pixeltable.plan import Planner
1150
-
1151
1121
  plan = Planner.create_view_update_plan(view.get().path, recompute_targets=recomputed_cols)
1152
1122
  status = view.get().propagate_update(
1153
1123
  plan, None, recomputed_view_cols, base_versions=base_versions, timestamp=timestamp, cascade=True
@@ -1182,6 +1152,10 @@ class TableVersion:
1182
1152
  self, where: Optional[exprs.Expr], base_versions: list[Optional[int]], timestamp: float
1183
1153
  ) -> UpdateStatus:
1184
1154
  """Delete rows in this table and propagate to views"""
1155
+ from pixeltable.catalog import Catalog
1156
+
1157
+ Catalog.get().mark_modified_tvs(self.handle)
1158
+
1185
1159
  # print(f'calling sql_expr()')
1186
1160
  sql_where_clause = where.sql_expr(exprs.SqlElementCache()) if where is not None else None
1187
1161
  # #print(f'sql_where_clause={str(sql_where_clause) if sql_where_clause is not None else None}')
@@ -1198,8 +1172,7 @@ class TableVersion:
1198
1172
  result = UpdateStatus(row_count_stats=row_counts)
1199
1173
  if del_rows > 0:
1200
1174
  # we're creating a new version
1201
- self.version += 1
1202
- self.created_at = timestamp
1175
+ self.bump_version(timestamp, bump_schema_version=False)
1203
1176
  for view in self.mutable_views:
1204
1177
  status = view.get().propagate_delete(
1205
1178
  where=None, base_versions=[self.version, *base_versions], timestamp=timestamp
@@ -1225,6 +1198,8 @@ class TableVersion:
1225
1198
  Doesn't attempt to revert the in-memory metadata, but instead invalidates this TableVersion instance
1226
1199
  and relies on Catalog to reload it
1227
1200
  """
1201
+ from pixeltable.catalog import Catalog
1202
+
1228
1203
  conn = Env.get().conn
1229
1204
  # make sure we don't have a snapshot referencing this version
1230
1205
  # (unclear how to express this with sqlalchemy)
@@ -1244,8 +1219,6 @@ class TableVersion:
1244
1219
  )
1245
1220
  )
1246
1221
 
1247
- # delete newly-added data
1248
- self.delete_media(tbl_version=self.version)
1249
1222
  conn.execute(sql.delete(self.store_tbl.sa_tbl).where(self.store_tbl.sa_tbl.c.v_min == self.version))
1250
1223
 
1251
1224
  # revert new deletions
@@ -1260,6 +1233,8 @@ class TableVersion:
1260
1233
  # revert schema changes:
1261
1234
  # - undo changes to self._tbl_md and write that back
1262
1235
  # - delete newly-added TableVersion/TableSchemaVersion records
1236
+ Catalog.get().mark_modified_tvs(self.handle)
1237
+ old_version = self.version
1263
1238
  if self.version == self.schema_version:
1264
1239
  # physically delete newly-added columns and remove them from the stored md
1265
1240
  added_cols = [col for col in self.cols if col.schema_version_add == self.schema_version]
@@ -1306,7 +1281,8 @@ class TableVersion:
1306
1281
  .where(schema.TableVersion.version == self.version)
1307
1282
  )
1308
1283
 
1309
- self.version -= 1
1284
+ self._tbl_md.current_version = self._version_md.version = self.version - 1
1285
+
1310
1286
  self._write_md(new_version=False, new_schema_version=False)
1311
1287
 
1312
1288
  # propagate to views
@@ -1317,7 +1293,12 @@ class TableVersion:
1317
1293
 
1318
1294
  # force reload on next operation
1319
1295
  self.is_validated = False
1320
- pxt.catalog.Catalog.get().remove_tbl_version(self)
1296
+ Catalog.get().remove_tbl_version(self)
1297
+
1298
+ # delete newly-added data
1299
+ # Do this at the end, after all DB operations have completed.
1300
+ # TODO: The transaction could still fail. Really this should be done via PendingTableOps.
1301
+ self.delete_media(tbl_version=old_version)
1321
1302
  _logger.info(f'TableVersion {self.name}: reverted to version {self.version}')
1322
1303
 
1323
1304
  def _init_external_stores(self) -> None:
@@ -1328,9 +1309,7 @@ class TableVersion:
1328
1309
  self.external_stores[store.name] = store
1329
1310
 
1330
1311
  def link_external_store(self, store: pxt.io.ExternalStore) -> None:
1331
- self.version += 1
1332
- self.created_at = time.time()
1333
- self.schema_version = self.version
1312
+ self.bump_version(bump_schema_version=True)
1334
1313
 
1335
1314
  self.external_stores[store.name] = store
1336
1315
  self._tbl_md.external_stores.append(
@@ -1340,9 +1319,7 @@ class TableVersion:
1340
1319
 
1341
1320
  def unlink_external_store(self, store: pxt.io.ExternalStore) -> None:
1342
1321
  del self.external_stores[store.name]
1343
- self.version += 1
1344
- self.created_at = time.time()
1345
- self.schema_version = self.version
1322
+ self.bump_version(bump_schema_version=True)
1346
1323
  idx = next(i for i, store_md in enumerate(self._tbl_md.external_stores) if store_md['md']['name'] == store.name)
1347
1324
  self._tbl_md.external_stores.pop(idx)
1348
1325
  self._write_md(new_version=True, new_schema_version=True)
@@ -1398,35 +1375,52 @@ class TableVersion:
1398
1375
  # if this is a snapshot instance, we need to ignore current_version
1399
1376
  return self._tbl_md.current_version if self.effective_version is None else self.effective_version
1400
1377
 
1401
- @version.setter
1402
- def version(self, version: int) -> None:
1403
- assert self.effective_version is None
1404
- self._tbl_md.current_version = version
1405
- self._version_md.version = version
1406
-
1407
1378
  @property
1408
1379
  def created_at(self) -> float:
1409
1380
  return self._version_md.created_at
1410
1381
 
1411
- @created_at.setter
1412
- def created_at(self, ts: float) -> None:
1413
- assert self.effective_version is None
1414
- self._version_md.created_at = ts
1415
-
1416
1382
  @property
1417
1383
  def schema_version(self) -> int:
1418
1384
  return self._schema_version_md.schema_version
1419
1385
 
1420
- @schema_version.setter
1421
- def schema_version(self, version: int) -> None:
1386
+ def bump_version(self, timestamp: Optional[float] = None, *, bump_schema_version: bool) -> None:
1387
+ """
1388
+ Increments the table version and adjusts all associated metadata. This will *not* trigger a database action;
1389
+ _write_md() must be called separately to persist the changes.
1390
+
1391
+ Args:
1392
+ timestamp: the creation time for the new version. Can be used to synchronize multiple metadata changes
1393
+ to the same timestamp. If `None`, then defaults to `time.time()`.
1394
+ bump_schema_version: if True, also adjusts the schema version (setting it equal to the new version)
1395
+ and associated metadata.
1396
+ """
1397
+ from pixeltable.catalog import Catalog
1398
+
1422
1399
  assert self.effective_version is None
1423
- self._tbl_md.current_schema_version = version
1424
- self._version_md.schema_version = version
1425
- self._schema_version_md.preceding_schema_version = self._schema_version_md.schema_version
1426
- self._schema_version_md.schema_version = version
1400
+
1401
+ if timestamp is None:
1402
+ timestamp = time.time()
1403
+
1404
+ Catalog.get().mark_modified_tvs(self.handle)
1405
+
1406
+ old_version = self._tbl_md.current_version
1407
+ assert self._version_md.version == old_version
1408
+ new_version = old_version + 1
1409
+ self._tbl_md.current_version = new_version
1410
+ self._version_md.version = new_version
1411
+ self._version_md.created_at = timestamp
1412
+
1413
+ if bump_schema_version:
1414
+ old_schema_version = self._tbl_md.current_schema_version
1415
+ assert self._version_md.schema_version == old_schema_version
1416
+ assert self._schema_version_md.schema_version == old_schema_version
1417
+ self._tbl_md.current_schema_version = new_version
1418
+ self._version_md.schema_version = new_version
1419
+ self._schema_version_md.preceding_schema_version = old_schema_version
1420
+ self._schema_version_md.schema_version = new_version
1427
1421
 
1428
1422
  @property
1429
- def preceding_schema_version(self) -> int:
1423
+ def preceding_schema_version(self) -> Optional[int]:
1430
1424
  return self._schema_version_md.preceding_schema_version
1431
1425
 
1432
1426
  @property
@@ -1558,8 +1552,8 @@ class TableVersion:
1558
1552
 
1559
1553
  @classmethod
1560
1554
  def from_dict(cls, d: dict) -> TableVersion:
1561
- from pixeltable import catalog
1555
+ from pixeltable.catalog import Catalog
1562
1556
 
1563
1557
  id = UUID(d['id'])
1564
1558
  effective_version = d['effective_version']
1565
- return catalog.Catalog.get().get_tbl_version(id, effective_version)
1559
+ return Catalog.get().get_tbl_version(id, effective_version)
@@ -37,6 +37,9 @@ class TableVersionHandle:
37
37
  def __hash__(self) -> int:
38
38
  return hash((self.id, self.effective_version))
39
39
 
40
+ def __repr__(self) -> str:
41
+ return f'TableVersionHandle(id={self.id!r}, effective_version={self.effective_version})'
42
+
40
43
  @property
41
44
  def is_snapshot(self) -> bool:
42
45
  return self.effective_version is not None
@@ -81,7 +84,7 @@ class ColumnHandle:
81
84
  if self.col_id not in self.tbl_version.get().cols_by_id:
82
85
  schema_version_drop = self.tbl_version.get()._tbl_md.column_md[self.col_id].schema_version_drop
83
86
  raise excs.Error(
84
- f'Column has been dropped (no record for column ID {self.col_id} in table '
87
+ f'Column was dropped (no record for column ID {self.col_id} in table '
85
88
  f'{self.tbl_version.get().versioned_name!r}; it was dropped in table version {schema_version_drop})'
86
89
  )
87
90
  return self.tbl_version.get().cols_by_id[self.col_id]
@@ -57,27 +57,35 @@ class UpdateStatus:
57
57
  """
58
58
 
59
59
  updated_cols: list[str] = field(default_factory=list)
60
+ """Columns that were updated."""
60
61
  cols_with_excs: list[str] = field(default_factory=list)
62
+ """Columns that encountered exceptions."""
61
63
 
62
64
  # stats for the rows affected by the operation
63
65
  row_count_stats: RowCountStats = field(default_factory=RowCountStats)
66
+ """Row count statistics for rows affected by this operation."""
64
67
 
65
68
  # stats for changes cascaded to other tables
66
69
  cascade_row_count_stats: RowCountStats = field(default_factory=RowCountStats)
70
+ """Row count statistics for changes cascaded to other tables."""
67
71
 
68
72
  # stats for the rows affected by the operation in an external store
69
73
  ext_row_count_stats: RowCountStats = field(default_factory=RowCountStats)
74
+ """Row count statistics for rows affected in an external store."""
70
75
 
71
76
  @property
72
77
  def num_rows(self) -> int:
78
+ """Total number of rows affected (including cascaded changes)."""
73
79
  return self.row_count_stats.num_rows + self.cascade_row_count_stats.num_rows
74
80
 
75
81
  @property
76
82
  def num_excs(self) -> int:
83
+ """Total number of exceptions encountered (including cascaded changes)."""
77
84
  return self.row_count_stats.num_excs + self.cascade_row_count_stats.num_excs
78
85
 
79
86
  @property
80
87
  def num_computed_values(self) -> int:
88
+ """Total number of computed values affected (including cascaded changes)."""
81
89
  return self.row_count_stats.computed_values + self.cascade_row_count_stats.computed_values
82
90
 
83
91
  def insert_to_update(self) -> 'UpdateStatus':
@@ -164,16 +172,20 @@ class UpdateStatus:
164
172
 
165
173
  @property
166
174
  def external_rows_updated(self) -> int:
175
+ """Number of rows updated in an external store."""
167
176
  return self.ext_row_count_stats.upd_rows
168
177
 
169
178
  @property
170
179
  def external_rows_created(self) -> int:
180
+ """Number of rows created in an external store."""
171
181
  return self.ext_row_count_stats.ins_rows
172
182
 
173
183
  @property
174
184
  def external_rows_deleted(self) -> int:
185
+ """Number of rows deleted from an external store."""
175
186
  return self.ext_row_count_stats.del_rows
176
187
 
177
188
  @property
178
189
  def ext_num_rows(self) -> int:
190
+ """Total number of rows affected in an external store."""
179
191
  return self.ext_row_count_stats.num_rows
pixeltable/config.py CHANGED
@@ -181,6 +181,12 @@ KNOWN_CONFIG_OPTIONS = {
181
181
  'api_version': 'API version if using Azure OpenAI',
182
182
  'rate_limits': 'Per-model rate limits for OpenAI API requests',
183
183
  },
184
+ 'openrouter': {
185
+ 'api_key': 'OpenRouter API key',
186
+ 'site_url': 'Optional URL for your application (for OpenRouter analytics)',
187
+ 'app_name': 'Optional name for your application (for OpenRouter analytics)',
188
+ 'rate_limit': 'Rate limit for OpenRouter API requests',
189
+ },
184
190
  'replicate': {'api_token': 'Replicate API token'},
185
191
  'together': {
186
192
  'api_key': 'Together API key',
pixeltable/dataframe.py CHANGED
@@ -23,7 +23,7 @@ from typing import (
23
23
 
24
24
  import pandas as pd
25
25
  import pydantic
26
- import sqlalchemy as sql
26
+ import sqlalchemy.exc as sql_exc
27
27
 
28
28
  from pixeltable import catalog, exceptions as excs, exec, exprs, plan, type_system as ts
29
29
  from pixeltable.catalog import Catalog, is_valid_identifier
@@ -186,6 +186,8 @@ class DataFrameResultSet:
186
186
 
187
187
 
188
188
  class DataFrame:
189
+ """Represents a query for retrieving and transforming data from Pixeltable tables."""
190
+
189
191
  _from_clause: plan.FromClause
190
192
  _select_list_exprs: list[exprs.Expr]
191
193
  _schema: dict[str, ts.ColumnType]
@@ -456,6 +458,7 @@ class DataFrame:
456
458
 
457
459
  @property
458
460
  def schema(self) -> dict[str, ColumnType]:
461
+ """Column names and types in this DataFrame."""
459
462
  return self._schema
460
463
 
461
464
  def bind(self, args: dict[str, Any]) -> DataFrame:
@@ -538,20 +541,23 @@ class DataFrame:
538
541
  yield [data_row[e.slot_idx] for e in self._select_list_exprs]
539
542
  except excs.ExprEvalError as e:
540
543
  self._raise_expr_eval_err(e)
541
- except sql.exc.DBAPIError as e:
542
- raise excs.Error(f'Error during SQL execution:\n{e}') from e
544
+ except (sql_exc.DBAPIError, sql_exc.OperationalError, sql_exc.InternalError) as e:
545
+ Catalog.get().convert_sql_exc(e, tbl=(single_tbl.tbl_version if single_tbl is not None else None))
546
+ raise # just re-raise if not converted to a Pixeltable error
543
547
 
544
548
  def collect(self) -> DataFrameResultSet:
545
549
  return DataFrameResultSet(list(self._output_row_iterator()), self.schema)
546
550
 
547
551
  async def _acollect(self) -> DataFrameResultSet:
552
+ single_tbl = self._first_tbl if len(self._from_clause.tbls) == 1 else None
548
553
  try:
549
554
  result = [[row[e.slot_idx] for e in self._select_list_exprs] async for row in self._aexec()]
550
555
  return DataFrameResultSet(result, self.schema)
551
556
  except excs.ExprEvalError as e:
552
557
  self._raise_expr_eval_err(e)
553
- except sql.exc.DBAPIError as e:
554
- raise excs.Error(f'Error during SQL execution:\n{e}') from e
558
+ except (sql_exc.DBAPIError, sql_exc.OperationalError, sql_exc.InternalError) as e:
559
+ Catalog.get().convert_sql_exc(e, tbl=(single_tbl.tbl_version if single_tbl is not None else None))
560
+ raise # just re-raise if not converted to a Pixeltable error
555
561
 
556
562
  def count(self) -> int:
557
563
  """Return the number of rows in the DataFrame.