pixeltable-0.3.14-py3-none-any.whl → pixeltable-0.4.0-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. (The original page linked to further details here; the link target is not preserved in this text.)

Files changed (79)
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +9 -1
  4. pixeltable/catalog/catalog.py +559 -134
  5. pixeltable/catalog/column.py +36 -32
  6. pixeltable/catalog/dir.py +1 -2
  7. pixeltable/catalog/globals.py +12 -0
  8. pixeltable/catalog/insertable_table.py +30 -25
  9. pixeltable/catalog/schema_object.py +9 -6
  10. pixeltable/catalog/table.py +334 -267
  11. pixeltable/catalog/table_version.py +360 -241
  12. pixeltable/catalog/table_version_handle.py +18 -2
  13. pixeltable/catalog/table_version_path.py +86 -23
  14. pixeltable/catalog/view.py +47 -23
  15. pixeltable/dataframe.py +198 -19
  16. pixeltable/env.py +6 -4
  17. pixeltable/exceptions.py +6 -0
  18. pixeltable/exec/__init__.py +1 -1
  19. pixeltable/exec/exec_node.py +2 -0
  20. pixeltable/exec/expr_eval/evaluators.py +4 -1
  21. pixeltable/exec/expr_eval/expr_eval_node.py +4 -4
  22. pixeltable/exec/in_memory_data_node.py +1 -1
  23. pixeltable/exec/sql_node.py +188 -22
  24. pixeltable/exprs/column_property_ref.py +16 -6
  25. pixeltable/exprs/column_ref.py +33 -11
  26. pixeltable/exprs/comparison.py +1 -1
  27. pixeltable/exprs/data_row.py +5 -3
  28. pixeltable/exprs/expr.py +11 -4
  29. pixeltable/exprs/literal.py +2 -0
  30. pixeltable/exprs/row_builder.py +4 -6
  31. pixeltable/exprs/rowid_ref.py +8 -0
  32. pixeltable/exprs/similarity_expr.py +1 -0
  33. pixeltable/func/__init__.py +1 -0
  34. pixeltable/func/mcp.py +74 -0
  35. pixeltable/func/query_template_function.py +5 -3
  36. pixeltable/func/tools.py +12 -2
  37. pixeltable/func/udf.py +2 -2
  38. pixeltable/functions/__init__.py +1 -0
  39. pixeltable/functions/anthropic.py +19 -45
  40. pixeltable/functions/deepseek.py +19 -38
  41. pixeltable/functions/fireworks.py +9 -18
  42. pixeltable/functions/gemini.py +165 -33
  43. pixeltable/functions/groq.py +108 -0
  44. pixeltable/functions/llama_cpp.py +6 -6
  45. pixeltable/functions/math.py +63 -0
  46. pixeltable/functions/mistralai.py +16 -53
  47. pixeltable/functions/ollama.py +1 -1
  48. pixeltable/functions/openai.py +82 -165
  49. pixeltable/functions/string.py +212 -58
  50. pixeltable/functions/together.py +22 -80
  51. pixeltable/globals.py +10 -4
  52. pixeltable/index/base.py +5 -0
  53. pixeltable/index/btree.py +5 -0
  54. pixeltable/index/embedding_index.py +5 -0
  55. pixeltable/io/external_store.py +10 -31
  56. pixeltable/io/label_studio.py +5 -5
  57. pixeltable/io/parquet.py +4 -4
  58. pixeltable/io/table_data_conduit.py +1 -32
  59. pixeltable/metadata/__init__.py +11 -2
  60. pixeltable/metadata/converters/convert_13.py +2 -2
  61. pixeltable/metadata/converters/convert_30.py +6 -11
  62. pixeltable/metadata/converters/convert_35.py +9 -0
  63. pixeltable/metadata/converters/convert_36.py +38 -0
  64. pixeltable/metadata/converters/convert_37.py +15 -0
  65. pixeltable/metadata/converters/util.py +3 -9
  66. pixeltable/metadata/notes.py +3 -0
  67. pixeltable/metadata/schema.py +13 -1
  68. pixeltable/plan.py +135 -12
  69. pixeltable/share/packager.py +321 -20
  70. pixeltable/share/publish.py +2 -2
  71. pixeltable/store.py +31 -13
  72. pixeltable/type_system.py +30 -0
  73. pixeltable/utils/dbms.py +1 -1
  74. pixeltable/utils/formatter.py +64 -42
  75. {pixeltable-0.3.14.dist-info → pixeltable-0.4.0.dist-info}/METADATA +2 -1
  76. {pixeltable-0.3.14.dist-info → pixeltable-0.4.0.dist-info}/RECORD +79 -74
  77. {pixeltable-0.3.14.dist-info → pixeltable-0.4.0.dist-info}/LICENSE +0 -0
  78. {pixeltable-0.3.14.dist-info → pixeltable-0.4.0.dist-info}/WHEEL +0 -0
  79. {pixeltable-0.3.14.dist-info → pixeltable-0.4.0.dist-info}/entry_points.txt +0 -0
pixeltable/index/btree.py CHANGED
@@ -59,6 +59,11 @@ class BtreeIndex(IndexBase):
59
59
  conn = Env.get().conn
60
60
  idx.create(bind=conn)
61
61
 
62
+ def drop_index(self, index_name: str, index_value_col: 'catalog.Column') -> None:
63
+ """Drop the index on the index value column"""
64
+ # TODO: implement
65
+ raise NotImplementedError()
66
+
62
67
  @classmethod
63
68
  def display_name(cls) -> str:
64
69
  return 'btree'
pixeltable/index/embedding_index.py CHANGED
@@ -148,6 +148,11 @@ class EmbeddingIndex(IndexBase):
148
148
  conn = Env.get().conn
149
149
  idx.create(bind=conn)
150
150
 
151
+ def drop_index(self, index_name: str, index_value_col: catalog.Column) -> None:
152
+ """Drop the index on the index value column"""
153
+ # TODO: implement
154
+ raise NotImplementedError()
155
+
151
156
  def similarity_clause(self, val_column: catalog.Column, item: Any) -> sql.ColumnElement:
152
157
  """Create a ColumnElement that represents '<val_column> <op> <item>'"""
153
158
  assert isinstance(item, (str, PIL.Image.Image))
pixeltable/io/external_store.py CHANGED
@@ -3,7 +3,6 @@ from __future__ import annotations
3
3
  import abc
4
4
  import itertools
5
5
  import logging
6
- import time
7
6
  from dataclasses import dataclass
8
7
  from typing import Any, Optional
9
8
  from uuid import UUID
@@ -11,7 +10,7 @@ from uuid import UUID
11
10
  import pixeltable.exceptions as excs
12
11
  import pixeltable.type_system as ts
13
12
  from pixeltable import Column, Table
14
- from pixeltable.catalog import TableVersion, TableVersionHandle
13
+ from pixeltable.catalog import TableVersion
15
14
 
16
15
  _logger = logging.getLogger('pixeltable')
17
16
 
@@ -32,15 +31,11 @@ class ExternalStore(abc.ABC):
32
31
 
33
32
  @abc.abstractmethod
34
33
  def link(self, tbl_version: TableVersion) -> None:
35
- """
36
- Called by `TableVersion.link()` to implement store-specific logic.
37
- """
34
+ """Creates store-specific metadata needed to implement sync()."""
38
35
 
39
36
  @abc.abstractmethod
40
37
  def unlink(self, tbl_version: TableVersion) -> None:
41
- """
42
- Called by `TableVersion.unlink()` to implement store-specific logic.
43
- """
38
+ """Removes store-specific metadata created in link()."""
44
39
 
45
40
  @abc.abstractmethod
46
41
  def get_local_columns(self) -> list[Column]:
@@ -111,17 +106,10 @@ class Project(ExternalStore, abc.ABC):
111
106
 
112
107
  if len(stored_proxies_needed) > 0:
113
108
  _logger.info(f'Creating stored proxies for columns: {[col.name for col in stored_proxies_needed]}')
114
- # Create stored proxies for columns that need one. Increment the schema version
115
- # accordingly.
116
- tbl_version.version += 1
117
- preceding_schema_version = tbl_version.schema_version
118
- tbl_version.schema_version = tbl_version.version
119
- proxy_cols = [self.create_stored_proxy(tbl_version, col) for col in stored_proxies_needed]
109
+ # Create stored proxies for columns that need one
110
+ proxy_cols = [self.create_stored_proxy(col) for col in stored_proxies_needed]
120
111
  # Add the columns; this will also update table metadata.
121
- tbl_version._add_columns(proxy_cols, print_stats=False, on_error='ignore')
122
- # We don't need to retain `UpdateStatus` since the stored proxies are intended to be
123
- # invisible to the user.
124
- tbl_version._update_md(time.time(), preceding_schema_version=preceding_schema_version)
112
+ tbl_version.add_columns(proxy_cols, print_stats=False, on_error='ignore')
125
113
 
126
114
  def unlink(self, tbl_version: TableVersion) -> None:
127
115
  # Determine which stored proxies can be deleted. (A stored proxy can be deleted if it is not referenced by
@@ -132,15 +120,10 @@ class Project(ExternalStore, abc.ABC):
132
120
  deletions_needed = deletions_needed.difference(set(store.stored_proxies.values()))
133
121
  if len(deletions_needed) > 0:
134
122
  _logger.info(f'Removing stored proxies for columns: {[col.name for col in deletions_needed]}')
135
- # Delete stored proxies that are no longer needed.
136
- tbl_version.version += 1
137
- preceding_schema_version = tbl_version.schema_version
138
- tbl_version.schema_version = tbl_version.version
139
123
  tbl_version._drop_columns(deletions_needed)
140
124
  self.stored_proxies.clear()
141
- tbl_version._update_md(time.time(), preceding_schema_version=preceding_schema_version)
142
125
 
143
- def create_stored_proxy(self, tbl_version: TableVersion, col: Column) -> Column:
126
+ def create_stored_proxy(self, col: Column) -> Column:
144
127
  """
145
128
  Creates a proxy column for the specified column. The proxy column will be created in the specified
146
129
  `TableVersion`.
@@ -158,12 +141,7 @@ class Project(ExternalStore, abc.ABC):
158
141
  # Once `destination` is implemented, it can be replaced with a simple `ColumnRef`.
159
142
  computed_with=exprs.ColumnRef(col).apply(lambda x: x, col_type=col.col_type),
160
143
  stored=True,
161
- col_id=tbl_version.next_col_id,
162
- sa_col_type=col.col_type.to_sa_type(),
163
- schema_version_add=tbl_version.schema_version,
164
144
  )
165
- proxy_col.tbl = TableVersionHandle(tbl_version.id, tbl_version.effective_version, tbl_version=tbl_version)
166
- tbl_version.next_col_id += 1
167
145
  self.stored_proxies[col] = proxy_col
168
146
  return proxy_col
169
147
 
@@ -213,6 +191,7 @@ class Project(ExternalStore, abc.ABC):
213
191
  external (import or export) columns.
214
192
  If validation fails, an exception will be raised. If validation succeeds, a new mapping will be returned
215
193
  in which the Pixeltable column names are resolved to the corresponding `Column` objects.
194
+ TODO: return columns as names or qualified ids
216
195
  """
217
196
  from pixeltable import exprs
218
197
 
@@ -223,7 +202,7 @@ class Project(ExternalStore, abc.ABC):
223
202
  resolved_col_mapping: dict[Column, str] = {}
224
203
 
225
204
  # Validate names
226
- t_cols = set(table._schema.keys())
205
+ t_cols = set(table._get_schema().keys())
227
206
  for t_col, ext_col in col_mapping.items():
228
207
  if t_col not in t_cols:
229
208
  if is_user_specified_col_mapping:
@@ -246,7 +225,7 @@ class Project(ExternalStore, abc.ABC):
246
225
  assert isinstance(col_ref, exprs.ColumnRef)
247
226
  resolved_col_mapping[col_ref.col] = ext_col
248
227
  # Validate column specs
249
- t_col_types = table._schema
228
+ t_col_types = table._get_schema()
250
229
  for t_col, ext_col in col_mapping.items():
251
230
  t_col_type = t_col_types[t_col]
252
231
  if ext_col in export_cols:
pixeltable/io/label_studio.py CHANGED
@@ -412,8 +412,8 @@ class LabelStudioProject(Project):
412
412
  # TODO(aaron-siegel): Simplify this once propagation is properly implemented in batch_update
413
413
  ancestor = t
414
414
  while local_annotations_col not in ancestor._tbl_version.get().cols:
415
- assert ancestor._base_table is not None
416
- ancestor = ancestor._base_table
415
+ assert ancestor._get_base_table is not None
416
+ ancestor = ancestor._get_base_table()
417
417
  update_status = ancestor.batch_update(updates)
418
418
  env.Env.get().console_logger.info(f'Updated annotation(s) from {len(updates)} task(s) in {self}.')
419
419
  return SyncStatus(pxt_rows_updated=update_status.num_rows, num_excs=update_status.num_excs)
@@ -560,7 +560,7 @@ class LabelStudioProject(Project):
560
560
 
561
561
  if name is None:
562
562
  # Create a default name that's unique to the table
563
- all_stores = t.external_stores
563
+ all_stores = t.external_stores()
564
564
  n = 0
565
565
  while f'ls_project_{n}' in all_stores:
566
566
  n += 1
@@ -576,8 +576,8 @@ class LabelStudioProject(Project):
576
576
  local_annotations_column = ANNOTATIONS_COLUMN
577
577
  else:
578
578
  local_annotations_column = next(k for k, v in col_mapping.items() if v == ANNOTATIONS_COLUMN)
579
- if local_annotations_column not in t._schema:
580
- t.add_columns({local_annotations_column: ts.JsonType(nullable=True)})
579
+ if local_annotations_column not in t._get_schema():
580
+ t.add_columns({local_annotations_column: ts.Json})
581
581
 
582
582
  resolved_col_mapping = cls.validate_columns(
583
583
  t, config.export_columns, {ANNOTATIONS_COLUMN: ts.JsonType(nullable=True)}, col_mapping
pixeltable/io/parquet.py CHANGED
@@ -14,7 +14,7 @@ import PIL.Image
14
14
 
15
15
  import pixeltable as pxt
16
16
  import pixeltable.exceptions as excs
17
- from pixeltable.env import Env
17
+ from pixeltable.catalog import Catalog
18
18
  from pixeltable.utils.transactional_directory import transactional_directory
19
19
 
20
20
  if typing.TYPE_CHECKING:
@@ -87,7 +87,7 @@ def export_parquet(
87
87
  current_value_batch: dict[str, deque] = {k: deque() for k in df.schema}
88
88
  current_byte_estimate = 0
89
89
 
90
- with Env.get().begin_xact():
90
+ with Catalog.get().begin_xact(for_write=False):
91
91
  for data_row in df._exec():
92
92
  for (col_name, col_type), e in zip(df.schema.items(), df._select_list_exprs):
93
93
  val = data_row[e.slot_idx]
@@ -112,11 +112,11 @@ def export_parquet(
112
112
  length = len(val)
113
113
  elif col_type.is_string_type():
114
114
  length = len(val)
115
- elif col_type.is_video_type():
115
+ elif col_type.is_video_type() or col_type.is_audio_type():
116
116
  if data_row.file_paths is not None and data_row.file_paths[e.slot_idx] is not None:
117
117
  val = data_row.file_paths[e.slot_idx]
118
118
  else:
119
- raise excs.Error(f'unknown video type {type(val)}')
119
+ raise excs.Error(f'unknown audio/video type {type(val)}')
120
120
  length = len(val)
121
121
  elif col_type.is_json_type():
122
122
  val = json.dumps(val)
pixeltable/io/table_data_conduit.py CHANGED
@@ -23,7 +23,6 @@ from .utils import normalize_schema_names
23
23
 
24
24
  _logger = logging.getLogger('pixeltable')
25
25
 
26
- # ---------------------------------------------------------------------------------------------------------
27
26
 
28
27
  if TYPE_CHECKING:
29
28
  import datasets # type: ignore[import-untyped]
@@ -46,9 +45,6 @@ class TableDataConduitFormat(str, enum.Enum):
46
45
  return False
47
46
 
48
47
 
49
- # ---------------------------------------------------------------------------------------------------------
50
-
51
-
52
48
  @dataclass
53
49
  class TableDataConduit:
54
50
  source: TableDataSource
@@ -105,7 +101,7 @@ class TableDataConduit:
105
101
  def add_table_info(self, table: pxt.Table) -> None:
106
102
  """Add information about the table into which we are inserting data"""
107
103
  assert isinstance(table, pxt.Table)
108
- self.pxt_schema = table._schema
104
+ self.pxt_schema = table._get_schema()
109
105
  self.pxt_pk = table._tbl_version.get().primary_key
110
106
  for col in table._tbl_version_path.columns():
111
107
  if col.is_required_for_insert:
@@ -129,9 +125,6 @@ class TableDataConduit:
129
125
  raise excs.Error(f'Missing required column(s) ({", ".join(missing_cols)})')
130
126
 
131
127
 
132
- # ---------------------------------------------------------------------------------------------------------
133
-
134
-
135
128
  class DFTableDataConduit(TableDataConduit):
136
129
  pxt_df: pxt.DataFrame = None
137
130
 
@@ -155,9 +148,6 @@ class DFTableDataConduit(TableDataConduit):
155
148
  self.check_source_columns_are_insertable(self.pxt_df.schema.keys())
156
149
 
157
150
 
158
- # ---------------------------------------------------------------------------------------------------------
159
-
160
-
161
151
  class RowDataTableDataConduit(TableDataConduit):
162
152
  raw_rows: Optional[RowData] = None
163
153
  disable_mapping: bool = True
@@ -235,9 +225,6 @@ class RowDataTableDataConduit(TableDataConduit):
235
225
  yield self.valid_rows
236
226
 
237
227
 
238
- # ---------------------------------------------------------------------------------------------------------
239
-
240
-
241
228
  class PandasTableDataConduit(TableDataConduit):
242
229
  pd_df: pd.DataFrame = None
243
230
  batch_count: int = 0
@@ -293,9 +280,6 @@ class PandasTableDataConduit(TableDataConduit):
293
280
  yield self.valid_rows
294
281
 
295
282
 
296
- # ---------------------------------------------------------------------------------------------------------
297
-
298
-
299
283
  class CSVTableDataConduit(TableDataConduit):
300
284
  @classmethod
301
285
  def from_tds(cls, tds: TableDataConduit) -> 'PandasTableDataConduit':
@@ -307,9 +291,6 @@ class CSVTableDataConduit(TableDataConduit):
307
291
  return PandasTableDataConduit.from_tds(t)
308
292
 
309
293
 
310
- # ---------------------------------------------------------------------------------------------------------
311
-
312
-
313
294
  class ExcelTableDataConduit(TableDataConduit):
314
295
  @classmethod
315
296
  def from_tds(cls, tds: TableDataConduit) -> 'PandasTableDataConduit':
@@ -321,9 +302,6 @@ class ExcelTableDataConduit(TableDataConduit):
321
302
  return PandasTableDataConduit.from_tds(t)
322
303
 
323
304
 
324
- # ---------------------------------------------------------------------------------------------------------
325
-
326
-
327
305
  class JsonTableDataConduit(TableDataConduit):
328
306
  @classmethod
329
307
  def from_tds(cls, tds: TableDataConduit) -> RowDataTableDataConduit:
@@ -346,9 +324,6 @@ class JsonTableDataConduit(TableDataConduit):
346
324
  return t2
347
325
 
348
326
 
349
- # ---------------------------------------------------------------------------------------------------------
350
-
351
-
352
327
  class HFTableDataConduit(TableDataConduit):
353
328
  hf_ds: Optional[Union[datasets.Dataset, datasets.DatasetDict]] = None
354
329
  column_name_for_split: Optional[str] = None
@@ -478,9 +453,6 @@ class HFTableDataConduit(TableDataConduit):
478
453
  yield batch
479
454
 
480
455
 
481
- # ---------------------------------------------------------------------------------------------------------
482
-
483
-
484
456
  class ParquetTableDataConduit(TableDataConduit):
485
457
  pq_ds: Optional[ParquetDataset] = None
486
458
 
@@ -542,9 +514,6 @@ class ParquetTableDataConduit(TableDataConduit):
542
514
  raise e
543
515
 
544
516
 
545
- # ---------------------------------------------------------------------------------------------------------
546
-
547
-
548
517
  class UnkTableDataConduit(TableDataConduit):
549
518
  """Source type is not known at the time of creation"""
550
519
 
pixeltable/metadata/__init__.py CHANGED
@@ -8,15 +8,17 @@ from typing import Callable
8
8
  import sqlalchemy as sql
9
9
  from sqlalchemy import orm
10
10
 
11
+ import pixeltable as pxt
12
+ import pixeltable.exceptions as excs
11
13
  from pixeltable.utils.console_output import ConsoleLogger
12
14
 
13
15
  from .schema import SystemInfo, SystemInfoMd
14
16
 
15
17
  _console_logger = ConsoleLogger(logging.getLogger('pixeltable'))
16
-
18
+ _logger = logging.getLogger('pixeltable')
17
19
 
18
20
  # current version of the metadata; this is incremented whenever the metadata schema changes
19
- VERSION = 35
21
+ VERSION = 38
20
22
 
21
23
 
22
24
  def create_system_info(engine: sql.engine.Engine) -> None:
@@ -55,6 +57,13 @@ def upgrade_md(engine: sql.engine.Engine) -> None:
55
57
  system_info = session.query(SystemInfo).one().md
56
58
  md_version = system_info['schema_version']
57
59
  assert isinstance(md_version, int)
60
+ _logger.info(f'Current database version: {md_version}, installed version: {VERSION}')
61
+ if md_version > VERSION:
62
+ raise excs.Error(
63
+ 'This Pixeltable database was created with a newer Pixeltable version '
64
+ f'than the one currently installed ({pxt.__version__}).\n'
65
+ 'Please update to the latest Pixeltable version by running: pip install --upgrade pixeltable'
66
+ )
58
67
  if md_version == VERSION:
59
68
  return
60
69
  while md_version < VERSION:
pixeltable/metadata/converters/convert_13.py CHANGED
@@ -12,9 +12,9 @@ _logger = logging.getLogger('pixeltable')
12
12
  @register_converter(version=13)
13
13
  def _(engine: sql.engine.Engine) -> None:
14
14
  with engine.begin() as conn:
15
- for row in conn.execute(sql.select(Table)):
15
+ for row in conn.execute(sql.select(Table.id, Table.md)):
16
16
  id = row[0]
17
- md = row[2]
17
+ md = row[1]
18
18
  updated_md = __update_md(md)
19
19
  if updated_md != md:
20
20
  _logger.info(f'Updating schema for table: {id}')
pixeltable/metadata/converters/convert_30.py CHANGED
@@ -1,33 +1,28 @@
1
1
  import copy
2
+ from uuid import UUID
2
3
 
3
4
  import sqlalchemy as sql
4
5
 
5
6
  from pixeltable.metadata import register_converter
6
7
  from pixeltable.metadata.converters.util import (
7
- convert_table_record,
8
+ convert_table_md,
8
9
  convert_table_schema_version_record,
9
10
  convert_table_version_record,
10
11
  )
11
- from pixeltable.metadata.schema import Table, TableSchemaVersion, TableVersion
12
+ from pixeltable.metadata.schema import TableSchemaVersion, TableVersion
12
13
 
13
14
 
14
15
  @register_converter(version=30)
15
16
  def _(engine: sql.engine.Engine) -> None:
16
- convert_table_record(engine, table_record_updater=__update_table_record)
17
+ convert_table_md(engine, table_md_updater=__update_table_md)
17
18
  convert_table_version_record(engine, table_version_record_updater=__update_table_version_record)
18
19
  convert_table_schema_version_record(
19
20
  engine, table_schema_version_record_updater=__update_table_schema_version_record
20
21
  )
21
22
 
22
23
 
23
- def __update_table_record(record: Table) -> None:
24
- """
25
- Update TableMd with table_id
26
- """
27
- assert isinstance(record.md, dict)
28
- md = copy.copy(record.md)
29
- md['tbl_id'] = str(record.id)
30
- record.md = md
24
+ def __update_table_md(md: dict, tbl_id: UUID) -> None:
25
+ md['tbl_id'] = str(tbl_id)
31
26
 
32
27
 
33
28
  def __update_table_version_record(record: TableVersion) -> None:
pixeltable/metadata/converters/convert_35.py ADDED
@@ -0,0 +1,9 @@
1
+ import sqlalchemy as sql
2
+
3
+ from pixeltable.metadata import register_converter
4
+
5
+
6
+ @register_converter(version=35)
7
+ def _(engine: sql.engine.Engine) -> None:
8
+ with engine.begin() as conn:
9
+ conn.execute(sql.text('ALTER TABLE tables ADD COLUMN lock_dummy int8'))
pixeltable/metadata/converters/convert_36.py ADDED
@@ -0,0 +1,38 @@
1
+ import logging
2
+ from typing import Any, Optional
3
+ from uuid import UUID
4
+
5
+ import sqlalchemy as sql
6
+
7
+ from pixeltable.metadata import register_converter
8
+ from pixeltable.metadata.converters.util import convert_table_md
9
+
10
+ _logger = logging.getLogger('pixeltable')
11
+
12
+
13
+ @register_converter(version=36)
14
+ def _(engine: sql.engine.Engine) -> None:
15
+ convert_table_md(engine, table_md_updater=__update_table_md, substitution_fn=__substitute_md)
16
+
17
+
18
+ def __update_table_md(table_md: dict, table_id: UUID) -> None:
19
+ """Update the view metadata to add the sample_clause field if it is missing
20
+
21
+ Args:
22
+ table_md (dict): copy of the original table metadata. this gets updated in place.
23
+ table_id (UUID): the table id
24
+
25
+ """
26
+ if table_md['view_md'] is None:
27
+ return
28
+ if 'sample_clause' not in table_md['view_md']:
29
+ table_md['view_md']['sample_clause'] = None
30
+ _logger.info(f'Updating view metadata for table: {table_id}')
31
+
32
+
33
+ def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
34
+ if isinstance(v, dict) and (v.get('_classname') == 'DataFrame'):
35
+ if 'sample_clause' not in v:
36
+ v['sample_clause'] = None
37
+ return k, v
38
+ return None
pixeltable/metadata/converters/convert_37.py ADDED
@@ -0,0 +1,15 @@
1
+ from uuid import UUID
2
+
3
+ import sqlalchemy as sql
4
+
5
+ from pixeltable.metadata import register_converter
6
+ from pixeltable.metadata.converters.util import convert_table_md
7
+
8
+
9
+ @register_converter(version=37)
10
+ def _(engine: sql.engine.Engine) -> None:
11
+ convert_table_md(engine, table_md_updater=__update_table_md)
12
+
13
+
14
+ def __update_table_md(table_md: dict, _: UUID) -> None:
15
+ table_md['view_sn'] = 0
pixeltable/metadata/converters/util.py CHANGED
@@ -33,9 +33,10 @@ def convert_table_md(
33
33
  the original entry will be replaced, and the traversal will continue with `v'`.
34
34
  """
35
35
  with engine.begin() as conn:
36
- for row in conn.execute(sql.select(Table)):
36
+ # avoid a SELECT * here, which breaks when we add new columns to Table
37
+ for row in conn.execute(sql.select(Table.id, Table.md)):
37
38
  tbl_id = row[0]
38
- table_md = row[2]
39
+ table_md = row[1]
39
40
  assert isinstance(table_md, dict)
40
41
  updated_table_md = copy.deepcopy(table_md)
41
42
  if table_md_updater is not None:
@@ -145,13 +146,6 @@ def __update_schema_column(table_schema_version_md: dict, schema_column_updater:
145
146
  schema_column_updater(schema_col)
146
147
 
147
148
 
148
- def convert_table_record(engine: sql.engine.Engine, table_record_updater: Optional[Callable[[Table], None]]) -> None:
149
- with sql.orm.Session(engine, future=True) as session:
150
- for record in session.query(Table).all():
151
- table_record_updater(record)
152
- session.commit()
153
-
154
-
155
149
  def convert_table_version_record(
156
150
  engine: sql.engine.Engine, table_version_record_updater: Optional[Callable[[TableVersion], None]]
157
151
  ) -> None:
pixeltable/metadata/notes.py CHANGED
@@ -2,6 +2,9 @@
2
2
  # rather than as a comment, so that the existence of a description can be enforced by
3
3
  # the unit tests when new versions are added.
4
4
  VERSION_NOTES = {
5
+ 38: 'Added TableMd.view_sn',
6
+ 37: 'Add support for the sample() method on DataFrames',
7
+ 36: 'Added Table.lock_dummy',
5
8
  35: 'Track reference_tbl in ColumnRef',
6
9
  34: 'Set default value for is_pk field in column metadata to False',
7
10
  33: 'Add is_replica field to table metadata',
pixeltable/metadata/schema.py CHANGED
@@ -84,7 +84,8 @@ class Dir(Base):
84
84
  )
85
85
  parent_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=True)
86
86
  md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False) # DirMd
87
- # This field is updated to synchronize database operations across multiple sessions
87
+
88
+ # used to force acquisition of an X-lock via an Update stmt
88
89
  lock_dummy: orm.Mapped[int] = orm.mapped_column(BigInteger, nullable=True)
89
90
 
90
91
 
@@ -146,6 +147,9 @@ class ViewMd:
146
147
  # filter predicate applied to the base table; view-only
147
148
  predicate: Optional[dict[str, Any]]
148
149
 
150
+ # sampling predicate applied to the base table; view-only
151
+ sample_clause: Optional[dict[str, Any]]
152
+
149
153
  # ComponentIterator subclass; only for component views
150
154
  iterator_class_fqn: Optional[str]
151
155
 
@@ -173,6 +177,11 @@ class TableMd:
173
177
  # - every row is assigned a unique and immutable rowid on insertion
174
178
  next_row_id: int
175
179
 
180
+ # sequence number to track changes in the set of mutable views of this table (ie, this table = the view base)
181
+ # - incremented for each add/drop of a mutable view
182
+ # - only maintained for mutable tables
183
+ view_sn: int
184
+
176
185
  # Metadata format for external stores:
177
186
  # {'class': 'pixeltable.io.label_studio.LabelStudioProject', 'md': {'project_id': 3}}
178
187
  external_stores: list[dict[str, Any]]
@@ -200,6 +209,9 @@ class Table(Base):
200
209
  dir_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=False)
201
210
  md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False) # TableMd
202
211
 
212
+ # used to force acquisition of an X-lock via an Update stmt
213
+ lock_dummy: orm.Mapped[int] = orm.mapped_column(BigInteger, nullable=True)
214
+
203
215
 
204
216
  @dataclasses.dataclass
205
217
  class TableVersionMd: