pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.20__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries; it is provided for informational purposes only.

Potentially problematic release.


This version of pixeltable might be problematic.

Files changed (202)
  1. pixeltable/__init__.py +23 -5
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/__init__.py +5 -3
  4. pixeltable/catalog/catalog.py +1318 -404
  5. pixeltable/catalog/column.py +186 -115
  6. pixeltable/catalog/dir.py +1 -2
  7. pixeltable/catalog/globals.py +11 -43
  8. pixeltable/catalog/insertable_table.py +167 -79
  9. pixeltable/catalog/path.py +61 -23
  10. pixeltable/catalog/schema_object.py +9 -10
  11. pixeltable/catalog/table.py +626 -308
  12. pixeltable/catalog/table_metadata.py +101 -0
  13. pixeltable/catalog/table_version.py +713 -569
  14. pixeltable/catalog/table_version_handle.py +37 -6
  15. pixeltable/catalog/table_version_path.py +42 -29
  16. pixeltable/catalog/tbl_ops.py +50 -0
  17. pixeltable/catalog/update_status.py +191 -0
  18. pixeltable/catalog/view.py +108 -94
  19. pixeltable/config.py +128 -22
  20. pixeltable/dataframe.py +188 -100
  21. pixeltable/env.py +407 -136
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +3 -0
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +231 -0
  27. pixeltable/exec/cell_reconstruction_node.py +135 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +7 -6
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +190 -30
  37. pixeltable/exec/globals.py +32 -0
  38. pixeltable/exec/in_memory_data_node.py +18 -18
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +206 -101
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +34 -30
  46. pixeltable/exprs/column_ref.py +92 -96
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +152 -55
  50. pixeltable/exprs/expr.py +62 -43
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +75 -37
  54. pixeltable/exprs/globals.py +1 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +10 -27
  57. pixeltable/exprs/is_null.py +1 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +5 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +127 -53
  64. pixeltable/exprs/rowid_ref.py +8 -12
  65. pixeltable/exprs/similarity_expr.py +50 -25
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +10 -10
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +20 -18
  78. pixeltable/func/signature.py +43 -16
  79. pixeltable/func/tools.py +23 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +6 -0
  82. pixeltable/functions/anthropic.py +93 -33
  83. pixeltable/functions/audio.py +114 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +1 -1
  86. pixeltable/functions/deepseek.py +20 -9
  87. pixeltable/functions/fireworks.py +2 -2
  88. pixeltable/functions/gemini.py +28 -11
  89. pixeltable/functions/globals.py +13 -13
  90. pixeltable/functions/groq.py +108 -0
  91. pixeltable/functions/huggingface.py +1046 -23
  92. pixeltable/functions/image.py +9 -18
  93. pixeltable/functions/llama_cpp.py +23 -8
  94. pixeltable/functions/math.py +3 -4
  95. pixeltable/functions/mistralai.py +4 -15
  96. pixeltable/functions/ollama.py +16 -9
  97. pixeltable/functions/openai.py +104 -82
  98. pixeltable/functions/openrouter.py +143 -0
  99. pixeltable/functions/replicate.py +2 -2
  100. pixeltable/functions/reve.py +250 -0
  101. pixeltable/functions/string.py +21 -28
  102. pixeltable/functions/timestamp.py +13 -14
  103. pixeltable/functions/together.py +4 -6
  104. pixeltable/functions/twelvelabs.py +92 -0
  105. pixeltable/functions/util.py +6 -1
  106. pixeltable/functions/video.py +1388 -106
  107. pixeltable/functions/vision.py +7 -7
  108. pixeltable/functions/whisper.py +15 -7
  109. pixeltable/functions/whisperx.py +179 -0
  110. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  111. pixeltable/globals.py +332 -105
  112. pixeltable/index/base.py +13 -22
  113. pixeltable/index/btree.py +23 -22
  114. pixeltable/index/embedding_index.py +32 -44
  115. pixeltable/io/__init__.py +4 -2
  116. pixeltable/io/datarows.py +7 -6
  117. pixeltable/io/external_store.py +49 -77
  118. pixeltable/io/fiftyone.py +11 -11
  119. pixeltable/io/globals.py +29 -28
  120. pixeltable/io/hf_datasets.py +17 -9
  121. pixeltable/io/label_studio.py +70 -66
  122. pixeltable/io/lancedb.py +3 -0
  123. pixeltable/io/pandas.py +12 -11
  124. pixeltable/io/parquet.py +13 -93
  125. pixeltable/io/table_data_conduit.py +71 -47
  126. pixeltable/io/utils.py +3 -3
  127. pixeltable/iterators/__init__.py +2 -1
  128. pixeltable/iterators/audio.py +21 -11
  129. pixeltable/iterators/document.py +116 -55
  130. pixeltable/iterators/image.py +5 -2
  131. pixeltable/iterators/video.py +293 -13
  132. pixeltable/metadata/__init__.py +4 -2
  133. pixeltable/metadata/converters/convert_18.py +2 -2
  134. pixeltable/metadata/converters/convert_19.py +2 -2
  135. pixeltable/metadata/converters/convert_20.py +2 -2
  136. pixeltable/metadata/converters/convert_21.py +2 -2
  137. pixeltable/metadata/converters/convert_22.py +2 -2
  138. pixeltable/metadata/converters/convert_24.py +2 -2
  139. pixeltable/metadata/converters/convert_25.py +2 -2
  140. pixeltable/metadata/converters/convert_26.py +2 -2
  141. pixeltable/metadata/converters/convert_29.py +4 -4
  142. pixeltable/metadata/converters/convert_34.py +2 -2
  143. pixeltable/metadata/converters/convert_36.py +2 -2
  144. pixeltable/metadata/converters/convert_37.py +15 -0
  145. pixeltable/metadata/converters/convert_38.py +39 -0
  146. pixeltable/metadata/converters/convert_39.py +124 -0
  147. pixeltable/metadata/converters/convert_40.py +73 -0
  148. pixeltable/metadata/converters/util.py +13 -12
  149. pixeltable/metadata/notes.py +4 -0
  150. pixeltable/metadata/schema.py +79 -42
  151. pixeltable/metadata/utils.py +74 -0
  152. pixeltable/mypy/__init__.py +3 -0
  153. pixeltable/mypy/mypy_plugin.py +123 -0
  154. pixeltable/plan.py +274 -223
  155. pixeltable/share/__init__.py +1 -1
  156. pixeltable/share/packager.py +259 -129
  157. pixeltable/share/protocol/__init__.py +34 -0
  158. pixeltable/share/protocol/common.py +170 -0
  159. pixeltable/share/protocol/operation_types.py +33 -0
  160. pixeltable/share/protocol/replica.py +109 -0
  161. pixeltable/share/publish.py +213 -57
  162. pixeltable/store.py +238 -175
  163. pixeltable/type_system.py +104 -63
  164. pixeltable/utils/__init__.py +2 -3
  165. pixeltable/utils/arrow.py +108 -13
  166. pixeltable/utils/av.py +298 -0
  167. pixeltable/utils/azure_store.py +305 -0
  168. pixeltable/utils/code.py +3 -3
  169. pixeltable/utils/console_output.py +4 -1
  170. pixeltable/utils/coroutine.py +6 -23
  171. pixeltable/utils/dbms.py +31 -5
  172. pixeltable/utils/description_helper.py +4 -5
  173. pixeltable/utils/documents.py +5 -6
  174. pixeltable/utils/exception_handler.py +7 -30
  175. pixeltable/utils/filecache.py +6 -6
  176. pixeltable/utils/formatter.py +4 -6
  177. pixeltable/utils/gcs_store.py +283 -0
  178. pixeltable/utils/http_server.py +2 -3
  179. pixeltable/utils/iceberg.py +1 -2
  180. pixeltable/utils/image.py +17 -0
  181. pixeltable/utils/lancedb.py +88 -0
  182. pixeltable/utils/local_store.py +316 -0
  183. pixeltable/utils/misc.py +5 -0
  184. pixeltable/utils/object_stores.py +528 -0
  185. pixeltable/utils/pydantic.py +60 -0
  186. pixeltable/utils/pytorch.py +5 -6
  187. pixeltable/utils/s3_store.py +392 -0
  188. pixeltable-0.4.20.dist-info/METADATA +587 -0
  189. pixeltable-0.4.20.dist-info/RECORD +218 -0
  190. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info}/WHEEL +1 -1
  191. pixeltable-0.4.20.dist-info/entry_points.txt +2 -0
  192. pixeltable/__version__.py +0 -3
  193. pixeltable/ext/__init__.py +0 -17
  194. pixeltable/ext/functions/__init__.py +0 -11
  195. pixeltable/ext/functions/whisperx.py +0 -77
  196. pixeltable/utils/media_store.py +0 -77
  197. pixeltable/utils/s3.py +0 -17
  198. pixeltable/utils/sample.py +0 -25
  199. pixeltable-0.4.0rc3.dist-info/METADATA +0 -435
  200. pixeltable-0.4.0rc3.dist-info/RECORD +0 -189
  201. pixeltable-0.4.0rc3.dist-info/entry_points.txt +0 -3
  202. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info/licenses}/LICENSE +0 -0
pixeltable/metadata/converters/convert_39.py
@@ -0,0 +1,124 @@
+ import logging
+ from uuid import UUID
+
+ import sqlalchemy as sql
+
+ from pixeltable.metadata import register_converter
+ from pixeltable.metadata.converters.util import convert_table_md
+
+ _logger = logging.getLogger('pixeltable')
+
+
+ @register_converter(version=39)
+ def _(engine: sql.engine.Engine) -> None:
+     convert_table_md(engine, table_modifier=__table_modifier)
+
+
+ def __table_modifier(conn: sql.Connection, tbl_id: UUID, orig_table_md: dict, updated_table_md: dict) -> None:
+     store_prefix = 'view' if orig_table_md['view_md'] is not None else 'tbl'
+     store_name = f'{store_prefix}_{tbl_id.hex}'
+
+     # Get the list of column names that need to be migrated
+     col_names = find_error_columns(conn=conn, store_name=store_name)
+     if len(col_names) == 0:
+         _logger.info(f'No error columns found in table {store_name}. Skipping migration.')
+         return
+
+     # Check if the table exists, outside of the metadata we were given.
+     # There seem to be cases where the metadata is present in the catalog,
+     # but the table itself is not in the database.
+     check_table_sql = sql.text(f"""
+         SELECT EXISTS (
+             SELECT 1
+             FROM information_schema.tables
+             WHERE table_name = '{store_name}'
+         )
+     """)
+     table_exists = conn.execute(check_table_sql).scalar()
+     if not table_exists:
+         _logger.warning(f'Table {store_name} does not exist. Skipping migration.')
+         return
+
+     return migrate_error_to_cellmd_columns(conn, store_name, col_names)
+
+
+ def find_error_columns(conn: sql.Connection, store_name: str) -> list[str]:
+     """
+     Return any errormsg or errortype columns in the given table.
+
+     Args:
+         conn: SQLAlchemy connection
+         store_name: Name of the table to check
+
+     Returns:
+         List of column name roots (root_errormsg, root_errortype)
+     """
+     check_columns_sql = sql.text(f"""
+         SELECT column_name
+         FROM information_schema.columns
+         WHERE table_name = '{store_name}'
+     """)
+     found_columns = [
+         row[0]
+         for row in conn.execute(check_columns_sql)
+         if row[0].endswith('_errormsg') or row[0].endswith('_errortype')
+     ]
+     column_roots = {s.removesuffix('_errormsg').removesuffix('_errortype') for s in found_columns}
+     return [*column_roots]
+
+
+ def migrate_error_to_cellmd_columns(
+     conn: sql.Connection, store_name: str, col_names: list[str], backup_table: str | None = None
+ ) -> None:
+     """
+     Safe version with error handling and optional backup.
+
+     Args:
+         conn: SQLAlchemy connection
+         store_name: Name of the table to modify
+         col_names: List of column name prefixes
+         backup_table: Optional name for backup table
+
+     Usage:
+         migrate_error_to_cellmd_columns(conn, 'my_table', ['columnname'], 'my_table_backup')
+     """
+
+     try:
+         # Optional: Create backup
+         if backup_table:
+             backup_sql = sql.text(f"""
+                 CREATE TABLE {backup_table} AS SELECT * FROM {store_name}
+             """)
+             conn.execute(backup_sql)
+             _logger.info(f'Backup created: {backup_table}')
+
+         # Step 1: Add new columns
+         add_column_str = ', '.join(f'ADD COLUMN {col}_cellmd JSONB DEFAULT NULL' for col in col_names)
+         add_column_sql = sql.text(f'ALTER TABLE {store_name} {add_column_str}')
+         conn.execute(add_column_sql)
+         _logger.info(f'Added columns: {", ".join(f"{col}_cellmd" for col in col_names)}')
+
+         # Step 2: Populate new columns
+         set_column_str = ', '.join(
+             [
+                 f'{col}_cellmd = CASE WHEN {col}_errormsg IS NULL OR {col}_errortype IS NULL '
+                 f"THEN NULL ELSE jsonb_build_object('errormsg', {col}_errormsg, 'errortype', {col}_errortype) END"
+                 for col in col_names
+             ]
+         )
+         populate_sql = sql.text(f'UPDATE {store_name} SET {set_column_str}')
+         result = conn.execute(populate_sql)
+         _logger.info(f'Updated {result.rowcount} rows')
+
+         # Step 3: Drop old columns
+         drop_columns_str = ', '.join(
+             [f'DROP COLUMN IF EXISTS {col}_errormsg, DROP COLUMN IF EXISTS {col}_errortype' for col in col_names]
+         )
+         drop_columns_sql = sql.text(f'ALTER TABLE {store_name} {drop_columns_str}')
+         conn.execute(drop_columns_sql)
+         _logger.info(f'Dropped columns: {", ".join(f"{col}_errormsg, {col}_errortype" for col in col_names)}')
+         _logger.info(f'Migration completed successfully for table: {store_name}')
+
+     except sql.exc.SQLAlchemyError as e:
+         _logger.error(f'Migration for table {store_name} failed: {e}')
+         raise
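To make the effect of this converter concrete, here is a standalone sketch (not part of the package) of the statements migrate_error_to_cellmd_columns() builds for a single column root; the column root 'c1' and store table name 'tbl_0123abcd' are hypothetical placeholders.

# Hypothetical inputs, for illustration only
col, store_name = 'c1', 'tbl_0123abcd'

add_sql = f'ALTER TABLE {store_name} ADD COLUMN {col}_cellmd JSONB DEFAULT NULL'
populate_sql = (
    f'UPDATE {store_name} SET {col}_cellmd = '
    f'CASE WHEN {col}_errormsg IS NULL OR {col}_errortype IS NULL THEN NULL '
    f"ELSE jsonb_build_object('errormsg', {col}_errormsg, 'errortype', {col}_errortype) END"
)
drop_sql = f'ALTER TABLE {store_name} DROP COLUMN IF EXISTS {col}_errormsg, DROP COLUMN IF EXISTS {col}_errortype'

print(add_sql, populate_sql, drop_sql, sep='\n')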
pixeltable/metadata/converters/convert_40.py
@@ -0,0 +1,73 @@
+ import logging
+ from uuid import UUID
+
+ import sqlalchemy as sql
+
+ from pixeltable.metadata import register_converter
+ from pixeltable.metadata.converters.util import convert_table_md
+
+ _logger = logging.getLogger('pixeltable')
+
+
+ @register_converter(version=40)
+ def _(engine: sql.engine.Engine) -> None:
+     convert_table_md(engine, table_modifier=__table_modifier)
+
+
+ def __table_modifier(conn: sql.Connection, tbl_id: UUID, orig_table_md: dict, updated_table_md: dict) -> None:
+     store_prefix = 'view' if orig_table_md['view_md'] is not None else 'tbl'
+     store_name = f'{store_prefix}_{tbl_id.hex}'
+
+     # Get the list of column names that need _cellmd columns
+     _logger.info(f'Checking table {orig_table_md["name"]} ({store_name})')
+     col_ids = find_target_columns(orig_table_md)
+     if len(col_ids) == 0:
+         _logger.info(f'No Array or Json columns found in table {orig_table_md["name"]}. Skipping migration.')
+         return
+
+     # Check which columns already exist in the table
+     check_columns_sql = sql.text(f"""
+         SELECT column_name
+         FROM information_schema.columns
+         WHERE table_name = '{store_name}'
+     """)
+     existing_columns = {row[0] for row in conn.execute(check_columns_sql)}
+
+     # Filter out columns that already have _cellmd
+     col_ids_to_add: list[int] = []
+     for col_id in col_ids:
+         cellmd_col = f'col_{col_id}_cellmd'
+         if cellmd_col not in existing_columns:
+             col_ids_to_add.append(col_id)
+         else:
+             _logger.info(f'Column {cellmd_col} already exists in table {orig_table_md["name"]}. Skipping.')
+
+     if len(col_ids_to_add) == 0:
+         _logger.info(f'All _cellmd columns already exist in table {orig_table_md["name"]}. Skipping migration.')
+         return
+
+     return add_cellmd_columns(conn, store_name, col_ids_to_add)
+
+
+ def find_target_columns(table_md: dict) -> list[int]:
+     """Returns ids of stored array and json columns"""
+     result: list[int] = []
+     for col_id, col_md in table_md['column_md'].items():
+         col_type = col_md['col_type']
+         classname = col_type.get('_classname')
+         if classname in ['ArrayType', 'JsonType'] and col_md.get('stored', False):
+             result.append(col_id)
+             _logger.info(f'Found {classname} column: {col_id}')
+     return result
+
+
+ def add_cellmd_columns(conn: sql.Connection, store_name: str, col_ids: list[int]) -> None:
+     try:
+         # Add new columns
+         add_column_str = ', '.join(f'ADD COLUMN col_{col_id}_cellmd JSONB DEFAULT NULL' for col_id in col_ids)
+         add_column_sql = sql.text(f'ALTER TABLE {store_name} {add_column_str}')
+         conn.execute(add_column_sql)
+         _logger.info(f'Added columns to {store_name}: {", ".join(f"col_{col_id}_cellmd" for col_id in col_ids)}')
+     except sql.exc.SQLAlchemyError as e:
+         _logger.error(f'Migration for table {store_name} failed: {e}')
+         raise
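Similarly, a standalone sketch (not part of the package) of the DDL that add_cellmd_columns() generates; the column ids [2, 5] and the store table name are hypothetical.

# Hypothetical inputs, for illustration only
col_ids, store_name = [2, 5], 'tbl_0123abcd'
add_column_str = ', '.join(f'ADD COLUMN col_{col_id}_cellmd JSONB DEFAULT NULL' for col_id in col_ids)
print(f'ALTER TABLE {store_name} {add_column_str}')
# -> ALTER TABLE tbl_0123abcd ADD COLUMN col_2_cellmd JSONB DEFAULT NULL, ADD COLUMN col_5_cellmd JSONB DEFAULT NULL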
pixeltable/metadata/converters/util.py
@@ -1,6 +1,6 @@
  import copy
  import logging
- from typing import Any, Callable, Optional
+ from typing import Any, Callable
  from uuid import UUID

  import sqlalchemy as sql
@@ -12,10 +12,11 @@ __logger = logging.getLogger('pixeltable')

  def convert_table_md(
      engine: sql.engine.Engine,
-     table_md_updater: Optional[Callable[[dict, UUID], None]] = None,
-     column_md_updater: Optional[Callable[[dict], None]] = None,
-     external_store_md_updater: Optional[Callable[[dict], None]] = None,
-     substitution_fn: Optional[Callable[[Optional[str], Any], Optional[tuple[Optional[str], Any]]]] = None,
+     table_md_updater: Callable[[dict, UUID], None] | None = None,
+     column_md_updater: Callable[[dict], None] | None = None,
+     external_store_md_updater: Callable[[dict], None] | None = None,
+     substitution_fn: Callable[[str | None, Any], tuple[str | None, Any] | None] | None = None,
+     table_modifier: Callable[[sql.Connection, UUID, dict, dict], None] | None = None,
  ) -> None:
      """
      Converts schema.TableMd dicts based on the specified conversion functions.
@@ -50,6 +51,8 @@ def convert_table_md(
              if updated_table_md != table_md:
                  __logger.info(f'Updating schema for table: {tbl_id}')
                  conn.execute(sql.update(Table).where(Table.id == tbl_id).values(md=updated_table_md))
+             if table_modifier is not None:
+                 table_modifier(conn, tbl_id, table_md, updated_table_md)

          for row in conn.execute(sql.select(Function)):
              fn_id = row[0]
@@ -77,9 +80,7 @@ def __update_external_store_md(table_md: dict, external_store_md_updater: Callab
          external_store_md_updater(store_md)


- def __substitute_md_rec(
-     md: Any, substitution_fn: Callable[[Optional[str], Any], Optional[tuple[Optional[str], Any]]]
- ) -> Any:
+ def __substitute_md_rec(md: Any, substitution_fn: Callable[[str | None, Any], tuple[str | None, Any] | None]) -> Any:
      if isinstance(md, dict):
          updated_dict: dict[str, Any] = {}
          for k, v in md.items():
@@ -107,8 +108,8 @@ def __substitute_md_rec(

  def convert_table_schema_version_md(
      engine: sql.engine.Engine,
-     table_schema_version_md_updater: Optional[Callable[[dict], None]] = None,
-     schema_column_updater: Optional[Callable[[dict], None]] = None,
+     table_schema_version_md_updater: Callable[[dict], None] | None = None,
+     schema_column_updater: Callable[[dict], None] | None = None,
  ) -> None:
      """
      Converts schema.TableSchemaVersionMd dicts based on the specified conversion functions.
@@ -147,7 +148,7 @@ def __update_schema_column(table_schema_version_md: dict, schema_column_updater:


  def convert_table_version_record(
-     engine: sql.engine.Engine, table_version_record_updater: Optional[Callable[[TableVersion], None]]
+     engine: sql.engine.Engine, table_version_record_updater: Callable[[TableVersion], None] | None
  ) -> None:
      with sql.orm.Session(engine, future=True) as session:
          for record in session.query(TableVersion).all():
@@ -156,7 +157,7 @@

  def convert_table_schema_version_record(
-     engine: sql.engine.Engine, table_schema_version_record_updater: Optional[Callable[[TableSchemaVersion], None]]
+     engine: sql.engine.Engine, table_schema_version_record_updater: Callable[[TableSchemaVersion], None] | None
  ) -> None:
      with sql.orm.Session(engine, future=True) as session:
          for record in session.query(TableSchemaVersion).all():
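The new table_modifier parameter is the hook the version-39 and version-40 converters above rely on: convert_table_md() invokes it for each table, passing the same connection it uses for the metadata update, so a converter can also alter the underlying store table. A minimal converter skeleton under that assumption; the version number, column name, and helper name below are hypothetical.

import sqlalchemy as sql
from uuid import UUID

from pixeltable.metadata import register_converter
from pixeltable.metadata.converters.util import convert_table_md


@register_converter(version=99)  # hypothetical version number
def _(engine: sql.engine.Engine) -> None:
    # Delegate to convert_table_md(); the table_modifier is called per table
    # with the connection used for the metadata update.
    convert_table_md(engine, table_modifier=_modify_store_table)


def _modify_store_table(conn: sql.Connection, tbl_id: UUID, orig_md: dict, updated_md: dict) -> None:
    # Derive the store table name the same way the converters above do.
    store_name = f"{'view' if orig_md['view_md'] is not None else 'tbl'}_{tbl_id.hex}"
    # Hypothetical DDL, purely illustrative.
    conn.execute(sql.text(f'ALTER TABLE {store_name} ADD COLUMN IF NOT EXISTS example_col JSONB DEFAULT NULL'))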
pixeltable/metadata/notes.py
@@ -2,6 +2,10 @@
  # rather than as a comment, so that the existence of a description can be enforced by
  # the unit tests when new versions are added.
  VERSION_NOTES = {
+     41: 'Cellmd columns for array and json columns',
+     40: 'Convert error property columns to cellmd columns',
+     39: 'ColumnHandles in external stores',
+     38: 'Added TableMd.view_sn',
      37: 'Add support for the sample() method on DataFrames',
      36: 'Added Table.lock_dummy',
      35: 'Track reference_tbl in ColumnRef',
pixeltable/metadata/schema.py
@@ -1,13 +1,16 @@
  import dataclasses
+ import types
  import typing
  import uuid
- from typing import Any, NamedTuple, Optional, TypeVar, Union, get_type_hints
+ from typing import Any, TypeVar, Union, get_type_hints

  import sqlalchemy as sql
  from sqlalchemy import BigInteger, ForeignKey, Integer, LargeBinary, orm
  from sqlalchemy.dialects.postgresql import JSONB, UUID
  from sqlalchemy.orm.decl_api import DeclarativeMeta

+ from ..catalog.update_status import UpdateStatus
+
  # Base has to be marked explicitly as a type, in order to be used elsewhere as a type hint. But in addition to being
  # a type, it's also a `DeclarativeMeta`. The following pattern enables us to expose both `Base` and `Base.metadata`
  # outside of the module in a typesafe way.
@@ -22,13 +25,13 @@ def md_from_dict(data_class_type: type[T], data: Any) -> T:
      """Re-instantiate a dataclass instance that contains nested dataclasses from a dict."""
      if dataclasses.is_dataclass(data_class_type):
          fieldtypes = get_type_hints(data_class_type)
-         return data_class_type(**{f: md_from_dict(fieldtypes[f], data[f]) for f in data})  # type: ignore[return-value]
+         return data_class_type(**{f: md_from_dict(fieldtypes[f], data[f]) for f in data})

      origin = typing.get_origin(data_class_type)
      if origin is not None:
          type_args = typing.get_args(data_class_type)
-         if origin is Union and type(None) in type_args:
-             # Handling Optional types
+         if (origin is Union or origin is types.UnionType) and type(None) in type_args:
+             # handling T | None, T | None
              non_none_args = [arg for arg in type_args if arg is not type(None)]
              assert len(non_none_args) == 1
              return md_from_dict(non_none_args[0], data) if data is not None else None
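A short sketch (not part of the package) of what the types.UnionType branch enables: dataclass fields annotated with PEP 604 unions such as 'str | None' now deserialize the same way as Optional[...] fields. The dataclass below is a hypothetical stand-in; md_from_dict is assumed to be importable from pixeltable.metadata.schema.

import dataclasses
from typing import Any

from pixeltable.metadata.schema import md_from_dict


@dataclasses.dataclass
class _ExampleMd:  # hypothetical stand-in for a schema dataclass
    name: str
    user: str | None  # PEP 604 union; typing.get_origin() yields types.UnionType
    additional_md: dict[str, Any]


obj = md_from_dict(_ExampleMd, {'name': 'docs', 'user': None, 'additional_md': {}})
assert obj.user is None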
@@ -72,7 +75,7 @@ class SystemInfo(Base):
  @dataclasses.dataclass
  class DirMd:
      name: str
-     user: Optional[str]
+     user: str | None
      additional_md: dict[str, Any]


@@ -101,17 +104,20 @@ class ColumnMd:

      id: int
      schema_version_add: int
-     schema_version_drop: Optional[int]
+     schema_version_drop: int | None
      col_type: dict

      # if True, is part of the primary key
      is_pk: bool

      # if set, this is a computed column
-     value_expr: Optional[dict]
+     value_expr: dict | None

      # if True, the column is present in the stored table
-     stored: Optional[bool]
+     stored: bool | None
+
+     # If present, the URI for the destination for column values
+     destination: str | None = None


  @dataclasses.dataclass
@@ -127,13 +133,13 @@ class IndexMd:
      index_val_col_id: int  # column holding the values to be indexed
      index_val_undo_col_id: int  # column holding index values for deleted rows
      schema_version_add: int
-     schema_version_drop: Optional[int]
+     schema_version_drop: int | None
      class_fqn: str
      init_args: dict[str, Any]


  # a stored table version path is a list of (table id as str, effective table version)
- TableVersionPath = list[tuple[str, Optional[int]]]
+ TableVersionPath = list[tuple[str, int | None]]


  @dataclasses.dataclass
@@ -145,16 +151,16 @@ class ViewMd:
      base_versions: TableVersionPath

      # filter predicate applied to the base table; view-only
-     predicate: Optional[dict[str, Any]]
+     predicate: dict[str, Any] | None

      # sampling predicate applied to the base table; view-only
-     sample_clause: Optional[dict[str, Any]]
+     sample_clause: dict[str, Any] | None

      # ComponentIterator subclass; only for component views
-     iterator_class_fqn: Optional[str]
+     iterator_class_fqn: str | None

      # args to pass to the iterator class constructor; only for component views
-     iterator_args: Optional[dict[str, Any]]
+     iterator_args: dict[str, Any] | None


  @dataclasses.dataclass
@@ -163,7 +169,7 @@ class TableMd:
      name: str
      is_replica: bool

-     user: Optional[str]
+     user: str | None

      # monotonically increasing w/in Table for both data and schema changes, starting at 0
      current_version: int
@@ -177,15 +183,47 @@ class TableMd:
      # - every row is assigned a unique and immutable rowid on insertion
      next_row_id: int

+     # sequence number to track changes in the set of mutable views of this table (ie, this table = the view base)
+     # - incremented for each add/drop of a mutable view
+     # - only maintained for mutable tables
+     # TODO: replace with mutable_views: list[UUID] to help with debugging
+     view_sn: int
+
      # Metadata format for external stores:
      # {'class': 'pixeltable.io.label_studio.LabelStudioProject', 'md': {'project_id': 3}}
      external_stores: list[dict[str, Any]]

      column_md: dict[int, ColumnMd]  # col_id -> ColumnMd
      index_md: dict[int, IndexMd]  # index_id -> IndexMd
-     view_md: Optional[ViewMd]
+     view_md: ViewMd | None
      additional_md: dict[str, Any]

+     has_pending_ops: bool = False
+
+     @property
+     def is_snapshot(self) -> bool:
+         return self.view_md is not None and self.view_md.is_snapshot
+
+     @property
+     def is_mutable(self) -> bool:
+         return not self.is_snapshot and not self.is_replica
+
+     @property
+     def is_pure_snapshot(self) -> bool:
+         return (
+             self.view_md is not None
+             and self.view_md.is_snapshot
+             and self.view_md.sample_clause is None
+             and self.view_md.predicate is None
+             and len(self.column_md) == 0
+         )
+
+     @property
+     def ancestor_ids(self) -> list[str]:
+         if self.view_md is None:
+             return []
+         return [id for id, _ in self.view_md.base_versions]
+

  class Table(Base):
      """
@@ -214,7 +252,12 @@ class TableVersionMd:
      created_at: float  # time.time()
      version: int
      schema_version: int
-     additional_md: dict[str, Any]
+     user: str | None = None  # User that created this version
+     update_status: UpdateStatus | None = None  # UpdateStatus of the change that created this version
+     # A version fragment cannot be queried or instantiated via get_table(). A fragment represents a version of a
+     # replica table that has incomplete data, and exists only to provide base table support for a dependent view.
+     is_fragment: bool = False
+     additional_md: dict[str, Any] = dataclasses.field(default_factory=dict)


  class TableVersion(Base):
@@ -237,7 +280,7 @@ class SchemaColumn:

      # media validation strategy of this particular media column; if not set, TableMd.media_validation applies
      # stores column.MediaValiation.name.lower()
-     media_validation: Optional[str]
+     media_validation: str | None


  @dataclasses.dataclass
@@ -248,7 +291,7 @@ class TableSchemaVersionMd:

      tbl_id: str  # uuid.UUID
      schema_version: int
-     preceding_schema_version: Optional[int]
+     preceding_schema_version: int | None
      columns: dict[int, SchemaColumn]  # col_id -> SchemaColumn
      num_retained_versions: int
      comment: str
@@ -270,6 +313,22 @@
      md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False)  # TableSchemaVersionMd


+ class PendingTableOp(Base):
+     """
+     Table operation that needs to be completed before the table can be used.
+
+     Operations need to be completed in order of increasing seq_num.
+     """
+
+     __tablename__ = 'pendingtableops'
+
+     tbl_id: orm.Mapped[uuid.UUID] = orm.mapped_column(
+         UUID(as_uuid=True), ForeignKey('tables.id'), primary_key=True, nullable=False
+     )
+     op_sn: orm.Mapped[int] = orm.mapped_column(Integer, primary_key=True, nullable=False)  # catalog.TableOp.op_sn
+     op: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False)  # catalog.TableOp
+
+
  @dataclasses.dataclass
  class FunctionMd:
      name: str
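Given the ordering contract in PendingTableOp's docstring, a consumer would presumably read a table's pending operations sorted by op_sn. A minimal sketch (not from the package) under that assumption; the helper name is hypothetical.

import uuid
from typing import Any

import sqlalchemy as sql

from pixeltable.metadata.schema import PendingTableOp


def load_pending_ops(conn: sql.Connection, tbl_id: uuid.UUID) -> list[dict[str, Any]]:
    # Fetch the serialized ops for one table, in the order they must be completed.
    stmt = (
        sql.select(PendingTableOp.op)
        .where(PendingTableOp.tbl_id == tbl_id)
        .order_by(PendingTableOp.op_sn)
    )
    return [row[0] for row in conn.execute(stmt)]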
@@ -295,26 +354,4 @@ class Function(Base):
      )
      dir_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=True)
      md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False)  # FunctionMd
-     binary_obj: orm.Mapped[Optional[bytes]] = orm.mapped_column(LargeBinary, nullable=True)
-
-
- class FullTableMd(NamedTuple):
-     tbl_md: TableMd
-     version_md: TableVersionMd
-     schema_version_md: TableSchemaVersionMd
-
-     def as_dict(self) -> dict[str, Any]:
-         return {
-             'table_id': self.tbl_md.tbl_id,
-             'table_md': dataclasses.asdict(self.tbl_md),
-             'table_version_md': dataclasses.asdict(self.version_md),
-             'table_schema_version_md': dataclasses.asdict(self.schema_version_md),
-         }
-
-     @classmethod
-     def from_dict(cls, data_dict: dict[str, Any]) -> 'FullTableMd':
-         return FullTableMd(
-             tbl_md=md_from_dict(TableMd, data_dict['table_md']),
-             version_md=md_from_dict(TableVersionMd, data_dict['table_version_md']),
-             schema_version_md=md_from_dict(TableSchemaVersionMd, data_dict['table_schema_version_md']),
-         )
+     binary_obj: orm.Mapped[bytes | None] = orm.mapped_column(LargeBinary, nullable=True)
pixeltable/metadata/utils.py
@@ -0,0 +1,74 @@
+ from __future__ import annotations
+
+ from pixeltable.metadata import schema
+
+
+ class MetadataUtils:
+     @classmethod
+     def _diff_md(
+         cls, old_md: dict[int, schema.SchemaColumn] | None, new_md: dict[int, schema.SchemaColumn] | None
+     ) -> str:
+         """Return a string reporting the differences in a specific entry in two dictionaries
+
+         Results are formatted as follows:
+         - If `old_md` is `None`, returns 'Initial Version'.
+         - If `old_md` and `new_md` are the same, returns an empty string.
+         - If there are additions, changes, or deletions, returns a string summarizing the changes.
+         """
+         assert new_md is not None
+         if old_md is None:
+             return 'Initial Version'
+         if old_md == new_md:
+             return ''
+         added = {k: v.name for k, v in new_md.items() if k not in old_md}
+         changed = {
+             k: f'{old_md[k].name!r} to {v.name!r}'
+             for k, v in new_md.items()
+             if k in old_md and old_md[k].name != v.name
+         }
+         deleted = {k: v.name for k, v in old_md.items() if k not in new_md}
+         if len(added) == 0 and len(changed) == 0 and len(deleted) == 0:
+             return ''
+         # Format the result
+         t = []
+         if len(added) > 0:
+             t.append('Added: ' + ', '.join(added.values()))
+         if len(changed) > 0:
+             t.append('Renamed: ' + ', '.join(changed.values()))
+         if len(deleted) > 0:
+             t.append('Deleted: ' + ', '.join(deleted.values()))
+         r = ', '.join(t)
+         return r
+
+     @classmethod
+     def _create_md_change_dict(cls, md_list: list[tuple[int, dict[int, schema.SchemaColumn]]] | None) -> dict[int, str]:
+         """Return a dictionary of schema changes by version
+         Args:
+             md_list: a list of tuples, each containing a version number and a metadata dictionary.
+         """
+         r: dict[int, str] = {}
+         if md_list is None or len(md_list) == 0:
+             return r
+
+         # Sort the list in place by version number
+         md_list.sort()
+
+         first_retrieved_version = md_list[0][0]
+         if first_retrieved_version == 0:
+             prev_md = None
+             prev_ver = -1
+             start = 0
+         else:
+             prev_md = md_list[0][1]
+             prev_ver = first_retrieved_version
+             start = 1
+
+         for ver, curr_md in md_list[start:]:
+             if ver == prev_ver:
+                 continue
+             assert ver > prev_ver
+             tf = cls._diff_md(prev_md, curr_md)
+             if tf != '':
+                 r[ver] = tf
+             prev_md = curr_md
+         return r
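A quick illustration (not part of the package) of how these helpers behave. _diff_md() only inspects the .name attribute of each column entry, so simple stand-ins are used below in place of real schema.SchemaColumn instances, and the column names are hypothetical.

from types import SimpleNamespace

from pixeltable.metadata.utils import MetadataUtils

# Hypothetical column metadata for two consecutive schema versions
v0 = {0: SimpleNamespace(name='id'), 1: SimpleNamespace(name='img')}
v1 = {0: SimpleNamespace(name='id'), 1: SimpleNamespace(name='image'), 2: SimpleNamespace(name='label')}

print(MetadataUtils._diff_md(None, v0))  # 'Initial Version'
print(MetadataUtils._diff_md(v0, v1))    # "Added: label, Renamed: 'img' to 'image'"
print(MetadataUtils._create_md_change_dict([(0, v0), (1, v1)]))
# {0: 'Initial Version', 1: "Added: label, Renamed: 'img' to 'image'"}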
pixeltable/mypy/__init__.py
@@ -0,0 +1,3 @@
+ from .mypy_plugin import plugin
+
+ __all__ = ['plugin']