pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. pixeltable/__init__.py +42 -8
  2. pixeltable/{dataframe.py → _query.py} +470 -206
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +5 -4
  5. pixeltable/catalog/catalog.py +1785 -432
  6. pixeltable/catalog/column.py +190 -113
  7. pixeltable/catalog/dir.py +2 -4
  8. pixeltable/catalog/globals.py +19 -46
  9. pixeltable/catalog/insertable_table.py +191 -98
  10. pixeltable/catalog/path.py +63 -23
  11. pixeltable/catalog/schema_object.py +11 -15
  12. pixeltable/catalog/table.py +843 -436
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +978 -657
  15. pixeltable/catalog/table_version_handle.py +72 -16
  16. pixeltable/catalog/table_version_path.py +112 -43
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +134 -90
  20. pixeltable/config.py +134 -22
  21. pixeltable/env.py +471 -157
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +4 -1
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +11 -7
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +106 -56
  37. pixeltable/exec/globals.py +35 -0
  38. pixeltable/exec/in_memory_data_node.py +19 -19
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +351 -84
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +36 -23
  46. pixeltable/exprs/column_ref.py +213 -89
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +164 -54
  50. pixeltable/exprs/expr.py +70 -44
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +100 -40
  54. pixeltable/exprs/globals.py +2 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +18 -32
  57. pixeltable/exprs/is_null.py +7 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +27 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +167 -67
  64. pixeltable/exprs/rowid_ref.py +25 -10
  65. pixeltable/exprs/similarity_expr.py +58 -40
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +17 -11
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +29 -27
  78. pixeltable/func/signature.py +46 -19
  79. pixeltable/func/tools.py +31 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +16 -0
  82. pixeltable/functions/anthropic.py +123 -77
  83. pixeltable/functions/audio.py +147 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +7 -4
  86. pixeltable/functions/deepseek.py +35 -43
  87. pixeltable/functions/document.py +81 -0
  88. pixeltable/functions/fal.py +76 -0
  89. pixeltable/functions/fireworks.py +11 -20
  90. pixeltable/functions/gemini.py +195 -39
  91. pixeltable/functions/globals.py +142 -14
  92. pixeltable/functions/groq.py +108 -0
  93. pixeltable/functions/huggingface.py +1056 -24
  94. pixeltable/functions/image.py +115 -57
  95. pixeltable/functions/json.py +1 -1
  96. pixeltable/functions/llama_cpp.py +28 -13
  97. pixeltable/functions/math.py +67 -5
  98. pixeltable/functions/mistralai.py +18 -55
  99. pixeltable/functions/net.py +70 -0
  100. pixeltable/functions/ollama.py +20 -13
  101. pixeltable/functions/openai.py +240 -226
  102. pixeltable/functions/openrouter.py +143 -0
  103. pixeltable/functions/replicate.py +4 -4
  104. pixeltable/functions/reve.py +250 -0
  105. pixeltable/functions/string.py +239 -69
  106. pixeltable/functions/timestamp.py +16 -16
  107. pixeltable/functions/together.py +24 -84
  108. pixeltable/functions/twelvelabs.py +188 -0
  109. pixeltable/functions/util.py +6 -1
  110. pixeltable/functions/uuid.py +30 -0
  111. pixeltable/functions/video.py +1515 -107
  112. pixeltable/functions/vision.py +8 -8
  113. pixeltable/functions/voyageai.py +289 -0
  114. pixeltable/functions/whisper.py +16 -8
  115. pixeltable/functions/whisperx.py +179 -0
  116. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  117. pixeltable/globals.py +362 -115
  118. pixeltable/index/base.py +17 -21
  119. pixeltable/index/btree.py +28 -22
  120. pixeltable/index/embedding_index.py +100 -118
  121. pixeltable/io/__init__.py +4 -2
  122. pixeltable/io/datarows.py +8 -7
  123. pixeltable/io/external_store.py +56 -105
  124. pixeltable/io/fiftyone.py +13 -13
  125. pixeltable/io/globals.py +31 -30
  126. pixeltable/io/hf_datasets.py +61 -16
  127. pixeltable/io/label_studio.py +74 -70
  128. pixeltable/io/lancedb.py +3 -0
  129. pixeltable/io/pandas.py +21 -12
  130. pixeltable/io/parquet.py +25 -105
  131. pixeltable/io/table_data_conduit.py +250 -123
  132. pixeltable/io/utils.py +4 -4
  133. pixeltable/iterators/__init__.py +2 -1
  134. pixeltable/iterators/audio.py +26 -25
  135. pixeltable/iterators/base.py +9 -3
  136. pixeltable/iterators/document.py +112 -78
  137. pixeltable/iterators/image.py +12 -15
  138. pixeltable/iterators/string.py +11 -4
  139. pixeltable/iterators/video.py +523 -120
  140. pixeltable/metadata/__init__.py +14 -3
  141. pixeltable/metadata/converters/convert_13.py +2 -2
  142. pixeltable/metadata/converters/convert_18.py +2 -2
  143. pixeltable/metadata/converters/convert_19.py +2 -2
  144. pixeltable/metadata/converters/convert_20.py +2 -2
  145. pixeltable/metadata/converters/convert_21.py +2 -2
  146. pixeltable/metadata/converters/convert_22.py +2 -2
  147. pixeltable/metadata/converters/convert_24.py +2 -2
  148. pixeltable/metadata/converters/convert_25.py +2 -2
  149. pixeltable/metadata/converters/convert_26.py +2 -2
  150. pixeltable/metadata/converters/convert_29.py +4 -4
  151. pixeltable/metadata/converters/convert_30.py +34 -21
  152. pixeltable/metadata/converters/convert_34.py +2 -2
  153. pixeltable/metadata/converters/convert_35.py +9 -0
  154. pixeltable/metadata/converters/convert_36.py +38 -0
  155. pixeltable/metadata/converters/convert_37.py +15 -0
  156. pixeltable/metadata/converters/convert_38.py +39 -0
  157. pixeltable/metadata/converters/convert_39.py +124 -0
  158. pixeltable/metadata/converters/convert_40.py +73 -0
  159. pixeltable/metadata/converters/convert_41.py +12 -0
  160. pixeltable/metadata/converters/convert_42.py +9 -0
  161. pixeltable/metadata/converters/convert_43.py +44 -0
  162. pixeltable/metadata/converters/util.py +20 -31
  163. pixeltable/metadata/notes.py +9 -0
  164. pixeltable/metadata/schema.py +140 -53
  165. pixeltable/metadata/utils.py +74 -0
  166. pixeltable/mypy/__init__.py +3 -0
  167. pixeltable/mypy/mypy_plugin.py +123 -0
  168. pixeltable/plan.py +382 -115
  169. pixeltable/share/__init__.py +1 -1
  170. pixeltable/share/packager.py +547 -83
  171. pixeltable/share/protocol/__init__.py +33 -0
  172. pixeltable/share/protocol/common.py +165 -0
  173. pixeltable/share/protocol/operation_types.py +33 -0
  174. pixeltable/share/protocol/replica.py +119 -0
  175. pixeltable/share/publish.py +257 -59
  176. pixeltable/store.py +311 -194
  177. pixeltable/type_system.py +373 -211
  178. pixeltable/utils/__init__.py +2 -3
  179. pixeltable/utils/arrow.py +131 -17
  180. pixeltable/utils/av.py +298 -0
  181. pixeltable/utils/azure_store.py +346 -0
  182. pixeltable/utils/coco.py +6 -6
  183. pixeltable/utils/code.py +3 -3
  184. pixeltable/utils/console_output.py +4 -1
  185. pixeltable/utils/coroutine.py +6 -23
  186. pixeltable/utils/dbms.py +32 -6
  187. pixeltable/utils/description_helper.py +4 -5
  188. pixeltable/utils/documents.py +7 -18
  189. pixeltable/utils/exception_handler.py +7 -30
  190. pixeltable/utils/filecache.py +6 -6
  191. pixeltable/utils/formatter.py +86 -48
  192. pixeltable/utils/gcs_store.py +295 -0
  193. pixeltable/utils/http.py +133 -0
  194. pixeltable/utils/http_server.py +2 -3
  195. pixeltable/utils/iceberg.py +1 -2
  196. pixeltable/utils/image.py +17 -0
  197. pixeltable/utils/lancedb.py +90 -0
  198. pixeltable/utils/local_store.py +322 -0
  199. pixeltable/utils/misc.py +5 -0
  200. pixeltable/utils/object_stores.py +573 -0
  201. pixeltable/utils/pydantic.py +60 -0
  202. pixeltable/utils/pytorch.py +5 -6
  203. pixeltable/utils/s3_store.py +527 -0
  204. pixeltable/utils/sql.py +26 -0
  205. pixeltable/utils/system.py +30 -0
  206. pixeltable-0.5.7.dist-info/METADATA +579 -0
  207. pixeltable-0.5.7.dist-info/RECORD +227 -0
  208. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  209. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  210. pixeltable/__version__.py +0 -3
  211. pixeltable/catalog/named_function.py +0 -40
  212. pixeltable/ext/__init__.py +0 -17
  213. pixeltable/ext/functions/__init__.py +0 -11
  214. pixeltable/ext/functions/whisperx.py +0 -77
  215. pixeltable/utils/media_store.py +0 -77
  216. pixeltable/utils/s3.py +0 -17
  217. pixeltable-0.3.14.dist-info/METADATA +0 -434
  218. pixeltable-0.3.14.dist-info/RECORD +0 -186
  219. pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
  220. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -8,21 +8,24 @@ from typing import Callable
8
8
  import sqlalchemy as sql
9
9
  from sqlalchemy import orm
10
10
 
11
+ import pixeltable as pxt
12
+ import pixeltable.exceptions as excs
11
13
  from pixeltable.utils.console_output import ConsoleLogger
12
14
 
13
15
  from .schema import SystemInfo, SystemInfoMd
14
16
 
15
17
  _console_logger = ConsoleLogger(logging.getLogger('pixeltable'))
16
-
18
+ _logger = logging.getLogger('pixeltable')
17
19
 
18
20
  # current version of the metadata; this is incremented whenever the metadata schema changes
19
- VERSION = 35
21
+ VERSION = 44
20
22
 
21
23
 
22
24
  def create_system_info(engine: sql.engine.Engine) -> None:
23
25
  """Create the system metadata record"""
24
26
  system_md = SystemInfoMd(schema_version=VERSION)
25
27
  record = SystemInfo(md=dataclasses.asdict(system_md))
28
+ _logger.debug(f'Creating pixeltable system info record {record}')
26
29
  with orm.Session(engine, future=True) as session:
27
30
  # Write system metadata only once for idempotency
28
31
  if session.query(SystemInfo).count() == 0:
@@ -52,9 +55,17 @@ for _, modname, _ in pkgutil.iter_modules([os.path.dirname(__file__) + '/convert
52
55
  def upgrade_md(engine: sql.engine.Engine) -> None:
53
56
  """Upgrade the metadata schema to the current version"""
54
57
  with orm.Session(engine) as session:
55
- system_info = session.query(SystemInfo).one().md
58
+ # Get exclusive lock on SystemInfo row
59
+ system_info = session.query(SystemInfo).with_for_update().one().md
56
60
  md_version = system_info['schema_version']
57
61
  assert isinstance(md_version, int)
62
+ _logger.info(f'Current database version: {md_version}, installed version: {VERSION}')
63
+ if md_version > VERSION:
64
+ raise excs.Error(
65
+ 'This Pixeltable database was created with a newer Pixeltable version '
66
+ f'than the one currently installed ({pxt.__version__}).\n'
67
+ 'Please update to the latest Pixeltable version by running: pip install --upgrade pixeltable'
68
+ )
58
69
  if md_version == VERSION:
59
70
  return
60
71
  while md_version < VERSION:
@@ -12,9 +12,9 @@ _logger = logging.getLogger('pixeltable')
12
12
  @register_converter(version=13)
13
13
  def _(engine: sql.engine.Engine) -> None:
14
14
  with engine.begin() as conn:
15
- for row in conn.execute(sql.select(Table)):
15
+ for row in conn.execute(sql.select(Table.id, Table.md)):
16
16
  id = row[0]
17
- md = row[2]
17
+ md = row[1]
18
18
  updated_md = __update_md(md)
19
19
  if updated_md != md:
20
20
  _logger.info(f'Updating schema for table: {id}')
@@ -1,4 +1,4 @@
1
- from typing import Any, Optional
1
+ from typing import Any
2
2
 
3
3
  import sqlalchemy as sql
4
4
 
@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
11
11
  convert_table_md(engine, substitution_fn=__substitute_md)
12
12
 
13
13
 
14
- def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
14
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
15
15
  # Migrate a few changed function names
16
16
  if k == 'path' and v == 'pixeltable.functions.string.str_format':
17
17
  return 'path', 'pixeltable.functions.string.format'
@@ -1,5 +1,5 @@
1
1
  import datetime
2
- from typing import Any, Optional
2
+ from typing import Any
3
3
 
4
4
  import sqlalchemy as sql
5
5
 
@@ -28,7 +28,7 @@ def _(engine: sql.engine.Engine) -> None:
28
28
  conn.execute(sql.text(f'ALTER TABLE {store_name} ALTER COLUMN col_{col_id} TYPE TIMESTAMPTZ'))
29
29
 
30
30
 
31
- def __update_timestamp_literals(k: Any, v: Any) -> Optional[tuple[Any, Any]]:
31
+ def __update_timestamp_literals(k: Any, v: Any) -> tuple[Any, Any] | None:
32
32
  if isinstance(v, dict) and 'val_t' in v:
33
33
  # It's a literal with an explicit 'val_t' field. In version 19 this can only mean a
34
34
  # timestamp literal, which (in version 19) is stored in the DB as a naive datetime.
@@ -1,4 +1,4 @@
1
- from typing import Any, Optional
1
+ from typing import Any
2
2
 
3
3
  import sqlalchemy as sql
4
4
 
@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
11
11
  convert_table_md(engine, substitution_fn=__substitute_md)
12
12
 
13
13
 
14
- def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
14
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
15
15
  if isinstance(v, dict) and '_classname' in v:
16
16
  # The way InlineArray is represented changed in v20. Previously, literal values were stored
17
17
  # directly in the Inline expr; now we store them in Literal sub-exprs. This converter
@@ -1,4 +1,4 @@
1
- from typing import Any, Optional
1
+ from typing import Any
2
2
 
3
3
  import sqlalchemy as sql
4
4
 
@@ -24,7 +24,7 @@ def __update_schema_column(schema_column: dict) -> None:
24
24
  schema_column['media_validation'] = None
25
25
 
26
26
 
27
- def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
27
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
28
28
  if isinstance(v, dict) and '_classname' in v and v['_classname'] == 'ColumnRef':
29
29
  if 'perform_validation' not in v:
30
30
  v['perform_validation'] = False
@@ -1,4 +1,4 @@
1
- from typing import Any, Optional
1
+ from typing import Any
2
2
 
3
3
  import sqlalchemy as sql
4
4
 
@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
11
11
  convert_table_md(engine, substitution_fn=__substitute_md)
12
12
 
13
13
 
14
- def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
14
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
15
15
  if isinstance(v, dict) and '_classname' in v and v['_classname'] == 'DataFrame':
16
16
  v['from_clause'] = {'tbls': [v['tbl']], 'join_clauses': []}
17
17
  return k, v
@@ -1,4 +1,4 @@
1
- from typing import Any, Optional
1
+ from typing import Any
2
2
 
3
3
  import sqlalchemy as sql
4
4
 
@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
11
11
  convert_table_md(engine, substitution_fn=__substitute_md)
12
12
 
13
13
 
14
- def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
14
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
15
15
  from pixeltable import func
16
16
  from pixeltable.func.globals import resolve_symbol
17
17
 
@@ -1,4 +1,4 @@
1
- from typing import Any, Optional
1
+ from typing import Any
2
2
 
3
3
  import sqlalchemy as sql
4
4
 
@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
11
11
  convert_table_md(engine, substitution_fn=__substitute_md)
12
12
 
13
13
 
14
- def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
14
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
15
15
  if k == 'path' and (
16
16
  v in ('pixeltable.functions.huggingface.clip_text', 'pixeltable.functions.huggingface.clip_image')
17
17
  ):
@@ -1,4 +1,4 @@
1
- from typing import Any, Optional
1
+ from typing import Any
2
2
 
3
3
  import sqlalchemy as sql
4
4
 
@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
11
11
  convert_table_md(engine, substitution_fn=__substitute_md)
12
12
 
13
13
 
14
- def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
14
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
15
15
  import pixeltable.type_system as ts
16
16
  from pixeltable.exprs.literal import Literal
17
17
 
@@ -1,4 +1,4 @@
1
- from typing import Any, Optional
1
+ from typing import Any
2
2
 
3
3
  import sqlalchemy as sql
4
4
 
@@ -12,7 +12,7 @@ def _(engine: sql.engine.Engine) -> None:
12
12
  convert_table_md(engine, substitution_fn=__substitute_md)
13
13
 
14
14
 
15
- def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
15
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
16
16
  # Defaults are now stored as literals in signatures
17
17
  if k == 'parameters':
18
18
  for param in v:
@@ -55,8 +55,8 @@ def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], A
55
55
  # We need to expand ("unroll") any var-args or var-kwargs.
56
56
 
57
57
  new_args_len = len(new_args)
58
- rolled_args: Optional[dict] = None
59
- rolled_kwargs: Optional[dict] = None
58
+ rolled_args: dict | None = None
59
+ rolled_kwargs: dict | None = None
60
60
 
61
61
  if 'signature' in v['fn']:
62
62
  # If it's a pickled function, there's no signature, so we're out of luck; varargs in a pickled function
@@ -1,36 +1,49 @@
1
1
  import copy
2
+ import uuid
3
+ from typing import Any
2
4
 
3
5
  import sqlalchemy as sql
6
+ from sqlalchemy import orm
7
+ from sqlalchemy.dialects.postgresql import JSONB, UUID
4
8
 
5
9
  from pixeltable.metadata import register_converter
6
- from pixeltable.metadata.converters.util import (
7
- convert_table_record,
8
- convert_table_schema_version_record,
9
- convert_table_version_record,
10
- )
11
- from pixeltable.metadata.schema import Table, TableSchemaVersion, TableVersion
10
+ from pixeltable.metadata.converters.util import convert_sql_table_record, convert_table_md
12
11
 
13
12
 
14
13
  @register_converter(version=30)
15
14
  def _(engine: sql.engine.Engine) -> None:
16
- convert_table_record(engine, table_record_updater=__update_table_record)
17
- convert_table_version_record(engine, table_version_record_updater=__update_table_version_record)
18
- convert_table_schema_version_record(
19
- engine, table_schema_version_record_updater=__update_table_schema_version_record
20
- )
15
+ convert_table_md(engine, table_md_updater=__update_table_md)
16
+ convert_sql_table_record(TableVersionAtV30, engine, record_updater=__update_table_version_record)
17
+ convert_sql_table_record(TableSchemaVersionAtV30, engine, record_updater=__update_table_schema_version_record)
21
18
 
22
19
 
23
- def __update_table_record(record: Table) -> None:
24
- """
25
- Update TableMd with table_id
26
- """
27
- assert isinstance(record.md, dict)
28
- md = copy.copy(record.md)
29
- md['tbl_id'] = str(record.id)
30
- record.md = md
20
+ def __update_table_md(md: dict, tbl_id: uuid.UUID) -> None:
21
+ md['tbl_id'] = str(tbl_id)
22
+
23
+
24
+ # We can't use the ORM declarations from pixeltable.metadata.schema, because those might have changed since the
25
+ # version being converted. So we include static declarations here of the tables as they existed at version 30.
26
+
27
+ Base: type = orm.declarative_base()
28
+
29
+
30
+ class TableVersionAtV30(Base):
31
+ __tablename__ = 'tableversions'
32
+
33
+ tbl_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), primary_key=True, nullable=False)
34
+ version: orm.Mapped[int] = orm.mapped_column(sql.BigInteger, primary_key=True, nullable=False)
35
+ md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False)
36
+
37
+
38
+ class TableSchemaVersionAtV30(Base):
39
+ __tablename__ = 'tableschemaversions'
40
+
41
+ tbl_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), primary_key=True, nullable=False)
42
+ schema_version: orm.Mapped[int] = orm.mapped_column(sql.BigInteger, primary_key=True, nullable=False)
43
+ md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False) # TableSchemaVersionMd
31
44
 
32
45
 
33
- def __update_table_version_record(record: TableVersion) -> None:
46
+ def __update_table_version_record(record: TableVersionAtV30) -> None:
34
47
  """
35
48
  Update TableVersion with table_id.
36
49
  """
@@ -40,7 +53,7 @@ def __update_table_version_record(record: TableVersion) -> None:
40
53
  record.md = md
41
54
 
42
55
 
43
- def __update_table_schema_version_record(record: TableSchemaVersion) -> None:
56
+ def __update_table_schema_version_record(record: TableSchemaVersionAtV30) -> None:
44
57
  """
45
58
  Update TableSchemaVersion with table_id.
46
59
  """
@@ -1,4 +1,4 @@
1
- from typing import Any, Optional
1
+ from typing import Any
2
2
 
3
3
  import sqlalchemy as sql
4
4
 
@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
11
11
  convert_table_md(engine, substitution_fn=__substitute_md)
12
12
 
13
13
 
14
- def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
14
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
15
15
  if isinstance(v, dict) and '_classname' in v and v['_classname'] == 'ColumnRef':
16
16
  # Add reference_tbl to ColumnRef; for historical metadata it is always equal to tbl
17
17
  assert 'reference_tbl' not in v
@@ -0,0 +1,9 @@
1
+ import sqlalchemy as sql
2
+
3
+ from pixeltable.metadata import register_converter
4
+
5
+
6
+ @register_converter(version=35)
7
+ def _(engine: sql.engine.Engine) -> None:
8
+ with engine.begin() as conn:
9
+ conn.execute(sql.text('ALTER TABLE tables ADD COLUMN lock_dummy int8'))
@@ -0,0 +1,38 @@
1
+ import logging
2
+ from typing import Any
3
+ from uuid import UUID
4
+
5
+ import sqlalchemy as sql
6
+
7
+ from pixeltable.metadata import register_converter
8
+ from pixeltable.metadata.converters.util import convert_table_md
9
+
10
+ _logger = logging.getLogger('pixeltable')
11
+
12
+
13
+ @register_converter(version=36)
14
+ def _(engine: sql.engine.Engine) -> None:
15
+ convert_table_md(engine, table_md_updater=__update_table_md, substitution_fn=__substitute_md)
16
+
17
+
18
+ def __update_table_md(table_md: dict, table_id: UUID) -> None:
19
+ """Update the view metadata to add the sample_clause field if it is missing
20
+
21
+ Args:
22
+ table_md (dict): copy of the original table metadata. This gets updated in place.
23
+ table_id (UUID): the table id
24
+
25
+ """
26
+ if table_md['view_md'] is None:
27
+ return
28
+ if 'sample_clause' not in table_md['view_md']:
29
+ table_md['view_md']['sample_clause'] = None
30
+ _logger.info(f'Updating view metadata for table: {table_id}')
31
+
32
+
33
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
34
+ if isinstance(v, dict) and (v.get('_classname') == 'DataFrame'):
35
+ if 'sample_clause' not in v:
36
+ v['sample_clause'] = None
37
+ return k, v
38
+ return None
@@ -0,0 +1,15 @@
1
+ from uuid import UUID
2
+
3
+ import sqlalchemy as sql
4
+
5
+ from pixeltable.metadata import register_converter
6
+ from pixeltable.metadata.converters.util import convert_table_md
7
+
8
+
9
+ @register_converter(version=37)
10
+ def _(engine: sql.engine.Engine) -> None:
11
+ convert_table_md(engine, table_md_updater=__update_table_md)
12
+
13
+
14
+ def __update_table_md(table_md: dict, _: UUID) -> None:
15
+ table_md['view_sn'] = 0
@@ -0,0 +1,39 @@
1
+ from typing import Any
2
+
3
+ import sqlalchemy as sql
4
+
5
+ from pixeltable.metadata import register_converter
6
+ from pixeltable.metadata.converters.util import convert_table_md
7
+
8
+
9
+ @register_converter(version=38)
10
+ def _(engine: sql.engine.Engine) -> None:
11
+ convert_table_md(engine, substitution_fn=__substitute_md)
12
+
13
+
14
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
15
+ if k == 'col_mapping':
16
+ assert isinstance(v, list)
17
+ return k, [__col_mapping_entry(e) for e in v]
18
+ if k == 'stored_proxies':
19
+ assert isinstance(v, list)
20
+ return k, [__stored_proxies_entry(e) for e in v]
21
+ return None
22
+
23
+
24
+ def __col_mapping_entry(e: list) -> list:
25
+ assert isinstance(e, list)
26
+ assert isinstance(e[0], dict)
27
+ assert isinstance(e[1], str)
28
+ return [__col_handle(e[0]), e[1]]
29
+
30
+
31
+ def __stored_proxies_entry(e: list) -> list:
32
+ assert isinstance(e, list)
33
+ assert isinstance(e[0], dict)
34
+ assert isinstance(e[1], dict)
35
+ return [__col_handle(e[0]), __col_handle(e[1])]
36
+
37
+
38
+ def __col_handle(e: dict) -> dict:
39
+ return {'tbl_version': {'id': e['tbl_id'], 'effective_version': None}, 'col_id': e['col_id']}
@@ -0,0 +1,124 @@
1
+ import logging
2
+ from uuid import UUID
3
+
4
+ import sqlalchemy as sql
5
+
6
+ from pixeltable.metadata import register_converter
7
+ from pixeltable.metadata.converters.util import convert_table_md
8
+
9
+ _logger = logging.getLogger('pixeltable')
10
+
11
+
12
+ @register_converter(version=39)
13
+ def _(engine: sql.engine.Engine) -> None:
14
+ convert_table_md(engine, table_modifier=__table_modifier)
15
+
16
+
17
+ def __table_modifier(conn: sql.Connection, tbl_id: UUID, orig_table_md: dict, updated_table_md: dict) -> None:
18
+ store_prefix = 'view' if orig_table_md['view_md'] is not None else 'tbl'
19
+ store_name = f'{store_prefix}_{tbl_id.hex}'
20
+
21
+ # Get the list of column names that need to be migrated
22
+ col_names = find_error_columns(conn=conn, store_name=store_name)
23
+ if len(col_names) == 0:
24
+ _logger.info(f'No error columns found in table {store_name}. Skipping migration.')
25
+ return
26
+
27
+ # Check if the table exists, outside of the metadata we were given
28
+ # There seem to be cases where the metadata is present in the catalog,
29
+ # but the table itself is not in the database.
30
+ check_table_sql = sql.text(f"""
31
+ SELECT EXISTS (
32
+ SELECT 1
33
+ FROM information_schema.tables
34
+ WHERE table_name = '{store_name}'
35
+ )
36
+ """)
37
+ table_exists = conn.execute(check_table_sql).scalar()
38
+ if not table_exists:
39
+ _logger.warning(f'Table {store_name} does not exist. Skipping migration.')
40
+ return
41
+
42
+ return migrate_error_to_cellmd_columns(conn, store_name, col_names)
43
+
44
+
45
+ def find_error_columns(conn: sql.Connection, store_name: str) -> list[str]:
46
+ """
47
+ Return any errormsg or errortype columns in the given table
48
+
49
+ Args:
50
+ conn: SQLAlchemy connection
51
+ store_name: Name of the table to check
52
+
53
+ Returns:
54
+ List of column name roots (root_errormsg, root_errortype)
55
+ """
56
+ check_columns_sql = sql.text(f"""
57
+ SELECT column_name
58
+ FROM information_schema.columns
59
+ WHERE table_name = '{store_name}'
60
+ """)
61
+ found_columns = [
62
+ row[0]
63
+ for row in conn.execute(check_columns_sql)
64
+ if row[0].endswith('_errormsg') or row[0].endswith('_errortype')
65
+ ]
66
+ column_roots = {s.removesuffix('_errormsg').removesuffix('_errortype') for s in found_columns}
67
+ return [*column_roots]
68
+
69
+
70
+ def migrate_error_to_cellmd_columns(
71
+ conn: sql.Connection, store_name: str, col_names: list[str], backup_table: str | None = None
72
+ ) -> None:
73
+ """
74
+ Safe version with error handling and optional backup.
75
+
76
+ Args:
77
+ conn: SQLAlchemy connection
78
+ store_name: Name of the table to modify
79
+ col_names: List of column name prefixes
80
+ backup_table: Optional name for backup table
81
+
82
+ Usage:
83
+ migrate_error_to_cellmd_columns(conn, 'my_table', ['columnname'], 'my_table_backup')
84
+ """
85
+
86
+ try:
87
+ # Optional: Create backup
88
+ if backup_table:
89
+ backup_sql = sql.text(f"""
90
+ CREATE TABLE {backup_table} AS SELECT * FROM {store_name}
91
+ """)
92
+ conn.execute(backup_sql)
93
+ _logger.info(f'Backup created: {backup_table}')
94
+
95
+ # Step 1: Add new columns
96
+ add_column_str = ', '.join(f'ADD COLUMN {col}_cellmd JSONB DEFAULT NULL' for col in col_names)
97
+ add_column_sql = sql.text(f'ALTER TABLE {store_name} {add_column_str}')
98
+ conn.execute(add_column_sql)
99
+ _logger.info(f'Added columns: {", ".join(f"{col}_cellmd" for col in col_names)}')
100
+
101
+ # Step 2: Populate new columns
102
+ set_column_str = ', '.join(
103
+ [
104
+ f'{col}_cellmd = CASE WHEN {col}_errormsg IS NULL OR {col}_errortype IS NULL '
105
+ f"THEN NULL ELSE jsonb_build_object('errormsg', {col}_errormsg, 'errortype', {col}_errortype) END"
106
+ for col in col_names
107
+ ]
108
+ )
109
+ populate_sql = sql.text(f'UPDATE {store_name} SET {set_column_str}')
110
+ result = conn.execute(populate_sql)
111
+ _logger.info(f'Updated {result.rowcount} rows')
112
+
113
+ # Step 3: Drop old columns
114
+ drop_columns_str = ', '.join(
115
+ [f'DROP COLUMN IF EXISTS {col}_errormsg, DROP COLUMN IF EXISTS {col}_errortype' for col in col_names]
116
+ )
117
+ drop_columns_sql = sql.text(f'ALTER TABLE {store_name} {drop_columns_str}')
118
+ conn.execute(drop_columns_sql)
119
+ _logger.info(f'Dropped columns: {", ".join(f"{col}_errormsg, {col}_errortype" for col in col_names)}')
120
+ _logger.info(f'Migration completed successfully for table: {store_name}')
121
+
122
+ except sql.exc.SQLAlchemyError as e:
123
+ _logger.error(f'Migration for table {store_name} failed: {e}')
124
+ raise
@@ -0,0 +1,73 @@
1
+ import logging
2
+ from uuid import UUID
3
+
4
+ import sqlalchemy as sql
5
+
6
+ from pixeltable.metadata import register_converter
7
+ from pixeltable.metadata.converters.util import convert_table_md
8
+
9
+ _logger = logging.getLogger('pixeltable')
10
+
11
+
12
+ @register_converter(version=40)
13
+ def _(engine: sql.engine.Engine) -> None:
14
+ convert_table_md(engine, table_modifier=__table_modifier)
15
+
16
+
17
+ def __table_modifier(conn: sql.Connection, tbl_id: UUID, orig_table_md: dict, updated_table_md: dict) -> None:
18
+ store_prefix = 'view' if orig_table_md['view_md'] is not None else 'tbl'
19
+ store_name = f'{store_prefix}_{tbl_id.hex}'
20
+
21
+ # Get the list of column names that need _cellmd columns
22
+ _logger.info(f'Checking table {orig_table_md["name"]} ({store_name})')
23
+ col_ids = find_target_columns(orig_table_md)
24
+ if len(col_ids) == 0:
25
+ _logger.info(f'No Array or Json columns found in table {orig_table_md["name"]}. Skipping migration.')
26
+ return
27
+
28
+ # Check which columns already exist in the table
29
+ check_columns_sql = sql.text(f"""
30
+ SELECT column_name
31
+ FROM information_schema.columns
32
+ WHERE table_name = '{store_name}'
33
+ """)
34
+ existing_columns = {row[0] for row in conn.execute(check_columns_sql)}
35
+
36
+ # Filter out columns that already have _cellmd
37
+ col_ids_to_add: list[int] = []
38
+ for col_id in col_ids:
39
+ cellmd_col = f'col_{col_id}_cellmd'
40
+ if cellmd_col not in existing_columns:
41
+ col_ids_to_add.append(col_id)
42
+ else:
43
+ _logger.info(f'Column {cellmd_col} already exists in table {orig_table_md["name"]}. Skipping.')
44
+
45
+ if len(col_ids_to_add) == 0:
46
+ _logger.info(f'All _cellmd columns already exist in table {orig_table_md["name"]}. Skipping migration.')
47
+ return
48
+
49
+ return add_cellmd_columns(conn, store_name, col_ids_to_add)
50
+
51
+
52
+ def find_target_columns(table_md: dict) -> list[int]:
53
+ """Returns ids of stored array and json columns"""
54
+ result: list[int] = []
55
+ for col_id, col_md in table_md['column_md'].items():
56
+ col_type = col_md['col_type']
57
+ classname = col_type.get('_classname')
58
+ if classname in ['ArrayType', 'JsonType'] and col_md.get('stored', False):
59
+ result.append(col_id)
60
+ _logger.info(f'Found {classname} column: {col_id}')
61
+ return result
62
+
63
+
64
+ def add_cellmd_columns(conn: sql.Connection, store_name: str, col_ids: list[int]) -> None:
65
+ try:
66
+ # Add new columns
67
+ add_column_str = ', '.join(f'ADD COLUMN col_{col_id}_cellmd JSONB DEFAULT NULL' for col_id in col_ids)
68
+ add_column_sql = sql.text(f'ALTER TABLE {store_name} {add_column_str}')
69
+ conn.execute(add_column_sql)
70
+ _logger.info(f'Added columns to {store_name}: {", ".join(f"col_{col_id}_cellmd" for col_id in col_ids)}')
71
+ except sql.exc.SQLAlchemyError as e:
72
+ _logger.error(f'Migration for table {store_name} failed: {e}')
73
+ raise
@@ -0,0 +1,12 @@
1
+ import sqlalchemy as sql
2
+
3
+ from pixeltable.metadata import register_converter
4
+
5
+
6
+ @register_converter(version=41)
7
+ def _(engine: sql.engine.Engine) -> None:
8
+ with engine.begin() as conn:
9
+ conn.execute(sql.text("ALTER TABLE dirs ADD COLUMN additional_md JSONB DEFAULT '{}'::JSONB"))
10
+ conn.execute(sql.text("ALTER TABLE tables ADD COLUMN additional_md JSONB DEFAULT '{}'::JSONB"))
11
+ conn.execute(sql.text("ALTER TABLE tableversions ADD COLUMN additional_md JSONB DEFAULT '{}'::JSONB"))
12
+ conn.execute(sql.text("ALTER TABLE tableschemaversions ADD COLUMN additional_md JSONB DEFAULT '{}'::JSONB"))
@@ -0,0 +1,9 @@
1
+ import sqlalchemy as sql
2
+
3
+ from pixeltable.metadata import register_converter
4
+
5
+
6
+ @register_converter(version=42)
7
+ def _(engine: sql.engine.Engine) -> None:
8
+ with engine.begin() as conn:
9
+ conn.execute(sql.text('ALTER TABLE tables ALTER COLUMN dir_id DROP NOT NULL'))