pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245) hide show
  1. pixeltable/__init__.py +83 -19
  2. pixeltable/_query.py +1444 -0
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +7 -4
  5. pixeltable/catalog/catalog.py +2394 -119
  6. pixeltable/catalog/column.py +225 -104
  7. pixeltable/catalog/dir.py +38 -9
  8. pixeltable/catalog/globals.py +53 -34
  9. pixeltable/catalog/insertable_table.py +265 -115
  10. pixeltable/catalog/path.py +80 -17
  11. pixeltable/catalog/schema_object.py +28 -43
  12. pixeltable/catalog/table.py +1270 -677
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +1270 -751
  15. pixeltable/catalog/table_version_handle.py +109 -0
  16. pixeltable/catalog/table_version_path.py +137 -42
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +251 -134
  20. pixeltable/config.py +215 -0
  21. pixeltable/env.py +736 -285
  22. pixeltable/exceptions.py +26 -2
  23. pixeltable/exec/__init__.py +7 -2
  24. pixeltable/exec/aggregation_node.py +39 -21
  25. pixeltable/exec/cache_prefetch_node.py +87 -109
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +25 -28
  29. pixeltable/exec/data_row_batch.py +11 -46
  30. pixeltable/exec/exec_context.py +26 -11
  31. pixeltable/exec/exec_node.py +35 -27
  32. pixeltable/exec/expr_eval/__init__.py +3 -0
  33. pixeltable/exec/expr_eval/evaluators.py +365 -0
  34. pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
  35. pixeltable/exec/expr_eval/globals.py +200 -0
  36. pixeltable/exec/expr_eval/row_buffer.py +74 -0
  37. pixeltable/exec/expr_eval/schedulers.py +413 -0
  38. pixeltable/exec/globals.py +35 -0
  39. pixeltable/exec/in_memory_data_node.py +35 -27
  40. pixeltable/exec/object_store_save_node.py +293 -0
  41. pixeltable/exec/row_update_node.py +44 -29
  42. pixeltable/exec/sql_node.py +414 -115
  43. pixeltable/exprs/__init__.py +8 -5
  44. pixeltable/exprs/arithmetic_expr.py +79 -45
  45. pixeltable/exprs/array_slice.py +5 -5
  46. pixeltable/exprs/column_property_ref.py +40 -26
  47. pixeltable/exprs/column_ref.py +254 -61
  48. pixeltable/exprs/comparison.py +14 -9
  49. pixeltable/exprs/compound_predicate.py +9 -10
  50. pixeltable/exprs/data_row.py +213 -72
  51. pixeltable/exprs/expr.py +270 -104
  52. pixeltable/exprs/expr_dict.py +6 -5
  53. pixeltable/exprs/expr_set.py +20 -11
  54. pixeltable/exprs/function_call.py +383 -284
  55. pixeltable/exprs/globals.py +18 -5
  56. pixeltable/exprs/in_predicate.py +7 -7
  57. pixeltable/exprs/inline_expr.py +37 -37
  58. pixeltable/exprs/is_null.py +8 -4
  59. pixeltable/exprs/json_mapper.py +120 -54
  60. pixeltable/exprs/json_path.py +90 -60
  61. pixeltable/exprs/literal.py +61 -16
  62. pixeltable/exprs/method_ref.py +7 -6
  63. pixeltable/exprs/object_ref.py +19 -8
  64. pixeltable/exprs/row_builder.py +238 -75
  65. pixeltable/exprs/rowid_ref.py +53 -15
  66. pixeltable/exprs/similarity_expr.py +65 -50
  67. pixeltable/exprs/sql_element_cache.py +5 -5
  68. pixeltable/exprs/string_op.py +107 -0
  69. pixeltable/exprs/type_cast.py +25 -13
  70. pixeltable/exprs/variable.py +2 -2
  71. pixeltable/func/__init__.py +9 -5
  72. pixeltable/func/aggregate_function.py +197 -92
  73. pixeltable/func/callable_function.py +119 -35
  74. pixeltable/func/expr_template_function.py +101 -48
  75. pixeltable/func/function.py +375 -62
  76. pixeltable/func/function_registry.py +20 -19
  77. pixeltable/func/globals.py +6 -5
  78. pixeltable/func/mcp.py +74 -0
  79. pixeltable/func/query_template_function.py +151 -35
  80. pixeltable/func/signature.py +178 -49
  81. pixeltable/func/tools.py +164 -0
  82. pixeltable/func/udf.py +176 -53
  83. pixeltable/functions/__init__.py +44 -4
  84. pixeltable/functions/anthropic.py +226 -47
  85. pixeltable/functions/audio.py +148 -11
  86. pixeltable/functions/bedrock.py +137 -0
  87. pixeltable/functions/date.py +188 -0
  88. pixeltable/functions/deepseek.py +113 -0
  89. pixeltable/functions/document.py +81 -0
  90. pixeltable/functions/fal.py +76 -0
  91. pixeltable/functions/fireworks.py +72 -20
  92. pixeltable/functions/gemini.py +249 -0
  93. pixeltable/functions/globals.py +208 -53
  94. pixeltable/functions/groq.py +108 -0
  95. pixeltable/functions/huggingface.py +1088 -95
  96. pixeltable/functions/image.py +155 -84
  97. pixeltable/functions/json.py +8 -11
  98. pixeltable/functions/llama_cpp.py +31 -19
  99. pixeltable/functions/math.py +169 -0
  100. pixeltable/functions/mistralai.py +50 -75
  101. pixeltable/functions/net.py +70 -0
  102. pixeltable/functions/ollama.py +29 -36
  103. pixeltable/functions/openai.py +548 -160
  104. pixeltable/functions/openrouter.py +143 -0
  105. pixeltable/functions/replicate.py +15 -14
  106. pixeltable/functions/reve.py +250 -0
  107. pixeltable/functions/string.py +310 -85
  108. pixeltable/functions/timestamp.py +37 -19
  109. pixeltable/functions/together.py +77 -120
  110. pixeltable/functions/twelvelabs.py +188 -0
  111. pixeltable/functions/util.py +7 -2
  112. pixeltable/functions/uuid.py +30 -0
  113. pixeltable/functions/video.py +1528 -117
  114. pixeltable/functions/vision.py +26 -26
  115. pixeltable/functions/voyageai.py +289 -0
  116. pixeltable/functions/whisper.py +19 -10
  117. pixeltable/functions/whisperx.py +179 -0
  118. pixeltable/functions/yolox.py +112 -0
  119. pixeltable/globals.py +716 -236
  120. pixeltable/index/__init__.py +3 -1
  121. pixeltable/index/base.py +17 -21
  122. pixeltable/index/btree.py +32 -22
  123. pixeltable/index/embedding_index.py +155 -92
  124. pixeltable/io/__init__.py +12 -7
  125. pixeltable/io/datarows.py +140 -0
  126. pixeltable/io/external_store.py +83 -125
  127. pixeltable/io/fiftyone.py +24 -33
  128. pixeltable/io/globals.py +47 -182
  129. pixeltable/io/hf_datasets.py +96 -127
  130. pixeltable/io/label_studio.py +171 -156
  131. pixeltable/io/lancedb.py +3 -0
  132. pixeltable/io/pandas.py +136 -115
  133. pixeltable/io/parquet.py +40 -153
  134. pixeltable/io/table_data_conduit.py +702 -0
  135. pixeltable/io/utils.py +100 -0
  136. pixeltable/iterators/__init__.py +8 -4
  137. pixeltable/iterators/audio.py +207 -0
  138. pixeltable/iterators/base.py +9 -3
  139. pixeltable/iterators/document.py +144 -87
  140. pixeltable/iterators/image.py +17 -38
  141. pixeltable/iterators/string.py +15 -12
  142. pixeltable/iterators/video.py +523 -127
  143. pixeltable/metadata/__init__.py +33 -8
  144. pixeltable/metadata/converters/convert_10.py +2 -3
  145. pixeltable/metadata/converters/convert_13.py +2 -2
  146. pixeltable/metadata/converters/convert_15.py +15 -11
  147. pixeltable/metadata/converters/convert_16.py +4 -5
  148. pixeltable/metadata/converters/convert_17.py +4 -5
  149. pixeltable/metadata/converters/convert_18.py +4 -6
  150. pixeltable/metadata/converters/convert_19.py +6 -9
  151. pixeltable/metadata/converters/convert_20.py +3 -6
  152. pixeltable/metadata/converters/convert_21.py +6 -8
  153. pixeltable/metadata/converters/convert_22.py +3 -2
  154. pixeltable/metadata/converters/convert_23.py +33 -0
  155. pixeltable/metadata/converters/convert_24.py +55 -0
  156. pixeltable/metadata/converters/convert_25.py +19 -0
  157. pixeltable/metadata/converters/convert_26.py +23 -0
  158. pixeltable/metadata/converters/convert_27.py +29 -0
  159. pixeltable/metadata/converters/convert_28.py +13 -0
  160. pixeltable/metadata/converters/convert_29.py +110 -0
  161. pixeltable/metadata/converters/convert_30.py +63 -0
  162. pixeltable/metadata/converters/convert_31.py +11 -0
  163. pixeltable/metadata/converters/convert_32.py +15 -0
  164. pixeltable/metadata/converters/convert_33.py +17 -0
  165. pixeltable/metadata/converters/convert_34.py +21 -0
  166. pixeltable/metadata/converters/convert_35.py +9 -0
  167. pixeltable/metadata/converters/convert_36.py +38 -0
  168. pixeltable/metadata/converters/convert_37.py +15 -0
  169. pixeltable/metadata/converters/convert_38.py +39 -0
  170. pixeltable/metadata/converters/convert_39.py +124 -0
  171. pixeltable/metadata/converters/convert_40.py +73 -0
  172. pixeltable/metadata/converters/convert_41.py +12 -0
  173. pixeltable/metadata/converters/convert_42.py +9 -0
  174. pixeltable/metadata/converters/convert_43.py +44 -0
  175. pixeltable/metadata/converters/util.py +44 -18
  176. pixeltable/metadata/notes.py +21 -0
  177. pixeltable/metadata/schema.py +185 -42
  178. pixeltable/metadata/utils.py +74 -0
  179. pixeltable/mypy/__init__.py +3 -0
  180. pixeltable/mypy/mypy_plugin.py +123 -0
  181. pixeltable/plan.py +616 -225
  182. pixeltable/share/__init__.py +3 -0
  183. pixeltable/share/packager.py +797 -0
  184. pixeltable/share/protocol/__init__.py +33 -0
  185. pixeltable/share/protocol/common.py +165 -0
  186. pixeltable/share/protocol/operation_types.py +33 -0
  187. pixeltable/share/protocol/replica.py +119 -0
  188. pixeltable/share/publish.py +349 -0
  189. pixeltable/store.py +398 -232
  190. pixeltable/type_system.py +730 -267
  191. pixeltable/utils/__init__.py +40 -0
  192. pixeltable/utils/arrow.py +201 -29
  193. pixeltable/utils/av.py +298 -0
  194. pixeltable/utils/azure_store.py +346 -0
  195. pixeltable/utils/coco.py +26 -27
  196. pixeltable/utils/code.py +4 -4
  197. pixeltable/utils/console_output.py +46 -0
  198. pixeltable/utils/coroutine.py +24 -0
  199. pixeltable/utils/dbms.py +92 -0
  200. pixeltable/utils/description_helper.py +11 -12
  201. pixeltable/utils/documents.py +60 -61
  202. pixeltable/utils/exception_handler.py +36 -0
  203. pixeltable/utils/filecache.py +38 -22
  204. pixeltable/utils/formatter.py +88 -51
  205. pixeltable/utils/gcs_store.py +295 -0
  206. pixeltable/utils/http.py +133 -0
  207. pixeltable/utils/http_server.py +14 -13
  208. pixeltable/utils/iceberg.py +13 -0
  209. pixeltable/utils/image.py +17 -0
  210. pixeltable/utils/lancedb.py +90 -0
  211. pixeltable/utils/local_store.py +322 -0
  212. pixeltable/utils/misc.py +5 -0
  213. pixeltable/utils/object_stores.py +573 -0
  214. pixeltable/utils/pydantic.py +60 -0
  215. pixeltable/utils/pytorch.py +20 -20
  216. pixeltable/utils/s3_store.py +527 -0
  217. pixeltable/utils/sql.py +32 -5
  218. pixeltable/utils/system.py +30 -0
  219. pixeltable/utils/transactional_directory.py +4 -3
  220. pixeltable-0.5.7.dist-info/METADATA +579 -0
  221. pixeltable-0.5.7.dist-info/RECORD +227 -0
  222. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  223. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  224. pixeltable/__version__.py +0 -3
  225. pixeltable/catalog/named_function.py +0 -36
  226. pixeltable/catalog/path_dict.py +0 -141
  227. pixeltable/dataframe.py +0 -894
  228. pixeltable/exec/expr_eval_node.py +0 -232
  229. pixeltable/ext/__init__.py +0 -14
  230. pixeltable/ext/functions/__init__.py +0 -8
  231. pixeltable/ext/functions/whisperx.py +0 -77
  232. pixeltable/ext/functions/yolox.py +0 -157
  233. pixeltable/tool/create_test_db_dump.py +0 -311
  234. pixeltable/tool/create_test_video.py +0 -81
  235. pixeltable/tool/doc_plugins/griffe.py +0 -50
  236. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  237. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  238. pixeltable/tool/embed_udf.py +0 -9
  239. pixeltable/tool/mypy_plugin.py +0 -55
  240. pixeltable/utils/media_store.py +0 -76
  241. pixeltable/utils/s3.py +0 -16
  242. pixeltable-0.2.26.dist-info/METADATA +0 -400
  243. pixeltable-0.2.26.dist-info/RECORD +0 -156
  244. pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
  245. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,63 @@
1
+ import copy
2
+ import uuid
3
+ from typing import Any
4
+
5
+ import sqlalchemy as sql
6
+ from sqlalchemy import orm
7
+ from sqlalchemy.dialects.postgresql import JSONB, UUID
8
+
9
+ from pixeltable.metadata import register_converter
10
+ from pixeltable.metadata.converters.util import convert_sql_table_record, convert_table_md
11
+
12
+
13
@register_converter(version=30)
def _(engine: sql.engine.Engine) -> None:
    """Write each table's id into the metadata of its table, table-version and schema-version records."""
    # Table-level metadata is handled by the generic table-md conversion.
    convert_table_md(engine, table_md_updater=__update_table_md)

    # The per-version records are converted via the static ORM declarations in this module.
    record_conversions = (
        (TableVersionAtV30, __update_table_version_record),
        (TableSchemaVersionAtV30, __update_table_schema_version_record),
    )
    for orm_class, updater in record_conversions:
        convert_sql_table_record(orm_class, engine, record_updater=updater)
18
+
19
+
20
def __update_table_md(md: dict, tbl_id: uuid.UUID) -> None:
    """Store the table id, converted to a string, under md['tbl_id'] (modifies md in place)."""
    md.update(tbl_id=str(tbl_id))
22
+
23
+
24
+ # We can't use the ORM declarations from pixeltable.metadata.schema, because those might have changed since the
25
+ # version being converted. So we include static declarations here of the tables as they existed at version 30.
26
+
27
+ Base: type = orm.declarative_base()
28
+
29
+
30
class TableVersionAtV30(Base):
    """Static declaration of the 'tableversions' table as it existed at metadata version 30."""

    __tablename__ = 'tableversions'

    # Composite primary key: (tbl_id, version)
    tbl_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), nullable=False, primary_key=True)
    version: orm.Mapped[int] = orm.mapped_column(sql.BigInteger, nullable=False, primary_key=True)
    # Version metadata, stored as JSONB
    md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False)
36
+
37
+
38
class TableSchemaVersionAtV30(Base):
    """Static declaration of the 'tableschemaversions' table as it existed at metadata version 30."""

    __tablename__ = 'tableschemaversions'

    # Composite primary key: (tbl_id, schema_version)
    tbl_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), nullable=False, primary_key=True)
    schema_version: orm.Mapped[int] = orm.mapped_column(sql.BigInteger, nullable=False, primary_key=True)
    # Schema-version metadata, stored as JSONB (TableSchemaVersionMd)
    md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False)
44
+
45
+
46
def __update_table_version_record(record: TableVersionAtV30) -> None:
    """
    Add the record's table id (as a string) to its TableVersion metadata.
    """
    current_md = record.md
    assert isinstance(current_md, dict)
    # A new dict is assigned instead of mutating the existing one.
    # NOTE(review): presumably so the ORM registers the JSONB column as changed - confirm.
    record.md = {**current_md, 'tbl_id': str(record.tbl_id)}
54
+
55
+
56
def __update_table_schema_version_record(record: TableSchemaVersionAtV30) -> None:
    """
    Add the record's table id (as a string) to its TableSchemaVersion metadata.
    """
    current_md = record.md
    assert isinstance(current_md, dict)
    # A new dict is assigned instead of mutating the existing one.
    # NOTE(review): presumably so the ORM registers the JSONB column as changed - confirm.
    record.md = {**current_md, 'tbl_id': str(record.tbl_id)}
@@ -0,0 +1,11 @@
1
+ import sqlalchemy as sql
2
+
3
+ from pixeltable.metadata import register_converter
4
+
5
+
6
@register_converter(version=31)
def _(engine: sql.engine.Engine) -> None:
    """Add an int8 column named 'lock_dummy' to the 'dirs' table in the store."""
    # lock_dummy is the target of an UPDATE operation that synchronizes directory operations.
    add_lock_column = sql.text('ALTER TABLE dirs ADD COLUMN lock_dummy int8')
    with engine.begin() as conn:
        conn.execute(add_lock_column)
@@ -0,0 +1,15 @@
1
+ from uuid import UUID
2
+
3
+ import sqlalchemy as sql
4
+
5
+ from pixeltable.metadata import register_converter
6
+ from pixeltable.metadata.converters.util import convert_table_md
7
+
8
+
9
@register_converter(version=32)
def _(engine: sql.engine.Engine) -> None:
    """Add an 'is_replica' entry to every table's metadata."""
    updater = __update_table_md
    convert_table_md(engine, table_md_updater=updater)
12
+
13
+
14
def __update_table_md(table_md: dict, table_id: UUID) -> None:
    """Set table_md['is_replica'] to False in place; table_id is not used."""
    table_md.update(is_replica=False)
@@ -0,0 +1,17 @@
1
+ from uuid import UUID
2
+
3
+ import sqlalchemy as sql
4
+
5
+ from pixeltable.metadata import register_converter
6
+ from pixeltable.metadata.converters.util import convert_table_md
7
+
8
+
9
@register_converter(version=33)
def _(engine: sql.engine.Engine) -> None:
    """Replace unset 'is_pk' values in every table's column metadata with False."""
    updater = __update_table_md
    convert_table_md(engine, table_md_updater=updater)
12
+
13
+
14
def __update_table_md(table_md: dict, table_id: UUID) -> None:
    """Set default value of 'is_pk' field in column metadata to False

    Args:
        table_md: table metadata dict; its 'column_md' entries are updated in place.
        table_id: id of the table (unused).
    """
    for col_md in table_md['column_md'].values():
        # Treat a missing 'is_pk' key the same as an explicit None; existing True/False values are kept.
        if col_md.get('is_pk') is None:
            col_md['is_pk'] = False
@@ -0,0 +1,21 @@
1
+ from typing import Any
2
+
3
+ import sqlalchemy as sql
4
+
5
+ from pixeltable.metadata import register_converter
6
+ from pixeltable.metadata.converters.util import convert_table_md
7
+
8
+
9
@register_converter(version=34)
def _(engine: sql.engine.Engine) -> None:
    """Add a 'reference_tbl' entry to every serialized ColumnRef in the stored metadata."""
    substitution = __substitute_md
    convert_table_md(engine, substitution_fn=substitution)
12
+
13
+
14
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
15
+ if isinstance(v, dict) and '_classname' in v and v['_classname'] == 'ColumnRef':
16
+ # Add reference_tbl to ColumnRef; for historical metadata it is always equal to tbl
17
+ assert 'reference_tbl' not in v
18
+ v['reference_tbl'] = None
19
+ return k, v
20
+
21
+ return None
@@ -0,0 +1,9 @@
1
+ import sqlalchemy as sql
2
+
3
+ from pixeltable.metadata import register_converter
4
+
5
+
6
@register_converter(version=35)
def _(engine: sql.engine.Engine) -> None:
    """Add an int8 column named 'lock_dummy' to the 'tables' table."""
    add_lock_column = sql.text('ALTER TABLE tables ADD COLUMN lock_dummy int8')
    with engine.begin() as conn:
        conn.execute(add_lock_column)
@@ -0,0 +1,38 @@
1
+ import logging
2
+ from typing import Any
3
+ from uuid import UUID
4
+
5
+ import sqlalchemy as sql
6
+
7
+ from pixeltable.metadata import register_converter
8
+ from pixeltable.metadata.converters.util import convert_table_md
9
+
10
+ _logger = logging.getLogger('pixeltable')
11
+
12
+
13
@register_converter(version=36)
def _(engine: sql.engine.Engine) -> None:
    """Add missing 'sample_clause' entries to view metadata and to serialized DataFrame dicts."""
    convert_table_md(
        engine,
        table_md_updater=__update_table_md,
        substitution_fn=__substitute_md,
    )
16
+
17
+
18
def __update_table_md(table_md: dict, table_id: UUID) -> None:
    """Add a 'sample_clause' entry (set to None) to the view metadata when it is missing.

    Args:
        table_md (dict): copy of the original table metadata; it is modified in place.
        table_id (UUID): the table id, used in the log message.

    """
    view_md = table_md['view_md']
    if view_md is None:
        # no view metadata: nothing to update
        return
    if 'sample_clause' in view_md:
        return
    view_md['sample_clause'] = None
    _logger.info(f'Updating view metadata for table: {table_id}')
31
+
32
+
33
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
34
+ if isinstance(v, dict) and (v.get('_classname') == 'DataFrame'):
35
+ if 'sample_clause' not in v:
36
+ v['sample_clause'] = None
37
+ return k, v
38
+ return None
@@ -0,0 +1,15 @@
1
+ from uuid import UUID
2
+
3
+ import sqlalchemy as sql
4
+
5
+ from pixeltable.metadata import register_converter
6
+ from pixeltable.metadata.converters.util import convert_table_md
7
+
8
+
9
@register_converter(version=37)
def _(engine: sql.engine.Engine) -> None:
    """Add a 'view_sn' entry to every table's metadata."""
    updater = __update_table_md
    convert_table_md(engine, table_md_updater=updater)
12
+
13
+
14
def __update_table_md(table_md: dict, _: UUID) -> None:
    """Set table_md['view_sn'] to 0 in place; the table id argument is ignored."""
    table_md.update(view_sn=0)
@@ -0,0 +1,39 @@
1
+ from typing import Any
2
+
3
+ import sqlalchemy as sql
4
+
5
+ from pixeltable.metadata import register_converter
6
+ from pixeltable.metadata.converters.util import convert_table_md
7
+
8
+
9
@register_converter(version=38)
def _(engine: sql.engine.Engine) -> None:
    """Rewrite the column references inside 'col_mapping' and 'stored_proxies' metadata entries."""
    substitution = __substitute_md
    convert_table_md(engine, substitution_fn=substitution)
12
+
13
+
14
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
15
+ if k == 'col_mapping':
16
+ assert isinstance(v, list)
17
+ return k, [__col_mapping_entry(e) for e in v]
18
+ if k == 'stored_proxies':
19
+ assert isinstance(v, list)
20
+ return k, [__stored_proxies_entry(e) for e in v]
21
+ return None
22
+
23
+
24
def __col_mapping_entry(e: list) -> list:
    """Convert one [column-dict, str] pair, replacing the column dict with its handle form."""
    assert isinstance(e, list)
    column_ref = e[0]
    assert isinstance(column_ref, dict)
    mapped_name = e[1]
    assert isinstance(mapped_name, str)
    return [__col_handle(column_ref), mapped_name]
29
+
30
+
31
def __stored_proxies_entry(e: list) -> list:
    """Convert one [column-dict, column-dict] pair, replacing both dicts with their handle form."""
    assert isinstance(e, list)
    first_ref = e[0]
    assert isinstance(first_ref, dict)
    second_ref = e[1]
    assert isinstance(second_ref, dict)
    return [__col_handle(first_ref), __col_handle(second_ref)]
36
+
37
+
38
def __col_handle(e: dict) -> dict:
    """Build a column handle dict from a dict holding 'tbl_id' and 'col_id'.

    The table id is nested under 'tbl_version' with 'effective_version' set to None.
    """
    tbl_version = {'id': e['tbl_id'], 'effective_version': None}
    return {'tbl_version': tbl_version, 'col_id': e['col_id']}
@@ -0,0 +1,124 @@
1
+ import logging
2
+ from uuid import UUID
3
+
4
+ import sqlalchemy as sql
5
+
6
+ from pixeltable.metadata import register_converter
7
+ from pixeltable.metadata.converters.util import convert_table_md
8
+
9
+ _logger = logging.getLogger('pixeltable')
10
+
11
+
12
@register_converter(version=39)
def _(engine: sql.engine.Engine) -> None:
    """Replace the '_errormsg' / '_errortype' store columns of every table with '_cellmd' JSONB columns."""
    modifier = __table_modifier
    convert_table_md(engine, table_modifier=modifier)
15
+
16
+
17
def __table_modifier(conn: sql.Connection, tbl_id: UUID, orig_table_md: dict, updated_table_md: dict) -> None:
    """Migrate the error columns of one table's store table to '_cellmd' columns.

    Args:
        conn: SQLAlchemy connection to run the statements on
        tbl_id: id of the table; determines the name of its store table
        orig_table_md: table metadata before conversion; 'view_md' selects the store table prefix
        updated_table_md: table metadata after conversion (unused)
    """
    store_prefix = 'view' if orig_table_md['view_md'] is not None else 'tbl'
    store_name = f'{store_prefix}_{tbl_id.hex}'

    # Check if the table exists, outside of the metadata we were given
    # There seem to be cases where the metadata is present in the catalog,
    # but the table itself is not in the database.
    # This check has to come before the column lookup: a missing table has no columns, so checking
    # afterwards could never reach the warning below.
    check_table_sql = sql.text(
        """
        SELECT EXISTS (
            SELECT 1
            FROM information_schema.tables
            WHERE table_name = :store_name
        )
        """
    )
    table_exists = conn.execute(check_table_sql, {'store_name': store_name}).scalar()
    if not table_exists:
        _logger.warning(f'Table {store_name} does not exist. Skipping migration.')
        return

    # Get the list of column names that need to be migrated
    col_names = find_error_columns(conn=conn, store_name=store_name)
    if len(col_names) == 0:
        _logger.info(f'No error columns found in table {store_name}. Skipping migration.')
        return

    migrate_error_to_cellmd_columns(conn, store_name, col_names)
43
+
44
+
45
def find_error_columns(conn: sql.Connection, store_name: str) -> list[str]:
    """
    Return the roots of any errormsg or errortype columns in the given table

    Args:
        conn: SQLAlchemy connection
        store_name: Name of the table to check

    Returns:
        Sorted list of distinct column name roots, i.e. the names of the matching columns with the
        '_errormsg' / '_errortype' suffix removed. Empty if the table has no such columns.
    """
    # The table name is passed as a bound parameter instead of being formatted into the statement.
    check_columns_sql = sql.text(
        """
        SELECT column_name
        FROM information_schema.columns
        WHERE table_name = :store_name
        """
    )
    error_suffixes = ('_errormsg', '_errortype')
    column_roots = {
        row[0].removesuffix('_errormsg').removesuffix('_errortype')
        for row in conn.execute(check_columns_sql, {'store_name': store_name})
        if row[0].endswith(error_suffixes)
    }
    # Sort so that the generated DDL and log output do not depend on set iteration order.
    return sorted(column_roots)
68
+
69
+
70
def migrate_error_to_cellmd_columns(
    conn: sql.Connection, store_name: str, col_names: list[str], backup_table: str | None = None
) -> None:
    """
    Replace the '<col>_errormsg' / '<col>_errortype' columns of a table with a '<col>_cellmd' JSONB column.

    For every row, '<col>_cellmd' is NULL when either source column is NULL; otherwise it is a JSON object
    with the keys 'errormsg' and 'errortype'. Table and column names are formatted directly into the SQL
    statements, so they must come from a trusted source.

    Args:
        conn: SQLAlchemy connection
        store_name: Name of the table to modify
        col_names: List of column name prefixes
        backup_table: Optional name for backup table

    Raises:
        sqlalchemy.exc.SQLAlchemyError: if a statement fails; the error is logged and re-raised.

    Usage:
        migrate_error_to_cellmd_columns(conn, 'my_table', ['columnname'], 'my_table_backup')
    """
    if not col_names:
        # Without any columns the ALTER TABLE / UPDATE statements below would be malformed.
        return

    try:
        # Optional: Create backup
        if backup_table:
            backup_sql = sql.text(f"""
                CREATE TABLE {backup_table} AS SELECT * FROM {store_name}
            """)
            conn.execute(backup_sql)
            _logger.info(f'Backup created: {backup_table}')

        # Step 1: Add new columns
        add_column_str = ', '.join(f'ADD COLUMN {col}_cellmd JSONB DEFAULT NULL' for col in col_names)
        add_column_sql = sql.text(f'ALTER TABLE {store_name} {add_column_str}')
        conn.execute(add_column_sql)
        _logger.info(f'Added columns: {", ".join(f"{col}_cellmd" for col in col_names)}')

        # Step 2: Populate new columns
        set_column_str = ', '.join(
            f'{col}_cellmd = CASE WHEN {col}_errormsg IS NULL OR {col}_errortype IS NULL '
            f"THEN NULL ELSE jsonb_build_object('errormsg', {col}_errormsg, 'errortype', {col}_errortype) END"
            for col in col_names
        )
        populate_sql = sql.text(f'UPDATE {store_name} SET {set_column_str}')
        result = conn.execute(populate_sql)
        _logger.info(f'Updated {result.rowcount} rows')

        # Step 3: Drop old columns
        drop_columns_str = ', '.join(
            f'DROP COLUMN IF EXISTS {col}_errormsg, DROP COLUMN IF EXISTS {col}_errortype' for col in col_names
        )
        drop_columns_sql = sql.text(f'ALTER TABLE {store_name} {drop_columns_str}')
        conn.execute(drop_columns_sql)
        _logger.info(f'Dropped columns: {", ".join(f"{col}_errormsg, {col}_errortype" for col in col_names)}')
        _logger.info(f'Migration completed successfully for table: {store_name}')

    except sql.exc.SQLAlchemyError as e:
        _logger.error(f'Migration for table {store_name} failed: {e}')
        raise
@@ -0,0 +1,73 @@
1
+ import logging
2
+ from uuid import UUID
3
+
4
+ import sqlalchemy as sql
5
+
6
+ from pixeltable.metadata import register_converter
7
+ from pixeltable.metadata.converters.util import convert_table_md
8
+
9
+ _logger = logging.getLogger('pixeltable')
10
+
11
+
12
@register_converter(version=40)
def _(engine: sql.engine.Engine) -> None:
    """Add missing '_cellmd' JSONB store columns for stored array and json columns of every table."""
    modifier = __table_modifier
    convert_table_md(engine, table_modifier=modifier)
15
+
16
+
17
def __table_modifier(conn: sql.Connection, tbl_id: UUID, orig_table_md: dict, updated_table_md: dict) -> None:
    """Add a 'col_<id>_cellmd' column to a table's store table for each stored array/json column lacking one.

    Args:
        conn: SQLAlchemy connection to run the statements on
        tbl_id: id of the table; determines the name of its store table
        orig_table_md: table metadata before conversion
        updated_table_md: table metadata after conversion (unused)
    """
    is_view = orig_table_md['view_md'] is not None
    store_name = f'{"view" if is_view else "tbl"}_{tbl_id.hex}'
    tbl_name = orig_table_md['name']

    # Get the list of column names that need _cellmd columns
    _logger.info(f'Checking table {tbl_name} ({store_name})')
    col_ids = find_target_columns(orig_table_md)
    if not col_ids:
        _logger.info(f'No Array or Json columns found in table {tbl_name}. Skipping migration.')
        return

    # Check which columns already exist in the table
    check_columns_sql = sql.text(f"""
        SELECT column_name
        FROM information_schema.columns
        WHERE table_name = '{store_name}'
    """)
    existing_columns = set()
    for row in conn.execute(check_columns_sql):
        existing_columns.add(row[0])

    # Filter out columns that already have _cellmd
    col_ids_to_add: list[int] = []
    for col_id in col_ids:
        cellmd_col = f'col_{col_id}_cellmd'
        if cellmd_col in existing_columns:
            _logger.info(f'Column {cellmd_col} already exists in table {tbl_name}. Skipping.')
            continue
        col_ids_to_add.append(col_id)

    if not col_ids_to_add:
        _logger.info(f'All _cellmd columns already exist in table {tbl_name}. Skipping migration.')
        return

    add_cellmd_columns(conn, store_name, col_ids_to_add)
50
+
51
+
52
def find_target_columns(table_md: dict) -> list[int]:
    """Returns ids of stored array and json columns"""
    target_classnames = ('ArrayType', 'JsonType')
    matches: list[int] = []
    for col_id, col_md in table_md['column_md'].items():
        classname = col_md['col_type'].get('_classname')
        # a column without a 'stored' entry is treated as not stored
        if classname not in target_classnames or not col_md.get('stored', False):
            continue
        matches.append(col_id)
        _logger.info(f'Found {classname} column: {col_id}')
    return matches
62
+
63
+
64
def add_cellmd_columns(conn: sql.Connection, store_name: str, col_ids: list[int]) -> None:
    """Add a nullable JSONB column 'col_<id>_cellmd' to store_name for every id in col_ids.

    A SQLAlchemyError raised by the statement is logged and re-raised.
    """
    cellmd_cols = [f'col_{col_id}_cellmd' for col_id in col_ids]
    try:
        # Add new columns
        add_clauses = ', '.join(f'ADD COLUMN {name} JSONB DEFAULT NULL' for name in cellmd_cols)
        conn.execute(sql.text(f'ALTER TABLE {store_name} {add_clauses}'))
        _logger.info(f'Added columns to {store_name}: {", ".join(cellmd_cols)}')
    except sql.exc.SQLAlchemyError as e:
        _logger.error(f'Migration for table {store_name} failed: {e}')
        raise
@@ -0,0 +1,12 @@
1
+ import sqlalchemy as sql
2
+
3
+ from pixeltable.metadata import register_converter
4
+
5
+
6
@register_converter(version=41)
def _(engine: sql.engine.Engine) -> None:
    """Add an 'additional_md' JSONB column, defaulting to an empty JSON object, to the four metadata tables."""
    md_tables = ('dirs', 'tables', 'tableversions', 'tableschemaversions')
    with engine.begin() as conn:
        for tbl in md_tables:
            stmt = f"ALTER TABLE {tbl} ADD COLUMN additional_md JSONB DEFAULT '{{}}'::JSONB"
            conn.execute(sql.text(stmt))
@@ -0,0 +1,9 @@
1
+ import sqlalchemy as sql
2
+
3
+ from pixeltable.metadata import register_converter
4
+
5
+
6
@register_converter(version=42)
def _(engine: sql.engine.Engine) -> None:
    """Drop the NOT NULL constraint on the 'dir_id' column of the 'tables' table."""
    drop_not_null = sql.text('ALTER TABLE tables ALTER COLUMN dir_id DROP NOT NULL')
    with engine.begin() as conn:
        conn.execute(drop_not_null)
@@ -0,0 +1,44 @@
1
+ from typing import Any
2
+
3
+ import numpy as np
4
+ import sqlalchemy as sql
5
+
6
+ from pixeltable import type_system as ts
7
+ from pixeltable.metadata import register_converter
8
+ from pixeltable.metadata.converters.util import convert_table_md
9
+
10
+
11
@register_converter(version=43)
def _(engine: sql.engine.Engine) -> None:
    """Convert serialized ArrayTypes from the legacy 'dtype' entry (a pxt Type ID) to a 'numpy_dtype' entry."""
    substitution = _substitution_fn
    convert_table_md(engine, substitution_fn=substitution)
15
+
16
+
17
def _substitution_fn(key: str | None, value: Any) -> tuple[str | None, Any] | None:
    """Replace the legacy 'dtype' entry of a serialized ArrayType with a 'numpy_dtype' entry.

    Returns None for anything that is not an ArrayType dict, and for ArrayType dicts that already have
    'numpy_dtype'. Otherwise updates value in place and returns (key, value).

    Raises:
        ValueError: if the legacy dtype has no entry in ts.ArrayType.pxt_dtype_to_numpy_dtype.
    """
    is_legacy_array_type = (
        isinstance(value, dict)
        and value.get('_classname', None) == 'ArrayType'
        and 'numpy_dtype' not in value
    )
    if not is_legacy_array_type:
        return None
    assert 'dtype' in value

    legacy_dtype_val = value['dtype']
    numpy_dtype_name: str | None
    if legacy_dtype_val is None:
        numpy_dtype_name = None
    else:
        legacy_dtype = ts.ColumnType.Type(legacy_dtype_val)
        new_dtype = ts.ArrayType.pxt_dtype_to_numpy_dtype.get(legacy_dtype, None)
        if new_dtype is None:
            raise ValueError(f'Unrecognized dtype: {legacy_dtype_val} ({legacy_dtype}) in {key}, {value}')
        # str(np.str_) would be something like '<U'
        numpy_dtype_name = 'str' if new_dtype == np.str_ else str(new_dtype)

    # Only drop the legacy entry once the new value is known.
    del value['dtype']
    value['numpy_dtype'] = numpy_dtype_name
    return key, value
@@ -1,20 +1,22 @@
1
1
  import copy
2
2
  import logging
3
- from typing import Any, Callable, Optional
3
+ from typing import Any, Callable, TypeVar
4
+ from uuid import UUID
4
5
 
5
6
  import sqlalchemy as sql
6
7
 
7
- from pixeltable.metadata.schema import Table, TableSchemaVersion
8
+ from pixeltable.metadata.schema import Function, Table, TableSchemaVersion
8
9
 
9
10
  __logger = logging.getLogger('pixeltable')
10
11
 
11
12
 
12
13
  def convert_table_md(
13
14
  engine: sql.engine.Engine,
14
- table_md_updater: Optional[Callable[[dict], None]] = None,
15
- column_md_updater: Optional[Callable[[dict], None]] = None,
16
- external_store_md_updater: Optional[Callable[[dict], None]] = None,
17
- substitution_fn: Optional[Callable[[Optional[str], Any], Optional[tuple[Optional[str], Any]]]] = None
15
+ table_md_updater: Callable[[dict, UUID], None] | None = None,
16
+ column_md_updater: Callable[[dict], None] | None = None,
17
+ external_store_md_updater: Callable[[dict], None] | None = None,
18
+ substitution_fn: Callable[[str | None, Any], tuple[str | None, Any] | None] | None = None,
19
+ table_modifier: Callable[[sql.Connection, UUID, dict, dict], None] | None = None,
18
20
  ) -> None:
19
21
  """
20
22
  Converts schema.TableMd dicts based on the specified conversion functions.
@@ -22,6 +24,7 @@ def convert_table_md(
22
24
  Args:
23
25
  engine: The SQLAlchemy engine.
24
26
  table_md_updater: A function that updates schema.TableMd dicts in place.
27
+ It takes two arguments: the metadata dict (new values) and the table id.
25
28
  column_md_updater: A function that updates schema.ColumnMd dicts in place.
26
29
  external_store_md_updater: A function that updates the external store metadata in place.
27
30
  substitution_fn: A function that substitutes metadata values. If specified, all metadata will be traversed
@@ -31,13 +34,14 @@ def convert_table_md(
31
34
  the original entry will be replaced, and the traversal will continue with `v'`.
32
35
  """
33
36
  with engine.begin() as conn:
34
- for row in conn.execute(sql.select(Table)):
35
- id = row[0]
36
- table_md = row[2]
37
+ # avoid a SELECT * here, which breaks when we add new columns to Table
38
+ for row in conn.execute(sql.select(Table.id, Table.md)):
39
+ tbl_id = row[0]
40
+ table_md = row[1]
37
41
  assert isinstance(table_md, dict)
38
42
  updated_table_md = copy.deepcopy(table_md)
39
43
  if table_md_updater is not None:
40
- table_md_updater(updated_table_md)
44
+ table_md_updater(updated_table_md, tbl_id)
41
45
  if column_md_updater is not None:
42
46
  __update_column_md(updated_table_md, column_md_updater)
43
47
  if external_store_md_updater is not None:
@@ -45,8 +49,21 @@ def convert_table_md(
45
49
  if substitution_fn is not None:
46
50
  updated_table_md = __substitute_md_rec(updated_table_md, substitution_fn)
47
51
  if updated_table_md != table_md:
48
- __logger.info(f'Updating schema for table: {id}')
49
- conn.execute(sql.update(Table).where(Table.id == id).values(md=updated_table_md))
52
+ __logger.info(f'Updating schema for table: {tbl_id}')
53
+ conn.execute(sql.update(Table).where(Table.id == tbl_id).values(md=updated_table_md))
54
+ if table_modifier is not None:
55
+ table_modifier(conn, tbl_id, table_md, updated_table_md)
56
+
57
+ for row in conn.execute(sql.select(Function)):
58
+ fn_id = row[0]
59
+ function_md = row[2]
60
+ assert isinstance(function_md, dict)
61
+ updated_function_md = copy.deepcopy(function_md)
62
+ if substitution_fn is not None:
63
+ updated_function_md = __substitute_md_rec(updated_function_md, substitution_fn)
64
+ if updated_function_md != function_md:
65
+ __logger.info(f'Updating function: {fn_id}')
66
+ conn.execute(sql.update(Function).where(Function.id == fn_id).values(md=updated_function_md))
50
67
 
51
68
 
52
69
  def __update_column_md(table_md: dict, column_md_updater: Callable[[dict], None]) -> None:
@@ -63,10 +80,7 @@ def __update_external_store_md(table_md: dict, external_store_md_updater: Callab
63
80
  external_store_md_updater(store_md)
64
81
 
65
82
 
66
- def __substitute_md_rec(
67
- md: Any,
68
- substitution_fn: Callable[[Optional[str], Any], Optional[tuple[Optional[str], Any]]]
69
- ) -> Any:
83
+ def __substitute_md_rec(md: Any, substitution_fn: Callable[[str | None, Any], tuple[str | None, Any] | None]) -> Any:
70
84
  if isinstance(md, dict):
71
85
  updated_dict: dict[str, Any] = {}
72
86
  for k, v in md.items():
@@ -94,8 +108,8 @@ def __substitute_md_rec(
94
108
 
95
109
  def convert_table_schema_version_md(
96
110
  engine: sql.engine.Engine,
97
- table_schema_version_md_updater: Optional[Callable[[dict], None]] = None,
98
- schema_column_updater: Optional[Callable[[dict], None]] = None
111
+ table_schema_version_md_updater: Callable[[dict], None] | None = None,
112
+ schema_column_updater: Callable[[dict], None] | None = None,
99
113
  ) -> None:
100
114
  """
101
115
  Converts schema.TableSchemaVersionMd dicts based on the specified conversion functions.
@@ -131,3 +145,15 @@ def __update_schema_column(table_schema_version_md: dict, schema_column_updater:
131
145
  assert isinstance(cols, dict)
132
146
  for schema_col in cols.values():
133
147
  schema_column_updater(schema_col)
148
+
149
+
150
+ T = TypeVar('T')
151
+
152
+
153
def convert_sql_table_record(
    schema: type[T], engine: sql.engine.Engine, record_updater: Callable[[T], None] | None = None
) -> None:
    """
    Applies `record_updater` to every record of `schema` and commits the changes.

    Args:
        schema: The ORM class whose records are loaded via `session.query(schema)`.
        engine: The SQLAlchemy engine.
        record_updater: A function that updates a single record in place. If None, this function
            does nothing (consistent with the optional updaters of the other converters in this module).
    """
    if record_updater is None:
        # The signature explicitly admits None; without this guard the call below would raise
        # "TypeError: 'NoneType' object is not callable" as soon as the table contains a record.
        return
    with sql.orm.Session(engine, future=True) as session:
        # .all() materializes the result before any record is mutated
        for record in session.query(schema).all():
            record_updater(record)
        # a single commit for all updated records
        session.commit()
@@ -2,6 +2,27 @@
2
2
  # rather than as a comment, so that the existence of a description can be enforced by
3
3
  # the unit tests when new versions are added.
4
4
  VERSION_NOTES = {
5
+ 44: 'ArrayType dtype migration from pxt types to numpy dtypes',
6
+ 43: 'Changing tables.dir_id to nullable',
7
+ 42: 'Add additional_md columns to metadata tables',
8
+ 41: 'Cellmd columns for array and json columns',
9
+ 40: 'Convert error property columns to cellmd columns',
10
+ 39: 'ColumnHandles in external stores',
11
+ 38: 'Added TableMd.view_sn',
12
+ 37: 'Add support for the sample() method on DataFrames',
13
+ 36: 'Added Table.lock_dummy',
14
+ 35: 'Track reference_tbl in ColumnRef',
15
+ 34: 'Set default value for is_pk field in column metadata to False',
16
+ 33: 'Add is_replica field to table metadata',
17
+ 32: 'Add the lock_dummy BIGINT column to the dirs table',
18
+ 31: 'Add table ids to metadata structs',
19
+ 30: 'Store default values and constant arguments as literals',
20
+ 29: 'Add user and additional_md fields to metadata structs',
21
+ 28: 'Enable view creation from DataFrame with select clause',
22
+ 27: 'Enable pxt.query parameterization of limit clauses',
23
+ 26: 'Rename clip_text and clip_image to clip',
24
+ 25: 'Functions with multiple signatures',
25
+ 24: 'Added TableMd/IndexMd.indexed_col_tbl_id',
5
26
  23: 'DataFrame.from_clause',
6
27
  22: 'TableMd/ColumnMd.media_validation',
7
28
  21: 'Separate InlineArray and InlineList',