pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245) hide show
  1. pixeltable/__init__.py +83 -19
  2. pixeltable/_query.py +1444 -0
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +7 -4
  5. pixeltable/catalog/catalog.py +2394 -119
  6. pixeltable/catalog/column.py +225 -104
  7. pixeltable/catalog/dir.py +38 -9
  8. pixeltable/catalog/globals.py +53 -34
  9. pixeltable/catalog/insertable_table.py +265 -115
  10. pixeltable/catalog/path.py +80 -17
  11. pixeltable/catalog/schema_object.py +28 -43
  12. pixeltable/catalog/table.py +1270 -677
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +1270 -751
  15. pixeltable/catalog/table_version_handle.py +109 -0
  16. pixeltable/catalog/table_version_path.py +137 -42
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +251 -134
  20. pixeltable/config.py +215 -0
  21. pixeltable/env.py +736 -285
  22. pixeltable/exceptions.py +26 -2
  23. pixeltable/exec/__init__.py +7 -2
  24. pixeltable/exec/aggregation_node.py +39 -21
  25. pixeltable/exec/cache_prefetch_node.py +87 -109
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +25 -28
  29. pixeltable/exec/data_row_batch.py +11 -46
  30. pixeltable/exec/exec_context.py +26 -11
  31. pixeltable/exec/exec_node.py +35 -27
  32. pixeltable/exec/expr_eval/__init__.py +3 -0
  33. pixeltable/exec/expr_eval/evaluators.py +365 -0
  34. pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
  35. pixeltable/exec/expr_eval/globals.py +200 -0
  36. pixeltable/exec/expr_eval/row_buffer.py +74 -0
  37. pixeltable/exec/expr_eval/schedulers.py +413 -0
  38. pixeltable/exec/globals.py +35 -0
  39. pixeltable/exec/in_memory_data_node.py +35 -27
  40. pixeltable/exec/object_store_save_node.py +293 -0
  41. pixeltable/exec/row_update_node.py +44 -29
  42. pixeltable/exec/sql_node.py +414 -115
  43. pixeltable/exprs/__init__.py +8 -5
  44. pixeltable/exprs/arithmetic_expr.py +79 -45
  45. pixeltable/exprs/array_slice.py +5 -5
  46. pixeltable/exprs/column_property_ref.py +40 -26
  47. pixeltable/exprs/column_ref.py +254 -61
  48. pixeltable/exprs/comparison.py +14 -9
  49. pixeltable/exprs/compound_predicate.py +9 -10
  50. pixeltable/exprs/data_row.py +213 -72
  51. pixeltable/exprs/expr.py +270 -104
  52. pixeltable/exprs/expr_dict.py +6 -5
  53. pixeltable/exprs/expr_set.py +20 -11
  54. pixeltable/exprs/function_call.py +383 -284
  55. pixeltable/exprs/globals.py +18 -5
  56. pixeltable/exprs/in_predicate.py +7 -7
  57. pixeltable/exprs/inline_expr.py +37 -37
  58. pixeltable/exprs/is_null.py +8 -4
  59. pixeltable/exprs/json_mapper.py +120 -54
  60. pixeltable/exprs/json_path.py +90 -60
  61. pixeltable/exprs/literal.py +61 -16
  62. pixeltable/exprs/method_ref.py +7 -6
  63. pixeltable/exprs/object_ref.py +19 -8
  64. pixeltable/exprs/row_builder.py +238 -75
  65. pixeltable/exprs/rowid_ref.py +53 -15
  66. pixeltable/exprs/similarity_expr.py +65 -50
  67. pixeltable/exprs/sql_element_cache.py +5 -5
  68. pixeltable/exprs/string_op.py +107 -0
  69. pixeltable/exprs/type_cast.py +25 -13
  70. pixeltable/exprs/variable.py +2 -2
  71. pixeltable/func/__init__.py +9 -5
  72. pixeltable/func/aggregate_function.py +197 -92
  73. pixeltable/func/callable_function.py +119 -35
  74. pixeltable/func/expr_template_function.py +101 -48
  75. pixeltable/func/function.py +375 -62
  76. pixeltable/func/function_registry.py +20 -19
  77. pixeltable/func/globals.py +6 -5
  78. pixeltable/func/mcp.py +74 -0
  79. pixeltable/func/query_template_function.py +151 -35
  80. pixeltable/func/signature.py +178 -49
  81. pixeltable/func/tools.py +164 -0
  82. pixeltable/func/udf.py +176 -53
  83. pixeltable/functions/__init__.py +44 -4
  84. pixeltable/functions/anthropic.py +226 -47
  85. pixeltable/functions/audio.py +148 -11
  86. pixeltable/functions/bedrock.py +137 -0
  87. pixeltable/functions/date.py +188 -0
  88. pixeltable/functions/deepseek.py +113 -0
  89. pixeltable/functions/document.py +81 -0
  90. pixeltable/functions/fal.py +76 -0
  91. pixeltable/functions/fireworks.py +72 -20
  92. pixeltable/functions/gemini.py +249 -0
  93. pixeltable/functions/globals.py +208 -53
  94. pixeltable/functions/groq.py +108 -0
  95. pixeltable/functions/huggingface.py +1088 -95
  96. pixeltable/functions/image.py +155 -84
  97. pixeltable/functions/json.py +8 -11
  98. pixeltable/functions/llama_cpp.py +31 -19
  99. pixeltable/functions/math.py +169 -0
  100. pixeltable/functions/mistralai.py +50 -75
  101. pixeltable/functions/net.py +70 -0
  102. pixeltable/functions/ollama.py +29 -36
  103. pixeltable/functions/openai.py +548 -160
  104. pixeltable/functions/openrouter.py +143 -0
  105. pixeltable/functions/replicate.py +15 -14
  106. pixeltable/functions/reve.py +250 -0
  107. pixeltable/functions/string.py +310 -85
  108. pixeltable/functions/timestamp.py +37 -19
  109. pixeltable/functions/together.py +77 -120
  110. pixeltable/functions/twelvelabs.py +188 -0
  111. pixeltable/functions/util.py +7 -2
  112. pixeltable/functions/uuid.py +30 -0
  113. pixeltable/functions/video.py +1528 -117
  114. pixeltable/functions/vision.py +26 -26
  115. pixeltable/functions/voyageai.py +289 -0
  116. pixeltable/functions/whisper.py +19 -10
  117. pixeltable/functions/whisperx.py +179 -0
  118. pixeltable/functions/yolox.py +112 -0
  119. pixeltable/globals.py +716 -236
  120. pixeltable/index/__init__.py +3 -1
  121. pixeltable/index/base.py +17 -21
  122. pixeltable/index/btree.py +32 -22
  123. pixeltable/index/embedding_index.py +155 -92
  124. pixeltable/io/__init__.py +12 -7
  125. pixeltable/io/datarows.py +140 -0
  126. pixeltable/io/external_store.py +83 -125
  127. pixeltable/io/fiftyone.py +24 -33
  128. pixeltable/io/globals.py +47 -182
  129. pixeltable/io/hf_datasets.py +96 -127
  130. pixeltable/io/label_studio.py +171 -156
  131. pixeltable/io/lancedb.py +3 -0
  132. pixeltable/io/pandas.py +136 -115
  133. pixeltable/io/parquet.py +40 -153
  134. pixeltable/io/table_data_conduit.py +702 -0
  135. pixeltable/io/utils.py +100 -0
  136. pixeltable/iterators/__init__.py +8 -4
  137. pixeltable/iterators/audio.py +207 -0
  138. pixeltable/iterators/base.py +9 -3
  139. pixeltable/iterators/document.py +144 -87
  140. pixeltable/iterators/image.py +17 -38
  141. pixeltable/iterators/string.py +15 -12
  142. pixeltable/iterators/video.py +523 -127
  143. pixeltable/metadata/__init__.py +33 -8
  144. pixeltable/metadata/converters/convert_10.py +2 -3
  145. pixeltable/metadata/converters/convert_13.py +2 -2
  146. pixeltable/metadata/converters/convert_15.py +15 -11
  147. pixeltable/metadata/converters/convert_16.py +4 -5
  148. pixeltable/metadata/converters/convert_17.py +4 -5
  149. pixeltable/metadata/converters/convert_18.py +4 -6
  150. pixeltable/metadata/converters/convert_19.py +6 -9
  151. pixeltable/metadata/converters/convert_20.py +3 -6
  152. pixeltable/metadata/converters/convert_21.py +6 -8
  153. pixeltable/metadata/converters/convert_22.py +3 -2
  154. pixeltable/metadata/converters/convert_23.py +33 -0
  155. pixeltable/metadata/converters/convert_24.py +55 -0
  156. pixeltable/metadata/converters/convert_25.py +19 -0
  157. pixeltable/metadata/converters/convert_26.py +23 -0
  158. pixeltable/metadata/converters/convert_27.py +29 -0
  159. pixeltable/metadata/converters/convert_28.py +13 -0
  160. pixeltable/metadata/converters/convert_29.py +110 -0
  161. pixeltable/metadata/converters/convert_30.py +63 -0
  162. pixeltable/metadata/converters/convert_31.py +11 -0
  163. pixeltable/metadata/converters/convert_32.py +15 -0
  164. pixeltable/metadata/converters/convert_33.py +17 -0
  165. pixeltable/metadata/converters/convert_34.py +21 -0
  166. pixeltable/metadata/converters/convert_35.py +9 -0
  167. pixeltable/metadata/converters/convert_36.py +38 -0
  168. pixeltable/metadata/converters/convert_37.py +15 -0
  169. pixeltable/metadata/converters/convert_38.py +39 -0
  170. pixeltable/metadata/converters/convert_39.py +124 -0
  171. pixeltable/metadata/converters/convert_40.py +73 -0
  172. pixeltable/metadata/converters/convert_41.py +12 -0
  173. pixeltable/metadata/converters/convert_42.py +9 -0
  174. pixeltable/metadata/converters/convert_43.py +44 -0
  175. pixeltable/metadata/converters/util.py +44 -18
  176. pixeltable/metadata/notes.py +21 -0
  177. pixeltable/metadata/schema.py +185 -42
  178. pixeltable/metadata/utils.py +74 -0
  179. pixeltable/mypy/__init__.py +3 -0
  180. pixeltable/mypy/mypy_plugin.py +123 -0
  181. pixeltable/plan.py +616 -225
  182. pixeltable/share/__init__.py +3 -0
  183. pixeltable/share/packager.py +797 -0
  184. pixeltable/share/protocol/__init__.py +33 -0
  185. pixeltable/share/protocol/common.py +165 -0
  186. pixeltable/share/protocol/operation_types.py +33 -0
  187. pixeltable/share/protocol/replica.py +119 -0
  188. pixeltable/share/publish.py +349 -0
  189. pixeltable/store.py +398 -232
  190. pixeltable/type_system.py +730 -267
  191. pixeltable/utils/__init__.py +40 -0
  192. pixeltable/utils/arrow.py +201 -29
  193. pixeltable/utils/av.py +298 -0
  194. pixeltable/utils/azure_store.py +346 -0
  195. pixeltable/utils/coco.py +26 -27
  196. pixeltable/utils/code.py +4 -4
  197. pixeltable/utils/console_output.py +46 -0
  198. pixeltable/utils/coroutine.py +24 -0
  199. pixeltable/utils/dbms.py +92 -0
  200. pixeltable/utils/description_helper.py +11 -12
  201. pixeltable/utils/documents.py +60 -61
  202. pixeltable/utils/exception_handler.py +36 -0
  203. pixeltable/utils/filecache.py +38 -22
  204. pixeltable/utils/formatter.py +88 -51
  205. pixeltable/utils/gcs_store.py +295 -0
  206. pixeltable/utils/http.py +133 -0
  207. pixeltable/utils/http_server.py +14 -13
  208. pixeltable/utils/iceberg.py +13 -0
  209. pixeltable/utils/image.py +17 -0
  210. pixeltable/utils/lancedb.py +90 -0
  211. pixeltable/utils/local_store.py +322 -0
  212. pixeltable/utils/misc.py +5 -0
  213. pixeltable/utils/object_stores.py +573 -0
  214. pixeltable/utils/pydantic.py +60 -0
  215. pixeltable/utils/pytorch.py +20 -20
  216. pixeltable/utils/s3_store.py +527 -0
  217. pixeltable/utils/sql.py +32 -5
  218. pixeltable/utils/system.py +30 -0
  219. pixeltable/utils/transactional_directory.py +4 -3
  220. pixeltable-0.5.7.dist-info/METADATA +579 -0
  221. pixeltable-0.5.7.dist-info/RECORD +227 -0
  222. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  223. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  224. pixeltable/__version__.py +0 -3
  225. pixeltable/catalog/named_function.py +0 -36
  226. pixeltable/catalog/path_dict.py +0 -141
  227. pixeltable/dataframe.py +0 -894
  228. pixeltable/exec/expr_eval_node.py +0 -232
  229. pixeltable/ext/__init__.py +0 -14
  230. pixeltable/ext/functions/__init__.py +0 -8
  231. pixeltable/ext/functions/whisperx.py +0 -77
  232. pixeltable/ext/functions/yolox.py +0 -157
  233. pixeltable/tool/create_test_db_dump.py +0 -311
  234. pixeltable/tool/create_test_video.py +0 -81
  235. pixeltable/tool/doc_plugins/griffe.py +0 -50
  236. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  237. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  238. pixeltable/tool/embed_udf.py +0 -9
  239. pixeltable/tool/mypy_plugin.py +0 -55
  240. pixeltable/utils/media_store.py +0 -76
  241. pixeltable/utils/s3.py +0 -16
  242. pixeltable-0.2.26.dist-info/METADATA +0 -400
  243. pixeltable-0.2.26.dist-info/RECORD +0 -156
  244. pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
  245. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -1,53 +1,78 @@
1
1
  import dataclasses
2
2
  import importlib
3
+ import logging
3
4
  import os
4
5
  import pkgutil
5
6
  from typing import Callable
6
7
 
7
8
  import sqlalchemy as sql
8
- import sqlalchemy.orm as orm
9
+ from sqlalchemy import orm
10
+
11
+ import pixeltable as pxt
12
+ import pixeltable.exceptions as excs
13
+ from pixeltable.utils.console_output import ConsoleLogger
9
14
 
10
15
  from .schema import SystemInfo, SystemInfoMd
11
16
 
17
+ _console_logger = ConsoleLogger(logging.getLogger('pixeltable'))
18
+ _logger = logging.getLogger('pixeltable')
19
+
12
20
  # current version of the metadata; this is incremented whenever the metadata schema changes
13
- VERSION = 23
21
+ VERSION = 44
14
22
 
15
23
 
16
24
  def create_system_info(engine: sql.engine.Engine) -> None:
17
25
  """Create the system metadata record"""
18
26
  system_md = SystemInfoMd(schema_version=VERSION)
19
27
  record = SystemInfo(md=dataclasses.asdict(system_md))
28
+ _logger.debug(f'Creating pixeltable system info record {record}')
20
29
  with orm.Session(engine, future=True) as session:
21
- session.add(record)
22
- session.flush()
23
- session.commit()
30
+ # Write system metadata only once for idempotency
31
+ if session.query(SystemInfo).count() == 0:
32
+ session.add(record)
33
+ session.flush()
34
+ session.commit()
35
+
24
36
 
25
37
  # conversion functions for upgrading the metadata schema from one version to the following
26
38
  # key: old schema version
27
39
  converter_cbs: dict[int, Callable[[sql.engine.Engine], None]] = {}
28
40
 
41
+
29
42
  def register_converter(version: int) -> Callable[[Callable[[sql.engine.Engine], None]], None]:
30
43
  def decorator(fn: Callable[[sql.engine.Engine], None]) -> None:
31
- global converter_cbs
44
+ assert version not in converter_cbs
32
45
  converter_cbs[version] = fn
46
+
33
47
  return decorator
34
48
 
49
+
35
50
  # load all converter modules
36
51
  for _, modname, _ in pkgutil.iter_modules([os.path.dirname(__file__) + '/converters']):
37
52
  importlib.import_module('pixeltable.metadata.converters.' + modname)
38
53
 
54
+
39
55
  def upgrade_md(engine: sql.engine.Engine) -> None:
40
56
  """Upgrade the metadata schema to the current version"""
41
57
  with orm.Session(engine) as session:
42
- system_info = session.query(SystemInfo).one().md
58
+ # Get exclusive lock on SystemInfo row
59
+ system_info = session.query(SystemInfo).with_for_update().one().md
43
60
  md_version = system_info['schema_version']
44
61
  assert isinstance(md_version, int)
62
+ _logger.info(f'Current database version: {md_version}, installed version: {VERSION}')
63
+ if md_version > VERSION:
64
+ raise excs.Error(
65
+ 'This Pixeltable database was created with a newer Pixeltable version '
66
+ f'than the one currently installed ({pxt.__version__}).\n'
67
+ 'Please update to the latest Pixeltable version by running: pip install --upgrade pixeltable'
68
+ )
45
69
  if md_version == VERSION:
46
70
  return
47
71
  while md_version < VERSION:
48
72
  if md_version not in converter_cbs:
49
73
  raise RuntimeError(f'No metadata converter for version {md_version}')
50
- print(f'Converting metadata from version {md_version} to {md_version + 1}')
74
+ # We can't use the console logger in Env, because Env might not have been initialized yet.
75
+ _console_logger.info(f'Converting metadata from version {md_version} to {md_version + 1}')
51
76
  converter_cbs[md_version](engine)
52
77
  md_version += 1
53
78
  # update system info
@@ -1,16 +1,15 @@
1
1
  import sqlalchemy as sql
2
2
 
3
- from pixeltable.metadata.schema import Table, TableSchemaVersion
4
3
  from pixeltable.metadata import register_converter
4
+ from pixeltable.metadata.schema import Table, TableSchemaVersion
5
5
 
6
6
 
7
7
  @register_converter(version=10)
8
8
  def _(engine: sql.engine.Engine) -> None:
9
- default_table_attrs = {"comment": None, "num_retained_versions": 10}
9
+ default_table_attrs = {'comment': None, 'num_retained_versions': 10}
10
10
  with engine.begin() as conn:
11
11
  # Because `parameters` wasn't actually used for anything,
12
12
  # we can simply delete it without any data loss.
13
13
  conn.execute(sql.update(Table).values(md=Table.md - 'parameters'))
14
14
  # Add `table_attrs` to all instances of tableschemaversions.md.
15
15
  conn.execute(sql.update(TableSchemaVersion).values(md=TableSchemaVersion.md.concat(default_table_attrs)))
16
- return
@@ -12,9 +12,9 @@ _logger = logging.getLogger('pixeltable')
12
12
  @register_converter(version=13)
13
13
  def _(engine: sql.engine.Engine) -> None:
14
14
  with engine.begin() as conn:
15
- for row in conn.execute(sql.select(Table)):
15
+ for row in conn.execute(sql.select(Table.id, Table.md)):
16
16
  id = row[0]
17
- md = row[2]
17
+ md = row[1]
18
18
  updated_md = __update_md(md)
19
19
  if updated_md != md:
20
20
  _logger.info(f'Updating schema for table: {id}')
@@ -1,4 +1,3 @@
1
-
2
1
  import inspect
3
2
  import logging
4
3
  from typing import Any
@@ -6,8 +5,6 @@ from typing import Any
6
5
  import cloudpickle # type: ignore[import-untyped]
7
6
  import sqlalchemy as sql
8
7
 
9
- import pixeltable.func as func
10
- import pixeltable.type_system as ts
11
8
  from pixeltable.metadata import register_converter
12
9
  from pixeltable.metadata.schema import Function
13
10
 
@@ -18,7 +15,7 @@ _logger = logging.getLogger('pixeltable')
18
15
  def _(engine: sql.engine.Engine) -> None:
19
16
  with engine.begin() as conn:
20
17
  for row in conn.execute(sql.select(Function)):
21
- id, dir_id, md, binary_obj = row
18
+ id, _, md, binary_obj = row
22
19
  md['md'] = __update_md(md['md'], binary_obj)
23
20
  _logger.info(f'Updating function: {id}')
24
21
  conn.execute(sql.update(Function).where(Function.id == id).values(md=md))
@@ -28,17 +25,24 @@ def __update_md(orig_d: dict, binary_obj: bytes) -> Any:
28
25
  # construct dict produced by CallableFunction.to_store()
29
26
  py_fn = cloudpickle.loads(binary_obj)
30
27
  py_params = inspect.signature(py_fn).parameters
31
- return_type = ts.ColumnType.from_dict(orig_d['return_type'])
32
- params: list[func.Parameter] = []
28
+ return_type = orig_d['return_type']
29
+ params: list[dict] = []
33
30
  for name, col_type_dict, kind_int, is_batched in orig_d['parameters']:
34
- col_type = ts.ColumnType.from_dict(col_type_dict) if col_type_dict is not None else None
35
31
  default = py_params[name].default
36
- kind = inspect._ParameterKind(kind_int) # is there a way to avoid referencing a private type?
37
- params.append(func.Parameter(name=name, col_type=col_type, kind=kind, default=default, is_batched=is_batched))
32
+ kind = inspect._ParameterKind(kind_int)
33
+ params.append(
34
+ {
35
+ 'name': name,
36
+ 'col_type': col_type_dict,
37
+ 'kind': str(kind),
38
+ 'is_batched': is_batched,
39
+ 'has_default': default is not inspect.Parameter.empty,
40
+ 'default': None if default is inspect.Parameter.empty else default,
41
+ }
42
+ )
38
43
  is_batched = 'batch_size' in orig_d
39
- sig = func.Signature(return_type, params, is_batched=is_batched)
40
44
  d = {
41
- 'signature': sig.as_dict(),
45
+ 'signature': {'return_type': return_type, 'parameters': params, 'is_batched': is_batched},
42
46
  'batch_size': orig_d['batch_size'] if is_batched else None,
43
47
  }
44
48
  return d
@@ -1,3 +1,5 @@
1
+ from uuid import UUID
2
+
1
3
  import sqlalchemy as sql
2
4
 
3
5
  from pixeltable.metadata import register_converter
@@ -6,13 +8,10 @@ from pixeltable.metadata.converters.util import convert_table_md
6
8
 
7
9
  @register_converter(version=16)
8
10
  def _(engine: sql.engine.Engine) -> None:
9
- convert_table_md(
10
- engine,
11
- table_md_updater=__update_table_md
12
- )
11
+ convert_table_md(engine, table_md_updater=__update_table_md)
13
12
 
14
13
 
15
- def __update_table_md(table_md: dict) -> None:
14
+ def __update_table_md(table_md: dict, table_id: UUID) -> None:
16
15
  # External stores are not migratable; just drop them
17
16
  del table_md['remotes']
18
17
  table_md['external_stores'] = {}
@@ -1,3 +1,5 @@
1
+ from uuid import UUID
2
+
1
3
  import sqlalchemy as sql
2
4
 
3
5
  from pixeltable.metadata import register_converter
@@ -6,13 +8,10 @@ from pixeltable.metadata.converters.util import convert_table_md
6
8
 
7
9
  @register_converter(version=17)
8
10
  def _(engine: sql.engine.Engine) -> None:
9
- convert_table_md(
10
- engine,
11
- table_md_updater=__update_table_md
12
- )
11
+ convert_table_md(engine, table_md_updater=__update_table_md)
13
12
 
14
13
 
15
- def __update_table_md(table_md: dict) -> None:
14
+ def __update_table_md(table_md: dict, table_id: UUID) -> None:
16
15
  # key changes in IndexMd.init_args: img_embed -> image_embed, txt_embed -> string_embed
17
16
  if len(table_md['index_md']) == 0:
18
17
  return
@@ -1,4 +1,5 @@
1
- from typing import Any, Optional
1
+ from typing import Any
2
+
2
3
  import sqlalchemy as sql
3
4
 
4
5
  from pixeltable.metadata import register_converter
@@ -7,13 +8,10 @@ from pixeltable.metadata.converters.util import convert_table_md
7
8
 
8
9
  @register_converter(version=18)
9
10
  def _(engine: sql.engine.Engine) -> None:
10
- convert_table_md(
11
- engine,
12
- substitution_fn=__substitute_md
13
- )
11
+ convert_table_md(engine, substitution_fn=__substitute_md)
14
12
 
15
13
 
16
- def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
14
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
17
15
  # Migrate a few changed function names
18
16
  if k == 'path' and v == 'pixeltable.functions.string.str_format':
19
17
  return 'path', 'pixeltable.functions.string.format'
@@ -1,9 +1,9 @@
1
1
  import datetime
2
- from typing import Any, Optional
2
+ from typing import Any
3
3
 
4
4
  import sqlalchemy as sql
5
5
 
6
- import pixeltable as pxt
6
+ import pixeltable.type_system as ts
7
7
  from pixeltable.metadata import register_converter, schema
8
8
  from pixeltable.metadata.converters.util import convert_table_md
9
9
 
@@ -22,22 +22,19 @@ def _(engine: sql.engine.Engine) -> None:
22
22
  store_name = f'{store_prefix}_{id.hex}'
23
23
  column_md = md['column_md']
24
24
  timestamp_cols = [
25
- col_id for col_id, col in column_md.items()
26
- if col['col_type']['_classname'] == 'TimestampType'
25
+ col_id for col_id, col in column_md.items() if col['col_type']['_classname'] == 'TimestampType'
27
26
  ]
28
27
  for col_id in timestamp_cols:
29
- conn.execute(
30
- sql.text(f'ALTER TABLE {store_name} ALTER COLUMN col_{col_id} TYPE TIMESTAMPTZ')
31
- )
28
+ conn.execute(sql.text(f'ALTER TABLE {store_name} ALTER COLUMN col_{col_id} TYPE TIMESTAMPTZ'))
32
29
 
33
30
 
34
- def __update_timestamp_literals(k: Any, v: Any) -> Optional[tuple[Any, Any]]:
31
+ def __update_timestamp_literals(k: Any, v: Any) -> tuple[Any, Any] | None:
35
32
  if isinstance(v, dict) and 'val_t' in v:
36
33
  # It's a literal with an explicit 'val_t' field. In version 19 this can only mean a
37
34
  # timestamp literal, which (in version 19) is stored in the DB as a naive datetime.
38
35
  # We convert it to an aware datetime, stored in UTC.
39
36
  assert v['_classname'] == 'Literal'
40
- assert v['val_t'] == pxt.ColumnType.Type.TIMESTAMP.name
37
+ assert v['val_t'] == ts.ColumnType.Type.TIMESTAMP.name
41
38
  assert isinstance(v['val'], str)
42
39
  dt = datetime.datetime.fromisoformat(v['val'])
43
40
  assert dt.tzinfo is None # In version 19 all timestamps are naive
@@ -1,4 +1,4 @@
1
- from typing import Any, Optional
1
+ from typing import Any
2
2
 
3
3
  import sqlalchemy as sql
4
4
 
@@ -8,13 +8,10 @@ from pixeltable.metadata.converters.util import convert_table_md
8
8
 
9
9
  @register_converter(version=20)
10
10
  def _(engine: sql.engine.Engine) -> None:
11
- convert_table_md(
12
- engine,
13
- substitution_fn=__substitute_md
14
- )
11
+ convert_table_md(engine, substitution_fn=__substitute_md)
15
12
 
16
13
 
17
- def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
14
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
18
15
  if isinstance(v, dict) and '_classname' in v:
19
16
  # The way InlineArray is represented changed in v20. Previously, literal values were stored
20
17
  # directly in the Inline expr; now we store them in Literal sub-exprs. This converter
@@ -1,8 +1,9 @@
1
- from typing import Any, Optional
1
+ from typing import Any
2
+
2
3
  import sqlalchemy as sql
3
4
 
4
5
  from pixeltable.metadata import register_converter
5
- from pixeltable.metadata.converters.util import convert_table_schema_version_md, convert_table_md
6
+ from pixeltable.metadata.converters.util import convert_table_md, convert_table_schema_version_md
6
7
 
7
8
 
8
9
  @register_converter(version=21)
@@ -10,12 +11,9 @@ def _(engine: sql.engine.Engine) -> None:
10
11
  convert_table_schema_version_md(
11
12
  engine,
12
13
  table_schema_version_md_updater=__update_table_schema_version,
13
- schema_column_updater=__update_schema_column
14
- )
15
- convert_table_md(
16
- engine,
17
- substitution_fn=__substitute_md
14
+ schema_column_updater=__update_schema_column,
18
15
  )
16
+ convert_table_md(engine, substitution_fn=__substitute_md)
19
17
 
20
18
 
21
19
  def __update_table_schema_version(table_schema_version_md: dict) -> None:
@@ -26,7 +24,7 @@ def __update_schema_column(schema_column: dict) -> None:
26
24
  schema_column['media_validation'] = None
27
25
 
28
26
 
29
- def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
27
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
30
28
  if isinstance(v, dict) and '_classname' in v and v['_classname'] == 'ColumnRef':
31
29
  if 'perform_validation' not in v:
32
30
  v['perform_validation'] = False
@@ -1,4 +1,5 @@
1
- from typing import Any, Optional
1
+ from typing import Any
2
+
2
3
  import sqlalchemy as sql
3
4
 
4
5
  from pixeltable.metadata import register_converter
@@ -10,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
10
11
  convert_table_md(engine, substitution_fn=__substitute_md)
11
12
 
12
13
 
13
- def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
14
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
14
15
  if isinstance(v, dict) and '_classname' in v and v['_classname'] == 'DataFrame':
15
16
  v['from_clause'] = {'tbls': [v['tbl']], 'join_clauses': []}
16
17
  return k, v
@@ -0,0 +1,33 @@
1
+ import logging
2
+ from uuid import UUID
3
+
4
+ import sqlalchemy as sql
5
+
6
+ from pixeltable.metadata import register_converter
7
+ from pixeltable.metadata.converters.util import convert_table_md
8
+
9
+ _logger = logging.getLogger('pixeltable')
10
+
11
+
12
+ @register_converter(version=23)
13
+ def _(engine: sql.engine.Engine) -> None:
14
+ convert_table_md(engine, table_md_updater=__update_table_md)
15
+
16
+
17
+ def __update_table_md(table_md: dict, table_id: UUID) -> None:
18
+ """update the index metadata to add indexed_col_tbl_id column if it is missing
19
+
20
+ Args:
21
+ table_md (dict): copy of the original table metadata. this gets updated in place.
22
+ table_id (UUID): the table id
23
+
24
+ """
25
+ if len(table_md['index_md']) == 0:
26
+ return
27
+ for idx_md in table_md['index_md'].values():
28
+ if 'indexed_col_tbl_id' not in idx_md:
29
+ # index metadata is missing indexed_col_tbl_id
30
+ # assume that the indexed column is in the same table
31
+ # and update the index metadata.
32
+ _logger.info(f'Updating index metadata for table: {table_id} index: {idx_md["id"]}')
33
+ idx_md['indexed_col_tbl_id'] = str(table_id)
@@ -0,0 +1,55 @@
1
+ from typing import Any
2
+
3
+ import sqlalchemy as sql
4
+
5
+ from pixeltable.metadata import register_converter
6
+ from pixeltable.metadata.converters.util import convert_table_md
7
+
8
+
9
+ @register_converter(version=24)
10
+ def _(engine: sql.engine.Engine) -> None:
11
+ convert_table_md(engine, substitution_fn=__substitute_md)
12
+
13
+
14
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
15
+ from pixeltable import func
16
+ from pixeltable.func.globals import resolve_symbol
17
+
18
+ if (
19
+ isinstance(v, dict)
20
+ and '_classpath' in v
21
+ and v['_classpath']
22
+ in (
23
+ 'pixeltable.func.callable_function.CallableFunction',
24
+ 'pixeltable.func.aggregate_function.AggregateFunction',
25
+ 'pixeltable.func.expr_template_function.ExprTemplateFunction',
26
+ )
27
+ ):
28
+ if 'path' in v:
29
+ assert 'signature' not in v
30
+ f = resolve_symbol(__substitute_path(v['path']))
31
+ assert isinstance(f, func.Function)
32
+ v['signature'] = f.signatures[0].as_dict()
33
+ return k, v
34
+
35
+ if isinstance(v, dict) and '_classname' in v and v['_classname'] == 'FunctionCall':
36
+ # Correct an older serialization mechanism where Expr elements of FunctionCall args and
37
+ # kwargs were indicated with idx == -1 rather than None. This was fixed for InlineList
38
+ # and InlineDict back in convert_20, but not for FunctionCall.
39
+ assert 'args' in v and isinstance(v['args'], list)
40
+ assert 'kwargs' in v and isinstance(v['kwargs'], dict)
41
+ v['args'] = [(None, arg) if idx == -1 else (idx, arg) for idx, arg in v['args']]
42
+ v['kwargs'] = {k: (None, arg) if idx == -1 else (idx, arg) for k, (idx, arg) in v['kwargs'].items()}
43
+ return k, v
44
+
45
+ return None
46
+
47
+
48
+ def __substitute_path(path: str) -> str:
49
+ # Starting with version 25, function signatures are preserved in metadata. To migrate from older
50
+ # versions, it's necessary to resolve the function symbol to get the signature. The following
51
+ # adjustment is necessary for function names that are stored in db artifacts of version < 25, but
52
+ # have changed in some version > 25.
53
+ if path in ('pixeltable.functions.huggingface.clip_text', 'pixeltable.functions.huggingface.clip_image'):
54
+ return 'pixeltable.functions.huggingface.clip'
55
+ return path
@@ -0,0 +1,19 @@
1
+ from typing import Any
2
+
3
+ import sqlalchemy as sql
4
+
5
+ from pixeltable.metadata import register_converter
6
+ from pixeltable.metadata.converters.util import convert_table_md
7
+
8
+
9
+ @register_converter(version=25)
10
+ def _(engine: sql.engine.Engine) -> None:
11
+ convert_table_md(engine, substitution_fn=__substitute_md)
12
+
13
+
14
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
15
+ if k == 'path' and (
16
+ v in ('pixeltable.functions.huggingface.clip_text', 'pixeltable.functions.huggingface.clip_image')
17
+ ):
18
+ return 'path', 'pixeltable.functions.huggingface.clip'
19
+ return None
@@ -0,0 +1,23 @@
1
+ from typing import Any
2
+
3
+ import sqlalchemy as sql
4
+
5
+ from pixeltable.metadata import register_converter
6
+ from pixeltable.metadata.converters.util import convert_table_md
7
+
8
+
9
+ @register_converter(version=26)
10
+ def _(engine: sql.engine.Engine) -> None:
11
+ convert_table_md(engine, substitution_fn=__substitute_md)
12
+
13
+
14
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
15
+ import pixeltable.type_system as ts
16
+ from pixeltable.exprs.literal import Literal
17
+
18
+ if k == 'limit_val' and v is not None:
19
+ assert isinstance(v, int)
20
+ newv = Literal(v, ts.IntType(nullable=False)).as_dict()
21
+ return k, newv
22
+
23
+ return None
@@ -0,0 +1,29 @@
1
+ import logging
2
+ from uuid import UUID
3
+
4
+ import sqlalchemy as sql
5
+
6
+ from pixeltable.metadata import register_converter
7
+ from pixeltable.metadata.converters.util import convert_table_md
8
+
9
+ _logger = logging.getLogger('pixeltable')
10
+
11
+
12
+ @register_converter(version=27)
13
+ def _(engine: sql.engine.Engine) -> None:
14
+ convert_table_md(engine, table_md_updater=__update_table_md)
15
+
16
+
17
+ def __update_table_md(table_md: dict, table_id: UUID) -> None:
18
+ """Update the view metadata to add the include_base_columns boolean if it is missing
19
+
20
+ Args:
21
+ table_md (dict): copy of the original table metadata. this gets updated in place.
22
+ table_id (UUID): the table id
23
+
24
+ """
25
+ if table_md['view_md'] is None:
26
+ return
27
+ if 'include_base_columns' not in table_md['view_md']:
28
+ table_md['view_md']['include_base_columns'] = True
29
+ _logger.info(f'Updating view metadata for table: {table_id}')
@@ -0,0 +1,13 @@
1
+ import sqlalchemy as sql
2
+
3
+ from pixeltable.metadata import register_converter
4
+ from pixeltable.metadata.schema import Dir, Table, TableSchemaVersion, TableVersion
5
+
6
+
7
@register_converter(version=28)
def _(engine: sql.engine.Engine) -> None:
    """Metadata converter registered for version 28.

    Within a single `engine.begin()` block, issues one UPDATE per metadata table that
    concatenates extra keys onto the existing `md` column: `user` and `additional_md`
    for `Dir` and `Table`, and `additional_md` alone for `TableVersion` and
    `TableSchemaVersion`.

    Args:
        engine: SQLAlchemy engine used to open the connection.
    """
    # (ORM class, keys to append to its md column), in the order the updates are issued.
    md_additions = (
        (Dir, {'user': None, 'additional_md': {}}),
        (Table, {'user': None, 'additional_md': {}}),
        (TableVersion, {'additional_md': {}}),
        (TableSchemaVersion, {'additional_md': {}}),
    )
    with engine.begin() as conn:
        for orm_cls, extra_md in md_additions:
            stmt = sql.update(orm_cls).values(md=orm_cls.md.concat(extra_md))
            conn.execute(stmt)
@@ -0,0 +1,110 @@
1
+ from typing import Any
2
+
3
+ import sqlalchemy as sql
4
+
5
+ from pixeltable import exprs
6
+ from pixeltable.metadata import register_converter
7
+ from pixeltable.metadata.converters.util import convert_table_md
8
+
9
+
10
@register_converter(version=29)
def _(engine: sql.engine.Engine) -> None:
    """Metadata converter registered for version 29.

    Delegates to `convert_table_md`, passing this module's `__substitute_md` as the
    `substitution_fn` that rewrites individual metadata entries.

    Args:
        engine: SQLAlchemy engine handed through to `convert_table_md`.
    """
    convert_table_md(engine, substitution_fn=__substitute_md)
13
+
14
+
15
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
16
+ # Defaults are now stored as literals in signatures
17
+ if k == 'parameters':
18
+ for param in v:
19
+ assert isinstance(param, dict)
20
+ has_default = param.get('has_default') or (param.get('default') is not None)
21
+ if 'has_default' in param:
22
+ del param['has_default']
23
+ literal = exprs.Expr.from_object(param['default']) if has_default else None
24
+ assert literal is None or isinstance(literal, exprs.Literal)
25
+ param['default'] = None if literal is None else literal.as_dict()
26
+ return k, v
27
+
28
+ # Method of organizing argument expressions has changed
29
+ if isinstance(v, dict) and v.get('_classname') == 'FunctionCall':
30
+ args = v['args']
31
+ kwargs = v['kwargs']
32
+ components = v['components']
33
+ group_by_start_idx = v['group_by_start_idx']
34
+ group_by_stop_idx = v['group_by_stop_idx']
35
+ order_by_start_idx = v['order_by_start_idx']
36
+
37
+ new_args = []
38
+ for arg in args:
39
+ if arg[0] is not None:
40
+ assert isinstance(arg[0], int)
41
+ new_args.append(components[arg[0]])
42
+ else:
43
+ literal = exprs.Expr.from_object(arg[1])
44
+ new_args.append(literal.as_dict())
45
+
46
+ new_kwargs = {}
47
+ for name, kwarg in kwargs.items():
48
+ if kwarg[0] is not None:
49
+ assert isinstance(kwarg[0], int)
50
+ new_kwargs[name] = components[kwarg[0]]
51
+ else:
52
+ literal = exprs.Expr.from_object(kwarg[1])
53
+ new_kwargs[name] = literal.as_dict()
54
+
55
+ # We need to expand ("unroll") any var-args or var-kwargs.
56
+
57
+ new_args_len = len(new_args)
58
+ rolled_args: dict | None = None
59
+ rolled_kwargs: dict | None = None
60
+
61
+ if 'signature' in v['fn']:
62
+ # If it's a pickled function, there's no signature, so we're out of luck; varargs in a pickled function
63
+ # is an edge case that won't migrate properly.
64
+ parameters: list[dict] = v['fn']['signature']['parameters']
65
+ for i, param in enumerate(parameters):
66
+ if param['kind'] == 'VAR_POSITIONAL' and new_args_len > i:
67
+ # For peculiar historical reasons, variable kwargs might show up in args. Thus variable
68
+ # positional args is not necessarily the last element of args; it might be the second-to-last.
69
+ assert new_args_len <= i + 2, new_args
70
+ rolled_args = new_args[i]
71
+ new_args = new_args[:i] + new_args[i + 1 :]
72
+ if param['kind'] == 'VAR_KEYWORD':
73
+ # As noted above, variable kwargs might show up either in args or in kwargs. If it's in args, it
74
+ # is necessarily the last element.
75
+ if new_args_len > i:
76
+ assert new_args_len <= i + 1, new_args
77
+ rolled_kwargs = new_args.pop()
78
+ if param['name'] in kwargs:
79
+ assert rolled_kwargs is None
80
+ rolled_kwargs = kwargs.pop(param['name'])
81
+
82
+ if rolled_args is not None:
83
+ assert rolled_args['_classname'] in ('InlineArray', 'InlineList')
84
+ new_args.extend(rolled_args['components'])
85
+ if rolled_kwargs is not None:
86
+ assert rolled_kwargs['_classname'] == 'InlineDict'
87
+ new_kwargs.update(zip(rolled_kwargs['keys'], rolled_kwargs['components']))
88
+
89
+ group_by_exprs = [components[i] for i in range(group_by_start_idx, group_by_stop_idx)]
90
+ order_by_exprs = [components[i] for i in range(order_by_start_idx, len(components))]
91
+
92
+ new_components = [*new_args, *new_kwargs.values(), *group_by_exprs, *order_by_exprs]
93
+
94
+ newv = {
95
+ 'fn': v['fn'],
96
+ 'arg_idxs': list(range(len(new_args))),
97
+ 'kwarg_idxs': {name: i + len(new_args) for i, name in enumerate(new_kwargs.keys())},
98
+ 'group_by_start_idx': len(new_args) + len(new_kwargs),
99
+ 'group_by_stop_idx': len(new_args) + len(new_kwargs) + len(group_by_exprs),
100
+ 'order_by_start_idx': len(new_args) + len(new_kwargs) + len(group_by_exprs),
101
+ 'is_method_call': False,
102
+ '_classname': 'FunctionCall',
103
+ 'components': new_components,
104
+ }
105
+ if 'return_type' in v:
106
+ newv['return_type'] = v['return_type']
107
+
108
+ return k, newv
109
+
110
+ return None