pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245)
  1. pixeltable/__init__.py +83 -19
  2. pixeltable/_query.py +1444 -0
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +7 -4
  5. pixeltable/catalog/catalog.py +2394 -119
  6. pixeltable/catalog/column.py +225 -104
  7. pixeltable/catalog/dir.py +38 -9
  8. pixeltable/catalog/globals.py +53 -34
  9. pixeltable/catalog/insertable_table.py +265 -115
  10. pixeltable/catalog/path.py +80 -17
  11. pixeltable/catalog/schema_object.py +28 -43
  12. pixeltable/catalog/table.py +1270 -677
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +1270 -751
  15. pixeltable/catalog/table_version_handle.py +109 -0
  16. pixeltable/catalog/table_version_path.py +137 -42
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +251 -134
  20. pixeltable/config.py +215 -0
  21. pixeltable/env.py +736 -285
  22. pixeltable/exceptions.py +26 -2
  23. pixeltable/exec/__init__.py +7 -2
  24. pixeltable/exec/aggregation_node.py +39 -21
  25. pixeltable/exec/cache_prefetch_node.py +87 -109
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +25 -28
  29. pixeltable/exec/data_row_batch.py +11 -46
  30. pixeltable/exec/exec_context.py +26 -11
  31. pixeltable/exec/exec_node.py +35 -27
  32. pixeltable/exec/expr_eval/__init__.py +3 -0
  33. pixeltable/exec/expr_eval/evaluators.py +365 -0
  34. pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
  35. pixeltable/exec/expr_eval/globals.py +200 -0
  36. pixeltable/exec/expr_eval/row_buffer.py +74 -0
  37. pixeltable/exec/expr_eval/schedulers.py +413 -0
  38. pixeltable/exec/globals.py +35 -0
  39. pixeltable/exec/in_memory_data_node.py +35 -27
  40. pixeltable/exec/object_store_save_node.py +293 -0
  41. pixeltable/exec/row_update_node.py +44 -29
  42. pixeltable/exec/sql_node.py +414 -115
  43. pixeltable/exprs/__init__.py +8 -5
  44. pixeltable/exprs/arithmetic_expr.py +79 -45
  45. pixeltable/exprs/array_slice.py +5 -5
  46. pixeltable/exprs/column_property_ref.py +40 -26
  47. pixeltable/exprs/column_ref.py +254 -61
  48. pixeltable/exprs/comparison.py +14 -9
  49. pixeltable/exprs/compound_predicate.py +9 -10
  50. pixeltable/exprs/data_row.py +213 -72
  51. pixeltable/exprs/expr.py +270 -104
  52. pixeltable/exprs/expr_dict.py +6 -5
  53. pixeltable/exprs/expr_set.py +20 -11
  54. pixeltable/exprs/function_call.py +383 -284
  55. pixeltable/exprs/globals.py +18 -5
  56. pixeltable/exprs/in_predicate.py +7 -7
  57. pixeltable/exprs/inline_expr.py +37 -37
  58. pixeltable/exprs/is_null.py +8 -4
  59. pixeltable/exprs/json_mapper.py +120 -54
  60. pixeltable/exprs/json_path.py +90 -60
  61. pixeltable/exprs/literal.py +61 -16
  62. pixeltable/exprs/method_ref.py +7 -6
  63. pixeltable/exprs/object_ref.py +19 -8
  64. pixeltable/exprs/row_builder.py +238 -75
  65. pixeltable/exprs/rowid_ref.py +53 -15
  66. pixeltable/exprs/similarity_expr.py +65 -50
  67. pixeltable/exprs/sql_element_cache.py +5 -5
  68. pixeltable/exprs/string_op.py +107 -0
  69. pixeltable/exprs/type_cast.py +25 -13
  70. pixeltable/exprs/variable.py +2 -2
  71. pixeltable/func/__init__.py +9 -5
  72. pixeltable/func/aggregate_function.py +197 -92
  73. pixeltable/func/callable_function.py +119 -35
  74. pixeltable/func/expr_template_function.py +101 -48
  75. pixeltable/func/function.py +375 -62
  76. pixeltable/func/function_registry.py +20 -19
  77. pixeltable/func/globals.py +6 -5
  78. pixeltable/func/mcp.py +74 -0
  79. pixeltable/func/query_template_function.py +151 -35
  80. pixeltable/func/signature.py +178 -49
  81. pixeltable/func/tools.py +164 -0
  82. pixeltable/func/udf.py +176 -53
  83. pixeltable/functions/__init__.py +44 -4
  84. pixeltable/functions/anthropic.py +226 -47
  85. pixeltable/functions/audio.py +148 -11
  86. pixeltable/functions/bedrock.py +137 -0
  87. pixeltable/functions/date.py +188 -0
  88. pixeltable/functions/deepseek.py +113 -0
  89. pixeltable/functions/document.py +81 -0
  90. pixeltable/functions/fal.py +76 -0
  91. pixeltable/functions/fireworks.py +72 -20
  92. pixeltable/functions/gemini.py +249 -0
  93. pixeltable/functions/globals.py +208 -53
  94. pixeltable/functions/groq.py +108 -0
  95. pixeltable/functions/huggingface.py +1088 -95
  96. pixeltable/functions/image.py +155 -84
  97. pixeltable/functions/json.py +8 -11
  98. pixeltable/functions/llama_cpp.py +31 -19
  99. pixeltable/functions/math.py +169 -0
  100. pixeltable/functions/mistralai.py +50 -75
  101. pixeltable/functions/net.py +70 -0
  102. pixeltable/functions/ollama.py +29 -36
  103. pixeltable/functions/openai.py +548 -160
  104. pixeltable/functions/openrouter.py +143 -0
  105. pixeltable/functions/replicate.py +15 -14
  106. pixeltable/functions/reve.py +250 -0
  107. pixeltable/functions/string.py +310 -85
  108. pixeltable/functions/timestamp.py +37 -19
  109. pixeltable/functions/together.py +77 -120
  110. pixeltable/functions/twelvelabs.py +188 -0
  111. pixeltable/functions/util.py +7 -2
  112. pixeltable/functions/uuid.py +30 -0
  113. pixeltable/functions/video.py +1528 -117
  114. pixeltable/functions/vision.py +26 -26
  115. pixeltable/functions/voyageai.py +289 -0
  116. pixeltable/functions/whisper.py +19 -10
  117. pixeltable/functions/whisperx.py +179 -0
  118. pixeltable/functions/yolox.py +112 -0
  119. pixeltable/globals.py +716 -236
  120. pixeltable/index/__init__.py +3 -1
  121. pixeltable/index/base.py +17 -21
  122. pixeltable/index/btree.py +32 -22
  123. pixeltable/index/embedding_index.py +155 -92
  124. pixeltable/io/__init__.py +12 -7
  125. pixeltable/io/datarows.py +140 -0
  126. pixeltable/io/external_store.py +83 -125
  127. pixeltable/io/fiftyone.py +24 -33
  128. pixeltable/io/globals.py +47 -182
  129. pixeltable/io/hf_datasets.py +96 -127
  130. pixeltable/io/label_studio.py +171 -156
  131. pixeltable/io/lancedb.py +3 -0
  132. pixeltable/io/pandas.py +136 -115
  133. pixeltable/io/parquet.py +40 -153
  134. pixeltable/io/table_data_conduit.py +702 -0
  135. pixeltable/io/utils.py +100 -0
  136. pixeltable/iterators/__init__.py +8 -4
  137. pixeltable/iterators/audio.py +207 -0
  138. pixeltable/iterators/base.py +9 -3
  139. pixeltable/iterators/document.py +144 -87
  140. pixeltable/iterators/image.py +17 -38
  141. pixeltable/iterators/string.py +15 -12
  142. pixeltable/iterators/video.py +523 -127
  143. pixeltable/metadata/__init__.py +33 -8
  144. pixeltable/metadata/converters/convert_10.py +2 -3
  145. pixeltable/metadata/converters/convert_13.py +2 -2
  146. pixeltable/metadata/converters/convert_15.py +15 -11
  147. pixeltable/metadata/converters/convert_16.py +4 -5
  148. pixeltable/metadata/converters/convert_17.py +4 -5
  149. pixeltable/metadata/converters/convert_18.py +4 -6
  150. pixeltable/metadata/converters/convert_19.py +6 -9
  151. pixeltable/metadata/converters/convert_20.py +3 -6
  152. pixeltable/metadata/converters/convert_21.py +6 -8
  153. pixeltable/metadata/converters/convert_22.py +3 -2
  154. pixeltable/metadata/converters/convert_23.py +33 -0
  155. pixeltable/metadata/converters/convert_24.py +55 -0
  156. pixeltable/metadata/converters/convert_25.py +19 -0
  157. pixeltable/metadata/converters/convert_26.py +23 -0
  158. pixeltable/metadata/converters/convert_27.py +29 -0
  159. pixeltable/metadata/converters/convert_28.py +13 -0
  160. pixeltable/metadata/converters/convert_29.py +110 -0
  161. pixeltable/metadata/converters/convert_30.py +63 -0
  162. pixeltable/metadata/converters/convert_31.py +11 -0
  163. pixeltable/metadata/converters/convert_32.py +15 -0
  164. pixeltable/metadata/converters/convert_33.py +17 -0
  165. pixeltable/metadata/converters/convert_34.py +21 -0
  166. pixeltable/metadata/converters/convert_35.py +9 -0
  167. pixeltable/metadata/converters/convert_36.py +38 -0
  168. pixeltable/metadata/converters/convert_37.py +15 -0
  169. pixeltable/metadata/converters/convert_38.py +39 -0
  170. pixeltable/metadata/converters/convert_39.py +124 -0
  171. pixeltable/metadata/converters/convert_40.py +73 -0
  172. pixeltable/metadata/converters/convert_41.py +12 -0
  173. pixeltable/metadata/converters/convert_42.py +9 -0
  174. pixeltable/metadata/converters/convert_43.py +44 -0
  175. pixeltable/metadata/converters/util.py +44 -18
  176. pixeltable/metadata/notes.py +21 -0
  177. pixeltable/metadata/schema.py +185 -42
  178. pixeltable/metadata/utils.py +74 -0
  179. pixeltable/mypy/__init__.py +3 -0
  180. pixeltable/mypy/mypy_plugin.py +123 -0
  181. pixeltable/plan.py +616 -225
  182. pixeltable/share/__init__.py +3 -0
  183. pixeltable/share/packager.py +797 -0
  184. pixeltable/share/protocol/__init__.py +33 -0
  185. pixeltable/share/protocol/common.py +165 -0
  186. pixeltable/share/protocol/operation_types.py +33 -0
  187. pixeltable/share/protocol/replica.py +119 -0
  188. pixeltable/share/publish.py +349 -0
  189. pixeltable/store.py +398 -232
  190. pixeltable/type_system.py +730 -267
  191. pixeltable/utils/__init__.py +40 -0
  192. pixeltable/utils/arrow.py +201 -29
  193. pixeltable/utils/av.py +298 -0
  194. pixeltable/utils/azure_store.py +346 -0
  195. pixeltable/utils/coco.py +26 -27
  196. pixeltable/utils/code.py +4 -4
  197. pixeltable/utils/console_output.py +46 -0
  198. pixeltable/utils/coroutine.py +24 -0
  199. pixeltable/utils/dbms.py +92 -0
  200. pixeltable/utils/description_helper.py +11 -12
  201. pixeltable/utils/documents.py +60 -61
  202. pixeltable/utils/exception_handler.py +36 -0
  203. pixeltable/utils/filecache.py +38 -22
  204. pixeltable/utils/formatter.py +88 -51
  205. pixeltable/utils/gcs_store.py +295 -0
  206. pixeltable/utils/http.py +133 -0
  207. pixeltable/utils/http_server.py +14 -13
  208. pixeltable/utils/iceberg.py +13 -0
  209. pixeltable/utils/image.py +17 -0
  210. pixeltable/utils/lancedb.py +90 -0
  211. pixeltable/utils/local_store.py +322 -0
  212. pixeltable/utils/misc.py +5 -0
  213. pixeltable/utils/object_stores.py +573 -0
  214. pixeltable/utils/pydantic.py +60 -0
  215. pixeltable/utils/pytorch.py +20 -20
  216. pixeltable/utils/s3_store.py +527 -0
  217. pixeltable/utils/sql.py +32 -5
  218. pixeltable/utils/system.py +30 -0
  219. pixeltable/utils/transactional_directory.py +4 -3
  220. pixeltable-0.5.7.dist-info/METADATA +579 -0
  221. pixeltable-0.5.7.dist-info/RECORD +227 -0
  222. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  223. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  224. pixeltable/__version__.py +0 -3
  225. pixeltable/catalog/named_function.py +0 -36
  226. pixeltable/catalog/path_dict.py +0 -141
  227. pixeltable/dataframe.py +0 -894
  228. pixeltable/exec/expr_eval_node.py +0 -232
  229. pixeltable/ext/__init__.py +0 -14
  230. pixeltable/ext/functions/__init__.py +0 -8
  231. pixeltable/ext/functions/whisperx.py +0 -77
  232. pixeltable/ext/functions/yolox.py +0 -157
  233. pixeltable/tool/create_test_db_dump.py +0 -311
  234. pixeltable/tool/create_test_video.py +0 -81
  235. pixeltable/tool/doc_plugins/griffe.py +0 -50
  236. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  237. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  238. pixeltable/tool/embed_udf.py +0 -9
  239. pixeltable/tool/mypy_plugin.py +0 -55
  240. pixeltable/utils/media_store.py +0 -76
  241. pixeltable/utils/s3.py +0 -16
  242. pixeltable-0.2.26.dist-info/METADATA +0 -400
  243. pixeltable-0.2.26.dist-info/RECORD +0 -156
  244. pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
  245. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -2,10 +2,11 @@ from __future__ import annotations
2
2
 
3
3
  import datetime
4
4
  import enum
5
- from typing import Union
5
+ import uuid
6
6
 
7
7
  # Python types corresponding to our literal types
8
- LiteralPythonTypes = Union[str, int, float, bool, datetime.datetime]
8
+ LiteralPythonTypes = str | int | float | bool | datetime.datetime | datetime.date | uuid.UUID
9
+
9
10
 
10
11
  def print_slice(s: slice) -> str:
11
12
  start_str = f'{str(s.start) if s.start is not None else ""}'
@@ -35,7 +36,7 @@ class ComparisonOperator(enum.Enum):
35
36
  return '>'
36
37
  if self == self.GE:
37
38
  return '>='
38
- assert False
39
+ raise AssertionError()
39
40
 
40
41
  def reverse(self) -> ComparisonOperator:
41
42
  if self == self.LT:
@@ -61,7 +62,7 @@ class LogicalOperator(enum.Enum):
61
62
  return '|'
62
63
  if self == self.NOT:
63
64
  return '~'
64
- assert False
65
+ raise AssertionError()
65
66
 
66
67
 
67
68
  class ArithmeticOperator(enum.Enum):
@@ -85,4 +86,16 @@ class ArithmeticOperator(enum.Enum):
85
86
  return '%'
86
87
  if self == self.FLOORDIV:
87
88
  return '//'
88
- assert False
89
+ raise AssertionError()
90
+
91
+
92
+ class StringOperator(enum.Enum):
93
+ CONCAT = 0
94
+ REPEAT = 1
95
+
96
+ def __str__(self) -> str:
97
+ if self == self.CONCAT:
98
+ return '+'
99
+ if self == self.REPEAT:
100
+ return '*'
101
+ raise AssertionError()
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Any, Iterable, Optional
3
+ from typing import Any, Iterable
4
4
 
5
5
  import sqlalchemy as sql
6
6
 
@@ -16,17 +16,18 @@ from .sql_element_cache import SqlElementCache
16
16
  class InPredicate(Expr):
17
17
  """Predicate corresponding to the SQL IN operator."""
18
18
 
19
- def __init__(self, lhs: Expr, value_set_literal: Optional[Iterable] = None, value_set_expr: Optional[Expr] = None):
19
+ def __init__(self, lhs: Expr, value_set_literal: Iterable | None = None, value_set_expr: Expr | None = None):
20
20
  assert (value_set_literal is None) != (value_set_expr is None)
21
21
  if not lhs.col_type.is_scalar_type():
22
22
  raise excs.Error(f'isin(): only supported for scalar types, not {lhs.col_type}')
23
23
  super().__init__(ts.BoolType())
24
24
 
25
- self.value_list: Optional[list] = None # only contains values of the correct type
25
+ self.value_list: list | None = None # only contains values of the correct type
26
26
  if value_set_expr is not None:
27
27
  if not value_set_expr.col_type.is_json_type():
28
28
  raise excs.Error(
29
- f'isin(): argument must have a JSON type, but {value_set_expr} has type {value_set_expr.col_type}')
29
+ f'isin(): argument must have a JSON type, but {value_set_expr} has type {value_set_expr.col_type}'
30
+ )
30
31
  self.components = [lhs.copy(), value_set_expr.copy()]
31
32
  else:
32
33
  assert value_set_literal is not None
@@ -70,9 +71,9 @@ class InPredicate(Expr):
70
71
  return self.value_list == other.value_list
71
72
 
72
73
  def _id_attrs(self) -> list[tuple[str, Any]]:
73
- return super()._id_attrs() + [('value_list', self.value_list)]
74
+ return [*super()._id_attrs(), ('value_list', self.value_list)]
74
75
 
75
- def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
76
+ def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
76
77
  lhs_sql_exprs = sql_elements.get(self.components[0])
77
78
  if lhs_sql_exprs is None or self.value_list is None:
78
79
  return None
@@ -95,4 +96,3 @@ class InPredicate(Expr):
95
96
  assert 'value_list' in d
96
97
  assert len(components) <= 2
97
98
  return cls(components[0], d['value_list'], components[1] if len(components) == 2 else None)
98
-
@@ -1,7 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- import copy
4
- from typing import Any, Iterable, Optional
3
+ from typing import Any, Iterable
5
4
 
6
5
  import numpy as np
7
6
  import sqlalchemy as sql
@@ -26,14 +25,14 @@ class InlineArray(Expr):
26
25
  for el in elements:
27
26
  if isinstance(el, Expr):
28
27
  exprs.append(el)
29
- elif isinstance(el, list) or isinstance(el, tuple):
30
- exprs.append(InlineArray(el))
28
+ elif isinstance(el, (list, tuple)):
29
+ exprs.append(Expr.from_array(el))
31
30
  else:
32
31
  exprs.append(Literal(el))
33
32
 
34
- inferred_element_type: Optional[ts.ColumnType] = ts.InvalidType()
33
+ inferred_element_type: ts.ColumnType | None = ts.InvalidType()
35
34
  for i, expr in enumerate(exprs):
36
- supertype = inferred_element_type.supertype(expr.col_type)
35
+ supertype = inferred_element_type.supertype(expr.col_type, for_inference=True)
37
36
  if supertype is None:
38
37
  raise excs.Error(
39
38
  f'Could not infer element type of array: element of type `{expr.col_type}` at index {i} '
@@ -45,10 +44,12 @@ class InlineArray(Expr):
45
44
  col_type = ts.ArrayType((len(exprs),), inferred_element_type)
46
45
  elif inferred_element_type.is_array_type():
47
46
  assert isinstance(inferred_element_type, ts.ArrayType)
48
- col_type = ts.ArrayType(
49
- (len(exprs), *inferred_element_type.shape),
50
- ts.ColumnType.make_type(inferred_element_type.dtype)
51
- )
47
+ dtype = inferred_element_type.dtype
48
+ shape = inferred_element_type.shape
49
+ if shape is not None and dtype is not None:
50
+ col_type = ts.ArrayType(shape=(len(exprs), *shape), dtype=dtype)
51
+ else:
52
+ col_type = ts.ArrayType(shape=None, dtype=dtype)
52
53
  else:
53
54
  raise excs.Error(f'Element type is not a valid dtype for an array: {inferred_element_type}')
54
55
 
@@ -63,7 +64,7 @@ class InlineArray(Expr):
63
64
  def _equals(self, _: InlineArray) -> bool:
64
65
  return True # Always true if components match
65
66
 
66
- def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
67
+ def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
67
68
  return None
68
69
 
69
70
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
@@ -83,6 +84,14 @@ class InlineArray(Expr):
83
84
  # loaded and their types are known.
84
85
  return InlineList(components) # type: ignore[return-value]
85
86
 
87
+ def as_literal(self) -> Literal | None:
88
+ assert isinstance(self.col_type, ts.ArrayType)
89
+ if not all(isinstance(comp, Literal) for comp in self.components):
90
+ return None
91
+ return Literal(
92
+ np.array([c.as_literal().val for c in self.components], dtype=self.col_type.dtype), self.col_type
93
+ )
94
+
86
95
 
87
96
  class InlineList(Expr):
88
97
  """
@@ -90,16 +99,7 @@ class InlineList(Expr):
90
99
  """
91
100
 
92
101
  def __init__(self, elements: Iterable):
93
- exprs = []
94
- for el in elements:
95
- if isinstance(el, Expr):
96
- exprs.append(el)
97
- elif isinstance(el, list) or isinstance(el, tuple):
98
- exprs.append(InlineList(el))
99
- elif isinstance(el, dict):
100
- exprs.append(InlineDict(el))
101
- else:
102
- exprs.append(Literal(el))
102
+ exprs = [Expr.from_object(el) for el in elements]
103
103
 
104
104
  super().__init__(ts.JsonType())
105
105
  self.components.extend(exprs)
@@ -112,7 +112,7 @@ class InlineList(Expr):
112
112
  def _equals(self, _: InlineList) -> bool:
113
113
  return True # Always true if components match
114
114
 
115
- def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
115
+ def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
116
116
  return None
117
117
 
118
118
  def eval(self, data_row: DataRow, _: RowBuilder) -> None:
@@ -125,6 +125,11 @@ class InlineList(Expr):
125
125
  def _from_dict(cls, _: dict, components: list[Expr]) -> InlineList:
126
126
  return cls(components)
127
127
 
128
+ def as_literal(self) -> Literal | None:
129
+ if not all(isinstance(comp, Literal) for comp in self.components):
130
+ return None
131
+ return Literal([c.as_literal().val for c in self.components], self.col_type)
132
+
128
133
 
129
134
  class InlineDict(Expr):
130
135
  """
@@ -140,21 +145,14 @@ class InlineDict(Expr):
140
145
  if not isinstance(key, str):
141
146
  raise excs.Error(f'Dictionary requires string keys; {key} has type {type(key)}')
142
147
  self.keys.append(key)
143
- if isinstance(val, Expr):
144
- exprs.append(val)
145
- elif isinstance(val, dict):
146
- exprs.append(InlineDict(val))
147
- elif isinstance(val, list) or isinstance(val, tuple):
148
- exprs.append(InlineList(val))
149
- else:
150
- exprs.append(Literal(val))
148
+ exprs.append(Expr.from_object(val))
151
149
 
152
150
  super().__init__(ts.JsonType())
153
151
  self.components.extend(exprs)
154
152
  self.id = self._create_id()
155
153
 
156
154
  def __repr__(self) -> str:
157
- item_strs = list(f"'{key}': {str(expr)}" for key, expr in zip(self.keys, self.components))
155
+ item_strs = [f"'{key}': {expr}" for key, expr in zip(self.keys, self.components)]
158
156
  return '{' + ', '.join(item_strs) + '}'
159
157
 
160
158
  def _equals(self, other: InlineDict) -> bool:
@@ -162,17 +160,14 @@ class InlineDict(Expr):
162
160
  return self.keys == other.keys
163
161
 
164
162
  def _id_attrs(self) -> list[tuple[str, Any]]:
165
- return super()._id_attrs() + [('keys', self.keys)]
163
+ return [*super()._id_attrs(), ('keys', self.keys)]
166
164
 
167
- def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
165
+ def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
168
166
  return None
169
167
 
170
168
  def eval(self, data_row: DataRow, _: RowBuilder) -> None:
171
169
  assert len(self.keys) == len(self.components)
172
- data_row[self.slot_idx] = {
173
- key: data_row[expr.slot_idx]
174
- for key, expr in zip(self.keys, self.components)
175
- }
170
+ data_row[self.slot_idx] = {key: data_row[expr.slot_idx] for key, expr in zip(self.keys, self.components)}
176
171
 
177
172
  def to_kwargs(self) -> dict[str, Any]:
178
173
  """Deconstructs this expression into a dictionary by recursively unwrapping all Literals,
@@ -198,3 +193,8 @@ class InlineDict(Expr):
198
193
  assert len(d['keys']) == len(components)
199
194
  arg = dict(zip(d['keys'], components))
200
195
  return InlineDict(arg)
196
+
197
+ def as_literal(self) -> Literal | None:
198
+ if not all(isinstance(comp, Literal) for comp in self.components):
199
+ return None
200
+ return Literal(dict(zip(self.keys, (c.as_literal().val for c in self.components))), self.col_type)
@@ -1,11 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Optional
4
-
5
3
  import sqlalchemy as sql
6
4
 
7
5
  import pixeltable.type_system as ts
8
6
 
7
+ from .column_ref import ColumnRef
9
8
  from .data_row import DataRow
10
9
  from .expr import Expr
11
10
  from .row_builder import RowBuilder
@@ -19,12 +18,17 @@ class IsNull(Expr):
19
18
  self.id = self._create_id()
20
19
 
21
20
  def __repr__(self) -> str:
22
- return f'{str(self.components[0])} == None'
21
+ return f'{self.components[0]} == None'
23
22
 
24
23
  def _equals(self, other: IsNull) -> bool:
25
24
  return True
26
25
 
27
- def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
26
+ def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
27
+ c = self.components[0]
28
+ if isinstance(c, ColumnRef) and c.col.stores_external_array():
29
+ # we also need to check CellMd.file_urls for null
30
+ e = sql.and_(c.col.sa_cellmd_col['file_urls'] == None, c.col.sa_col == None)
31
+ return e
28
32
  e = sql_elements.get(self.components[0])
29
33
  if e is None:
30
34
  return None
@@ -1,51 +1,143 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Optional
3
+ from typing import TYPE_CHECKING
4
4
 
5
5
  import sqlalchemy as sql
6
6
 
7
7
  import pixeltable.type_system as ts
8
+
8
9
  from .data_row import DataRow
9
- from .expr import Expr, ExprScope, _GLOBAL_SCOPE
10
+ from .expr import _GLOBAL_SCOPE, Expr, ExprScope
10
11
  from .row_builder import RowBuilder
11
12
  from .sql_element_cache import SqlElementCache
12
13
 
14
+ if TYPE_CHECKING:
15
+ from .object_ref import ObjectRef
16
+
13
17
 
14
18
  class JsonMapper(Expr):
15
19
  """
16
20
  JsonMapper transforms the list output of a JsonPath by applying a target expr to every element of the list.
17
21
  The target expr would typically contain relative JsonPaths, which are bound to an ObjectRef, which in turn
18
22
  is populated by JsonMapper.eval(). The JsonMapper effectively creates a new scope for its target expr.
23
+
24
+ JsonMapper is executed in two phases:
25
+ - the first phase is handled by Expr subclass JsonMapperDispatch, which constructs one nested DataRow per source
26
+ list element and evaluates the target expr within that (the nested DataRows are stored as a NestedRowList in the
27
+ slot of JsonMapperDispatch)
28
+ - JsonMapper.eval() collects the slot values of the target expr into its result list
19
29
  """
20
- def __init__(self, src_expr: Expr, target_expr: Expr):
30
+
31
+ target_expr_scope: ExprScope
32
+ parent_mapper: JsonMapper | None
33
+ target_expr_eval_ctx: RowBuilder.EvalCtx | None
34
+
35
+ def __init__(self, src_expr: Expr | None, target_expr: Expr | None):
21
36
  # TODO: type spec should be list[target_expr.col_type]
22
37
  super().__init__(ts.JsonType())
23
38
 
39
+ dispatch = JsonMapperDispatch(src_expr, target_expr)
40
+ self.components.append(dispatch)
41
+ self.id = self._create_id()
42
+
43
+ def __repr__(self) -> str:
44
+ return f'map({self._src_expr}, lambda R: {self._target_expr})'
45
+
46
+ @property
47
+ def _src_expr(self) -> Expr:
48
+ return self.components[0].src_expr
49
+
50
+ @property
51
+ def _target_expr(self) -> Expr:
52
+ return self.components[0].target_expr
53
+
54
+ def _equals(self, _: JsonMapper) -> bool:
55
+ return True
56
+
57
+ def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
58
+ return None
59
+
60
+ def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
61
+ from ..exec.expr_eval.evaluators import NestedRowList
62
+
63
+ dispatch_slot_idx = self.components[0].slot_idx
64
+ nested_rows = data_row.vals[dispatch_slot_idx]
65
+ if nested_rows is None:
66
+ data_row[self.slot_idx] = None
67
+ return
68
+ assert isinstance(nested_rows, NestedRowList)
69
+ # TODO: get the materialized slot idx, instead of relying on the fact that the target_expr is always at the end
70
+ data_row[self.slot_idx] = [row.vals[-1] for row in nested_rows.rows]
71
+
72
+ def _as_dict(self) -> dict:
73
+ """
74
+ We only serialize src and target exprs, everything else is re-created at runtime.
75
+ """
76
+ return {'components': [self._src_expr.as_dict(), self._target_expr.as_dict()]}
77
+
78
+ @classmethod
79
+ def _from_dict(cls, d: dict, components: list[Expr]) -> JsonMapper:
80
+ assert len(components) == 2
81
+ src_expr, target_expr = components[0], components[1]
82
+ return cls(src_expr, target_expr)
83
+
84
+
85
+ class JsonMapperDispatch(Expr):
86
+ """
87
+ An operational Expr (ie, it doesn't represent any syntactic element) that is used by JsonMapper to materialize
88
+ its input DataRows. It has the same dependencies as the originating JsonMapper.
89
+
90
+ - The execution (= row dispatch) is handled by an expr_eval.Evaluator (JsonMapperDispatcher).
91
+ - It stores a NestedRowList instance in its slot.
92
+ """
93
+
94
+ target_expr_scope: ExprScope
95
+ parent_mapper: JsonMapperDispatch | None
96
+ target_expr_eval_ctx: RowBuilder.EvalCtx | None
97
+
98
+ def __init__(self, src_expr: Expr, target_expr: Expr):
99
+ super().__init__(ts.InvalidType())
100
+
24
101
  # we're creating a new scope, but we don't know yet whether this is nested within another JsonMapper;
25
102
  # this gets resolved in bind_rel_paths(); for now we assume we're in the global scope
26
103
  self.target_expr_scope = ExprScope(_GLOBAL_SCOPE)
27
104
 
28
105
  from .object_ref import ObjectRef
29
- scope_anchor = ObjectRef(self.target_expr_scope, self)
30
- self.components = [src_expr, target_expr, scope_anchor]
31
- self.parent_mapper: Optional[JsonMapper] = None
32
- self.target_expr_eval_ctx: Optional[RowBuilder.EvalCtx] = None
106
+
107
+ self.components = [src_expr, target_expr]
108
+ self.parent_mapper = None
109
+ self.target_expr_eval_ctx = None
110
+
111
+ # Intentionally create the id now, before adding the scope anchor; this ensures that JsonMapperDispatch
112
+ # instances will be recognized as equal so long as they have the same src_expr and target_expr.
113
+ # TODO: Might this cause problems after certain substitutions?
33
114
  self.id = self._create_id()
34
115
 
35
- def bind_rel_paths(self, mapper: Optional[JsonMapper] = None) -> None:
36
- self._src_expr.bind_rel_paths(mapper)
37
- self._target_expr.bind_rel_paths(self)
116
+ scope_anchor = ObjectRef(self.target_expr_scope, self)
117
+ self.components.append(scope_anchor)
118
+
119
+ def _bind_rel_paths(self, mapper: JsonMapperDispatch | None = None) -> None:
120
+ self.src_expr._bind_rel_paths(mapper)
121
+ self.target_expr._bind_rel_paths(self)
38
122
  self.parent_mapper = mapper
39
123
  parent_scope = _GLOBAL_SCOPE if mapper is None else mapper.target_expr_scope
40
124
  self.target_expr_scope.parent = parent_scope
41
125
 
126
+ def equals(self, other: Expr) -> bool:
127
+ """
128
+ We override equals() because we need to avoid comparing our scope anchor.
129
+ """
130
+ if type(self) is not type(other):
131
+ return False
132
+ return self.src_expr.equals(other.src_expr) and self.target_expr.equals(other.target_expr)
133
+
42
134
  def scope(self) -> ExprScope:
43
135
  # need to ignore target_expr
44
- return self._src_expr.scope()
136
+ return self.src_expr.scope()
45
137
 
46
138
  def dependencies(self) -> list[Expr]:
47
- result = [self._src_expr]
48
- result.extend(self._target_dependencies(self._target_expr))
139
+ result = [self.src_expr]
140
+ result.extend(self._target_dependencies(self.target_expr))
49
141
  return result
50
142
 
51
143
  def _target_dependencies(self, e: Expr) -> list[Expr]:
@@ -61,61 +153,35 @@ class JsonMapper(Expr):
61
153
  result.extend(self._target_dependencies(c))
62
154
  return result
63
155
 
64
- def equals(self, other: Expr) -> bool:
65
- """
66
- We override equals() because we need to avoid comparing our scope anchor.
67
- """
68
- if type(self) != type(other):
69
- return False
70
- return self._src_expr.equals(other._src_expr) and self._target_expr.equals(other._target_expr)
71
-
72
- def __repr__(self) -> str:
73
- return f'{str(self._src_expr)} >> {str(self._target_expr)}'
74
-
75
156
  @property
76
- def _src_expr(self) -> Expr:
157
+ def src_expr(self) -> Expr:
77
158
  return self.components[0]
78
159
 
79
160
  @property
80
- def _target_expr(self) -> Expr:
161
+ def target_expr(self) -> Expr:
81
162
  return self.components[1]
82
163
 
83
164
  @property
84
- def scope_anchor(self) -> Expr:
85
- return self.components[2]
165
+ def scope_anchor(self) -> 'ObjectRef':
166
+ from .object_ref import ObjectRef
86
167
 
87
- def _equals(self, _: JsonMapper) -> bool:
88
- return True
168
+ result = self.components[2]
169
+ assert isinstance(result, ObjectRef)
170
+ return result
89
171
 
90
- def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
91
- return None
172
+ def __repr__(self) -> str:
173
+ return 'JsonMapperDispatch()'
92
174
 
93
175
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
94
- # this will be called, but the value has already been materialized elsewhere
95
- src = data_row[self._src_expr.slot_idx]
96
- if not isinstance(src, list):
97
- # invalid/non-list src path
98
- data_row[self.slot_idx] = None
99
- return
100
-
101
- result = [None] * len(src)
102
- if self.target_expr_eval_ctx is None:
103
- self.target_expr_eval_ctx = row_builder.create_eval_ctx([self._target_expr])
104
- for i, val in enumerate(src):
105
- data_row[self.scope_anchor.slot_idx] = val
106
- # stored target_expr
107
- row_builder.eval(data_row, self.target_expr_eval_ctx)
108
- result[i] = data_row[self._target_expr.slot_idx]
109
- data_row[self.slot_idx] = result
176
+ # eval is handled by JsonMapperDispatcher
177
+ raise AssertionError('this should never be called')
110
178
 
111
179
  def _as_dict(self) -> dict:
112
180
  """
113
- We need to avoid serializing component[2], which is an ObjectRef.
181
+ JsonMapperDispatch instances are only created by the JsonMapper c'tor and never need to be serialized.
114
182
  """
115
- return {'components': [c.as_dict() for c in self.components[0:2]]}
183
+ raise AssertionError('this should never be called')
116
184
 
117
185
  @classmethod
118
- def _from_dict(cls, d: dict, components: list[Expr]) -> JsonMapper:
119
- assert len(components) == 2
120
- return cls(components[0], components[1])
121
-
186
+ def _from_dict(cls, d: dict, components: list[Expr]) -> JsonMapperDispatch:
187
+ raise AssertionError('this should never be called')