pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245) hide show
  1. pixeltable/__init__.py +83 -19
  2. pixeltable/_query.py +1444 -0
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +7 -4
  5. pixeltable/catalog/catalog.py +2394 -119
  6. pixeltable/catalog/column.py +225 -104
  7. pixeltable/catalog/dir.py +38 -9
  8. pixeltable/catalog/globals.py +53 -34
  9. pixeltable/catalog/insertable_table.py +265 -115
  10. pixeltable/catalog/path.py +80 -17
  11. pixeltable/catalog/schema_object.py +28 -43
  12. pixeltable/catalog/table.py +1270 -677
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +1270 -751
  15. pixeltable/catalog/table_version_handle.py +109 -0
  16. pixeltable/catalog/table_version_path.py +137 -42
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +251 -134
  20. pixeltable/config.py +215 -0
  21. pixeltable/env.py +736 -285
  22. pixeltable/exceptions.py +26 -2
  23. pixeltable/exec/__init__.py +7 -2
  24. pixeltable/exec/aggregation_node.py +39 -21
  25. pixeltable/exec/cache_prefetch_node.py +87 -109
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +25 -28
  29. pixeltable/exec/data_row_batch.py +11 -46
  30. pixeltable/exec/exec_context.py +26 -11
  31. pixeltable/exec/exec_node.py +35 -27
  32. pixeltable/exec/expr_eval/__init__.py +3 -0
  33. pixeltable/exec/expr_eval/evaluators.py +365 -0
  34. pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
  35. pixeltable/exec/expr_eval/globals.py +200 -0
  36. pixeltable/exec/expr_eval/row_buffer.py +74 -0
  37. pixeltable/exec/expr_eval/schedulers.py +413 -0
  38. pixeltable/exec/globals.py +35 -0
  39. pixeltable/exec/in_memory_data_node.py +35 -27
  40. pixeltable/exec/object_store_save_node.py +293 -0
  41. pixeltable/exec/row_update_node.py +44 -29
  42. pixeltable/exec/sql_node.py +414 -115
  43. pixeltable/exprs/__init__.py +8 -5
  44. pixeltable/exprs/arithmetic_expr.py +79 -45
  45. pixeltable/exprs/array_slice.py +5 -5
  46. pixeltable/exprs/column_property_ref.py +40 -26
  47. pixeltable/exprs/column_ref.py +254 -61
  48. pixeltable/exprs/comparison.py +14 -9
  49. pixeltable/exprs/compound_predicate.py +9 -10
  50. pixeltable/exprs/data_row.py +213 -72
  51. pixeltable/exprs/expr.py +270 -104
  52. pixeltable/exprs/expr_dict.py +6 -5
  53. pixeltable/exprs/expr_set.py +20 -11
  54. pixeltable/exprs/function_call.py +383 -284
  55. pixeltable/exprs/globals.py +18 -5
  56. pixeltable/exprs/in_predicate.py +7 -7
  57. pixeltable/exprs/inline_expr.py +37 -37
  58. pixeltable/exprs/is_null.py +8 -4
  59. pixeltable/exprs/json_mapper.py +120 -54
  60. pixeltable/exprs/json_path.py +90 -60
  61. pixeltable/exprs/literal.py +61 -16
  62. pixeltable/exprs/method_ref.py +7 -6
  63. pixeltable/exprs/object_ref.py +19 -8
  64. pixeltable/exprs/row_builder.py +238 -75
  65. pixeltable/exprs/rowid_ref.py +53 -15
  66. pixeltable/exprs/similarity_expr.py +65 -50
  67. pixeltable/exprs/sql_element_cache.py +5 -5
  68. pixeltable/exprs/string_op.py +107 -0
  69. pixeltable/exprs/type_cast.py +25 -13
  70. pixeltable/exprs/variable.py +2 -2
  71. pixeltable/func/__init__.py +9 -5
  72. pixeltable/func/aggregate_function.py +197 -92
  73. pixeltable/func/callable_function.py +119 -35
  74. pixeltable/func/expr_template_function.py +101 -48
  75. pixeltable/func/function.py +375 -62
  76. pixeltable/func/function_registry.py +20 -19
  77. pixeltable/func/globals.py +6 -5
  78. pixeltable/func/mcp.py +74 -0
  79. pixeltable/func/query_template_function.py +151 -35
  80. pixeltable/func/signature.py +178 -49
  81. pixeltable/func/tools.py +164 -0
  82. pixeltable/func/udf.py +176 -53
  83. pixeltable/functions/__init__.py +44 -4
  84. pixeltable/functions/anthropic.py +226 -47
  85. pixeltable/functions/audio.py +148 -11
  86. pixeltable/functions/bedrock.py +137 -0
  87. pixeltable/functions/date.py +188 -0
  88. pixeltable/functions/deepseek.py +113 -0
  89. pixeltable/functions/document.py +81 -0
  90. pixeltable/functions/fal.py +76 -0
  91. pixeltable/functions/fireworks.py +72 -20
  92. pixeltable/functions/gemini.py +249 -0
  93. pixeltable/functions/globals.py +208 -53
  94. pixeltable/functions/groq.py +108 -0
  95. pixeltable/functions/huggingface.py +1088 -95
  96. pixeltable/functions/image.py +155 -84
  97. pixeltable/functions/json.py +8 -11
  98. pixeltable/functions/llama_cpp.py +31 -19
  99. pixeltable/functions/math.py +169 -0
  100. pixeltable/functions/mistralai.py +50 -75
  101. pixeltable/functions/net.py +70 -0
  102. pixeltable/functions/ollama.py +29 -36
  103. pixeltable/functions/openai.py +548 -160
  104. pixeltable/functions/openrouter.py +143 -0
  105. pixeltable/functions/replicate.py +15 -14
  106. pixeltable/functions/reve.py +250 -0
  107. pixeltable/functions/string.py +310 -85
  108. pixeltable/functions/timestamp.py +37 -19
  109. pixeltable/functions/together.py +77 -120
  110. pixeltable/functions/twelvelabs.py +188 -0
  111. pixeltable/functions/util.py +7 -2
  112. pixeltable/functions/uuid.py +30 -0
  113. pixeltable/functions/video.py +1528 -117
  114. pixeltable/functions/vision.py +26 -26
  115. pixeltable/functions/voyageai.py +289 -0
  116. pixeltable/functions/whisper.py +19 -10
  117. pixeltable/functions/whisperx.py +179 -0
  118. pixeltable/functions/yolox.py +112 -0
  119. pixeltable/globals.py +716 -236
  120. pixeltable/index/__init__.py +3 -1
  121. pixeltable/index/base.py +17 -21
  122. pixeltable/index/btree.py +32 -22
  123. pixeltable/index/embedding_index.py +155 -92
  124. pixeltable/io/__init__.py +12 -7
  125. pixeltable/io/datarows.py +140 -0
  126. pixeltable/io/external_store.py +83 -125
  127. pixeltable/io/fiftyone.py +24 -33
  128. pixeltable/io/globals.py +47 -182
  129. pixeltable/io/hf_datasets.py +96 -127
  130. pixeltable/io/label_studio.py +171 -156
  131. pixeltable/io/lancedb.py +3 -0
  132. pixeltable/io/pandas.py +136 -115
  133. pixeltable/io/parquet.py +40 -153
  134. pixeltable/io/table_data_conduit.py +702 -0
  135. pixeltable/io/utils.py +100 -0
  136. pixeltable/iterators/__init__.py +8 -4
  137. pixeltable/iterators/audio.py +207 -0
  138. pixeltable/iterators/base.py +9 -3
  139. pixeltable/iterators/document.py +144 -87
  140. pixeltable/iterators/image.py +17 -38
  141. pixeltable/iterators/string.py +15 -12
  142. pixeltable/iterators/video.py +523 -127
  143. pixeltable/metadata/__init__.py +33 -8
  144. pixeltable/metadata/converters/convert_10.py +2 -3
  145. pixeltable/metadata/converters/convert_13.py +2 -2
  146. pixeltable/metadata/converters/convert_15.py +15 -11
  147. pixeltable/metadata/converters/convert_16.py +4 -5
  148. pixeltable/metadata/converters/convert_17.py +4 -5
  149. pixeltable/metadata/converters/convert_18.py +4 -6
  150. pixeltable/metadata/converters/convert_19.py +6 -9
  151. pixeltable/metadata/converters/convert_20.py +3 -6
  152. pixeltable/metadata/converters/convert_21.py +6 -8
  153. pixeltable/metadata/converters/convert_22.py +3 -2
  154. pixeltable/metadata/converters/convert_23.py +33 -0
  155. pixeltable/metadata/converters/convert_24.py +55 -0
  156. pixeltable/metadata/converters/convert_25.py +19 -0
  157. pixeltable/metadata/converters/convert_26.py +23 -0
  158. pixeltable/metadata/converters/convert_27.py +29 -0
  159. pixeltable/metadata/converters/convert_28.py +13 -0
  160. pixeltable/metadata/converters/convert_29.py +110 -0
  161. pixeltable/metadata/converters/convert_30.py +63 -0
  162. pixeltable/metadata/converters/convert_31.py +11 -0
  163. pixeltable/metadata/converters/convert_32.py +15 -0
  164. pixeltable/metadata/converters/convert_33.py +17 -0
  165. pixeltable/metadata/converters/convert_34.py +21 -0
  166. pixeltable/metadata/converters/convert_35.py +9 -0
  167. pixeltable/metadata/converters/convert_36.py +38 -0
  168. pixeltable/metadata/converters/convert_37.py +15 -0
  169. pixeltable/metadata/converters/convert_38.py +39 -0
  170. pixeltable/metadata/converters/convert_39.py +124 -0
  171. pixeltable/metadata/converters/convert_40.py +73 -0
  172. pixeltable/metadata/converters/convert_41.py +12 -0
  173. pixeltable/metadata/converters/convert_42.py +9 -0
  174. pixeltable/metadata/converters/convert_43.py +44 -0
  175. pixeltable/metadata/converters/util.py +44 -18
  176. pixeltable/metadata/notes.py +21 -0
  177. pixeltable/metadata/schema.py +185 -42
  178. pixeltable/metadata/utils.py +74 -0
  179. pixeltable/mypy/__init__.py +3 -0
  180. pixeltable/mypy/mypy_plugin.py +123 -0
  181. pixeltable/plan.py +616 -225
  182. pixeltable/share/__init__.py +3 -0
  183. pixeltable/share/packager.py +797 -0
  184. pixeltable/share/protocol/__init__.py +33 -0
  185. pixeltable/share/protocol/common.py +165 -0
  186. pixeltable/share/protocol/operation_types.py +33 -0
  187. pixeltable/share/protocol/replica.py +119 -0
  188. pixeltable/share/publish.py +349 -0
  189. pixeltable/store.py +398 -232
  190. pixeltable/type_system.py +730 -267
  191. pixeltable/utils/__init__.py +40 -0
  192. pixeltable/utils/arrow.py +201 -29
  193. pixeltable/utils/av.py +298 -0
  194. pixeltable/utils/azure_store.py +346 -0
  195. pixeltable/utils/coco.py +26 -27
  196. pixeltable/utils/code.py +4 -4
  197. pixeltable/utils/console_output.py +46 -0
  198. pixeltable/utils/coroutine.py +24 -0
  199. pixeltable/utils/dbms.py +92 -0
  200. pixeltable/utils/description_helper.py +11 -12
  201. pixeltable/utils/documents.py +60 -61
  202. pixeltable/utils/exception_handler.py +36 -0
  203. pixeltable/utils/filecache.py +38 -22
  204. pixeltable/utils/formatter.py +88 -51
  205. pixeltable/utils/gcs_store.py +295 -0
  206. pixeltable/utils/http.py +133 -0
  207. pixeltable/utils/http_server.py +14 -13
  208. pixeltable/utils/iceberg.py +13 -0
  209. pixeltable/utils/image.py +17 -0
  210. pixeltable/utils/lancedb.py +90 -0
  211. pixeltable/utils/local_store.py +322 -0
  212. pixeltable/utils/misc.py +5 -0
  213. pixeltable/utils/object_stores.py +573 -0
  214. pixeltable/utils/pydantic.py +60 -0
  215. pixeltable/utils/pytorch.py +20 -20
  216. pixeltable/utils/s3_store.py +527 -0
  217. pixeltable/utils/sql.py +32 -5
  218. pixeltable/utils/system.py +30 -0
  219. pixeltable/utils/transactional_directory.py +4 -3
  220. pixeltable-0.5.7.dist-info/METADATA +579 -0
  221. pixeltable-0.5.7.dist-info/RECORD +227 -0
  222. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  223. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  224. pixeltable/__version__.py +0 -3
  225. pixeltable/catalog/named_function.py +0 -36
  226. pixeltable/catalog/path_dict.py +0 -141
  227. pixeltable/dataframe.py +0 -894
  228. pixeltable/exec/expr_eval_node.py +0 -232
  229. pixeltable/ext/__init__.py +0 -14
  230. pixeltable/ext/functions/__init__.py +0 -8
  231. pixeltable/ext/functions/whisperx.py +0 -77
  232. pixeltable/ext/functions/yolox.py +0 -157
  233. pixeltable/tool/create_test_db_dump.py +0 -311
  234. pixeltable/tool/create_test_video.py +0 -81
  235. pixeltable/tool/doc_plugins/griffe.py +0 -50
  236. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  237. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  238. pixeltable/tool/embed_udf.py +0 -9
  239. pixeltable/tool/mypy_plugin.py +0 -55
  240. pixeltable/utils/media_store.py +0 -76
  241. pixeltable/utils/s3.py +0 -16
  242. pixeltable-0.2.26.dist-info/METADATA +0 -400
  243. pixeltable-0.2.26.dist-info/RECORD +0 -156
  244. pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
  245. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -1,59 +1,75 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Any, Optional, Union
3
+ import io
4
+ from pathlib import Path
5
+ from typing import Any
4
6
 
5
7
  import jmespath
6
8
  import sqlalchemy as sql
7
9
 
8
- import pixeltable as pxt
9
- import pixeltable.catalog as catalog
10
- import pixeltable.exceptions as excs
11
- import pixeltable.type_system as ts
10
+ from pixeltable import catalog, exceptions as excs, type_system as ts
12
11
 
12
+ from .column_ref import ColumnRef
13
13
  from .data_row import DataRow
14
14
  from .expr import Expr
15
15
  from .globals import print_slice
16
- from .json_mapper import JsonMapper
16
+ from .json_mapper import JsonMapperDispatch
17
+ from .object_ref import ObjectRef
17
18
  from .row_builder import RowBuilder
18
19
  from .sql_element_cache import SqlElementCache
19
20
 
20
21
 
21
22
  class JsonPath(Expr):
23
+ """
24
+ anchor can be None, in which case this is a relative JsonPath and the anchor is set later via set_anchor().
25
+ scope_idx: for relative paths, index of referenced JsonMapper
26
+ (0: indicates the immediately preceding JsonMapper, -1: the parent of the immediately preceding mapper, ...)
27
+ """
28
+
29
+ path_elements: list[str | int | slice]
30
+ compiled_path: jmespath.parser.ParsedResult | None
31
+ scope_idx: int
32
+ file_handles: dict[Path, io.BufferedReader] # key: file path
33
+
22
34
  def __init__(
23
- self,
24
- anchor: Optional['pxt.exprs.Expr'],
25
- path_elements: Optional[list[Union[str, int, slice]]] = None,
26
- scope_idx: int = 0
35
+ self, anchor: Expr | None, path_elements: list[str | int | slice] | None = None, scope_idx: int = 0
27
36
  ) -> None:
28
- """
29
- anchor can be None, in which case this is a relative JsonPath and the anchor is set later via set_anchor().
30
- scope_idx: for relative paths, index of referenced JsonMapper
31
- (0: indicates the immediately preceding JsonMapper, -1: the parent of the immediately preceding mapper, ...)
32
- """
33
37
  if path_elements is None:
34
38
  path_elements = []
35
39
  super().__init__(ts.JsonType(nullable=True)) # JsonPath expressions are always nullable
36
40
  if anchor is not None:
37
41
  self.components = [anchor]
38
- self.path_elements: list[Union[str, int, slice]] = path_elements
42
+ self.path_elements = path_elements
39
43
  self.compiled_path = jmespath.compile(self._json_path()) if len(path_elements) > 0 else None
40
44
  self.scope_idx = scope_idx
41
45
  # NOTE: the _create_id() result will change if set_anchor() gets called;
42
46
  # this is not a problem, because _create_id() shouldn't be called after init()
43
47
  self.id = self._create_id()
48
+ self.file_handles = {}
49
+
50
+ def release(self) -> None:
51
+ for fh in self.file_handles.values():
52
+ fh.close()
53
+ self.file_handles.clear()
44
54
 
45
55
  def __repr__(self) -> str:
46
- # else "R": the anchor is RELATIVE_PATH_ROOT
47
- return (f'{str(self._anchor) if self._anchor is not None else "R"}'
48
- f'{"." if isinstance(self.path_elements[0], str) else ""}{self._json_path()}')
56
+ # else 'R': the anchor is RELATIVE_PATH_ROOT
57
+ anchor_str = str(self.anchor) if self.anchor is not None else 'R'
58
+ if len(self.path_elements) == 0:
59
+ return anchor_str
60
+ return f'{anchor_str}{"." if isinstance(self.path_elements[0], str) else ""}{self._json_path()}'
49
61
 
50
62
  def _as_dict(self) -> dict:
51
- path_elements = [
52
- [el.start, el.stop, el.step] if isinstance(el, slice)
53
- else el
54
- for el in self.path_elements
55
- ]
56
- return {'path_elements': path_elements, 'scope_idx': self.scope_idx, **super()._as_dict()}
63
+ assert len(self.components) <= 1
64
+ components_dict: dict[str, Any]
65
+ if len(self.components) == 0 or isinstance(self.components[0], ObjectRef):
66
+ # If the anchor is an ObjectRef, it means this JsonPath is a bound relative path. We store it as a relative
67
+ # path, *not* a bound path (which has no meaning in the dict).
68
+ components_dict = {}
69
+ else:
70
+ components_dict = super()._as_dict()
71
+ path_elements = [[el.start, el.stop, el.step] if isinstance(el, slice) else el for el in self.path_elements]
72
+ return {'path_elements': path_elements, 'scope_idx': self.scope_idx, **components_dict}
57
73
 
58
74
  @classmethod
59
75
  def _from_dict(cls, d: dict, components: list[Expr]) -> JsonPath:
@@ -61,15 +77,11 @@ class JsonPath(Expr):
61
77
  assert 'scope_idx' in d
62
78
  assert len(components) <= 1
63
79
  anchor = components[0] if len(components) == 1 else None
64
- path_elements = [
65
- slice(el[0], el[1], el[2]) if isinstance(el, list)
66
- else el
67
- for el in d['path_elements']
68
- ]
80
+ path_elements = [slice(el[0], el[1], el[2]) if isinstance(el, list) else el for el in d['path_elements']]
69
81
  return cls(anchor, path_elements, d['scope_idx'])
70
82
 
71
83
  @property
72
- def _anchor(self) -> Optional[Expr]:
84
+ def anchor(self) -> Expr | None:
73
85
  return None if len(self.components) == 0 else self.components[0]
74
86
 
75
87
  def set_anchor(self, anchor: Expr) -> None:
@@ -77,44 +89,42 @@ class JsonPath(Expr):
77
89
  self.components = [anchor]
78
90
 
79
91
  def is_relative_path(self) -> bool:
80
- return self._anchor is None
92
+ return self.anchor is None
81
93
 
82
- def bind_rel_paths(self, mapper: Optional['JsonMapper'] = None) -> None:
83
- if not self.is_relative_path():
84
- return
85
- # TODO: take scope_idx into account
86
- self.set_anchor(mapper.scope_anchor)
94
+ def _has_relative_path(self) -> bool:
95
+ return self.is_relative_path() or super()._has_relative_path()
96
+
97
+ def _bind_rel_paths(self, mapper: 'JsonMapperDispatch' | None = None) -> None:
98
+ if self.is_relative_path():
99
+ # TODO: take scope_idx into account
100
+ self.set_anchor(mapper.scope_anchor)
101
+ else:
102
+ self.anchor._bind_rel_paths(mapper)
87
103
 
88
104
  def __call__(self, *args: object, **kwargs: object) -> 'JsonPath':
89
105
  """
90
106
  Construct a relative path that references an ancestor of the immediately enclosing JsonMapper.
91
107
  """
92
108
  if not self.is_relative_path():
93
- raise excs.Error(f'() for an absolute path is invalid')
109
+ raise excs.Error('() for an absolute path is invalid')
94
110
  if len(args) != 1 or not isinstance(args[0], int) or args[0] >= 0:
95
- raise excs.Error(f'R() requires a negative index')
111
+ raise excs.Error('R() requires a negative index')
96
112
  return JsonPath(None, [], args[0])
97
113
 
98
114
  def __getattr__(self, name: str) -> 'JsonPath':
99
115
  assert isinstance(name, str)
100
- return JsonPath(self._anchor, self.path_elements + [name])
116
+ return JsonPath(self.anchor, [*self.path_elements, name])
101
117
 
102
118
  def __getitem__(self, index: object) -> 'JsonPath':
103
119
  if isinstance(index, (int, slice, str)):
104
- return JsonPath(self._anchor, self.path_elements + [index])
120
+ return JsonPath(self.anchor, [*self.path_elements, index])
105
121
  raise excs.Error(f'Invalid json list index: {index}')
106
122
 
107
- def __rshift__(self, other: object) -> 'JsonMapper':
108
- rhs_expr = Expr.from_object(other)
109
- if rhs_expr is None:
110
- raise excs.Error(f'>> requires an expression on the right-hand side, found {type(other)}')
111
- return JsonMapper(self, rhs_expr)
112
-
113
- def default_column_name(self) -> Optional[str]:
114
- anchor_name = self._anchor.default_column_name() if self._anchor is not None else ''
123
+ def default_column_name(self) -> str | None:
124
+ anchor_name = self.anchor.default_column_name() if self.anchor is not None else ''
115
125
  ret_name = f'{anchor_name}.{self._json_path()}'
116
126
 
117
- def cleanup_char(s : str) -> str:
127
+ def cleanup_char(s: str) -> str:
118
128
  if s == '.':
119
129
  return '_'
120
130
  elif s == '*':
@@ -125,8 +135,8 @@ class JsonPath(Expr):
125
135
  return ''
126
136
 
127
137
  clean_name = ''.join(map(cleanup_char, ret_name))
128
- clean_name = clean_name.lstrip('_') # remove leading underscore
129
- if clean_name == '':
138
+ clean_name = clean_name.lstrip('_') # remove leading underscore
139
+ if not clean_name: # Replace '' with None
130
140
  clean_name = None
131
141
 
132
142
  assert clean_name is None or catalog.is_valid_identifier(clean_name)
@@ -136,17 +146,17 @@ class JsonPath(Expr):
136
146
  return self.path_elements == other.path_elements
137
147
 
138
148
  def _id_attrs(self) -> list[tuple[str, Any]]:
139
- return super()._id_attrs() + [('path_elements', self.path_elements)]
149
+ return [*super()._id_attrs(), ('path_elements', self.path_elements)]
140
150
 
141
- def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
151
+ def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
142
152
  """
143
153
  Postgres appears to have a bug: jsonb_path_query('{a: [{b: 0}, {b: 1}]}', '$.a.b') returns
144
154
  *two* rows (each containing col val 0), not a single row with [0, 0].
145
155
  We need to use a workaround: retrieve the entire dict, then use jmespath to extract the path correctly.
146
156
  """
147
- #path_str = '$.' + '.'.join(self.path_elements)
148
- #assert isinstance(self._anchor(), ColumnRef)
149
- #return sql.func.jsonb_path_query(self._anchor().col.sa_col, path_str)
157
+ # path_str = '$.' + '.'.join(self.path_elements)
158
+ # assert isinstance(self._anchor(), ColumnRef)
159
+ # return sql.func.jsonb_path_query(self._anchor().col.sa_col, path_str)
150
160
  return None
151
161
 
152
162
  def _json_path(self) -> str:
@@ -163,11 +173,31 @@ class JsonPath(Expr):
163
173
  result.append(f'[{print_slice(element)}]')
164
174
  return ''.join(result)
165
175
 
166
- def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
167
- val = data_row[self._anchor.slot_idx]
176
+ def eval(self, row: DataRow, row_builder: RowBuilder) -> None:
177
+ assert self.anchor is not None, self
178
+ val = row[self.anchor.slot_idx]
168
179
  if self.compiled_path is not None:
169
180
  val = self.compiled_path.search(val)
170
- data_row[self.slot_idx] = val
181
+ row[self.slot_idx] = val
182
+ if val is None or self.anchor is None or not isinstance(self.anchor, ColumnRef):
183
+ return
184
+
185
+ # the origin of val is a json-typed column, which might store inlined objects
186
+ if self.anchor.slot_idx not in row.slot_md:
187
+ # we can infer that there aren't any inlined objects because our execution plan doesn't include
188
+ # materializing the cellmd (e.g., insert plans)
189
+ # TODO: have the planner pass that fact into ExprEvalNode explicitly to streamline this path a bit more
190
+ return
191
+
192
+ # defer import until it's needed
193
+ from pixeltable.exec.cell_reconstruction_node import json_has_inlined_objs, reconstruct_json
194
+
195
+ cell_md = row.slot_md[self.anchor.slot_idx]
196
+ if cell_md is None or cell_md.file_urls is None or not json_has_inlined_objs(val):
197
+ # val doesn't contain inlined objects
198
+ return
199
+
200
+ row.vals[self.slot_idx] = reconstruct_json(val, cell_md.file_urls, self.file_handles)
171
201
 
172
202
 
173
203
  RELATIVE_PATH_ROOT = JsonPath(None)
@@ -1,8 +1,11 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import base64
3
4
  import datetime
4
- from typing import Any, Optional
5
+ import uuid
6
+ from typing import Any
5
7
 
8
+ import numpy as np
6
9
  import sqlalchemy as sql
7
10
 
8
11
  import pixeltable.type_system as ts
@@ -15,7 +18,9 @@ from .sql_element_cache import SqlElementCache
15
18
 
16
19
 
17
20
  class Literal(Expr):
18
- def __init__(self, val: Any, col_type: Optional[ts.ColumnType] = None):
21
+ val: Any
22
+
23
+ def __init__(self, val: Any, col_type: ts.ColumnType | None = None):
19
24
  if col_type is not None:
20
25
  val = col_type.create_literal(val)
21
26
  else:
@@ -33,10 +38,13 @@ class Literal(Expr):
33
38
  val = val.replace(tzinfo=default_tz)
34
39
  # Now convert to UTC
35
40
  val = val.astimezone(datetime.timezone.utc)
41
+ if isinstance(val, tuple):
42
+ # Tuples are stored as a list
43
+ val = list(val)
36
44
  self.val = val
37
45
  self.id = self._create_id()
38
46
 
39
- def default_column_name(self) -> Optional[str]:
47
+ def default_column_name(self) -> str | None:
40
48
  return 'Literal'
41
49
 
42
50
  def __str__(self) -> str:
@@ -46,6 +54,15 @@ class Literal(Expr):
46
54
  assert isinstance(self.val, datetime.datetime)
47
55
  default_tz = Env.get().default_time_zone
48
56
  return f"'{self.val.astimezone(default_tz).isoformat()}'"
57
+ if self.col_type.is_date_type():
58
+ assert isinstance(self.val, datetime.date)
59
+ return f"'{self.val.isoformat()}'"
60
+ if self.col_type.is_uuid_type():
61
+ assert isinstance(self.val, uuid.UUID)
62
+ return f"'{self.val}'"
63
+ if self.col_type.is_array_type():
64
+ assert isinstance(self.val, np.ndarray)
65
+ return str(self.val.tolist())
49
66
  return str(self.val)
50
67
 
51
68
  def __repr__(self) -> str:
@@ -55,19 +72,18 @@ class Literal(Expr):
55
72
  return self.val == other.val
56
73
 
57
74
  def _id_attrs(self) -> list[tuple[str, Any]]:
58
- return super()._id_attrs() + [('val', self.val)]
75
+ return [*super()._id_attrs(), ('val', self.val)]
59
76
 
60
- def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
61
- # we need to return something here so that we can generate a Where clause for predicates
62
- # that involve literals (like Where c > 0)
63
- return sql.sql.expression.literal(self.val)
77
+ def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
78
+ # Return a sql object so that constants can participate in SQL expressions
79
+ return sql.sql.expression.literal(self.val, type_=self.col_type.to_sa_type())
64
80
 
65
81
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
66
82
  # this will be called, even though sql_expr() does not return None
67
83
  data_row[self.slot_idx] = self.val
68
84
 
69
85
  def _as_dict(self) -> dict:
70
- # For some types, we need to explictly record their type, because JSON does not know
86
+ # For some types, we need to explicitly record their type, because JSON does not know
71
87
  # how to interpret them unambiguously
72
88
  if self.col_type.is_timestamp_type():
73
89
  assert isinstance(self.val, datetime.datetime)
@@ -76,18 +92,47 @@ class Literal(Expr):
76
92
  # stored as UTC in the database)
77
93
  encoded_val = self.val.isoformat()
78
94
  return {'val': encoded_val, 'val_t': self.col_type._type.name, **super()._as_dict()}
95
+ elif self.col_type.is_date_type():
96
+ assert isinstance(self.val, datetime.date)
97
+ encoded_val = self.val.isoformat()
98
+ return {'val': encoded_val, 'val_t': self.col_type._type.name, **super()._as_dict()}
99
+ elif self.col_type.is_uuid_type():
100
+ assert isinstance(self.val, uuid.UUID)
101
+ encoded_val = str(self.val)
102
+ return {'val': encoded_val, 'val_t': self.col_type._type.name, **super()._as_dict()}
103
+ elif self.col_type.is_binary_type():
104
+ assert isinstance(self.val, bytes)
105
+ encoded_val = base64.b64encode(self.val).decode('utf-8')
106
+ return {'val': encoded_val, 'val_t': self.col_type._type.name, **super()._as_dict()}
107
+ elif self.col_type.is_array_type():
108
+ assert isinstance(self.val, np.ndarray)
109
+ return {'val': self.val.tolist(), 'val_t': self.col_type._type.name, **super()._as_dict()}
79
110
  else:
80
111
  return {'val': self.val, **super()._as_dict()}
81
112
 
113
+ def as_literal(self) -> Literal | None:
114
+ return self
115
+
82
116
  @classmethod
83
117
  def _from_dict(cls, d: dict, components: list[Expr]) -> Literal:
84
118
  assert 'val' in d
85
119
  if 'val_t' in d:
86
120
  val_t = d['val_t']
87
- # Currently the only special-cased literal type is TIMESTAMP
88
- assert val_t == ts.ColumnType.Type.TIMESTAMP.name
89
- dt = datetime.datetime.fromisoformat(d['val'])
90
- assert dt.tzinfo == datetime.timezone.utc # Must be UTC in the database
91
- return cls(dt)
92
- else:
93
- return cls(d['val'])
121
+ if val_t == ts.ColumnType.Type.DATE.name:
122
+ dt = datetime.date.fromisoformat(d['val'])
123
+ return cls(dt)
124
+ elif val_t == ts.ColumnType.Type.TIMESTAMP.name:
125
+ dt = datetime.datetime.fromisoformat(d['val'])
126
+ assert dt.tzinfo == datetime.timezone.utc # Must be UTC in the database
127
+ return cls(dt)
128
+ elif val_t == ts.ColumnType.Type.UUID.name:
129
+ uuid_val = uuid.UUID(d['val'])
130
+ return cls(uuid_val)
131
+ elif val_t == ts.ColumnType.Type.BINARY.name:
132
+ assert isinstance(d['val'], str)
133
+ bytes_val = base64.b64decode(d['val'].encode('utf-8'))
134
+ return cls(bytes_val)
135
+ elif val_t == ts.ColumnType.Type.ARRAY.name:
136
+ arrays = np.array(d['val'])
137
+ return cls(arrays)
138
+ return cls(d['val'])
@@ -1,4 +1,4 @@
1
- from typing import Any, Optional
1
+ from typing import Any
2
2
 
3
3
  import sqlalchemy as sql
4
4
 
@@ -19,10 +19,11 @@ class MethodRef(Expr):
19
19
  When a `MethodRef` is called, it returns a `FunctionCall` with the base expression as the first argument.
20
20
  The effective arity of a `MethodRef` is one less than the arity of the underlying `Function`.
21
21
  """
22
+
22
23
  # TODO: Should this even be an `Expr`? It can't actually be evaluated directly (it has to be first
23
24
  # converted to a `FunctionCall` by binding any remaining parameters).
24
25
 
25
- def __init__(self, base_expr: Expr, method_name: str):
26
+ def __init__(self, base_expr: Expr, method_name: str) -> None:
26
27
  super().__init__(ts.InvalidType()) # The `MethodRef` is untyped until it is called.
27
28
  self.base_expr = base_expr
28
29
  self.method_name = method_name
@@ -42,7 +43,7 @@ class MethodRef(Expr):
42
43
  assert len(components) == 1
43
44
  return cls(components[0], d['method_name'])
44
45
 
45
- def __call__(self, *args, **kwargs) -> FunctionCall:
46
+ def __call__(self, *args: Any, **kwargs: Any) -> FunctionCall:
46
47
  result = self.fn(*[self.base_expr, *args], **kwargs)
47
48
  assert isinstance(result, FunctionCall)
48
49
  result.is_method_call = True
@@ -52,13 +53,13 @@ class MethodRef(Expr):
52
53
  return self.base_expr.id == other.base_expr.id and self.method_name == other.method_name
53
54
 
54
55
  def _id_attrs(self) -> list[tuple[str, Any]]:
55
- return super()._id_attrs() + [('method_name', self.method_name)]
56
+ return [*super()._id_attrs(), ('method_name', self.method_name)]
56
57
 
57
- def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
58
+ def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
58
59
  return None
59
60
 
60
61
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
61
- assert False, 'MethodRef cannot be evaluated directly'
62
+ raise AssertionError('MethodRef cannot be evaluated directly')
62
63
 
63
64
  def __repr__(self) -> str:
64
65
  return f'{self.base_expr}.{self.method_name}'
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Optional
3
+ from typing import Any
4
4
 
5
5
  import sqlalchemy as sql
6
6
 
@@ -8,7 +8,7 @@ import pixeltable.type_system as ts
8
8
 
9
9
  from .data_row import DataRow
10
10
  from .expr import Expr, ExprScope
11
- from .json_mapper import JsonMapper
11
+ from .json_mapper import JsonMapperDispatch
12
12
  from .row_builder import RowBuilder
13
13
  from .sql_element_cache import SqlElementCache
14
14
 
@@ -18,26 +18,37 @@ class ObjectRef(Expr):
18
18
  Reference to an intermediate result, such as the "scope variable" produced by a JsonMapper.
19
19
  The object is generated/materialized elsewhere and establishes a new scope.
20
20
  """
21
- def __init__(self, scope: ExprScope, owner: JsonMapper):
21
+
22
+ def __init__(self, scope: ExprScope, owner: JsonMapperDispatch):
22
23
  # TODO: do we need an Unknown type after all?
23
24
  super().__init__(ts.JsonType()) # JsonType: this could be anything
24
25
  self._scope = scope
25
26
  self.owner = owner
26
27
  self.id = self._create_id()
27
28
 
29
+ def _id_attrs(self) -> list[tuple[str, Any]]:
30
+ # We have no components, so we can't rely on the default behavior here (otherwise, all ObjectRef
31
+ # instances will be conflated into a single slot).
32
+ return [('addr', id(self))]
33
+
34
+ def substitute(self, subs: dict[Expr, Expr]) -> Expr:
35
+ # Just return self; we need to avoid creating a new id after doing the substitution, because otherwise
36
+ # we'll wind up in a situation where the scope_anchor of the enclosing JsonMapper is different from the
37
+ # nested ObjectRefs inside its target_expr (and therefore occupies a different slot_idx).
38
+ return self
39
+
28
40
  def scope(self) -> ExprScope:
29
41
  return self._scope
30
42
 
31
- def __str__(self) -> str:
32
- assert False
33
-
34
43
  def _equals(self, other: ObjectRef) -> bool:
35
- return self.owner is other.owner
44
+ return self.id == other.id
36
45
 
37
- def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
46
+ def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
38
47
  return None
39
48
 
40
49
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
41
50
  # this will be called, but the value has already been materialized elsewhere
42
51
  pass
43
52
 
53
+ def __repr__(self) -> str:
54
+ return f'ObjectRef({self.owner}, {self.id}, {self.owner.id})'