pixeltable 0.1.0__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (147) hide show
  1. pixeltable/__init__.py +34 -6
  2. pixeltable/catalog/__init__.py +13 -0
  3. pixeltable/catalog/catalog.py +159 -0
  4. pixeltable/catalog/column.py +200 -0
  5. pixeltable/catalog/dir.py +32 -0
  6. pixeltable/catalog/globals.py +33 -0
  7. pixeltable/catalog/insertable_table.py +191 -0
  8. pixeltable/catalog/named_function.py +36 -0
  9. pixeltable/catalog/path.py +58 -0
  10. pixeltable/catalog/path_dict.py +139 -0
  11. pixeltable/catalog/schema_object.py +39 -0
  12. pixeltable/catalog/table.py +581 -0
  13. pixeltable/catalog/table_version.py +749 -0
  14. pixeltable/catalog/table_version_path.py +133 -0
  15. pixeltable/catalog/view.py +203 -0
  16. pixeltable/client.py +590 -30
  17. pixeltable/dataframe.py +540 -349
  18. pixeltable/env.py +359 -45
  19. pixeltable/exceptions.py +12 -21
  20. pixeltable/exec/__init__.py +9 -0
  21. pixeltable/exec/aggregation_node.py +78 -0
  22. pixeltable/exec/cache_prefetch_node.py +116 -0
  23. pixeltable/exec/component_iteration_node.py +79 -0
  24. pixeltable/exec/data_row_batch.py +95 -0
  25. pixeltable/exec/exec_context.py +22 -0
  26. pixeltable/exec/exec_node.py +61 -0
  27. pixeltable/exec/expr_eval_node.py +217 -0
  28. pixeltable/exec/in_memory_data_node.py +69 -0
  29. pixeltable/exec/media_validation_node.py +43 -0
  30. pixeltable/exec/sql_scan_node.py +225 -0
  31. pixeltable/exprs/__init__.py +24 -0
  32. pixeltable/exprs/arithmetic_expr.py +102 -0
  33. pixeltable/exprs/array_slice.py +71 -0
  34. pixeltable/exprs/column_property_ref.py +77 -0
  35. pixeltable/exprs/column_ref.py +105 -0
  36. pixeltable/exprs/comparison.py +77 -0
  37. pixeltable/exprs/compound_predicate.py +98 -0
  38. pixeltable/exprs/data_row.py +195 -0
  39. pixeltable/exprs/expr.py +586 -0
  40. pixeltable/exprs/expr_set.py +39 -0
  41. pixeltable/exprs/function_call.py +380 -0
  42. pixeltable/exprs/globals.py +69 -0
  43. pixeltable/exprs/image_member_access.py +115 -0
  44. pixeltable/exprs/image_similarity_predicate.py +58 -0
  45. pixeltable/exprs/inline_array.py +107 -0
  46. pixeltable/exprs/inline_dict.py +101 -0
  47. pixeltable/exprs/is_null.py +38 -0
  48. pixeltable/exprs/json_mapper.py +121 -0
  49. pixeltable/exprs/json_path.py +159 -0
  50. pixeltable/exprs/literal.py +54 -0
  51. pixeltable/exprs/object_ref.py +41 -0
  52. pixeltable/exprs/predicate.py +44 -0
  53. pixeltable/exprs/row_builder.py +355 -0
  54. pixeltable/exprs/rowid_ref.py +94 -0
  55. pixeltable/exprs/type_cast.py +53 -0
  56. pixeltable/exprs/variable.py +45 -0
  57. pixeltable/func/__init__.py +9 -0
  58. pixeltable/func/aggregate_function.py +194 -0
  59. pixeltable/func/batched_function.py +53 -0
  60. pixeltable/func/callable_function.py +69 -0
  61. pixeltable/func/expr_template_function.py +82 -0
  62. pixeltable/func/function.py +110 -0
  63. pixeltable/func/function_registry.py +227 -0
  64. pixeltable/func/globals.py +36 -0
  65. pixeltable/func/nos_function.py +202 -0
  66. pixeltable/func/signature.py +166 -0
  67. pixeltable/func/udf.py +163 -0
  68. pixeltable/functions/__init__.py +52 -103
  69. pixeltable/functions/eval.py +216 -0
  70. pixeltable/functions/fireworks.py +34 -0
  71. pixeltable/functions/huggingface.py +120 -0
  72. pixeltable/functions/image.py +16 -0
  73. pixeltable/functions/openai.py +256 -0
  74. pixeltable/functions/pil/image.py +148 -7
  75. pixeltable/functions/string.py +13 -0
  76. pixeltable/functions/together.py +122 -0
  77. pixeltable/functions/util.py +41 -0
  78. pixeltable/functions/video.py +62 -0
  79. pixeltable/iterators/__init__.py +3 -0
  80. pixeltable/iterators/base.py +48 -0
  81. pixeltable/iterators/document.py +311 -0
  82. pixeltable/iterators/video.py +89 -0
  83. pixeltable/metadata/__init__.py +54 -0
  84. pixeltable/metadata/converters/convert_10.py +18 -0
  85. pixeltable/metadata/schema.py +211 -0
  86. pixeltable/plan.py +656 -0
  87. pixeltable/store.py +418 -182
  88. pixeltable/tests/conftest.py +146 -88
  89. pixeltable/tests/functions/test_fireworks.py +42 -0
  90. pixeltable/tests/functions/test_functions.py +60 -0
  91. pixeltable/tests/functions/test_huggingface.py +158 -0
  92. pixeltable/tests/functions/test_openai.py +152 -0
  93. pixeltable/tests/functions/test_together.py +111 -0
  94. pixeltable/tests/test_audio.py +65 -0
  95. pixeltable/tests/test_catalog.py +27 -0
  96. pixeltable/tests/test_client.py +14 -14
  97. pixeltable/tests/test_component_view.py +370 -0
  98. pixeltable/tests/test_dataframe.py +439 -0
  99. pixeltable/tests/test_dirs.py +78 -62
  100. pixeltable/tests/test_document.py +120 -0
  101. pixeltable/tests/test_exprs.py +592 -135
  102. pixeltable/tests/test_function.py +297 -67
  103. pixeltable/tests/test_migration.py +43 -0
  104. pixeltable/tests/test_nos.py +54 -0
  105. pixeltable/tests/test_snapshot.py +208 -0
  106. pixeltable/tests/test_table.py +1195 -263
  107. pixeltable/tests/test_transactional_directory.py +42 -0
  108. pixeltable/tests/test_types.py +5 -11
  109. pixeltable/tests/test_video.py +151 -34
  110. pixeltable/tests/test_view.py +530 -0
  111. pixeltable/tests/utils.py +320 -45
  112. pixeltable/tool/create_test_db_dump.py +149 -0
  113. pixeltable/tool/create_test_video.py +81 -0
  114. pixeltable/type_system.py +445 -124
  115. pixeltable/utils/__init__.py +17 -46
  116. pixeltable/utils/arrow.py +98 -0
  117. pixeltable/utils/clip.py +12 -15
  118. pixeltable/utils/coco.py +136 -0
  119. pixeltable/utils/documents.py +39 -0
  120. pixeltable/utils/filecache.py +195 -0
  121. pixeltable/utils/help.py +11 -0
  122. pixeltable/utils/hf_datasets.py +157 -0
  123. pixeltable/utils/media_store.py +76 -0
  124. pixeltable/utils/parquet.py +167 -0
  125. pixeltable/utils/pytorch.py +91 -0
  126. pixeltable/utils/s3.py +13 -0
  127. pixeltable/utils/sql.py +17 -0
  128. pixeltable/utils/transactional_directory.py +35 -0
  129. pixeltable-0.2.4.dist-info/LICENSE +18 -0
  130. pixeltable-0.2.4.dist-info/METADATA +127 -0
  131. pixeltable-0.2.4.dist-info/RECORD +132 -0
  132. {pixeltable-0.1.0.dist-info → pixeltable-0.2.4.dist-info}/WHEEL +1 -1
  133. pixeltable/catalog.py +0 -1421
  134. pixeltable/exprs.py +0 -1745
  135. pixeltable/function.py +0 -269
  136. pixeltable/functions/clip.py +0 -10
  137. pixeltable/functions/pil/__init__.py +0 -23
  138. pixeltable/functions/tf.py +0 -21
  139. pixeltable/index.py +0 -57
  140. pixeltable/tests/test_dict.py +0 -24
  141. pixeltable/tests/test_functions.py +0 -11
  142. pixeltable/tests/test_tf.py +0 -69
  143. pixeltable/tf.py +0 -33
  144. pixeltable/utils/tf.py +0 -33
  145. pixeltable/utils/video.py +0 -32
  146. pixeltable-0.1.0.dist-info/METADATA +0 -34
  147. pixeltable-0.1.0.dist-info/RECORD +0 -36
@@ -0,0 +1,107 @@
1
+ from __future__ import annotations
2
+ from typing import Optional, List, Any, Dict, Tuple
3
+ import copy
4
+
5
+ import sqlalchemy as sql
6
+ import numpy as np
7
+
8
+ from .expr import Expr
9
+ from .data_row import DataRow
10
+ from .inline_dict import InlineDict
11
+ from .row_builder import RowBuilder
12
+ import pixeltable.catalog as catalog
13
+ import pixeltable.type_system as ts
14
+
15
+
16
class InlineArray(Expr):
    """
    Array 'literal' which can use Exprs as values.

    The literal can be cast as either a pixeltable `ArrayType` or `JsonType`. If `force_json`
    is `True`, it will always be cast as a `JsonType`. If `force_json` is `False`, it will be cast as an
    `ArrayType` if it is a homogenous array of scalars or arrays, or a `JsonType` otherwise.
    """
    def __init__(self, elements: Tuple, force_json: bool = False):
        # we need to call this in order to populate self.components;
        # the type passed here is a placeholder, the real col_type is assigned below
        super().__init__(ts.ArrayType((len(elements),), ts.IntType()))

        # remembered so that _as_dict()/_from_dict() reproduce the same col_type
        self._force_json = force_json

        # elements contains
        # - for Expr elements: (index into components, None)
        # - for non-Expr elements: (-1, value)
        self.elements: List[Tuple[int, Any]] = []
        for el in elements:
            el = copy.deepcopy(el)
            if isinstance(el, list):
                # If col_type is an ArrayType, we'll require it to be a multidimensional array
                # of the specified underlying type
                el = InlineArray(tuple(el), force_json)
            if isinstance(el, dict):
                el = InlineDict(el)
            if isinstance(el, Expr):
                self.elements.append((len(self.components), None))
                self.components.append(el)
            else:
                self.elements.append((-1, el))

        # fold the element types into a common supertype; None is treated as 'no common supertype'
        inferred_element_type = ts.InvalidType()
        for idx, val in self.elements:
            if idx >= 0:
                element_type = self.components[idx].col_type
            else:
                element_type = ts.ColumnType.infer_literal_type(val)
            inferred_element_type = ts.ColumnType.supertype(inferred_element_type, element_type)
            if inferred_element_type is None:
                break

        if force_json or inferred_element_type is None:
            # JSON conversion is forced, or there is no common supertype
            # TODO: make sure this doesn't contain Images
            self.col_type = ts.JsonType()
        elif inferred_element_type.is_scalar_type():
            self.col_type = ts.ArrayType((len(self.elements),), inferred_element_type)
        elif inferred_element_type.is_array_type():
            assert isinstance(inferred_element_type, ts.ArrayType)
            # prepend our own length to the shape of the nested arrays
            self.col_type = ts.ArrayType(
                (len(self.elements), *inferred_element_type.shape), ts.ColumnType.make_type(inferred_element_type.dtype))
        else:
            self.col_type = ts.JsonType()

        self.id = self._create_id()

    def __str__(self) -> str:
        # decide by the stored index, not by the value: a literal element may legitimately be None,
        # in which case its index is -1 and must not be used to look up a component
        elem_strs = [str(self.components[idx]) if idx >= 0 else str(val) for idx, val in self.elements]
        return f'[{", ".join(elem_strs)}]'

    def _equals(self, other: InlineArray) -> bool:
        return self.elements == other.elements

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        return super()._id_attrs() + [('elements', self.elements)]

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        # no SQL equivalent; eval() produces the value
        return None

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        """Assemble the array from component slots and literal values and store it in our slot."""
        result = [None] * len(self.elements)
        for i, (child_idx, val) in enumerate(self.elements):
            if child_idx >= 0:
                result[i] = data_row[self.components[child_idx].slot_idx]
            else:
                # copy so that the stored literal is never shared between rows
                result[i] = copy.deepcopy(val)
        if self.col_type.is_array_type():
            data_row[self.slot_idx] = np.array(result)
        else:
            data_row[self.slot_idx] = result

    def _as_dict(self) -> Dict:
        return {'elements': self.elements, 'force_json': self._force_json, **super()._as_dict()}

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        """Rebuild the constructor argument from the serialized (index, value) pairs."""
        assert 'elements' in d
        arg: List[Any] = []
        for idx, val in d['elements']:
            if idx >= 0:
                arg.append(components[idx])
            else:
                arg.append(val)
        # dicts written before 'force_json' was serialized fall back to the constructor default
        return cls(tuple(arg), force_json=d.get('force_json', False))
@@ -0,0 +1,101 @@
1
+ from __future__ import annotations
2
+ from typing import Optional, List, Any, Dict, Tuple
3
+ import copy
4
+
5
+ import sqlalchemy as sql
6
+
7
+ from .expr import Expr
8
+ from .data_row import DataRow
9
+ from .row_builder import RowBuilder
10
+ import pixeltable.exceptions as excs
11
+ import pixeltable.catalog as catalog
12
+ import pixeltable.type_system as ts
13
+
14
+
15
class InlineDict(Expr):
    """
    Dictionary 'literal' which can use Exprs as values.

    Keys must be strings. Nested dicts become InlineDicts; nested lists/tuples become
    InlineArrays that are forced to JSON.
    """
    def __init__(self, d: Dict):
        from .inline_array import InlineArray
        super().__init__(ts.JsonType())  # we need to call this in order to populate self.components
        # dict_items contains
        # - for Expr fields: (key, index into components, None)
        # - for non-Expr fields: (key, -1, value)
        self.dict_items: List[Tuple[str, int, Any]] = []
        for key, val in d.items():
            if not isinstance(key, str):
                raise excs.Error(f'Dictionary requires string keys, {key} has type {type(key)}')
            val = copy.deepcopy(val)
            if isinstance(val, dict):
                val = InlineDict(val)
            if isinstance(val, (list, tuple)):
                val = InlineArray(tuple(val), force_json=True)
            if isinstance(val, Expr):
                self.dict_items.append((key, len(self.components), None))
                self.components.append(val)
            else:
                self.dict_items.append((key, -1, val))

        # a type spec is only available when every value is an Expr
        # TODO: implement type inference for values
        self.type_spec: Optional[Dict[str, ts.ColumnType]]
        if any(idx == -1 for _, idx, _ in self.dict_items):
            self.type_spec = None
        else:
            self.type_spec = {key: self.components[idx].col_type for key, idx, _ in self.dict_items}
        self.col_type = ts.JsonType(self.type_spec)

        self.id = self._create_id()

    def __str__(self) -> str:
        def print_val(val: Any) -> str:
            if isinstance(val, str):
                return f"'{val}'"
            return str(val)

        item_strs: List[str] = []
        for key, idx, val in self.dict_items:
            # use the stored component index, the same way eval() and to_dict() do
            val_str = str(self.components[idx]) if idx != -1 else print_val(val)
            item_strs.append(f"'{key}': {val_str}")
        return '{' + ', '.join(item_strs) + '}'

    def _equals(self, other: InlineDict) -> bool:
        return self.dict_items == other.dict_items

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        return super()._id_attrs() + [('dict_items', self.dict_items)]

    def to_dict(self) -> Dict[str, Any]:
        """Return the original dict used to construct this"""
        return {key: val if idx == -1 else self.components[idx] for key, idx, val in self.dict_items}

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        # no SQL equivalent; eval() produces the value
        return None

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        """Assemble the dict from component slots and literal values and store it in our slot."""
        result = {}
        for key, idx, val in self.dict_items:
            assert isinstance(key, str)
            if idx >= 0:
                result[key] = data_row[self.components[idx].slot_idx]
            else:
                # copy so that the stored literal is never shared between rows
                result[key] = copy.deepcopy(val)
        data_row[self.slot_idx] = result

    def _as_dict(self) -> Dict:
        return {'dict_items': self.dict_items, **super()._as_dict()}

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        """Rebuild the constructor argument from the serialized (key, index, value) triples."""
        assert 'dict_items' in d
        arg: Dict[str, Any] = {}
        for key, idx, val in d['dict_items']:
            if idx >= 0:
                arg[key] = components[idx]
            else:
                arg[key] = val
        return cls(arg)
@@ -0,0 +1,38 @@
1
+ from __future__ import annotations
2
+ from typing import Optional, List, Dict
3
+
4
+ import sqlalchemy as sql
5
+
6
+ from .predicate import Predicate
7
+ from .expr import Expr
8
+ from .data_row import DataRow
9
+ from .row_builder import RowBuilder
10
+ import pixeltable.catalog as catalog
11
+
12
+
13
class IsNull(Predicate):
    """Predicate over a single operand that tests whether the operand's value is None."""
    def __init__(self, e: Expr):
        super().__init__()
        # the operand is our only component
        self.components = [e]
        self.id = self._create_id()

    def __str__(self) -> str:
        return str(self.components[0]) + ' == None'

    def _equals(self, other: IsNull) -> bool:
        # no state of our own to compare
        return True

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        operand_sql = self.components[0].sql_expr()
        # '== None' (rather than 'is None') is deliberate: it is applied to the operand's SQL
        # expression so that a clause is returned instead of a Python bool
        return None if operand_sql is None else operand_sql == None

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        operand_val = data_row[self.components[0].slot_idx]
        data_row[self.slot_idx] = operand_val is None

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        assert len(components) == 1
        operand = components[0]
        return cls(operand)
38
+
@@ -0,0 +1,121 @@
1
+ from __future__ import annotations
2
+ from typing import Optional, List, Dict
3
+
4
+ import sqlalchemy as sql
5
+
6
+ from .expr import Expr, ExprScope, _GLOBAL_SCOPE
7
+ from .data_row import DataRow
8
+ from .row_builder import RowBuilder
9
+ import pixeltable.catalog as catalog
10
+ import pixeltable.type_system as ts
11
+
12
+
13
class JsonMapper(Expr):
    """
    Maps a target expr over the list produced by a source expr (typically a JsonPath).

    The target expr usually contains relative JsonPaths. Those are bound to an ObjectRef (our
    'scope anchor'), whose slot is filled in by JsonMapper.eval() with the current list element.
    In effect, a JsonMapper opens a new scope for its target expr.

    components: [src_expr, target_expr, scope_anchor]
    """
    def __init__(self, src_expr: Expr, target_expr: Expr):
        # TODO: type spec should be List[target_expr.col_type]
        super().__init__(ts.JsonType())

        # whether this mapper is nested inside another one is only known once bind_rel_paths()
        # runs; until then the new scope is parented to the global scope
        self.target_expr_scope = ExprScope(_GLOBAL_SCOPE)

        from .object_ref import ObjectRef
        anchor = ObjectRef(self.target_expr_scope, self)
        self.components = [src_expr, target_expr, anchor]
        self.parent_mapper: Optional[JsonMapper] = None
        # created lazily on the first eval() call
        self.target_expr_eval_ctx: Optional[RowBuilder.EvalCtx] = None
        self.id = self._create_id()

    def bind_rel_paths(self, mapper: Optional[JsonMapper]) -> None:
        """Bind relative paths: src_expr against the enclosing mapper, target_expr against ourselves."""
        self._src_expr.bind_rel_paths(mapper)
        self._target_expr.bind_rel_paths(self)
        self.parent_mapper = mapper
        # now that the enclosing mapper is known, re-parent our scope accordingly
        if mapper is None:
            self.target_expr_scope.parent = _GLOBAL_SCOPE
        else:
            self.target_expr_scope.parent = mapper.target_expr_scope

    def scope(self) -> ExprScope:
        # need to ignore target_expr
        return self._src_expr.scope()

    def dependencies(self) -> List[Expr]:
        return [self._src_expr, *self._target_dependencies(self._target_expr)]

    def _target_dependencies(self, e: Expr) -> List[Expr]:
        """
        Collect the subexprs of e whose scope lies outside of target_expr_scope.
        Those have to be evaluated before this mapper.
        """
        if not e.scope().is_contained_in(self.target_expr_scope):
            return [e]
        deps: List[Expr] = []
        for child in e.components:
            deps += self._target_dependencies(child)
        return deps

    def equals(self, other: Expr) -> bool:
        """
        Overrides equals() so that the scope anchor is left out of the comparison.
        """
        if type(self) != type(other):
            return False
        return self._src_expr.equals(other._src_expr) and self._target_expr.equals(other._target_expr)

    def __str__(self) -> str:
        return f'{str(self._src_expr)} >> {str(self._target_expr)}'

    @property
    def _src_expr(self) -> Expr:
        return self.components[0]

    @property
    def _target_expr(self) -> Expr:
        return self.components[1]

    @property
    def scope_anchor(self) -> Expr:
        return self.components[2]

    def _equals(self, other: JsonMapper) -> bool:
        return True

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        return None

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        """Evaluate target_expr once per element of the source list and store the list of results."""
        src = data_row[self._src_expr.slot_idx]
        if not isinstance(src, list):
            # invalid/non-list src path
            data_row[self.slot_idx] = None
            return

        if self.target_expr_eval_ctx is None:
            self.target_expr_eval_ctx = row_builder.create_eval_ctx([self._target_expr])
        mapped: List = []
        for item in src:
            # expose the current element through the scope anchor, then evaluate target_expr against it
            data_row[self.scope_anchor.slot_idx] = item
            exc_tb = row_builder.eval(data_row, self.target_expr_eval_ctx)
            assert exc_tb is None
            mapped.append(data_row[self._target_expr.slot_idx])
        data_row[self.slot_idx] = mapped

    def _as_dict(self) -> Dict:
        """
        Serializes only src_expr and target_expr; component[2] (the ObjectRef) is left out.
        """
        return {'components': [c.as_dict() for c in self.components[:2]]}

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        assert len(components) == 2
        src_expr, target_expr = components[0], components[1]
        return cls(src_expr, target_expr)
121
+
@@ -0,0 +1,159 @@
1
+ from __future__ import annotations
2
+ from typing import Optional, List, Any, Dict, Tuple, Union
3
+
4
+ import jmespath
5
+ import sqlalchemy as sql
6
+
7
+ from .globals import print_slice
8
+ from .expr import Expr
9
+ from .json_mapper import JsonMapper
10
+ from .data_row import DataRow
11
+ from .row_builder import RowBuilder
12
+ import pixeltable
13
+ import pixeltable.exceptions as excs
14
+ import pixeltable.catalog as catalog
15
+ import pixeltable.type_system as ts
16
+
17
+
18
class JsonPath(Expr):
    """
    Path into a JSON value, built up via attribute access (`.name`) and indexing (`[i]`, `[a:b]`, `['*']`).

    A path is either anchored (components == [anchor]) or relative (no components yet); relative paths
    receive their anchor later via set_anchor().
    """
    def __init__(self, anchor: Optional['pixeltable.exprs.ColumnRef'], path_elements: Optional[List[str]] = None, scope_idx: int = 0):
        """
        anchor can be None, in which case this is a relative JsonPath and the anchor is set later via set_anchor().
        scope_idx: for relative paths, index of referenced JsonMapper
        (0: indicates the immediately preceding JsonMapper, -1: the parent of the immediately preceding mapper, ...)
        """
        if path_elements is None:
            path_elements = []
        super().__init__(ts.JsonType())
        if anchor is not None:
            self.components = [anchor]
        self.path_elements: List[Union[str, int]] = path_elements
        self.scope_idx = scope_idx
        # an empty path selects the anchor's value as-is, so there is nothing to compile
        self.compiled_path = jmespath.compile(self._json_path()) if len(path_elements) > 0 else None
        # NOTE: the _create_id() result will change if set_anchor() gets called;
        # this is not a problem, because _create_id() shouldn't be called after init()
        self.id = self._create_id()

    def __str__(self) -> str:
        # "R": the anchor is RELATIVE_PATH_ROOT
        anchor_str = str(self._anchor) if self._anchor is not None else 'R'
        if len(self.path_elements) == 0:
            # bare anchor/root: _json_path() requires at least one element
            return anchor_str
        separator = '.' if isinstance(self.path_elements[0], str) else ''
        return f'{anchor_str}{separator}{self._json_path()}'

    def _as_dict(self) -> Dict:
        return {'path_elements': self.path_elements, 'scope_idx': self.scope_idx, **super()._as_dict()}

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        assert 'path_elements' in d
        assert 'scope_idx' in d
        assert len(components) <= 1
        anchor = components[0] if len(components) == 1 else None
        return cls(anchor, d['path_elements'], d['scope_idx'])

    @property
    def _anchor(self) -> Optional[Expr]:
        return None if len(self.components) == 0 else self.components[0]

    def set_anchor(self, anchor: Expr) -> None:
        assert len(self.components) == 0
        self.components = [anchor]

    def is_relative_path(self) -> bool:
        return self._anchor is None

    def bind_rel_paths(self, mapper: Optional['JsonMapper'] = None) -> None:
        """Anchor a relative path at the scope anchor of the given mapper; no-op for anchored paths."""
        if not self.is_relative_path():
            return
        # TODO: take scope_idx into account
        self.set_anchor(mapper.scope_anchor)

    def __call__(self, *args: object, **kwargs: object) -> 'JsonPath':
        """
        Construct a relative path that references an ancestor of the immediately enclosing JsonMapper.
        """
        if not self.is_relative_path():
            raise excs.Error(f'() for an absolute path is invalid')
        if len(args) != 1 or not isinstance(args[0], int) or args[0] >= 0:
            raise excs.Error(f'R() requires a negative index')
        return JsonPath(None, [], args[0])

    def __getattr__(self, name: str) -> 'JsonPath':
        """Extend the path by a field name (invoked for any attribute that isn't found normally)."""
        assert isinstance(name, str)
        # carry scope_idx along, otherwise R(-1).foo would silently lose its scope
        return JsonPath(self._anchor, self.path_elements + [name], self.scope_idx)

    def __getitem__(self, index: object) -> 'JsonPath':
        """Extend the path by a list index: an int, a slice, or the wildcard '*'."""
        if isinstance(index, str):
            if index != '*':
                raise excs.Error(f'Invalid json list index: {index}')
        else:
            if not isinstance(index, slice) and not isinstance(index, int):
                raise excs.Error(f'Invalid json list index: {index}')
        # carry scope_idx along, same as in __getattr__()
        return JsonPath(self._anchor, self.path_elements + [index], self.scope_idx)

    def __rshift__(self, other: object) -> 'JsonMapper':
        """`path >> expr`: map expr over the list selected by this path."""
        rhs_expr = Expr.from_object(other)
        if rhs_expr is None:
            raise excs.Error(f'>> requires an expression on the right-hand side, found {type(other)}')
        return JsonMapper(self, rhs_expr)

    def default_column_name(self) -> Optional[str]:
        """Derive an identifier from anchor name and path; None if nothing usable remains."""
        anchor_name = self._anchor.default_column_name() if self._anchor is not None else ''
        if len(self.path_elements) > 0:
            ret_name = f'{anchor_name}.{self._json_path()}'
        else:
            # bare anchor/root: _json_path() requires at least one element
            ret_name = f'{anchor_name}'

        def cleanup_char(s: str) -> str:
            if s == '.':
                return '_'
            elif s == '*':
                return 'star'
            elif s.isalnum():
                return s
            else:
                return ''

        clean_name = ''.join(map(cleanup_char, ret_name))
        clean_name = clean_name.lstrip('_')  # remove leading underscore
        if clean_name == '':
            clean_name = None

        assert clean_name is None or catalog.is_valid_identifier(clean_name)
        return clean_name

    def _equals(self, other: JsonPath) -> bool:
        return self.path_elements == other.path_elements

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        return super()._id_attrs() + [('path_elements', self.path_elements)]

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        """
        Postgres appears to have a bug: jsonb_path_query('{a: [{b: 0}, {b: 1}]}', '$.a.b') returns
        *two* rows (each containing col val 0), not a single row with [0, 0].
        We need to use a workaround: retrieve the entire dict, then use jmespath to extract the path correctly.
        """
        #path_str = '$.' + '.'.join(self.path_elements)
        #assert isinstance(self._anchor(), ColumnRef)
        #return sql.func.jsonb_path_query(self._anchor().col.sa_col, path_str)
        return None

    def _json_path(self) -> str:
        """Render path_elements as the path string that gets passed to jmespath.compile()."""
        assert len(self.path_elements) > 0
        result: List[str] = []
        for element in self.path_elements:
            if element == '*':
                result.append('[*]')
            elif isinstance(element, str):
                # field names are dot-separated, except at the very start
                result.append(f'{"." if len(result) > 0 else ""}{element}')
            elif isinstance(element, int):
                result.append(f'[{element}]')
            elif isinstance(element, slice):
                result.append(f'[{print_slice(element)}]')
        return ''.join(result)

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        val = data_row[self._anchor.slot_idx]
        if self.compiled_path is not None:
            val = self.compiled_path.search(val)
        data_row[self.slot_idx] = val


# relative path without an anchor and without path elements
RELATIVE_PATH_ROOT = JsonPath(None)
@@ -0,0 +1,54 @@
1
+ from __future__ import annotations
2
+ from typing import Optional, List, Any, Dict, Tuple
3
+
4
+ import sqlalchemy as sql
5
+
6
+ from .expr import Expr
7
+ from .data_row import DataRow
8
+ from .row_builder import RowBuilder
9
+ import pixeltable.catalog as catalog
10
+ import pixeltable.type_system as ts
11
+
12
class Literal(Expr):
    """Expr wrapping a constant value; the column type is either given or inferred from the value."""
    def __init__(self, val: Any, col_type: Optional[ts.ColumnType] = None):
        if col_type is None:
            # no explicit type: try to determine one from val
            col_type = ts.ColumnType.infer_literal_type(val)
            if col_type is None:
                raise TypeError(f'Not a valid literal: {val}')
        else:
            # explicit type: let it produce the literal for val
            val = col_type.create_literal(val)
        super().__init__(col_type)
        self.val = val
        self.id = self._create_id()

    def default_column_name(self) -> Optional[str]:
        return 'Literal'

    def __str__(self) -> str:
        # strings and timestamps are shown in single quotes
        needs_quotes = self.col_type.is_string_type() or self.col_type.is_timestamp_type()
        return f"'{self.val}'" if needs_quotes else str(self.val)

    def _equals(self, other: Literal) -> bool:
        return self.val == other.val

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        return super()._id_attrs() + [('val', self.val)]

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        # a SQL literal is returned so that predicates involving literals (like Where c > 0)
        # can be turned into a Where clause
        return sql.sql.expression.literal(self.val)

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        # gets called even though sql_expr() returns a clause
        data_row[self.slot_idx] = self.val

    def _as_dict(self) -> Dict:
        return {'val': self.val, **super()._as_dict()}

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        assert 'val' in d
        stored_val = d['val']
        return cls(stored_val)
@@ -0,0 +1,41 @@
1
+ from __future__ import annotations
2
+ from typing import Optional, List, Any, Dict, Tuple
3
+ import copy
4
+
5
+ import sqlalchemy as sql
6
+
7
+ from .expr import Expr, ExprScope
8
+ from .json_mapper import JsonMapper
9
+ from .data_row import DataRow
10
+ from .row_builder import RowBuilder
11
+ import pixeltable.type_system as ts
12
+
13
+
14
class ObjectRef(Expr):
    """
    Stand-in for an intermediate result that some other expr produces, e.g. the "scope variable"
    of a JsonMapper. The referenced object is materialized elsewhere and establishes a new scope.
    """
    def __init__(self, scope: ExprScope, owner: JsonMapper):
        # TODO: do we need an Unknown type after all?
        super().__init__(ts.JsonType())  # JsonType: this could be anything
        self.owner = owner
        self._scope = scope
        self.id = self._create_id()

    def scope(self) -> ExprScope:
        return self._scope

    def __str__(self) -> str:
        # not expected to be called
        assert False

    def _equals(self, other: ObjectRef) -> bool:
        # two refs are equal only if they belong to the very same owner object
        return self.owner is other.owner

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        return None

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        # gets called, but there is nothing to do: the value was already materialized elsewhere
        pass
41
+
@@ -0,0 +1,44 @@
1
+ from __future__ import annotations
2
+ from typing import Optional, List, Tuple, Callable
3
+
4
+ from .expr import Expr
5
+ from .globals import LogicalOperator
6
+ import pixeltable
7
+ import pixeltable.type_system as ts
8
+
9
+
10
class Predicate(Expr):
    """Base class for boolean-valued exprs; supports composition via `&`, `|` and `~`."""
    def __init__(self) -> None:
        super().__init__(ts.BoolType())

    def split_conjuncts(
            self, condition: Callable[[Predicate], bool]) -> Tuple[List[Predicate], Optional[Predicate]]:
        """
        Splits a conjunction into the clauses that satisfy condition (first element) and the
        remaining clauses, rolled into a conjunction (second element).
        This base implementation treats self as a single clause.
        """
        return ([self], None) if condition(self) else ([], self)

    def _binary_logical_op(self, operator: LogicalOperator, other: object) -> 'pixeltable.exprs.CompoundPredicate':
        """Validate other as a boolean expression and combine it with self under operator."""
        if not isinstance(other, Expr):
            raise TypeError(f'Other needs to be an expression: {type(other)}')
        if not other.col_type.is_bool_type():
            raise TypeError(f'Other needs to be an expression that returns a boolean: {other.col_type}')
        # imported here rather than at module level
        from .compound_predicate import CompoundPredicate
        return CompoundPredicate(operator, [self, other])

    def __and__(self, other: object) -> 'pixeltable.exprs.CompoundPredicate':
        return self._binary_logical_op(LogicalOperator.AND, other)

    def __or__(self, other: object) -> 'pixeltable.exprs.CompoundPredicate':
        return self._binary_logical_op(LogicalOperator.OR, other)

    def __invert__(self) -> 'pixeltable.exprs.CompoundPredicate':
        from .compound_predicate import CompoundPredicate
        operands = [self]
        return CompoundPredicate(LogicalOperator.NOT, operands)
44
+