pixeltable 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +34 -6
- pixeltable/catalog/__init__.py +13 -0
- pixeltable/catalog/catalog.py +159 -0
- pixeltable/catalog/column.py +200 -0
- pixeltable/catalog/dir.py +32 -0
- pixeltable/catalog/globals.py +33 -0
- pixeltable/catalog/insertable_table.py +191 -0
- pixeltable/catalog/named_function.py +36 -0
- pixeltable/catalog/path.py +58 -0
- pixeltable/catalog/path_dict.py +139 -0
- pixeltable/catalog/schema_object.py +39 -0
- pixeltable/catalog/table.py +581 -0
- pixeltable/catalog/table_version.py +749 -0
- pixeltable/catalog/table_version_path.py +133 -0
- pixeltable/catalog/view.py +203 -0
- pixeltable/client.py +520 -30
- pixeltable/dataframe.py +540 -349
- pixeltable/env.py +373 -45
- pixeltable/exceptions.py +12 -21
- pixeltable/exec/__init__.py +9 -0
- pixeltable/exec/aggregation_node.py +78 -0
- pixeltable/exec/cache_prefetch_node.py +113 -0
- pixeltable/exec/component_iteration_node.py +79 -0
- pixeltable/exec/data_row_batch.py +95 -0
- pixeltable/exec/exec_context.py +22 -0
- pixeltable/exec/exec_node.py +61 -0
- pixeltable/exec/expr_eval_node.py +217 -0
- pixeltable/exec/in_memory_data_node.py +69 -0
- pixeltable/exec/media_validation_node.py +43 -0
- pixeltable/exec/sql_scan_node.py +225 -0
- pixeltable/exprs/__init__.py +24 -0
- pixeltable/exprs/arithmetic_expr.py +102 -0
- pixeltable/exprs/array_slice.py +71 -0
- pixeltable/exprs/column_property_ref.py +77 -0
- pixeltable/exprs/column_ref.py +105 -0
- pixeltable/exprs/comparison.py +77 -0
- pixeltable/exprs/compound_predicate.py +98 -0
- pixeltable/exprs/data_row.py +187 -0
- pixeltable/exprs/expr.py +586 -0
- pixeltable/exprs/expr_set.py +39 -0
- pixeltable/exprs/function_call.py +380 -0
- pixeltable/exprs/globals.py +69 -0
- pixeltable/exprs/image_member_access.py +115 -0
- pixeltable/exprs/image_similarity_predicate.py +58 -0
- pixeltable/exprs/inline_array.py +107 -0
- pixeltable/exprs/inline_dict.py +101 -0
- pixeltable/exprs/is_null.py +38 -0
- pixeltable/exprs/json_mapper.py +121 -0
- pixeltable/exprs/json_path.py +159 -0
- pixeltable/exprs/literal.py +54 -0
- pixeltable/exprs/object_ref.py +41 -0
- pixeltable/exprs/predicate.py +44 -0
- pixeltable/exprs/row_builder.py +355 -0
- pixeltable/exprs/rowid_ref.py +94 -0
- pixeltable/exprs/type_cast.py +53 -0
- pixeltable/exprs/variable.py +45 -0
- pixeltable/func/__init__.py +9 -0
- pixeltable/func/aggregate_function.py +194 -0
- pixeltable/func/batched_function.py +53 -0
- pixeltable/func/callable_function.py +69 -0
- pixeltable/func/expr_template_function.py +82 -0
- pixeltable/func/function.py +110 -0
- pixeltable/func/function_registry.py +227 -0
- pixeltable/func/globals.py +36 -0
- pixeltable/func/nos_function.py +202 -0
- pixeltable/func/signature.py +166 -0
- pixeltable/func/udf.py +163 -0
- pixeltable/functions/__init__.py +52 -103
- pixeltable/functions/eval.py +216 -0
- pixeltable/functions/fireworks.py +61 -0
- pixeltable/functions/huggingface.py +120 -0
- pixeltable/functions/image.py +16 -0
- pixeltable/functions/openai.py +88 -0
- pixeltable/functions/pil/image.py +148 -7
- pixeltable/functions/string.py +13 -0
- pixeltable/functions/together.py +27 -0
- pixeltable/functions/util.py +41 -0
- pixeltable/functions/video.py +62 -0
- pixeltable/iterators/__init__.py +3 -0
- pixeltable/iterators/base.py +48 -0
- pixeltable/iterators/document.py +311 -0
- pixeltable/iterators/video.py +89 -0
- pixeltable/metadata/__init__.py +54 -0
- pixeltable/metadata/converters/convert_10.py +18 -0
- pixeltable/metadata/schema.py +211 -0
- pixeltable/plan.py +656 -0
- pixeltable/store.py +413 -182
- pixeltable/tests/conftest.py +143 -87
- pixeltable/tests/test_audio.py +65 -0
- pixeltable/tests/test_catalog.py +27 -0
- pixeltable/tests/test_client.py +14 -14
- pixeltable/tests/test_component_view.py +372 -0
- pixeltable/tests/test_dataframe.py +433 -0
- pixeltable/tests/test_dirs.py +78 -62
- pixeltable/tests/test_document.py +117 -0
- pixeltable/tests/test_exprs.py +591 -135
- pixeltable/tests/test_function.py +297 -67
- pixeltable/tests/test_functions.py +283 -1
- pixeltable/tests/test_migration.py +43 -0
- pixeltable/tests/test_nos.py +54 -0
- pixeltable/tests/test_snapshot.py +208 -0
- pixeltable/tests/test_table.py +1085 -262
- pixeltable/tests/test_transactional_directory.py +42 -0
- pixeltable/tests/test_types.py +5 -11
- pixeltable/tests/test_video.py +149 -34
- pixeltable/tests/test_view.py +530 -0
- pixeltable/tests/utils.py +186 -45
- pixeltable/tool/create_test_db_dump.py +149 -0
- pixeltable/type_system.py +490 -126
- pixeltable/utils/__init__.py +17 -46
- pixeltable/utils/clip.py +12 -15
- pixeltable/utils/coco.py +136 -0
- pixeltable/utils/documents.py +39 -0
- pixeltable/utils/filecache.py +195 -0
- pixeltable/utils/help.py +11 -0
- pixeltable/utils/media_store.py +76 -0
- pixeltable/utils/parquet.py +126 -0
- pixeltable/utils/pytorch.py +172 -0
- pixeltable/utils/s3.py +13 -0
- pixeltable/utils/sql.py +17 -0
- pixeltable/utils/transactional_directory.py +35 -0
- pixeltable-0.2.0.dist-info/LICENSE +18 -0
- pixeltable-0.2.0.dist-info/METADATA +117 -0
- pixeltable-0.2.0.dist-info/RECORD +125 -0
- {pixeltable-0.1.1.dist-info → pixeltable-0.2.0.dist-info}/WHEEL +1 -1
- pixeltable/catalog.py +0 -1421
- pixeltable/exprs.py +0 -1745
- pixeltable/function.py +0 -269
- pixeltable/functions/clip.py +0 -10
- pixeltable/functions/pil/__init__.py +0 -23
- pixeltable/functions/tf.py +0 -21
- pixeltable/index.py +0 -57
- pixeltable/tests/test_dict.py +0 -24
- pixeltable/tests/test_tf.py +0 -69
- pixeltable/tf.py +0 -33
- pixeltable/utils/tf.py +0 -33
- pixeltable/utils/video.py +0 -32
- pixeltable-0.1.1.dist-info/METADATA +0 -31
- pixeltable-0.1.1.dist-info/RECORD +0 -36
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Optional, List, Any, Dict, Tuple
|
|
3
|
+
import copy
|
|
4
|
+
|
|
5
|
+
import sqlalchemy as sql
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
from .expr import Expr
|
|
9
|
+
from .data_row import DataRow
|
|
10
|
+
from .inline_dict import InlineDict
|
|
11
|
+
from .row_builder import RowBuilder
|
|
12
|
+
import pixeltable.catalog as catalog
|
|
13
|
+
import pixeltable.type_system as ts
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class InlineArray(Expr):
    """
    Array 'literal' which can use Exprs as values.

    The literal can be cast as either a pixeltable `ArrayType` or `JsonType`. If `force_json`
    is `True`, it will always be cast as a `JsonType`. If `force_json` is `False`, it will be cast as an
    `ArrayType` if it is a homogeneous array of scalars or arrays, or a `JsonType` otherwise.
    """
    def __init__(self, elements: Tuple, force_json: bool = False):
        """
        Args:
            elements: the values of the array; list values are wrapped in a nested InlineArray, dict values
                in an InlineDict, Expr values become components, everything else is kept as a plain value
            force_json: if True, the resulting col_type is always JsonType
        """
        # we need to call this in order to populate self.components
        super().__init__(ts.ArrayType((len(elements),), ts.IntType()))

        # remembered so that _as_dict()/_from_dict() can reproduce the same col_type
        self.force_json = force_json

        # elements contains
        # - for Expr elements: (index into components, None)
        # - for non-Expr elements: (-1, value)
        self.elements: List[Tuple[int, Any]] = []
        for el in elements:
            el = copy.deepcopy(el)
            if isinstance(el, list):
                # If col_type is an ArrayType, we'll require it to be a multidimensional array
                # of the specified underlying type
                el = InlineArray(tuple(el), force_json)
            if isinstance(el, dict):
                el = InlineDict(el)
            if isinstance(el, Expr):
                self.elements.append((len(self.components), None))
                self.components.append(el)
            else:
                self.elements.append((-1, el))

        # fold the element types into a common supertype; None signals that there is none
        inferred_element_type = ts.InvalidType()
        for idx, val in self.elements:
            if idx >= 0:
                inferred_element_type = ts.ColumnType.supertype(inferred_element_type, self.components[idx].col_type)
            else:
                inferred_element_type = ts.ColumnType.supertype(inferred_element_type, ts.ColumnType.infer_literal_type(val))
            if inferred_element_type is None:
                break

        if force_json or inferred_element_type is None:
            # JSON conversion is forced, or there is no common supertype
            # TODO: make sure this doesn't contain Images
            self.col_type = ts.JsonType()
        elif inferred_element_type.is_scalar_type():
            self.col_type = ts.ArrayType((len(self.elements),), inferred_element_type)
        elif inferred_element_type.is_array_type():
            assert isinstance(inferred_element_type, ts.ArrayType)
            self.col_type = ts.ArrayType(
                (len(self.elements), *inferred_element_type.shape), ts.ColumnType.make_type(inferred_element_type.dtype))
        else:
            self.col_type = ts.JsonType()

        self.id = self._create_id()

    def __str__(self) -> str:
        # decide on idx, not on val: a plain element may legitimately be None, in which case
        # idx is -1 and there is no component to print
        elem_strs = [str(self.components[idx]) if idx >= 0 else str(val) for idx, val in self.elements]
        return f'[{", ".join(elem_strs)}]'

    def _equals(self, other: InlineArray) -> bool:
        return self.elements == other.elements

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        return super()._id_attrs() + [('elements', self.elements)]

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        # no SQL translation is provided for inline arrays
        return None

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        """Assemble the array from component slots and plain values and store it in our slot of data_row."""
        result = [None] * len(self.elements)
        for i, (child_idx, val) in enumerate(self.elements):
            if child_idx >= 0:
                result[i] = data_row[self.components[child_idx].slot_idx]
            else:
                # copy, so that the stored value can't be mutated through the row
                result[i] = copy.deepcopy(val)
        if self.col_type.is_array_type():
            data_row[self.slot_idx] = np.array(result)
        else:
            data_row[self.slot_idx] = result

    def _as_dict(self) -> Dict:
        return {'elements': self.elements, 'force_json': self.force_json, **super()._as_dict()}

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        """Rebuild an InlineArray from the output of _as_dict() and its already-deserialized components."""
        assert 'elements' in d
        arg: List[Any] = []
        for idx, val in d['elements']:
            if idx >= 0:
                arg.append(components[idx])
            else:
                arg.append(val)
        # dicts written before 'force_json' was serialized don't have the key; fall back to the
        # constructor default, which matches how those dicts were deserialized previously
        return cls(tuple(arg), force_json=d.get('force_json', False))
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Optional, List, Any, Dict, Tuple
|
|
3
|
+
import copy
|
|
4
|
+
|
|
5
|
+
import sqlalchemy as sql
|
|
6
|
+
|
|
7
|
+
from .expr import Expr
|
|
8
|
+
from .data_row import DataRow
|
|
9
|
+
from .row_builder import RowBuilder
|
|
10
|
+
import pixeltable.exceptions as excs
|
|
11
|
+
import pixeltable.catalog as catalog
|
|
12
|
+
import pixeltable.type_system as ts
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class InlineDict(Expr):
    """
    Dictionary 'literal' which can use Exprs as values.

    Keys must be strings. Nested dicts become InlineDicts and lists/tuples become InlineArrays
    (forced to JSON); every Expr value is registered as a component.
    """
    def __init__(self, d: Dict):
        from .inline_array import InlineArray
        super().__init__(ts.JsonType())  # we need to call this in order to populate self.components
        # dict_items contains
        # - for Expr fields: (key, index into components, None)
        # - for non-Expr fields: (key, -1, value)
        self.dict_items: List[Tuple[str, int, Any]] = []
        for key, raw_val in d.items():
            if not isinstance(key, str):
                raise excs.Error(f'Dictionary requires string keys, {key} has type {type(key)}')
            item = copy.deepcopy(raw_val)
            if isinstance(item, dict):
                item = InlineDict(item)
            if isinstance(item, (list, tuple)):
                item = InlineArray(tuple(item), force_json=True)
            if not isinstance(item, Expr):
                self.dict_items.append((key, -1, item))
                continue
            self.dict_items.append((key, len(self.components), None))
            self.components.append(item)

        # a type spec is only available when every value is an Expr
        # TODO: implement type inference for values
        has_plain_value = any(idx == -1 for _, idx, _ in self.dict_items)
        self.type_spec: Optional[Dict[str, ts.ColumnType]]
        if has_plain_value:
            self.type_spec = None
        else:
            self.type_spec = {key: self.components[idx].col_type for key, idx, _ in self.dict_items}
        self.col_type = ts.JsonType(self.type_spec)

        self.id = self._create_id()

    def __str__(self) -> str:
        def render(key: str, idx: int, val: Any) -> str:
            # Expr fields print their component; plain strings are quoted, everything else uses str()
            if idx != -1:
                return f"'{key}': {str(self.components[idx])}"
            if isinstance(val, str):
                return f"'{key}': '{val}'"
            return f"'{key}': {str(val)}"

        return '{' + ', '.join(render(key, idx, val) for key, idx, val in self.dict_items) + '}'

    def _equals(self, other: InlineDict) -> bool:
        return self.dict_items == other.dict_items

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        return super()._id_attrs() + [('dict_items', self.dict_items)]

    def to_dict(self) -> Dict[str, Any]:
        """Return the original dict used to construct this"""
        original: Dict[str, Any] = {}
        for key, idx, val in self.dict_items:
            original[key] = val if idx == -1 else self.components[idx]
        return original

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        # no SQL translation is provided for inline dicts
        return None

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        """Assemble the dict from component slots and plain values and store it in our slot of data_row."""
        assembled = {}
        for key, idx, val in self.dict_items:
            assert isinstance(key, str)
            # plain values are copied so the stored value can't be mutated through the row
            assembled[key] = data_row[self.components[idx].slot_idx] if idx >= 0 else copy.deepcopy(val)
        data_row[self.slot_idx] = assembled

    def _as_dict(self) -> Dict:
        return {'dict_items': self.dict_items, **super()._as_dict()}

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        """Rebuild an InlineDict from the output of _as_dict() and its already-deserialized components."""
        assert 'dict_items' in d
        restored: Dict[str, Any] = {}
        for key, idx, val in d['dict_items']:
            restored[key] = components[idx] if idx >= 0 else val
        return cls(restored)
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Optional, List, Dict
|
|
3
|
+
|
|
4
|
+
import sqlalchemy as sql
|
|
5
|
+
|
|
6
|
+
from .predicate import Predicate
|
|
7
|
+
from .expr import Expr
|
|
8
|
+
from .data_row import DataRow
|
|
9
|
+
from .row_builder import RowBuilder
|
|
10
|
+
import pixeltable.catalog as catalog
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class IsNull(Predicate):
    """Predicate that tests whether its single operand is None."""
    def __init__(self, e: Expr):
        super().__init__()
        self.components = [e]
        self.id = self._create_id()

    def __str__(self) -> str:
        operand = self.components[0]
        return str(operand) + ' == None'

    def _equals(self, other: IsNull) -> bool:
        # there is no state beyond the operand, which is held in self.components
        return True

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        operand_sql = self.components[0].sql_expr()
        if operand_sql is not None:
            # '==' is deliberate: it builds a SQL comparison against NULL instead of comparing in Python
            return operand_sql == None
        # the operand has no SQL form, so neither do we
        return None

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        operand_val = data_row[self.components[0].slot_idx]
        data_row[self.slot_idx] = operand_val is None

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        assert len(components) == 1
        (operand,) = components
        return cls(operand)
|
|
38
|
+
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Optional, List, Dict
|
|
3
|
+
|
|
4
|
+
import sqlalchemy as sql
|
|
5
|
+
|
|
6
|
+
from .expr import Expr, ExprScope, _GLOBAL_SCOPE
|
|
7
|
+
from .data_row import DataRow
|
|
8
|
+
from .row_builder import RowBuilder
|
|
9
|
+
import pixeltable.catalog as catalog
|
|
10
|
+
import pixeltable.type_system as ts
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class JsonMapper(Expr):
    """
    JsonMapper transforms the list output of a JsonPath by applying a target expr to every element of the list.
    The target expr would typically contain relative JsonPaths, which are bound to an ObjectRef, which in turn
    is populated by JsonMapper.eval(). The JsonMapper effectively creates a new scope for its target expr.

    components is laid out as [src_expr, target_expr, scope_anchor].
    """
    def __init__(self, src_expr: Expr, target_expr: Expr):
        # TODO: type spec should be List[target_expr.col_type]
        super().__init__(ts.JsonType())

        # we're creating a new scope, but we don't know yet whether this is nested within another JsonMapper;
        # this gets resolved in bind_rel_paths(); for now we assume we're in the global scope
        self.target_expr_scope = ExprScope(_GLOBAL_SCOPE)

        from .object_ref import ObjectRef
        anchor_ref = ObjectRef(self.target_expr_scope, self)
        self.components = [src_expr, target_expr, anchor_ref]
        self.parent_mapper: Optional[JsonMapper] = None
        # created lazily on the first call to eval()
        self.target_expr_eval_ctx: Optional[RowBuilder.EvalCtx] = None
        self.id = self._create_id()

    def bind_rel_paths(self, mapper: Optional[JsonMapper]) -> None:
        """Bind relative paths: the source expr against the enclosing mapper, the target expr against us."""
        self._src_expr.bind_rel_paths(mapper)
        self._target_expr.bind_rel_paths(self)
        self.parent_mapper = mapper
        if mapper is None:
            self.target_expr_scope.parent = _GLOBAL_SCOPE
        else:
            self.target_expr_scope.parent = mapper.target_expr_scope

    def scope(self) -> ExprScope:
        # need to ignore target_expr
        return self._src_expr.scope()

    def dependencies(self) -> List[Expr]:
        return [self._src_expr, *self._target_dependencies(self._target_expr)]

    def _target_dependencies(self, e: Expr) -> List[Expr]:
        """
        Return all subexprs of e of which the scope isn't contained in target_expr_scope.
        Those need to be evaluated before us.
        """
        if not e.scope().is_contained_in(self.target_expr_scope):
            return [e]
        return [dep for child in e.components for dep in self._target_dependencies(child)]

    def equals(self, other: Expr) -> bool:
        """
        We override equals() because we need to avoid comparing our scope anchor.
        """
        if type(self) != type(other):
            return False
        if not self._src_expr.equals(other._src_expr):
            return False
        return self._target_expr.equals(other._target_expr)

    def __str__(self) -> str:
        return str(self._src_expr) + ' >> ' + str(self._target_expr)

    @property
    def _src_expr(self) -> Expr:
        return self.components[0]

    @property
    def _target_expr(self) -> Expr:
        return self.components[1]

    @property
    def scope_anchor(self) -> Expr:
        return self.components[2]

    def _equals(self, other: JsonMapper) -> bool:
        return True

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        # no SQL translation is provided for the mapping
        return None

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        """
        Evaluate the target expr once per element of the source list and store the list of results
        in our slot; a source value that isn't a list produces None.
        """
        src = data_row[self._src_expr.slot_idx]
        if not isinstance(src, list):
            # invalid/non-list src path
            data_row[self.slot_idx] = None
            return

        if self.target_expr_eval_ctx is None:
            self.target_expr_eval_ctx = row_builder.create_eval_ctx([self._target_expr])
        mapped: List = []
        for element in src:
            # expose the current element through the scope anchor, then (re-)evaluate the target expr
            data_row[self.scope_anchor.slot_idx] = element
            exc_tb = row_builder.eval(data_row, self.target_expr_eval_ctx)
            assert exc_tb is None
            mapped.append(data_row[self._target_expr.slot_idx])
        data_row[self.slot_idx] = mapped

    def _as_dict(self) -> Dict:
        """
        We need to avoid serializing component[2], which is an ObjectRef.
        """
        serialized = [c.as_dict() for c in (self._src_expr, self._target_expr)]
        return {'components': serialized}

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        assert len(components) == 2
        src_expr, target_expr = components
        return cls(src_expr, target_expr)
|
|
121
|
+
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Optional, List, Any, Dict, Tuple, Union
|
|
3
|
+
|
|
4
|
+
import jmespath
|
|
5
|
+
import sqlalchemy as sql
|
|
6
|
+
|
|
7
|
+
from .globals import print_slice
|
|
8
|
+
from .expr import Expr
|
|
9
|
+
from .json_mapper import JsonMapper
|
|
10
|
+
from .data_row import DataRow
|
|
11
|
+
from .row_builder import RowBuilder
|
|
12
|
+
import pixeltable
|
|
13
|
+
import pixeltable.exceptions as excs
|
|
14
|
+
import pixeltable.catalog as catalog
|
|
15
|
+
import pixeltable.type_system as ts
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class JsonPath(Expr):
    """
    Path into a JSON value, built up incrementally via attribute access (`.name`) and indexing (`[i]`, `['*']`).

    A path is either absolute (anchored at an Expr, held as the only component) or relative (no anchor yet);
    relative paths get their anchor in bind_rel_paths().
    """
    # NOTE(review): __getattr__ below answers *every* missing attribute with a new JsonPath, including
    # dunder probes done via getattr() on the instance (e.g. copy.deepcopy looking up __deepcopy__);
    # confirm that Expr shields those lookups.

    def __init__(self, anchor: Optional['pixeltable.exprs.ColumnRef'], path_elements: Optional[List[str]] = None, scope_idx: int = 0):
        """
        anchor can be None, in which case this is a relative JsonPath and the anchor is set later via set_anchor().
        scope_idx: for relative paths, index of referenced JsonMapper
        (0: indicates the immediately preceding JsonMapper, -1: the parent of the immediately preceding mapper, ...)
        """
        if path_elements is None:
            path_elements = []
        super().__init__(ts.JsonType())
        if anchor is not None:
            self.components = [anchor]
        self.path_elements: List[Union[str, int]] = path_elements
        # an empty path selects the anchor value itself, so there is nothing to compile
        self.compiled_path = jmespath.compile(self._json_path()) if len(path_elements) > 0 else None
        self.scope_idx = scope_idx
        # NOTE: the _create_id() result will change if set_anchor() gets called;
        # this is not a problem, because _create_id() shouldn't be called after init()
        self.id = self._create_id()

    def __str__(self) -> str:
        # "R": the anchor is RELATIVE_PATH_ROOT
        anchor_str = str(self._anchor) if self._anchor is not None else 'R'
        if len(self.path_elements) == 0:
            # _json_path() requires at least one element; an empty path is just the anchor
            return anchor_str
        separator = '.' if isinstance(self.path_elements[0], str) else ''
        return f'{anchor_str}{separator}{self._json_path()}'

    def _as_dict(self) -> Dict:
        return {'path_elements': self.path_elements, 'scope_idx': self.scope_idx, **super()._as_dict()}

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        """Rebuild a JsonPath from the output of _as_dict(); no components means a relative path."""
        assert 'path_elements' in d
        assert 'scope_idx' in d
        assert len(components) <= 1
        anchor = components[0] if len(components) == 1 else None
        return cls(anchor, d['path_elements'], d['scope_idx'])

    @property
    def _anchor(self) -> Optional[Expr]:
        return None if len(self.components) == 0 else self.components[0]

    def set_anchor(self, anchor: Expr) -> None:
        """Anchor a relative path; must not be called on a path that already has an anchor."""
        assert len(self.components) == 0
        self.components = [anchor]

    def is_relative_path(self) -> bool:
        return self._anchor is None

    def bind_rel_paths(self, mapper: Optional['JsonMapper'] = None) -> None:
        """
        Anchor a relative path at the scope anchor of the enclosing JsonMapper; absolute paths are left alone.

        Raises:
            excs.Error: if this is a relative path and there is no enclosing JsonMapper
        """
        if not self.is_relative_path():
            return
        if mapper is None:
            # previously surfaced as an AttributeError on None
            raise excs.Error('Relative json path can only be used on the right-hand side of >>')
        # TODO: take scope_idx into account
        self.set_anchor(mapper.scope_anchor)

    def __call__(self, *args: object, **kwargs: object) -> 'JsonPath':
        """
        Construct a relative path that references an ancestor of the immediately enclosing JsonMapper.
        """
        if not self.is_relative_path():
            raise excs.Error('() for an absolute path is invalid')
        if len(args) != 1 or not isinstance(args[0], int) or args[0] >= 0:
            raise excs.Error('R() requires a negative index')
        return JsonPath(None, [], args[0])

    def __getattr__(self, name: str) -> 'JsonPath':
        """Extend the path by a field name; only invoked for attributes that aren't found normally."""
        assert isinstance(name, str)
        # carry scope_idx along, otherwise R(-1).name would silently refer to scope 0
        return JsonPath(self._anchor, self.path_elements + [name], self.scope_idx)

    def __getitem__(self, index: object) -> 'JsonPath':
        """
        Extend the path by a list index: an int, a slice or the wildcard '*'.

        Raises:
            excs.Error: for any other index
        """
        if isinstance(index, str):
            if index != '*':
                raise excs.Error(f'Invalid json list index: {index}')
        elif not isinstance(index, (slice, int)):
            raise excs.Error(f'Invalid json list index: {index}')
        # carry scope_idx along, see __getattr__()
        return JsonPath(self._anchor, self.path_elements + [index], self.scope_idx)

    def __rshift__(self, other: object) -> 'JsonMapper':
        """Create a JsonMapper that applies `other` to every element of the list this path selects."""
        rhs_expr = Expr.from_object(other)
        if rhs_expr is None:
            raise excs.Error(f'>> requires an expression on the right-hand side, found {type(other)}')
        return JsonMapper(self, rhs_expr)

    def default_column_name(self) -> Optional[str]:
        """
        Derive a name from the anchor's default name and the path: '.' becomes '_', '*' becomes 'star',
        other non-alphanumeric characters are dropped. Returns None if nothing is left.
        """
        anchor_name = self._anchor.default_column_name() if self._anchor is not None else ''
        if len(self.path_elements) > 0:
            ret_name = f'{anchor_name}.{self._json_path()}'
        else:
            # _json_path() asserts a non-empty path; a bare anchor just reuses the anchor's name
            ret_name = anchor_name if anchor_name is not None else ''

        def cleanup_char(s: str) -> str:
            if s == '.':
                return '_'
            elif s == '*':
                return 'star'
            elif s.isalnum():
                return s
            else:
                return ''

        clean_name = ''.join(map(cleanup_char, ret_name))
        clean_name = clean_name.lstrip('_')  # remove leading underscore
        if clean_name == '':
            clean_name = None

        assert clean_name is None or catalog.is_valid_identifier(clean_name)
        return clean_name

    def _equals(self, other: JsonPath) -> bool:
        return self.path_elements == other.path_elements

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        return super()._id_attrs() + [('path_elements', self.path_elements)]

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        """
        Postgres appears to have a bug: jsonb_path_query('{a: [{b: 0}, {b: 1}]}', '$.a.b') returns
        *two* rows (each containing col val 0), not a single row with [0, 0].
        We need to use a workaround: retrieve the entire dict, then use jmespath to extract the path correctly.
        """
        #path_str = '$.' + '.'.join(self.path_elements)
        #assert isinstance(self._anchor(), ColumnRef)
        #return sql.func.jsonb_path_query(self._anchor().col.sa_col, path_str)
        return None

    def _json_path(self) -> str:
        """Render path_elements as a jmespath expression; requires a non-empty path."""
        assert len(self.path_elements) > 0
        result: List[str] = []
        for element in self.path_elements:
            if element == '*':
                result.append('[*]')
            elif isinstance(element, str):
                # field names are dot-separated, except for the very first path element
                result.append(f'{"." if len(result) > 0 else ""}{element}')
            elif isinstance(element, int):
                result.append(f'[{element}]')
            elif isinstance(element, slice):
                result.append(f'[{print_slice(element)}]')
        return ''.join(result)

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        """Apply the compiled path (if any) to the anchor's value and store the result in our slot."""
        val = data_row[self._anchor.slot_idx]
        if self.compiled_path is not None:
            val = self.compiled_path.search(val)
        data_row[self.slot_idx] = val
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
RELATIVE_PATH_ROOT = JsonPath(None)
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Optional, List, Any, Dict, Tuple
|
|
3
|
+
|
|
4
|
+
import sqlalchemy as sql
|
|
5
|
+
|
|
6
|
+
from .expr import Expr
|
|
7
|
+
from .data_row import DataRow
|
|
8
|
+
from .row_builder import RowBuilder
|
|
9
|
+
import pixeltable.catalog as catalog
|
|
10
|
+
import pixeltable.type_system as ts
|
|
11
|
+
|
|
12
|
+
class Literal(Expr):
    """Constant value; its column type is either supplied by the caller or inferred from the value."""
    def __init__(self, val: Any, col_type: Optional[ts.ColumnType] = None):
        if col_type is None:
            # try to determine a type for val
            col_type = ts.ColumnType.infer_literal_type(val)
            if col_type is None:
                raise TypeError(f'Not a valid literal: {val}')
        else:
            # let the requested type produce the stored value
            val = col_type.create_literal(val)
        super().__init__(col_type)
        self.val = val
        self.id = self._create_id()

    def default_column_name(self) -> Optional[str]:
        return 'Literal'

    def __str__(self) -> str:
        # strings and timestamps are shown in single quotes
        quoted = self.col_type.is_string_type() or self.col_type.is_timestamp_type()
        if not quoted:
            return str(self.val)
        return f"'{self.val}'"

    def _equals(self, other: Literal) -> bool:
        return self.val == other.val

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        return super()._id_attrs() + [('val', self.val)]

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        # we need to return something here so that we can generate a Where clause for predicates
        # that involve literals (like Where c > 0)
        return sql.sql.expression.literal(self.val)

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        # this will be called, even though sql_expr() does not return None
        data_row[self.slot_idx] = self.val

    def _as_dict(self) -> Dict:
        return {'val': self.val, **super()._as_dict()}

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        # only the value is restored; the type is inferred again by __init__()
        assert 'val' in d
        stored_val = d['val']
        return cls(stored_val)
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Optional, List, Any, Dict, Tuple
|
|
3
|
+
import copy
|
|
4
|
+
|
|
5
|
+
import sqlalchemy as sql
|
|
6
|
+
|
|
7
|
+
from .expr import Expr, ExprScope
|
|
8
|
+
from .json_mapper import JsonMapper
|
|
9
|
+
from .data_row import DataRow
|
|
10
|
+
from .row_builder import RowBuilder
|
|
11
|
+
import pixeltable.type_system as ts
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ObjectRef(Expr):
    """
    Reference to an intermediate result, such as the "scope variable" produced by a JsonMapper.
    The object is generated/materialized elsewhere and establishes a new scope.
    """
    def __init__(self, scope: ExprScope, owner: JsonMapper):
        """
        Args:
            scope: the scope reported by scope()
            owner: the JsonMapper this reference belongs to; used for equality
        """
        # TODO: do we need an Unknown type after all?
        super().__init__(ts.JsonType())  # JsonType: this could be anything
        self._scope = scope
        self.owner = owner
        self.id = self._create_id()

    def scope(self) -> ExprScope:
        return self._scope

    def __str__(self) -> str:
        # an ObjectRef is not expected to be printed
        assert False

    def _equals(self, other: ObjectRef) -> bool:
        # two references are equal only if they belong to the very same mapper object
        return other.owner is self.owner

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        return None

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        # this will be called, but the value has already been materialized elsewhere
        return None
|
|
41
|
+
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Optional, List, Tuple, Callable
|
|
3
|
+
|
|
4
|
+
from .expr import Expr
|
|
5
|
+
from .globals import LogicalOperator
|
|
6
|
+
import pixeltable
|
|
7
|
+
import pixeltable.type_system as ts
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Predicate(Expr):
    """Base class for boolean-valued expressions; supplies the `&`, `|` and `~` operators."""
    def __init__(self) -> None:
        super().__init__(ts.BoolType())

    def split_conjuncts(
            self, condition: Callable[[Predicate], bool]) -> Tuple[List[Predicate], Optional[Predicate]]:
        """
        Returns clauses of a conjunction that meet condition in the first element.
        The second element contains remaining clauses, rolled into a conjunction.
        """
        # a plain predicate is a conjunction of one clause: it lands entirely on one side
        matching: List[Predicate] = []
        remainder: Optional[Predicate] = None
        if condition(self):
            matching.append(self)
        else:
            remainder = self
        return matching, remainder

    def __and__(self, other: object) -> 'pixeltable.exprs.CompoundPredicate':
        """Conjunction of self and other; raises TypeError unless other is a boolean-typed Expr."""
        if not isinstance(other, Expr):
            raise TypeError(f'Other needs to be an expression: {type(other)}')
        if not other.col_type.is_bool_type():
            raise TypeError(f'Other needs to be an expression that returns a boolean: {other.col_type}')
        # imported locally rather than at module level
        from .compound_predicate import CompoundPredicate
        operands = [self, other]
        return CompoundPredicate(LogicalOperator.AND, operands)

    def __or__(self, other: object) -> 'pixeltable.exprs.CompoundPredicate':
        """Disjunction of self and other; raises TypeError unless other is a boolean-typed Expr."""
        if not isinstance(other, Expr):
            raise TypeError(f'Other needs to be an expression: {type(other)}')
        if not other.col_type.is_bool_type():
            raise TypeError(f'Other needs to be an expression that returns a boolean: {other.col_type}')
        from .compound_predicate import CompoundPredicate
        operands = [self, other]
        return CompoundPredicate(LogicalOperator.OR, operands)

    def __invert__(self) -> 'pixeltable.exprs.CompoundPredicate':
        """Negation of self."""
        from .compound_predicate import CompoundPredicate
        operands = [self]
        return CompoundPredicate(LogicalOperator.NOT, operands)
|
|
44
|
+
|