pixeltable 0.2.20__py3-none-any.whl → 0.2.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (120) hide show
  1. pixeltable/__init__.py +7 -19
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +7 -7
  4. pixeltable/catalog/column.py +37 -11
  5. pixeltable/catalog/globals.py +21 -0
  6. pixeltable/catalog/insertable_table.py +6 -4
  7. pixeltable/catalog/table.py +227 -148
  8. pixeltable/catalog/table_version.py +66 -28
  9. pixeltable/catalog/table_version_path.py +0 -8
  10. pixeltable/catalog/view.py +18 -19
  11. pixeltable/dataframe.py +16 -32
  12. pixeltable/env.py +6 -1
  13. pixeltable/exec/__init__.py +1 -2
  14. pixeltable/exec/aggregation_node.py +27 -17
  15. pixeltable/exec/cache_prefetch_node.py +1 -1
  16. pixeltable/exec/data_row_batch.py +9 -26
  17. pixeltable/exec/exec_node.py +36 -7
  18. pixeltable/exec/expr_eval_node.py +19 -11
  19. pixeltable/exec/in_memory_data_node.py +14 -11
  20. pixeltable/exec/sql_node.py +266 -138
  21. pixeltable/exprs/__init__.py +1 -0
  22. pixeltable/exprs/arithmetic_expr.py +3 -1
  23. pixeltable/exprs/array_slice.py +7 -7
  24. pixeltable/exprs/column_property_ref.py +37 -10
  25. pixeltable/exprs/column_ref.py +93 -14
  26. pixeltable/exprs/comparison.py +5 -5
  27. pixeltable/exprs/compound_predicate.py +8 -7
  28. pixeltable/exprs/data_row.py +56 -36
  29. pixeltable/exprs/expr.py +65 -63
  30. pixeltable/exprs/expr_dict.py +55 -0
  31. pixeltable/exprs/expr_set.py +26 -15
  32. pixeltable/exprs/function_call.py +53 -24
  33. pixeltable/exprs/globals.py +4 -1
  34. pixeltable/exprs/in_predicate.py +8 -7
  35. pixeltable/exprs/inline_expr.py +4 -4
  36. pixeltable/exprs/is_null.py +4 -4
  37. pixeltable/exprs/json_mapper.py +11 -12
  38. pixeltable/exprs/json_path.py +5 -10
  39. pixeltable/exprs/literal.py +5 -5
  40. pixeltable/exprs/method_ref.py +5 -4
  41. pixeltable/exprs/object_ref.py +2 -1
  42. pixeltable/exprs/row_builder.py +88 -36
  43. pixeltable/exprs/rowid_ref.py +14 -13
  44. pixeltable/exprs/similarity_expr.py +12 -7
  45. pixeltable/exprs/sql_element_cache.py +12 -6
  46. pixeltable/exprs/type_cast.py +8 -6
  47. pixeltable/exprs/variable.py +5 -4
  48. pixeltable/ext/functions/whisperx.py +7 -2
  49. pixeltable/func/aggregate_function.py +1 -1
  50. pixeltable/func/callable_function.py +2 -2
  51. pixeltable/func/function.py +11 -10
  52. pixeltable/func/function_registry.py +6 -7
  53. pixeltable/func/query_template_function.py +11 -12
  54. pixeltable/func/signature.py +17 -15
  55. pixeltable/func/udf.py +0 -4
  56. pixeltable/functions/__init__.py +2 -2
  57. pixeltable/functions/audio.py +4 -6
  58. pixeltable/functions/globals.py +84 -42
  59. pixeltable/functions/huggingface.py +31 -34
  60. pixeltable/functions/image.py +59 -45
  61. pixeltable/functions/json.py +0 -1
  62. pixeltable/functions/llama_cpp.py +106 -0
  63. pixeltable/functions/mistralai.py +2 -2
  64. pixeltable/functions/ollama.py +147 -0
  65. pixeltable/functions/openai.py +22 -25
  66. pixeltable/functions/replicate.py +72 -0
  67. pixeltable/functions/string.py +59 -50
  68. pixeltable/functions/timestamp.py +20 -20
  69. pixeltable/functions/together.py +2 -2
  70. pixeltable/functions/video.py +11 -20
  71. pixeltable/functions/whisper.py +2 -20
  72. pixeltable/globals.py +65 -74
  73. pixeltable/index/base.py +2 -2
  74. pixeltable/index/btree.py +20 -7
  75. pixeltable/index/embedding_index.py +12 -14
  76. pixeltable/io/__init__.py +1 -2
  77. pixeltable/io/external_store.py +11 -5
  78. pixeltable/io/fiftyone.py +178 -0
  79. pixeltable/io/globals.py +98 -2
  80. pixeltable/io/hf_datasets.py +1 -1
  81. pixeltable/io/label_studio.py +6 -6
  82. pixeltable/io/parquet.py +14 -13
  83. pixeltable/iterators/base.py +3 -2
  84. pixeltable/iterators/document.py +10 -8
  85. pixeltable/iterators/video.py +126 -60
  86. pixeltable/metadata/__init__.py +4 -3
  87. pixeltable/metadata/converters/convert_14.py +4 -2
  88. pixeltable/metadata/converters/convert_15.py +1 -1
  89. pixeltable/metadata/converters/convert_19.py +1 -0
  90. pixeltable/metadata/converters/convert_20.py +1 -1
  91. pixeltable/metadata/converters/convert_21.py +34 -0
  92. pixeltable/metadata/converters/util.py +54 -12
  93. pixeltable/metadata/notes.py +1 -0
  94. pixeltable/metadata/schema.py +40 -21
  95. pixeltable/plan.py +149 -165
  96. pixeltable/py.typed +0 -0
  97. pixeltable/store.py +57 -37
  98. pixeltable/tool/create_test_db_dump.py +6 -6
  99. pixeltable/tool/create_test_video.py +1 -1
  100. pixeltable/tool/doc_plugins/griffe.py +3 -34
  101. pixeltable/tool/embed_udf.py +1 -1
  102. pixeltable/tool/mypy_plugin.py +55 -0
  103. pixeltable/type_system.py +260 -61
  104. pixeltable/utils/arrow.py +10 -9
  105. pixeltable/utils/coco.py +4 -4
  106. pixeltable/utils/documents.py +16 -2
  107. pixeltable/utils/filecache.py +9 -9
  108. pixeltable/utils/formatter.py +10 -11
  109. pixeltable/utils/http_server.py +2 -5
  110. pixeltable/utils/media_store.py +6 -6
  111. pixeltable/utils/pytorch.py +10 -11
  112. pixeltable/utils/sql.py +2 -1
  113. {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/METADATA +50 -13
  114. pixeltable-0.2.22.dist-info/RECORD +153 -0
  115. pixeltable/exec/media_validation_node.py +0 -43
  116. pixeltable/utils/help.py +0 -11
  117. pixeltable-0.2.20.dist-info/RECORD +0 -147
  118. {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/LICENSE +0 -0
  119. {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/WHEEL +0 -0
  120. {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/entry_points.txt +0 -0
@@ -1,15 +1,16 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Optional, List, Any, Dict, Tuple, Iterable
3
+ from typing import Any, Iterable, Optional
4
4
 
5
5
  import sqlalchemy as sql
6
6
 
7
7
  import pixeltable.exceptions as excs
8
- from .sql_element_cache import SqlElementCache
9
8
  import pixeltable.type_system as ts
9
+
10
10
  from .data_row import DataRow
11
11
  from .expr import Expr
12
12
  from .row_builder import RowBuilder
13
+ from .sql_element_cache import SqlElementCache
13
14
 
14
15
 
15
16
  class InPredicate(Expr):
@@ -43,7 +44,7 @@ class InPredicate(Expr):
43
44
  assert len(self.components) == 2
44
45
  return self.components[1]
45
46
 
46
- def _normalize_value_set(self, value_set: Any, filter_type_mismatches: bool = True) -> Iterable:
47
+ def _normalize_value_set(self, value_set: Iterable, filter_type_mismatches: bool = True) -> list:
47
48
  if not isinstance(value_set, Iterable):
48
49
  raise excs.Error(f'isin(): argument must be an Iterable (eg, list, dict, ...), not {value_set!r}')
49
50
  value_list = list(value_set)
@@ -68,10 +69,10 @@ class InPredicate(Expr):
68
69
  def _equals(self, other: InPredicate) -> bool:
69
70
  return self.value_list == other.value_list
70
71
 
71
- def _id_attrs(self) -> List[Tuple[str, Any]]:
72
+ def _id_attrs(self) -> list[tuple[str, Any]]:
72
73
  return super()._id_attrs() + [('value_list', self.value_list)]
73
74
 
74
- def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ClauseElement]:
75
+ def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
75
76
  lhs_sql_exprs = sql_elements.get(self.components[0])
76
77
  if lhs_sql_exprs is None or self.value_list is None:
77
78
  return None
@@ -86,11 +87,11 @@ class InPredicate(Expr):
86
87
  value_list = self._normalize_value_set(value_set, filter_type_mismatches=False)
87
88
  data_row[self.slot_idx] = lhs_val in value_list
88
89
 
89
- def _as_dict(self) -> Dict:
90
+ def _as_dict(self) -> dict:
90
91
  return {'value_list': self.value_list, **super()._as_dict()}
91
92
 
92
93
  @classmethod
93
- def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
94
+ def _from_dict(cls, d: dict, components: list[Expr]) -> InPredicate:
94
95
  assert 'value_list' in d
95
96
  assert len(components) <= 2
96
97
  return cls(components[0], d['value_list'], components[1] if len(components) == 2 else None)
@@ -73,7 +73,7 @@ class InlineArray(Expr):
73
73
  return super()._as_dict()
74
74
 
75
75
  @classmethod
76
- def _from_dict(cls, _: dict, components: list[Expr]) -> Expr:
76
+ def _from_dict(cls, _: dict, components: list[Expr]) -> InlineArray:
77
77
  try:
78
78
  return cls(components)
79
79
  except excs.Error:
@@ -81,7 +81,7 @@ class InlineArray(Expr):
81
81
  # This is because in schema versions <= 19, `InlineArray` was serialized incorrectly, and
82
82
  # there is no way to determine the correct expression type until the subexpressions are
83
83
  # loaded and their types are known.
84
- return InlineList(components)
84
+ return InlineList(components) # type: ignore[return-value]
85
85
 
86
86
 
87
87
  class InlineList(Expr):
@@ -122,7 +122,7 @@ class InlineList(Expr):
122
122
  return super()._as_dict()
123
123
 
124
124
  @classmethod
125
- def _from_dict(cls, _: dict, components: list[Expr]) -> Expr:
125
+ def _from_dict(cls, _: dict, components: list[Expr]) -> InlineList:
126
126
  return cls(components)
127
127
 
128
128
 
@@ -193,7 +193,7 @@ class InlineDict(Expr):
193
193
  return {'keys': self.keys, **super()._as_dict()}
194
194
 
195
195
  @classmethod
196
- def _from_dict(cls, d: dict, components: list[Expr]) -> Expr:
196
+ def _from_dict(cls, d: dict, components: list[Expr]) -> InlineDict:
197
197
  assert 'keys' in d
198
198
  assert len(d['keys']) == len(components)
199
199
  arg = dict(zip(d['keys'], components))
@@ -1,10 +1,11 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Optional, List, Dict
3
+ from typing import Optional
4
4
 
5
5
  import sqlalchemy as sql
6
6
 
7
7
  import pixeltable.type_system as ts
8
+
8
9
  from .data_row import DataRow
9
10
  from .expr import Expr
10
11
  from .row_builder import RowBuilder
@@ -23,7 +24,7 @@ class IsNull(Expr):
23
24
  def _equals(self, other: IsNull) -> bool:
24
25
  return True
25
26
 
26
- def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ClauseElement]:
27
+ def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
27
28
  e = sql_elements.get(self.components[0])
28
29
  if e is None:
29
30
  return None
@@ -33,7 +34,6 @@ class IsNull(Expr):
33
34
  data_row[self.slot_idx] = data_row[self.components[0].slot_idx] is None
34
35
 
35
36
  @classmethod
36
- def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
37
+ def _from_dict(cls, d: dict, components: list[Expr]) -> IsNull:
37
38
  assert len(components) == 1
38
39
  return cls(components[0])
39
-
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Optional, List, Dict
3
+ from typing import Optional
4
4
 
5
5
  import sqlalchemy as sql
6
6
 
@@ -18,7 +18,7 @@ class JsonMapper(Expr):
18
18
  is populated by JsonMapper.eval(). The JsonMapper effectively creates a new scope for its target expr.
19
19
  """
20
20
  def __init__(self, src_expr: Expr, target_expr: Expr):
21
- # TODO: type spec should be List[target_expr.col_type]
21
+ # TODO: type spec should be list[target_expr.col_type]
22
22
  super().__init__(ts.JsonType())
23
23
 
24
24
  # we're creating a new scope, but we don't know yet whether this is nested within another JsonMapper;
@@ -32,7 +32,7 @@ class JsonMapper(Expr):
32
32
  self.target_expr_eval_ctx: Optional[RowBuilder.EvalCtx] = None
33
33
  self.id = self._create_id()
34
34
 
35
- def bind_rel_paths(self, mapper: Optional[JsonMapper]) -> None:
35
+ def bind_rel_paths(self, mapper: Optional[JsonMapper] = None) -> None:
36
36
  self._src_expr.bind_rel_paths(mapper)
37
37
  self._target_expr.bind_rel_paths(self)
38
38
  self.parent_mapper = mapper
@@ -43,12 +43,12 @@ class JsonMapper(Expr):
43
43
  # need to ignore target_expr
44
44
  return self._src_expr.scope()
45
45
 
46
- def dependencies(self) -> List[Expr]:
46
+ def dependencies(self) -> list[Expr]:
47
47
  result = [self._src_expr]
48
48
  result.extend(self._target_dependencies(self._target_expr))
49
49
  return result
50
50
 
51
- def _target_dependencies(self, e: Expr) -> List[Expr]:
51
+ def _target_dependencies(self, e: Expr) -> list[Expr]:
52
52
  """
53
53
  Return all subexprs of e of which the scope isn't contained in target_expr_scope.
54
54
  Those need to be evaluated before us.
@@ -56,7 +56,7 @@ class JsonMapper(Expr):
56
56
  expr_scope = e.scope()
57
57
  if not expr_scope.is_contained_in(self.target_expr_scope):
58
58
  return [e]
59
- result: List[Expr] = []
59
+ result: list[Expr] = []
60
60
  for c in e.components:
61
61
  result.extend(self._target_dependencies(c))
62
62
  return result
@@ -84,10 +84,10 @@ class JsonMapper(Expr):
84
84
  def scope_anchor(self) -> Expr:
85
85
  return self.components[2]
86
86
 
87
- def _equals(self, other: JsonMapper) -> bool:
87
+ def _equals(self, _: JsonMapper) -> bool:
88
88
  return True
89
89
 
90
- def sql_expr(self, _: SqlElementCache) -> Optional[sql.ClauseElement]:
90
+ def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
91
91
  return None
92
92
 
93
93
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
@@ -104,19 +104,18 @@ class JsonMapper(Expr):
104
104
  for i, val in enumerate(src):
105
105
  data_row[self.scope_anchor.slot_idx] = val
106
106
  # stored target_expr
107
- exc_tb = row_builder.eval(data_row, self.target_expr_eval_ctx)
108
- assert exc_tb is None
107
+ row_builder.eval(data_row, self.target_expr_eval_ctx)
109
108
  result[i] = data_row[self._target_expr.slot_idx]
110
109
  data_row[self.slot_idx] = result
111
110
 
112
- def _as_dict(self) -> Dict:
111
+ def _as_dict(self) -> dict:
113
112
  """
114
113
  We need to avoid serializing component[2], which is an ObjectRef.
115
114
  """
116
115
  return {'components': [c.as_dict() for c in self.components[0:2]]}
117
116
 
118
117
  @classmethod
119
- def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
118
+ def _from_dict(cls, d: dict, components: list[Expr]) -> JsonMapper:
120
119
  assert len(components) == 2
121
120
  return cls(components[0], components[1])
122
121
 
@@ -5,28 +5,23 @@ from typing import Any, Optional, Union
5
5
  import jmespath
6
6
  import sqlalchemy as sql
7
7
 
8
- import pixeltable
8
+ import pixeltable as pxt
9
9
  import pixeltable.catalog as catalog
10
10
  import pixeltable.exceptions as excs
11
11
  import pixeltable.type_system as ts
12
- from .data_row import DataRow
13
- from .expr import Expr
14
- from .globals import print_slice
15
- from .json_mapper import JsonMapper
16
- from .row_builder import RowBuilder
17
- from .sql_element_cache import SqlElementCache
18
12
 
19
13
  from .data_row import DataRow
20
14
  from .expr import Expr
21
15
  from .globals import print_slice
22
16
  from .json_mapper import JsonMapper
23
17
  from .row_builder import RowBuilder
18
+ from .sql_element_cache import SqlElementCache
24
19
 
25
20
 
26
21
  class JsonPath(Expr):
27
22
  def __init__(
28
23
  self,
29
- anchor: Optional['pixeltable.exprs.ColumnRef'],
24
+ anchor: Optional['pxt.exprs.Expr'],
30
25
  path_elements: Optional[list[Union[str, int, slice]]] = None,
31
26
  scope_idx: int = 0
32
27
  ) -> None:
@@ -61,7 +56,7 @@ class JsonPath(Expr):
61
56
  return {'path_elements': path_elements, 'scope_idx': self.scope_idx, **super()._as_dict()}
62
57
 
63
58
  @classmethod
64
- def _from_dict(cls, d: dict, components: list[Expr]) -> Expr:
59
+ def _from_dict(cls, d: dict, components: list[Expr]) -> JsonPath:
65
60
  assert 'path_elements' in d
66
61
  assert 'scope_idx' in d
67
62
  assert len(components) <= 1
@@ -143,7 +138,7 @@ class JsonPath(Expr):
143
138
  def _id_attrs(self) -> list[tuple[str, Any]]:
144
139
  return super()._id_attrs() + [('path_elements', self.path_elements)]
145
140
 
146
- def sql_expr(self, _: SqlElementCache) -> Optional[sql.ClauseElement]:
141
+ def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
147
142
  """
148
143
  Postgres appears to have a bug: jsonb_path_query('{a: [{b: 0}, {b: 1}]}', '$.a.b') returns
149
144
  *two* rows (each containing col val 0), not a single row with [0, 0].
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import datetime
4
- from typing import Any, Dict, List, Optional, Tuple
4
+ from typing import Any, Optional
5
5
 
6
6
  import sqlalchemy as sql
7
7
 
@@ -54,10 +54,10 @@ class Literal(Expr):
54
54
  def _equals(self, other: Literal) -> bool:
55
55
  return self.val == other.val
56
56
 
57
- def _id_attrs(self) -> List[Tuple[str, Any]]:
57
+ def _id_attrs(self) -> list[tuple[str, Any]]:
58
58
  return super()._id_attrs() + [('val', self.val)]
59
59
 
60
- def sql_expr(self, _: SqlElementCache) -> Optional[sql.ClauseElement]:
60
+ def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
61
61
  # we need to return something here so that we can generate a Where clause for predicates
62
62
  # that involve literals (like Where c > 0)
63
63
  return sql.sql.expression.literal(self.val)
@@ -66,7 +66,7 @@ class Literal(Expr):
66
66
  # this will be called, even though sql_expr() does not return None
67
67
  data_row[self.slot_idx] = self.val
68
68
 
69
- def _as_dict(self) -> Dict:
69
+ def _as_dict(self) -> dict:
70
70
  # For some types, we need to explicitly record their type, because JSON does not know
71
71
  # how to interpret them unambiguously
72
72
  if self.col_type.is_timestamp_type():
@@ -80,7 +80,7 @@ class Literal(Expr):
80
80
  return {'val': self.val, **super()._as_dict()}
81
81
 
82
82
  @classmethod
83
- def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
83
+ def _from_dict(cls, d: dict, components: list[Expr]) -> Literal:
84
84
  assert 'val' in d
85
85
  if 'val_t' in d:
86
86
  val_t = d['val_t']
@@ -5,6 +5,7 @@ import sqlalchemy as sql
5
5
  import pixeltable.type_system as ts
6
6
  from pixeltable.exprs import Expr, FunctionCall
7
7
  from pixeltable.func import FunctionRegistry
8
+
8
9
  from .data_row import DataRow
9
10
  from .row_builder import RowBuilder
10
11
  from .sql_element_cache import SqlElementCache
@@ -36,10 +37,10 @@ class MethodRef(Expr):
36
37
  return {'method_name': self.method_name, **super()._as_dict()}
37
38
 
38
39
  @classmethod
39
- def _from_dict(cls, d: dict, components: list[Expr]) -> Expr:
40
+ def _from_dict(cls, d: dict, components: list[Expr]) -> 'MethodRef':
40
41
  assert 'method_name' in d
41
42
  assert len(components) == 1
42
- return cls(d['method_name'], components[0])
43
+ return cls(components[0], d['method_name'])
43
44
 
44
45
  def __call__(self, *args, **kwargs) -> FunctionCall:
45
46
  result = self.fn(*[self.base_expr, *args], **kwargs)
@@ -48,12 +49,12 @@ class MethodRef(Expr):
48
49
  return result
49
50
 
50
51
  def _equals(self, other: 'MethodRef') -> bool:
51
- return self.base_expr == other.base_expr and self.method_name == other.method_name
52
+ return self.base_expr.id == other.base_expr.id and self.method_name == other.method_name
52
53
 
53
54
  def _id_attrs(self) -> list[tuple[str, Any]]:
54
55
  return super()._id_attrs() + [('method_name', self.method_name)]
55
56
 
56
- def sql_expr(self, _: SqlElementCache) -> Optional[sql.ClauseElement]:
57
+ def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
57
58
  return None
58
59
 
59
60
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
@@ -5,6 +5,7 @@ from typing import Optional
5
5
  import sqlalchemy as sql
6
6
 
7
7
  import pixeltable.type_system as ts
8
+
8
9
  from .data_row import DataRow
9
10
  from .expr import Expr, ExprScope
10
11
  from .json_mapper import JsonMapper
@@ -33,7 +34,7 @@ class ObjectRef(Expr):
33
34
  def _equals(self, other: ObjectRef) -> bool:
34
35
  return self.owner is other.owner
35
36
 
36
- def sql_expr(self, _: SqlElementCache) -> Optional[sql.ClauseElement]:
37
+ def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
37
38
  return None
38
39
 
39
40
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
@@ -3,7 +3,8 @@ from __future__ import annotations
3
3
  import sys
4
4
  import time
5
5
  from dataclasses import dataclass
6
- from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple
6
+ from typing import Any, Iterable, Optional, Sequence
7
+ from uuid import UUID
7
8
 
8
9
  import sqlalchemy as sql
9
10
 
@@ -11,7 +12,6 @@ import pixeltable.catalog as catalog
11
12
  import pixeltable.exceptions as excs
12
13
  import pixeltable.func as func
13
14
  import pixeltable.utils as utils
14
-
15
15
  from .data_row import DataRow
16
16
  from .expr import Expr
17
17
  from .expr_set import ExprSet
@@ -48,14 +48,37 @@ class RowBuilder:
48
48
  For ColumnRefs to unstored iterator columns:
49
49
  - in order for them to be executable, we also record the iterator args and pass them to the ColumnRef
50
50
  """
51
+ unique_exprs: ExprSet
52
+ next_slot_idx: int
53
+ input_expr_slot_idxs: set[int]
54
+
55
+ # output exprs: all exprs the caller wants to materialize
56
+ # - explicitly requested output_exprs
57
+ # - values for computed columns
58
+ output_exprs: ExprSet
59
+
60
+ input_exprs: ExprSet
61
+
62
+ table_columns: list[ColumnSlotIdx]
63
+ default_eval_ctx: EvalCtx
64
+ unstored_iter_args: dict[UUID, Expr]
65
+
66
+ # transitive dependents for the purpose of exception propagation: an exception for slot i is propagated to
67
+ # _exc_dependents[i]
68
+ # (list of set of slot_idxs, indexed by slot_idx)
69
+ _exc_dependents: list[set[int]]
70
+
71
+ # records the output_expr that a subexpr belongs to
72
+ # (a subexpr can be shared across multiple output exprs)
73
+ output_expr_ids: list[set[int]]
51
74
 
52
75
  @dataclass
53
76
  class EvalCtx:
54
77
  """Context for evaluating a set of target exprs"""
55
- slot_idxs: List[int] # slot idxs of exprs needed to evaluate target exprs; does not contain duplicates
56
- exprs: List[Expr] # exprs corresponding to slot_idxs
57
- target_slot_idxs: List[int] # slot idxs of target exprs; might contain duplicates
58
- target_exprs: List[Expr] # exprs corresponding to target_slot_idxs
78
+ slot_idxs: list[int] # slot idxs of exprs needed to evaluate target exprs; does not contain duplicates
79
+ exprs: list[Expr] # exprs corresponding to slot_idxs
80
+ target_slot_idxs: list[int] # slot idxs of target exprs; might contain duplicates
81
+ target_exprs: list[Expr] # exprs corresponding to target_slot_idxs
59
82
 
60
83
  def __init__(
61
84
  self, output_exprs: Sequence[Expr], columns: Sequence[catalog.Column], input_exprs: Iterable[Expr]
@@ -67,35 +90,56 @@ class RowBuilder:
67
90
  input_exprs: list of Exprs that are excluded from evaluation (because they're already materialized)
68
91
  TODO: enforce that output_exprs doesn't overlap with input_exprs?
69
92
  """
70
- self.unique_exprs = ExprSet() # dependencies precede their dependents
93
+ self.unique_exprs: ExprSet[Expr] = ExprSet() # dependencies precede their dependents
71
94
  self.next_slot_idx = 0
72
95
 
73
96
  # record input and output exprs; make copies to avoid reusing execution state
74
97
  unique_input_exprs = [self._record_unique_expr(e.copy(), recursive=False) for e in input_exprs]
75
98
  self.input_expr_slot_idxs = {e.slot_idx for e in unique_input_exprs}
76
99
 
77
- # output exprs: all exprs the caller wants to materialize
78
- # - explicitly requested output_exprs
79
- # - values for computed columns
80
100
  resolve_cols = set(columns)
81
101
  self.output_exprs = ExprSet([
82
102
  self._record_unique_expr(e.copy().resolve_computed_cols(resolve_cols=resolve_cols), recursive=True)
83
103
  for e in output_exprs
84
104
  ])
85
105
 
86
- # record columns for create_table_row()
106
+ # if init(columns):
107
+ # - we are creating table rows and need to record columns for create_table_row()
108
+ # - output_exprs materialize those columns
109
+ # - input_exprs are ColumnRefs of the non-computed columns (ie, what needs to be provided as input)
110
+ # - media validation:
111
+ # * for write-validated columns, we need to create validating ColumnRefs
112
+ # * further references to that column (eg, computed cols) need to resolve to the validating ColumnRef
87
113
  from .column_ref import ColumnRef
88
- self.table_columns: List[ColumnSlotIdx] = []
114
+ self.table_columns: list[ColumnSlotIdx] = []
115
+ self.input_exprs = ExprSet()
116
+ validating_colrefs: dict[Expr, Expr] = {} # key: non-validating colref, value: corresp. validating colref
89
117
  for col in columns:
118
+ expr: Expr
90
119
  if col.is_computed:
91
120
  assert col.value_expr is not None
92
121
  # create a copy here so we don't reuse execution state and resolve references to computed columns
93
122
  expr = col.value_expr.copy().resolve_computed_cols(resolve_cols=resolve_cols)
123
+ expr = expr.substitute(validating_colrefs)
94
124
  expr = self._record_unique_expr(expr, recursive=True)
95
125
  else:
96
126
  # record a ColumnRef so that references to this column resolve to the same slot idx
97
- expr = ColumnRef(col)
98
- expr = self._record_unique_expr(expr, recursive=False)
127
+ perform_validation = (
128
+ None if not col.col_type.is_media_type()
129
+ else col.media_validation == catalog.MediaValidation.ON_WRITE
130
+ )
131
+ expr = ColumnRef(col, perform_validation=perform_validation)
132
+ # recursive=True: needed for validating ColumnRef
133
+ expr = self._record_unique_expr(expr, recursive=True)
134
+
135
+ if perform_validation:
136
+ # if expr is a validating ColumnRef, the input is the non-validating ColumnRef
137
+ non_validating_colref = expr.components[0]
138
+ self.input_exprs.add(non_validating_colref)
139
+ validating_colrefs[non_validating_colref] = expr
140
+ else:
141
+ self.input_exprs.add(expr)
142
+
99
143
  self.add_table_column(col, expr.slot_idx)
100
144
  self.output_exprs.add(expr)
101
145
 
@@ -118,8 +162,9 @@ class RowBuilder:
118
162
  unstored_iter_col_refs = [col_ref for col_ref in col_refs if refs_unstored_iter_col(col_ref)]
119
163
  component_views = [col_ref.col.tbl for col_ref in unstored_iter_col_refs]
120
164
  unstored_iter_args = {view.id: view.iterator_args.copy() for view in component_views}
121
- self.unstored_iter_args = \
122
- {id: self._record_unique_expr(arg, recursive=True) for id, arg in unstored_iter_args.items()}
165
+ self.unstored_iter_args = {
166
+ id: self._record_unique_expr(arg, recursive=True) for id, arg in unstored_iter_args.items()
167
+ }
123
168
 
124
169
  for col_ref in unstored_iter_col_refs:
125
170
  iter_arg_ctx = self.create_eval_ctx([unstored_iter_args[col_ref.col.tbl.id]])
@@ -129,25 +174,28 @@ class RowBuilder:
129
174
  for i, expr in enumerate(self.unique_exprs):
130
175
  assert expr.slot_idx == i
131
176
 
132
- # record transitive dependencies (list of set of slot_idxs, indexed by slot_idx)
133
- self.dependencies: List[Set[int]] = [set() for _ in range(self.num_materialized)]
177
+ # determine transitive dependencies for the purpose of exception propagation
178
+ # (list of set of slot_idxs, indexed by slot_idx)
179
+ exc_dependencies: list[set[int]] = [set() for _ in range(self.num_materialized)]
180
+ from .column_property_ref import ColumnPropertyRef
134
181
  for expr in self.unique_exprs:
135
182
  if expr.slot_idx in self.input_expr_slot_idxs:
136
183
  # this is input and therefore doesn't depend on other exprs
137
184
  continue
185
+ # error properties don't have exceptions themselves
186
+ if isinstance(expr, ColumnPropertyRef) and expr.is_error_prop():
187
+ continue
138
188
  for d in expr.dependencies():
139
- self.dependencies[expr.slot_idx].add(d.slot_idx)
140
- self.dependencies[expr.slot_idx].update(self.dependencies[d.slot_idx])
189
+ exc_dependencies[expr.slot_idx].add(d.slot_idx)
190
+ exc_dependencies[expr.slot_idx].update(exc_dependencies[d.slot_idx])
141
191
 
142
- # derive transitive dependents
143
- self.dependents: List[Set[int]] = [set() for _ in range(self.num_materialized)]
192
+ self._exc_dependents = [set() for _ in range(self.num_materialized)]
144
193
  for expr in self.unique_exprs:
145
- for d in self.dependencies[expr.slot_idx]:
146
- self.dependents[d].add(expr.slot_idx)
194
+ assert expr.slot_idx is not None
195
+ for d_idx in exc_dependencies[expr.slot_idx]:
196
+ self._exc_dependents[d_idx].add(expr.slot_idx)
147
197
 
148
- # records the output_expr that a subexpr belongs to
149
- # (a subexpr can be shared across multiple output exprs)
150
- self.output_expr_ids: List[Set[int]] = [set() for _ in range(self.num_materialized)]
198
+ self.output_expr_ids = [set() for _ in range(self.num_materialized)]
151
199
  for e in self.output_exprs:
152
200
  self._record_output_expr_id(e, e.slot_idx)
153
201
 
@@ -155,7 +203,7 @@ class RowBuilder:
155
203
  """Record a column that is part of the table row"""
156
204
  self.table_columns.append(ColumnSlotIdx(col, slot_idx))
157
205
 
158
- def output_slot_idxs(self) -> List[ColumnSlotIdx]:
206
+ def output_slot_idxs(self) -> list[ColumnSlotIdx]:
159
207
  """Return ColumnSlotIdx for output columns"""
160
208
  return self.table_columns
161
209
 
@@ -206,9 +254,9 @@ class RowBuilder:
206
254
  for d in e.dependencies():
207
255
  self._record_output_expr_id(d, output_expr_id)
208
256
 
209
- def _compute_dependencies(self, target_slot_idxs: List[int], excluded_slot_idxs: List[int]) -> List[int]:
257
+ def _compute_dependencies(self, target_slot_idxs: list[int], excluded_slot_idxs: list[int]) -> list[int]:
210
258
  """Compute exprs needed to materialize the given target slots, excluding 'excluded_slot_idxs'"""
211
- dependencies = [set() for _ in range(self.num_materialized)] # indexed by slot_idx
259
+ dependencies: list[set[int]] = [set() for _ in range(self.num_materialized)] # indexed by slot_idx
212
260
  # doing this front-to-back ensures that we capture transitive dependencies
213
261
  max_target_slot_idx = max(target_slot_idxs)
214
262
  for expr in self.unique_exprs:
@@ -237,6 +285,8 @@ class RowBuilder:
237
285
  for e in expr_list:
238
286
  self.__set_slot_idxs_aux(e)
239
287
  if remove_duplicates:
288
+ # only allowed if `expr_list` is a mutable list
289
+ assert isinstance(expr_list, list)
240
290
  deduped = list(ExprSet(expr_list))
241
291
  expr_list[:] = deduped
242
292
 
@@ -248,13 +298,14 @@ class RowBuilder:
248
298
  for c in e.components:
249
299
  self.__set_slot_idxs_aux(c)
250
300
 
251
- def get_dependencies(self, targets: List[Expr], exclude: Optional[List[Expr]] = None) -> List[Expr]:
301
+ def get_dependencies(self, targets: Iterable[Expr], exclude: Optional[Iterable[Expr]] = None) -> list[Expr]:
252
302
  """
253
303
  Return list of dependencies needed to evaluate the given target exprs (expressed as slot idxs).
254
304
  The exprs given in 'exclude' are excluded.
255
305
  Returns:
256
306
  list of Exprs from unique_exprs (= with slot_idx set)
257
307
  """
308
+ targets = list(targets)
258
309
  if exclude is None:
259
310
  exclude = []
260
311
  if len(targets) == 0:
@@ -270,8 +321,9 @@ class RowBuilder:
270
321
  result_ids.sort()
271
322
  return [self.unique_exprs[id] for id in result_ids]
272
323
 
273
- def create_eval_ctx(self, targets: list[Expr], exclude: Optional[list[Expr]] = None) -> EvalCtx:
324
+ def create_eval_ctx(self, targets: Iterable[Expr], exclude: Optional[Iterable[Expr]] = None) -> EvalCtx:
274
325
  """Return EvalCtx for targets"""
326
+ targets = list(targets)
275
327
  if exclude is None:
276
328
  exclude = []
277
329
  if len(targets) == 0:
@@ -287,7 +339,7 @@ class RowBuilder:
287
339
  def set_exc(self, data_row: DataRow, slot_idx: int, exc: Exception) -> None:
288
340
  """Record an exception in data_row and propagate it to dependents"""
289
341
  data_row.set_exc(slot_idx, exc)
290
- for slot_idx in self.dependents[slot_idx]:
342
+ for slot_idx in self._exc_dependents[slot_idx]:
291
343
  data_row.set_exc(slot_idx, exc)
292
344
 
293
345
  def eval(
@@ -318,14 +370,14 @@ class RowBuilder:
318
370
  raise excs.ExprEvalError(
319
371
  expr, f'expression {expr}', data_row.get_exc(expr.slot_idx), exc_tb, input_vals, 0)
320
372
 
321
- def create_table_row(self, data_row: DataRow, exc_col_ids: Set[int]) -> Tuple[Dict[str, Any], int]:
373
+ def create_table_row(self, data_row: DataRow, exc_col_ids: set[int]) -> tuple[dict[str, Any], int]:
322
374
  """Create a table row from the slots that have an output column assigned
323
375
 
324
- Return Tuple[dict that represents a stored row (can be passed to sql.insert()), # of exceptions]
376
+ Return tuple[dict that represents a stored row (can be passed to sql.insert()), # of exceptions]
325
377
  This excludes system columns.
326
378
  """
327
379
  num_excs = 0
328
- table_row: Dict[str, Any] = {}
380
+ table_row: dict[str, Any] = {}
329
381
  for info in self.table_columns:
330
382
  col, slot_idx = info.col, info.slot_idx
331
383
  if data_row.has_exc(slot_idx):