pixeltable 0.1.2__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pixeltable might be problematic.

Files changed (140)
  1. pixeltable/__init__.py +21 -4
  2. pixeltable/catalog/__init__.py +13 -0
  3. pixeltable/catalog/catalog.py +159 -0
  4. pixeltable/catalog/column.py +200 -0
  5. pixeltable/catalog/dir.py +32 -0
  6. pixeltable/catalog/globals.py +33 -0
  7. pixeltable/catalog/insertable_table.py +191 -0
  8. pixeltable/catalog/named_function.py +36 -0
  9. pixeltable/catalog/path.py +58 -0
  10. pixeltable/catalog/path_dict.py +139 -0
  11. pixeltable/catalog/schema_object.py +39 -0
  12. pixeltable/catalog/table.py +581 -0
  13. pixeltable/catalog/table_version.py +749 -0
  14. pixeltable/catalog/table_version_path.py +133 -0
  15. pixeltable/catalog/view.py +203 -0
  16. pixeltable/client.py +520 -31
  17. pixeltable/dataframe.py +540 -349
  18. pixeltable/env.py +373 -48
  19. pixeltable/exceptions.py +12 -21
  20. pixeltable/exec/__init__.py +9 -0
  21. pixeltable/exec/aggregation_node.py +78 -0
  22. pixeltable/exec/cache_prefetch_node.py +113 -0
  23. pixeltable/exec/component_iteration_node.py +79 -0
  24. pixeltable/exec/data_row_batch.py +95 -0
  25. pixeltable/exec/exec_context.py +22 -0
  26. pixeltable/exec/exec_node.py +61 -0
  27. pixeltable/exec/expr_eval_node.py +217 -0
  28. pixeltable/exec/in_memory_data_node.py +69 -0
  29. pixeltable/exec/media_validation_node.py +43 -0
  30. pixeltable/exec/sql_scan_node.py +225 -0
  31. pixeltable/exprs/__init__.py +24 -0
  32. pixeltable/exprs/arithmetic_expr.py +102 -0
  33. pixeltable/exprs/array_slice.py +71 -0
  34. pixeltable/exprs/column_property_ref.py +77 -0
  35. pixeltable/exprs/column_ref.py +105 -0
  36. pixeltable/exprs/comparison.py +77 -0
  37. pixeltable/exprs/compound_predicate.py +98 -0
  38. pixeltable/exprs/data_row.py +187 -0
  39. pixeltable/exprs/expr.py +586 -0
  40. pixeltable/exprs/expr_set.py +39 -0
  41. pixeltable/exprs/function_call.py +380 -0
  42. pixeltable/exprs/globals.py +69 -0
  43. pixeltable/exprs/image_member_access.py +115 -0
  44. pixeltable/exprs/image_similarity_predicate.py +58 -0
  45. pixeltable/exprs/inline_array.py +107 -0
  46. pixeltable/exprs/inline_dict.py +101 -0
  47. pixeltable/exprs/is_null.py +38 -0
  48. pixeltable/exprs/json_mapper.py +121 -0
  49. pixeltable/exprs/json_path.py +159 -0
  50. pixeltable/exprs/literal.py +54 -0
  51. pixeltable/exprs/object_ref.py +41 -0
  52. pixeltable/exprs/predicate.py +44 -0
  53. pixeltable/exprs/row_builder.py +355 -0
  54. pixeltable/exprs/rowid_ref.py +94 -0
  55. pixeltable/exprs/type_cast.py +53 -0
  56. pixeltable/exprs/variable.py +45 -0
  57. pixeltable/func/__init__.py +9 -0
  58. pixeltable/func/aggregate_function.py +194 -0
  59. pixeltable/func/batched_function.py +53 -0
  60. pixeltable/func/callable_function.py +69 -0
  61. pixeltable/func/expr_template_function.py +82 -0
  62. pixeltable/func/function.py +110 -0
  63. pixeltable/func/function_registry.py +227 -0
  64. pixeltable/func/globals.py +36 -0
  65. pixeltable/func/nos_function.py +202 -0
  66. pixeltable/func/signature.py +166 -0
  67. pixeltable/func/udf.py +163 -0
  68. pixeltable/functions/__init__.py +52 -103
  69. pixeltable/functions/eval.py +216 -0
  70. pixeltable/functions/fireworks.py +61 -0
  71. pixeltable/functions/huggingface.py +120 -0
  72. pixeltable/functions/image.py +16 -0
  73. pixeltable/functions/openai.py +88 -0
  74. pixeltable/functions/pil/image.py +148 -7
  75. pixeltable/functions/string.py +13 -0
  76. pixeltable/functions/together.py +27 -0
  77. pixeltable/functions/util.py +41 -0
  78. pixeltable/functions/video.py +62 -0
  79. pixeltable/iterators/__init__.py +3 -0
  80. pixeltable/iterators/base.py +48 -0
  81. pixeltable/iterators/document.py +311 -0
  82. pixeltable/iterators/video.py +89 -0
  83. pixeltable/metadata/__init__.py +54 -0
  84. pixeltable/metadata/converters/convert_10.py +18 -0
  85. pixeltable/metadata/schema.py +211 -0
  86. pixeltable/plan.py +656 -0
  87. pixeltable/store.py +413 -182
  88. pixeltable/tests/conftest.py +143 -86
  89. pixeltable/tests/test_audio.py +65 -0
  90. pixeltable/tests/test_catalog.py +27 -0
  91. pixeltable/tests/test_client.py +14 -14
  92. pixeltable/tests/test_component_view.py +372 -0
  93. pixeltable/tests/test_dataframe.py +433 -0
  94. pixeltable/tests/test_dirs.py +78 -62
  95. pixeltable/tests/test_document.py +117 -0
  96. pixeltable/tests/test_exprs.py +591 -135
  97. pixeltable/tests/test_function.py +297 -67
  98. pixeltable/tests/test_functions.py +283 -1
  99. pixeltable/tests/test_migration.py +43 -0
  100. pixeltable/tests/test_nos.py +54 -0
  101. pixeltable/tests/test_snapshot.py +208 -0
  102. pixeltable/tests/test_table.py +1086 -258
  103. pixeltable/tests/test_transactional_directory.py +42 -0
  104. pixeltable/tests/test_types.py +5 -11
  105. pixeltable/tests/test_video.py +149 -34
  106. pixeltable/tests/test_view.py +530 -0
  107. pixeltable/tests/utils.py +186 -45
  108. pixeltable/tool/create_test_db_dump.py +149 -0
  109. pixeltable/type_system.py +490 -133
  110. pixeltable/utils/__init__.py +17 -46
  111. pixeltable/utils/clip.py +12 -15
  112. pixeltable/utils/coco.py +136 -0
  113. pixeltable/utils/documents.py +39 -0
  114. pixeltable/utils/filecache.py +195 -0
  115. pixeltable/utils/help.py +11 -0
  116. pixeltable/utils/media_store.py +76 -0
  117. pixeltable/utils/parquet.py +126 -0
  118. pixeltable/utils/pytorch.py +172 -0
  119. pixeltable/utils/s3.py +13 -0
  120. pixeltable/utils/sql.py +17 -0
  121. pixeltable/utils/transactional_directory.py +35 -0
  122. pixeltable-0.2.0.dist-info/LICENSE +18 -0
  123. pixeltable-0.2.0.dist-info/METADATA +117 -0
  124. pixeltable-0.2.0.dist-info/RECORD +125 -0
  125. {pixeltable-0.1.2.dist-info → pixeltable-0.2.0.dist-info}/WHEEL +1 -1
  126. pixeltable/catalog.py +0 -1421
  127. pixeltable/exprs.py +0 -1745
  128. pixeltable/function.py +0 -269
  129. pixeltable/functions/clip.py +0 -10
  130. pixeltable/functions/pil/__init__.py +0 -23
  131. pixeltable/functions/tf.py +0 -21
  132. pixeltable/index.py +0 -57
  133. pixeltable/tests/test_dict.py +0 -24
  134. pixeltable/tests/test_tf.py +0 -69
  135. pixeltable/tf.py +0 -33
  136. pixeltable/utils/tf.py +0 -33
  137. pixeltable/utils/video.py +0 -32
  138. pixeltable-0.1.2.dist-info/LICENSE +0 -201
  139. pixeltable-0.1.2.dist-info/METADATA +0 -89
  140. pixeltable-0.1.2.dist-info/RECORD +0 -37

pixeltable/exec/media_validation_node.py
@@ -0,0 +1,43 @@
+ from __future__ import annotations
+ from typing import Iterable, Optional
+
+ from .data_row_batch import DataRowBatch
+ from .exec_node import ExecNode
+ import pixeltable.exprs as exprs
+ import pixeltable.exceptions as excs
+
+
+ class MediaValidationNode(ExecNode):
+     """Validation of selected media slots
+     Records exceptions in the rows of the input batch
+     """
+     def __init__(
+             self, row_builder: exprs.RowBuilder, media_slots: Iterable[exprs.ColumnSlotIdx],
+             input: Optional[ExecNode]):
+         super().__init__(row_builder, [], [], input)
+         self.row_builder = row_builder
+         self.input = input
+         for col in [c.col for c in media_slots]:
+             assert col.col_type.is_media_type()
+         self.media_slots = media_slots
+
+     def __next__(self) -> DataRowBatch:
+         assert self.input is not None
+         row_batch = next(self.input)
+         for row in row_batch:
+             for slot_idx, col in [(c.slot_idx, c.col) for c in self.media_slots]:
+                 if row.has_exc(slot_idx):
+                     continue
+                 assert row.has_val[slot_idx]
+                 path = row.file_paths[slot_idx]
+                 if path is None:
+                     continue
+
+                 try:
+                     col.col_type.validate_media(path)
+                 except excs.Error as exc:
+                     self.row_builder.set_exc(row, slot_idx, exc)
+                     if not self.ctx.ignore_errors:
+                         raise exc
+
+         return row_batch
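
The node above pulls batches from its input, validates each media file, and records failures on the row instead of aborting, unless ignore_errors is off. A minimal self-contained sketch of that record-or-raise pattern (all names below are illustrative, not part of the package):

    class MediaError(Exception):
        pass

    def validate_paths(paths, validate, ignore_errors=True):
        errors = {}                    # row index -> recorded exception
        for i, path in enumerate(paths):
            if path is None:           # nothing to validate for this row
                continue
            try:
                validate(path)
            except MediaError as exc:
                errors[i] = exc        # record the error alongside the row
                if not ignore_errors:
                    raise
        return errors

    def check_suffix(path):
        if not path.endswith(('.mp4', '.jpg')):
            raise MediaError(f'unsupported media file: {path}')

    print(validate_paths(['a.mp4', 'b.txt', None], check_suffix))   # {1: MediaError(...)}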

pixeltable/exec/sql_scan_node.py
@@ -0,0 +1,225 @@
+ from typing import List, Optional, Tuple, Iterable, Set
+ from uuid import UUID
+ import logging
+ import warnings
+
+ import sqlalchemy as sql
+
+ from .data_row_batch import DataRowBatch
+ from .exec_node import ExecNode
+ import pixeltable.exprs as exprs
+ import pixeltable.catalog as catalog
+
+
+ _logger = logging.getLogger('pixeltable')
+
+ class SqlScanNode(ExecNode):
+     """Materializes data from the store via SQL
+     """
+     def __init__(
+             self, tbl: catalog.TableVersionPath, row_builder: exprs.RowBuilder,
+             select_list: Iterable[exprs.Expr],
+             where_clause: Optional[exprs.Expr] = None, filter: Optional[exprs.Predicate] = None,
+             order_by_items: Optional[List[Tuple[exprs.Expr, bool]]] = None,
+             similarity_clause: Optional[exprs.ImageSimilarityPredicate] = None,
+             limit: int = 0, set_pk: bool = False, exact_version_only: Optional[List[catalog.TableVersion]] = None
+     ):
+         """
+         Args:
+             select_list: output of the query
+             sql_where_clause: SQL Where clause
+             filter: additional Where-clause predicate that can't be evaluated via SQL
+             limit: max number of rows to return: 0 = no limit
+             set_pk: if True, sets the primary for each DataRow
+             exact_version_only: tables for which we only want to see rows created at the current version
+         """
+         # create Select stmt
+         if order_by_items is None:
+             order_by_items = []
+         if exact_version_only is None:
+             exact_version_only = []
+         super().__init__(row_builder, [], [], None)
+         self.tbl = tbl
+         target = tbl.tbl_version  # the stored table we're scanning
+         self.sql_exprs = exprs.ExprSet(select_list)
+         # unstored iter columns: we also need to retrieve whatever is needed to materialize the iter args
+         for iter_arg in row_builder.unstored_iter_args.values():
+             sql_subexprs = iter_arg.subexprs(filter=lambda e: e.sql_expr() is not None, traverse_matches=False)
+             [self.sql_exprs.append(e) for e in sql_subexprs]
+         self.filter = filter
+         self.filter_eval_ctx = \
+             row_builder.create_eval_ctx([filter], exclude=select_list) if filter is not None else None
+         self.limit = limit
+
+         # change rowid refs against a base table to rowid refs against the target table, so that we minimize
+         # the number of tables that need to be joined to the target table
+         for rowid_ref in [e for e in self.sql_exprs if isinstance(e, exprs.RowidRef)]:
+             rowid_ref.set_tbl(tbl)
+
+         where_clause_tbl_ids = where_clause.tbl_ids() if where_clause is not None else set()
+         refd_tbl_ids = exprs.Expr.list_tbl_ids(self.sql_exprs) | where_clause_tbl_ids
+         sql_select_list = [e.sql_expr() for e in self.sql_exprs]
+         assert len(sql_select_list) == len(self.sql_exprs)
+         assert all([e is not None for e in sql_select_list])
+         self.set_pk = set_pk
+         self.num_pk_cols = 0
+         if set_pk:
+             # we also need to retrieve the pk columns
+             pk_columns = target.store_tbl.pk_columns()
+             self.num_pk_cols = len(pk_columns)
+             sql_select_list += pk_columns
+
+         self.stmt = sql.select(*sql_select_list)
+         self.stmt = self.create_from_clause(
+             tbl, self.stmt, refd_tbl_ids, exact_version_only={t.id for t in exact_version_only})
+
+         # change rowid refs against a base table to rowid refs against the target table, so that we minimize
+         # the number of tables that need to be joined to the target table
+         for rowid_ref in [e for e, _ in order_by_items if isinstance(e, exprs.RowidRef)]:
+             rowid_ref.set_tbl(tbl)
+         order_by_clause = [e.sql_expr().desc() if not asc else e.sql_expr() for e, asc in order_by_items]
+
+         if where_clause is not None:
+             sql_where_clause = where_clause.sql_expr()
+             assert sql_where_clause is not None
+             self.stmt = self.stmt.where(sql_where_clause)
+         if similarity_clause is not None:
+             self.stmt = self.stmt.order_by(
+                 similarity_clause.img_col_ref.col.sa_idx_col.l2_distance(similarity_clause.embedding()))
+         if len(order_by_clause) > 0:
+             self.stmt = self.stmt.order_by(*order_by_clause)
+         elif target.id in row_builder.unstored_iter_args:
+             # we are referencing unstored iter columns from this view and try to order by our primary key,
+             # which ensures that iterators will see monotonically increasing pos values
+             self.stmt = self.stmt.order_by(*self.tbl.store_tbl.rowid_columns())
+         if limit != 0 and self.filter is None:
+             # if we need to do post-SQL filtering, we can't use LIMIT
+             self.stmt = self.stmt.limit(limit)
+
+         self.result_cursor: Optional[sql.engine.CursorResult] = None
+
+         try:
+             # log stmt, if possible
+             stmt_str = str(self.stmt.compile(compile_kwargs={'literal_binds': True}))
+             _logger.debug(f'SqlScanNode stmt:\n{stmt_str}')
+         except Exception as e:
+             pass
+
+     @classmethod
+     def create_from_clause(
+             cls, tbl: catalog.TableVersionPath, stmt: sql.Select, refd_tbl_ids: Optional[Set[UUID]] = None,
+             exact_version_only: Optional[Set[UUID]] = None
+     ) -> sql.Select:
+         """Add From clause to stmt for tables/views referenced by materialized_exprs
+         Args:
+             tbl: root table of join chain
+             stmt: stmt to add From clause to
+             materialized_exprs: list of exprs that reference tables in the join chain; if empty, include only the root
+             exact_version_only: set of table ids for which we only want to see rows created at the current version
+         Returns:
+             augmented stmt
+         """
+         # we need to include at least the root
+         if refd_tbl_ids is None:
+             refd_tbl_ids = {}
+         if exact_version_only is None:
+             exact_version_only = {}
+         candidates = tbl.get_tbl_versions()
+         assert len(candidates) > 0
+         joined_tbls: List[catalog.TableVersion] = [candidates[0]]
+         for tbl in candidates[1:]:
+             if tbl.id in refd_tbl_ids:
+                 joined_tbls.append(tbl)
+
+         first = True
+         for tbl in joined_tbls[::-1]:
+             if first:
+                 stmt = stmt.select_from(tbl.store_tbl.sa_tbl)
+                 first = False
+             else:
+                 # join tbl to prev_tbl on prev_tbl's rowid cols
+                 prev_tbl_rowid_cols = prev_tbl.store_tbl.rowid_columns()
+                 tbl_rowid_cols = tbl.store_tbl.rowid_columns()
+                 rowid_clauses = \
+                     [c1 == c2 for c1, c2 in zip(prev_tbl_rowid_cols, tbl_rowid_cols[:len(prev_tbl_rowid_cols)])]
+                 stmt = stmt.join(tbl.store_tbl.sa_tbl, sql.and_(*rowid_clauses))
+             if tbl.id in exact_version_only:
+                 stmt = stmt.where(tbl.store_tbl.v_min_col == tbl.version)
+             else:
+                 stmt = stmt \
+                     .where(tbl.store_tbl.v_min_col <= tbl.version) \
+                     .where(tbl.store_tbl.v_max_col > tbl.version)
+             prev_tbl = tbl
+         return stmt
+
+     def _log_explain(self, conn: sql.engine.Connection) -> None:
+         try:
+             # don't set dialect=Env.get().engine.dialect: x % y turns into x %% y, which results in a syntax error
+             stmt_str = str(self.stmt.compile(compile_kwargs={'literal_binds': True}))
+             explain_result = self.ctx.conn.execute(sql.text(f'EXPLAIN {stmt_str}'))
+             explain_str = '\n'.join([str(row) for row in explain_result])
+             _logger.debug(f'SqlScanNode explain:\n{explain_str}')
+         except Exception as e:
+             _logger.warning(f'EXPLAIN failed')
+
+     def __next__(self) -> DataRowBatch:
+         if self.result_cursor is None:
+             # run the query; do this here rather than in _open(), exceptions are only expected during iteration
+             assert self.ctx.conn is not None
+             try:
+                 self._log_explain(self.ctx.conn)
+                 with warnings.catch_warnings(record=True) as w:
+                     self.result_cursor = self.ctx.conn.execute(self.stmt)
+                     for warning in w:
+                         pass
+                 self.has_more_rows = True
+             except Exception as e:
+                 self.has_more_rows = False
+                 raise e
+
+         if not self.has_more_rows:
+             raise StopIteration
+
+         output_batch = DataRowBatch(self.tbl.tbl_version, self.row_builder)
+         needs_row = True
+         while self.ctx.batch_size == 0 or len(output_batch) < self.ctx.batch_size:
+             try:
+                 sql_row = next(self.result_cursor)
+             except StopIteration:
+                 self.has_more_rows = False
+                 break
+
+             if needs_row:
+                 output_row = output_batch.add_row()
+             if self.num_pk_cols > 0:
+                 output_row.set_pk(tuple(sql_row[-self.num_pk_cols:]))
+             # copy the output of the SQL query into the output row
+             for i, e in enumerate(self.sql_exprs):
+                 slot_idx = e.slot_idx
+                 output_row[slot_idx] = sql_row[i]
+             if self.filter is not None:
+                 self.row_builder.eval(output_row, self.filter_eval_ctx, profile=self.ctx.profile)
+                 if output_row[self.filter.slot_idx]:
+                     needs_row = True
+                     if self.limit is not None and len(output_batch) >= self.limit:
+                         self.has_more_rows = False
+                         break
+                 else:
+                     # we re-use this row for the next sql row if it didn't pass the filter
+                     needs_row = False
+                     output_row.clear()
+
+         if not needs_row:
+             # the last row didn't pass the filter
+             assert self.filter is not None
+             output_batch.pop_row()
+
+         _logger.debug(f'SqlScanNode: returning {len(output_batch)} rows')
+         if len(output_batch) == 0:
+             raise StopIteration
+         return output_batch
+
+     def _close(self) -> None:
+         if self.result_cursor is not None:
+             self.result_cursor.close()
+
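
create_from_clause() above joins each table of the chain on the shared rowid prefix and then restricts rows by version. A self-contained SQLAlchemy sketch of just the version predicate (schema and column names are invented for illustration):

    import sqlalchemy as sql

    md = sql.MetaData()
    store_tbl = sql.Table(
        'store_tbl', md,
        sql.Column('rowid', sql.BigInteger),
        sql.Column('v_min', sql.BigInteger),
        sql.Column('v_max', sql.BigInteger),
    )
    version = 7
    # default visibility: row was created at or before `version` and not yet superseded
    visible_at_version = sql.and_(store_tbl.c.v_min <= version, store_tbl.c.v_max > version)
    # exact_version_only: row must have been created at exactly this version
    created_at_version = store_tbl.c.v_min == version

    stmt = sql.select(store_tbl.c.rowid).where(visible_at_version)
    print(stmt.compile(compile_kwargs={'literal_binds': True}))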

pixeltable/exprs/__init__.py
@@ -0,0 +1,24 @@
+ from .arithmetic_expr import ArithmeticExpr
+ from .array_slice import ArraySlice
+ from .column_property_ref import ColumnPropertyRef
+ from .column_ref import ColumnRef
+ from .comparison import Comparison
+ from .compound_predicate import CompoundPredicate
+ from .data_row import DataRow
+ from .expr import Expr
+ from .function_call import FunctionCall
+ from .image_member_access import ImageMemberAccess
+ from .image_similarity_predicate import ImageSimilarityPredicate
+ from .inline_array import InlineArray
+ from .inline_dict import InlineDict
+ from .is_null import IsNull
+ from .json_mapper import JsonMapper
+ from .json_path import RELATIVE_PATH_ROOT, JsonPath
+ from .literal import Literal
+ from .object_ref import ObjectRef
+ from .variable import Variable
+ from .predicate import Predicate
+ from .row_builder import RowBuilder, ColumnSlotIdx, ExecProfile
+ from .rowid_ref import RowidRef
+ from .expr_set import ExprSet
+ from .type_cast import TypeCast

pixeltable/exprs/arithmetic_expr.py
@@ -0,0 +1,102 @@
+ from __future__ import annotations
+ from typing import Optional, List, Any, Dict, Tuple
+
+ import sqlalchemy as sql
+
+ from .globals import ArithmeticOperator
+ from .expr import Expr
+ from .data_row import DataRow
+ from .row_builder import RowBuilder
+ import pixeltable.exceptions as excs
+ import pixeltable.catalog as catalog
+ import pixeltable.type_system as ts
+
+
+ class ArithmeticExpr(Expr):
+     """
+     Allows arithmetic exprs on json paths
+     """
+     def __init__(self, operator: ArithmeticOperator, op1: Expr, op2: Expr):
+         # TODO: determine most specific common supertype
+         if op1.col_type.is_json_type() or op2.col_type.is_json_type():
+             # we assume it's a float
+             super().__init__(ts.FloatType())
+         else:
+             super().__init__(ts.ColumnType.supertype(op1.col_type, op2.col_type))
+         self.operator = operator
+         self.components = [op1, op2]
+
+         # do typechecking after initialization in order for __str__() to work
+         if not op1.col_type.is_numeric_type() and not op1.col_type.is_json_type():
+             raise excs.Error(f'{self}: {operator} requires numeric types, but {op1} has type {op1.col_type}')
+         if not op2.col_type.is_numeric_type() and not op2.col_type.is_json_type():
+             raise excs.Error(f'{self}: {operator} requires numeric types, but {op2} has type {op2.col_type}')
+
+         self.id = self._create_id()
+
+     def __str__(self) -> str:
+         # add parentheses around operands that are ArithmeticExprs to express precedence
+         op1_str = f'({self._op1})' if isinstance(self._op1, ArithmeticExpr) else str(self._op1)
+         op2_str = f'({self._op2})' if isinstance(self._op2, ArithmeticExpr) else str(self._op2)
+         return f'{op1_str} {str(self.operator)} {op2_str}'
+
+     def _equals(self, other: ArithmeticExpr) -> bool:
+         return self.operator == other.operator
+
+     def _id_attrs(self) -> List[Tuple[str, Any]]:
+         return super()._id_attrs() + [('operator', self.operator.value)]
+
+     @property
+     def _op1(self) -> Expr:
+         return self.components[0]
+
+     @property
+     def _op2(self) -> Expr:
+         return self.components[1]
+
+     def sql_expr(self) -> Optional[sql.ClauseElement]:
+         left = self._op1.sql_expr()
+         right = self._op2.sql_expr()
+         if left is None or right is None:
+             return None
+         if self.operator == ArithmeticOperator.ADD:
+             return left + right
+         if self.operator == ArithmeticOperator.SUB:
+             return left - right
+         if self.operator == ArithmeticOperator.MUL:
+             return left * right
+         if self.operator == ArithmeticOperator.DIV:
+             return left / right
+         if self.operator == ArithmeticOperator.MOD:
+             return left % right
+
+     def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
+         op1_val = data_row[self._op1.slot_idx]
+         op2_val = data_row[self._op2.slot_idx]
+         # check types if we couldn't do that prior to execution
+         if self._op1.col_type.is_json_type() and not isinstance(op1_val, int) and not isinstance(op1_val, float):
+             raise excs.Error(
+                 f'{self.operator} requires numeric type, but {self._op1} has type {type(op1_val).__name__}')
+         if self._op2.col_type.is_json_type() and not isinstance(op2_val, int) and not isinstance(op2_val, float):
+             raise excs.Error(
+                 f'{self.operator} requires numeric type, but {self._op2} has type {type(op2_val).__name__}')
+
+         if self.operator == ArithmeticOperator.ADD:
+             data_row[self.slot_idx] = op1_val + op2_val
+         elif self.operator == ArithmeticOperator.SUB:
+             data_row[self.slot_idx] = op1_val - op2_val
+         elif self.operator == ArithmeticOperator.MUL:
+             data_row[self.slot_idx] = op1_val * op2_val
+         elif self.operator == ArithmeticOperator.DIV:
+             data_row[self.slot_idx] = op1_val / op2_val
+         elif self.operator == ArithmeticOperator.MOD:
+             data_row[self.slot_idx] = op1_val % op2_val
+
+     def _as_dict(self) -> Dict:
+         return {'operator': self.operator.value, **super()._as_dict()}
+
+     @classmethod
+     def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
+         assert 'operator' in d
+         assert len(components) == 2
+         return cls(ArithmeticOperator(d['operator']), components[0], components[1])
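
ArithmeticExpr either pushes the operator into SQL (sql_expr()) or applies it in Python (eval()), re-checking at runtime that json-typed operands are actually numeric. A stand-alone sketch of that dispatch, using a local stand-in for the operator enum (the real ArithmeticOperator lives in pixeltable/exprs/globals.py):

    import enum
    import operator

    class Op(enum.Enum):
        ADD = '+'
        SUB = '-'
        MUL = '*'
        DIV = '/'
        MOD = '%'

    _PY_OP = {Op.ADD: operator.add, Op.SUB: operator.sub, Op.MUL: operator.mul,
              Op.DIV: operator.truediv, Op.MOD: operator.mod}

    def eval_arith(op: Op, v1, v2):
        for v in (v1, v2):
            if not isinstance(v, (int, float)):    # mirrors the runtime check for json operands
                raise TypeError(f'{op.value} requires numeric operands, got {type(v).__name__}')
        return _PY_OP[op](v1, v2)

    print(eval_arith(Op.MOD, 7, 3))    # 1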

pixeltable/exprs/array_slice.py
@@ -0,0 +1,71 @@
+ from __future__ import annotations
+ from typing import Optional, List, Any, Dict, Tuple
+ import copy
+
+ import sqlalchemy as sql
+
+ from .expr import Expr
+ from .globals import print_slice
+ from .data_row import DataRow
+ from .row_builder import RowBuilder
+ import pixeltable.catalog as catalog
+
+
+ class ArraySlice(Expr):
+     """
+     Slice operation on an array, eg, t.array_col[:, 1:2].
+     """
+     def __init__(self, arr: Expr, index: Tuple):
+         assert arr.col_type.is_array_type()
+         # determine result type
+         super().__init__(arr.col_type)
+         self.components = [arr]
+         self.index = index
+         self.id = self._create_id()
+
+     def __str__(self) -> str:
+         index_strs: List[str] = []
+         for el in self.index:
+             if isinstance(el, int):
+                 index_strs.append(str(el))
+             if isinstance(el, slice):
+                 index_strs.append(print_slice(el))
+         return f'{self._array}[{", ".join(index_strs)}]'
+
+     @property
+     def _array(self) -> Expr:
+         return self.components[0]
+
+     def _equals(self, other: ArraySlice) -> bool:
+         return self.index == other.index
+
+     def _id_attrs(self) -> List[Tuple[str, Any]]:
+         return super()._id_attrs() + [('index', self.index)]
+
+     def sql_expr(self) -> Optional[sql.ClauseElement]:
+         return None
+
+     def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
+         val = data_row[self._array.slot_idx]
+         data_row[self.slot_idx] = val[self.index]
+
+     def _as_dict(self) -> Dict:
+         index = []
+         for el in self.index:
+             if isinstance(el, slice):
+                 index.append([el.start, el.stop, el.step])
+             else:
+                 index.append(el)
+         return {'index': index, **super()._as_dict()}
+
+     @classmethod
+     def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
+         assert 'index' in d
+         index = []
+         for el in d['index']:
+             if isinstance(el, list):
+                 index.append(slice(el[0], el[1], el[2]))
+             else:
+                 index.append(el)
+         return cls(components[0], tuple(index))
+
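
The _as_dict()/_from_dict() pair above serializes the index tuple into JSON-friendly values: a slice becomes the 3-element list [start, stop, step], plain ints are stored unchanged. A small self-contained illustration of that round trip:

    index = (slice(None, None, None), slice(1, 2, None), 0)    # e.g. t.array_col[:, 1:2, 0]

    encoded = [[el.start, el.stop, el.step] if isinstance(el, slice) else el for el in index]
    decoded = tuple(slice(*el) if isinstance(el, list) else el for el in encoded)

    print(encoded)             # [[None, None, None], [1, 2, None], 0]
    assert decoded == index    # the round trip is lossless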

pixeltable/exprs/column_property_ref.py
@@ -0,0 +1,77 @@
+ from __future__ import annotations
+ from typing import Optional, List, Any, Dict, Tuple
+ import enum
+
+ import sqlalchemy as sql
+
+ from .expr import Expr
+ from .column_ref import ColumnRef
+ from .row_builder import RowBuilder
+ from .data_row import DataRow
+ import pixeltable.catalog as catalog
+ import pixeltable.type_system as ts
+
+
+ class ColumnPropertyRef(Expr):
+     """A reference to a property of a table column
+
+     The properties themselves are type-specific and may or may not need to reference the underlying column data.
+     """
+     class Property(enum.Enum):
+         ERRORTYPE = 0
+         ERRORMSG = 1
+         FILEURL = 2
+         LOCALPATH = 3
+
+     def __init__(self, col_ref: ColumnRef, prop: Property):
+         super().__init__(ts.StringType(nullable=True))
+         self.components = [col_ref]
+         self.prop = prop
+         self.id = self._create_id()
+
+     def default_column_name(self) -> Optional[str]:
+         return str(self).replace('.', '_')
+
+     def _equals(self, other: ColumnRef) -> bool:
+         return self.prop == other.prop
+
+     def _id_attrs(self) -> List[Tuple[str, Any]]:
+         return super()._id_attrs() + [('prop', self.prop.value)]
+
+     @property
+     def _col_ref(self) -> ColumnRef:
+         return self.components[0]
+
+     def __str__(self) -> str:
+         return f'{self._col_ref}.{self.prop.name.lower()}'
+
+     def sql_expr(self) -> Optional[sql.ClauseElement]:
+         if not self._col_ref.col.is_stored:
+             return None
+         if self.prop == self.Property.ERRORTYPE:
+             assert self._col_ref.col.sa_errortype_col is not None
+             return self._col_ref.col.sa_errortype_col
+         if self.prop == self.Property.ERRORMSG:
+             assert self._col_ref.col.sa_errormsg_col is not None
+             return self._col_ref.col.sa_errormsg_col
+         if self.prop == self.Property.FILEURL:
+             # the file url is stored as the column value
+             return self._col_ref.sql_expr()
+         return None
+
+     def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
+         assert self.prop == self.Property.FILEURL or self.prop == self.Property.LOCALPATH
+         assert data_row.has_val[self._col_ref.slot_idx]
+         if self.prop == self.Property.FILEURL:
+             data_row[self.slot_idx] = data_row.file_urls[self._col_ref.slot_idx]
+         if self.prop == self.Property.LOCALPATH:
+             data_row[self.slot_idx] = data_row.file_paths[self._col_ref.slot_idx]
+
+     def _as_dict(self) -> Dict:
+         return {'prop': self.prop.value, **super()._as_dict()}
+
+     @classmethod
+     def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
+         assert 'prop' in d
+         return cls(components[0], cls.Property(d['prop']))
+
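
The Property enum above is what ColumnRef.__getattr__ (the next file in this diff) resolves attribute accesses such as col.errortype or col.fileurl against. A trivial stand-alone sketch of that name-to-enum mapping:

    import enum

    class Property(enum.Enum):
        ERRORTYPE = 0
        ERRORMSG = 1
        FILEURL = 2
        LOCALPATH = 3

    def resolve_property(attr_name: str):
        try:
            return Property[attr_name.upper()]
        except KeyError:
            return None    # not a column property; other attribute resolution applies

    print(resolve_property('fileurl'))    # Property.FILEURL
    print(resolve_property('rotate'))     # None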

pixeltable/exprs/column_ref.py
@@ -0,0 +1,105 @@
+ from __future__ import annotations
+ from typing import Optional, List, Any, Dict, Tuple
+ from uuid import UUID
+
+ import sqlalchemy as sql
+
+ from .expr import Expr
+ from .data_row import DataRow
+ from .row_builder import RowBuilder
+ import pixeltable.iterators as iters
+ import pixeltable.exceptions as excs
+ import pixeltable.catalog as catalog
+
+
+ class ColumnRef(Expr):
+     """A reference to a table column
+
+     When this reference is created in the context of a view, it can also refer to a column of the view base.
+     For that reason, a ColumnRef needs to be serialized with the qualifying table id (column ids are only
+     unique in the context of a particular table).
+     """
+     def __init__(self, col: catalog.Column):
+         super().__init__(col.col_type)
+         assert col.tbl is not None
+         self.col = col
+         self.is_unstored_iter_col = \
+             col.tbl.is_component_view() and col.tbl.is_iterator_column(col) and not col.is_stored
+         self.iter_arg_ctx: Optional[RowBuilder.EvalCtx] = None
+         # number of rowid columns in the base table
+         self.base_rowid_len = col.tbl.base.num_rowid_columns() if self.is_unstored_iter_col else 0
+         self.base_rowid = [None] * self.base_rowid_len
+         self.iterator: Optional[iters.ComponentIterator] = None
+         # index of the position column in the view's primary key; don't try to reference tbl.store_tbl here
+         self.pos_idx: Optional[int] = col.tbl.num_rowid_columns() - 1 if self.is_unstored_iter_col else None
+         self.id = self._create_id()
+
+     def set_iter_arg_ctx(self, iter_arg_ctx: RowBuilder.EvalCtx) -> None:
+         self.iter_arg_ctx = iter_arg_ctx
+         assert len(self.iter_arg_ctx.target_slot_idxs) == 1  # a single inline dict
+
+     def _id_attrs(self) -> List[Tuple[str, Any]]:
+         return super()._id_attrs() + [('tbl_id', self.col.tbl.id), ('col_id', self.col.id)]
+
+     def __getattr__(self, name: str) -> Expr:
+         from .column_property_ref import ColumnPropertyRef
+         # resolve column properties
+         if name == ColumnPropertyRef.Property.ERRORTYPE.name.lower() \
+                 or name == ColumnPropertyRef.Property.ERRORMSG.name.lower():
+             if not (self.col.is_computed and self.col.is_stored) and not self.col.col_type.is_media_type():
+                 raise excs.Error(f'{name} only valid for a stored computed or media column: {self}')
+             return ColumnPropertyRef(self, ColumnPropertyRef.Property[name.upper()])
+         if name == ColumnPropertyRef.Property.FILEURL.name.lower() \
+                 or name == ColumnPropertyRef.Property.LOCALPATH.name.lower():
+             if not self.col.col_type.is_media_type():
+                 raise excs.Error(f'{name} only valid for image/video/audio/document columns: {self}')
+             if self.col.is_computed and not self.col.is_stored:
+                 raise excs.Error(f'{name} not valid for computed unstored columns: {self}')
+             return ColumnPropertyRef(self, ColumnPropertyRef.Property[name.upper()])
+
+         if self.col_type.is_json_type():
+             from .json_path import JsonPath
+             return JsonPath(self, [name])
+
+         return super().__getattr__(name)
+
+     def default_column_name(self) -> Optional[str]:
+         return str(self)
+
+     def _equals(self, other: ColumnRef) -> bool:
+         return self.col == other.col
+
+     def __str__(self) -> str:
+         return self.col.name
+
+     def sql_expr(self) -> Optional[sql.ClauseElement]:
+         return self.col.sa_col
+
+     def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
+         if not self.is_unstored_iter_col:
+             assert data_row.has_val[self.slot_idx]
+             return
+
+         # if this is a new base row, we need to instantiate a new iterator
+         if self.base_rowid != data_row.pk[:self.base_rowid_len]:
+             row_builder.eval(data_row, self.iter_arg_ctx)
+             iterator_args = data_row[self.iter_arg_ctx.target_slot_idxs[0]]
+             self.iterator = self.col.tbl.iterator_cls(**iterator_args)
+             self.base_rowid = data_row.pk[:self.base_rowid_len]
+         self.iterator.set_pos(data_row.pk[self.pos_idx])
+         res = next(self.iterator)
+         data_row[self.slot_idx] = res[self.col.name]
+
+     def _as_dict(self) -> Dict:
+         tbl = self.col.tbl
+         version = tbl.version if tbl.is_snapshot else None
+         return {'tbl_id': str(tbl.id), 'tbl_version': version, 'col_id': self.col.id}
+
+     @classmethod
+     def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
+         tbl_id, version, col_id = UUID(d['tbl_id']), d['tbl_version'], d['col_id']
+         tbl_version = catalog.Catalog.get().tbl_versions[(tbl_id, version)]
+         assert col_id in tbl_version.cols_by_id
+         col = tbl_version.cols_by_id[col_id]
+         return cls(col)
+
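
For unstored iterator columns, ColumnRef.eval() above re-instantiates the component iterator only when the base-rowid prefix of the row's primary key changes, and otherwise keeps reusing it (the real code also repositions it via set_pos(), which is omitted here). A self-contained sketch of that caching pattern, with invented stand-in names:

    def make_iterator(base_rowid):
        # stand-in for col.tbl.iterator_cls(**iterator_args)
        return (f'base {base_rowid}, pos {pos}' for pos in range(3))

    base_rowid_len = 1
    cached_base_rowid = [None] * base_rowid_len
    iterator = None

    for pk in [(0, 0), (0, 1), (1, 0)]:    # (base rowid, pos) pairs
        if cached_base_rowid != list(pk[:base_rowid_len]):
            cached_base_rowid = list(pk[:base_rowid_len])
            iterator = make_iterator(cached_base_rowid)    # new base row -> new iterator
        print(next(iterator))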