pixeltable 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (119) hide show
  1. pixeltable/__init__.py +53 -0
  2. pixeltable/__version__.py +3 -0
  3. pixeltable/catalog/__init__.py +13 -0
  4. pixeltable/catalog/catalog.py +159 -0
  5. pixeltable/catalog/column.py +181 -0
  6. pixeltable/catalog/dir.py +32 -0
  7. pixeltable/catalog/globals.py +33 -0
  8. pixeltable/catalog/insertable_table.py +192 -0
  9. pixeltable/catalog/named_function.py +36 -0
  10. pixeltable/catalog/path.py +58 -0
  11. pixeltable/catalog/path_dict.py +139 -0
  12. pixeltable/catalog/schema_object.py +39 -0
  13. pixeltable/catalog/table.py +695 -0
  14. pixeltable/catalog/table_version.py +1026 -0
  15. pixeltable/catalog/table_version_path.py +133 -0
  16. pixeltable/catalog/view.py +203 -0
  17. pixeltable/dataframe.py +749 -0
  18. pixeltable/env.py +466 -0
  19. pixeltable/exceptions.py +17 -0
  20. pixeltable/exec/__init__.py +10 -0
  21. pixeltable/exec/aggregation_node.py +78 -0
  22. pixeltable/exec/cache_prefetch_node.py +116 -0
  23. pixeltable/exec/component_iteration_node.py +79 -0
  24. pixeltable/exec/data_row_batch.py +94 -0
  25. pixeltable/exec/exec_context.py +22 -0
  26. pixeltable/exec/exec_node.py +61 -0
  27. pixeltable/exec/expr_eval_node.py +217 -0
  28. pixeltable/exec/in_memory_data_node.py +73 -0
  29. pixeltable/exec/media_validation_node.py +43 -0
  30. pixeltable/exec/sql_scan_node.py +226 -0
  31. pixeltable/exprs/__init__.py +25 -0
  32. pixeltable/exprs/arithmetic_expr.py +102 -0
  33. pixeltable/exprs/array_slice.py +71 -0
  34. pixeltable/exprs/column_property_ref.py +77 -0
  35. pixeltable/exprs/column_ref.py +114 -0
  36. pixeltable/exprs/comparison.py +77 -0
  37. pixeltable/exprs/compound_predicate.py +98 -0
  38. pixeltable/exprs/data_row.py +199 -0
  39. pixeltable/exprs/expr.py +594 -0
  40. pixeltable/exprs/expr_set.py +39 -0
  41. pixeltable/exprs/function_call.py +382 -0
  42. pixeltable/exprs/globals.py +69 -0
  43. pixeltable/exprs/image_member_access.py +96 -0
  44. pixeltable/exprs/in_predicate.py +96 -0
  45. pixeltable/exprs/inline_array.py +109 -0
  46. pixeltable/exprs/inline_dict.py +103 -0
  47. pixeltable/exprs/is_null.py +38 -0
  48. pixeltable/exprs/json_mapper.py +121 -0
  49. pixeltable/exprs/json_path.py +159 -0
  50. pixeltable/exprs/literal.py +66 -0
  51. pixeltable/exprs/object_ref.py +41 -0
  52. pixeltable/exprs/predicate.py +44 -0
  53. pixeltable/exprs/row_builder.py +329 -0
  54. pixeltable/exprs/rowid_ref.py +94 -0
  55. pixeltable/exprs/similarity_expr.py +65 -0
  56. pixeltable/exprs/type_cast.py +53 -0
  57. pixeltable/exprs/variable.py +45 -0
  58. pixeltable/ext/__init__.py +5 -0
  59. pixeltable/ext/functions/yolox.py +92 -0
  60. pixeltable/func/__init__.py +7 -0
  61. pixeltable/func/aggregate_function.py +197 -0
  62. pixeltable/func/callable_function.py +113 -0
  63. pixeltable/func/expr_template_function.py +99 -0
  64. pixeltable/func/function.py +141 -0
  65. pixeltable/func/function_registry.py +227 -0
  66. pixeltable/func/globals.py +46 -0
  67. pixeltable/func/nos_function.py +202 -0
  68. pixeltable/func/signature.py +162 -0
  69. pixeltable/func/udf.py +164 -0
  70. pixeltable/functions/__init__.py +95 -0
  71. pixeltable/functions/eval.py +215 -0
  72. pixeltable/functions/fireworks.py +34 -0
  73. pixeltable/functions/huggingface.py +167 -0
  74. pixeltable/functions/image.py +16 -0
  75. pixeltable/functions/openai.py +289 -0
  76. pixeltable/functions/pil/image.py +147 -0
  77. pixeltable/functions/string.py +13 -0
  78. pixeltable/functions/together.py +143 -0
  79. pixeltable/functions/util.py +52 -0
  80. pixeltable/functions/video.py +62 -0
  81. pixeltable/globals.py +425 -0
  82. pixeltable/index/__init__.py +2 -0
  83. pixeltable/index/base.py +51 -0
  84. pixeltable/index/embedding_index.py +168 -0
  85. pixeltable/io/__init__.py +3 -0
  86. pixeltable/io/hf_datasets.py +188 -0
  87. pixeltable/io/pandas.py +148 -0
  88. pixeltable/io/parquet.py +192 -0
  89. pixeltable/iterators/__init__.py +3 -0
  90. pixeltable/iterators/base.py +52 -0
  91. pixeltable/iterators/document.py +432 -0
  92. pixeltable/iterators/video.py +88 -0
  93. pixeltable/metadata/__init__.py +58 -0
  94. pixeltable/metadata/converters/convert_10.py +18 -0
  95. pixeltable/metadata/converters/convert_12.py +3 -0
  96. pixeltable/metadata/converters/convert_13.py +41 -0
  97. pixeltable/metadata/schema.py +234 -0
  98. pixeltable/plan.py +620 -0
  99. pixeltable/store.py +424 -0
  100. pixeltable/tool/create_test_db_dump.py +184 -0
  101. pixeltable/tool/create_test_video.py +81 -0
  102. pixeltable/type_system.py +846 -0
  103. pixeltable/utils/__init__.py +17 -0
  104. pixeltable/utils/arrow.py +98 -0
  105. pixeltable/utils/clip.py +18 -0
  106. pixeltable/utils/coco.py +136 -0
  107. pixeltable/utils/documents.py +69 -0
  108. pixeltable/utils/filecache.py +195 -0
  109. pixeltable/utils/help.py +11 -0
  110. pixeltable/utils/http_server.py +70 -0
  111. pixeltable/utils/media_store.py +76 -0
  112. pixeltable/utils/pytorch.py +91 -0
  113. pixeltable/utils/s3.py +13 -0
  114. pixeltable/utils/sql.py +17 -0
  115. pixeltable/utils/transactional_directory.py +35 -0
  116. pixeltable-0.0.0.dist-info/LICENSE +18 -0
  117. pixeltable-0.0.0.dist-info/METADATA +131 -0
  118. pixeltable-0.0.0.dist-info/RECORD +119 -0
  119. pixeltable-0.0.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,226 @@
1
+ from typing import List, Optional, Tuple, Iterable, Set
2
+ from uuid import UUID
3
+ import logging
4
+ import warnings
5
+
6
+ import sqlalchemy as sql
7
+
8
+ from .data_row_batch import DataRowBatch
9
+ from .exec_node import ExecNode
10
+ import pixeltable.exprs as exprs
11
+ import pixeltable.catalog as catalog
12
+
13
+
14
+ _logger = logging.getLogger('pixeltable')
15
+
16
class SqlScanNode(ExecNode):
    """Materializes data from the store via SQL

    The Select stmt is assembled in the constructor and executed by the first call to __next__(); the result
    rows are then copied into DataRowBatches, optionally after applying a non-SQL filter.
    """
    def __init__(
            self, tbl: catalog.TableVersionPath, row_builder: exprs.RowBuilder,
            select_list: Iterable[exprs.Expr],
            where_clause: Optional[exprs.Expr] = None, filter: Optional[exprs.Predicate] = None,
            order_by_items: Optional[List[Tuple[exprs.Expr, bool]]] = None,
            limit: int = 0, set_pk: bool = False, exact_version_only: Optional[List[catalog.TableVersion]] = None
    ):
        """
        Args:
            tbl: table/view to scan; tbl.tbl_version is the stored table the stmt targets
            row_builder: used to create the eval ctx for filter and to evaluate it per row
            select_list: output of the query
            where_clause: Where-clause expr; must be translatable to SQL (sql_expr() is not None)
            filter: additional Where-clause predicate that can't be evaluated via SQL
            order_by_items: list of (expr, asc) pairs; asc == False produces a descending sort
            limit: max number of rows to return: 0 = no limit
            set_pk: if True, sets the primary key for each DataRow
            exact_version_only: tables for which we only want to see rows created at the current version
        """
        # create Select stmt
        if order_by_items is None:
            order_by_items = []
        if exact_version_only is None:
            exact_version_only = []
        super().__init__(row_builder, [], [], None)
        self.tbl = tbl
        target = tbl.tbl_version  # the stored table we're scanning
        self.sql_exprs = exprs.ExprSet(select_list)
        # unstored iter columns: we also need to retrieve whatever is needed to materialize the iter args
        for iter_arg in row_builder.unstored_iter_args.values():
            sql_subexprs = iter_arg.subexprs(filter=lambda e: e.sql_expr() is not None, traverse_matches=False)
            for e in sql_subexprs:
                self.sql_exprs.append(e)
        self.filter = filter
        self.filter_eval_ctx = \
            row_builder.create_eval_ctx([filter], exclude=select_list) if filter is not None else None
        self.limit = limit

        # change rowid refs against a base table to rowid refs against the target table, so that we minimize
        # the number of tables that need to be joined to the target table
        for rowid_ref in [e for e in self.sql_exprs if isinstance(e, exprs.RowidRef)]:
            rowid_ref.set_tbl(tbl)

        where_clause_tbl_ids = where_clause.tbl_ids() if where_clause is not None else set()
        refd_tbl_ids = exprs.Expr.list_tbl_ids(self.sql_exprs) | where_clause_tbl_ids
        sql_select_list = [e.sql_expr() for e in self.sql_exprs]
        assert len(sql_select_list) == len(self.sql_exprs)
        assert all(e is not None for e in sql_select_list)
        self.set_pk = set_pk
        self.num_pk_cols = 0
        if set_pk:
            # we also need to retrieve the pk columns; they are appended after the select list exprs, which is
            # what __next__() relies on when it slices them off the end of each result row
            pk_columns = target.store_tbl.pk_columns()
            self.num_pk_cols = len(pk_columns)
            sql_select_list += pk_columns

        self.stmt = sql.select(*sql_select_list)
        self.stmt = self.create_from_clause(
            tbl, self.stmt, refd_tbl_ids, exact_version_only={t.id for t in exact_version_only})

        # change rowid refs against a base table to rowid refs against the target table, so that we minimize
        # the number of tables that need to be joined to the target table
        for rowid_ref in [e for e, _ in order_by_items if isinstance(e, exprs.RowidRef)]:
            rowid_ref.set_tbl(tbl)
        order_by_clause: List[sql.ClauseElement] = []
        for e, asc in order_by_items:
            if isinstance(e, exprs.SimilarityExpr):
                order_by_clause.append(e.as_order_by_clause(asc))
            else:
                order_by_clause.append(e.sql_expr().desc() if not asc else e.sql_expr())

        if where_clause is not None:
            sql_where_clause = where_clause.sql_expr()
            assert sql_where_clause is not None
            self.stmt = self.stmt.where(sql_where_clause)
        if order_by_clause:
            self.stmt = self.stmt.order_by(*order_by_clause)
        elif target.id in row_builder.unstored_iter_args:
            # we are referencing unstored iter columns from this view and try to order by our primary key,
            # which ensures that iterators will see monotonically increasing pos values
            # NOTE(review): this reads store_tbl from the TableVersionPath, whereas the pk columns above are
            # read from target (tbl.tbl_version) - confirm that both resolve to the same store table
            self.stmt = self.stmt.order_by(*self.tbl.store_tbl.rowid_columns())
        if limit != 0 and self.filter is None:
            # if we need to do post-SQL filtering, we can't use LIMIT
            self.stmt = self.stmt.limit(limit)

        self.result_cursor: Optional[sql.engine.CursorResult] = None

        try:
            # log stmt, if possible
            stmt_str = str(self.stmt.compile(compile_kwargs={'literal_binds': True}))
            _logger.debug(f'SqlScanNode stmt:\n{stmt_str}')
        except Exception as e:
            # logging is best-effort: not every stmt can be rendered with literal binds
            _logger.debug(f'SqlScanNode: could not render stmt for logging: {e}')

    @classmethod
    def create_from_clause(
            cls, tbl: catalog.TableVersionPath, stmt: sql.Select, refd_tbl_ids: Optional[Set[UUID]] = None,
            exact_version_only: Optional[Set[UUID]] = None
    ) -> sql.Select:
        """Add From clause to stmt for tables/views referenced by refd_tbl_ids
        Args:
            tbl: root table of join chain
            stmt: stmt to add From clause to
            refd_tbl_ids: ids of the tables in the join chain that are referenced; if empty, include only the root
            exact_version_only: set of table ids for which we only want to see rows created at the current version
        Returns:
            augmented stmt
        """
        if refd_tbl_ids is None:
            refd_tbl_ids = set()
        if exact_version_only is None:
            exact_version_only = set()
        # we need to include at least the root
        candidates = tbl.get_tbl_versions()
        assert len(candidates) > 0
        joined_tbls: List[catalog.TableVersion] = [candidates[0]]
        joined_tbls.extend(t for t in candidates[1:] if t.id in refd_tbl_ids)

        # walk the chain in reverse: the last table becomes the From target and every other table is joined
        # to the one processed right before it
        prev_tbl: Optional[catalog.TableVersion] = None
        for tbl_version in reversed(joined_tbls):
            store_tbl = tbl_version.store_tbl
            if prev_tbl is None:
                stmt = stmt.select_from(store_tbl.sa_tbl)
            else:
                # join tbl_version to prev_tbl on prev_tbl's rowid cols
                prev_tbl_rowid_cols = prev_tbl.store_tbl.rowid_columns()
                tbl_rowid_cols = store_tbl.rowid_columns()
                rowid_clauses = \
                    [c1 == c2 for c1, c2 in zip(prev_tbl_rowid_cols, tbl_rowid_cols[:len(prev_tbl_rowid_cols)])]
                stmt = stmt.join(store_tbl.sa_tbl, sql.and_(*rowid_clauses))
            if tbl_version.id in exact_version_only:
                stmt = stmt.where(store_tbl.v_min_col == tbl_version.version)
            else:
                # rows visible at this version: v_min <= version < v_max
                stmt = stmt \
                    .where(store_tbl.v_min_col <= tbl_version.version) \
                    .where(store_tbl.v_max_col > tbl_version.version)
            prev_tbl = tbl_version
        return stmt

    def _log_explain(self, conn: sql.engine.Connection) -> None:
        """Run EXPLAIN for self.stmt on conn and log the output at debug level; failures are only logged"""
        try:
            # don't set dialect=Env.get().engine.dialect: x % y turns into x %% y, which results in a syntax error
            stmt_str = str(self.stmt.compile(compile_kwargs={'literal_binds': True}))
            explain_result = conn.execute(sql.text(f'EXPLAIN {stmt_str}'))
            explain_str = '\n'.join([str(row) for row in explain_result])
            _logger.debug(f'SqlScanNode explain:\n{explain_str}')
        except Exception as e:
            _logger.warning(f'EXPLAIN failed: {e}')

    def __next__(self) -> DataRowBatch:
        """Return the next batch of rows

        Executes the stmt on the first call. Rows that don't pass self.filter are dropped.

        Raises:
            StopIteration: when there are no more rows to return
        """
        if self.result_cursor is None:
            # run the query; do this here rather than in _open(), exceptions are only expected during iteration
            assert self.ctx.conn is not None
            try:
                self._log_explain(self.ctx.conn)
                with warnings.catch_warnings(record=True) as caught_warnings:
                    self.result_cursor = self.ctx.conn.execute(self.stmt)
                    # keep warnings away from the user, but leave a trace in the log
                    for warning in caught_warnings:
                        _logger.debug(f'SqlScanNode warning: {warning.message}')
                self.has_more_rows = True
            except Exception:
                self.has_more_rows = False
                raise

        if not self.has_more_rows:
            raise StopIteration

        output_batch = DataRowBatch(self.tbl.tbl_version, self.row_builder)
        # needs_row == False: the current output_row was rejected by the filter and gets re-used
        needs_row = True
        while self.ctx.batch_size == 0 or len(output_batch) < self.ctx.batch_size:
            try:
                sql_row = next(self.result_cursor)
            except StopIteration:
                self.has_more_rows = False
                break

            if needs_row:
                output_row = output_batch.add_row()
            if self.num_pk_cols > 0:
                # the pk columns are the trailing columns of the result row
                output_row.set_pk(tuple(sql_row[-self.num_pk_cols:]))
            # copy the output of the SQL query into the output row
            for i, e in enumerate(self.sql_exprs):
                output_row[e.slot_idx] = sql_row[i]
            if self.filter is not None:
                self.row_builder.eval(output_row, self.filter_eval_ctx, profile=self.ctx.profile)
                if output_row[self.filter.slot_idx]:
                    needs_row = True
                    # the SQL stmt has no LIMIT when there is a filter, so the limit is enforced here
                    if self.limit > 0 and len(output_batch) >= self.limit:
                        self.has_more_rows = False
                        break
                else:
                    # we re-use this row for the next sql row if it didn't pass the filter
                    needs_row = False
                    output_row.clear()

        if not needs_row:
            # the last row didn't pass the filter
            assert self.filter is not None
            output_batch.pop_row()

        _logger.debug(f'SqlScanNode: returning {len(output_batch)} rows')
        if len(output_batch) == 0:
            raise StopIteration
        return output_batch

    def _close(self) -> None:
        """Close the result cursor, if the stmt was executed"""
        if self.result_cursor is not None:
            self.result_cursor.close()
226
+
@@ -0,0 +1,25 @@
1
+ from .arithmetic_expr import ArithmeticExpr
2
+ from .array_slice import ArraySlice
3
+ from .column_property_ref import ColumnPropertyRef
4
+ from .column_ref import ColumnRef
5
+ from .comparison import Comparison
6
+ from .compound_predicate import CompoundPredicate
7
+ from .data_row import DataRow
8
+ from .expr import Expr
9
+ from .expr_set import ExprSet
10
+ from .function_call import FunctionCall
11
+ from .image_member_access import ImageMemberAccess
12
+ from .in_predicate import InPredicate
13
+ from .inline_array import InlineArray
14
+ from .inline_dict import InlineDict
15
+ from .is_null import IsNull
16
+ from .json_mapper import JsonMapper
17
+ from .json_path import RELATIVE_PATH_ROOT, JsonPath
18
+ from .literal import Literal
19
+ from .object_ref import ObjectRef
20
+ from .predicate import Predicate
21
+ from .row_builder import RowBuilder, ColumnSlotIdx, ExecProfile
22
+ from .rowid_ref import RowidRef
23
+ from .similarity_expr import SimilarityExpr
24
+ from .type_cast import TypeCast
25
+ from .variable import Variable
@@ -0,0 +1,102 @@
1
+ from __future__ import annotations
2
+ from typing import Optional, List, Any, Dict, Tuple
3
+
4
+ import sqlalchemy as sql
5
+
6
+ from .globals import ArithmeticOperator
7
+ from .expr import Expr
8
+ from .data_row import DataRow
9
+ from .row_builder import RowBuilder
10
+ import pixeltable.exceptions as excs
11
+ import pixeltable.catalog as catalog
12
+ import pixeltable.type_system as ts
13
+
14
+
15
class ArithmeticExpr(Expr):
    """
    Binary arithmetic expr (+, -, *, /, %) over two operands; also allows arithmetic exprs on json paths
    """
    def __init__(self, operator: ArithmeticOperator, op1: Expr, op2: Expr):
        # TODO: determine most specific common supertype
        has_json_operand = op1.col_type.is_json_type() or op2.col_type.is_json_type()
        if has_json_operand:
            # the actual type is only known at execution time: we assume it's a float
            result_type = ts.FloatType()
        else:
            result_type = ts.ColumnType.supertype(op1.col_type, op2.col_type)
        super().__init__(result_type)
        self.operator = operator
        self.components = [op1, op2]

        # do typechecking after initialization in order for __str__() to work
        for operand in (op1, op2):
            operand_type = operand.col_type
            if not (operand_type.is_numeric_type() or operand_type.is_json_type()):
                raise excs.Error(
                    f'{self}: {operator} requires numeric types, but {operand} has type {operand.col_type}')

        self.id = self._create_id()

    def __str__(self) -> str:
        def operand_str(operand: Expr) -> str:
            # add parentheses around operands that are ArithmeticExprs to express precedence
            if isinstance(operand, ArithmeticExpr):
                return f'({operand})'
            return str(operand)

        lhs = operand_str(self._op1)
        rhs = operand_str(self._op2)
        return f'{lhs} {str(self.operator)} {rhs}'

    def _equals(self, other: ArithmeticExpr) -> bool:
        return self.operator == other.operator

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        return super()._id_attrs() + [('operator', self.operator.value)]

    @property
    def _op1(self) -> Expr:
        return self.components[0]

    @property
    def _op2(self) -> Expr:
        return self.components[1]

    def _operator_fn(self) -> Optional[Any]:
        """Returns the binary Python operation for self.operator, or None if there isn't one"""
        dispatch = {
            ArithmeticOperator.ADD: lambda lhs, rhs: lhs + rhs,
            ArithmeticOperator.SUB: lambda lhs, rhs: lhs - rhs,
            ArithmeticOperator.MUL: lambda lhs, rhs: lhs * rhs,
            ArithmeticOperator.DIV: lambda lhs, rhs: lhs / rhs,
            ArithmeticOperator.MOD: lambda lhs, rhs: lhs % rhs,
        }
        return dispatch.get(self.operator)

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        """Returns the SQL form of this expr, or None if either operand has no SQL form"""
        left = self._op1.sql_expr()
        right = self._op2.sql_expr()
        if left is None or right is None:
            return None
        fn = self._operator_fn()
        if fn is None:
            return None
        return fn(left, right)

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        """Computes the result from the operands' slots in data_row and stores it in this expr's slot"""
        op1_val = data_row[self._op1.slot_idx]
        op2_val = data_row[self._op2.slot_idx]
        # check types if we couldn't do that prior to execution
        for operand, val in ((self._op1, op1_val), (self._op2, op2_val)):
            if operand.col_type.is_json_type() and not isinstance(val, (int, float)):
                raise excs.Error(
                    f'{self.operator} requires numeric type, but {operand} has type {type(val).__name__}')

        fn = self._operator_fn()
        if fn is not None:
            data_row[self.slot_idx] = fn(op1_val, op2_val)

    def _as_dict(self) -> Dict:
        return {'operator': self.operator.value, **super()._as_dict()}

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        assert 'operator' in d
        assert len(components) == 2
        lhs, rhs = components[0], components[1]
        return cls(ArithmeticOperator(d['operator']), lhs, rhs)
@@ -0,0 +1,71 @@
1
+ from __future__ import annotations
2
+ from typing import Optional, List, Any, Dict, Tuple
3
+ import copy
4
+
5
+ import sqlalchemy as sql
6
+
7
+ from .expr import Expr
8
+ from .globals import print_slice
9
+ from .data_row import DataRow
10
+ from .row_builder import RowBuilder
11
+ import pixeltable.catalog as catalog
12
+
13
+
14
class ArraySlice(Expr):
    """
    Slice operation on an array, eg, t.array_col[:, 1:2].
    """
    def __init__(self, arr: Expr, index: Tuple):
        assert arr.col_type.is_array_type()
        # the result type is the type of the array being sliced
        super().__init__(arr.col_type)
        self.components = [arr]
        self.index = index
        self.id = self._create_id()

    def __str__(self) -> str:
        # only int and slice elements are printed
        index_strs: List[str] = [
            str(el) if isinstance(el, int) else print_slice(el)
            for el in self.index
            if isinstance(el, (int, slice))
        ]
        joined = ", ".join(index_strs)
        return f'{self._array}[{joined}]'

    @property
    def _array(self) -> Expr:
        return self.components[0]

    def _equals(self, other: ArraySlice) -> bool:
        return self.index == other.index

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        return super()._id_attrs() + [('index', self.index)]

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        # there is no SQL form for a slice
        return None

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        """Applies self.index to the array value in data_row and stores the result in this expr's slot"""
        array_val = data_row[self._array.slot_idx]
        data_row[self.slot_idx] = array_val[self.index]

    def _as_dict(self) -> Dict:
        # slices are serialized as [start, stop, step] lists, everything else is stored as-is
        index = [[el.start, el.stop, el.step] if isinstance(el, slice) else el for el in self.index]
        return {'index': index, **super()._as_dict()}

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        assert 'index' in d
        # inverse of _as_dict(): lists are turned back into slices
        index = tuple(slice(el[0], el[1], el[2]) if isinstance(el, list) else el for el in d['index'])
        return cls(components[0], index)
+
@@ -0,0 +1,77 @@
1
+ from __future__ import annotations
2
+ from typing import Optional, List, Any, Dict, Tuple
3
+ import enum
4
+
5
+ import sqlalchemy as sql
6
+
7
+ from .expr import Expr
8
+ from .column_ref import ColumnRef
9
+ from .row_builder import RowBuilder
10
+ from .data_row import DataRow
11
+ import pixeltable.catalog as catalog
12
+ import pixeltable.type_system as ts
13
+
14
+
15
class ColumnPropertyRef(Expr):
    """A reference to a property of a table column

    The properties themselves are type-specific and may or may not need to reference the underlying column data.
    """
    class Property(enum.Enum):
        ERRORTYPE = 0
        ERRORMSG = 1
        FILEURL = 2
        LOCALPATH = 3

    def __init__(self, col_ref: ColumnRef, prop: Property):
        """
        Args:
            col_ref: the column whose property is being referenced
            prop: the property
        """
        # every property is a nullable string
        super().__init__(ts.StringType(nullable=True))
        self.components = [col_ref]
        self.prop = prop
        self.id = self._create_id()

    def default_column_name(self) -> Optional[str]:
        # str(self) is '<col>.<prop>'; replace the '.' with '_'
        return str(self).replace('.', '_')

    def _equals(self, other: ColumnPropertyRef) -> bool:
        return self.prop == other.prop

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        return super()._id_attrs() + [('prop', self.prop.value)]

    @property
    def _col_ref(self) -> ColumnRef:
        return self.components[0]

    def __str__(self) -> str:
        return f'{self._col_ref}.{self.prop.name.lower()}'

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        """Returns the SQL column for this property, or None if it needs to be computed via eval()

        None is returned for unstored columns and for LOCALPATH.
        """
        col = self._col_ref.col
        if not col.is_stored:
            return None
        if self.prop == self.Property.ERRORTYPE:
            assert col.sa_errortype_col is not None
            return col.sa_errortype_col
        if self.prop == self.Property.ERRORMSG:
            assert col.sa_errormsg_col is not None
            return col.sa_errormsg_col
        if self.prop == self.Property.FILEURL:
            # the file url is stored as the column value
            return self._col_ref.sql_expr()
        return None

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        """Copies the file url/local path recorded in data_row for the column into this expr's slot

        Only FILEURL and LOCALPATH can be evaluated here; the column's slot needs to be populated already.
        """
        assert self.prop == self.Property.FILEURL or self.prop == self.Property.LOCALPATH
        col_slot_idx = self._col_ref.slot_idx
        assert data_row.has_val[col_slot_idx]
        if self.prop == self.Property.FILEURL:
            data_row[self.slot_idx] = data_row.file_urls[col_slot_idx]
        elif self.prop == self.Property.LOCALPATH:
            data_row[self.slot_idx] = data_row.file_paths[col_slot_idx]

    def _as_dict(self) -> Dict:
        return {'prop': self.prop.value, **super()._as_dict()}

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        assert 'prop' in d
        return cls(components[0], cls.Property(d['prop']))
77
+
@@ -0,0 +1,114 @@
1
+ from __future__ import annotations
2
+ from typing import Optional, List, Any, Dict, Tuple
3
+ from uuid import UUID
4
+
5
+ import sqlalchemy as sql
6
+
7
+ from .expr import Expr
8
+ from .data_row import DataRow
9
+ from .row_builder import RowBuilder
10
+ import pixeltable.iterators as iters
11
+ import pixeltable.exceptions as excs
12
+ import pixeltable.catalog as catalog
13
+
14
+
15
class ColumnRef(Expr):
    """A reference to a table column

    When this reference is created in the context of a view, it can also refer to a column of the view base.
    For that reason, a ColumnRef needs to be serialized with the qualifying table id (column ids are only
    unique in the context of a particular table).
    """
    def __init__(self, col: catalog.Column):
        super().__init__(col.col_type)
        assert col.tbl is not None
        self.col = col
        owner = col.tbl
        self.is_unstored_iter_col = \
            owner.is_component_view() and owner.is_iterator_column(col) and not col.is_stored
        self.iter_arg_ctx: Optional[RowBuilder.EvalCtx] = None
        if self.is_unstored_iter_col:
            # number of rowid columns in the base table
            self.base_rowid_len = owner.base.num_rowid_columns()
        else:
            self.base_rowid_len = 0
        self.base_rowid = [None] * self.base_rowid_len
        self.iterator: Optional[iters.ComponentIterator] = None
        # index of the position column in the view's primary key; don't try to reference tbl.store_tbl here
        self.pos_idx: Optional[int] = None
        if self.is_unstored_iter_col:
            self.pos_idx = owner.num_rowid_columns() - 1
        self.id = self._create_id()

    def set_iter_arg_ctx(self, iter_arg_ctx: RowBuilder.EvalCtx) -> None:
        self.iter_arg_ctx = iter_arg_ctx
        assert len(self.iter_arg_ctx.target_slot_idxs) == 1  # a single inline dict

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        return super()._id_attrs() + [('tbl_id', self.col.tbl.id), ('col_id', self.col.id)]

    def __getattr__(self, name: str) -> Expr:
        """Resolves column properties by name; for json columns, any other name becomes a JsonPath"""
        from .column_property_ref import ColumnPropertyRef
        Property = ColumnPropertyRef.Property

        # resolve column properties
        if name in (Property.ERRORTYPE.name.lower(), Property.ERRORMSG.name.lower()):
            if not ((self.col.is_computed and self.col.is_stored) or self.col.col_type.is_media_type()):
                raise excs.Error(f'{name} only valid for a stored computed or media column: {self}')
            return ColumnPropertyRef(self, Property[name.upper()])

        if name in (Property.FILEURL.name.lower(), Property.LOCALPATH.name.lower()):
            if not self.col.col_type.is_media_type():
                raise excs.Error(f'{name} only valid for image/video/audio/document columns: {self}')
            if self.col.is_computed and not self.col.is_stored:
                raise excs.Error(f'{name} not valid for computed unstored columns: {self}')
            return ColumnPropertyRef(self, Property[name.upper()])

        if not self.col_type.is_json_type():
            return super().__getattr__(name)

        from .json_path import JsonPath
        return JsonPath(self, [name])

    def similarity(self, other: Any) -> Expr:
        """Returns a SimilarityExpr of this column and other, which needs to be a string or an image"""
        if isinstance(other, Expr):
            raise excs.Error('similarity(): requires a string or a PIL.Image.Image object, not an expression')
        item = Expr.from_object(other)
        is_valid = item is not None and (item.col_type.is_string_type() or item.col_type.is_image_type())
        if not is_valid:
            raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not a {type(other)}')
        from .similarity_expr import SimilarityExpr
        return SimilarityExpr(self, item)

    def default_column_name(self) -> Optional[str]:
        return str(self)

    def _equals(self, other: ColumnRef) -> bool:
        return self.col == other.col

    def __str__(self) -> str:
        return self.col.name

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        return self.col.sa_col

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        """For unstored iterator columns, produces the value from the iterator; otherwise nothing to compute"""
        if not self.is_unstored_iter_col:
            # the value was already materialized into the row
            assert data_row.has_val[self.slot_idx]
            return

        # if this is a new base row, we need to instantiate a new iterator
        current_base_rowid = data_row.pk[:self.base_rowid_len]
        if self.base_rowid != current_base_rowid:
            row_builder.eval(data_row, self.iter_arg_ctx)
            args_slot_idx = self.iter_arg_ctx.target_slot_idxs[0]
            iterator_args = data_row[args_slot_idx]
            self.iterator = self.col.tbl.iterator_cls(**iterator_args)
            self.base_rowid = data_row.pk[:self.base_rowid_len]
        # position the iterator on this row's pos value and take the column's value from its output
        self.iterator.set_pos(data_row.pk[self.pos_idx])
        iterator_output = next(self.iterator)
        data_row[self.slot_idx] = iterator_output[self.col.name]

    def _as_dict(self) -> Dict:
        tbl = self.col.tbl
        # the version is only recorded for snapshots
        if tbl.is_snapshot:
            version = tbl.version
        else:
            version = None
        return {'tbl_id': str(tbl.id), 'tbl_version': version, 'col_id': self.col.id}

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        tbl_id = UUID(d['tbl_id'])
        version = d['tbl_version']
        col_id = d['col_id']
        tbl_version = catalog.Catalog.get().tbl_versions[(tbl_id, version)]
        assert col_id in tbl_version.cols_by_id
        return cls(tbl_version.cols_by_id[col_id])
114
+