pixeltable 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (119) hide show
  1. pixeltable/__init__.py +53 -0
  2. pixeltable/__version__.py +3 -0
  3. pixeltable/catalog/__init__.py +13 -0
  4. pixeltable/catalog/catalog.py +159 -0
  5. pixeltable/catalog/column.py +181 -0
  6. pixeltable/catalog/dir.py +32 -0
  7. pixeltable/catalog/globals.py +33 -0
  8. pixeltable/catalog/insertable_table.py +192 -0
  9. pixeltable/catalog/named_function.py +36 -0
  10. pixeltable/catalog/path.py +58 -0
  11. pixeltable/catalog/path_dict.py +139 -0
  12. pixeltable/catalog/schema_object.py +39 -0
  13. pixeltable/catalog/table.py +695 -0
  14. pixeltable/catalog/table_version.py +1026 -0
  15. pixeltable/catalog/table_version_path.py +133 -0
  16. pixeltable/catalog/view.py +203 -0
  17. pixeltable/dataframe.py +749 -0
  18. pixeltable/env.py +466 -0
  19. pixeltable/exceptions.py +17 -0
  20. pixeltable/exec/__init__.py +10 -0
  21. pixeltable/exec/aggregation_node.py +78 -0
  22. pixeltable/exec/cache_prefetch_node.py +116 -0
  23. pixeltable/exec/component_iteration_node.py +79 -0
  24. pixeltable/exec/data_row_batch.py +94 -0
  25. pixeltable/exec/exec_context.py +22 -0
  26. pixeltable/exec/exec_node.py +61 -0
  27. pixeltable/exec/expr_eval_node.py +217 -0
  28. pixeltable/exec/in_memory_data_node.py +73 -0
  29. pixeltable/exec/media_validation_node.py +43 -0
  30. pixeltable/exec/sql_scan_node.py +226 -0
  31. pixeltable/exprs/__init__.py +25 -0
  32. pixeltable/exprs/arithmetic_expr.py +102 -0
  33. pixeltable/exprs/array_slice.py +71 -0
  34. pixeltable/exprs/column_property_ref.py +77 -0
  35. pixeltable/exprs/column_ref.py +114 -0
  36. pixeltable/exprs/comparison.py +77 -0
  37. pixeltable/exprs/compound_predicate.py +98 -0
  38. pixeltable/exprs/data_row.py +199 -0
  39. pixeltable/exprs/expr.py +594 -0
  40. pixeltable/exprs/expr_set.py +39 -0
  41. pixeltable/exprs/function_call.py +382 -0
  42. pixeltable/exprs/globals.py +69 -0
  43. pixeltable/exprs/image_member_access.py +96 -0
  44. pixeltable/exprs/in_predicate.py +96 -0
  45. pixeltable/exprs/inline_array.py +109 -0
  46. pixeltable/exprs/inline_dict.py +103 -0
  47. pixeltable/exprs/is_null.py +38 -0
  48. pixeltable/exprs/json_mapper.py +121 -0
  49. pixeltable/exprs/json_path.py +159 -0
  50. pixeltable/exprs/literal.py +66 -0
  51. pixeltable/exprs/object_ref.py +41 -0
  52. pixeltable/exprs/predicate.py +44 -0
  53. pixeltable/exprs/row_builder.py +329 -0
  54. pixeltable/exprs/rowid_ref.py +94 -0
  55. pixeltable/exprs/similarity_expr.py +65 -0
  56. pixeltable/exprs/type_cast.py +53 -0
  57. pixeltable/exprs/variable.py +45 -0
  58. pixeltable/ext/__init__.py +5 -0
  59. pixeltable/ext/functions/yolox.py +92 -0
  60. pixeltable/func/__init__.py +7 -0
  61. pixeltable/func/aggregate_function.py +197 -0
  62. pixeltable/func/callable_function.py +113 -0
  63. pixeltable/func/expr_template_function.py +99 -0
  64. pixeltable/func/function.py +141 -0
  65. pixeltable/func/function_registry.py +227 -0
  66. pixeltable/func/globals.py +46 -0
  67. pixeltable/func/nos_function.py +202 -0
  68. pixeltable/func/signature.py +162 -0
  69. pixeltable/func/udf.py +164 -0
  70. pixeltable/functions/__init__.py +95 -0
  71. pixeltable/functions/eval.py +215 -0
  72. pixeltable/functions/fireworks.py +34 -0
  73. pixeltable/functions/huggingface.py +167 -0
  74. pixeltable/functions/image.py +16 -0
  75. pixeltable/functions/openai.py +289 -0
  76. pixeltable/functions/pil/image.py +147 -0
  77. pixeltable/functions/string.py +13 -0
  78. pixeltable/functions/together.py +143 -0
  79. pixeltable/functions/util.py +52 -0
  80. pixeltable/functions/video.py +62 -0
  81. pixeltable/globals.py +425 -0
  82. pixeltable/index/__init__.py +2 -0
  83. pixeltable/index/base.py +51 -0
  84. pixeltable/index/embedding_index.py +168 -0
  85. pixeltable/io/__init__.py +3 -0
  86. pixeltable/io/hf_datasets.py +188 -0
  87. pixeltable/io/pandas.py +148 -0
  88. pixeltable/io/parquet.py +192 -0
  89. pixeltable/iterators/__init__.py +3 -0
  90. pixeltable/iterators/base.py +52 -0
  91. pixeltable/iterators/document.py +432 -0
  92. pixeltable/iterators/video.py +88 -0
  93. pixeltable/metadata/__init__.py +58 -0
  94. pixeltable/metadata/converters/convert_10.py +18 -0
  95. pixeltable/metadata/converters/convert_12.py +3 -0
  96. pixeltable/metadata/converters/convert_13.py +41 -0
  97. pixeltable/metadata/schema.py +234 -0
  98. pixeltable/plan.py +620 -0
  99. pixeltable/store.py +424 -0
  100. pixeltable/tool/create_test_db_dump.py +184 -0
  101. pixeltable/tool/create_test_video.py +81 -0
  102. pixeltable/type_system.py +846 -0
  103. pixeltable/utils/__init__.py +17 -0
  104. pixeltable/utils/arrow.py +98 -0
  105. pixeltable/utils/clip.py +18 -0
  106. pixeltable/utils/coco.py +136 -0
  107. pixeltable/utils/documents.py +69 -0
  108. pixeltable/utils/filecache.py +195 -0
  109. pixeltable/utils/help.py +11 -0
  110. pixeltable/utils/http_server.py +70 -0
  111. pixeltable/utils/media_store.py +76 -0
  112. pixeltable/utils/pytorch.py +91 -0
  113. pixeltable/utils/s3.py +13 -0
  114. pixeltable/utils/sql.py +17 -0
  115. pixeltable/utils/transactional_directory.py +35 -0
  116. pixeltable-0.0.0.dist-info/LICENSE +18 -0
  117. pixeltable-0.0.0.dist-info/METADATA +131 -0
  118. pixeltable-0.0.0.dist-info/RECORD +119 -0
  119. pixeltable-0.0.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,329 @@
1
+ from __future__ import annotations
2
+ from typing import Optional, List, Any, Dict, Tuple, Set, Iterable
3
+ from dataclasses import dataclass
4
+ import time
5
+ import sys
6
+
7
+ from .expr import Expr
8
+ from .expr_set import ExprSet
9
+ from .data_row import DataRow
10
+ import pixeltable.utils as utils
11
+ import pixeltable.func as func
12
+ import pixeltable.exceptions as excs
13
+ import pixeltable.catalog as catalog
14
+
15
+
16
class ExecProfile:
    """Accumulates evaluation time and call counts per materialized expr, indexed by slot idx."""

    def __init__(self, row_builder: RowBuilder):
        # one entry per slot of the given RowBuilder
        self.eval_time = [0.0] * row_builder.num_materialized
        self.eval_count = [0] * row_builder.num_materialized
        self.row_builder = row_builder

    def print(self, num_rows: int) -> None:
        """Print one line per expr that was evaluated at least once.

        Args:
            num_rows: number of rows that were processed; used to derive the calls-per-row multiplier.
                If it is not positive, the multiplier is omitted rather than dividing by zero.
        """
        for slot_idx, (total_time, count) in enumerate(zip(self.eval_time, self.eval_count)):
            if count == 0:
                # never evaluated: nothing to report
                continue
            per_call_time = total_time / count
            calls_per_row = count / num_rows if num_rows > 0 else 0
            multiple_str = f'({calls_per_row}x)' if calls_per_row > 1 else ''
            expr = self.row_builder.unique_exprs[slot_idx]
            # the bare name 'print' resolves to the builtin here, not to this method
            print(f'{expr}: {utils.print_perf_counter_delta(per_call_time)} {multiple_str}')
30
+
31
+
32
@dataclass
class ColumnSlotIdx:
    """Pairs a table column with the DataRow slot that holds its materialized value.

    TODO: can this be integrated into RowBuilder directly?
    """
    col: catalog.Column  # the column being materialized
    slot_idx: int  # index of the DataRow slot containing the column's value
39
+
40
+
41
class RowBuilder:
    """Create and populate DataRows and table rows from exprs and computed columns

    Every expr that gets recorded is assigned a slot idx (handed out sequentially) and is stored exactly once in
    `unique_exprs`; duplicates are replaced by the already-recorded instance.

    For ColumnRefs to unstored iterator columns:
    - in order for them to be executable, we also record the iterator args and pass them to the ColumnRef
    """

    @dataclass
    class EvalCtx:
        """Context for evaluating a set of target exprs"""
        slot_idxs: List[int]  # slot idxs of exprs needed to evaluate target exprs; does not contain duplicates
        exprs: List[Expr]  # exprs corresponding to slot_idxs
        target_slot_idxs: List[int]  # slot idxs of target exprs; might contain duplicates
        target_exprs: List[Expr]  # exprs corresponding to target_slot_idxs

    def __init__(
            self, output_exprs: List[Expr], columns: List[catalog.Column], input_exprs: List[Expr]
    ):
        """
        Args:
            output_exprs: list of Exprs to be evaluated
            columns: list of columns to be materialized
            input_exprs: list of Exprs that are excluded from evaluation (because they're already materialized)
        TODO: enforce that output_exprs doesn't overlap with input_exprs?
        """
        self.unique_exprs = ExprSet()  # dependencies precede their dependents
        self.next_slot_idx = 0

        # record input and output exprs; make copies to avoid reusing execution state
        unique_input_exprs = [self._record_unique_expr(e.copy(), recursive=False) for e in input_exprs]
        self.input_expr_slot_idxs = {e.slot_idx for e in unique_input_exprs}

        # output exprs: all exprs the caller wants to materialize
        # - explicitly requested output_exprs
        # - values for computed columns
        resolve_cols = set(columns)
        self.output_exprs = [
            self._record_unique_expr(e.copy().resolve_computed_cols(resolve_cols=resolve_cols), recursive=True)
            for e in output_exprs
        ]

        # record columns for create_table_row()
        from .column_ref import ColumnRef
        self.table_columns: List[ColumnSlotIdx] = []
        for col in columns:
            if col.is_computed:
                assert col.value_expr is not None
                # create a copy here so we don't reuse execution state and resolve references to computed columns
                expr = col.value_expr.copy().resolve_computed_cols(resolve_cols=resolve_cols)
                expr = self._record_unique_expr(expr, recursive=True)
                self.add_table_column(col, expr.slot_idx)
                self.output_exprs.append(expr)
            else:
                # record a ColumnRef so that references to this column resolve to the same slot idx
                ref = ColumnRef(col)
                ref = self._record_unique_expr(ref, recursive=False)
                self.add_table_column(col, ref.slot_idx)

        # default eval ctx: all output exprs
        self.default_eval_ctx = self.create_eval_ctx(self.output_exprs, exclude=unique_input_exprs)

        # references to unstored iterator columns:
        # - those ColumnRefs need to instantiate iterators
        # - we create and record the iterator args here and pass them to their respective ColumnRefs
        # - we do this instead of simply recording the iterator args as a component of those ColumnRefs,
        #   because that would cause them to be evaluated for every single row
        # - the separate eval ctx allows the ColumnRef to materialize the iterator args only when the underlying
        #   iterated object changes
        col_refs = [e for e in self.unique_exprs if isinstance(e, ColumnRef)]

        def refs_unstored_iter_col(col_ref: ColumnRef) -> bool:
            tbl = col_ref.col.tbl
            return tbl.is_component_view() and tbl.is_iterator_column(col_ref.col) and not col_ref.col.is_stored

        unstored_iter_col_refs = [col_ref for col_ref in col_refs if refs_unstored_iter_col(col_ref)]
        component_views = [col_ref.col.tbl for col_ref in unstored_iter_col_refs]
        unstored_iter_args = {view.id: view.iterator_args.copy() for view in component_views}
        self.unstored_iter_args = {
            view_id: self._record_unique_expr(arg, recursive=True) for view_id, arg in unstored_iter_args.items()
        }

        for col_ref in unstored_iter_col_refs:
            # create_eval_ctx() maps the arg to its recorded counterpart in unique_exprs
            iter_arg_ctx = self.create_eval_ctx([unstored_iter_args[col_ref.col.tbl.id]])
            col_ref.set_iter_arg_ctx(iter_arg_ctx)

        # we guarantee that we can compute the expr DAG in a single front-to-back pass
        for i, expr in enumerate(self.unique_exprs):
            assert expr.slot_idx == i

        # record transitive dependencies (list of set of slot_idxs, indexed by slot_idx)
        self.dependencies: List[Set[int]] = [set() for _ in range(self.num_materialized)]
        for expr in self.unique_exprs:
            if expr.slot_idx in self.input_expr_slot_idxs:
                # this is input and therefore doesn't depend on other exprs
                continue
            for d in expr.dependencies():
                self.dependencies[expr.slot_idx].add(d.slot_idx)
                self.dependencies[expr.slot_idx].update(self.dependencies[d.slot_idx])

        # derive transitive dependents
        self.dependents: List[Set[int]] = [set() for _ in range(self.num_materialized)]
        for expr in self.unique_exprs:
            for d in self.dependencies[expr.slot_idx]:
                self.dependents[d].add(expr.slot_idx)

        # records the output_expr that a subexpr belongs to
        # (a subexpr can be shared across multiple output exprs)
        self.output_expr_ids: List[Set[int]] = [set() for _ in range(self.num_materialized)]
        for e in self.output_exprs:
            self._record_output_expr_id(e, e.slot_idx)

    def add_table_column(self, col: catalog.Column, slot_idx: int) -> None:
        """Record a column that is part of the table row"""
        self.table_columns.append(ColumnSlotIdx(col, slot_idx))

    def output_slot_idxs(self) -> List[ColumnSlotIdx]:
        """Return ColumnSlotIdx for output columns"""
        return self.table_columns

    @property
    def num_materialized(self) -> int:
        """Number of slot idxs handed out so far"""
        return self.next_slot_idx

    def get_output_exprs(self) -> List[Expr]:
        """Returns exprs that were requested in the c'tor and require evaluation"""
        return self.output_exprs

    def _next_slot_idx(self) -> int:
        """Return the next unused slot idx and advance the counter"""
        result = self.next_slot_idx
        self.next_slot_idx += 1
        return result

    def _record_unique_expr(self, expr: Expr, recursive: bool) -> Expr:
        """Records the expr if it's not a duplicate and assigns a slot idx to expr and its components

        Args:
            expr: the expr to record
            recursive: if True, the expr's components are recorded first (and replaced in-place by their
                unique counterparts)
        Returns:
            the unique expr
        """
        if expr in self.unique_exprs:
            # expr is a duplicate: we use the original instead
            return self.unique_exprs[expr]

        # expr value needs to be computed via Expr.eval()
        if recursive:
            for i, c in enumerate(expr.components):
                # make sure we only refer to components that have themselves been recorded
                expr.components[i] = self._record_unique_expr(c, True)
        assert expr.slot_idx is None
        expr.slot_idx = self._next_slot_idx()
        self.unique_exprs.append(expr)
        return expr

    def _record_output_expr_id(self, e: Expr, output_expr_id: int) -> None:
        """Tag e and, recursively, its dependencies with output_expr_id; input exprs are not tagged"""
        assert e.slot_idx is not None
        assert output_expr_id is not None
        if e.slot_idx in self.input_expr_slot_idxs:
            return
        self.output_expr_ids[e.slot_idx].add(output_expr_id)
        for d in e.dependencies():
            self._record_output_expr_id(d, output_expr_id)

    def _compute_dependencies(self, target_slot_idxs: List[int], excluded_slot_idxs: List[int]) -> List[int]:
        """Compute exprs needed to materialize the given target slots, excluding 'excluded_slot_idxs'

        Returns:
            sorted list of slot idxs of the (transitive) dependencies of the targets; the targets themselves are
            only included if they are a dependency of another target
        """
        if len(target_slot_idxs) == 0:
            # nothing to materialize (also avoids max() on an empty sequence)
            return []
        excluded = set(excluded_slot_idxs)  # constant-time membership tests inside the loops
        dependencies: List[Set[int]] = [set() for _ in range(self.num_materialized)]  # indexed by slot_idx
        # doing this front-to-back ensures that we capture transitive dependencies
        max_target_slot_idx = max(target_slot_idxs)
        for expr in self.unique_exprs:
            if expr.slot_idx > max_target_slot_idx:
                # we're done
                break
            if expr.slot_idx in excluded:
                continue
            if expr.slot_idx in self.input_expr_slot_idxs:
                # this is input and therefore doesn't depend on other exprs
                continue
            for d in expr.dependencies():
                if d.slot_idx in excluded:
                    continue
                dependencies[expr.slot_idx].add(d.slot_idx)
                dependencies[expr.slot_idx].update(dependencies[d.slot_idx])
        # merge dependencies and convert to list
        return sorted(set().union(*[dependencies[i] for i in target_slot_idxs]))

    def substitute_exprs(self, expr_list: List[Expr], remove_duplicates: bool = True) -> None:
        """Substitutes exprs with their executable counterparts from unique_exprs and optionally removes duplicates

        expr_list is modified in place.
        """
        i = 0
        unique_ids: Set[int] = set()  # slot idxs within expr_list
        while i < len(expr_list):
            unique_expr = self.unique_exprs[expr_list[i]]
            if unique_expr.slot_idx in unique_ids and remove_duplicates:
                # don't advance i: the next element has moved into position i
                del expr_list[i]
            else:
                expr_list[i] = unique_expr
                unique_ids.add(unique_expr.slot_idx)
                i += 1

    def get_dependencies(self, targets: List[Expr], exclude: Optional[List[Expr]] = None) -> List[Expr]:
        """
        Return list of dependencies needed to evaluate the given target exprs (including the targets themselves).
        The exprs given in 'exclude' are excluded.
        Returns:
            list of Exprs from unique_exprs (= with slot_idx set), ordered by slot idx
        """
        if exclude is None:
            exclude = []
        if len(targets) == 0:
            return []
        # make sure we only refer to recorded exprs
        targets = [self.unique_exprs[e] for e in targets]
        exclude = [self.unique_exprs[e] for e in exclude]
        target_slot_idxs = [e.slot_idx for e in targets]
        excluded_slot_idxs = [e.slot_idx for e in exclude]
        all_dependencies = set(self._compute_dependencies(target_slot_idxs, excluded_slot_idxs))
        all_dependencies.update(target_slot_idxs)
        return [self.unique_exprs[slot_idx] for slot_idx in sorted(all_dependencies)]

    def create_eval_ctx(self, targets: List[Expr], exclude: Optional[List[Expr]] = None) -> EvalCtx:
        """Return EvalCtx for targets"""
        if exclude is None:
            exclude = []
        if len(targets) == 0:
            return self.EvalCtx([], [], [], [])
        dependencies = self.get_dependencies(targets, exclude)
        targets = [self.unique_exprs[e] for e in targets]
        target_slot_idxs = [e.slot_idx for e in targets]
        ctx_slot_idxs = [e.slot_idx for e in dependencies]
        return self.EvalCtx(
            slot_idxs=ctx_slot_idxs, exprs=[self.unique_exprs[slot_idx] for slot_idx in ctx_slot_idxs],
            target_slot_idxs=target_slot_idxs, target_exprs=targets)

    def set_exc(self, data_row: DataRow, slot_idx: int, exc: Exception) -> None:
        """Record an exception in data_row and propagate it to dependents"""
        data_row.set_exc(slot_idx, exc)
        for dependent_slot_idx in self.dependents[slot_idx]:
            data_row.set_exc(dependent_slot_idx, exc)

    def eval(
            self, data_row: DataRow, ctx: EvalCtx, profile: Optional[ExecProfile] = None, ignore_errors: bool = False
    ) -> None:
        """
        Populates the slots in data_row given in ctx.
        If an expr.eval() raises an exception, records the exception in the corresponding slot of data_row
        and omits any of that expr's dependents' eval().
        profile: if present, populated with execution time of each expr.eval() call; indexed by expr.slot_idx
        ignore_errors: if False, raises ExprEvalError if any expr.eval() raises an exception
        """
        for expr in ctx.exprs:
            assert expr.slot_idx >= 0
            if data_row.has_val[expr.slot_idx] or data_row.has_exc(expr.slot_idx):
                # already materialized, or an exception was already recorded for this slot
                # (e.g., propagated from a failed dependency by set_exc())
                continue
            try:
                start_time = time.perf_counter()
                expr.eval(data_row, self)
                if profile is not None:
                    profile.eval_time[expr.slot_idx] += time.perf_counter() - start_time
                    profile.eval_count[expr.slot_idx] += 1
            except Exception as exc:
                _, _, exc_tb = sys.exc_info()
                self.set_exc(data_row, expr.slot_idx, exc)
                if not ignore_errors:
                    input_vals = [data_row[d.slot_idx] for d in expr.dependencies()]
                    raise excs.ExprEvalError(
                        expr, f'expression {expr}', data_row.get_exc(expr.slot_idx), exc_tb, input_vals, 0)

    def create_table_row(self, data_row: DataRow, exc_col_ids: Set[int]) -> Tuple[Dict[str, Any], int]:
        """Create a table row from the slots that have an output column assigned

        The ids of columns whose slot holds an exception are added to exc_col_ids (modified in place).

        Return Tuple[dict that represents a stored row (can be passed to sql.insert()), # of exceptions]
        This excludes system columns.
        """
        num_excs = 0
        table_row: Dict[str, Any] = {}
        for info in self.table_columns:
            col, slot_idx = info.col, info.slot_idx
            if data_row.has_exc(slot_idx):
                # exceptions get stored in the errortype/-msg columns
                exc = data_row.get_exc(slot_idx)
                num_excs += 1
                exc_col_ids.add(col.id)
                table_row[col.store_name()] = None
                table_row[col.errortype_store_name()] = type(exc).__name__
                table_row[col.errormsg_store_name()] = str(exc)
            else:
                val = data_row.get_stored_val(slot_idx, col.sa_col.type)
                table_row[col.store_name()] = val
                # we unfortunately need to set these, even if there are no errors
                table_row[col.errortype_store_name()] = None
                table_row[col.errormsg_store_name()] = None

        return table_row, num_excs
329
+
@@ -0,0 +1,94 @@
1
+ from __future__ import annotations
2
+ from typing import Optional, List, Any, Dict, Tuple
3
+ from uuid import UUID
4
+
5
+ import sqlalchemy as sql
6
+
7
+ from .expr import Expr
8
+ from .data_row import DataRow
9
+ from .row_builder import RowBuilder
10
+ import pixeltable.type_system as ts
11
+ import pixeltable.catalog as catalog
12
+
13
+
14
class RowidRef(Expr):
    """A reference to a part of a table rowid

    This is used internally to support grouping by a base table and for references to the 'pos' column.
    When a RowidRef is part of a computed column in a view, the view's TableVersion isn't available when
    _from_dict()/init() is called, which is why this class effectively has two separate paths for construction
    (with and without a TableVersion).
    """
    def __init__(
            self, tbl: catalog.TableVersion, idx: int,
            tbl_id: Optional[UUID] = None, normalized_base_id: Optional[UUID] = None):
        """
        Args:
            tbl: the referenced TableVersion, or None when only the ids are known (see _from_dict())
            idx: index of the referenced rowid component
            tbl_id: id of the referenced table; only used if tbl is None
            normalized_base_id: id of the normalized base table; only used if tbl is None
        """
        super().__init__(ts.IntType(nullable=False))
        self.tbl = tbl
        self.normalized_base = None
        if tbl is not None:
            # normalize to simplify comparisons: we refer to the lowest base table that has the requested rowid idx
            # (which has the same values as all its descendent views)
            candidate = tbl
            # don't try to reference tbl.store_tbl here
            while candidate.base is not None and candidate.base.num_rowid_columns() > idx:
                candidate = candidate.base
            self.normalized_base = candidate

        # if we're initialized by _from_dict(), we only have the ids, not the TableVersion itself
        if tbl is not None:
            self.tbl_id = tbl.id
        else:
            self.tbl_id = tbl_id
        if self.normalized_base is not None:
            self.normalized_base_id = self.normalized_base.id
        else:
            self.normalized_base_id = normalized_base_id
        self.rowid_component_idx = idx
        self.id = self._create_id()

    def default_column_name(self) -> Optional[str]:
        """Same as str(self)"""
        return str(self)

    def _equals(self, other: RowidRef) -> bool:
        """Two RowidRefs are equal if they reference the same component of the same normalized base"""
        if self.normalized_base_id != other.normalized_base_id:
            return False
        return self.rowid_component_idx == other.rowid_component_idx

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        own_attrs = [('normalized_base_id', self.normalized_base_id), ('idx', self.rowid_component_idx)]
        return super()._id_attrs() + own_attrs

    def __str__(self) -> str:
        """Returns the pos column name if this references the pos column of a component view, otherwise ''"""
        # check if this is the pos column of a component view
        tbl = self.tbl
        if tbl is None:
            # constructed without a TableVersion: look it up in the catalog
            tbl = catalog.Catalog.get().tbl_versions[(self.tbl_id, None)]
        is_pos_col = tbl.is_component_view() and self.rowid_component_idx == tbl.store_tbl.pos_col_idx
        return catalog.globals.POS_COLUMN_NAME if is_pos_col else ''

    def set_tbl(self, tbl: catalog.TableVersionPath) -> None:
        """Change the table that is being referenced.
        This can be necessary during query planning, because at that stage we try to minimize the total number of
        tables that are referenced/need to be joined.
        We can only change to a view of the original table (which shares the base's rowid columns).
        """
        if self.tbl_id == tbl.tbl_version.id:
            # already referencing that table
            return
        path_ids = [version.id for version in tbl.get_tbl_versions()]
        assert self.tbl_id in path_ids
        self.tbl = tbl.tbl_version
        self.tbl_id = self.tbl.id

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        """Returns the store table's rowid column for the referenced component"""
        tbl = self.tbl
        if tbl is None:
            tbl = catalog.Catalog.get().tbl_versions[(self.tbl_id, None)]
        return tbl.store_tbl.rowid_columns()[self.rowid_component_idx]

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        """Copies the referenced component of the row's pk into this expr's slot"""
        data_row[self.slot_idx] = data_row.pk[self.rowid_component_idx]

    def _as_dict(self) -> Dict:
        result: Dict = {}
        result['tbl_id'] = str(self.tbl_id)
        result['normalized_base_id'] = str(self.normalized_base_id)
        result['idx'] = self.rowid_component_idx
        return result

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        """Reconstructs a RowidRef from the ids only (no TableVersion)"""
        tbl_id = UUID(d['tbl_id'])
        normalized_base_id = UUID(d['normalized_base_id'])
        return cls(tbl=None, idx=d['idx'], tbl_id=tbl_id, normalized_base_id=normalized_base_id)
94
+
@@ -0,0 +1,65 @@
1
+ from typing import Optional, List
2
+
3
+ import sqlalchemy as sql
4
+ import PIL.Image
5
+
6
+ import pixeltable.exceptions as excs
7
+ import pixeltable.type_system as ts
8
+ from .column_ref import ColumnRef
9
+ from .data_row import DataRow
10
+ from .expr import Expr
11
+ from .literal import Literal
12
+ from .row_builder import RowBuilder
13
+
14
+
15
class SimilarityExpr(Expr):
    """Similarity of an indexed column to a literal string or image.

    This expr is only ever translated to SQL (see sql_expr() and as_order_by_clause()); eval() must not be called.
    """

    def __init__(self, col_ref: ColumnRef, item: Expr):
        """
        Args:
            col_ref: reference to the column being searched; the column needs to have exactly one index
            item: the query item; must be a Literal of string or image type
        Raises:
            excs.Error: if the column has no index or more than one index, or if the index wasn't created with
                the embedding needed for the type of item
        """
        super().__init__(ts.FloatType())
        self.components = [col_ref, item]
        self.id = self._create_id()
        assert isinstance(item, Literal)
        assert item.col_type.is_string_type() or item.col_type.is_image_type()

        # determine index to use
        idx_info = col_ref.col.get_idx_info()
        if len(idx_info) == 0:
            raise excs.Error(f'No index found for column {col_ref.col}')
        if len(idx_info) > 1:
            raise excs.Error(
                f'Column {col_ref.col.name} has multiple indices; use the index name to disambiguate, '
                f'e.g., `{col_ref.col.name}.<index-name>.similarity(...)`')
        self.idx_info = next(iter(idx_info.values()))
        idx = self.idx_info.idx

        # the index needs to have been created with an embedding that matches the type of the query item
        if item.col_type.is_string_type() and idx.txt_embed is None:
            raise excs.Error(
                f'Embedding index {self.idx_info.name} on column {self.idx_info.col.name} was created without the '
                f'text_embed parameter and does not support text queries')
        if item.col_type.is_image_type() and idx.img_embed is None:
            raise excs.Error(
                f'Embedding index {self.idx_info.name} on column {self.idx_info.col.name} was created without the '
                f'img_embed parameter and does not support image queries')

    def __str__(self) -> str:
        return f'{self.components[0]}.similarity({self.components[1]})'

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        """Returns the index's similarity clause for the literal's value"""
        assert isinstance(self.components[1], Literal)
        item = self.components[1].val
        return self.idx_info.idx.similarity_clause(self.idx_info.val_col, item)

    def as_order_by_clause(self, is_asc: bool) -> Optional[sql.ClauseElement]:
        """Returns the index's order-by clause for the literal's value"""
        assert isinstance(self.components[1], Literal)
        item = self.components[1].val
        return self.idx_info.idx.order_by_clause(self.idx_info.val_col, item, is_asc)

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        # this should never get called
        # (raise explicitly: a bare 'assert False' is removed when Python runs with -O)
        raise AssertionError('SimilarityExpr.eval() should never be called')

    @classmethod
    def _from_dict(cls, d: dict, components: List[Expr]) -> Expr:
        """Reconstructs the expr from its two components (ColumnRef, item)"""
        assert len(components) == 2
        assert isinstance(components[0], ColumnRef)
        return cls(components[0], components[1])
@@ -0,0 +1,53 @@
1
+ import json
2
+ from typing import Optional, Dict, List, Tuple, Any
3
+
4
+ import sqlalchemy as sql
5
+
6
+ import pixeltable.type_system as ts
7
+ from .expr import DataRow, Expr
8
+ from .row_builder import RowBuilder
9
+
10
+
11
class TypeCast(Expr):
    """
    An `Expr` that converts the value of an underlying `Expr` to a specified `ColumnType`.
    """
    def __init__(self, underlying: Expr, new_type: ts.ColumnType):
        """
        Args:
            underlying: the expr whose value is converted
            new_type: the target type; becomes this expr's column type
        """
        super().__init__(new_type)
        self.components: List[Expr] = [underlying]
        self.id: Optional[int] = self._create_id()

    @property
    def _underlying(self):
        """The expr being cast (the sole component)"""
        return self.components[0]

    def _equals(self, other: 'TypeCast') -> bool:
        # `TypeCast` has no properties beyond those captured by `Expr`.
        return True

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        cast_attrs = [('new_type', self.col_type)]
        return super()._id_attrs() + cast_attrs

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        """
        `sql_expr` is unimplemented for now, in order to sidestep potentially thorny
        questions about consistency of doing type conversions in both Python and Postgres.
        """
        return None

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        """Reads the underlying expr's value and stores the result of `col_type.create_literal()` in our slot"""
        source_slot = self._underlying.slot_idx
        data_row[self.slot_idx] = self.col_type.create_literal(data_row[source_slot])

    def _as_dict(self) -> Dict:
        # 'new_type' comes first; entries from the superclass are merged in afterwards
        result = {'new_type': self.col_type.as_dict()}
        result.update(super()._as_dict())
        return result

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        """Reconstructs the cast from its serialized target type and its single component"""
        assert 'new_type' in d
        assert len(components) == 1
        target_type = ts.ColumnType.from_dict(d['new_type'])
        return cls(components[0], target_type)

    def __str__(self) -> str:
        return f'{self._underlying}.astype({self.col_type})'
@@ -0,0 +1,45 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import List, Tuple, Any, Dict, NoReturn
4
+
5
+ import pixeltable.type_system as ts
6
+ from .data_row import DataRow
7
+ from .expr import Expr
8
+ from .row_builder import RowBuilder
9
+
10
+
11
class Variable(Expr):
    """An expr parameter, needed for ExprTemplateFunctions

    A Variable has a name and type and needs to have been replaced by an actual expression before evaluation.
    """

    def __init__(self, name: str, col_type: ts.ColumnType):
        """
        Args:
            name: name of the parameter
            col_type: type of the parameter
        """
        super().__init__(col_type)
        self.name = name
        self.id = self._create_id()

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        name_attr = [('name', self.name)]
        return super()._id_attrs() + name_attr

    def default_column_name(self) -> NoReturn:
        # not supported for Variables
        raise NotImplementedError()

    def _equals(self, other: Variable) -> bool:
        """Variables are compared by name"""
        return self.name == other.name

    def __str__(self) -> str:
        return self.name

    def sql_expr(self) -> NoReturn:
        # not supported for Variables
        raise NotImplementedError()

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> NoReturn:
        # not supported for Variables
        raise NotImplementedError()

    def _as_dict(self) -> Dict:
        # own entries come first; entries from the superclass are merged in afterwards
        result = {'name': self.name, 'type': self.col_type.as_dict()}
        result.update(super()._as_dict())
        return result

    @classmethod
    def _from_dict(cls, d: Dict, _: List[Expr]) -> Expr:
        """Reconstructs the Variable from its serialized name and type; components are ignored"""
        col_type = ts.ColumnType.from_dict(d['type'])
        return cls(d['name'], col_type)
@@ -0,0 +1,5 @@
1
+ """
2
+ Extended integrations for Pixeltable. This package contains experimental or demonstration features that
3
+ are not intended for production use. Long-term support cannot be guaranteed, usually because the features
4
+ have dependencies whose future support is unclear.
5
+ """
@@ -0,0 +1,92 @@
1
+ import logging
2
+ from pathlib import Path
3
+ from typing import Iterable, Iterator
4
+ from urllib.request import urlretrieve
5
+
6
+ import PIL.Image
7
+ import numpy as np
8
+ import torch
9
+ from yolox.data import ValTransform
10
+ from yolox.exp import get_exp, Exp
11
+ from yolox.models import YOLOX
12
+ from yolox.utils import postprocess
13
+
14
+ import pixeltable as pxt
15
+ from pixeltable import env
16
+ from pixeltable.func import Batch
17
+ from pixeltable.functions.util import resolve_torch_device
18
+
19
+ _logger = logging.getLogger('pixeltable')
20
+
21
+
22
@pxt.udf(batch_size=4)
def yolox(images: Batch[PIL.Image.Image], *, model_id: str, threshold: float = 0.5) -> Batch[dict]:
    """
    Runs the specified YOLOX object detection model on an image.

    YOLOX support is part of the `pixeltable.ext` package: long-term support is not guaranteed, and it is not
    intended for use in production applications.

    Parameters:
    - `model_id` - one of: `yolox_nano`, `yolox_tiny`, `yolox_s`, `yolox_m`, `yolox_l`, `yolox_x`
    - `threshold` - the threshold for object detection

    Returns one dict per input image, with the keys `bboxes`, `scores` and `labels` (parallel lists; empty if
    nothing was detected in that image).
    """
    # inference is always run on the CPU here
    model, exp = _lookup_model(model_id, 'cpu')
    image_tensors = list(_images_to_tensors(images, exp))
    batch_tensor = torch.stack(image_tensors)

    with torch.no_grad():
        output_tensor = model(batch_tensor)

    # second argument is the number of classes (presumably the COCO class count -- TODO confirm)
    outputs = postprocess(
        output_tensor, 80, threshold, exp.nmsthre, class_agnostic=False
    )

    results: list[dict] = []
    # pair every image with its own detections (previously outputs[0] was reported for every image of the batch)
    for image, detections in zip(images, outputs):
        # factor by which the image was scaled to fit the model's input size; used to map boxes back
        ratio = min(exp.test_size[0] / image.height, exp.test_size[1] / image.width)
        if detections is None:
            # nothing was detected in this image
            results.append({'bboxes': [], 'scores': [], 'labels': []})
        else:
            results.append({
                'bboxes': [(detection[:4] / ratio).tolist() for detection in detections],
                'scores': [detection[4].item() * detection[5].item() for detection in detections],
                'labels': [int(detection[6]) for detection in detections]
            })
    return results
57
+
58
+
59
def _images_to_tensors(images: Iterable[PIL.Image.Image], exp: Exp) -> Iterator[torch.Tensor]:
    """Lazily yields one tensor per image, after applying `_val_transform` with the experiment's test size."""
    for img in images:
        pixels = np.array(img)
        # the transform returns a pair; only the first element (the transformed image) is used
        transformed, _ = _val_transform(pixels, None, exp.test_size)
        yield torch.from_numpy(transformed)
63
+
64
+
65
def _lookup_model(model_id: str, device: str) -> tuple[YOLOX, Exp]:
    """Return the (model, experiment) pair for model_id on the given device.

    Results are cached in `_model_cache` per (model_id, device). On a cache miss, the weights are downloaded
    into the environment's tmp dir (unless the weights file already exists there) and loaded into a freshly
    created model, which is put into inference mode.
    """
    key = (model_id, device)
    if key in _model_cache:
        return _model_cache[key]

    weights_url = f'https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/{model_id}.pth'
    weights_file = Path(f'{env.Env.get().tmp_dir}/{model_id}.pth')
    if not weights_file.exists():
        _logger.info(f'Downloading weights for YOLOX model {model_id}: from {weights_url} -> {weights_file}')
        # download to a temporary name and rename afterwards, so that an interrupted download doesn't leave
        # behind a truncated file that would be mistaken for valid weights by the exists() check above
        partial_file = weights_file.with_name(f'{weights_file.name}.part')
        urlretrieve(weights_url, partial_file)
        partial_file.replace(weights_file)

    exp = get_exp(exp_name=model_id)
    model = exp.get_model().to(device)

    model.eval()
    model.head.training = False
    model.training = False

    # Load in the weights from training
    # NOTE(review): torch.load() unpickles the file; the weights are fetched from the fixed release URL above
    weights = torch.load(weights_file, map_location=torch.device(device))
    model.load_state_dict(weights['model'])

    _model_cache[key] = (model, exp)
    return model, exp
89
+
90
+
91
# cache of loaded models: (model_id, device) -> (model, exp); read and populated by _lookup_model()
_model_cache = {}
# transform applied to each input image in _images_to_tensors()
_val_transform = ValTransform(legacy=False)