pixeltable 0.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +53 -0
- pixeltable/__version__.py +3 -0
- pixeltable/catalog/__init__.py +13 -0
- pixeltable/catalog/catalog.py +159 -0
- pixeltable/catalog/column.py +181 -0
- pixeltable/catalog/dir.py +32 -0
- pixeltable/catalog/globals.py +33 -0
- pixeltable/catalog/insertable_table.py +192 -0
- pixeltable/catalog/named_function.py +36 -0
- pixeltable/catalog/path.py +58 -0
- pixeltable/catalog/path_dict.py +139 -0
- pixeltable/catalog/schema_object.py +39 -0
- pixeltable/catalog/table.py +695 -0
- pixeltable/catalog/table_version.py +1026 -0
- pixeltable/catalog/table_version_path.py +133 -0
- pixeltable/catalog/view.py +203 -0
- pixeltable/dataframe.py +749 -0
- pixeltable/env.py +466 -0
- pixeltable/exceptions.py +17 -0
- pixeltable/exec/__init__.py +10 -0
- pixeltable/exec/aggregation_node.py +78 -0
- pixeltable/exec/cache_prefetch_node.py +116 -0
- pixeltable/exec/component_iteration_node.py +79 -0
- pixeltable/exec/data_row_batch.py +94 -0
- pixeltable/exec/exec_context.py +22 -0
- pixeltable/exec/exec_node.py +61 -0
- pixeltable/exec/expr_eval_node.py +217 -0
- pixeltable/exec/in_memory_data_node.py +73 -0
- pixeltable/exec/media_validation_node.py +43 -0
- pixeltable/exec/sql_scan_node.py +226 -0
- pixeltable/exprs/__init__.py +25 -0
- pixeltable/exprs/arithmetic_expr.py +102 -0
- pixeltable/exprs/array_slice.py +71 -0
- pixeltable/exprs/column_property_ref.py +77 -0
- pixeltable/exprs/column_ref.py +114 -0
- pixeltable/exprs/comparison.py +77 -0
- pixeltable/exprs/compound_predicate.py +98 -0
- pixeltable/exprs/data_row.py +199 -0
- pixeltable/exprs/expr.py +594 -0
- pixeltable/exprs/expr_set.py +39 -0
- pixeltable/exprs/function_call.py +382 -0
- pixeltable/exprs/globals.py +69 -0
- pixeltable/exprs/image_member_access.py +96 -0
- pixeltable/exprs/in_predicate.py +96 -0
- pixeltable/exprs/inline_array.py +109 -0
- pixeltable/exprs/inline_dict.py +103 -0
- pixeltable/exprs/is_null.py +38 -0
- pixeltable/exprs/json_mapper.py +121 -0
- pixeltable/exprs/json_path.py +159 -0
- pixeltable/exprs/literal.py +66 -0
- pixeltable/exprs/object_ref.py +41 -0
- pixeltable/exprs/predicate.py +44 -0
- pixeltable/exprs/row_builder.py +329 -0
- pixeltable/exprs/rowid_ref.py +94 -0
- pixeltable/exprs/similarity_expr.py +65 -0
- pixeltable/exprs/type_cast.py +53 -0
- pixeltable/exprs/variable.py +45 -0
- pixeltable/ext/__init__.py +5 -0
- pixeltable/ext/functions/yolox.py +92 -0
- pixeltable/func/__init__.py +7 -0
- pixeltable/func/aggregate_function.py +197 -0
- pixeltable/func/callable_function.py +113 -0
- pixeltable/func/expr_template_function.py +99 -0
- pixeltable/func/function.py +141 -0
- pixeltable/func/function_registry.py +227 -0
- pixeltable/func/globals.py +46 -0
- pixeltable/func/nos_function.py +202 -0
- pixeltable/func/signature.py +162 -0
- pixeltable/func/udf.py +164 -0
- pixeltable/functions/__init__.py +95 -0
- pixeltable/functions/eval.py +215 -0
- pixeltable/functions/fireworks.py +34 -0
- pixeltable/functions/huggingface.py +167 -0
- pixeltable/functions/image.py +16 -0
- pixeltable/functions/openai.py +289 -0
- pixeltable/functions/pil/image.py +147 -0
- pixeltable/functions/string.py +13 -0
- pixeltable/functions/together.py +143 -0
- pixeltable/functions/util.py +52 -0
- pixeltable/functions/video.py +62 -0
- pixeltable/globals.py +425 -0
- pixeltable/index/__init__.py +2 -0
- pixeltable/index/base.py +51 -0
- pixeltable/index/embedding_index.py +168 -0
- pixeltable/io/__init__.py +3 -0
- pixeltable/io/hf_datasets.py +188 -0
- pixeltable/io/pandas.py +148 -0
- pixeltable/io/parquet.py +192 -0
- pixeltable/iterators/__init__.py +3 -0
- pixeltable/iterators/base.py +52 -0
- pixeltable/iterators/document.py +432 -0
- pixeltable/iterators/video.py +88 -0
- pixeltable/metadata/__init__.py +58 -0
- pixeltable/metadata/converters/convert_10.py +18 -0
- pixeltable/metadata/converters/convert_12.py +3 -0
- pixeltable/metadata/converters/convert_13.py +41 -0
- pixeltable/metadata/schema.py +234 -0
- pixeltable/plan.py +620 -0
- pixeltable/store.py +424 -0
- pixeltable/tool/create_test_db_dump.py +184 -0
- pixeltable/tool/create_test_video.py +81 -0
- pixeltable/type_system.py +846 -0
- pixeltable/utils/__init__.py +17 -0
- pixeltable/utils/arrow.py +98 -0
- pixeltable/utils/clip.py +18 -0
- pixeltable/utils/coco.py +136 -0
- pixeltable/utils/documents.py +69 -0
- pixeltable/utils/filecache.py +195 -0
- pixeltable/utils/help.py +11 -0
- pixeltable/utils/http_server.py +70 -0
- pixeltable/utils/media_store.py +76 -0
- pixeltable/utils/pytorch.py +91 -0
- pixeltable/utils/s3.py +13 -0
- pixeltable/utils/sql.py +17 -0
- pixeltable/utils/transactional_directory.py +35 -0
- pixeltable-0.0.0.dist-info/LICENSE +18 -0
- pixeltable-0.0.0.dist-info/METADATA +131 -0
- pixeltable-0.0.0.dist-info/RECORD +119 -0
- pixeltable-0.0.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Optional, List, Any, Dict, Tuple, Set, Iterable
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
import time
|
|
5
|
+
import sys
|
|
6
|
+
|
|
7
|
+
from .expr import Expr
|
|
8
|
+
from .expr_set import ExprSet
|
|
9
|
+
from .data_row import DataRow
|
|
10
|
+
import pixeltable.utils as utils
|
|
11
|
+
import pixeltable.func as func
|
|
12
|
+
import pixeltable.exceptions as excs
|
|
13
|
+
import pixeltable.catalog as catalog
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ExecProfile:
    """Accumulates per-expression evaluation statistics for a RowBuilder.

    Both `eval_time` (seconds, as measured by the caller) and `eval_count` are lists indexed by expr slot idx
    and sized by `row_builder.num_materialized` at construction time.
    """

    def __init__(self, row_builder: RowBuilder):
        self.eval_time = [0.0] * row_builder.num_materialized
        self.eval_count = [0] * row_builder.num_materialized
        self.row_builder = row_builder

    def print(self, num_rows: int) -> None:
        """Print one line per evaluated expr: the expr, its average time per call and calls-per-row multiple.

        Args:
            num_rows: number of rows that were processed; used to derive the calls-per-row multiple.
                A non-positive value suppresses the multiple instead of raising ZeroDivisionError.
        """
        for i in range(self.row_builder.num_materialized):
            if self.eval_count[i] == 0:
                # never evaluated: nothing to report
                continue
            per_call_time = self.eval_time[i] / self.eval_count[i]
            calls_per_row = self.eval_count[i] / num_rows if num_rows > 0 else 0
            # only point out exprs that were evaluated more than once per row
            multiple_str = f'({calls_per_row}x)' if calls_per_row > 1 else ''
            print(f'{self.row_builder.unique_exprs[i]}: {utils.print_perf_counter_delta(per_call_time)} {multiple_str}')
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class ColumnSlotIdx:
    """Pairs a column with the DataRow slot that holds its materialized value.

    TODO: can this be integrated into RowBuilder directly?
    """
    # the column being materialized
    col: catalog.Column
    # index of the DataRow slot containing the value for `col`
    slot_idx: int
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class RowBuilder:
    """Create and populate DataRows and table rows from exprs and computed columns

    For ColumnRefs to unstored iterator columns:
    - in order for them to be executable, we also record the iterator args and pass them to the ColumnRef
    """

    @dataclass
    class EvalCtx:
        """Context for evaluating a set of target exprs"""
        slot_idxs: List[int]  # slot idxs of exprs needed to evaluate target exprs; does not contain duplicates
        exprs: List[Expr]  # exprs corresponding to slot_idxs
        target_slot_idxs: List[int]  # slot idxs of target exprs; might contain duplicates
        target_exprs: List[Expr]  # exprs corresponding to target_slot_idxs

    def __init__(
            self, output_exprs: List[Expr], columns: List[catalog.Column], input_exprs: List[Expr]
    ):
        """
        Args:
            output_exprs: list of Exprs to be evaluated
            columns: list of columns to be materialized
            input_exprs: list of Exprs that are excluded from evaluation (because they're already materialized)
        TODO: enforce that output_exprs doesn't overlap with input_exprs?
        """
        self.unique_exprs = ExprSet()  # dependencies precede their dependents
        self.next_slot_idx = 0

        # record input and output exprs; make copies to avoid reusing execution state
        unique_input_exprs = [self._record_unique_expr(e.copy(), recursive=False) for e in input_exprs]
        self.input_expr_slot_idxs = {e.slot_idx for e in unique_input_exprs}

        # output exprs: all exprs the caller wants to materialize
        # - explicitly requested output_exprs
        # - values for computed columns
        resolve_cols = set(columns)
        self.output_exprs = [
            self._record_unique_expr(e.copy().resolve_computed_cols(resolve_cols=resolve_cols), recursive=True)
            for e in output_exprs
        ]

        # record columns for create_table_row()
        from .column_ref import ColumnRef
        self.table_columns: List[ColumnSlotIdx] = []
        for col in columns:
            if col.is_computed:
                assert col.value_expr is not None
                # create a copy here so we don't reuse execution state and resolve references to computed columns
                expr = col.value_expr.copy().resolve_computed_cols(resolve_cols=resolve_cols)
                expr = self._record_unique_expr(expr, recursive=True)
                self.add_table_column(col, expr.slot_idx)
                self.output_exprs.append(expr)
            else:
                # record a ColumnRef so that references to this column resolve to the same slot idx
                ref = ColumnRef(col)
                ref = self._record_unique_expr(ref, recursive=False)
                self.add_table_column(col, ref.slot_idx)

        # default eval ctx: all output exprs
        self.default_eval_ctx = self.create_eval_ctx(self.output_exprs, exclude=unique_input_exprs)

        # references to unstored iterator columns:
        # - those ColumnRefs need to instantiate iterators
        # - we create and record the iterator args here and pass them to their respective ColumnRefs
        # - we do this instead of simply recording the iterator args as a component of those ColumnRefs,
        #   because that would cause them to be evaluated for every single row
        # - the separate eval ctx allows the ColumnRef to materialize the iterator args only when the underlying
        #   iterated object changes
        col_refs = [e for e in self.unique_exprs if isinstance(e, ColumnRef)]

        def refs_unstored_iter_col(col_ref: ColumnRef) -> bool:
            tbl = col_ref.col.tbl
            return tbl.is_component_view() and tbl.is_iterator_column(col_ref.col) and not col_ref.col.is_stored

        unstored_iter_col_refs = [col_ref for col_ref in col_refs if refs_unstored_iter_col(col_ref)]
        component_views = [col_ref.col.tbl for col_ref in unstored_iter_col_refs]
        unstored_iter_args = {view.id: view.iterator_args.copy() for view in component_views}
        self.unstored_iter_args = {
            view_id: self._record_unique_expr(arg, recursive=True) for view_id, arg in unstored_iter_args.items()
        }

        for col_ref in unstored_iter_col_refs:
            iter_arg_ctx = self.create_eval_ctx([unstored_iter_args[col_ref.col.tbl.id]])
            col_ref.set_iter_arg_ctx(iter_arg_ctx)

        # we guarantee that we can compute the expr DAG in a single front-to-back pass
        for i, expr in enumerate(self.unique_exprs):
            assert expr.slot_idx == i

        # record transitive dependencies (list of set of slot_idxs, indexed by slot_idx)
        self.dependencies: List[Set[int]] = [set() for _ in range(self.num_materialized)]
        for expr in self.unique_exprs:
            if expr.slot_idx in self.input_expr_slot_idxs:
                # this is input and therefore doesn't depend on other exprs
                continue
            for d in expr.dependencies():
                self.dependencies[expr.slot_idx].add(d.slot_idx)
                self.dependencies[expr.slot_idx].update(self.dependencies[d.slot_idx])

        # derive transitive dependents
        self.dependents: List[Set[int]] = [set() for _ in range(self.num_materialized)]
        for expr in self.unique_exprs:
            for d in self.dependencies[expr.slot_idx]:
                self.dependents[d].add(expr.slot_idx)

        # records the output_expr that a subexpr belongs to
        # (a subexpr can be shared across multiple output exprs)
        self.output_expr_ids: List[Set[int]] = [set() for _ in range(self.num_materialized)]
        for e in self.output_exprs:
            self._record_output_expr_id(e, e.slot_idx)

    def add_table_column(self, col: catalog.Column, slot_idx: int) -> None:
        """Record a column that is part of the table row"""
        self.table_columns.append(ColumnSlotIdx(col, slot_idx))

    def output_slot_idxs(self) -> List[ColumnSlotIdx]:
        """Return ColumnSlotIdx for output columns"""
        return self.table_columns

    @property
    def num_materialized(self) -> int:
        """Number of slot idxs handed out so far"""
        return self.next_slot_idx

    def get_output_exprs(self) -> List[Expr]:
        """Returns exprs that were requested in the c'tor and require evaluation"""
        return self.output_exprs

    def _next_slot_idx(self) -> int:
        """Return the next unused slot idx and advance the counter"""
        result = self.next_slot_idx
        self.next_slot_idx += 1
        return result

    def _record_unique_expr(self, expr: Expr, recursive: bool) -> Expr:
        """Records the expr if it's not a duplicate and assigns a slot idx to expr and its components

        Args:
            expr: the expr to record
            recursive: if True, also records expr's components (before expr itself) and replaces them in
                expr.components with their recorded counterparts
        Returns:
            the unique expr
        """
        if expr in self.unique_exprs:
            # expr is a duplicate: we use the original instead
            return self.unique_exprs[expr]

        # expr value needs to be computed via Expr.eval()
        if recursive:
            for i, c in enumerate(expr.components):
                # make sure we only refer to components that have themselves been recorded
                expr.components[i] = self._record_unique_expr(c, True)
        assert expr.slot_idx is None
        expr.slot_idx = self._next_slot_idx()
        self.unique_exprs.append(expr)
        return expr

    def _record_output_expr_id(self, e: Expr, output_expr_id: int) -> None:
        """Tag e and, recursively, its non-input dependencies with the given output expr id"""
        assert e.slot_idx is not None
        assert output_expr_id is not None
        if e.slot_idx in self.input_expr_slot_idxs:
            return
        self.output_expr_ids[e.slot_idx].add(output_expr_id)
        for d in e.dependencies():
            self._record_output_expr_id(d, output_expr_id)

    def _compute_dependencies(self, target_slot_idxs: List[int], excluded_slot_idxs: List[int]) -> List[int]:
        """Compute exprs needed to materialize the given target slots, excluding 'excluded_slot_idxs'

        Args:
            target_slot_idxs: slot idxs to materialize; must not be empty
            excluded_slot_idxs: slot idxs that are dropped from the result, together with anything that is
                only reachable through them
        Returns:
            sorted list of slot idxs of the transitive dependencies (the targets themselves are not added)
        """
        # set membership instead of repeated list scans inside the loops below
        excluded = set(excluded_slot_idxs)
        dependencies: List[Set[int]] = [set() for _ in range(self.num_materialized)]  # indexed by slot_idx
        # doing this front-to-back ensures that we capture transitive dependencies
        max_target_slot_idx = max(target_slot_idxs)
        for expr in self.unique_exprs:
            if expr.slot_idx > max_target_slot_idx:
                # we're done
                break
            if expr.slot_idx in excluded:
                continue
            if expr.slot_idx in self.input_expr_slot_idxs:
                # this is input and therefore doesn't depend on other exprs
                continue
            for d in expr.dependencies():
                if d.slot_idx in excluded:
                    continue
                dependencies[expr.slot_idx].add(d.slot_idx)
                dependencies[expr.slot_idx].update(dependencies[d.slot_idx])
        # merge dependencies and convert to list
        return sorted(set().union(*[dependencies[i] for i in target_slot_idxs]))

    def substitute_exprs(self, expr_list: List[Expr], remove_duplicates: bool = True) -> None:
        """Substitutes exprs with their executable counterparts from unique_exprs and optionally removes duplicates

        expr_list is modified in place.
        """
        i = 0
        unique_ids: Set[int] = set()  # slot idxs within expr_list
        while i < len(expr_list):
            unique_expr = self.unique_exprs[expr_list[i]]
            if unique_expr.slot_idx in unique_ids and remove_duplicates:
                # don't advance i: the next element has moved into position i
                del expr_list[i]
            else:
                expr_list[i] = unique_expr
                unique_ids.add(unique_expr.slot_idx)
                i += 1

    def get_dependencies(self, targets: List[Expr], exclude: Optional[List[Expr]] = None) -> List[Expr]:
        """
        Return list of dependencies needed to evaluate the given target exprs, including the targets themselves.
        The exprs given in 'exclude' are excluded.
        Returns:
            list of Exprs from unique_exprs (= with slot_idx set), ordered by slot idx
        """
        if exclude is None:
            exclude = []
        if len(targets) == 0:
            return []
        # make sure we only refer to recorded exprs
        targets = [self.unique_exprs[e] for e in targets]
        exclude = [self.unique_exprs[e] for e in exclude]
        target_slot_idxs = [e.slot_idx for e in targets]
        excluded_slot_idxs = [e.slot_idx for e in exclude]
        all_dependencies = set(self._compute_dependencies(target_slot_idxs, excluded_slot_idxs))
        all_dependencies.update(target_slot_idxs)
        result_ids = sorted(all_dependencies)
        return [self.unique_exprs[slot_idx] for slot_idx in result_ids]

    def create_eval_ctx(self, targets: List[Expr], exclude: Optional[List[Expr]] = None) -> EvalCtx:
        """Return EvalCtx for targets"""
        if exclude is None:
            exclude = []
        if len(targets) == 0:
            return self.EvalCtx([], [], [], [])
        dependencies = self.get_dependencies(targets, exclude)
        targets = [self.unique_exprs[e] for e in targets]
        target_slot_idxs = [e.slot_idx for e in targets]
        ctx_slot_idxs = [e.slot_idx for e in dependencies]
        return self.EvalCtx(
            slot_idxs=ctx_slot_idxs, exprs=[self.unique_exprs[slot_idx] for slot_idx in ctx_slot_idxs],
            target_slot_idxs=target_slot_idxs, target_exprs=targets)

    def set_exc(self, data_row: DataRow, slot_idx: int, exc: Exception) -> None:
        """Record an exception in data_row and propagate it to dependents"""
        data_row.set_exc(slot_idx, exc)
        for dependent_idx in self.dependents[slot_idx]:
            data_row.set_exc(dependent_idx, exc)

    def eval(
            self, data_row: DataRow, ctx: EvalCtx, profile: Optional[ExecProfile] = None, ignore_errors: bool = False
    ) -> None:
        """
        Populates the slots in data_row given in ctx.
        If an expr.eval() raises an exception, records the exception in the corresponding slot of data_row
        and omits any of that expr's dependents' eval().
        profile: if present, populated with execution time of each expr.eval() call; indexed by expr.slot_idx
        ignore_errors: if False, raises ExprEvalError if any expr.eval() raises an exception
        """
        for expr in ctx.exprs:
            assert expr.slot_idx >= 0
            if data_row.has_val[expr.slot_idx] or data_row.has_exc(expr.slot_idx):
                # already materialized, or failed (possibly via a failed dependency)
                continue
            try:
                start_time = time.perf_counter()
                expr.eval(data_row, self)
                if profile is not None:
                    profile.eval_time[expr.slot_idx] += time.perf_counter() - start_time
                    profile.eval_count[expr.slot_idx] += 1
            except Exception as exc:
                exc_tb = exc.__traceback__
                self.set_exc(data_row, expr.slot_idx, exc)
                if not ignore_errors:
                    input_vals = [data_row[d.slot_idx] for d in expr.dependencies()]
                    raise excs.ExprEvalError(
                        expr, f'expression {expr}', data_row.get_exc(expr.slot_idx), exc_tb, input_vals, 0)

    def create_table_row(self, data_row: DataRow, exc_col_ids: Set[int]) -> Tuple[Dict[str, Any], int]:
        """Create a table row from the slots that have an output column assigned

        The ids of columns whose slot holds an exception are added to exc_col_ids.

        Return Tuple[dict that represents a stored row (can be passed to sql.insert()), # of exceptions]
        This excludes system columns.
        """
        num_excs = 0
        table_row: Dict[str, Any] = {}
        for info in self.table_columns:
            col, slot_idx = info.col, info.slot_idx
            if data_row.has_exc(slot_idx):
                # exceptions get stored in the errortype/-msg columns
                exc = data_row.get_exc(slot_idx)
                num_excs += 1
                exc_col_ids.add(col.id)
                table_row[col.store_name()] = None
                table_row[col.errortype_store_name()] = type(exc).__name__
                table_row[col.errormsg_store_name()] = str(exc)
            else:
                val = data_row.get_stored_val(slot_idx, col.sa_col.type)
                table_row[col.store_name()] = val
                # we unfortunately need to set these, even if there are no errors
                table_row[col.errortype_store_name()] = None
                table_row[col.errormsg_store_name()] = None

        return table_row, num_excs
|
|
329
|
+
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Optional, List, Any, Dict, Tuple
|
|
3
|
+
from uuid import UUID
|
|
4
|
+
|
|
5
|
+
import sqlalchemy as sql
|
|
6
|
+
|
|
7
|
+
from .expr import Expr
|
|
8
|
+
from .data_row import DataRow
|
|
9
|
+
from .row_builder import RowBuilder
|
|
10
|
+
import pixeltable.type_system as ts
|
|
11
|
+
import pixeltable.catalog as catalog
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class RowidRef(Expr):
    """A reference to a part of a table rowid

    This is used internally to support grouping by a base table and for references to the 'pos' column.
    When a RowidRef is part of a computed column in a view, the view's TableVersion isn't available when
    _from_dict()/init() is called, which is why this class effectively has two separate paths for construction
    (with and without a TableVersion).
    """
    def __init__(
            self, tbl: Optional[catalog.TableVersion], idx: int,
            tbl_id: Optional[UUID] = None, normalized_base_id: Optional[UUID] = None):
        """
        Args:
            tbl: the referenced TableVersion; None when constructed via _from_dict()
            idx: index of the referenced rowid component
            tbl_id: id of the referenced table; only used if tbl is None
            normalized_base_id: id of the normalized base table; only used if tbl is None
        """
        super().__init__(ts.IntType(nullable=False))
        self.tbl = tbl
        if tbl is not None:
            # normalize to simplify comparisons: we refer to the lowest base table that has the requested rowid idx
            # (which has the same values as all its descendent views)
            normalized_base = tbl
            # don't try to reference tbl.store_tbl here
            while normalized_base.base is not None and normalized_base.base.num_rowid_columns() > idx:
                normalized_base = normalized_base.base
            self.normalized_base = normalized_base
        else:
            self.normalized_base = None

        # if we're initialized by _from_dict(), we only have the ids, not the TableVersion itself
        self.tbl_id = tbl.id if tbl is not None else tbl_id
        self.normalized_base_id = self.normalized_base.id if self.normalized_base is not None else normalized_base_id
        self.rowid_component_idx = idx
        self.id = self._create_id()

    def _get_tbl(self) -> catalog.TableVersion:
        """Return the referenced TableVersion, looking it up in the Catalog by tbl_id if we only have the id"""
        if self.tbl is not None:
            return self.tbl
        return catalog.Catalog.get().tbl_versions[(self.tbl_id, None)]

    def default_column_name(self) -> Optional[str]:
        return str(self)

    def _equals(self, other: RowidRef) -> bool:
        # comparison is based on the normalized base, not on the table through which it is referenced
        return self.normalized_base_id == other.normalized_base_id \
            and self.rowid_component_idx == other.rowid_component_idx

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        return super()._id_attrs() +\
            [('normalized_base_id', self.normalized_base_id), ('idx', self.rowid_component_idx)]

    def __str__(self) -> str:
        """Return the pos column name if this references a component view's pos column, otherwise ''"""
        # check if this is the pos column of a component view
        tbl = self._get_tbl()
        if tbl.is_component_view() and self.rowid_component_idx == tbl.store_tbl.pos_col_idx:
            return catalog.globals.POS_COLUMN_NAME
        return ''

    def set_tbl(self, tbl: catalog.TableVersionPath) -> None:
        """Change the table that is being referenced.
        This can be necessary during query planning, because at that stage we try to minimize the total number of
        tables that are referenced/need to be joined.
        We can only change to a view of the original table (which shares the base's rowid columns).
        """
        if self.tbl_id == tbl.tbl_version.id:
            return
        tbl_version_ids = [tbl_version.id for tbl_version in tbl.get_tbl_versions()]
        assert self.tbl_id in tbl_version_ids
        self.tbl = tbl.tbl_version
        self.tbl_id = self.tbl.id

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        rowid_cols = self._get_tbl().store_tbl.rowid_columns()
        return rowid_cols[self.rowid_component_idx]

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        data_row[self.slot_idx] = data_row.pk[self.rowid_component_idx]

    def _as_dict(self) -> Dict:
        return {
            'tbl_id': str(self.tbl_id),
            'normalized_base_id': str(self.normalized_base_id),
            'idx': self.rowid_component_idx,
        }

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        # the TableVersion isn't available here, so we construct from the ids alone
        tbl_id, normalized_base_id, idx = UUID(d['tbl_id']), UUID(d['normalized_base_id']), d['idx']
        return cls(tbl=None, idx=idx, tbl_id=tbl_id, normalized_base_id=normalized_base_id)
|
|
94
|
+
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
from typing import Optional, List
|
|
2
|
+
|
|
3
|
+
import sqlalchemy as sql
|
|
4
|
+
import PIL.Image
|
|
5
|
+
|
|
6
|
+
import pixeltable.exceptions as excs
|
|
7
|
+
import pixeltable.type_system as ts
|
|
8
|
+
from .column_ref import ColumnRef
|
|
9
|
+
from .data_row import DataRow
|
|
10
|
+
from .expr import Expr
|
|
11
|
+
from .literal import Literal
|
|
12
|
+
from .row_builder import RowBuilder
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class SimilarityExpr(Expr):
    """Similarity of a literal item (string or image) to the values of an indexed column.

    Only has a SQL representation (sql_expr()/as_order_by_clause()); eval() must never be called.
    """

    def __init__(self, col_ref: ColumnRef, item: Expr):
        """
        Args:
            col_ref: reference to the column being searched; the column needs to have exactly one index
            item: the query item; needs to be a Literal of string or image type
        Raises:
            excs.Error: if the column has no index or more than one index, or if the index lacks the embedding
                needed for the item's type
        """
        super().__init__(ts.FloatType())
        self.components = [col_ref, item]
        self.id = self._create_id()
        assert isinstance(item, Literal)
        assert item.col_type.is_string_type() or item.col_type.is_image_type()

        # determine index to use
        idx_info = col_ref.col.get_idx_info()
        if len(idx_info) == 0:
            raise excs.Error(f'No index found for column {col_ref.col}')
        if len(idx_info) > 1:
            raise excs.Error(
                f'Column {col_ref.col.name} has multiple indices; use the index name to disambiguate, '
                f'e.g., `{col_ref.col.name}.<index-name>.similarity(...)`')
        self.idx_info = next(iter(idx_info.values()))
        idx = self.idx_info.idx

        if item.col_type.is_string_type() and idx.txt_embed is None:
            raise excs.Error(
                f'Embedding index {self.idx_info.name} on column {self.idx_info.col.name} was created without the '
                f'text_embed parameter and does not support text queries')
        if item.col_type.is_image_type() and idx.img_embed is None:
            raise excs.Error(
                f'Embedding index {self.idx_info.name} on column {self.idx_info.col.name} was created without the '
                f'img_embed parameter and does not support image queries')

    def __str__(self) -> str:
        return f'{self.components[0]}.similarity({self.components[1]})'

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        """Return the index's similarity clause for the literal item"""
        assert isinstance(self.components[1], Literal)
        item = self.components[1].val
        return self.idx_info.idx.similarity_clause(self.idx_info.val_col, item)

    def as_order_by_clause(self, is_asc: bool) -> Optional[sql.ClauseElement]:
        """Return the index's order-by clause for the literal item"""
        assert isinstance(self.components[1], Literal)
        item = self.components[1].val
        return self.idx_info.idx.order_by_clause(self.idx_info.val_col, item, is_asc)

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        # this should never get called; raise explicitly, because a bare `assert False` is stripped under
        # `python -O` and would turn this into a silent no-op
        raise AssertionError('SimilarityExpr.eval() should never get called')

    @classmethod
    def _from_dict(cls, d: dict, components: List[Expr]) -> Expr:
        assert len(components) == 2
        assert isinstance(components[0], ColumnRef)
        return cls(components[0], components[1])
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from typing import Optional, Dict, List, Tuple, Any
|
|
3
|
+
|
|
4
|
+
import sqlalchemy as sql
|
|
5
|
+
|
|
6
|
+
import pixeltable.type_system as ts
|
|
7
|
+
from .expr import DataRow, Expr
|
|
8
|
+
from .row_builder import RowBuilder
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TypeCast(Expr):
    """
    An `Expr` that converts the value of an underlying `Expr` to a specified `ColumnType`.
    The underlying `Expr` is the sole component; the target type is the `Expr`'s own `col_type`.
    """
    def __init__(self, underlying: Expr, new_type: ts.ColumnType):
        super().__init__(new_type)
        self.components: List[Expr] = [underlying]
        self.id: Optional[int] = self._create_id()

    def __str__(self) -> str:
        return f'{self._underlying}.astype({self.col_type})'

    @property
    def _underlying(self):
        # the expr whose value is being converted
        return self.components[0]

    def _equals(self, other: 'TypeCast') -> bool:
        # there is nothing to compare here: `TypeCast` adds no properties beyond those captured by `Expr`
        return True

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        base_attrs = super()._id_attrs()
        return base_attrs + [('new_type', self.col_type)]

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        """
        Deliberately left without a SQL translation for now, which sidesteps potentially thorny
        questions about consistency of doing type conversions in both Python and Postgres.
        """
        return None

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        source_val = data_row[self._underlying.slot_idx]
        converted_val = self.col_type.create_literal(source_val)
        data_row[self.slot_idx] = converted_val

    def _as_dict(self) -> Dict:
        # entries from the base class are merged in last
        result = {'new_type': self.col_type.as_dict()}
        result.update(super()._as_dict())
        return result

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        assert 'new_type' in d
        assert len(components) == 1
        target_type = ts.ColumnType.from_dict(d['new_type'])
        return cls(components[0], target_type)
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import List, Tuple, Any, Dict, NoReturn
|
|
4
|
+
|
|
5
|
+
import pixeltable.type_system as ts
|
|
6
|
+
from .data_row import DataRow
|
|
7
|
+
from .expr import Expr
|
|
8
|
+
from .row_builder import RowBuilder
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Variable(Expr):
    """A named, typed placeholder expression; needed for ExprTemplateFunctions.

    A Variable only stands in for a parameter: it has to be replaced by an actual expression before
    evaluation, which is why `eval()`, `sql_expr()` and `default_column_name()` all raise
    NotImplementedError.
    """

    def __init__(self, name: str, col_type: ts.ColumnType):
        super().__init__(col_type)
        # The name is assigned before the id is created (same order as before).
        self.name = name
        self.id = self._create_id()

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        """Extend the inherited id attributes with the variable's name."""
        inherited_attrs = super()._id_attrs()
        return inherited_attrs + [('name', self.name)]

    def default_column_name(self) -> NoReturn:
        """Not supported for a Variable."""
        raise NotImplementedError()

    def _equals(self, other: Variable) -> bool:
        # At this level two variables are told apart by name only.
        names_match = self.name == other.name
        return names_match

    def __str__(self) -> str:
        return self.name

    def sql_expr(self) -> NoReturn:
        """Not supported for a Variable."""
        raise NotImplementedError()

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> NoReturn:
        """Not supported for a Variable."""
        raise NotImplementedError()

    def _as_dict(self) -> Dict:
        """Serialize name and type, followed by the inherited entries."""
        serialized: Dict = {'name': self.name, 'type': self.col_type.as_dict()}
        # Entries from the base class are merged last, so they take precedence on a key clash.
        serialized.update(super()._as_dict())
        return serialized

    @classmethod
    def _from_dict(cls, d: Dict, _: List[Expr]) -> Expr:
        """Rebuild a Variable from its serialized form; the components argument is ignored."""
        var_name = d['name']
        var_type = ts.ColumnType.from_dict(d['type'])
        return cls(var_name, var_type)
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Extended integrations for Pixeltable. This package contains experimental or demonstration features that
|
|
3
|
+
are not intended for production use. Long-term support cannot be guaranteed, usually because the features
|
|
4
|
+
have dependencies whose future support is unclear.
|
|
5
|
+
"""
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Iterable, Iterator
|
|
4
|
+
from urllib.request import urlretrieve
|
|
5
|
+
|
|
6
|
+
import PIL.Image
|
|
7
|
+
import numpy as np
|
|
8
|
+
import torch
|
|
9
|
+
from yolox.data import ValTransform
|
|
10
|
+
from yolox.exp import get_exp, Exp
|
|
11
|
+
from yolox.models import YOLOX
|
|
12
|
+
from yolox.utils import postprocess
|
|
13
|
+
|
|
14
|
+
import pixeltable as pxt
|
|
15
|
+
from pixeltable import env
|
|
16
|
+
from pixeltable.func import Batch
|
|
17
|
+
from pixeltable.functions.util import resolve_torch_device
|
|
18
|
+
|
|
19
|
+
_logger = logging.getLogger('pixeltable')
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@pxt.udf(batch_size=4)
def yolox(images: Batch[PIL.Image.Image], *, model_id: str, threshold: float = 0.5) -> Batch[dict]:
    """
    Runs the specified YOLOX object detection model on an image.

    YOLOX support is part of the `pixeltable.ext` package: long-term support is not guaranteed, and it is not
    intended for use in production applications.

    Parameters:
    - `model_id` - one of: `yolox_nano`, `yolox_tiny`, `yolox_s`, `yolox_m`, `yolox_l`, `yolox_x`
    - `threshold` - the threshold for object detection

    Returns:
        One dict per input image, in input order, with the keys `bboxes` (box coordinates rescaled to
        the original image size), `scores` and `labels`. All three lists are empty for an image
        without detections.
    """
    # Inference currently always runs on the CPU.
    model, exp = _lookup_model(model_id, 'cpu')
    image_tensors = list(_images_to_tensors(images, exp))
    batch_tensor = torch.stack(image_tensors)

    with torch.no_grad():
        output_tensor = model(batch_tensor)

    # 80 is the number of classes handed to YOLOX's postprocess().
    outputs = postprocess(
        output_tensor, 80, threshold, exp.nmsthre, class_agnostic=False
    )

    results: list[dict] = []
    # postprocess() yields one entry per image in the batch (None when nothing was detected), so each
    # image must be paired with its own entry; indexing outputs[0] for every image would report the
    # first image's detections for the whole batch.
    for image, detections in zip(images, outputs):
        if detections is None:
            results.append({'bboxes': [], 'scores': [], 'labels': []})
            continue
        # The image was resized by this factor for inference; dividing maps boxes back to the original size.
        ratio = min(exp.test_size[0] / image.height, exp.test_size[1] / image.width)
        results.append({
            'bboxes': [(output[:4] / ratio).tolist() for output in detections],
            # Columns 4 and 5 are multiplied into a single score per detection.
            'scores': [output[4].item() * output[5].item() for output in detections],
            'labels': [int(output[6]) for output in detections],
        })
    return results
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _images_to_tensors(images: Iterable[PIL.Image.Image], exp: Exp) -> Iterator[torch.Tensor]:
    """
    Lazily convert PIL images into preprocessed tensors for the model.

    Each image is run through the module-level `_val_transform` with `exp.test_size` as the target
    size, and the resulting array is wrapped in a tensor.

    Args:
        images: the images to convert
        exp: the YOLOX experiment whose `test_size` determines the target size

    Yields:
        One tensor per input image, in input order.
    """
    for image in images:
        # Grayscale, palette and RGBA images do not convert to an H x W x 3 array, which the
        # transform appears to require; normalize them to RGB first. Width and height are unchanged.
        if image.mode != 'RGB':
            image = image.convert('RGB')
        image_transform, _ = _val_transform(np.array(image), None, exp.test_size)
        yield torch.from_numpy(image_transform)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _lookup_model(model_id: str, device: str) -> tuple[YOLOX, Exp]:
    """
    Return the YOLOX model and its experiment for `model_id`, loaded onto `device`.

    Results are memoized in `_model_cache` under the key `(model_id, device)`. On a cache miss the
    weights file is downloaded into the Pixeltable tmp dir (unless it is already there), and the
    model is built, put into eval mode and initialized from those weights.

    Args:
        model_id: name of the YOLOX model; also used as the weights file name and experiment name
        device: torch device string the model is moved to and the weights are mapped to

    Returns:
        A `(model, exp)` tuple.
    """
    key = (model_id, device)
    cached = _model_cache.get(key)
    if cached is not None:
        return cached

    weights_url = f'https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/{model_id}.pth'
    weights_file = Path(env.Env.get().tmp_dir) / f'{model_id}.pth'
    if not weights_file.exists():
        _logger.info(f'Downloading weights for YOLOX model {model_id}: from {weights_url} -> {weights_file}')
        # Download to a temporary name and move it into place only once complete. Otherwise an
        # interrupted or truncated download would leave a corrupt file at the final path that
        # every later call would treat as valid.
        partial_file = weights_file.with_name(f'{weights_file.name}.part')
        try:
            urlretrieve(weights_url, partial_file)
            partial_file.replace(weights_file)
        except BaseException:
            partial_file.unlink(missing_ok=True)
            raise

    exp = get_exp(exp_name=model_id)
    model = exp.get_model().to(device)

    model.eval()
    # Kept from the original code; presumably redundant after eval() - TODO confirm.
    model.head.training = False
    model.training = False

    # Load in the weights from training
    weights = torch.load(weights_file, map_location=torch.device(device))
    model.load_state_dict(weights['model'])

    _model_cache[key] = (model, exp)
    return model, exp
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# Cache of loaded models: _lookup_model() stores (model, exp) tuples here under (model_id, device) keys.
_model_cache = {}
|
|
92
|
+
# Shared ValTransform instance; _images_to_tensors() applies it to every image before inference.
_val_transform = ValTransform(legacy=False)
|