pixeltable 0.1.2__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (140) hide show
  1. pixeltable/__init__.py +21 -4
  2. pixeltable/catalog/__init__.py +13 -0
  3. pixeltable/catalog/catalog.py +159 -0
  4. pixeltable/catalog/column.py +200 -0
  5. pixeltable/catalog/dir.py +32 -0
  6. pixeltable/catalog/globals.py +33 -0
  7. pixeltable/catalog/insertable_table.py +191 -0
  8. pixeltable/catalog/named_function.py +36 -0
  9. pixeltable/catalog/path.py +58 -0
  10. pixeltable/catalog/path_dict.py +139 -0
  11. pixeltable/catalog/schema_object.py +39 -0
  12. pixeltable/catalog/table.py +581 -0
  13. pixeltable/catalog/table_version.py +749 -0
  14. pixeltable/catalog/table_version_path.py +133 -0
  15. pixeltable/catalog/view.py +203 -0
  16. pixeltable/client.py +520 -31
  17. pixeltable/dataframe.py +540 -349
  18. pixeltable/env.py +373 -48
  19. pixeltable/exceptions.py +12 -21
  20. pixeltable/exec/__init__.py +9 -0
  21. pixeltable/exec/aggregation_node.py +78 -0
  22. pixeltable/exec/cache_prefetch_node.py +113 -0
  23. pixeltable/exec/component_iteration_node.py +79 -0
  24. pixeltable/exec/data_row_batch.py +95 -0
  25. pixeltable/exec/exec_context.py +22 -0
  26. pixeltable/exec/exec_node.py +61 -0
  27. pixeltable/exec/expr_eval_node.py +217 -0
  28. pixeltable/exec/in_memory_data_node.py +69 -0
  29. pixeltable/exec/media_validation_node.py +43 -0
  30. pixeltable/exec/sql_scan_node.py +225 -0
  31. pixeltable/exprs/__init__.py +24 -0
  32. pixeltable/exprs/arithmetic_expr.py +102 -0
  33. pixeltable/exprs/array_slice.py +71 -0
  34. pixeltable/exprs/column_property_ref.py +77 -0
  35. pixeltable/exprs/column_ref.py +105 -0
  36. pixeltable/exprs/comparison.py +77 -0
  37. pixeltable/exprs/compound_predicate.py +98 -0
  38. pixeltable/exprs/data_row.py +187 -0
  39. pixeltable/exprs/expr.py +586 -0
  40. pixeltable/exprs/expr_set.py +39 -0
  41. pixeltable/exprs/function_call.py +380 -0
  42. pixeltable/exprs/globals.py +69 -0
  43. pixeltable/exprs/image_member_access.py +115 -0
  44. pixeltable/exprs/image_similarity_predicate.py +58 -0
  45. pixeltable/exprs/inline_array.py +107 -0
  46. pixeltable/exprs/inline_dict.py +101 -0
  47. pixeltable/exprs/is_null.py +38 -0
  48. pixeltable/exprs/json_mapper.py +121 -0
  49. pixeltable/exprs/json_path.py +159 -0
  50. pixeltable/exprs/literal.py +54 -0
  51. pixeltable/exprs/object_ref.py +41 -0
  52. pixeltable/exprs/predicate.py +44 -0
  53. pixeltable/exprs/row_builder.py +355 -0
  54. pixeltable/exprs/rowid_ref.py +94 -0
  55. pixeltable/exprs/type_cast.py +53 -0
  56. pixeltable/exprs/variable.py +45 -0
  57. pixeltable/func/__init__.py +9 -0
  58. pixeltable/func/aggregate_function.py +194 -0
  59. pixeltable/func/batched_function.py +53 -0
  60. pixeltable/func/callable_function.py +69 -0
  61. pixeltable/func/expr_template_function.py +82 -0
  62. pixeltable/func/function.py +110 -0
  63. pixeltable/func/function_registry.py +227 -0
  64. pixeltable/func/globals.py +36 -0
  65. pixeltable/func/nos_function.py +202 -0
  66. pixeltable/func/signature.py +166 -0
  67. pixeltable/func/udf.py +163 -0
  68. pixeltable/functions/__init__.py +52 -103
  69. pixeltable/functions/eval.py +216 -0
  70. pixeltable/functions/fireworks.py +61 -0
  71. pixeltable/functions/huggingface.py +120 -0
  72. pixeltable/functions/image.py +16 -0
  73. pixeltable/functions/openai.py +88 -0
  74. pixeltable/functions/pil/image.py +148 -7
  75. pixeltable/functions/string.py +13 -0
  76. pixeltable/functions/together.py +27 -0
  77. pixeltable/functions/util.py +41 -0
  78. pixeltable/functions/video.py +62 -0
  79. pixeltable/iterators/__init__.py +3 -0
  80. pixeltable/iterators/base.py +48 -0
  81. pixeltable/iterators/document.py +311 -0
  82. pixeltable/iterators/video.py +89 -0
  83. pixeltable/metadata/__init__.py +54 -0
  84. pixeltable/metadata/converters/convert_10.py +18 -0
  85. pixeltable/metadata/schema.py +211 -0
  86. pixeltable/plan.py +656 -0
  87. pixeltable/store.py +413 -182
  88. pixeltable/tests/conftest.py +143 -86
  89. pixeltable/tests/test_audio.py +65 -0
  90. pixeltable/tests/test_catalog.py +27 -0
  91. pixeltable/tests/test_client.py +14 -14
  92. pixeltable/tests/test_component_view.py +372 -0
  93. pixeltable/tests/test_dataframe.py +433 -0
  94. pixeltable/tests/test_dirs.py +78 -62
  95. pixeltable/tests/test_document.py +117 -0
  96. pixeltable/tests/test_exprs.py +591 -135
  97. pixeltable/tests/test_function.py +297 -67
  98. pixeltable/tests/test_functions.py +283 -1
  99. pixeltable/tests/test_migration.py +43 -0
  100. pixeltable/tests/test_nos.py +54 -0
  101. pixeltable/tests/test_snapshot.py +208 -0
  102. pixeltable/tests/test_table.py +1086 -258
  103. pixeltable/tests/test_transactional_directory.py +42 -0
  104. pixeltable/tests/test_types.py +5 -11
  105. pixeltable/tests/test_video.py +149 -34
  106. pixeltable/tests/test_view.py +530 -0
  107. pixeltable/tests/utils.py +186 -45
  108. pixeltable/tool/create_test_db_dump.py +149 -0
  109. pixeltable/type_system.py +490 -133
  110. pixeltable/utils/__init__.py +17 -46
  111. pixeltable/utils/clip.py +12 -15
  112. pixeltable/utils/coco.py +136 -0
  113. pixeltable/utils/documents.py +39 -0
  114. pixeltable/utils/filecache.py +195 -0
  115. pixeltable/utils/help.py +11 -0
  116. pixeltable/utils/media_store.py +76 -0
  117. pixeltable/utils/parquet.py +126 -0
  118. pixeltable/utils/pytorch.py +172 -0
  119. pixeltable/utils/s3.py +13 -0
  120. pixeltable/utils/sql.py +17 -0
  121. pixeltable/utils/transactional_directory.py +35 -0
  122. pixeltable-0.2.0.dist-info/LICENSE +18 -0
  123. pixeltable-0.2.0.dist-info/METADATA +117 -0
  124. pixeltable-0.2.0.dist-info/RECORD +125 -0
  125. {pixeltable-0.1.2.dist-info → pixeltable-0.2.0.dist-info}/WHEEL +1 -1
  126. pixeltable/catalog.py +0 -1421
  127. pixeltable/exprs.py +0 -1745
  128. pixeltable/function.py +0 -269
  129. pixeltable/functions/clip.py +0 -10
  130. pixeltable/functions/pil/__init__.py +0 -23
  131. pixeltable/functions/tf.py +0 -21
  132. pixeltable/index.py +0 -57
  133. pixeltable/tests/test_dict.py +0 -24
  134. pixeltable/tests/test_tf.py +0 -69
  135. pixeltable/tf.py +0 -33
  136. pixeltable/utils/tf.py +0 -33
  137. pixeltable/utils/video.py +0 -32
  138. pixeltable-0.1.2.dist-info/LICENSE +0 -201
  139. pixeltable-0.1.2.dist-info/METADATA +0 -89
  140. pixeltable-0.1.2.dist-info/RECORD +0 -37
@@ -0,0 +1,586 @@
1
+ from __future__ import annotations
2
+
3
+ import abc
4
+ import hashlib
5
+ import importlib
6
+ import inspect
7
+ import json
8
+ import sys
9
+ import typing
10
+ from itertools import islice
11
+ from typing import Union, Optional, List, Callable, Any, Dict, Tuple, Set, Generator, Type
12
+ from uuid import UUID
13
+
14
+ import sqlalchemy as sql
15
+
16
+ import pixeltable
17
+ import pixeltable.catalog as catalog
18
+ import pixeltable.exceptions as excs
19
+ import pixeltable.type_system as ts
20
+ import pixeltable.func as func
21
+ from .data_row import DataRow
22
+ from .globals import ComparisonOperator, LogicalOperator, LiteralPythonTypes, ArithmeticOperator
23
+
24
+
25
class ExprScope:
    """
    A node in the chain of evaluation scopes for Exprs; scopes nest via their parent link.

    A scope whose parent is None is an outermost scope.
    """
    def __init__(self, parent: Optional[ExprScope]):
        self.parent = parent

    def is_contained_in(self, other: ExprScope) -> bool:
        """Returns True if other is this scope or one of its ancestors."""
        # walk up the parent chain, starting with ourselves
        scope = self
        while scope is not None:
            if scope == other:
                return True
            scope = scope.parent
        return False


# module-level scope with no parent; Expr.scope() starts from it
_GLOBAL_SCOPE = ExprScope(None)
42
+
43
+
44
class Expr(abc.ABC):
    """
    Abstract base class of all expressions.

    Rules for using state in subclasses:
    - all state except for components and slot_idx is shared between copies of an Expr
    - slot_idx is set during analysis (DataFrame.show())
    - during eval(), components can only be accessed via self.components; any Exprs outside of that won't
      have slot_idx set
    """
    def __init__(self, col_type: ts.ColumnType):
        self.col_type = col_type

        # each instance has an id that is used for equality comparisons
        # - set by the subclass's __init__()
        # - produced by _create_id()
        # - not expected to survive a serialize()/deserialize() roundtrip
        self.id: Optional[int] = None

        # index of the expr's value in the data row:
        # - set for all materialized exprs
        # - -1: not executable
        # - not set for subexprs that don't need to be materialized because the parent can be materialized via SQL
        self.slot_idx = -1
        self.components: List[Expr] = []  # the subexprs that are needed to construct this expr

    def dependencies(self) -> List[Expr]:
        """
        Returns all exprs that need to have been evaluated before eval() can be called on this one.
        """
        return self.components

    def scope(self) -> ExprScope:
        """Returns the scope in which this expr needs to be evaluated."""
        # by default this is the innermost scope of any of our components
        result = _GLOBAL_SCOPE
        for c in self.components:
            c_scope = c.scope()
            if c_scope.is_contained_in(result):
                result = c_scope
        return result

    def bind_rel_paths(self, mapper: Optional['pixeltable.exprs.JsonMapper'] = None) -> None:
        """
        Binds relative JsonPaths to mapper.
        This needs to be done in a separate phase after __init__(), because RelativeJsonPath()(-1) cannot be resolved
        by the immediately containing JsonMapper during initialization.
        """
        for c in self.components:
            c.bind_rel_paths(mapper)

    def default_column_name(self) -> Optional[str]:
        """
        Returns:
            None if this expression lacks a default name,
            or a valid identifier (according to catalog.is_valid_identifer) otherwise.
        """
        return None

    def equals(self, other: Expr) -> bool:
        """
        Subclass-specific comparison. Implemented as a function because __eq__() is needed to construct Comparisons.

        Two exprs are equal if they have exactly the same class, pairwise-equal components and
        the subclass's _equals() agrees.
        """
        # exact class match is intended here: a subclass instance never equals a base class instance
        if type(self) is not type(other):
            return False
        if not self.list_equals(self.components, other.components):
            return False
        return self._equals(other)

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        """Returns attribute name/value pairs that are used to construct the instance id.

        Attribute values must be immutable and have str() defined.
        """
        return [('classname', self.__class__.__name__)]

    def _create_id(self) -> int:
        """Computes the instance id as a hash over _id_attrs() and the ids of all components."""
        hasher = hashlib.sha256()
        for attr, value in self._id_attrs():
            hasher.update(attr.encode('utf-8'))
            hasher.update(str(value).encode('utf-8'))
        for expr in self.components:
            hasher.update(str(expr.id).encode('utf-8'))
        # truncate to machine's word size
        return int(hasher.hexdigest(), 16) & sys.maxsize

    def __hash__(self) -> int:
        assert self.id is not None
        return self.id

    @classmethod
    def list_equals(cls, a: List[Expr], b: List[Expr]) -> bool:
        """Returns True if a and b have the same length and are pairwise equal according to equals()."""
        return len(a) == len(b) and all(x.equals(y) for x, y in zip(a, b))

    def copy(self) -> Expr:
        """
        Creates a copy that can be evaluated separately: it doesn't share any eval context (slot_idx)
        but shares everything else (catalog objects, etc.)
        """
        cls = self.__class__
        # bypass __init__(): all attributes are taken over from self
        result = cls.__new__(cls)
        result.__dict__.update(self.__dict__)
        result.slot_idx = -1
        result.components = [c.copy() for c in self.components]
        return result

    @classmethod
    def copy_list(cls, expr_list: List[Expr]) -> List[Expr]:
        """Returns a list containing a copy() of each element of expr_list."""
        return [e.copy() for e in expr_list]

    def __deepcopy__(self, memo=None) -> Expr:
        # we don't need to create an actual deep copy because all state other than execution state is read-only
        if memo is None:
            memo = {}
        result = self.copy()
        memo[id(self)] = result
        return result

    def substitute(self, old: Expr, new: Expr) -> Expr:
        """
        Replace 'old' with 'new' recursively.

        Returns a copy of 'new' if self equals 'old'; otherwise updates self.components in place
        and returns self.
        """
        if self.equals(old):
            return new.copy()
        # assign by index so that the existing components list object is updated, not replaced
        for i, c in enumerate(self.components):
            self.components[i] = c.substitute(old, new)
        return self

    def resolve_computed_cols(self, resolve_cols: Optional[Set[catalog.Column]] = None) -> Expr:
        """
        Recursively replace ColRefs to unstored computed columns with their value exprs.
        Also replaces references to stored computed columns in resolve_cols.
        """
        from .expr_set import ExprSet
        from .column_ref import ColumnRef
        if resolve_cols is None:
            resolve_cols = set()
        result = self
        # substituted value exprs can themselves reference computed columns: repeat until nothing is left
        while True:
            target_col_refs = ExprSet([
                e for e in result.subexprs()
                if isinstance(e, ColumnRef) and e.col.is_computed and (not e.col.is_stored or e.col in resolve_cols)
            ])
            if len(target_col_refs) == 0:
                return result
            for ref in target_col_refs:
                assert ref.col.value_expr is not None
                result = result.substitute(ref, ref.col.value_expr)

    def is_bound_by(self, tbl: catalog.TableVersionPath) -> bool:
        """Returns True if this expr can be evaluated in the context of tbl."""
        from .column_ref import ColumnRef
        return all(tbl.has_column(col_ref.col) for col_ref in self.subexprs(ColumnRef))

    def retarget(self, tbl: catalog.TableVersionPath) -> Expr:
        """Retarget ColumnRefs in this expr to the specific TableVersions in tbl."""
        tbl_versions = {tbl_version.id: tbl_version for tbl_version in tbl.get_tbl_versions()}
        return self._retarget(tbl_versions)

    def _retarget(self, tbl_versions: Dict[UUID, catalog.TableVersion]) -> Expr:
        """Replaces each ColumnRef with one for the same column id in the matching entry of tbl_versions."""
        from .column_ref import ColumnRef
        if isinstance(self, ColumnRef):
            target = tbl_versions[self.col.tbl.id]
            assert self.col.id in target.cols_by_id
            col = target.cols_by_id[self.col.id]
            return ColumnRef(col)
        for i, c in enumerate(self.components):
            self.components[i] = c._retarget(tbl_versions)
        return self

    @classmethod
    def list_substitute(cls, expr_list: List[Expr], old: Expr, new: Expr) -> None:
        """Applies substitute(old, new) to every element of expr_list, updating the list in place."""
        for i, e in enumerate(expr_list):
            expr_list[i] = e.substitute(old, new)

    @abc.abstractmethod
    def __str__(self) -> str:
        pass

    def display_str(self, inline: bool = True) -> str:
        """
        inline: if False, use line breaks where appropriate; otherwise don't use linebreaks
        """
        return str(self)

    @classmethod
    def print_list(cls, expr_list: List[Expr]) -> str:
        """Returns str() of a single element, or a parenthesized comma-separated list otherwise."""
        if len(expr_list) == 1:
            return str(expr_list[0])
        return f'({", ".join(str(e) for e in expr_list)})'

    def subexprs(
            self, expr_class: Optional[Type[Expr]] = None, filter: Optional[Callable[[Expr], bool]] = None,
            traverse_matches: bool = True
    ) -> Generator[Expr, None, None]:
        """
        Iterate over all subexprs, including self.

        Components are yielded before the expr that contains them. If expr_class or filter is given,
        only matching exprs are yielded; if traverse_matches is False, the components of a matching
        expr are not visited.
        """
        assert expr_class is None or filter is None  # at most one of them
        if expr_class is not None:
            filter = lambda e: isinstance(e, expr_class)
        is_match = filter is None or filter(self)
        if not is_match or traverse_matches:
            for c in self.components:
                yield from c.subexprs(filter=filter, traverse_matches=traverse_matches)
        if is_match:
            yield self

    def contains(self, cls: Optional[Type[Expr]] = None, filter: Optional[Callable[[Expr], bool]] = None) -> bool:
        """
        Returns True if any subexpr (including self) is an instance of cls or satisfies filter.
        Exactly one of cls and filter needs to be specified.
        """
        assert (cls is not None) != (filter is not None)  # need one of them
        if cls is not None:
            filter = lambda e: isinstance(e, cls)
        try:
            # a single match is enough, no need to look inside of it
            _ = next(self.subexprs(filter=filter, traverse_matches=False))
            return True
        except StopIteration:
            return False

    @classmethod
    def list_subexprs(
            cls, expr_list: List[Expr], expr_class: Optional[Type[Expr]] = None,
            filter: Optional[Callable[[Expr], bool]] = None, traverse_matches: bool = True
    ) -> Generator[Expr, None, None]:
        """Produce subexprs for all exprs in list. Can contain duplicates."""
        for e in expr_list:
            yield from e.subexprs(expr_class=expr_class, filter=filter, traverse_matches=traverse_matches)

    def tbl_ids(self) -> Set[UUID]:
        """Returns table ids referenced by this expr."""
        from .column_ref import ColumnRef
        from .rowid_ref import RowidRef
        return {ref.col.tbl.id for ref in self.subexprs(ColumnRef)} | {ref.tbl.id for ref in self.subexprs(RowidRef)}

    @classmethod
    def list_tbl_ids(cls, expr_list: List[Expr]) -> Set[UUID]:
        """Returns the union of tbl_ids() over all exprs in expr_list."""
        ids: Set[UUID] = set()
        for e in expr_list:
            ids.update(e.tbl_ids())
        return ids

    @classmethod
    def from_object(cls, o: object) -> Optional[Expr]:
        """
        Try to turn a literal object into an Expr.

        Returns o itself if it already is an Expr, and None if no conversion applies.
        """
        if isinstance(o, Expr):
            return o
        # try to create a literal
        obj_type = ts.ColumnType.infer_literal_type(o)
        if obj_type is not None:
            from .literal import Literal
            return Literal(o, col_type=obj_type)
        if isinstance(o, dict):
            from .inline_dict import InlineDict
            return InlineDict(o)
        elif isinstance(o, list):
            from .inline_array import InlineArray
            return InlineArray(tuple(o))
        return None

    @abc.abstractmethod
    def _equals(self, other: Expr) -> bool:
        pass

    @abc.abstractmethod
    def sql_expr(self) -> Optional[sql.ClauseElement]:
        """
        If this expr can be materialized directly in SQL:
        - returns a ClauseElement
        - eval() will not be called (exception: Literal)
        Otherwise
        - returns None
        - eval() will be called
        """
        pass

    @abc.abstractmethod
    def eval(self, data_row: DataRow, row_builder: 'pixeltable.exprs.RowBuilder') -> None:
        """
        Compute the expr value for data_row and store the result in data_row[slot_idx].
        Not called if sql_expr() != None (exception: Literal).
        """
        pass

    def release(self) -> None:
        """
        Allow Expr class to tear down execution state. This is called after the last eval() call.
        """
        for c in self.components:
            c.release()

    @classmethod
    def release_list(cls, expr_list: List[Expr]) -> None:
        """Calls release() on every element of expr_list."""
        for e in expr_list:
            e.release()

    def serialize(self) -> str:
        """Returns the JSON string for as_dict()."""
        return json.dumps(self.as_dict())

    def as_dict(self) -> Dict:
        """
        Turn Expr object into a dict that can be passed to json.dumps().
        Subclasses override _as_dict().
        """
        return {
            '_classname': self.__class__.__name__,
            **self._as_dict(),
        }

    @classmethod
    def as_dict_list(cls, expr_list: List[Expr]) -> List[Dict]:
        """Returns as_dict() for every element of expr_list."""
        return [e.as_dict() for e in expr_list]

    def _as_dict(self) -> Dict:
        """Returns the serialized components under the key 'components', or an empty dict if there are none."""
        if len(self.components) > 0:
            return {'components': [c.as_dict() for c in self.components]}
        return {}

    @classmethod
    def deserialize(cls, dict_str: str) -> Expr:
        """Inverse of serialize(): parses the JSON string and hands the result to from_dict()."""
        return cls.from_dict(json.loads(dict_str))

    @classmethod
    def from_dict(cls, d: Dict) -> Expr:
        """
        Turn dict that was produced by calling Expr.as_dict() into an instance of the correct Expr subclass.
        """
        assert '_classname' in d
        # the class named in the dict is looked up in the package that contains this module
        exprs_module = importlib.import_module(cls.__module__.rsplit('.', 1)[0])
        type_class = getattr(exprs_module, d['_classname'])
        components: List[Expr] = []
        if 'components' in d:
            components = [cls.from_dict(component_dict) for component_dict in d['components']]
        return type_class._from_dict(d, components)

    @classmethod
    def from_dict_list(cls, dict_list: List[Dict]) -> List[Expr]:
        """Returns from_dict() for every element of dict_list."""
        return [cls.from_dict(d) for d in dict_list]

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        """Subclass hook for from_dict(); the base implementation always fails."""
        # raised explicitly instead of 'assert False', which is removed when running with -O
        raise AssertionError('not implemented')

    def astype(self, new_type: ts.ColumnType) -> 'pixeltable.exprs.TypeCast':
        """Returns a TypeCast of this expr to new_type."""
        from pixeltable.exprs import TypeCast
        return TypeCast(self, new_type)

    def apply(self, fn: Callable, *, col_type: Optional[ts.ColumnType] = None) -> 'pixeltable.exprs.FunctionCall':
        """
        Wraps fn in a unary pixeltable function (see _make_applicator_function()) and calls it on this expr.
        """
        function = self._make_applicator_function(fn, col_type)
        # Return a `FunctionCall` obtained by passing this `Expr` to the new `function`.
        return function(self)

    def __getitem__(self, index: object) -> Expr:
        """Subscripting: a JsonPath for json-typed exprs, an ArraySlice for array-typed exprs."""
        if self.col_type.is_json_type():
            from .json_path import JsonPath
            return JsonPath(self).__getitem__(index)
        if self.col_type.is_array_type():
            from .array_slice import ArraySlice
            return ArraySlice(self, index)
        raise excs.Error(f'Type {self.col_type} is not subscriptable')

    def __getattr__(self, name: str) -> Union['pixeltable.exprs.ImageMemberAccess', 'pixeltable.exprs.JsonPath']:
        """
        ex.: <img col>.rotate(60)
        """
        if name == 'col_type':
            # col_type itself is missing (e.g., on an instance created via __new__() that hasn't been
            # populated yet): reading self.col_type below would re-enter __getattr__() indefinitely
            raise AttributeError(name)
        if self.col_type.is_image_type():
            from .image_member_access import ImageMemberAccess
            return ImageMemberAccess(name, self)
        if self.col_type.is_json_type():
            from .json_path import JsonPath
            return JsonPath(self).__getattr__(name)
        raise excs.Error(f'Member access not supported on type {self.col_type}: {name}')

    def __bool__(self) -> bool:
        raise TypeError(
            'Pixeltable expressions cannot be used in conjunction with Python boolean operators (and/or/not)')

    def __lt__(self, other: object) -> 'pixeltable.exprs.Comparison':
        return self._make_comparison(ComparisonOperator.LT, other)

    def __le__(self, other: object) -> 'pixeltable.exprs.Comparison':
        return self._make_comparison(ComparisonOperator.LE, other)

    def __eq__(self, other: object) -> 'pixeltable.exprs.Comparison':
        # comparison against None turns into an IsNull predicate
        if other is None:
            from .is_null import IsNull
            return IsNull(self)
        return self._make_comparison(ComparisonOperator.EQ, other)

    def __ne__(self, other: object) -> 'pixeltable.exprs.Comparison':
        # comparison against None turns into a negated IsNull predicate
        if other is None:
            from .compound_predicate import CompoundPredicate
            from .is_null import IsNull
            return CompoundPredicate(LogicalOperator.NOT, [IsNull(self)])
        return self._make_comparison(ComparisonOperator.NE, other)

    def __gt__(self, other: object) -> 'pixeltable.exprs.Comparison':
        return self._make_comparison(ComparisonOperator.GT, other)

    def __ge__(self, other: object) -> 'pixeltable.exprs.Comparison':
        return self._make_comparison(ComparisonOperator.GE, other)

    def _make_comparison(self, op: ComparisonOperator, other: object) -> 'pixeltable.exprs.Comparison':
        """
        other: Union[Expr, LiteralPythonTypes]

        Raises TypeError if other is neither an Expr nor one of the literal types.
        """
        # TODO: check for compatibility
        from .comparison import Comparison
        from .literal import Literal
        if isinstance(other, Expr):
            return Comparison(op, self, other)
        if isinstance(other, typing.get_args(LiteralPythonTypes)):
            return Comparison(op, self, Literal(other))  # type: ignore[arg-type]
        raise TypeError(f'Other must be Expr or literal: {type(other)}')

    def __add__(self, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
        return self._make_arithmetic_expr(ArithmeticOperator.ADD, other)

    def __sub__(self, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
        return self._make_arithmetic_expr(ArithmeticOperator.SUB, other)

    def __mul__(self, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
        return self._make_arithmetic_expr(ArithmeticOperator.MUL, other)

    def __truediv__(self, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
        return self._make_arithmetic_expr(ArithmeticOperator.DIV, other)

    def __mod__(self, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
        return self._make_arithmetic_expr(ArithmeticOperator.MOD, other)

    def _make_arithmetic_expr(self, op: ArithmeticOperator, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
        """
        other: Union[Expr, LiteralPythonTypes]

        Raises TypeError if other is neither an Expr nor one of the literal types.
        """
        # TODO: check for compatibility
        from .arithmetic_expr import ArithmeticExpr
        from .literal import Literal
        if isinstance(other, Expr):
            return ArithmeticExpr(op, self, other)
        if isinstance(other, typing.get_args(LiteralPythonTypes)):
            return ArithmeticExpr(op, self, Literal(other))  # type: ignore[arg-type]
        raise TypeError(f'Other must be Expr or literal: {type(other)}')

    def _make_applicator_function(self, fn: Callable, col_type: Optional[ts.ColumnType]) -> 'pixeltable.func.Function':
        """
        Creates a unary pixeltable `Function` that encapsulates a python `Callable`. The result type of
        the new `Function` is given by `col_type`, and its parameter type will be `self.col_type`.

        Args:
            fn: The `Callable` to encapsulate. Must have at least one parameter, and at most one required
                parameter.
            col_type: The pixeltable result type of the new `Function`.

        Raises:
            excs.Error: if the result type cannot be determined, or if the signature of `fn` is not
                compatible with a call with a single positional argument.
        """
        if col_type is not None:
            # col_type is specified explicitly
            fn_type = col_type
        elif fn in _known_applicator_types:
            # For convenience, various built-ins and other Python functions that don't
            # have type hints are hardcoded
            fn_type = _known_applicator_types[fn]
        else:
            # Attempt to infer the column type from the return type of the callable;
            # fn_type ends up as None if there is no return type hint or if no column type
            # can be inferred from it
            type_hints = typing.get_type_hints(fn)
            if 'return' in type_hints:
                fn_type = ts.ColumnType.from_python_type(type_hints['return'])
            else:
                fn_type = None

        if fn_type is None:
            raise excs.Error(
                f'Column type of `{fn.__name__}` cannot be inferred. '
                f'Use `.apply({fn.__name__}, col_type=...)` to specify.')

        # TODO(aaron-siegel) Currently we assume that `fn` has exactly one required parameter
        # and all optional parameters take their default values. Should we provide a more
        # flexible API? For example, by defining
        # expr.apply(fn, my_kw=my_arg)
        # to mean: transform each x by calling
        # fn(x, my_kw=my_arg)
        # In the current implementation, a lambda is needed in order to specify this pattern:
        # expr.apply(lambda x: fn(x, my_kw=my_arg))

        try:
            # If `fn` is not a builtin, we can do some basic validation to ensure it's
            # compatible with `apply`.
            params: Optional[List[inspect.Parameter]] = list(inspect.signature(fn).parameters.values())
        except ValueError:
            # inspect.signature(fn) will raise a `ValueError` if `fn` is a builtin; I don't
            # know of any way to get the signature of a builtin, nor to check for this in
            # advance (without the try/except pattern). For now, builtins will not be
            # validated.
            params = None

        if params is not None:
            # Check that fn has at least one positional parameter
            if len(params) == 0 or params[0].kind in (inspect.Parameter.KEYWORD_ONLY, inspect.Parameter.VAR_KEYWORD):
                raise excs.Error(
                    f'Function `{fn.__name__}` has no positional parameters.'
                )
            # Check that fn has at most one required parameter: a second parameter must either
            # have a default or be a varargs.
            # The default is compared by identity: `==` would invoke the __eq__() of whatever
            # object is used as the default value, which need not return a bool.
            if len(params) >= 2 and \
                    params[1].kind not in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD) and \
                    params[1].default is inspect.Parameter.empty:
                raise excs.Error(
                    f'Function `{fn.__name__}` has multiple required parameters.'
                )

        # Since `fn` might have optional parameters, we wrap it in a lambda to get a unary
        # equivalent, so that its signature is understood by `make_function`. This also
        # ensures that `decorated_fn` is never a builtin.
        # We also set the display_name explicitly, so that the `FunctionCall` gets the
        # name of `decorated_fn`, not the lambda.
        return func.make_function(
            decorated_fn=lambda x: fn(x), return_type=fn_type, param_types=[self.col_type], function_name=fn.__name__)
573
+
574
+
575
# Result types (as pixeltable types) for a few stdlib callables that are commonly used in
# computed columns but carry no type hints of their own.
# Expr._make_applicator_function() looks a callable up in here when no col_type has been passed
# to Expr.apply(), before it falls back to the callable's type hints.
# This is a convenience only: a result type can always be specified explicitly for a callable
# that has no type hints and isn't listed here.
_known_applicator_types: Dict[Callable, ts.ColumnType] = {
    str: ts.StringType(),
    json.dumps: ts.StringType(),
    json.loads: ts.JsonType(),
}
@@ -0,0 +1,39 @@
1
+ from __future__ import annotations
2
+ from typing import Optional, Dict, Iterable, Iterator
3
+
4
+ from .expr import Expr
5
+
6
+
7
class ExprSet:
    """
    An insertion-ordered collection of Exprs without duplicates, where two Exprs count as
    duplicates if they have the same Expr.id.

    Supports lookup by position (int) and by Expr (via Expr.id).
    """
    def __init__(self, elements: Optional[Iterable[Expr]] = None):
        self.exprs: Dict[int, Expr] = {}  # Expr.id -> Expr
        if elements is not None:
            for element in elements:
                self.append(element)

    def append(self, expr: Expr) -> None:
        """Adds expr, unless an expr with the same id is already present."""
        # setdefault() leaves an existing entry untouched
        self.exprs.setdefault(expr.id, expr)

    def extend(self, elements: Iterable[Expr]) -> None:
        """Appends every element of elements."""
        for element in elements:
            self.append(element)

    def __contains__(self, item: Expr) -> bool:
        return item.id in self.exprs

    def __len__(self) -> int:
        return len(self.exprs)

    def __iter__(self) -> Iterator[Expr]:
        return iter(self.exprs.values())

    def __getitem__(self, index: object) -> Optional[Expr]:
        """
        int index: returns the expr at that position in insertion order (IndexError if out of range).
        Expr index: returns the stored expr with the same id, or None if there isn't one.
        """
        if isinstance(index, int):
            # NOTE(review): this used to be described as a lookup by slot_idx, but the code indexes
            # by insertion position; the two only coincide if slot_idx values are assigned in
            # insertion order - confirm against callers
            return list(self.exprs.values())[index]
        assert isinstance(index, Expr)
        return self.exprs.get(index.id)