pixeltable 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (119) hide show
  1. pixeltable/__init__.py +53 -0
  2. pixeltable/__version__.py +3 -0
  3. pixeltable/catalog/__init__.py +13 -0
  4. pixeltable/catalog/catalog.py +159 -0
  5. pixeltable/catalog/column.py +181 -0
  6. pixeltable/catalog/dir.py +32 -0
  7. pixeltable/catalog/globals.py +33 -0
  8. pixeltable/catalog/insertable_table.py +192 -0
  9. pixeltable/catalog/named_function.py +36 -0
  10. pixeltable/catalog/path.py +58 -0
  11. pixeltable/catalog/path_dict.py +139 -0
  12. pixeltable/catalog/schema_object.py +39 -0
  13. pixeltable/catalog/table.py +695 -0
  14. pixeltable/catalog/table_version.py +1026 -0
  15. pixeltable/catalog/table_version_path.py +133 -0
  16. pixeltable/catalog/view.py +203 -0
  17. pixeltable/dataframe.py +749 -0
  18. pixeltable/env.py +466 -0
  19. pixeltable/exceptions.py +17 -0
  20. pixeltable/exec/__init__.py +10 -0
  21. pixeltable/exec/aggregation_node.py +78 -0
  22. pixeltable/exec/cache_prefetch_node.py +116 -0
  23. pixeltable/exec/component_iteration_node.py +79 -0
  24. pixeltable/exec/data_row_batch.py +94 -0
  25. pixeltable/exec/exec_context.py +22 -0
  26. pixeltable/exec/exec_node.py +61 -0
  27. pixeltable/exec/expr_eval_node.py +217 -0
  28. pixeltable/exec/in_memory_data_node.py +73 -0
  29. pixeltable/exec/media_validation_node.py +43 -0
  30. pixeltable/exec/sql_scan_node.py +226 -0
  31. pixeltable/exprs/__init__.py +25 -0
  32. pixeltable/exprs/arithmetic_expr.py +102 -0
  33. pixeltable/exprs/array_slice.py +71 -0
  34. pixeltable/exprs/column_property_ref.py +77 -0
  35. pixeltable/exprs/column_ref.py +114 -0
  36. pixeltable/exprs/comparison.py +77 -0
  37. pixeltable/exprs/compound_predicate.py +98 -0
  38. pixeltable/exprs/data_row.py +199 -0
  39. pixeltable/exprs/expr.py +594 -0
  40. pixeltable/exprs/expr_set.py +39 -0
  41. pixeltable/exprs/function_call.py +382 -0
  42. pixeltable/exprs/globals.py +69 -0
  43. pixeltable/exprs/image_member_access.py +96 -0
  44. pixeltable/exprs/in_predicate.py +96 -0
  45. pixeltable/exprs/inline_array.py +109 -0
  46. pixeltable/exprs/inline_dict.py +103 -0
  47. pixeltable/exprs/is_null.py +38 -0
  48. pixeltable/exprs/json_mapper.py +121 -0
  49. pixeltable/exprs/json_path.py +159 -0
  50. pixeltable/exprs/literal.py +66 -0
  51. pixeltable/exprs/object_ref.py +41 -0
  52. pixeltable/exprs/predicate.py +44 -0
  53. pixeltable/exprs/row_builder.py +329 -0
  54. pixeltable/exprs/rowid_ref.py +94 -0
  55. pixeltable/exprs/similarity_expr.py +65 -0
  56. pixeltable/exprs/type_cast.py +53 -0
  57. pixeltable/exprs/variable.py +45 -0
  58. pixeltable/ext/__init__.py +5 -0
  59. pixeltable/ext/functions/yolox.py +92 -0
  60. pixeltable/func/__init__.py +7 -0
  61. pixeltable/func/aggregate_function.py +197 -0
  62. pixeltable/func/callable_function.py +113 -0
  63. pixeltable/func/expr_template_function.py +99 -0
  64. pixeltable/func/function.py +141 -0
  65. pixeltable/func/function_registry.py +227 -0
  66. pixeltable/func/globals.py +46 -0
  67. pixeltable/func/nos_function.py +202 -0
  68. pixeltable/func/signature.py +162 -0
  69. pixeltable/func/udf.py +164 -0
  70. pixeltable/functions/__init__.py +95 -0
  71. pixeltable/functions/eval.py +215 -0
  72. pixeltable/functions/fireworks.py +34 -0
  73. pixeltable/functions/huggingface.py +167 -0
  74. pixeltable/functions/image.py +16 -0
  75. pixeltable/functions/openai.py +289 -0
  76. pixeltable/functions/pil/image.py +147 -0
  77. pixeltable/functions/string.py +13 -0
  78. pixeltable/functions/together.py +143 -0
  79. pixeltable/functions/util.py +52 -0
  80. pixeltable/functions/video.py +62 -0
  81. pixeltable/globals.py +425 -0
  82. pixeltable/index/__init__.py +2 -0
  83. pixeltable/index/base.py +51 -0
  84. pixeltable/index/embedding_index.py +168 -0
  85. pixeltable/io/__init__.py +3 -0
  86. pixeltable/io/hf_datasets.py +188 -0
  87. pixeltable/io/pandas.py +148 -0
  88. pixeltable/io/parquet.py +192 -0
  89. pixeltable/iterators/__init__.py +3 -0
  90. pixeltable/iterators/base.py +52 -0
  91. pixeltable/iterators/document.py +432 -0
  92. pixeltable/iterators/video.py +88 -0
  93. pixeltable/metadata/__init__.py +58 -0
  94. pixeltable/metadata/converters/convert_10.py +18 -0
  95. pixeltable/metadata/converters/convert_12.py +3 -0
  96. pixeltable/metadata/converters/convert_13.py +41 -0
  97. pixeltable/metadata/schema.py +234 -0
  98. pixeltable/plan.py +620 -0
  99. pixeltable/store.py +424 -0
  100. pixeltable/tool/create_test_db_dump.py +184 -0
  101. pixeltable/tool/create_test_video.py +81 -0
  102. pixeltable/type_system.py +846 -0
  103. pixeltable/utils/__init__.py +17 -0
  104. pixeltable/utils/arrow.py +98 -0
  105. pixeltable/utils/clip.py +18 -0
  106. pixeltable/utils/coco.py +136 -0
  107. pixeltable/utils/documents.py +69 -0
  108. pixeltable/utils/filecache.py +195 -0
  109. pixeltable/utils/help.py +11 -0
  110. pixeltable/utils/http_server.py +70 -0
  111. pixeltable/utils/media_store.py +76 -0
  112. pixeltable/utils/pytorch.py +91 -0
  113. pixeltable/utils/s3.py +13 -0
  114. pixeltable/utils/sql.py +17 -0
  115. pixeltable/utils/transactional_directory.py +35 -0
  116. pixeltable-0.0.0.dist-info/LICENSE +18 -0
  117. pixeltable-0.0.0.dist-info/METADATA +131 -0
  118. pixeltable-0.0.0.dist-info/RECORD +119 -0
  119. pixeltable-0.0.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,594 @@
1
+ from __future__ import annotations
2
+
3
+ import abc
4
+ import hashlib
5
+ import importlib
6
+ import inspect
7
+ import json
8
+ import sys
9
+ import typing
10
+ from itertools import islice
11
+ from typing import Union, Optional, List, Callable, Any, Dict, Tuple, Set, Generator, Type
12
+ from uuid import UUID
13
+
14
+ import sqlalchemy as sql
15
+
16
+ import pixeltable
17
+ import pixeltable.catalog as catalog
18
+ import pixeltable.exceptions as excs
19
+ import pixeltable.type_system as ts
20
+ import pixeltable.func as func
21
+ from .data_row import DataRow
22
+ from .globals import ComparisonOperator, LogicalOperator, LiteralPythonTypes, ArithmeticOperator
23
+
24
+
25
+ class ExprScope:
26
+ """
27
+ Representation of the scope in which an Expr needs to be evaluated. Used to determine nesting of scopes.
28
+ parent is None: outermost scope
29
+ """
30
+ def __init__(self, parent: Optional[ExprScope]):
31
+ self.parent = parent
32
+
33
+ def is_contained_in(self, other: ExprScope) -> bool:
34
+ if self == other:
35
+ return True
36
+ if self.parent is None:
37
+ return False
38
+ return self.parent.is_contained_in(other)
39
+
40
+
41
+ _GLOBAL_SCOPE = ExprScope(None)
42
+
43
+
44
class Expr(abc.ABC):
    """
    Abstract base class of all expressions.

    Rules for using state in subclasses:
    - all state except for components and slot_idx is shared between copies of an Expr
    - slot_idx is set during analysis (DataFrame.show())
    - during eval(), components can only be accessed via self.components; any Exprs outside of that won't
      have slot_idx set

    Note on operators: __eq__()/__ne__() and the ordering/arithmetic operators are overloaded to *build* new
    exprs, and __bool__() raises. Code that needs to compare two Exprs structurally must call equals(), never
    `==`, `in` or truth-testing.
    """
    def __init__(self, col_type: ts.ColumnType):
        self.col_type = col_type

        # each instance has an id that is used for equality comparisons
        # - set by the subclass's __init__()
        # - produced by _create_id()
        # - not expected to survive a serialize()/deserialize() roundtrip
        self.id: Optional[int] = None

        # index of the expr's value in the data row:
        # - set for all materialized exprs
        # - None: not executable
        # - not set for subexprs that don't need to be materialized because the parent can be materialized via SQL
        self.slot_idx: Optional[int] = None
        self.components: List[Expr] = []  # the subexprs that are needed to construct this expr

    def dependencies(self) -> List[Expr]:
        """
        Returns all exprs that need to have been evaluated before eval() can be called on this one.
        """
        return self.components

    def scope(self) -> ExprScope:
        """
        Returns the scope in which this expr needs to be evaluated.
        By default this is the innermost scope of any of our components.
        """
        result = _GLOBAL_SCOPE
        for c in self.components:
            c_scope = c.scope()
            if c_scope.is_contained_in(result):
                result = c_scope
        return result

    def bind_rel_paths(self, mapper: Optional['pixeltable.exprs.JsonMapper'] = None) -> None:
        """
        Binds relative JsonPaths to mapper.
        This needs to be done in a separate phase after __init__(), because RelativeJsonPath()(-1) cannot be resolved
        by the immediately containing JsonMapper during initialization.
        """
        for c in self.components:
            c.bind_rel_paths(mapper)

    def default_column_name(self) -> Optional[str]:
        """
        Returns:
            None if this expression lacks a default name,
            or a valid identifier (according to the catalog's identifier rules) otherwise.
        """
        return None

    def equals(self, other: Expr) -> bool:
        """
        Subclass-specific comparison. Implemented as a function because __eq__() is needed to construct Comparisons.

        Two exprs are equal if they have exactly the same class, pairwise-equal components and
        _equals() agrees.
        """
        if type(self) is not type(other):
            return False
        if len(self.components) != len(other.components):
            return False
        if not all(mine.equals(theirs) for mine, theirs in zip(self.components, other.components)):
            return False
        return self._equals(other)

    def _equals(self, other: Expr) -> bool:
        # we already compared the type and components in equals(); subclasses that require additional comparisons
        # override this
        return True

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        """Returns attribute name/value pairs that are used to construct the instance id.

        Attribute values must be immutable and have str() defined.
        """
        return [('classname', self.__class__.__name__)]

    def _create_id(self) -> int:
        """Returns an id derived from _id_attrs() and the ids of the components (in order)."""
        hasher = hashlib.sha256()
        for attr, value in self._id_attrs():
            hasher.update(attr.encode('utf-8'))
            hasher.update(str(value).encode('utf-8'))
        for expr in self.components:
            hasher.update(str(expr.id).encode('utf-8'))
        # truncate to machine's word size
        return int(hasher.hexdigest(), 16) & sys.maxsize

    def __hash__(self) -> int:
        assert self.id is not None
        return self.id

    @classmethod
    def list_equals(cls, a: List[Expr], b: List[Expr]) -> bool:
        """Returns True if both lists have the same length and their elements are pairwise equals()."""
        if len(a) != len(b):
            return False
        return all(x.equals(y) for x, y in zip(a, b))

    def copy(self) -> Expr:
        """
        Creates a copy that can be evaluated separately: it doesn't share any eval context (slot_idx)
        but shares everything else (catalog objects, etc.)
        """
        cls = self.__class__
        # bypass __init__(): the state is transferred wholesale from self
        result = cls.__new__(cls)
        result.__dict__.update(self.__dict__)
        result.slot_idx = None
        result.components = [c.copy() for c in self.components]
        return result

    @classmethod
    def copy_list(cls, expr_list: List[Expr]) -> List[Expr]:
        """Returns a list containing a copy() of every element of expr_list."""
        return [e.copy() for e in expr_list]

    def __deepcopy__(self, memo=None) -> Expr:
        # we don't need to create an actual deep copy because all state other than execution state is read-only
        if memo is None:
            memo = {}
        result = self.copy()
        memo[id(self)] = result
        return result

    def substitute(self, old: Expr, new: Expr) -> Expr:
        """
        Replace 'old' with 'new' recursively.

        Matching components are replaced in place in self.components, so self is modified unless self
        itself equals 'old' (in which case a copy of 'new' is returned).
        """
        if self.equals(old):
            return new.copy()
        for i, component in enumerate(self.components):
            self.components[i] = component.substitute(old, new)
        return self

    def resolve_computed_cols(self, resolve_cols: Optional[Set[catalog.Column]] = None) -> Expr:
        """
        Recursively replace ColRefs to unstored computed columns with their value exprs.
        Also replaces references to stored computed columns in resolve_cols.
        """
        from .expr_set import ExprSet
        from .column_ref import ColumnRef
        if resolve_cols is None:
            resolve_cols = set()
        result = self
        # a substituted value expr can itself reference computed columns, so iterate to a fixed point
        while True:
            target_col_refs = ExprSet([
                e for e in result.subexprs()
                if isinstance(e, ColumnRef) and e.col.is_computed and (not e.col.is_stored or e.col in resolve_cols)
            ])
            if len(target_col_refs) == 0:
                return result
            for ref in target_col_refs:
                assert ref.col.value_expr is not None
                result = result.substitute(ref, ref.col.value_expr)

    def is_bound_by(self, tbl: catalog.TableVersionPath) -> bool:
        """Returns True if this expr can be evaluated in the context of tbl."""
        from .column_ref import ColumnRef
        return all(tbl.has_column(col_ref.col) for col_ref in self.subexprs(ColumnRef))

    def retarget(self, tbl: catalog.TableVersionPath) -> Expr:
        """Retarget ColumnRefs in this expr to the specific TableVersions in tbl."""
        tbl_versions = {tbl_version.id: tbl_version for tbl_version in tbl.get_tbl_versions()}
        return self._retarget(tbl_versions)

    def _retarget(self, tbl_versions: Dict[UUID, catalog.TableVersion]) -> Expr:
        from .column_ref import ColumnRef
        if isinstance(self, ColumnRef):
            target = tbl_versions[self.col.tbl.id]
            assert self.col.id in target.cols_by_id
            col = target.cols_by_id[self.col.id]
            return ColumnRef(col)
        # not a ColumnRef: retarget the components in place
        for i, component in enumerate(self.components):
            self.components[i] = component._retarget(tbl_versions)
        return self

    @classmethod
    def list_substitute(cls, expr_list: List[Expr], old: Expr, new: Expr) -> None:
        """Applies substitute(old, new) to every element of expr_list, updating the list in place."""
        for i, e in enumerate(expr_list):
            expr_list[i] = e.substitute(old, new)

    @abc.abstractmethod
    def __str__(self) -> str:
        pass

    def display_str(self, inline: bool = True) -> str:
        """
        inline: if False, use line breaks where appropriate; otherwise don't use linebreaks
        """
        return str(self)

    @classmethod
    def print_list(cls, expr_list: List[Expr]) -> str:
        """Returns str() of a single-element list's element, or a parenthesized comma-separated list otherwise."""
        if len(expr_list) == 1:
            return str(expr_list[0])
        return f'({", ".join([str(e) for e in expr_list])})'

    def subexprs(
            self, expr_class: Optional[Type[Expr]] = None, filter: Optional[Callable[[Expr], bool]] = None,
            traverse_matches: bool = True
    ) -> Generator[Expr, None, None]:
        """
        Iterate over all subexprs, including self.

        Components are produced before the expr that contains them.

        Args:
            expr_class: if given, only produce instances of this class
            filter: if given, only produce exprs for which filter() returns True
                (at most one of expr_class and filter can be specified)
            traverse_matches: if False, the components of a matching expr are not visited
        """
        assert expr_class is None or filter is None  # at most one of them
        if expr_class is not None:
            filter = lambda e: isinstance(e, expr_class)
        is_match = filter is None or filter(self)
        if not is_match or traverse_matches:
            for c in self.components:
                yield from c.subexprs(filter=filter, traverse_matches=traverse_matches)
        if is_match:
            yield self

    def contains(self, cls: Optional[Type[Expr]] = None, filter: Optional[Callable[[Expr], bool]] = None) -> bool:
        """
        Returns True if any subexpr (including self) is an instance of cls or satisfies filter.
        Exactly one of cls and filter needs to be specified.
        """
        assert (cls is not None) != (filter is not None)  # need one of them
        if cls is not None:
            filter = lambda e: isinstance(e, cls)
        try:
            _ = next(self.subexprs(filter=filter, traverse_matches=False))
            return True
        except StopIteration:
            return False

    @classmethod
    def list_subexprs(
            cls, expr_list: List[Expr], expr_class: Optional[Type[Expr]] = None,
            filter: Optional[Callable[[Expr], bool]] = None, traverse_matches: bool = True
    ) -> Generator[Expr, None, None]:
        """Produce subexprs for all exprs in list. Can contain duplicates."""
        for e in expr_list:
            yield from e.subexprs(expr_class=expr_class, filter=filter, traverse_matches=traverse_matches)

    def tbl_ids(self) -> Set[UUID]:
        """Returns table ids referenced by this expr."""
        from .column_ref import ColumnRef
        from .rowid_ref import RowidRef
        return {ref.col.tbl.id for ref in self.subexprs(ColumnRef)} | {ref.tbl.id for ref in self.subexprs(RowidRef)}

    @classmethod
    def list_tbl_ids(cls, expr_list: List[Expr]) -> Set[UUID]:
        """Returns the union of tbl_ids() of all exprs in expr_list."""
        ids: Set[UUID] = set()
        for e in expr_list:
            ids.update(e.tbl_ids())
        return ids

    @classmethod
    def from_object(cls, o: object) -> Optional[Expr]:
        """
        Try to turn a literal object into an Expr.

        Returns o itself if it already is an Expr, and None if no conversion applies.
        """
        if isinstance(o, Expr):
            return o
        # try to create a literal
        obj_type = ts.ColumnType.infer_literal_type(o)
        if obj_type is not None:
            from .literal import Literal
            return Literal(o, col_type=obj_type)
        if isinstance(o, dict):
            from .inline_dict import InlineDict
            return InlineDict(o)
        elif isinstance(o, list):
            from .inline_array import InlineArray
            return InlineArray(tuple(o))
        return None

    @abc.abstractmethod
    def sql_expr(self) -> Optional[sql.ClauseElement]:
        """
        If this expr can be materialized directly in SQL:
        - returns a ClauseElement
        - eval() will not be called (exception: Literal)
        Otherwise
        - returns None
        - eval() will be called
        """
        pass

    @abc.abstractmethod
    def eval(self, data_row: DataRow, row_builder: 'pixeltable.exprs.RowBuilder') -> None:
        """
        Compute the expr value for data_row and store the result in data_row[slot_idx].
        Not called if sql_expr() != None (exception: Literal).
        """
        pass

    def release(self) -> None:
        """
        Allow Expr class to tear down execution state. This is called after the last eval() call.
        """
        for c in self.components:
            c.release()

    @classmethod
    def release_list(cls, expr_list: List[Expr]) -> None:
        """Calls release() on every element of expr_list."""
        for e in expr_list:
            e.release()

    def serialize(self) -> str:
        """Returns the JSON encoding of as_dict()."""
        return json.dumps(self.as_dict())

    def as_dict(self) -> Dict:
        """
        Turn Expr object into a dict that can be passed to json.dumps().
        Subclasses override _as_dict().
        """
        return {
            '_classname': self.__class__.__name__,
            **self._as_dict(),
        }

    @classmethod
    def as_dict_list(cls, expr_list: List[Expr]) -> List[Dict]:
        """Returns as_dict() of every element of expr_list."""
        return [e.as_dict() for e in expr_list]

    def _as_dict(self) -> Dict:
        # the 'components' key is only present for exprs that have components
        if len(self.components) > 0:
            return {'components': [c.as_dict() for c in self.components]}
        return {}

    @classmethod
    def deserialize(cls, dict_str: str) -> Expr:
        """Inverse of serialize()."""
        return cls.from_dict(json.loads(dict_str))

    @classmethod
    def from_dict(cls, d: Dict) -> Expr:
        """
        Turn dict that was produced by calling Expr.as_dict() into an instance of the correct Expr subclass.
        """
        assert '_classname' in d
        # the subclass is looked up by name in the package that contains this class's module
        exprs_module = importlib.import_module(cls.__module__.rsplit('.', 1)[0])
        type_class = getattr(exprs_module, d['_classname'])
        components: List[Expr] = []
        if 'components' in d:
            components = [cls.from_dict(component_dict) for component_dict in d['components']]
        return type_class._from_dict(d, components)

    @classmethod
    def from_dict_list(cls, dict_list: List[Dict]) -> List[Expr]:
        """Returns from_dict() of every element of dict_list."""
        return [cls.from_dict(d) for d in dict_list]

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        # subclasses that can be deserialized override this
        assert False, 'not implemented'

    def isin(self, value_set: Any) -> 'pixeltable.exprs.InPredicate':
        """Returns an InPredicate that tests this expr against value_set (an Expr or a literal)."""
        from .in_predicate import InPredicate
        if isinstance(value_set, Expr):
            return InPredicate(self, value_set_expr=value_set)
        else:
            return InPredicate(self, value_set_literal=value_set)

    def astype(self, new_type: ts.ColumnType) -> 'pixeltable.exprs.TypeCast':
        """Returns a TypeCast of this expr to new_type."""
        from pixeltable.exprs import TypeCast
        return TypeCast(self, new_type)

    def apply(self, fn: Callable, *, col_type: Optional[ts.ColumnType] = None) -> 'pixeltable.exprs.FunctionCall':
        """
        Returns a FunctionCall that applies the python callable fn to this expr.
        See _make_applicator_function() for how the result type is determined.
        """
        function = self._make_applicator_function(fn, col_type)
        # Return a `FunctionCall` obtained by passing this `Expr` to the new `function`.
        return function(self)

    def __getitem__(self, index: object) -> Expr:
        if self.col_type.is_json_type():
            from .json_path import JsonPath
            return JsonPath(self).__getitem__(index)
        if self.col_type.is_array_type():
            from .array_slice import ArraySlice
            return ArraySlice(self, index)
        raise excs.Error(f'Type {self.col_type} is not subscriptable')

    def __getattr__(self, name: str) -> Union['pixeltable.exprs.ImageMemberAccess', 'pixeltable.exprs.JsonPath']:
        """
        ex.: <img col>.rotate(60)

        Raises:
            AttributeError: for 'col_type' itself and for dunder names
            excs.Error: if the type of this expr doesn't support member access
        """
        # __getattr__() is only invoked after regular attribute lookup has failed. If we get here for 'col_type',
        # the instance isn't initialized yet (e.g., it was created via cls.__new__(cls)), and reading
        # self.col_type below would re-enter this method until the interpreter raises RecursionError.
        # Dunder names are protocol probes (hasattr()/getattr() with a default, as done by copy and pickle), which
        # rely on AttributeError to signal absence.
        if name == 'col_type' or (name.startswith('__') and name.endswith('__')):
            raise AttributeError(f'{type(self).__name__!r} object has no attribute {name!r}')
        if self.col_type.is_image_type():
            from .image_member_access import ImageMemberAccess
            return ImageMemberAccess(name, self)
        if self.col_type.is_json_type():
            from .json_path import JsonPath
            return JsonPath(self).__getattr__(name)
        raise excs.Error(f'Member access not supported on type {self.col_type}: {name}')

    def __bool__(self) -> bool:
        raise TypeError(
            'Pixeltable expressions cannot be used in conjunction with Python boolean operators (and/or/not)')

    def __lt__(self, other: object) -> 'pixeltable.exprs.Comparison':
        return self._make_comparison(ComparisonOperator.LT, other)

    def __le__(self, other: object) -> 'pixeltable.exprs.Comparison':
        return self._make_comparison(ComparisonOperator.LE, other)

    def __eq__(self, other: object) -> 'pixeltable.exprs.Comparison':
        # comparison with None is turned into an IsNull predicate
        if other is None:
            from .is_null import IsNull
            return IsNull(self)
        return self._make_comparison(ComparisonOperator.EQ, other)

    def __ne__(self, other: object) -> 'pixeltable.exprs.Comparison':
        # comparison with None is turned into a negated IsNull predicate
        if other is None:
            from .compound_predicate import CompoundPredicate
            from .is_null import IsNull
            return CompoundPredicate(LogicalOperator.NOT, [IsNull(self)])
        return self._make_comparison(ComparisonOperator.NE, other)

    def __gt__(self, other: object) -> 'pixeltable.exprs.Comparison':
        return self._make_comparison(ComparisonOperator.GT, other)

    def __ge__(self, other: object) -> 'pixeltable.exprs.Comparison':
        return self._make_comparison(ComparisonOperator.GE, other)

    def _make_comparison(self, op: ComparisonOperator, other: object) -> 'pixeltable.exprs.Comparison':
        """
        other: Union[Expr, LiteralPythonTypes]

        Raises:
            TypeError: if other is neither an Expr nor one of the literal python types
        """
        # TODO: check for compatibility
        from .comparison import Comparison
        from .literal import Literal
        if isinstance(other, Expr):
            return Comparison(op, self, other)
        if isinstance(other, typing.get_args(LiteralPythonTypes)):
            return Comparison(op, self, Literal(other))  # type: ignore[arg-type]
        raise TypeError(f'Other must be Expr or literal: {type(other)}')

    def __add__(self, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
        return self._make_arithmetic_expr(ArithmeticOperator.ADD, other)

    def __sub__(self, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
        return self._make_arithmetic_expr(ArithmeticOperator.SUB, other)

    def __mul__(self, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
        return self._make_arithmetic_expr(ArithmeticOperator.MUL, other)

    def __truediv__(self, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
        return self._make_arithmetic_expr(ArithmeticOperator.DIV, other)

    def __mod__(self, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
        return self._make_arithmetic_expr(ArithmeticOperator.MOD, other)

    def _make_arithmetic_expr(self, op: ArithmeticOperator, other: object) -> 'pixeltable.exprs.ArithmeticExpr':
        """
        other: Union[Expr, LiteralPythonTypes]

        Raises:
            TypeError: if other is neither an Expr nor one of the literal python types
        """
        # TODO: check for compatibility
        from .arithmetic_expr import ArithmeticExpr
        from .literal import Literal
        if isinstance(other, Expr):
            return ArithmeticExpr(op, self, other)
        if isinstance(other, typing.get_args(LiteralPythonTypes)):
            return ArithmeticExpr(op, self, Literal(other))  # type: ignore[arg-type]
        raise TypeError(f'Other must be Expr or literal: {type(other)}')

    def _make_applicator_function(self, fn: Callable, col_type: Optional[ts.ColumnType]) -> 'pixeltable.func.Function':
        """
        Creates a unary pixeltable `Function` that encapsulates a python `Callable`. The result type of
        the new `Function` is given by `col_type`, and its parameter type will be `self.col_type`.

        Args:
            fn: The `Callable` to encapsulate. Must have at least one parameter, and at most one required
                parameter.
            col_type: The pixeltable result type of the new `Function`. If None, the type is taken from
                `_known_applicator_types` or inferred from the return type hint of `fn`.

        Raises:
            excs.Error: if the result type cannot be determined, or if the signature of `fn` is
                incompatible with being called with a single positional argument.
        """
        if col_type is not None:
            # col_type is specified explicitly
            fn_type = col_type
        elif fn in _known_applicator_types:
            # For convenience, various built-ins and other Python functions that don't
            # have type hints are hardcoded
            fn_type = _known_applicator_types[fn]
        else:
            # Attempt to infer the column type from the return type of the callable;
            # this will set fn_type to None if there is no type hint or it cannot be inferred
            type_hints = typing.get_type_hints(fn)
            fn_type = ts.ColumnType.from_python_type(type_hints['return']) if 'return' in type_hints else None

        if fn_type is None:
            raise excs.Error(
                f'Column type of `{fn.__name__}` cannot be inferred. '
                f'Use `.apply({fn.__name__}, col_type=...)` to specify.')

        # TODO(aaron-siegel) Currently we assume that `fn` has exactly one required parameter
        # and all optional parameters take their default values. Should we provide a more
        # flexible API? For example, by defining
        #   expr.apply(fn, my_kw=my_arg)
        # to mean: transform each x by calling
        #   fn(x, my_kw=my_arg)
        # In the current implementation, a lambda is needed in order to specify this pattern:
        #   expr.apply(lambda x: fn(x, my_kw=my_arg))

        try:
            # If `fn` is not a builtin, we can do some basic validation to ensure it's
            # compatible with `apply`.
            params: Optional[List[inspect.Parameter]] = list(inspect.signature(fn).parameters.values())
        except ValueError:
            # inspect.signature(fn) will raise a `ValueError` if `fn` is a builtin; I don't
            # know of any way to get the signature of a builtin, nor to check for this in
            # advance (without the try/except pattern). For now, builtins will not be
            # validated.
            params = None

        if params is not None:
            # Check that fn has at least one positional parameter
            if len(params) == 0 \
                    or params[0].kind in (inspect.Parameter.KEYWORD_ONLY, inspect.Parameter.VAR_KEYWORD):
                raise excs.Error(
                    f'Function `{fn.__name__}` has no positional parameters.'
                )
            # Check that fn has at most one required parameter, i.e., every parameter after the first
            # either has a default or is a varargs/kwargs catch-all. The comparison against the `empty`
            # sentinel is by identity so that we never invoke __eq__() of a user-supplied default value.
            variadic_kinds = (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
            if any(p.kind not in variadic_kinds and p.default is inspect.Parameter.empty for p in params[1:]):
                raise excs.Error(
                    f'Function `{fn.__name__}` has multiple required parameters.'
                )

        # Since `fn` might have optional parameters, we wrap it in a lambda to get a unary
        # equivalent, so that its signature is understood by `make_function`. This also
        # ensures that `decorated_fn` is never a builtin.
        # We also set the display_name explicitly, so that the `FunctionCall` gets the
        # name of `decorated_fn`, not the lambda.
        return func.make_function(
            decorated_fn=lambda x: fn(x), return_type=fn_type, param_types=[self.col_type], function_name=fn.__name__)
581
+
582
+
583
# Result types (as pixeltable types) of stdlib callables that are commonly used in
# computed columns.
# stdlib does not have type hints, so Expr.apply() consults this table to infer the
# result type of these callables instead of requiring it to be specified explicitly.
# This is purely for convenience and does not impact the supported functionality:
# for a function that has no type hints and is not present in this dict, it's
# always possible to specify a result type explicitly.
_known_applicator_types: dict[Callable, ts.ColumnType] = {
    str: ts.StringType(),
    json.dumps: ts.StringType(),
    json.loads: ts.JsonType(),
}
@@ -0,0 +1,39 @@
1
+ from __future__ import annotations
2
+ from typing import Optional, Dict, Iterable, Iterator
3
+
4
+ from .expr import Expr
5
+
6
+
7
class ExprSet:
    """
    A set of Exprs, deduplicated by Expr.id, that remembers insertion order.

    Supports lookup by position (int) and by Expr (via its id).
    """
    def __init__(self, elements: Optional[Iterable[Expr]] = None):
        self.exprs: Dict[int, Expr] = {}  # Expr.id -> Expr
        if elements is not None:
            self.extend(elements)

    def append(self, expr: Expr) -> None:
        """Adds expr unless an expr with the same id is already present (the first one added wins)."""
        self.exprs.setdefault(expr.id, expr)

    def extend(self, elements: Iterable[Expr]) -> None:
        """Appends every element of elements, in order."""
        for element in elements:
            self.append(element)

    def __contains__(self, item: Expr) -> bool:
        return item.id in self.exprs

    def __len__(self) -> int:
        return len(self.exprs)

    def __iter__(self) -> Iterator[Expr]:
        # iterates in insertion order
        return iter(self.exprs.values())

    def __getitem__(self, index: object) -> Optional[Expr]:
        """
        int index: returns the expr at that position in insertion order (IndexError if out of range).
        Expr index: returns the stored expr with the same id, or None if there is none.
        """
        if isinstance(index, int):
            # NOTE(review): this is a positional lookup; the class was described as supporting lookup
            # by slot_idx, which is not what this does -- confirm which one callers expect
            members = list(self.exprs.values())
            return members[index]
        assert isinstance(index, Expr)
        return self.exprs.get(index.id)