pixeltable 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (150)
  1. pixeltable/__init__.py +64 -11
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -1
  4. pixeltable/catalog/catalog.py +50 -27
  5. pixeltable/catalog/column.py +27 -11
  6. pixeltable/catalog/dir.py +6 -4
  7. pixeltable/catalog/globals.py +8 -1
  8. pixeltable/catalog/insertable_table.py +22 -12
  9. pixeltable/catalog/named_function.py +10 -6
  10. pixeltable/catalog/path.py +3 -2
  11. pixeltable/catalog/path_dict.py +8 -6
  12. pixeltable/catalog/schema_object.py +2 -1
  13. pixeltable/catalog/table.py +121 -101
  14. pixeltable/catalog/table_version.py +291 -142
  15. pixeltable/catalog/table_version_path.py +8 -5
  16. pixeltable/catalog/view.py +67 -26
  17. pixeltable/dataframe.py +106 -81
  18. pixeltable/env.py +28 -24
  19. pixeltable/exec/__init__.py +2 -2
  20. pixeltable/exec/aggregation_node.py +10 -4
  21. pixeltable/exec/cache_prefetch_node.py +5 -3
  22. pixeltable/exec/component_iteration_node.py +9 -9
  23. pixeltable/exec/data_row_batch.py +21 -10
  24. pixeltable/exec/exec_context.py +10 -3
  25. pixeltable/exec/exec_node.py +23 -12
  26. pixeltable/exec/expr_eval/evaluators.py +13 -7
  27. pixeltable/exec/expr_eval/expr_eval_node.py +24 -15
  28. pixeltable/exec/expr_eval/globals.py +30 -7
  29. pixeltable/exec/expr_eval/row_buffer.py +5 -6
  30. pixeltable/exec/expr_eval/schedulers.py +151 -31
  31. pixeltable/exec/in_memory_data_node.py +8 -7
  32. pixeltable/exec/row_update_node.py +15 -5
  33. pixeltable/exec/sql_node.py +56 -27
  34. pixeltable/exprs/__init__.py +2 -2
  35. pixeltable/exprs/arithmetic_expr.py +57 -26
  36. pixeltable/exprs/array_slice.py +1 -1
  37. pixeltable/exprs/column_property_ref.py +2 -1
  38. pixeltable/exprs/column_ref.py +20 -15
  39. pixeltable/exprs/comparison.py +6 -2
  40. pixeltable/exprs/compound_predicate.py +1 -3
  41. pixeltable/exprs/data_row.py +2 -2
  42. pixeltable/exprs/expr.py +108 -72
  43. pixeltable/exprs/expr_dict.py +2 -1
  44. pixeltable/exprs/expr_set.py +3 -1
  45. pixeltable/exprs/function_call.py +39 -41
  46. pixeltable/exprs/globals.py +1 -0
  47. pixeltable/exprs/in_predicate.py +2 -2
  48. pixeltable/exprs/inline_expr.py +20 -17
  49. pixeltable/exprs/json_mapper.py +4 -2
  50. pixeltable/exprs/json_path.py +12 -18
  51. pixeltable/exprs/literal.py +5 -9
  52. pixeltable/exprs/method_ref.py +1 -0
  53. pixeltable/exprs/object_ref.py +1 -1
  54. pixeltable/exprs/row_builder.py +32 -17
  55. pixeltable/exprs/rowid_ref.py +14 -5
  56. pixeltable/exprs/similarity_expr.py +11 -6
  57. pixeltable/exprs/sql_element_cache.py +1 -1
  58. pixeltable/exprs/type_cast.py +24 -9
  59. pixeltable/ext/__init__.py +1 -0
  60. pixeltable/ext/functions/__init__.py +1 -0
  61. pixeltable/ext/functions/whisperx.py +2 -2
  62. pixeltable/ext/functions/yolox.py +11 -11
  63. pixeltable/func/aggregate_function.py +17 -13
  64. pixeltable/func/callable_function.py +6 -6
  65. pixeltable/func/expr_template_function.py +15 -14
  66. pixeltable/func/function.py +16 -16
  67. pixeltable/func/function_registry.py +11 -8
  68. pixeltable/func/globals.py +4 -2
  69. pixeltable/func/query_template_function.py +12 -13
  70. pixeltable/func/signature.py +18 -9
  71. pixeltable/func/tools.py +10 -17
  72. pixeltable/func/udf.py +106 -11
  73. pixeltable/functions/__init__.py +21 -2
  74. pixeltable/functions/anthropic.py +16 -12
  75. pixeltable/functions/fireworks.py +63 -5
  76. pixeltable/functions/gemini.py +13 -3
  77. pixeltable/functions/globals.py +18 -6
  78. pixeltable/functions/huggingface.py +20 -38
  79. pixeltable/functions/image.py +7 -3
  80. pixeltable/functions/json.py +1 -0
  81. pixeltable/functions/llama_cpp.py +1 -4
  82. pixeltable/functions/mistralai.py +31 -20
  83. pixeltable/functions/ollama.py +4 -18
  84. pixeltable/functions/openai.py +231 -113
  85. pixeltable/functions/replicate.py +11 -10
  86. pixeltable/functions/string.py +70 -7
  87. pixeltable/functions/timestamp.py +21 -8
  88. pixeltable/functions/together.py +66 -52
  89. pixeltable/functions/video.py +1 -0
  90. pixeltable/functions/vision.py +14 -11
  91. pixeltable/functions/whisper.py +2 -1
  92. pixeltable/globals.py +60 -26
  93. pixeltable/index/__init__.py +1 -1
  94. pixeltable/index/btree.py +5 -3
  95. pixeltable/index/embedding_index.py +15 -14
  96. pixeltable/io/__init__.py +1 -1
  97. pixeltable/io/external_store.py +30 -25
  98. pixeltable/io/fiftyone.py +6 -14
  99. pixeltable/io/globals.py +33 -27
  100. pixeltable/io/hf_datasets.py +2 -1
  101. pixeltable/io/label_studio.py +77 -68
  102. pixeltable/io/pandas.py +36 -23
  103. pixeltable/io/parquet.py +9 -12
  104. pixeltable/iterators/__init__.py +1 -0
  105. pixeltable/iterators/audio.py +205 -0
  106. pixeltable/iterators/document.py +19 -8
  107. pixeltable/iterators/image.py +6 -24
  108. pixeltable/iterators/string.py +3 -6
  109. pixeltable/iterators/video.py +1 -7
  110. pixeltable/metadata/__init__.py +7 -1
  111. pixeltable/metadata/converters/convert_10.py +2 -2
  112. pixeltable/metadata/converters/convert_15.py +1 -5
  113. pixeltable/metadata/converters/convert_16.py +2 -4
  114. pixeltable/metadata/converters/convert_17.py +2 -4
  115. pixeltable/metadata/converters/convert_18.py +2 -4
  116. pixeltable/metadata/converters/convert_19.py +2 -5
  117. pixeltable/metadata/converters/convert_20.py +1 -4
  118. pixeltable/metadata/converters/convert_21.py +4 -6
  119. pixeltable/metadata/converters/convert_22.py +1 -0
  120. pixeltable/metadata/converters/convert_23.py +5 -5
  121. pixeltable/metadata/converters/convert_24.py +12 -13
  122. pixeltable/metadata/converters/convert_26.py +23 -0
  123. pixeltable/metadata/converters/util.py +3 -4
  124. pixeltable/metadata/notes.py +1 -0
  125. pixeltable/metadata/schema.py +13 -2
  126. pixeltable/plan.py +173 -98
  127. pixeltable/share/__init__.py +0 -0
  128. pixeltable/share/packager.py +218 -0
  129. pixeltable/store.py +42 -26
  130. pixeltable/type_system.py +102 -75
  131. pixeltable/utils/arrow.py +7 -8
  132. pixeltable/utils/coco.py +16 -17
  133. pixeltable/utils/code.py +1 -1
  134. pixeltable/utils/console_output.py +6 -3
  135. pixeltable/utils/description_helper.py +7 -7
  136. pixeltable/utils/documents.py +3 -1
  137. pixeltable/utils/filecache.py +12 -7
  138. pixeltable/utils/http_server.py +9 -8
  139. pixeltable/utils/iceberg.py +14 -0
  140. pixeltable/utils/media_store.py +3 -2
  141. pixeltable/utils/pytorch.py +11 -14
  142. pixeltable/utils/s3.py +1 -0
  143. pixeltable/utils/sql.py +1 -0
  144. pixeltable/utils/transactional_directory.py +2 -2
  145. {pixeltable-0.3.2.dist-info → pixeltable-0.3.4.dist-info}/METADATA +9 -9
  146. pixeltable-0.3.4.dist-info/RECORD +166 -0
  147. pixeltable-0.3.2.dist-info/RECORD +0 -161
  148. {pixeltable-0.3.2.dist-info → pixeltable-0.3.4.dist-info}/LICENSE +0 -0
  149. {pixeltable-0.3.2.dist-info → pixeltable-0.3.4.dist-info}/WHEEL +0 -0
  150. {pixeltable-0.3.2.dist-info → pixeltable-0.3.4.dist-info}/entry_points.txt +0 -0
@@ -26,7 +26,8 @@ class InPredicate(Expr):
26
26
  if value_set_expr is not None:
27
27
  if not value_set_expr.col_type.is_json_type():
28
28
  raise excs.Error(
29
- f'isin(): argument must have a JSON type, but {value_set_expr} has type {value_set_expr.col_type}')
29
+ f'isin(): argument must have a JSON type, but {value_set_expr} has type {value_set_expr.col_type}'
30
+ )
30
31
  self.components = [lhs.copy(), value_set_expr.copy()]
31
32
  else:
32
33
  assert value_set_literal is not None
@@ -95,4 +96,3 @@ class InPredicate(Expr):
95
96
  assert 'value_list' in d
96
97
  assert len(components) <= 2
97
98
  return cls(components[0], d['value_list'], components[1] if len(components) == 2 else None)
98
-
@@ -45,8 +45,7 @@ class InlineArray(Expr):
45
45
  elif inferred_element_type.is_array_type():
46
46
  assert isinstance(inferred_element_type, ts.ArrayType)
47
47
  col_type = ts.ArrayType(
48
- (len(exprs), *inferred_element_type.shape),
49
- ts.ColumnType.make_type(inferred_element_type.dtype)
48
+ (len(exprs), *inferred_element_type.shape), ts.ColumnType.make_type(inferred_element_type.dtype)
50
49
  )
51
50
  else:
52
51
  raise excs.Error(f'Element type is not a valid dtype for an array: {inferred_element_type}')
@@ -82,9 +81,14 @@ class InlineArray(Expr):
82
81
  # loaded and their types are known.
83
82
  return InlineList(components) # type: ignore[return-value]
84
83
 
85
- def _as_constant(self) -> Optional[np.ndarray]:
84
+ def as_literal(self) -> Optional[Literal]:
86
85
  assert isinstance(self.col_type, ts.ArrayType)
87
- return np.array([c.as_constant() for c in self.components], dtype=self.col_type.numpy_dtype())
86
+ if not all(isinstance(comp, Literal) for comp in self.components):
87
+ return None
88
+ return Literal(
89
+ np.array([c.as_literal().val for c in self.components], dtype=self.col_type.numpy_dtype()), self.col_type
90
+ )
91
+
88
92
 
89
93
  class InlineList(Expr):
90
94
  """
@@ -97,7 +101,7 @@ class InlineList(Expr):
97
101
  json_schema = {
98
102
  'type': 'array',
99
103
  'prefixItems': [expr.col_type.to_json_schema() for expr in exprs],
100
- 'items': False # No additional items (fixed length)
104
+ 'items': False, # No additional items (fixed length)
101
105
  }
102
106
 
103
107
  super().__init__(ts.JsonType(json_schema))
@@ -124,8 +128,11 @@ class InlineList(Expr):
124
128
  def _from_dict(cls, _: dict, components: list[Expr]) -> InlineList:
125
129
  return cls(components)
126
130
 
127
- def _as_constant(self) -> Optional[list[Any]]:
128
- return list(c.as_constant() for c in self.components)
131
+ def as_literal(self) -> Optional[Literal]:
132
+ if not all(isinstance(comp, Literal) for comp in self.components):
133
+ return None
134
+ return Literal(list(c.as_literal().val for c in self.components), self.col_type)
135
+
129
136
 
130
137
  class InlineDict(Expr):
131
138
  """
@@ -147,10 +154,7 @@ class InlineDict(Expr):
147
154
  try:
148
155
  json_schema = {
149
156
  'type': 'object',
150
- 'properties': {
151
- key: expr.col_type.to_json_schema()
152
- for key, expr in zip(self.keys, exprs)
153
- },
157
+ 'properties': {key: expr.col_type.to_json_schema() for key, expr in zip(self.keys, exprs)},
154
158
  }
155
159
  except excs.Error:
156
160
  # InlineDicts are used to store iterator arguments, which are not required to be valid JSON types,
@@ -177,10 +181,7 @@ class InlineDict(Expr):
177
181
 
178
182
  def eval(self, data_row: DataRow, _: RowBuilder) -> None:
179
183
  assert len(self.keys) == len(self.components)
180
- data_row[self.slot_idx] = {
181
- key: data_row[expr.slot_idx]
182
- for key, expr in zip(self.keys, self.components)
183
- }
184
+ data_row[self.slot_idx] = {key: data_row[expr.slot_idx] for key, expr in zip(self.keys, self.components)}
184
185
 
185
186
  def to_kwargs(self) -> dict[str, Any]:
186
187
  """Deconstructs this expression into a dictionary by recursively unwrapping all Literals,
@@ -207,5 +208,7 @@ class InlineDict(Expr):
207
208
  arg = dict(zip(d['keys'], components))
208
209
  return InlineDict(arg)
209
210
 
210
- def _as_constant(self) -> Optional[dict[str, Any]]:
211
- return dict(zip(self.keys, (c.as_constant() for c in self.components)))
211
+ def as_literal(self) -> Optional[Literal]:
212
+ if not all(isinstance(comp, Literal) for comp in self.components):
213
+ return None
214
+ return Literal(dict(zip(self.keys, (c.as_literal().val for c in self.components))), self.col_type)
@@ -5,8 +5,9 @@ from typing import Optional
5
5
  import sqlalchemy as sql
6
6
 
7
7
  import pixeltable.type_system as ts
8
+
8
9
  from .data_row import DataRow
9
- from .expr import Expr, ExprScope, _GLOBAL_SCOPE
10
+ from .expr import _GLOBAL_SCOPE, Expr, ExprScope
10
11
  from .row_builder import RowBuilder
11
12
  from .sql_element_cache import SqlElementCache
12
13
 
@@ -17,6 +18,7 @@ class JsonMapper(Expr):
17
18
  The target expr would typically contain relative JsonPaths, which are bound to an ObjectRef, which in turn
18
19
  is populated by JsonMapper.eval(). The JsonMapper effectively creates a new scope for its target expr.
19
20
  """
21
+
20
22
  def __init__(self, src_expr: Expr, target_expr: Expr):
21
23
  # TODO: type spec should be list[target_expr.col_type]
22
24
  super().__init__(ts.JsonType())
@@ -26,6 +28,7 @@ class JsonMapper(Expr):
26
28
  self.target_expr_scope = ExprScope(_GLOBAL_SCOPE)
27
29
 
28
30
  from .object_ref import ObjectRef
31
+
29
32
  scope_anchor = ObjectRef(self.target_expr_scope, self)
30
33
  self.components = [src_expr, target_expr, scope_anchor]
31
34
  self.parent_mapper: Optional[JsonMapper] = None
@@ -118,4 +121,3 @@ class JsonMapper(Expr):
118
121
  def _from_dict(cls, d: dict, components: list[Expr]) -> JsonMapper:
119
122
  assert len(components) == 2
120
123
  return cls(components[0], components[1])
121
-
@@ -23,7 +23,7 @@ class JsonPath(Expr):
23
23
  self,
24
24
  anchor: Optional['pxt.exprs.Expr'],
25
25
  path_elements: Optional[list[Union[str, int, slice]]] = None,
26
- scope_idx: int = 0
26
+ scope_idx: int = 0,
27
27
  ) -> None:
28
28
  """
29
29
  anchor can be None, in which case this is a relative JsonPath and the anchor is set later via set_anchor().
@@ -44,15 +44,13 @@ class JsonPath(Expr):
44
44
 
45
45
  def __repr__(self) -> str:
46
46
  # else "R": the anchor is RELATIVE_PATH_ROOT
47
- return (f'{str(self._anchor) if self._anchor is not None else "R"}'
48
- f'{"." if isinstance(self.path_elements[0], str) else ""}{self._json_path()}')
47
+ return (
48
+ f'{str(self._anchor) if self._anchor is not None else "R"}'
49
+ f'{"." if isinstance(self.path_elements[0], str) else ""}{self._json_path()}'
50
+ )
49
51
 
50
52
  def _as_dict(self) -> dict:
51
- path_elements = [
52
- [el.start, el.stop, el.step] if isinstance(el, slice)
53
- else el
54
- for el in self.path_elements
55
- ]
53
+ path_elements = [[el.start, el.stop, el.step] if isinstance(el, slice) else el for el in self.path_elements]
56
54
  return {'path_elements': path_elements, 'scope_idx': self.scope_idx, **super()._as_dict()}
57
55
 
58
56
  @classmethod
@@ -61,11 +59,7 @@ class JsonPath(Expr):
61
59
  assert 'scope_idx' in d
62
60
  assert len(components) <= 1
63
61
  anchor = components[0] if len(components) == 1 else None
64
- path_elements = [
65
- slice(el[0], el[1], el[2]) if isinstance(el, list)
66
- else el
67
- for el in d['path_elements']
68
- ]
62
+ path_elements = [slice(el[0], el[1], el[2]) if isinstance(el, list) else el for el in d['path_elements']]
69
63
  return cls(anchor, path_elements, d['scope_idx'])
70
64
 
71
65
  @property
@@ -114,7 +108,7 @@ class JsonPath(Expr):
114
108
  anchor_name = self._anchor.default_column_name() if self._anchor is not None else ''
115
109
  ret_name = f'{anchor_name}.{self._json_path()}'
116
110
 
117
- def cleanup_char(s : str) -> str:
111
+ def cleanup_char(s: str) -> str:
118
112
  if s == '.':
119
113
  return '_'
120
114
  elif s == '*':
@@ -125,7 +119,7 @@ class JsonPath(Expr):
125
119
  return ''
126
120
 
127
121
  clean_name = ''.join(map(cleanup_char, ret_name))
128
- clean_name = clean_name.lstrip('_') # remove leading underscore
122
+ clean_name = clean_name.lstrip('_') # remove leading underscore
129
123
  if clean_name == '':
130
124
  clean_name = None
131
125
 
@@ -144,9 +138,9 @@ class JsonPath(Expr):
144
138
  *two* rows (each containing col val 0), not a single row with [0, 0].
145
139
  We need to use a workaround: retrieve the entire dict, then use jmespath to extract the path correctly.
146
140
  """
147
- #path_str = '$.' + '.'.join(self.path_elements)
148
- #assert isinstance(self._anchor(), ColumnRef)
149
- #return sql.func.jsonb_path_query(self._anchor().col.sa_col, path_str)
141
+ # path_str = '$.' + '.'.join(self.path_elements)
142
+ # assert isinstance(self._anchor(), ColumnRef)
143
+ # return sql.func.jsonb_path_query(self._anchor().col.sa_col, path_str)
150
144
  return None
151
145
 
152
146
  def _json_path(self) -> str:
@@ -3,8 +3,8 @@ from __future__ import annotations
3
3
  import datetime
4
4
  from typing import Any, Optional
5
5
 
6
- import sqlalchemy as sql
7
6
  import numpy as np
7
+ import sqlalchemy as sql
8
8
 
9
9
  import pixeltable.type_system as ts
10
10
  from pixeltable.env import Env
@@ -65,9 +65,8 @@ class Literal(Expr):
65
65
  return super()._id_attrs() + [('val', self.val)]
66
66
 
67
67
  def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
68
- # we need to return something here so that we can generate a Where clause for predicates
69
- # that involve literals (like Where c > 0)
70
- return sql.sql.expression.literal(self.val)
68
+ # Return a sql object so that constants can participate in SQL expressions
69
+ return sql.sql.expression.literal(self.val, type_=self.col_type.to_sa_type())
71
70
 
72
71
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
73
72
  # this will be called, even though sql_expr() does not return None
@@ -89,11 +88,8 @@ class Literal(Expr):
89
88
  else:
90
89
  return {'val': self.val, **super()._as_dict()}
91
90
 
92
- def _as_constant(self) -> Any:
93
- return self.val
94
-
95
- def is_constant(self) -> bool:
96
- return True
91
+ def as_literal(self) -> Optional[Literal]:
92
+ return self
97
93
 
98
94
  @classmethod
99
95
  def _from_dict(cls, d: dict, components: list[Expr]) -> Literal:
@@ -19,6 +19,7 @@ class MethodRef(Expr):
19
19
  When a `MethodRef` is called, it returns a `FunctionCall` with the base expression as the first argument.
20
20
  The effective arity of a `MethodRef` is one less than the arity of the underlying `Function`.
21
21
  """
22
+
22
23
  # TODO: Should this even be an `Expr`? It can't actually be evaluated directly (it has to be first
23
24
  # converted to a `FunctionCall` by binding any remaining parameters).
24
25
 
@@ -18,6 +18,7 @@ class ObjectRef(Expr):
18
18
  Reference to an intermediate result, such as the "scope variable" produced by a JsonMapper.
19
19
  The object is generated/materialized elsewhere and establishes a new scope.
20
20
  """
21
+
21
22
  def __init__(self, scope: ExprScope, owner: JsonMapper):
22
23
  # TODO: do we need an Unknown type after all?
23
24
  super().__init__(ts.JsonType()) # JsonType: this could be anything
@@ -40,4 +41,3 @@ class ObjectRef(Expr):
40
41
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
41
42
  # this will be called, but the value has already been materialized elsewhere
42
43
  pass
43
-
@@ -8,13 +8,15 @@ from uuid import UUID
8
8
 
9
9
  import numpy as np
10
10
  import sqlalchemy as sql
11
+
11
12
  import pixeltable.catalog as catalog
12
13
  import pixeltable.exceptions as excs
13
14
  import pixeltable.func as func
14
15
  import pixeltable.utils as utils
16
+ from pixeltable.env import Env
15
17
  from pixeltable.utils.media_store import MediaStore
18
+
16
19
  from .data_row import DataRow
17
- from pixeltable.env import Env
18
20
  from .expr import Expr
19
21
  from .expr_set import ExprSet
20
22
 
@@ -32,7 +34,9 @@ class ExecProfile:
32
34
  per_call_time = self.eval_time[i] / self.eval_count[i]
33
35
  calls_per_row = self.eval_count[i] / num_rows
34
36
  multiple_str = f'({calls_per_row}x)' if calls_per_row > 1 else ''
35
- Env.get().console_logger.info(f'{self.row_builder.unique_exprs[i]}: {utils.print_perf_counter_delta(per_call_time)} {multiple_str}')
37
+ Env.get().console_logger.info(
38
+ f'{self.row_builder.unique_exprs[i]}: {utils.print_perf_counter_delta(per_call_time)} {multiple_str}'
39
+ )
36
40
 
37
41
 
38
42
  @dataclass
@@ -40,6 +44,7 @@ class ColumnSlotIdx:
40
44
  """Info for how to locate materialized column in DataRow
41
45
  TODO: can this be integrated into RowBuilder directly?
42
46
  """
47
+
43
48
  col: catalog.Column
44
49
  slot_idx: int
45
50
 
@@ -50,6 +55,7 @@ class RowBuilder:
50
55
  For ColumnRefs to unstored iterator columns:
51
56
  - in order for them to be executable, we also record the iterator args and pass them to the ColumnRef
52
57
  """
58
+
53
59
  unique_exprs: ExprSet
54
60
  next_slot_idx: int
55
61
  input_expr_slot_idxs: set[int]
@@ -83,14 +89,13 @@ class RowBuilder:
83
89
  @dataclass
84
90
  class EvalCtx:
85
91
  """Context for evaluating a set of target exprs"""
92
+
86
93
  slot_idxs: list[int] # slot idxs of exprs needed to evaluate target exprs; does not contain duplicates
87
94
  exprs: list[Expr] # exprs corresponding to slot_idxs
88
95
  target_slot_idxs: list[int] # slot idxs of target exprs; might contain duplicates
89
96
  target_exprs: list[Expr] # exprs corresponding to target_slot_idxs
90
97
 
91
- def __init__(
92
- self, output_exprs: Sequence[Expr], columns: Sequence[catalog.Column], input_exprs: Iterable[Expr]
93
- ):
98
+ def __init__(self, output_exprs: Sequence[Expr], columns: Sequence[catalog.Column], input_exprs: Iterable[Expr]):
94
99
  """
95
100
  Args:
96
101
  output_exprs: list of Exprs to be evaluated
@@ -106,10 +111,12 @@ class RowBuilder:
106
111
  self.input_expr_slot_idxs = {e.slot_idx for e in unique_input_exprs}
107
112
 
108
113
  resolve_cols = set(columns)
109
- self.output_exprs = ExprSet([
110
- self._record_unique_expr(e.copy().resolve_computed_cols(resolve_cols=resolve_cols), recursive=True)
111
- for e in output_exprs
112
- ])
114
+ self.output_exprs = ExprSet(
115
+ [
116
+ self._record_unique_expr(e.copy().resolve_computed_cols(resolve_cols=resolve_cols), recursive=True)
117
+ for e in output_exprs
118
+ ]
119
+ )
113
120
 
114
121
  # if init(columns):
115
122
  # - we are creating table rows and need to record columns for create_table_row()
@@ -119,6 +126,7 @@ class RowBuilder:
119
126
  # * for write-validated columns, we need to create validating ColumnRefs
120
127
  # * further references to that column (eg, computed cols) need to resolve to the validating ColumnRef
121
128
  from .column_ref import ColumnRef
129
+
122
130
  self.table_columns: list[ColumnSlotIdx] = []
123
131
  self.input_exprs = ExprSet()
124
132
  validating_colrefs: dict[Expr, Expr] = {} # key: non-validating colref, value: corresp. validating colref
@@ -133,7 +141,8 @@ class RowBuilder:
133
141
  else:
134
142
  # record a ColumnRef so that references to this column resolve to the same slot idx
135
143
  perform_validation = (
136
- None if not col.col_type.is_media_type()
144
+ None
145
+ if not col.col_type.is_media_type()
137
146
  else col.media_validation == catalog.MediaValidation.ON_WRITE
138
147
  )
139
148
  expr = ColumnRef(col, perform_validation=perform_validation)
@@ -184,10 +193,11 @@ class RowBuilder:
184
193
 
185
194
  # determine transitive dependencies for the purpose of exception propagation
186
195
  # (list of set of slot_idxs, indexed by slot_idx)
187
- #self.dependents = np.zeros((self.num_materialized, self.num_materialized), dtype=bool)
196
+ # self.dependents = np.zeros((self.num_materialized, self.num_materialized), dtype=bool)
188
197
  self.dependencies = np.zeros((self.num_materialized, self.num_materialized), dtype=bool)
189
198
  exc_dependencies: list[set[int]] = [set() for _ in range(self.num_materialized)]
190
199
  from .column_property_ref import ColumnPropertyRef
200
+
191
201
  for expr in self.unique_exprs:
192
202
  if expr.slot_idx in self.input_expr_slot_idxs:
193
203
  # this is input and therefore doesn't depend on other exprs
@@ -204,8 +214,8 @@ class RowBuilder:
204
214
  self.dependents = self.dependencies.T
205
215
  self.transitive_dependents = np.zeros((self.num_materialized, self.num_materialized), dtype=bool)
206
216
  for i in reversed(range(self.num_materialized)):
207
- self.transitive_dependents[i] = (
208
- self.dependents[i] | np.any(self.transitive_dependents[self.dependents[i]], axis=0)
217
+ self.transitive_dependents[i] = self.dependents[i] | np.any(
218
+ self.transitive_dependents[self.dependents[i]], axis=0
209
219
  )
210
220
 
211
221
  self._exc_dependents = [set() for _ in range(self.num_materialized)]
@@ -228,6 +238,7 @@ class RowBuilder:
228
238
 
229
239
  def set_conn(self, conn: sql.engine.Connection) -> None:
230
240
  from .function_call import FunctionCall
241
+
231
242
  for expr in self.unique_exprs:
232
243
  if isinstance(expr, FunctionCall) and isinstance(expr.fn, func.QueryTemplateFunction):
233
244
  expr.fn.set_conn(conn)
@@ -352,8 +363,11 @@ class RowBuilder:
352
363
  target_slot_idxs = [e.slot_idx for e in targets]
353
364
  ctx_slot_idxs = [e.slot_idx for e in dependencies]
354
365
  return self.EvalCtx(
355
- slot_idxs=ctx_slot_idxs, exprs=[self.unique_exprs[slot_idx] for slot_idx in ctx_slot_idxs],
356
- target_slot_idxs=target_slot_idxs, target_exprs=targets)
366
+ slot_idxs=ctx_slot_idxs,
367
+ exprs=[self.unique_exprs[slot_idx] for slot_idx in ctx_slot_idxs],
368
+ target_slot_idxs=target_slot_idxs,
369
+ target_exprs=targets,
370
+ )
357
371
 
358
372
  def set_exc(self, data_row: DataRow, slot_idx: int, exc: Exception) -> None:
359
373
  """Record an exception in data_row and propagate it to dependents"""
@@ -362,7 +376,7 @@ class RowBuilder:
362
376
  data_row.set_exc(slot_idx, exc)
363
377
 
364
378
  def eval(
365
- self, data_row: DataRow, ctx: EvalCtx, profile: Optional[ExecProfile] = None, ignore_errors: bool = False
379
+ self, data_row: DataRow, ctx: EvalCtx, profile: Optional[ExecProfile] = None, ignore_errors: bool = False
366
380
  ) -> None:
367
381
  """
368
382
  Populates the slots in data_row given in ctx.
@@ -387,7 +401,8 @@ class RowBuilder:
387
401
  if not ignore_errors:
388
402
  input_vals = [data_row[d.slot_idx] for d in expr.dependencies()]
389
403
  raise excs.ExprEvalError(
390
- expr, f'expression {expr}', data_row.get_exc(expr.slot_idx), exc_tb, input_vals, 0) from exc
404
+ expr, f'expression {expr}', data_row.get_exc(expr.slot_idx), exc_tb, input_vals, 0
405
+ ) from exc
391
406
 
392
407
  def create_table_row(self, data_row: DataRow, exc_col_ids: set[int]) -> tuple[dict[str, Any], int]:
393
408
  """Create a table row from the slots that have an output column assigned
@@ -22,9 +22,14 @@ class RowidRef(Expr):
22
22
  _from_dict()/init() is called, which is why this class effectively has two separate paths for construction
23
23
  (with and without a TableVersion).
24
24
  """
25
+
25
26
  def __init__(
26
- self, tbl: catalog.TableVersion, idx: int,
27
- tbl_id: Optional[UUID] = None, normalized_base_id: Optional[UUID] = None):
27
+ self,
28
+ tbl: catalog.TableVersion,
29
+ idx: int,
30
+ tbl_id: Optional[UUID] = None,
31
+ normalized_base_id: Optional[UUID] = None,
32
+ ):
28
33
  super().__init__(ts.IntType(nullable=False))
29
34
  self.tbl = tbl
30
35
  if tbl is not None:
@@ -48,12 +53,16 @@ class RowidRef(Expr):
48
53
  return str(self)
49
54
 
50
55
  def _equals(self, other: RowidRef) -> bool:
51
- return self.normalized_base_id == other.normalized_base_id \
56
+ return (
57
+ self.normalized_base_id == other.normalized_base_id
52
58
  and self.rowid_component_idx == other.rowid_component_idx
59
+ )
53
60
 
54
61
  def _id_attrs(self) -> list[tuple[str, Any]]:
55
- return super()._id_attrs() +\
56
- [('normalized_base_id', self.normalized_base_id), ('idx', self.rowid_component_idx)]
62
+ return super()._id_attrs() + [
63
+ ('normalized_base_id', self.normalized_base_id),
64
+ ('idx', self.rowid_component_idx),
65
+ ]
57
66
 
58
67
  def __repr__(self) -> str:
59
68
  # check if this is the pos column of a component view
@@ -14,11 +14,10 @@ from .sql_element_cache import SqlElementCache
14
14
 
15
15
 
16
16
  class SimilarityExpr(Expr):
17
-
18
17
  def __init__(self, col_ref: ColumnRef, item: Any, idx_name: Optional[str] = None):
19
18
  super().__init__(ts.FloatType())
20
19
  item_expr = Expr.from_object(item)
21
- if item_expr is None or not(item_expr.col_type.is_string_type() or item_expr.col_type.is_image_type()):
20
+ if item_expr is None or not (item_expr.col_type.is_string_type() or item_expr.col_type.is_image_type()):
22
21
  raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not a {type(item)}')
23
22
  assert item_expr.col_type.is_string_type() or item_expr.col_type.is_image_type()
24
23
 
@@ -27,6 +26,7 @@ class SimilarityExpr(Expr):
27
26
  # determine index to use
28
27
  idx_info = col_ref.col.get_idx_info()
29
28
  from pixeltable import index
29
+
30
30
  embedding_idx_info = {
31
31
  info.name: info for info in idx_info.values() if isinstance(info.idx, index.EmbeddingIndex)
32
32
  }
@@ -38,7 +38,8 @@ class SimilarityExpr(Expr):
38
38
  if idx_name is None:
39
39
  raise excs.Error(
40
40
  f'Column {col_ref.col.name!r} has multiple indices; use the index name to disambiguate: '
41
- f'`{col_ref.col.name}.similarity(..., idx=<name>)`')
41
+ f'`{col_ref.col.name}.similarity(..., idx=<name>)`'
42
+ )
42
43
  self.idx_info = embedding_idx_info[idx_name]
43
44
  else:
44
45
  self.idx_info = next(iter(embedding_idx_info.values()))
@@ -48,11 +49,13 @@ class SimilarityExpr(Expr):
48
49
  if item_expr.col_type.is_string_type() and idx.string_embed is None:
49
50
  raise excs.Error(
50
51
  f'Embedding index {self.idx_info.name!r} on column {self.idx_info.col.name!r} does not have a '
51
- f"string embedding and does not support string queries")
52
+ f'string embedding and does not support string queries'
53
+ )
52
54
  if item_expr.col_type.is_image_type() and idx.image_embed is None:
53
55
  raise excs.Error(
54
56
  f'Embedding index {self.idx_info.name!r} on column {self.idx_info.col.name!r} does not have an '
55
- f"image embedding and does not support image queries")
57
+ f'image embedding and does not support image queries'
58
+ )
56
59
  self.id = self._create_id()
57
60
 
58
61
  def __repr__(self) -> str:
@@ -66,9 +69,10 @@ class SimilarityExpr(Expr):
66
69
 
67
70
  def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
68
71
  if not isinstance(self.components[1], Literal):
69
- raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not an expression')
72
+ raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not an expression')
70
73
  item = self.components[1].val
71
74
  from pixeltable import index
75
+
72
76
  assert isinstance(self.idx_info.idx, index.EmbeddingIndex)
73
77
  return self.idx_info.idx.similarity_clause(self.idx_info.val_col, item)
74
78
 
@@ -77,6 +81,7 @@ class SimilarityExpr(Expr):
77
81
  raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not an expression')
78
82
  item = self.components[1].val
79
83
  from pixeltable import index
84
+
80
85
  assert isinstance(self.idx_info.idx, index.EmbeddingIndex)
81
86
  return self.idx_info.idx.order_by_clause(self.idx_info.val_col, item, is_asc)
82
87
 
@@ -1,4 +1,4 @@
1
- from typing import Iterable, Union, Optional, cast
1
+ from typing import Iterable, Optional, Union, cast
2
2
 
3
3
  import sqlalchemy as sql
4
4
 
@@ -1,10 +1,12 @@
1
- from typing import Any, Optional
1
+ from typing import Any, Optional, Union
2
2
 
3
3
  import sqlalchemy as sql
4
4
 
5
+ import pixeltable.exprs as exprs
5
6
  import pixeltable.type_system as ts
6
7
 
7
8
  from .expr import DataRow, Expr
9
+ from .literal import Literal
8
10
  from .row_builder import RowBuilder
9
11
  from .sql_element_cache import SqlElementCache
10
12
 
@@ -14,21 +16,19 @@ class TypeCast(Expr):
14
16
  An `Expr` that represents a type conversion from an underlying `Expr` to
15
17
  a specified `ColumnType`.
16
18
  """
19
+
17
20
  def __init__(self, underlying: Expr, new_type: ts.ColumnType):
18
21
  super().__init__(new_type)
19
22
  self.components: list[Expr] = [underlying]
20
23
  self.id: Optional[int] = self._create_id()
21
24
 
22
- @property
23
- def _underlying(self):
24
- return self.components[0]
25
-
26
25
  def _equals(self, other: 'TypeCast') -> bool:
27
26
  # `TypeCast` has no properties beyond those captured by `Expr`.
28
27
  return True
29
28
 
30
- def _id_attrs(self) -> list[tuple[str, Any]]:
31
- return super()._id_attrs() + [('new_type', self.col_type)]
29
+ @property
30
+ def _op1(self) -> Expr:
31
+ return self.components[0]
32
32
 
33
33
  def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
34
34
  """
@@ -38,9 +38,24 @@ class TypeCast(Expr):
38
38
  return None
39
39
 
40
40
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
41
- original_val = data_row[self._underlying.slot_idx]
41
+ original_val = data_row[self._op1.slot_idx]
42
42
  data_row[self.slot_idx] = self.col_type.create_literal(original_val)
43
43
 
44
+ def as_literal(self) -> Optional[Literal]:
45
+ op1_lit = self._op1.as_literal()
46
+ if op1_lit is None:
47
+ return None
48
+ if not (
49
+ self.col_type.is_numeric_type() and (op1_lit.col_type.is_numeric_type() or op1_lit.col_type.is_bool_type())
50
+ ):
51
+ return None
52
+
53
+ op1_val = op1_lit.val
54
+ if self.col_type.is_int_type():
55
+ return Literal(int(op1_val), self.col_type)
56
+ elif self.col_type.is_float_type():
57
+ return Literal(float(op1_val), self.col_type)
58
+ return None
44
59
 
45
60
  def _as_dict(self) -> dict:
46
61
  return {'new_type': self.col_type.as_dict(), **super()._as_dict()}
@@ -52,4 +67,4 @@ class TypeCast(Expr):
52
67
  return cls(components[0], ts.ColumnType.from_dict(d['new_type']))
53
68
 
54
69
  def __repr__(self) -> str:
55
- return f'{self._underlying}.astype({self.col_type._to_str(as_schema=True)})'
70
+ return f'{self._op1}.astype({self.col_type._to_str(as_schema=True)})'
@@ -5,6 +5,7 @@ have dependencies whose future support is unclear.
5
5
  """
6
6
 
7
7
  from pixeltable.utils.code import local_public_names
8
+
8
9
  from . import functions
9
10
 
10
11
  __all__ = local_public_names(__name__)
@@ -1,4 +1,5 @@
1
1
  from pixeltable.utils.code import local_public_names
2
+
2
3
  from . import whisperx, yolox
3
4
 
4
5
  __all__ = local_public_names(__name__)
@@ -15,7 +15,7 @@ def transcribe(
15
15
  model: str,
16
16
  compute_type: Optional[str] = None,
17
17
  language: Optional[str] = None,
18
- chunk_size: int = 30
18
+ chunk_size: int = 30,
19
19
  ) -> dict:
20
20
  """
21
21
  Transcribe an audio file using WhisperX.
@@ -44,7 +44,7 @@ def transcribe(
44
44
  Add a computed column that applies the model `tiny.en` to an existing Pixeltable column `tbl.audio`
45
45
  of the table `tbl`:
46
46
 
47
- >>> tbl['result'] = transcribe(tbl.audio, model='tiny.en')
47
+ >>> tbl.add_computed_column(result=transcribe(tbl.audio, model='tiny.en'))
48
48
  """
49
49
  import torch
50
50
  import whisperx # type: ignore[import-untyped]