pixeltable 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (87)
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/catalog.py +8 -7
  4. pixeltable/catalog/column.py +11 -8
  5. pixeltable/catalog/insertable_table.py +1 -1
  6. pixeltable/catalog/path_dict.py +8 -6
  7. pixeltable/catalog/table.py +20 -14
  8. pixeltable/catalog/table_version.py +92 -55
  9. pixeltable/catalog/table_version_path.py +7 -9
  10. pixeltable/catalog/view.py +3 -2
  11. pixeltable/dataframe.py +2 -2
  12. pixeltable/env.py +205 -86
  13. pixeltable/exceptions.py +5 -1
  14. pixeltable/exec/aggregation_node.py +2 -1
  15. pixeltable/exec/component_iteration_node.py +2 -2
  16. pixeltable/exec/sql_node.py +11 -8
  17. pixeltable/exprs/__init__.py +2 -2
  18. pixeltable/exprs/arithmetic_expr.py +4 -4
  19. pixeltable/exprs/array_slice.py +2 -1
  20. pixeltable/exprs/column_property_ref.py +9 -7
  21. pixeltable/exprs/column_ref.py +2 -1
  22. pixeltable/exprs/comparison.py +10 -7
  23. pixeltable/exprs/compound_predicate.py +3 -2
  24. pixeltable/exprs/data_row.py +19 -4
  25. pixeltable/exprs/expr.py +51 -41
  26. pixeltable/exprs/expr_set.py +32 -9
  27. pixeltable/exprs/function_call.py +62 -40
  28. pixeltable/exprs/in_predicate.py +3 -2
  29. pixeltable/exprs/inline_expr.py +200 -0
  30. pixeltable/exprs/is_null.py +3 -2
  31. pixeltable/exprs/json_mapper.py +5 -4
  32. pixeltable/exprs/json_path.py +7 -1
  33. pixeltable/exprs/literal.py +34 -7
  34. pixeltable/exprs/method_ref.py +3 -3
  35. pixeltable/exprs/object_ref.py +6 -5
  36. pixeltable/exprs/row_builder.py +25 -17
  37. pixeltable/exprs/rowid_ref.py +2 -1
  38. pixeltable/exprs/similarity_expr.py +2 -1
  39. pixeltable/exprs/sql_element_cache.py +30 -0
  40. pixeltable/exprs/type_cast.py +3 -3
  41. pixeltable/exprs/variable.py +2 -1
  42. pixeltable/ext/functions/whisperx.py +6 -4
  43. pixeltable/ext/functions/yolox.py +11 -9
  44. pixeltable/func/aggregate_function.py +1 -0
  45. pixeltable/func/function.py +28 -4
  46. pixeltable/functions/__init__.py +4 -2
  47. pixeltable/functions/anthropic.py +15 -5
  48. pixeltable/functions/fireworks.py +1 -1
  49. pixeltable/functions/globals.py +6 -1
  50. pixeltable/functions/huggingface.py +91 -14
  51. pixeltable/functions/image.py +20 -5
  52. pixeltable/functions/json.py +5 -5
  53. pixeltable/functions/mistralai.py +188 -0
  54. pixeltable/functions/openai.py +6 -10
  55. pixeltable/functions/string.py +3 -2
  56. pixeltable/functions/timestamp.py +95 -7
  57. pixeltable/functions/together.py +18 -11
  58. pixeltable/functions/video.py +2 -2
  59. pixeltable/functions/vision.py +69 -37
  60. pixeltable/functions/whisper.py +4 -1
  61. pixeltable/globals.py +5 -1
  62. pixeltable/io/hf_datasets.py +17 -15
  63. pixeltable/io/pandas.py +0 -2
  64. pixeltable/io/parquet.py +15 -14
  65. pixeltable/iterators/document.py +16 -15
  66. pixeltable/metadata/__init__.py +1 -1
  67. pixeltable/metadata/converters/convert_18.py +1 -1
  68. pixeltable/metadata/converters/convert_19.py +46 -0
  69. pixeltable/metadata/converters/convert_20.py +56 -0
  70. pixeltable/metadata/converters/util.py +29 -4
  71. pixeltable/metadata/notes.py +2 -0
  72. pixeltable/metadata/schema.py +5 -4
  73. pixeltable/plan.py +100 -78
  74. pixeltable/store.py +5 -1
  75. pixeltable/tool/create_test_db_dump.py +18 -6
  76. pixeltable/type_system.py +15 -15
  77. pixeltable/utils/documents.py +45 -42
  78. pixeltable/utils/formatter.py +2 -2
  79. pixeltable-0.2.19.dist-info/LICENSE +201 -0
  80. {pixeltable-0.2.17.dist-info → pixeltable-0.2.19.dist-info}/METADATA +84 -24
  81. pixeltable-0.2.19.dist-info/RECORD +147 -0
  82. pixeltable/exprs/inline_array.py +0 -116
  83. pixeltable/exprs/inline_dict.py +0 -103
  84. pixeltable-0.2.17.dist-info/LICENSE +0 -18
  85. pixeltable-0.2.17.dist-info/RECORD +0 -144
  86. {pixeltable-0.2.17.dist-info → pixeltable-0.2.19.dist-info}/WHEEL +0 -0
  87. {pixeltable-0.2.17.dist-info → pixeltable-0.2.19.dist-info}/entry_points.txt +0 -0
@@ -5,6 +5,7 @@ from typing import Optional, List, Any, Dict, Tuple, Iterable
5
5
  import sqlalchemy as sql
6
6
 
7
7
  import pixeltable.exceptions as excs
8
+ from .sql_element_cache import SqlElementCache
8
9
  import pixeltable.type_system as ts
9
10
  from .data_row import DataRow
10
11
  from .expr import Expr
@@ -70,8 +71,8 @@ class InPredicate(Expr):
70
71
  def _id_attrs(self) -> List[Tuple[str, Any]]:
71
72
  return super()._id_attrs() + [('value_list', self.value_list)]
72
73
 
73
- def sql_expr(self) -> Optional[sql.ClauseElement]:
74
- lhs_sql_exprs = self.components[0].sql_expr()
74
+ def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ClauseElement]:
75
+ lhs_sql_exprs = sql_elements.get(self.components[0])
75
76
  if lhs_sql_exprs is None or self.value_list is None:
76
77
  return None
77
78
  return lhs_sql_exprs.in_(self.value_list)
@@ -0,0 +1,200 @@
1
+ from __future__ import annotations
2
+
3
+ import copy
4
+ from typing import Any, Iterable, Optional
5
+
6
+ import numpy as np
7
+ import sqlalchemy as sql
8
+
9
+ import pixeltable.exceptions as excs
10
+ import pixeltable.type_system as ts
11
+
12
+ from .data_row import DataRow
13
+ from .expr import Expr
14
+ from .literal import Literal
15
+ from .row_builder import RowBuilder
16
+ from .sql_element_cache import SqlElementCache
17
+
18
+
19
+ class InlineArray(Expr):
20
+ """
21
+ Array 'literal' which can use Exprs as values.
22
+ """
23
+
24
+ def __init__(self, elements: Iterable):
25
+ exprs = []
26
+ for el in elements:
27
+ if isinstance(el, Expr):
28
+ exprs.append(el)
29
+ elif isinstance(el, list) or isinstance(el, tuple):
30
+ exprs.append(InlineArray(el))
31
+ else:
32
+ exprs.append(Literal(el))
33
+
34
+ inferred_element_type: Optional[ts.ColumnType] = ts.InvalidType()
35
+ for i, expr in enumerate(exprs):
36
+ supertype = inferred_element_type.supertype(expr.col_type)
37
+ if supertype is None:
38
+ raise excs.Error(
39
+ f'Could not infer element type of array: element of type `{expr.col_type}` at index {i} '
40
+ f'is not compatible with type `{inferred_element_type}` of preceding elements'
41
+ )
42
+ inferred_element_type = supertype
43
+
44
+ if inferred_element_type.is_scalar_type():
45
+ col_type = ts.ArrayType((len(exprs),), inferred_element_type)
46
+ elif inferred_element_type.is_array_type():
47
+ assert isinstance(inferred_element_type, ts.ArrayType)
48
+ col_type = ts.ArrayType(
49
+ (len(exprs), *inferred_element_type.shape),
50
+ ts.ColumnType.make_type(inferred_element_type.dtype)
51
+ )
52
+ else:
53
+ raise excs.Error(f'Element type is not a valid dtype for an array: {inferred_element_type}')
54
+
55
+ super().__init__(col_type)
56
+ self.components.extend(exprs)
57
+ self.id = self._create_id()
58
+
59
+ def __str__(self) -> str:
60
+ elem_strs = [str(expr) for expr in self.components]
61
+ return f'[{", ".join(elem_strs)}]'
62
+
63
+ def _equals(self, _: InlineArray) -> bool:
64
+ return True # Always true if components match
65
+
66
+ def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
67
+ return None
68
+
69
+ def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
70
+ data_row[self.slot_idx] = np.array([data_row[el.slot_idx] for el in self.components])
71
+
72
+ def _as_dict(self) -> dict:
73
+ return super()._as_dict()
74
+
75
+ @classmethod
76
+ def _from_dict(cls, _: dict, components: list[Expr]) -> Expr:
77
+ try:
78
+ return cls(components)
79
+ except excs.Error:
80
+ # For legacy compatibility reasons, we need to try constructing as an `InlineList`.
81
+ # This is because in schema versions <= 19, `InlineArray` was serialized incorrectly, and
82
+ # there is no way to determine the correct expression type until the subexpressions are
83
+ # loaded and their types are known.
84
+ return InlineList(components)
85
+
86
+
87
+ class InlineList(Expr):
88
+ """
89
+ List 'literal' which can use Exprs as values.
90
+ """
91
+
92
+ def __init__(self, elements: Iterable):
93
+ exprs = []
94
+ for el in elements:
95
+ if isinstance(el, Expr):
96
+ exprs.append(el)
97
+ elif isinstance(el, list) or isinstance(el, tuple):
98
+ exprs.append(InlineList(el))
99
+ elif isinstance(el, dict):
100
+ exprs.append(InlineDict(el))
101
+ else:
102
+ exprs.append(Literal(el))
103
+
104
+ super().__init__(ts.JsonType())
105
+ self.components.extend(exprs)
106
+ self.id = self._create_id()
107
+
108
+ def __str__(self) -> str:
109
+ elem_strs = [str(expr) for expr in self.components]
110
+ return f'[{", ".join(elem_strs)}]'
111
+
112
+ def _equals(self, _: InlineList) -> bool:
113
+ return True # Always true if components match
114
+
115
+ def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
116
+ return None
117
+
118
+ def eval(self, data_row: DataRow, _: RowBuilder) -> None:
119
+ data_row[self.slot_idx] = [data_row[el.slot_idx] for el in self.components]
120
+
121
+ def _as_dict(self) -> dict:
122
+ return super()._as_dict()
123
+
124
+ @classmethod
125
+ def _from_dict(cls, _: dict, components: list[Expr]) -> Expr:
126
+ return cls(components)
127
+
128
+
129
+ class InlineDict(Expr):
130
+ """
131
+ Dictionary 'literal' which can use Exprs as values.
132
+ """
133
+
134
+ keys: list[str]
135
+
136
+ def __init__(self, d: dict[str, Any]):
137
+ self.keys = []
138
+ exprs: list[Expr] = []
139
+ for key, val in d.items():
140
+ if not isinstance(key, str):
141
+ raise excs.Error(f'Dictionary requires string keys; {key} has type {type(key)}')
142
+ self.keys.append(key)
143
+ if isinstance(val, Expr):
144
+ exprs.append(val)
145
+ elif isinstance(val, dict):
146
+ exprs.append(InlineDict(val))
147
+ elif isinstance(val, list) or isinstance(val, tuple):
148
+ exprs.append(InlineList(val))
149
+ else:
150
+ exprs.append(Literal(val))
151
+
152
+ super().__init__(ts.JsonType())
153
+ self.components.extend(exprs)
154
+ self.id = self._create_id()
155
+
156
+ def __str__(self) -> str:
157
+ item_strs = list(f"'{key}': {str(expr)}" for key, expr in zip(self.keys, self.components))
158
+ return '{' + ', '.join(item_strs) + '}'
159
+
160
+ def _equals(self, other: InlineDict) -> bool:
161
+ # The dict values are just the components, which have already been checked
162
+ return self.keys == other.keys
163
+
164
+ def _id_attrs(self) -> list[tuple[str, Any]]:
165
+ return super()._id_attrs() + [('keys', self.keys)]
166
+
167
+ def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
168
+ return None
169
+
170
+ def eval(self, data_row: DataRow, _: RowBuilder) -> None:
171
+ assert len(self.keys) == len(self.components)
172
+ data_row[self.slot_idx] = {
173
+ key: data_row[expr.slot_idx]
174
+ for key, expr in zip(self.keys, self.components)
175
+ }
176
+
177
+ def to_kwargs(self) -> dict[str, Any]:
178
+ """Deconstructs this expression into a dictionary by recursively unwrapping all Literals,
179
+ InlineDicts, and InlineLists."""
180
+ return InlineDict._to_kwarg_element(self)
181
+
182
+ @classmethod
183
+ def _to_kwarg_element(cls, expr: Expr) -> Any:
184
+ if isinstance(expr, Literal):
185
+ return expr.val
186
+ if isinstance(expr, InlineDict):
187
+ return {key: cls._to_kwarg_element(val) for key, val in zip(expr.keys, expr.components)}
188
+ if isinstance(expr, InlineList):
189
+ return [cls._to_kwarg_element(el) for el in expr.components]
190
+ return expr
191
+
192
+ def _as_dict(self) -> dict[str, Any]:
193
+ return {'keys': self.keys, **super()._as_dict()}
194
+
195
+ @classmethod
196
+ def _from_dict(cls, d: dict, components: list[Expr]) -> Expr:
197
+ assert 'keys' in d
198
+ assert len(d['keys']) == len(components)
199
+ arg = dict(zip(d['keys'], components))
200
+ return InlineDict(arg)
@@ -8,6 +8,7 @@ import pixeltable.type_system as ts
8
8
  from .data_row import DataRow
9
9
  from .expr import Expr
10
10
  from .row_builder import RowBuilder
11
+ from .sql_element_cache import SqlElementCache
11
12
 
12
13
 
13
14
  class IsNull(Expr):
@@ -22,8 +23,8 @@ class IsNull(Expr):
22
23
  def _equals(self, other: IsNull) -> bool:
23
24
  return True
24
25
 
25
- def sql_expr(self) -> Optional[sql.ClauseElement]:
26
- e = self.components[0].sql_expr()
26
+ def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ClauseElement]:
27
+ e = sql_elements.get(self.components[0])
27
28
  if e is None:
28
29
  return None
29
30
  return e == None
@@ -1,13 +1,14 @@
1
1
  from __future__ import annotations
2
+
2
3
  from typing import Optional, List, Dict
3
4
 
4
5
  import sqlalchemy as sql
5
6
 
6
- from .expr import Expr, ExprScope, _GLOBAL_SCOPE
7
+ import pixeltable.type_system as ts
7
8
  from .data_row import DataRow
9
+ from .expr import Expr, ExprScope, _GLOBAL_SCOPE
8
10
  from .row_builder import RowBuilder
9
- import pixeltable.catalog as catalog
10
- import pixeltable.type_system as ts
11
+ from .sql_element_cache import SqlElementCache
11
12
 
12
13
 
13
14
  class JsonMapper(Expr):
@@ -86,7 +87,7 @@ class JsonMapper(Expr):
86
87
  def _equals(self, other: JsonMapper) -> bool:
87
88
  return True
88
89
 
89
- def sql_expr(self) -> Optional[sql.ClauseElement]:
90
+ def sql_expr(self, _: SqlElementCache) -> Optional[sql.ClauseElement]:
90
91
  return None
91
92
 
92
93
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
@@ -9,6 +9,12 @@ import pixeltable
9
9
  import pixeltable.catalog as catalog
10
10
  import pixeltable.exceptions as excs
11
11
  import pixeltable.type_system as ts
12
+ from .data_row import DataRow
13
+ from .expr import Expr
14
+ from .globals import print_slice
15
+ from .json_mapper import JsonMapper
16
+ from .row_builder import RowBuilder
17
+ from .sql_element_cache import SqlElementCache
12
18
 
13
19
  from .data_row import DataRow
14
20
  from .expr import Expr
@@ -140,7 +146,7 @@ class JsonPath(Expr):
140
146
  def _id_attrs(self) -> list[tuple[str, Any]]:
141
147
  return super()._id_attrs() + [('path_elements', self.path_elements)]
142
148
 
143
- def sql_expr(self) -> Optional[sql.ClauseElement]:
149
+ def sql_expr(self, _: SqlElementCache) -> Optional[sql.ClauseElement]:
144
150
  """
145
151
  Postgres appears to have a bug: jsonb_path_query('{a: [{b: 0}, {b: 1}]}', '$.a.b') returns
146
152
  *two* rows (each containing col val 0), not a single row with [0, 0].
@@ -1,15 +1,17 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import datetime
4
- from typing import Optional, List, Any, Dict, Tuple
4
+ from typing import Any, Dict, List, Optional, Tuple
5
5
 
6
6
  import sqlalchemy as sql
7
7
 
8
- import pixeltable.exceptions as excs
9
8
  import pixeltable.type_system as ts
9
+ from pixeltable.env import Env
10
+
10
11
  from .data_row import DataRow
11
12
  from .expr import Expr
12
13
  from .row_builder import RowBuilder
14
+ from .sql_element_cache import SqlElementCache
13
15
 
14
16
 
15
17
  class Literal(Expr):
@@ -22,6 +24,15 @@ class Literal(Expr):
22
24
  if col_type is None:
23
25
  raise TypeError(f'Not a valid literal: {val}')
24
26
  super().__init__(col_type)
27
+ if isinstance(val, datetime.datetime):
28
+ # Normalize the datetime to UTC: all timestamps are stored as UTC (both in the database and in literals)
29
+ if val.tzinfo is None:
30
+ # We have a naive datetime. Modify it to use the configured default time zone
31
+ default_tz = Env.get().default_time_zone
32
+ if default_tz is not None:
33
+ val = val.replace(tzinfo=default_tz)
34
+ # Now convert to UTC
35
+ val = val.astimezone(datetime.timezone.utc)
25
36
  self.val = val
26
37
  self.id = self._create_id()
27
38
 
@@ -29,17 +40,24 @@ class Literal(Expr):
29
40
  return 'Literal'
30
41
 
31
42
  def __str__(self) -> str:
32
- if self.col_type.is_string_type() or self.col_type.is_timestamp_type():
43
+ if self.col_type.is_string_type():
33
44
  return f"'{self.val}'"
45
+ if self.col_type.is_timestamp_type():
46
+ assert isinstance(self.val, datetime.datetime)
47
+ default_tz = Env.get().default_time_zone
48
+ return f"'{self.val.astimezone(default_tz).isoformat()}'"
34
49
  return str(self.val)
35
50
 
51
+ def __repr__(self) -> str:
52
+ return f'Literal({self.val!r})'
53
+
36
54
  def _equals(self, other: Literal) -> bool:
37
55
  return self.val == other.val
38
56
 
39
57
  def _id_attrs(self) -> List[Tuple[str, Any]]:
40
58
  return super()._id_attrs() + [('val', self.val)]
41
59
 
42
- def sql_expr(self) -> Optional[sql.ClauseElement]:
60
+ def sql_expr(self, _: SqlElementCache) -> Optional[sql.ClauseElement]:
43
61
  # we need to return something here so that we can generate a Where clause for predicates
44
62
  # that involve literals (like Where c > 0)
45
63
  return sql.sql.expression.literal(self.val)
@@ -52,7 +70,12 @@ class Literal(Expr):
52
70
  # For some types, we need to explictly record their type, because JSON does not know
53
71
  # how to interpret them unambiguously
54
72
  if self.col_type.is_timestamp_type():
55
- return {'val': self.val.isoformat(), 'val_t': self.col_type._type.name, **super()._as_dict()}
73
+ assert isinstance(self.val, datetime.datetime)
74
+ assert self.val.tzinfo == datetime.timezone.utc # Must be UTC in a literal
75
+ # Convert to ISO format in UTC (in keeping with the principle: all timestamps are
76
+ # stored as UTC in the database)
77
+ encoded_val = self.val.isoformat()
78
+ return {'val': encoded_val, 'val_t': self.col_type._type.name, **super()._as_dict()}
56
79
  else:
57
80
  return {'val': self.val, **super()._as_dict()}
58
81
 
@@ -61,6 +84,10 @@ class Literal(Expr):
61
84
  assert 'val' in d
62
85
  if 'val_t' in d:
63
86
  val_t = d['val_t']
87
+ # Currently the only special-cased literal type is TIMESTAMP
64
88
  assert val_t == ts.ColumnType.Type.TIMESTAMP.name
65
- return cls(datetime.datetime.fromisoformat(d['val']))
66
- return cls(d['val'])
89
+ dt = datetime.datetime.fromisoformat(d['val'])
90
+ assert dt.tzinfo == datetime.timezone.utc # Must be UTC in the database
91
+ return cls(dt)
92
+ else:
93
+ return cls(d['val'])
@@ -2,12 +2,12 @@ from typing import Any, Optional
2
2
 
3
3
  import sqlalchemy as sql
4
4
 
5
- import pixeltable.exceptions as excs
6
5
  import pixeltable.type_system as ts
7
6
  from pixeltable.exprs import Expr, FunctionCall
8
- from pixeltable.func import FunctionRegistry, CallableFunction
7
+ from pixeltable.func import FunctionRegistry
9
8
  from .data_row import DataRow
10
9
  from .row_builder import RowBuilder
10
+ from .sql_element_cache import SqlElementCache
11
11
 
12
12
 
13
13
  class MethodRef(Expr):
@@ -53,7 +53,7 @@ class MethodRef(Expr):
53
53
  def _id_attrs(self) -> list[tuple[str, Any]]:
54
54
  return super()._id_attrs() + [('method_name', self.method_name)]
55
55
 
56
- def sql_expr(self) -> Optional[sql.ClauseElement]:
56
+ def sql_expr(self, _: SqlElementCache) -> Optional[sql.ClauseElement]:
57
57
  return None
58
58
 
59
59
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
@@ -1,14 +1,15 @@
1
1
  from __future__ import annotations
2
- from typing import Optional, List, Any, Dict, Tuple
3
- import copy
2
+
3
+ from typing import Optional
4
4
 
5
5
  import sqlalchemy as sql
6
6
 
7
+ import pixeltable.type_system as ts
8
+ from .data_row import DataRow
7
9
  from .expr import Expr, ExprScope
8
10
  from .json_mapper import JsonMapper
9
- from .data_row import DataRow
10
11
  from .row_builder import RowBuilder
11
- import pixeltable.type_system as ts
12
+ from .sql_element_cache import SqlElementCache
12
13
 
13
14
 
14
15
  class ObjectRef(Expr):
@@ -32,7 +33,7 @@ class ObjectRef(Expr):
32
33
  def _equals(self, other: ObjectRef) -> bool:
33
34
  return self.owner is other.owner
34
35
 
35
- def sql_expr(self) -> Optional[sql.ClauseElement]:
36
+ def sql_expr(self, _: SqlElementCache) -> Optional[sql.ClauseElement]:
36
37
  return None
37
38
 
38
39
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
@@ -3,7 +3,7 @@ from __future__ import annotations
3
3
  import sys
4
4
  import time
5
5
  from dataclasses import dataclass
6
- from typing import Optional, List, Any, Dict, Sequence, Tuple, Set
6
+ from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple
7
7
 
8
8
  import sqlalchemy as sql
9
9
 
@@ -11,6 +11,7 @@ import pixeltable.catalog as catalog
11
11
  import pixeltable.exceptions as excs
12
12
  import pixeltable.func as func
13
13
  import pixeltable.utils as utils
14
+
14
15
  from .data_row import DataRow
15
16
  from .expr import Expr
16
17
  from .expr_set import ExprSet
@@ -22,7 +23,7 @@ class ExecProfile:
22
23
  self.eval_count = [0] * row_builder.num_materialized
23
24
  self.row_builder = row_builder
24
25
 
25
- def print(self, num_rows: int) -> str:
26
+ def print(self, num_rows: int) -> None:
26
27
  for i in range(self.row_builder.num_materialized):
27
28
  if self.eval_count[i] == 0:
28
29
  continue
@@ -57,7 +58,7 @@ class RowBuilder:
57
58
  target_exprs: List[Expr] # exprs corresponding to target_slot_idxs
58
59
 
59
60
  def __init__(
60
- self, output_exprs: Sequence[Expr], columns: Sequence[catalog.Column], input_exprs: Sequence[Expr]
61
+ self, output_exprs: Sequence[Expr], columns: Sequence[catalog.Column], input_exprs: Iterable[Expr]
61
62
  ):
62
63
  """
63
64
  Args:
@@ -96,7 +97,7 @@ class RowBuilder:
96
97
  expr = ColumnRef(col)
97
98
  expr = self._record_unique_expr(expr, recursive=False)
98
99
  self.add_table_column(col, expr.slot_idx)
99
- self.output_exprs.append(expr)
100
+ self.output_exprs.add(expr)
100
101
 
101
102
  # default eval ctx: all output exprs
102
103
  self.default_eval_ctx = self.create_eval_ctx(list(self.output_exprs), exclude=unique_input_exprs)
@@ -193,7 +194,7 @@ class RowBuilder:
193
194
  expr.components[i] = self._record_unique_expr(c, True)
194
195
  assert expr.slot_idx is None
195
196
  expr.slot_idx = self._next_slot_idx()
196
- self.unique_exprs.append(expr)
197
+ self.unique_exprs.add(expr)
197
198
  return expr
198
199
 
199
200
  def _record_output_expr_id(self, e: Expr, output_expr_id: int) -> None:
@@ -227,18 +228,25 @@ class RowBuilder:
227
228
  # merge dependencies and convert to list
228
229
  return sorted(set().union(*[dependencies[i] for i in target_slot_idxs]))
229
230
 
230
- def substitute_exprs(self, expr_list: list, remove_duplicates: bool = True) -> None:
231
- """Substitutes exprs with their executable counterparts from unique_exprs and optionally removes duplicates"""
232
- i = 0
233
- unique_ids: set[int] = set() # slot idxs within expr_list
234
- while i < len(expr_list):
235
- unique_expr = self.unique_exprs[expr_list[i]]
236
- if unique_expr.slot_idx in unique_ids and remove_duplicates:
237
- del expr_list[i]
238
- else:
239
- expr_list[i] = unique_expr
240
- unique_ids.add(unique_expr.slot_idx)
241
- i += 1
231
+ def set_slot_idxs(self, expr_list: Sequence[Expr], remove_duplicates: bool = True) -> None:
232
+ """
233
+ Recursively sets slot_idx in expr_list and its components
234
+
235
+ remove_duplicates == True: removes duplicates in-place
236
+ """
237
+ for e in expr_list:
238
+ self.__set_slot_idxs_aux(e)
239
+ if remove_duplicates:
240
+ deduped = list(ExprSet(expr_list))
241
+ expr_list[:] = deduped
242
+
243
+ def __set_slot_idxs_aux(self, e: Expr) -> None:
244
+ """Recursively sets slot_idx in e and its components"""
245
+ if e not in self.unique_exprs:
246
+ return
247
+ e.slot_idx = self.unique_exprs[e].slot_idx
248
+ for c in e.components:
249
+ self.__set_slot_idxs_aux(c)
242
250
 
243
251
  def get_dependencies(self, targets: List[Expr], exclude: Optional[List[Expr]] = None) -> List[Expr]:
244
252
  """
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
  from typing import Optional, List, Any, Dict, Tuple
3
+ from .sql_element_cache import SqlElementCache
3
4
  from uuid import UUID
4
5
 
5
6
  import sqlalchemy as sql
@@ -72,7 +73,7 @@ class RowidRef(Expr):
72
73
  self.tbl = tbl.tbl_version
73
74
  self.tbl_id = self.tbl.id
74
75
 
75
- def sql_expr(self) -> Optional[sql.ClauseElement]:
76
+ def sql_expr(self, _: SqlElementCache) -> Optional[sql.ClauseElement]:
76
77
  tbl = self.tbl if self.tbl is not None else catalog.Catalog.get().tbl_versions[(self.tbl_id, None)]
77
78
  rowid_cols = tbl.store_tbl.rowid_columns()
78
79
  return rowid_cols[self.rowid_component_idx]
@@ -1,4 +1,5 @@
1
1
  from typing import Optional, List, Any
2
+ from .sql_element_cache import SqlElementCache
2
3
 
3
4
  import sqlalchemy as sql
4
5
  import PIL.Image
@@ -56,7 +57,7 @@ class SimilarityExpr(Expr):
56
57
  def __str__(self) -> str:
57
58
  return f'{self.components[0]}.similarity({self.components[1]})'
58
59
 
59
- def sql_expr(self) -> Optional[sql.ClauseElement]:
60
+ def sql_expr(self, _: SqlElementCache) -> Optional[sql.ClauseElement]:
60
61
  if not isinstance(self.components[1], Literal):
61
62
  raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not an expression')
62
63
  item = self.components[1].val
@@ -0,0 +1,30 @@
1
+ from typing import Iterable, Union, Optional
2
+
3
+ import sqlalchemy as sql
4
+
5
+ from .expr import Expr
6
+
7
+
8
+ class SqlElementCache:
9
+ """Cache of sql.ColumnElements for exprs"""
10
+
11
+ cache: dict[int, Optional[sql.ColumnElement]] # key: Expr.id
12
+
13
+ def __init__(self):
14
+ self.cache = {}
15
+
16
+ def get(self, e: Expr) -> Optional[sql.ColumnElement]:
17
+ """Returns the sql.ColumnElement for the given Expr, or None if Expr.to_sql() returns None."""
18
+ try:
19
+ return self.cache[e.id]
20
+ except KeyError:
21
+ pass
22
+ el = e.sql_expr(self)
23
+ self.cache[e.id] = el
24
+ return el
25
+
26
+ def contains(self, items: Union[Expr, Iterable[Expr]]) -> bool:
27
+ """Returns True if every item has a (non-None) sql.ColumnElement."""
28
+ if isinstance(items, Expr):
29
+ return self.get(items) is not None
30
+ return all(self.get(e) is not None for e in items)
@@ -1,4 +1,3 @@
1
- import json
2
1
  from typing import Optional, Dict, List, Tuple, Any
3
2
 
4
3
  import sqlalchemy as sql
@@ -6,6 +5,7 @@ import sqlalchemy as sql
6
5
  import pixeltable.type_system as ts
7
6
  from .expr import DataRow, Expr
8
7
  from .row_builder import RowBuilder
8
+ from .sql_element_cache import SqlElementCache
9
9
 
10
10
 
11
11
  class TypeCast(Expr):
@@ -29,9 +29,9 @@ class TypeCast(Expr):
29
29
  def _id_attrs(self) -> List[Tuple[str, Any]]:
30
30
  return super()._id_attrs() + [('new_type', self.col_type)]
31
31
 
32
- def sql_expr(self) -> Optional[sql.ClauseElement]:
32
+ def sql_expr(self, _: SqlElementCache) -> Optional[sql.ClauseElement]:
33
33
  """
34
- `sql_expr` is unimplemented for now, in order to sidestep potentially thorny
34
+ sql_expr() is unimplemented for now, in order to sidestep potentially thorny
35
35
  questions about consistency of doing type conversions in both Python and Postgres.
36
36
  """
37
37
  return None
@@ -6,6 +6,7 @@ import pixeltable.type_system as ts
6
6
  from .data_row import DataRow
7
7
  from .expr import Expr
8
8
  from .row_builder import RowBuilder
9
+ from .sql_element_cache import SqlElementCache
9
10
 
10
11
 
11
12
  class Variable(Expr):
@@ -31,7 +32,7 @@ class Variable(Expr):
31
32
  def __str__(self) -> str:
32
33
  return self.name
33
34
 
34
- def sql_expr(self) -> NoReturn:
35
+ def sql_expr(self, _: SqlElementCache) -> NoReturn:
35
36
  raise NotImplementedError()
36
37
 
37
38
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> NoReturn:
@@ -1,9 +1,9 @@
1
- from typing import Optional, TYPE_CHECKING
1
+ from typing import TYPE_CHECKING, Optional
2
2
 
3
3
  from pixeltable.utils.code import local_public_names
4
4
 
5
5
  if TYPE_CHECKING:
6
- from whisperx.asr import FasterWhisperPipeline
6
+ from whisperx.asr import FasterWhisperPipeline # type: ignore[import-untyped]
7
7
 
8
8
  import pixeltable as pxt
9
9
 
@@ -19,6 +19,8 @@ def transcribe(
19
19
  equivalent to the WhisperX `transcribe` function, as described in the
20
20
  [WhisperX library documentation](https://github.com/m-bain/whisperX).
21
21
 
22
+ WhisperX is part of the `pixeltable.ext` package: long-term support in Pixeltable is not guaranteed.
23
+
22
24
  __Requirements:__
23
25
 
24
26
  - `pip install whisperx`
@@ -40,7 +42,7 @@ def transcribe(
40
42
  >>> tbl['result'] = transcribe(tbl.audio, model='tiny.en')
41
43
  """
42
44
  import torch
43
- import whisperx
45
+ import whisperx # type: ignore[import-untyped]
44
46
 
45
47
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
46
48
  compute_type = compute_type or ('float16' if device == 'cuda' else 'int8')
@@ -60,7 +62,7 @@ def _lookup_model(model_id: str, device: str, compute_type: str) -> 'FasterWhisp
60
62
  return _model_cache[key]
61
63
 
62
64
 
63
- _model_cache = {}
65
+ _model_cache: dict[tuple[str, str, str], 'FasterWhisperPipeline'] = {}
64
66
 
65
67
 
66
68
  __all__ = local_public_names(__name__)