pixeltable 0.3.15__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (78)
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +9 -1
  4. pixeltable/catalog/catalog.py +559 -134
  5. pixeltable/catalog/column.py +36 -32
  6. pixeltable/catalog/dir.py +1 -2
  7. pixeltable/catalog/globals.py +12 -0
  8. pixeltable/catalog/insertable_table.py +30 -25
  9. pixeltable/catalog/schema_object.py +9 -6
  10. pixeltable/catalog/table.py +334 -267
  11. pixeltable/catalog/table_version.py +358 -241
  12. pixeltable/catalog/table_version_handle.py +18 -2
  13. pixeltable/catalog/table_version_path.py +86 -16
  14. pixeltable/catalog/view.py +47 -23
  15. pixeltable/dataframe.py +198 -19
  16. pixeltable/env.py +6 -4
  17. pixeltable/exceptions.py +6 -0
  18. pixeltable/exec/__init__.py +1 -1
  19. pixeltable/exec/exec_node.py +2 -0
  20. pixeltable/exec/expr_eval/evaluators.py +4 -1
  21. pixeltable/exec/expr_eval/expr_eval_node.py +4 -4
  22. pixeltable/exec/in_memory_data_node.py +1 -1
  23. pixeltable/exec/sql_node.py +188 -22
  24. pixeltable/exprs/column_property_ref.py +16 -6
  25. pixeltable/exprs/column_ref.py +33 -11
  26. pixeltable/exprs/comparison.py +1 -1
  27. pixeltable/exprs/data_row.py +5 -3
  28. pixeltable/exprs/expr.py +11 -4
  29. pixeltable/exprs/literal.py +2 -0
  30. pixeltable/exprs/row_builder.py +4 -6
  31. pixeltable/exprs/rowid_ref.py +8 -0
  32. pixeltable/exprs/similarity_expr.py +1 -0
  33. pixeltable/func/__init__.py +1 -0
  34. pixeltable/func/mcp.py +74 -0
  35. pixeltable/func/query_template_function.py +5 -3
  36. pixeltable/func/tools.py +12 -2
  37. pixeltable/func/udf.py +2 -2
  38. pixeltable/functions/__init__.py +1 -0
  39. pixeltable/functions/anthropic.py +19 -45
  40. pixeltable/functions/deepseek.py +19 -38
  41. pixeltable/functions/fireworks.py +9 -18
  42. pixeltable/functions/gemini.py +2 -3
  43. pixeltable/functions/groq.py +108 -0
  44. pixeltable/functions/llama_cpp.py +6 -6
  45. pixeltable/functions/mistralai.py +16 -53
  46. pixeltable/functions/ollama.py +1 -1
  47. pixeltable/functions/openai.py +82 -165
  48. pixeltable/functions/string.py +212 -58
  49. pixeltable/functions/together.py +22 -80
  50. pixeltable/globals.py +10 -4
  51. pixeltable/index/base.py +5 -0
  52. pixeltable/index/btree.py +5 -0
  53. pixeltable/index/embedding_index.py +5 -0
  54. pixeltable/io/external_store.py +10 -31
  55. pixeltable/io/label_studio.py +5 -5
  56. pixeltable/io/parquet.py +2 -2
  57. pixeltable/io/table_data_conduit.py +1 -32
  58. pixeltable/metadata/__init__.py +11 -2
  59. pixeltable/metadata/converters/convert_13.py +2 -2
  60. pixeltable/metadata/converters/convert_30.py +6 -11
  61. pixeltable/metadata/converters/convert_35.py +9 -0
  62. pixeltable/metadata/converters/convert_36.py +38 -0
  63. pixeltable/metadata/converters/convert_37.py +15 -0
  64. pixeltable/metadata/converters/util.py +3 -9
  65. pixeltable/metadata/notes.py +3 -0
  66. pixeltable/metadata/schema.py +13 -1
  67. pixeltable/plan.py +135 -12
  68. pixeltable/share/packager.py +138 -14
  69. pixeltable/share/publish.py +2 -2
  70. pixeltable/store.py +19 -13
  71. pixeltable/type_system.py +30 -0
  72. pixeltable/utils/dbms.py +1 -1
  73. pixeltable/utils/formatter.py +64 -42
  74. {pixeltable-0.3.15.dist-info → pixeltable-0.4.0.dist-info}/METADATA +2 -1
  75. {pixeltable-0.3.15.dist-info → pixeltable-0.4.0.dist-info}/RECORD +78 -73
  76. {pixeltable-0.3.15.dist-info → pixeltable-0.4.0.dist-info}/LICENSE +0 -0
  77. {pixeltable-0.3.15.dist-info → pixeltable-0.4.0.dist-info}/WHEEL +0 -0
  78. {pixeltable-0.3.15.dist-info → pixeltable-0.4.0.dist-info}/entry_points.txt +0 -0
@@ -15,7 +15,6 @@ from .globals import MediaValidation, is_valid_identifier
15
15
 
16
16
  if TYPE_CHECKING:
17
17
  from .table_version import TableVersion
18
- from .table_version_handle import TableVersionHandle
19
18
  from .table_version_path import TableVersionPath
20
19
 
21
20
  _logger = logging.getLogger('pixeltable')
@@ -43,8 +42,9 @@ class Column:
43
42
  sa_errortype_col: Optional[sql.schema.Column]
44
43
  _value_expr: Optional[exprs.Expr]
45
44
  value_expr_dict: Optional[dict[str, Any]]
46
- dependent_cols: set[Column]
47
- tbl: Optional[TableVersionHandle]
45
+ # we store a TableVersion here, not a TableVersionHandle, because this column is owned by that TableVersion instance
46
+ # (re-resolving it later to a different instance doesn't make sense)
47
+ tbl: Optional[TableVersion]
48
48
 
49
49
  def __init__(
50
50
  self,
@@ -60,6 +60,7 @@ class Column:
60
60
  sa_col_type: Optional[sql.sqltypes.TypeEngine] = None,
61
61
  records_errors: Optional[bool] = None,
62
62
  value_expr_dict: Optional[dict[str, Any]] = None,
63
+ tbl: Optional[TableVersion] = None,
63
64
  ):
64
65
  """Column constructor.
65
66
 
@@ -84,6 +85,7 @@ class Column:
84
85
  if name is not None and not is_valid_identifier(name):
85
86
  raise excs.Error(f"Invalid column name: '{name}'")
86
87
  self.name = name
88
+ self.tbl = tbl
87
89
  if col_type is None and computed_with is None:
88
90
  raise excs.Error(f'Column `{name}`: col_type is required if computed_with is not specified')
89
91
 
@@ -92,6 +94,7 @@ class Column:
92
94
  if computed_with is not None:
93
95
  value_expr = exprs.Expr.from_object(computed_with)
94
96
  if value_expr is None:
97
+ # TODO: this shouldn't be a user-facing error
95
98
  raise excs.Error(
96
99
  f'Column {name}: computed_with needs to be a valid Pixeltable expression, '
97
100
  f'but it is a {type(computed_with)}'
@@ -99,13 +102,15 @@ class Column:
99
102
  else:
100
103
  self._value_expr = value_expr.copy()
101
104
  self.col_type = self._value_expr.col_type
105
+ if self._value_expr is not None and self.value_expr_dict is None:
106
+ self.value_expr_dict = self._value_expr.as_dict()
102
107
 
103
108
  if col_type is not None:
104
109
  self.col_type = col_type
105
110
  assert self.col_type is not None
106
111
 
107
112
  self.stored = stored
108
- self.dependent_cols = set() # cols with value_exprs that reference us; set by TableVersion
113
+ # self.dependent_cols = set() # cols with value_exprs that reference us; set by TableVersion
109
114
  self.id = col_id
110
115
  self.is_pk = is_pk
111
116
  self._media_validation = media_validation
@@ -122,36 +127,35 @@ class Column:
122
127
  self.sa_errormsg_col = None
123
128
  self.sa_errortype_col = None
124
129
 
125
- self.tbl = None # set by owning TableVersion
130
+ def init_value_expr(self) -> None:
131
+ from pixeltable import exprs
132
+
133
+ if self._value_expr is not None or self.value_expr_dict is None:
134
+ return
135
+ self._value_expr = exprs.Expr.from_dict(self.value_expr_dict)
136
+ self._value_expr.bind_rel_paths()
137
+ if not self._value_expr.is_valid:
138
+ message = (
139
+ dedent(
140
+ f"""
141
+ The computed column {self.name!r} in table {self.tbl.name!r} is no longer valid.
142
+ {{validation_error}}
143
+ You can continue to query existing data from this column, but evaluating it on new data will raise an error.
144
+ """ # noqa: E501
145
+ )
146
+ .strip()
147
+ .format(validation_error=self._value_expr.validation_error)
148
+ )
149
+ warnings.warn(message, category=excs.PixeltableWarning, stacklevel=2)
126
150
 
127
151
  @property
128
152
  def value_expr(self) -> Optional[exprs.Expr]:
129
- """Instantiate value_expr on-demand"""
130
- # TODO: instantiate expr in the c'tor and add an Expr.prepare() that can create additional state after the
131
- # catalog has been fully loaded; that way, we encounter bugs in the serialization/deserialization logic earlier
132
- if self.value_expr_dict is not None and self._value_expr is None:
133
- from pixeltable import exprs
134
-
135
- self._value_expr = exprs.Expr.from_dict(self.value_expr_dict)
136
- self._value_expr.bind_rel_paths()
137
- if not self._value_expr.is_valid:
138
- message = (
139
- dedent(
140
- f"""
141
- The computed column {self.name!r} in table {self.tbl.get().name!r} is no longer valid.
142
- {{validation_error}}
143
- You can continue to query existing data from this column, but evaluating it on new data will raise an error.
144
- """ # noqa: E501
145
- )
146
- .strip()
147
- .format(validation_error=self._value_expr.validation_error)
148
- )
149
- warnings.warn(message, category=excs.PixeltableWarning, stacklevel=2)
153
+ assert self.value_expr_dict is None or self._value_expr is not None
150
154
  return self._value_expr
151
155
 
152
156
  def set_value_expr(self, value_expr: exprs.Expr) -> None:
153
157
  self._value_expr = value_expr
154
- self.value_expr_dict = None
158
+ self.value_expr_dict = self._value_expr.as_dict()
155
159
 
156
160
  def check_value_expr(self) -> None:
157
161
  assert self._value_expr is not None
@@ -175,8 +179,8 @@ class Column:
175
179
  # multiple dependents)
176
180
  def get_idx_info(self, reference_tbl: Optional['TableVersionPath'] = None) -> dict[str, 'TableVersion.IndexInfo']:
177
181
  assert self.tbl is not None
178
- tbl = reference_tbl.tbl_version if reference_tbl is not None else self.tbl
179
- return {name: info for name, info in tbl.get().idxs_by_name.items() if info.col == self}
182
+ tbl = reference_tbl.tbl_version.get() if reference_tbl is not None else self.tbl
183
+ return {name: info for name, info in tbl.idxs_by_name.items() if info.col == self}
180
184
 
181
185
  @property
182
186
  def is_computed(self) -> bool:
@@ -199,14 +203,14 @@ class Column:
199
203
  @property
200
204
  def qualified_name(self) -> str:
201
205
  assert self.tbl is not None
202
- return f'{self.tbl.get().name}.{self.name}'
206
+ return f'{self.tbl.name}.{self.name}'
203
207
 
204
208
  @property
205
209
  def media_validation(self) -> MediaValidation:
206
210
  if self._media_validation is not None:
207
211
  return self._media_validation
208
212
  assert self.tbl is not None
209
- return self.tbl.get().media_validation
213
+ return self.tbl.media_validation
210
214
 
211
215
  @property
212
216
  def is_required_for_insert(self) -> bool:
@@ -256,7 +260,7 @@ class Column:
256
260
  return f'{self.name}: {self.col_type}'
257
261
 
258
262
  def __repr__(self) -> str:
259
- return f'Column({self.id!r}, {self.name!r}, tbl={self.tbl.get().name!r})'
263
+ return f'Column({self.id!r}, {self.name!r}, tbl={self.tbl.name!r})'
260
264
 
261
265
  def __hash__(self) -> int:
262
266
  # TODO(aaron-siegel): This and __eq__ do not capture the table version. We need to rethink the Column
pixeltable/catalog/dir.py CHANGED
@@ -38,13 +38,12 @@ class Dir(SchemaObject):
38
38
  def _display_name(cls) -> str:
39
39
  return 'directory'
40
40
 
41
- @property
42
41
  def _path(self) -> str:
43
42
  """Returns the path to this schema object."""
44
43
  if self._dir_id is None:
45
44
  # we're the root dir
46
45
  return ''
47
- return super()._path
46
+ return super()._path()
48
47
 
49
48
  def _move(self, new_name: str, new_dir_id: UUID) -> None:
50
49
  # print(
@@ -5,6 +5,7 @@ import enum
5
5
  import itertools
6
6
  import logging
7
7
  from typing import Optional
8
+ from uuid import UUID
8
9
 
9
10
  from typing_extensions import Self
10
11
 
@@ -21,6 +22,17 @@ _ROWID_COLUMN_NAME = '_rowid'
21
22
  _PREDEF_SYMBOLS: Optional[set[str]] = None
22
23
 
23
24
 
25
+ @dataclasses.dataclass(frozen=True)
26
+ class QColumnId:
27
+ """Qualified column id"""
28
+
29
+ tbl_id: UUID
30
+ col_id: int
31
+
32
+ # def __hash__(self) -> int:
33
+ # return hash((self.tbl_id, self.col_id))
34
+
35
+
24
36
  @dataclasses.dataclass
25
37
  class UpdateStatus:
26
38
  """
@@ -51,6 +51,7 @@ class InsertableTable(Table):
51
51
  def __init__(self, dir_id: UUID, tbl_version: TableVersionHandle):
52
52
  tbl_version_path = TableVersionPath(tbl_version)
53
53
  super().__init__(tbl_version.id, dir_id, tbl_version.get().name, tbl_version_path)
54
+ self._tbl_version = tbl_version
54
55
 
55
56
  @classmethod
56
57
  def _display_name(cls) -> str:
@@ -103,8 +104,8 @@ class InsertableTable(Table):
103
104
  Env.get().console_logger.info(f'Created table `{name}`.')
104
105
  return tbl
105
106
 
106
- def get_metadata(self) -> dict[str, Any]:
107
- md = super().get_metadata()
107
+ def _get_metadata(self) -> dict[str, Any]:
108
+ md = super()._get_metadata()
108
109
  md['is_view'] = False
109
110
  md['is_snapshot'] = False
110
111
  return md
@@ -138,37 +139,40 @@ class InsertableTable(Table):
138
139
  print_stats: bool = False,
139
140
  **kwargs: Any,
140
141
  ) -> UpdateStatus:
142
+ from pixeltable.catalog import Catalog
141
143
  from pixeltable.io.table_data_conduit import UnkTableDataConduit
142
144
 
143
- table = self
144
- if source is None:
145
- source = [kwargs]
146
- kwargs = None
145
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
146
+ table = self
147
+ if source is None:
148
+ source = [kwargs]
149
+ kwargs = None
147
150
 
148
- tds = UnkTableDataConduit(
149
- source, source_format=source_format, src_schema_overrides=schema_overrides, extra_fields=kwargs
150
- )
151
- data_source = tds.specialize()
152
- if data_source.source_column_map is None:
153
- data_source.src_pk = []
151
+ tds = UnkTableDataConduit(
152
+ source, source_format=source_format, src_schema_overrides=schema_overrides, extra_fields=kwargs
153
+ )
154
+ data_source = tds.specialize()
155
+ if data_source.source_column_map is None:
156
+ data_source.src_pk = []
154
157
 
155
- assert isinstance(table, Table)
156
- data_source.add_table_info(table)
157
- data_source.prepare_for_insert_into_table()
158
+ assert isinstance(table, Table)
159
+ data_source.add_table_info(table)
160
+ data_source.prepare_for_insert_into_table()
158
161
 
159
- fail_on_exception = OnErrorParameter.fail_on_exception(on_error)
160
- return table.insert_table_data_source(
161
- data_source=data_source, fail_on_exception=fail_on_exception, print_stats=print_stats
162
- )
162
+ fail_on_exception = OnErrorParameter.fail_on_exception(on_error)
163
+ return table.insert_table_data_source(
164
+ data_source=data_source, fail_on_exception=fail_on_exception, print_stats=print_stats
165
+ )
163
166
 
164
167
  def insert_table_data_source(
165
168
  self, data_source: TableDataConduit, fail_on_exception: bool, print_stats: bool = False
166
169
  ) -> pxt.UpdateStatus:
167
170
  """Insert row batches into this table from a `TableDataConduit`."""
171
+ from pixeltable.catalog import Catalog
168
172
  from pixeltable.io.table_data_conduit import DFTableDataConduit
169
173
 
170
174
  status = pxt.UpdateStatus()
171
- with Env.get().begin_xact():
175
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
172
176
  if isinstance(data_source, DFTableDataConduit):
173
177
  status += self._tbl_version.get().insert(
174
178
  rows=None, df=data_source.pxt_df, print_stats=print_stats, fail_on_exception=fail_on_exception
@@ -186,7 +190,7 @@ class InsertableTable(Table):
186
190
 
187
191
  def _validate_input_rows(self, rows: list[dict[str, Any]]) -> None:
188
192
  """Verify that the input rows match the table schema"""
189
- valid_col_names = set(self._schema.keys())
193
+ valid_col_names = set(self._get_schema().keys())
190
194
  reqd_col_names = set(self._tbl_version_path.tbl_version.get().get_required_col_names())
191
195
  computed_col_names = set(self._tbl_version_path.tbl_version.get().get_computed_col_names())
192
196
  for row in rows:
@@ -226,11 +230,12 @@ class InsertableTable(Table):
226
230
 
227
231
  >>> tbl.delete(tbl.a > 5)
228
232
  """
229
- with Env.get().begin_xact():
233
+ from pixeltable.catalog import Catalog
234
+
235
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
230
236
  return self._tbl_version.get().delete(where=where)
231
237
 
232
- @property
233
- def _base_table(self) -> Optional['Table']:
238
+ def _get_base_table(self) -> Optional['Table']:
234
239
  return None
235
240
 
236
241
  @property
@@ -238,4 +243,4 @@ class InsertableTable(Table):
238
243
  return []
239
244
 
240
245
  def _table_descriptor(self) -> str:
241
- return f'Table {self._path!r}'
246
+ return f'Table {self._path()!r}'
@@ -2,8 +2,6 @@ from abc import abstractmethod
2
2
  from typing import TYPE_CHECKING, Any, Optional
3
3
  from uuid import UUID
4
4
 
5
- from pixeltable.env import Env
6
-
7
5
  if TYPE_CHECKING:
8
6
  from pixeltable import catalog
9
7
 
@@ -28,24 +26,29 @@ class SchemaObject:
28
26
  """Returns the parent directory of this schema object."""
29
27
  from .catalog import Catalog
30
28
 
31
- with Env.get().begin_xact():
29
+ with Catalog.get().begin_xact(for_write=False):
32
30
  if self._dir_id is None:
33
31
  return None
34
32
  return Catalog.get().get_dir(self._dir_id)
35
33
 
36
- @property
37
34
  def _path(self) -> str:
38
35
  """Returns the path to this schema object."""
39
36
  from .catalog import Catalog
40
37
 
41
38
  assert self._dir_id is not None
42
- with Env.get().begin_xact():
39
+ with Catalog.get().begin_xact(for_write=False):
43
40
  path = Catalog.get().get_dir_path(self._dir_id)
44
41
  return str(path.append(self._name))
45
42
 
46
43
  def get_metadata(self) -> dict[str, Any]:
47
44
  """Returns metadata associated with this schema object."""
48
- return {'name': self._name, 'path': self._path}
45
+ from pixeltable.catalog import Catalog
46
+
47
+ with Catalog.get().begin_xact(for_write=False):
48
+ return self._get_metadata()
49
+
50
+ def _get_metadata(self) -> dict[str, Any]:
51
+ return {'name': self._name, 'path': self._path()}
49
52
 
50
53
  @classmethod
51
54
  @abstractmethod