pixeltable 0.3.15__py3-none-any.whl → 0.4.0rc2__py3-none-any.whl

This diff compares the contents of two publicly available versions of a package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (58)
  1. pixeltable/__version__.py +2 -2
  2. pixeltable/catalog/catalog.py +296 -105
  3. pixeltable/catalog/column.py +10 -8
  4. pixeltable/catalog/dir.py +1 -2
  5. pixeltable/catalog/insertable_table.py +25 -20
  6. pixeltable/catalog/schema_object.py +3 -6
  7. pixeltable/catalog/table.py +261 -189
  8. pixeltable/catalog/table_version.py +333 -202
  9. pixeltable/catalog/table_version_handle.py +15 -2
  10. pixeltable/catalog/table_version_path.py +60 -14
  11. pixeltable/catalog/view.py +38 -6
  12. pixeltable/dataframe.py +196 -18
  13. pixeltable/env.py +4 -4
  14. pixeltable/exec/__init__.py +1 -1
  15. pixeltable/exec/expr_eval/evaluators.py +4 -1
  16. pixeltable/exec/in_memory_data_node.py +1 -1
  17. pixeltable/exec/sql_node.py +171 -22
  18. pixeltable/exprs/column_property_ref.py +15 -6
  19. pixeltable/exprs/column_ref.py +32 -11
  20. pixeltable/exprs/comparison.py +1 -1
  21. pixeltable/exprs/data_row.py +5 -3
  22. pixeltable/exprs/expr.py +7 -0
  23. pixeltable/exprs/literal.py +2 -0
  24. pixeltable/exprs/row_builder.py +4 -6
  25. pixeltable/exprs/rowid_ref.py +8 -0
  26. pixeltable/exprs/similarity_expr.py +1 -0
  27. pixeltable/func/query_template_function.py +1 -1
  28. pixeltable/func/tools.py +1 -1
  29. pixeltable/functions/gemini.py +0 -1
  30. pixeltable/functions/string.py +212 -58
  31. pixeltable/globals.py +12 -4
  32. pixeltable/index/base.py +5 -0
  33. pixeltable/index/btree.py +5 -0
  34. pixeltable/index/embedding_index.py +5 -0
  35. pixeltable/io/external_store.py +8 -29
  36. pixeltable/io/label_studio.py +1 -1
  37. pixeltable/io/parquet.py +2 -2
  38. pixeltable/io/table_data_conduit.py +0 -31
  39. pixeltable/metadata/__init__.py +11 -2
  40. pixeltable/metadata/converters/convert_13.py +2 -2
  41. pixeltable/metadata/converters/convert_30.py +6 -11
  42. pixeltable/metadata/converters/convert_35.py +9 -0
  43. pixeltable/metadata/converters/convert_36.py +38 -0
  44. pixeltable/metadata/converters/util.py +3 -9
  45. pixeltable/metadata/notes.py +2 -0
  46. pixeltable/metadata/schema.py +8 -1
  47. pixeltable/plan.py +221 -14
  48. pixeltable/share/packager.py +137 -13
  49. pixeltable/share/publish.py +2 -2
  50. pixeltable/store.py +19 -13
  51. pixeltable/utils/dbms.py +1 -1
  52. pixeltable/utils/formatter.py +64 -42
  53. pixeltable/utils/sample.py +25 -0
  54. {pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc2.dist-info}/METADATA +2 -1
  55. {pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc2.dist-info}/RECORD +58 -55
  56. {pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc2.dist-info}/LICENSE +0 -0
  57. {pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc2.dist-info}/WHEEL +0 -0
  58. {pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc2.dist-info}/entry_points.txt +0 -0
pixeltable/catalog/column.py CHANGED
@@ -15,7 +15,6 @@ from .globals import MediaValidation, is_valid_identifier
15
15
 
16
16
  if TYPE_CHECKING:
17
17
  from .table_version import TableVersion
18
- from .table_version_handle import TableVersionHandle
19
18
  from .table_version_path import TableVersionPath
20
19
 
21
20
  _logger = logging.getLogger('pixeltable')
@@ -44,7 +43,10 @@ class Column:
44
43
  _value_expr: Optional[exprs.Expr]
45
44
  value_expr_dict: Optional[dict[str, Any]]
46
45
  dependent_cols: set[Column]
47
- tbl: Optional[TableVersionHandle]
46
+ # we store a TableVersion here, not a TableVersionHandle, because this column is owned by that TableVersion instance
47
+ # (re-resolving it later to a different instance doesn't make sense)
48
+ tbl: Optional[TableVersion]
49
+ # tbl: Optional[TableVersionHandle]
48
50
 
49
51
  def __init__(
50
52
  self,
@@ -138,7 +140,7 @@ class Column:
138
140
  message = (
139
141
  dedent(
140
142
  f"""
141
- The computed column {self.name!r} in table {self.tbl.get().name!r} is no longer valid.
143
+ The computed column {self.name!r} in table {self.tbl.name!r} is no longer valid.
142
144
  {{validation_error}}
143
145
  You can continue to query existing data from this column, but evaluating it on new data will raise an error.
144
146
  """ # noqa: E501
@@ -175,8 +177,8 @@ class Column:
175
177
  # multiple dependents)
176
178
  def get_idx_info(self, reference_tbl: Optional['TableVersionPath'] = None) -> dict[str, 'TableVersion.IndexInfo']:
177
179
  assert self.tbl is not None
178
- tbl = reference_tbl.tbl_version if reference_tbl is not None else self.tbl
179
- return {name: info for name, info in tbl.get().idxs_by_name.items() if info.col == self}
180
+ tbl = reference_tbl.tbl_version.get() if reference_tbl is not None else self.tbl
181
+ return {name: info for name, info in tbl.idxs_by_name.items() if info.col == self}
180
182
 
181
183
  @property
182
184
  def is_computed(self) -> bool:
@@ -199,14 +201,14 @@ class Column:
199
201
  @property
200
202
  def qualified_name(self) -> str:
201
203
  assert self.tbl is not None
202
- return f'{self.tbl.get().name}.{self.name}'
204
+ return f'{self.tbl.name}.{self.name}'
203
205
 
204
206
  @property
205
207
  def media_validation(self) -> MediaValidation:
206
208
  if self._media_validation is not None:
207
209
  return self._media_validation
208
210
  assert self.tbl is not None
209
- return self.tbl.get().media_validation
211
+ return self.tbl.media_validation
210
212
 
211
213
  @property
212
214
  def is_required_for_insert(self) -> bool:
@@ -256,7 +258,7 @@ class Column:
256
258
  return f'{self.name}: {self.col_type}'
257
259
 
258
260
  def __repr__(self) -> str:
259
- return f'Column({self.id!r}, {self.name!r}, tbl={self.tbl.get().name!r})'
261
+ return f'Column({self.id!r}, {self.name!r}, tbl={self.tbl.name!r})'
260
262
 
261
263
  def __hash__(self) -> int:
262
264
  # TODO(aaron-siegel): This and __eq__ do not capture the table version. We need to rethink the Column
pixeltable/catalog/dir.py CHANGED
@@ -38,13 +38,12 @@ class Dir(SchemaObject):
38
38
  def _display_name(cls) -> str:
39
39
  return 'directory'
40
40
 
41
- @property
42
41
  def _path(self) -> str:
43
42
  """Returns the path to this schema object."""
44
43
  if self._dir_id is None:
45
44
  # we're the root dir
46
45
  return ''
47
- return super()._path
46
+ return super()._path()
48
47
 
49
48
  def _move(self, new_name: str, new_dir_id: UUID) -> None:
50
49
  # print(
pixeltable/catalog/insertable_table.py CHANGED
@@ -138,37 +138,40 @@ class InsertableTable(Table):
138
138
  print_stats: bool = False,
139
139
  **kwargs: Any,
140
140
  ) -> UpdateStatus:
141
+ from pixeltable.catalog import Catalog
141
142
  from pixeltable.io.table_data_conduit import UnkTableDataConduit
142
143
 
143
- table = self
144
- if source is None:
145
- source = [kwargs]
146
- kwargs = None
144
+ with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
145
+ table = self
146
+ if source is None:
147
+ source = [kwargs]
148
+ kwargs = None
147
149
 
148
- tds = UnkTableDataConduit(
149
- source, source_format=source_format, src_schema_overrides=schema_overrides, extra_fields=kwargs
150
- )
151
- data_source = tds.specialize()
152
- if data_source.source_column_map is None:
153
- data_source.src_pk = []
150
+ tds = UnkTableDataConduit(
151
+ source, source_format=source_format, src_schema_overrides=schema_overrides, extra_fields=kwargs
152
+ )
153
+ data_source = tds.specialize()
154
+ if data_source.source_column_map is None:
155
+ data_source.src_pk = []
154
156
 
155
- assert isinstance(table, Table)
156
- data_source.add_table_info(table)
157
- data_source.prepare_for_insert_into_table()
157
+ assert isinstance(table, Table)
158
+ data_source.add_table_info(table)
159
+ data_source.prepare_for_insert_into_table()
158
160
 
159
- fail_on_exception = OnErrorParameter.fail_on_exception(on_error)
160
- return table.insert_table_data_source(
161
- data_source=data_source, fail_on_exception=fail_on_exception, print_stats=print_stats
162
- )
161
+ fail_on_exception = OnErrorParameter.fail_on_exception(on_error)
162
+ return table.insert_table_data_source(
163
+ data_source=data_source, fail_on_exception=fail_on_exception, print_stats=print_stats
164
+ )
163
165
 
164
166
  def insert_table_data_source(
165
167
  self, data_source: TableDataConduit, fail_on_exception: bool, print_stats: bool = False
166
168
  ) -> pxt.UpdateStatus:
167
169
  """Insert row batches into this table from a `TableDataConduit`."""
170
+ from pixeltable.catalog import Catalog
168
171
  from pixeltable.io.table_data_conduit import DFTableDataConduit
169
172
 
170
173
  status = pxt.UpdateStatus()
171
- with Env.get().begin_xact():
174
+ with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
172
175
  if isinstance(data_source, DFTableDataConduit):
173
176
  status += self._tbl_version.get().insert(
174
177
  rows=None, df=data_source.pxt_df, print_stats=print_stats, fail_on_exception=fail_on_exception
@@ -226,7 +229,9 @@ class InsertableTable(Table):
226
229
 
227
230
  >>> tbl.delete(tbl.a > 5)
228
231
  """
229
- with Env.get().begin_xact():
232
+ from pixeltable.catalog import Catalog
233
+
234
+ with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
230
235
  return self._tbl_version.get().delete(where=where)
231
236
 
232
237
  @property
@@ -238,4 +243,4 @@ class InsertableTable(Table):
238
243
  return []
239
244
 
240
245
  def _table_descriptor(self) -> str:
241
- return f'Table {self._path!r}'
246
+ return f'Table {self._path()!r}'
pixeltable/catalog/schema_object.py CHANGED
@@ -2,8 +2,6 @@ from abc import abstractmethod
2
2
  from typing import TYPE_CHECKING, Any, Optional
3
3
  from uuid import UUID
4
4
 
5
- from pixeltable.env import Env
6
-
7
5
  if TYPE_CHECKING:
8
6
  from pixeltable import catalog
9
7
 
@@ -28,24 +26,23 @@ class SchemaObject:
28
26
  """Returns the parent directory of this schema object."""
29
27
  from .catalog import Catalog
30
28
 
31
- with Env.get().begin_xact():
29
+ with Catalog.get().begin_xact(for_write=False):
32
30
  if self._dir_id is None:
33
31
  return None
34
32
  return Catalog.get().get_dir(self._dir_id)
35
33
 
36
- @property
37
34
  def _path(self) -> str:
38
35
  """Returns the path to this schema object."""
39
36
  from .catalog import Catalog
40
37
 
41
38
  assert self._dir_id is not None
42
- with Env.get().begin_xact():
39
+ with Catalog.get().begin_xact(for_write=False):
43
40
  path = Catalog.get().get_dir_path(self._dir_id)
44
41
  return str(path.append(self._name))
45
42
 
46
43
  def get_metadata(self) -> dict[str, Any]:
47
44
  """Returns metadata associated with this schema object."""
48
- return {'name': self._name, 'path': self._path}
45
+ return {'name': self._name, 'path': self._path()}
49
46
 
50
47
  @classmethod
51
48
  @abstractmethod