pixeltable 0.4.2__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (60)
  1. pixeltable/__init__.py +1 -0
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +3 -11
  4. pixeltable/catalog/catalog.py +575 -220
  5. pixeltable/catalog/column.py +22 -23
  6. pixeltable/catalog/dir.py +1 -2
  7. pixeltable/catalog/globals.py +2 -148
  8. pixeltable/catalog/insertable_table.py +15 -13
  9. pixeltable/catalog/path.py +6 -0
  10. pixeltable/catalog/schema_object.py +9 -4
  11. pixeltable/catalog/table.py +96 -85
  12. pixeltable/catalog/table_version.py +257 -174
  13. pixeltable/catalog/table_version_path.py +1 -1
  14. pixeltable/catalog/tbl_ops.py +44 -0
  15. pixeltable/catalog/update_status.py +179 -0
  16. pixeltable/catalog/view.py +50 -56
  17. pixeltable/config.py +76 -12
  18. pixeltable/dataframe.py +19 -6
  19. pixeltable/env.py +50 -4
  20. pixeltable/exec/data_row_batch.py +3 -1
  21. pixeltable/exec/exec_node.py +7 -24
  22. pixeltable/exec/expr_eval/schedulers.py +134 -7
  23. pixeltable/exec/in_memory_data_node.py +6 -7
  24. pixeltable/exprs/column_property_ref.py +21 -9
  25. pixeltable/exprs/column_ref.py +7 -2
  26. pixeltable/exprs/function_call.py +2 -2
  27. pixeltable/exprs/row_builder.py +10 -9
  28. pixeltable/exprs/rowid_ref.py +0 -4
  29. pixeltable/func/function.py +3 -3
  30. pixeltable/functions/audio.py +36 -9
  31. pixeltable/functions/gemini.py +4 -4
  32. pixeltable/functions/openai.py +1 -2
  33. pixeltable/functions/video.py +59 -16
  34. pixeltable/globals.py +109 -24
  35. pixeltable/io/__init__.py +1 -1
  36. pixeltable/io/datarows.py +2 -1
  37. pixeltable/io/external_store.py +3 -55
  38. pixeltable/io/globals.py +4 -4
  39. pixeltable/io/hf_datasets.py +10 -2
  40. pixeltable/io/label_studio.py +16 -16
  41. pixeltable/io/pandas.py +1 -0
  42. pixeltable/io/table_data_conduit.py +12 -13
  43. pixeltable/iterators/audio.py +17 -8
  44. pixeltable/iterators/image.py +5 -2
  45. pixeltable/metadata/__init__.py +1 -1
  46. pixeltable/metadata/converters/convert_39.py +125 -0
  47. pixeltable/metadata/converters/util.py +3 -0
  48. pixeltable/metadata/notes.py +1 -0
  49. pixeltable/metadata/schema.py +50 -1
  50. pixeltable/plan.py +4 -0
  51. pixeltable/share/packager.py +20 -38
  52. pixeltable/store.py +40 -51
  53. pixeltable/type_system.py +2 -2
  54. pixeltable/utils/coroutine.py +6 -23
  55. pixeltable/utils/media_store.py +50 -0
  56. {pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/METADATA +1 -1
  57. {pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/RECORD +60 -57
  58. {pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/LICENSE +0 -0
  59. {pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/WHEEL +0 -0
  60. {pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/entry_points.txt +0 -0
pixeltable/catalog/column.py CHANGED
@@ -36,11 +36,10 @@ class Column:
36
36
  _media_validation: Optional[MediaValidation] # if not set, TableVersion.media_validation applies
37
37
  schema_version_add: Optional[int]
38
38
  schema_version_drop: Optional[int]
39
- _records_errors: Optional[bool]
39
+ _stores_cellmd: Optional[bool]
40
40
  sa_col: Optional[sql.schema.Column]
41
41
  sa_col_type: Optional[sql.sqltypes.TypeEngine]
42
- sa_errormsg_col: Optional[sql.schema.Column]
43
- sa_errortype_col: Optional[sql.schema.Column]
42
+ sa_cellmd_col: Optional[sql.schema.Column] # JSON metadata for the cell, e.g. errortype, errormsg for media columns
44
43
  _value_expr: Optional[exprs.Expr]
45
44
  value_expr_dict: Optional[dict[str, Any]]
46
45
  # we store a TableVersion here, not a TableVersionHandle, because this column is owned by that TableVersion instance
@@ -59,7 +58,7 @@ class Column:
59
58
  schema_version_add: Optional[int] = None,
60
59
  schema_version_drop: Optional[int] = None,
61
60
  sa_col_type: Optional[sql.sqltypes.TypeEngine] = None,
62
- records_errors: Optional[bool] = None,
61
+ stores_cellmd: Optional[bool] = None,
63
62
  value_expr_dict: Optional[dict[str, Any]] = None,
64
63
  tbl: Optional[TableVersion] = None,
65
64
  ):
@@ -118,15 +117,14 @@ class Column:
118
117
  self.schema_version_add = schema_version_add
119
118
  self.schema_version_drop = schema_version_drop
120
119
 
121
- self._records_errors = records_errors
120
+ self._stores_cellmd = stores_cellmd
122
121
 
123
122
  # column in the stored table for the values of this Column
124
123
  self.sa_col = None
125
124
  self.sa_col_type = sa_col_type
126
125
 
127
126
  # computed cols also have storage columns for the exception string and type
128
- self.sa_errormsg_col = None
129
- self.sa_errortype_col = None
127
+ self.sa_cellmd_col = None
130
128
 
131
129
  def init_value_expr(self) -> None:
132
130
  from pixeltable import exprs
@@ -203,11 +201,11 @@ class Column:
203
201
  return self.stored
204
202
 
205
203
  @property
206
- def records_errors(self) -> bool:
204
+ def stores_cellmd(self) -> bool:
207
205
  """True if this column also stores error information."""
208
206
  # default: record errors for computed and media columns
209
- if self._records_errors is not None:
210
- return self._records_errors
207
+ if self._stores_cellmd is not None:
208
+ return self._stores_cellmd
211
209
  return self.is_stored and (self.is_computed or self.col_type.is_media_type())
212
210
 
213
211
  @property
@@ -243,28 +241,29 @@ class Column:
243
241
  """
244
242
  assert self.is_stored
245
243
  # all storage columns are nullable (we deal with null errors in Pixeltable directly)
246
- self.sa_col = sql.Column(
247
- self.store_name(),
248
- self.col_type.to_sa_type() if self.sa_col_type is None else self.sa_col_type,
249
- nullable=True,
250
- )
251
- if self.is_computed or self.col_type.is_media_type():
252
- self.sa_errormsg_col = sql.Column(self.errormsg_store_name(), ts.StringType().to_sa_type(), nullable=True)
253
- self.sa_errortype_col = sql.Column(self.errortype_store_name(), ts.StringType().to_sa_type(), nullable=True)
244
+ self.sa_col = sql.Column(self.store_name(), self.get_sa_col_type(), nullable=True)
245
+ if self.stores_cellmd:
246
+ # JSON metadata for the cell, e.g. errortype, errormsg for media columns
247
+ self.sa_cellmd_col = sql.Column(self.cellmd_store_name(), self.sa_cellmd_type(), nullable=True)
254
248
 
255
249
  def get_sa_col_type(self) -> sql.sqltypes.TypeEngine:
256
250
  return self.col_type.to_sa_type() if self.sa_col_type is None else self.sa_col_type
257
251
 
252
+ @classmethod
253
+ def cellmd_type(cls) -> ts.ColumnType:
254
+ return ts.JsonType(nullable=True)
255
+
256
+ @classmethod
257
+ def sa_cellmd_type(cls) -> sql.sqltypes.TypeEngine:
258
+ return cls.cellmd_type().to_sa_type()
259
+
258
260
  def store_name(self) -> str:
259
261
  assert self.id is not None
260
262
  assert self.is_stored
261
263
  return f'col_{self.id}'
262
264
 
263
- def errormsg_store_name(self) -> str:
264
- return f'{self.store_name()}_errormsg'
265
-
266
- def errortype_store_name(self) -> str:
267
- return f'{self.store_name()}_errortype'
265
+ def cellmd_store_name(self) -> str:
266
+ return f'{self.store_name()}_cellmd'
268
267
 
269
268
  def __str__(self) -> str:
270
269
  return f'{self.name}: {self.col_type}'
pixeltable/catalog/dir.py CHANGED
@@ -34,8 +34,7 @@ class Dir(SchemaObject):
34
34
  dir = cls(dir_record.id, parent_id, name)
35
35
  return dir
36
36
 
37
- @classmethod
38
- def _display_name(cls) -> str:
37
+ def _display_name(self) -> str:
39
38
  return 'directory'
40
39
 
41
40
  def _path(self) -> str:
pixeltable/catalog/globals.py CHANGED
@@ -1,19 +1,14 @@
1
1
  from __future__ import annotations
2
2
 
3
- import dataclasses
4
3
  import enum
5
4
  import itertools
6
5
  import logging
7
- from dataclasses import dataclass, field
8
- from typing import TYPE_CHECKING, Optional
6
+ from dataclasses import dataclass
7
+ from typing import Optional
9
8
  from uuid import UUID
10
9
 
11
10
  import pixeltable.exceptions as excs
12
11
 
13
- if TYPE_CHECKING:
14
- from IPython.lib.pretty import RepresentationPrinter
15
-
16
-
17
12
  _logger = logging.getLogger('pixeltable')
18
13
 
19
14
  # name of the position column in a component view
@@ -33,147 +28,6 @@ class QColumnId:
33
28
  col_id: int
34
29
 
35
30
 
36
- @dataclass(frozen=True)
37
- class RowCountStats:
38
- """
39
- Statistics about the counts of rows affected by a table operation.
40
- """
41
-
42
- ins_rows: int = 0 # rows inserted
43
- del_rows: int = 0 # rows deleted
44
- upd_rows: int = 0 # rows updated
45
- num_excs: int = 0 # total number of exceptions
46
- # TODO: disambiguate what this means: # of slots computed or # of columns computed?
47
- computed_values: int = 0 # number of computed values (e.g., computed columns) affected by the operation
48
-
49
- @property
50
- def num_rows(self) -> int:
51
- return self.ins_rows + self.del_rows + self.upd_rows
52
-
53
- def insert_to_update(self) -> 'RowCountStats':
54
- """
55
- Convert insert row count stats to update row count stats.
56
- This is used when an insert operation is treated as an update.
57
- """
58
- return RowCountStats(
59
- ins_rows=0,
60
- del_rows=self.del_rows,
61
- upd_rows=self.upd_rows + self.ins_rows,
62
- num_excs=self.num_excs,
63
- computed_values=self.computed_values,
64
- )
65
-
66
- def __add__(self, other: 'RowCountStats') -> 'RowCountStats':
67
- """
68
- Add the stats from two RowCountStats objects together.
69
- """
70
- return RowCountStats(
71
- ins_rows=self.ins_rows + other.ins_rows,
72
- del_rows=self.del_rows + other.del_rows,
73
- upd_rows=self.upd_rows + other.upd_rows,
74
- num_excs=self.num_excs + other.num_excs,
75
- computed_values=self.computed_values + other.computed_values,
76
- )
77
-
78
-
79
- @dataclass(frozen=True)
80
- class UpdateStatus:
81
- """
82
- Information about updates that resulted from a table operation.
83
- """
84
-
85
- updated_cols: list[str] = dataclasses.field(default_factory=list)
86
- cols_with_excs: list[str] = dataclasses.field(default_factory=list)
87
-
88
- # stats for the rows affected by the operation
89
- row_count_stats: RowCountStats = field(default_factory=RowCountStats)
90
-
91
- # stats for changes cascaded to other tables
92
- cascade_row_count_stats: RowCountStats = field(default_factory=RowCountStats)
93
-
94
- @property
95
- def num_rows(self) -> int:
96
- return self.row_count_stats.num_rows + self.cascade_row_count_stats.num_rows
97
-
98
- @property
99
- def num_excs(self) -> int:
100
- return self.row_count_stats.num_excs + self.cascade_row_count_stats.num_excs
101
-
102
- @property
103
- def num_computed_values(self) -> int:
104
- return self.row_count_stats.computed_values + self.cascade_row_count_stats.computed_values
105
-
106
- def insert_to_update(self) -> 'UpdateStatus':
107
- """
108
- Convert the update status from an insert operation to an update operation.
109
- This is used when an insert operation is treated as an update.
110
- """
111
- return UpdateStatus(
112
- updated_cols=self.updated_cols,
113
- cols_with_excs=self.cols_with_excs,
114
- row_count_stats=self.row_count_stats.insert_to_update(),
115
- cascade_row_count_stats=self.cascade_row_count_stats.insert_to_update(),
116
- )
117
-
118
- def to_cascade(self) -> 'UpdateStatus':
119
- """
120
- Convert the update status to a cascade update status.
121
- This is used when an operation cascades changes to other tables.
122
- """
123
- return UpdateStatus(
124
- updated_cols=self.updated_cols,
125
- cols_with_excs=self.cols_with_excs,
126
- row_count_stats=RowCountStats(),
127
- cascade_row_count_stats=self.cascade_row_count_stats + self.row_count_stats,
128
- )
129
-
130
- def __add__(self, other: 'UpdateStatus') -> UpdateStatus:
131
- """
132
- Add the update status from two UpdateStatus objects together.
133
- """
134
- return UpdateStatus(
135
- updated_cols=list(dict.fromkeys(self.updated_cols + other.updated_cols)),
136
- cols_with_excs=list(dict.fromkeys(self.cols_with_excs + other.cols_with_excs)),
137
- row_count_stats=self.row_count_stats + other.row_count_stats,
138
- cascade_row_count_stats=self.cascade_row_count_stats + other.cascade_row_count_stats,
139
- )
140
-
141
- @property
142
- def insert_msg(self) -> str:
143
- """Return a message describing the results of an insert operation."""
144
- if self.num_excs == 0:
145
- cols_with_excs_str = ''
146
- else:
147
- cols_with_excs_str = (
148
- f' across {len(self.cols_with_excs)} column{"" if len(self.cols_with_excs) == 1 else "s"}'
149
- )
150
- cols_with_excs_str += f' ({", ".join(self.cols_with_excs)})'
151
- msg = (
152
- f'Inserted {self.num_rows} row{"" if self.num_rows == 1 else "s"} '
153
- f'with {self.num_excs} error{"" if self.num_excs == 1 else "s"}{cols_with_excs_str}.'
154
- )
155
- return msg
156
-
157
- @classmethod
158
- def __cnt_str(cls, cnt: int, item: str) -> str:
159
- assert cnt > 0
160
- return f'{cnt} {item}{"" if cnt == 1 else "s"}'
161
-
162
- def _repr_pretty_(self, p: 'RepresentationPrinter', cycle: bool) -> None:
163
- messages = []
164
- if self.row_count_stats.ins_rows > 0:
165
- messages.append(f'{self.__cnt_str(self.row_count_stats.ins_rows, "row")} inserted')
166
- if self.row_count_stats.del_rows > 0:
167
- messages.append(f'{self.__cnt_str(self.row_count_stats.del_rows, "row")} deleted')
168
- if self.row_count_stats.upd_rows > 0:
169
- messages.append(f'{self.__cnt_str(self.row_count_stats.upd_rows, "row")} updated')
170
- if self.num_computed_values > 0:
171
- messages.append(f'{self.__cnt_str(self.num_computed_values, "value")} computed')
172
- if self.row_count_stats.num_excs > 0:
173
- messages.append(self.__cnt_str(self.row_count_stats.num_excs, 'exception'))
174
- p.text(', '.join(messages) + '.' if len(messages) > 0 else 'No rows affected.')
175
-
176
-
177
31
  class MediaValidation(enum.Enum):
178
32
  ON_READ = 0
179
33
  ON_WRITE = 1
pixeltable/catalog/insertable_table.py CHANGED
@@ -10,11 +10,12 @@ from pixeltable import exceptions as excs, type_system as ts
10
10
  from pixeltable.env import Env
11
11
  from pixeltable.utils.filecache import FileCache
12
12
 
13
- from .globals import MediaValidation, UpdateStatus
13
+ from .globals import MediaValidation
14
14
  from .table import Table
15
15
  from .table_version import TableVersion
16
16
  from .table_version_handle import TableVersionHandle
17
17
  from .table_version_path import TableVersionPath
18
+ from .update_status import UpdateStatus
18
19
 
19
20
  if TYPE_CHECKING:
20
21
  from pixeltable import exprs
@@ -53,8 +54,8 @@ class InsertableTable(Table):
53
54
  super().__init__(tbl_version.id, dir_id, tbl_version.get().name, tbl_version_path)
54
55
  self._tbl_version = tbl_version
55
56
 
56
- @classmethod
57
- def _display_name(cls) -> str:
57
+ def _display_name(self) -> str:
58
+ assert not self._tbl_version_path.is_replica()
58
59
  return 'table'
59
60
 
60
61
  @classmethod
@@ -74,10 +75,10 @@ class InsertableTable(Table):
74
75
  column_names = [col.name for col in columns]
75
76
  for pk_col in primary_key:
76
77
  if pk_col not in column_names:
77
- raise excs.Error(f'Primary key column {pk_col} not found in table schema')
78
+ raise excs.Error(f'Primary key column {pk_col!r} not found in table schema.')
78
79
  col = columns[column_names.index(pk_col)]
79
80
  if col.col_type.nullable:
80
- raise excs.Error(f'Primary key column {pk_col} cannot be nullable')
81
+ raise excs.Error(f'Primary key column {pk_col!r} cannot be nullable.')
81
82
  col.is_pk = True
82
83
 
83
84
  _, tbl_version = TableVersion.create(
@@ -100,12 +101,13 @@ class InsertableTable(Table):
100
101
  tbl_version.insert(None, df, fail_on_exception=True)
101
102
  session.commit()
102
103
 
103
- _logger.info(f'Created table `{name}`, id={tbl_version.id}')
104
- Env.get().console_logger.info(f'Created table `{name}`.')
104
+ _logger.info(f'Created table {name!r}, id={tbl_version.id}')
105
+ Env.get().console_logger.info(f'Created table {name!r}.')
105
106
  return tbl
106
107
 
107
108
  def _get_metadata(self) -> dict[str, Any]:
108
109
  md = super()._get_metadata()
110
+ md['base'] = None
109
111
  md['is_view'] = False
110
112
  md['is_snapshot'] = False
111
113
  return md
@@ -171,14 +173,14 @@ class InsertableTable(Table):
171
173
  from pixeltable.catalog import Catalog
172
174
  from pixeltable.io.table_data_conduit import DFTableDataConduit
173
175
 
174
- status = pxt.UpdateStatus()
175
176
  with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
176
177
  if isinstance(data_source, DFTableDataConduit):
177
- status = self._tbl_version.get().insert(
178
+ status = pxt.UpdateStatus()
179
+ status += self._tbl_version.get().insert(
178
180
  rows=None, df=data_source.pxt_df, print_stats=print_stats, fail_on_exception=fail_on_exception
179
181
  )
180
182
  else:
181
- status = UpdateStatus()
183
+ status = pxt.UpdateStatus()
182
184
  for row_batch in data_source.valid_row_batch():
183
185
  status += self._tbl_version.get().insert(
184
186
  rows=row_batch, df=None, print_stats=print_stats, fail_on_exception=fail_on_exception
@@ -202,9 +204,9 @@ class InsertableTable(Table):
202
204
 
203
205
  for col_name, val in row.items():
204
206
  if col_name not in valid_col_names:
205
- raise excs.Error(f'Unknown column name {col_name} in row {row}')
207
+ raise excs.Error(f'Unknown column name {col_name!r} in row {row}')
206
208
  if col_name in computed_col_names:
207
- raise excs.Error(f'Value for computed column {col_name} in row {row}')
209
+ raise excs.Error(f'Value for computed column {col_name!r} in row {row}')
208
210
 
209
211
  # validate data
210
212
  col = self._tbl_version_path.get_column(col_name)
@@ -244,4 +246,4 @@ class InsertableTable(Table):
244
246
  return []
245
247
 
246
248
  def _table_descriptor(self) -> str:
247
- return f'Table {self._path()!r}'
249
+ return self._display_str()
pixeltable/catalog/path.py CHANGED
@@ -77,5 +77,11 @@ class Path:
77
77
  def __str__(self) -> str:
78
78
  return '.'.join(self.components)
79
79
 
80
+ def __eq__(self, other: object) -> bool:
81
+ return isinstance(other, Path) and str(self) == str(other)
82
+
83
+ def __hash__(self) -> int:
84
+ return hash(str(self))
85
+
80
86
  def __lt__(self, other: Path) -> bool:
81
87
  return str(self) < str(other)
pixeltable/catalog/schema_object.py CHANGED
@@ -42,22 +42,27 @@ class SchemaObject:
42
42
 
43
43
  def get_metadata(self) -> dict[str, Any]:
44
44
  """Returns metadata associated with this schema object."""
45
- from pixeltable.catalog import Catalog
45
+ from pixeltable.catalog import retry_loop
46
46
 
47
- with Catalog.get().begin_xact(for_write=False):
47
+ @retry_loop(for_write=False)
48
+ def op() -> dict[str, Any]:
48
49
  return self._get_metadata()
49
50
 
51
+ return op()
52
+
50
53
  def _get_metadata(self) -> dict[str, Any]:
51
54
  return {'name': self._name, 'path': self._path()}
52
55
 
53
- @classmethod
54
56
  @abstractmethod
55
- def _display_name(cls) -> str:
57
+ def _display_name(self) -> str:
56
58
  """
57
59
  Return name displayed in error messages.
58
60
  """
59
61
  pass
60
62
 
63
+ def _display_str(self) -> str:
64
+ return f'{self._display_name()} {self._path()!r}'
65
+
61
66
  def _move(self, new_name: str, new_dir_id: UUID) -> None:
62
67
  """Subclasses need to override this to make the change persistent"""
63
68
  self._name = new_name