pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (58)
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +11 -2
  4. pixeltable/catalog/catalog.py +407 -119
  5. pixeltable/catalog/column.py +38 -26
  6. pixeltable/catalog/globals.py +130 -15
  7. pixeltable/catalog/insertable_table.py +10 -9
  8. pixeltable/catalog/schema_object.py +6 -0
  9. pixeltable/catalog/table.py +245 -119
  10. pixeltable/catalog/table_version.py +142 -116
  11. pixeltable/catalog/table_version_handle.py +30 -2
  12. pixeltable/catalog/table_version_path.py +28 -4
  13. pixeltable/catalog/view.py +14 -20
  14. pixeltable/config.py +4 -0
  15. pixeltable/dataframe.py +10 -9
  16. pixeltable/env.py +5 -11
  17. pixeltable/exceptions.py +6 -0
  18. pixeltable/exec/exec_node.py +2 -0
  19. pixeltable/exec/expr_eval/expr_eval_node.py +4 -4
  20. pixeltable/exec/sql_node.py +47 -30
  21. pixeltable/exprs/column_property_ref.py +2 -10
  22. pixeltable/exprs/column_ref.py +24 -21
  23. pixeltable/exprs/data_row.py +9 -0
  24. pixeltable/exprs/expr.py +4 -4
  25. pixeltable/exprs/row_builder.py +44 -13
  26. pixeltable/func/__init__.py +1 -0
  27. pixeltable/func/mcp.py +74 -0
  28. pixeltable/func/query_template_function.py +4 -2
  29. pixeltable/func/tools.py +12 -2
  30. pixeltable/func/udf.py +2 -2
  31. pixeltable/functions/__init__.py +1 -0
  32. pixeltable/functions/groq.py +108 -0
  33. pixeltable/functions/huggingface.py +8 -6
  34. pixeltable/functions/mistralai.py +2 -13
  35. pixeltable/functions/openai.py +1 -6
  36. pixeltable/functions/replicate.py +2 -2
  37. pixeltable/functions/util.py +6 -1
  38. pixeltable/globals.py +0 -2
  39. pixeltable/io/external_store.py +81 -54
  40. pixeltable/io/globals.py +1 -1
  41. pixeltable/io/label_studio.py +49 -45
  42. pixeltable/io/table_data_conduit.py +1 -1
  43. pixeltable/metadata/__init__.py +1 -1
  44. pixeltable/metadata/converters/convert_37.py +15 -0
  45. pixeltable/metadata/converters/convert_38.py +39 -0
  46. pixeltable/metadata/notes.py +2 -0
  47. pixeltable/metadata/schema.py +5 -0
  48. pixeltable/metadata/utils.py +78 -0
  49. pixeltable/plan.py +59 -139
  50. pixeltable/share/packager.py +2 -2
  51. pixeltable/store.py +114 -103
  52. pixeltable/type_system.py +30 -0
  53. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/METADATA +1 -1
  54. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/RECORD +57 -53
  55. pixeltable/utils/sample.py +0 -25
  56. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/LICENSE +0 -0
  57. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/WHEEL +0 -0
  58. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/entry_points.txt +0 -0
@@ -15,6 +15,7 @@ from .globals import MediaValidation, is_valid_identifier
15
15
 
16
16
  if TYPE_CHECKING:
17
17
  from .table_version import TableVersion
18
+ from .table_version_handle import ColumnHandle
18
19
  from .table_version_path import TableVersionPath
19
20
 
20
21
  _logger = logging.getLogger('pixeltable')
@@ -42,11 +43,9 @@ class Column:
42
43
  sa_errortype_col: Optional[sql.schema.Column]
43
44
  _value_expr: Optional[exprs.Expr]
44
45
  value_expr_dict: Optional[dict[str, Any]]
45
- dependent_cols: set[Column]
46
46
  # we store a TableVersion here, not a TableVersionHandle, because this column is owned by that TableVersion instance
47
47
  # (re-resolving it later to a different instance doesn't make sense)
48
48
  tbl: Optional[TableVersion]
49
- # tbl: Optional[TableVersionHandle]
50
49
 
51
50
  def __init__(
52
51
  self,
@@ -62,6 +61,7 @@ class Column:
62
61
  sa_col_type: Optional[sql.sqltypes.TypeEngine] = None,
63
62
  records_errors: Optional[bool] = None,
64
63
  value_expr_dict: Optional[dict[str, Any]] = None,
64
+ tbl: Optional[TableVersion] = None,
65
65
  ):
66
66
  """Column constructor.
67
67
 
@@ -86,6 +86,7 @@ class Column:
86
86
  if name is not None and not is_valid_identifier(name):
87
87
  raise excs.Error(f"Invalid column name: '{name}'")
88
88
  self.name = name
89
+ self.tbl = tbl
89
90
  if col_type is None and computed_with is None:
90
91
  raise excs.Error(f'Column `{name}`: col_type is required if computed_with is not specified')
91
92
 
@@ -94,6 +95,7 @@ class Column:
94
95
  if computed_with is not None:
95
96
  value_expr = exprs.Expr.from_object(computed_with)
96
97
  if value_expr is None:
98
+ # TODO: this shouldn't be a user-facing error
97
99
  raise excs.Error(
98
100
  f'Column {name}: computed_with needs to be a valid Pixeltable expression, '
99
101
  f'but it is a {type(computed_with)}'
@@ -101,13 +103,15 @@ class Column:
101
103
  else:
102
104
  self._value_expr = value_expr.copy()
103
105
  self.col_type = self._value_expr.col_type
106
+ if self._value_expr is not None and self.value_expr_dict is None:
107
+ self.value_expr_dict = self._value_expr.as_dict()
104
108
 
105
109
  if col_type is not None:
106
110
  self.col_type = col_type
107
111
  assert self.col_type is not None
108
112
 
109
113
  self.stored = stored
110
- self.dependent_cols = set() # cols with value_exprs that reference us; set by TableVersion
114
+ # self.dependent_cols = set() # cols with value_exprs that reference us; set by TableVersion
111
115
  self.id = col_id
112
116
  self.is_pk = is_pk
113
117
  self._media_validation = media_validation
@@ -124,36 +128,44 @@ class Column:
124
128
  self.sa_errormsg_col = None
125
129
  self.sa_errortype_col = None
126
130
 
127
- self.tbl = None # set by owning TableVersion
131
+ def init_value_expr(self) -> None:
132
+ from pixeltable import exprs
133
+
134
+ if self._value_expr is not None or self.value_expr_dict is None:
135
+ return
136
+ self._value_expr = exprs.Expr.from_dict(self.value_expr_dict)
137
+ self._value_expr.bind_rel_paths()
138
+ if not self._value_expr.is_valid:
139
+ message = (
140
+ dedent(
141
+ f"""
142
+ The computed column {self.name!r} in table {self.tbl.name!r} is no longer valid.
143
+ {{validation_error}}
144
+ You can continue to query existing data from this column, but evaluating it on new data will raise an error.
145
+ """ # noqa: E501
146
+ )
147
+ .strip()
148
+ .format(validation_error=self._value_expr.validation_error)
149
+ )
150
+ warnings.warn(message, category=excs.PixeltableWarning, stacklevel=2)
151
+
152
+ @property
153
+ def handle(self) -> 'ColumnHandle':
154
+ """Returns a ColumnHandle for this Column."""
155
+ from .table_version_handle import ColumnHandle
156
+
157
+ assert self.tbl is not None
158
+ assert self.id is not None
159
+ return ColumnHandle(self.tbl.handle, self.id)
128
160
 
129
161
  @property
130
162
  def value_expr(self) -> Optional[exprs.Expr]:
131
- """Instantiate value_expr on-demand"""
132
- # TODO: instantiate expr in the c'tor and add an Expr.prepare() that can create additional state after the
133
- # catalog has been fully loaded; that way, we encounter bugs in the serialization/deserialization logic earlier
134
- if self.value_expr_dict is not None and self._value_expr is None:
135
- from pixeltable import exprs
136
-
137
- self._value_expr = exprs.Expr.from_dict(self.value_expr_dict)
138
- self._value_expr.bind_rel_paths()
139
- if not self._value_expr.is_valid:
140
- message = (
141
- dedent(
142
- f"""
143
- The computed column {self.name!r} in table {self.tbl.name!r} is no longer valid.
144
- {{validation_error}}
145
- You can continue to query existing data from this column, but evaluating it on new data will raise an error.
146
- """ # noqa: E501
147
- )
148
- .strip()
149
- .format(validation_error=self._value_expr.validation_error)
150
- )
151
- warnings.warn(message, category=excs.PixeltableWarning, stacklevel=2)
163
+ assert self.value_expr_dict is None or self._value_expr is not None
152
164
  return self._value_expr
153
165
 
154
166
  def set_value_expr(self, value_expr: exprs.Expr) -> None:
155
167
  self._value_expr = value_expr
156
- self.value_expr_dict = None
168
+ self.value_expr_dict = self._value_expr.as_dict()
157
169
 
158
170
  def check_value_expr(self) -> None:
159
171
  assert self._value_expr is not None
@@ -4,12 +4,16 @@ import dataclasses
4
4
  import enum
5
5
  import itertools
6
6
  import logging
7
- from typing import Optional
8
-
9
- from typing_extensions import Self
7
+ from dataclasses import dataclass, field
8
+ from typing import TYPE_CHECKING, Optional
9
+ from uuid import UUID
10
10
 
11
11
  import pixeltable.exceptions as excs
12
12
 
13
+ if TYPE_CHECKING:
14
+ from IPython.lib.pretty import RepresentationPrinter
15
+
16
+
13
17
  _logger = logging.getLogger('pixeltable')
14
18
 
15
19
  # name of the position column in a component view
@@ -21,26 +25,118 @@ _ROWID_COLUMN_NAME = '_rowid'
21
25
  _PREDEF_SYMBOLS: Optional[set[str]] = None
22
26
 
23
27
 
24
- @dataclasses.dataclass
28
+ @dataclass(frozen=True)
29
+ class QColumnId:
30
+ """Qualified column id"""
31
+
32
+ tbl_id: UUID
33
+ col_id: int
34
+
35
+
36
+ @dataclass(frozen=True)
37
+ class RowCountStats:
38
+ """
39
+ Statistics about the counts of rows affected by a table operation.
40
+ """
41
+
42
+ ins_rows: int = 0 # rows inserted
43
+ del_rows: int = 0 # rows deleted
44
+ upd_rows: int = 0 # rows updated
45
+ num_excs: int = 0 # total number of exceptions
46
+ # TODO: disambiguate what this means: # of slots computed or # of columns computed?
47
+ computed_values: int = 0 # number of computed values (e.g., computed columns) affected by the operation
48
+
49
+ @property
50
+ def num_rows(self) -> int:
51
+ return self.ins_rows + self.del_rows + self.upd_rows
52
+
53
+ def insert_to_update(self) -> 'RowCountStats':
54
+ """
55
+ Convert insert row count stats to update row count stats.
56
+ This is used when an insert operation is treated as an update.
57
+ """
58
+ return RowCountStats(
59
+ ins_rows=0,
60
+ del_rows=self.del_rows,
61
+ upd_rows=self.upd_rows + self.ins_rows,
62
+ num_excs=self.num_excs,
63
+ computed_values=self.computed_values,
64
+ )
65
+
66
+ def __add__(self, other: 'RowCountStats') -> 'RowCountStats':
67
+ """
68
+ Add the stats from two RowCountStats objects together.
69
+ """
70
+ return RowCountStats(
71
+ ins_rows=self.ins_rows + other.ins_rows,
72
+ del_rows=self.del_rows + other.del_rows,
73
+ upd_rows=self.upd_rows + other.upd_rows,
74
+ num_excs=self.num_excs + other.num_excs,
75
+ computed_values=self.computed_values + other.computed_values,
76
+ )
77
+
78
+
79
+ @dataclass(frozen=True)
25
80
  class UpdateStatus:
26
81
  """
27
82
  Information about updates that resulted from a table operation.
28
83
  """
29
84
 
30
- num_rows: int = 0
31
- # TODO: disambiguate what this means: # of slots computed or # of columns computed?
32
- num_computed_values: int = 0
33
- num_excs: int = 0
34
85
  updated_cols: list[str] = dataclasses.field(default_factory=list)
35
86
  cols_with_excs: list[str] = dataclasses.field(default_factory=list)
36
87
 
37
- def __iadd__(self, other: 'UpdateStatus') -> Self:
38
- self.num_rows += other.num_rows
39
- self.num_computed_values += other.num_computed_values
40
- self.num_excs += other.num_excs
41
- self.updated_cols = list(dict.fromkeys(self.updated_cols + other.updated_cols))
42
- self.cols_with_excs = list(dict.fromkeys(self.cols_with_excs + other.cols_with_excs))
43
- return self
88
+ # stats for the rows affected by the operation
89
+ row_count_stats: RowCountStats = field(default_factory=RowCountStats)
90
+
91
+ # stats for changes cascaded to other tables
92
+ cascade_row_count_stats: RowCountStats = field(default_factory=RowCountStats)
93
+
94
+ @property
95
+ def num_rows(self) -> int:
96
+ return self.row_count_stats.num_rows + self.cascade_row_count_stats.num_rows
97
+
98
+ @property
99
+ def num_excs(self) -> int:
100
+ return self.row_count_stats.num_excs + self.cascade_row_count_stats.num_excs
101
+
102
+ @property
103
+ def num_computed_values(self) -> int:
104
+ return self.row_count_stats.computed_values + self.cascade_row_count_stats.computed_values
105
+
106
+ def insert_to_update(self) -> 'UpdateStatus':
107
+ """
108
+ Convert the update status from an insert operation to an update operation.
109
+ This is used when an insert operation is treated as an update.
110
+ """
111
+ return UpdateStatus(
112
+ updated_cols=self.updated_cols,
113
+ cols_with_excs=self.cols_with_excs,
114
+ row_count_stats=self.row_count_stats.insert_to_update(),
115
+ cascade_row_count_stats=self.cascade_row_count_stats.insert_to_update(),
116
+ )
117
+
118
+ def to_cascade(self) -> 'UpdateStatus':
119
+ """
120
+ Convert the update status to a cascade update status.
121
+ This is used when an operation cascades changes to other tables.
122
+ """
123
+ return UpdateStatus(
124
+ updated_cols=self.updated_cols,
125
+ cols_with_excs=self.cols_with_excs,
126
+ row_count_stats=RowCountStats(),
127
+ cascade_row_count_stats=self.cascade_row_count_stats + self.row_count_stats,
128
+ )
129
+
130
+ def __add__(self, other: 'UpdateStatus') -> UpdateStatus:
131
+ """
132
+ Add the update status from two UpdateStatus objects together.
133
+ """
134
+ return UpdateStatus(
135
+ updated_cols=list(dict.fromkeys(self.updated_cols + other.updated_cols)),
136
+ cols_with_excs=list(dict.fromkeys(self.cols_with_excs + other.cols_with_excs)),
137
+ row_count_stats=self.row_count_stats + other.row_count_stats,
138
+ cascade_row_count_stats=self.cascade_row_count_stats + other.cascade_row_count_stats,
139
+ )
44
140
 
45
141
  @property
46
142
  def insert_msg(self) -> str:
@@ -58,6 +154,25 @@ class UpdateStatus:
58
154
  )
59
155
  return msg
60
156
 
157
+ @classmethod
158
+ def __cnt_str(cls, cnt: int, item: str) -> str:
159
+ assert cnt > 0
160
+ return f'{cnt} {item}{"" if cnt == 1 else "s"}'
161
+
162
+ def _repr_pretty_(self, p: 'RepresentationPrinter', cycle: bool) -> None:
163
+ messages = []
164
+ if self.row_count_stats.ins_rows > 0:
165
+ messages.append(f'{self.__cnt_str(self.row_count_stats.ins_rows, "row")} inserted')
166
+ if self.row_count_stats.del_rows > 0:
167
+ messages.append(f'{self.__cnt_str(self.row_count_stats.del_rows, "row")} deleted')
168
+ if self.row_count_stats.upd_rows > 0:
169
+ messages.append(f'{self.__cnt_str(self.row_count_stats.upd_rows, "row")} updated')
170
+ if self.num_computed_values > 0:
171
+ messages.append(f'{self.__cnt_str(self.num_computed_values, "value")} computed')
172
+ if self.row_count_stats.num_excs > 0:
173
+ messages.append(self.__cnt_str(self.row_count_stats.num_excs, 'exception'))
174
+ p.text(', '.join(messages) + '.' if len(messages) > 0 else 'No rows affected.')
175
+
61
176
 
62
177
  class MediaValidation(enum.Enum):
63
178
  ON_READ = 0
@@ -51,6 +51,7 @@ class InsertableTable(Table):
51
51
  def __init__(self, dir_id: UUID, tbl_version: TableVersionHandle):
52
52
  tbl_version_path = TableVersionPath(tbl_version)
53
53
  super().__init__(tbl_version.id, dir_id, tbl_version.get().name, tbl_version_path)
54
+ self._tbl_version = tbl_version
54
55
 
55
56
  @classmethod
56
57
  def _display_name(cls) -> str:
@@ -103,8 +104,8 @@ class InsertableTable(Table):
103
104
  Env.get().console_logger.info(f'Created table `{name}`.')
104
105
  return tbl
105
106
 
106
- def get_metadata(self) -> dict[str, Any]:
107
- md = super().get_metadata()
107
+ def _get_metadata(self) -> dict[str, Any]:
108
+ md = super()._get_metadata()
108
109
  md['is_view'] = False
109
110
  md['is_snapshot'] = False
110
111
  return md
@@ -141,7 +142,7 @@ class InsertableTable(Table):
141
142
  from pixeltable.catalog import Catalog
142
143
  from pixeltable.io.table_data_conduit import UnkTableDataConduit
143
144
 
144
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
145
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
145
146
  table = self
146
147
  if source is None:
147
148
  source = [kwargs]
@@ -171,12 +172,13 @@ class InsertableTable(Table):
171
172
  from pixeltable.io.table_data_conduit import DFTableDataConduit
172
173
 
173
174
  status = pxt.UpdateStatus()
174
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
175
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
175
176
  if isinstance(data_source, DFTableDataConduit):
176
- status += self._tbl_version.get().insert(
177
+ status = self._tbl_version.get().insert(
177
178
  rows=None, df=data_source.pxt_df, print_stats=print_stats, fail_on_exception=fail_on_exception
178
179
  )
179
180
  else:
181
+ status = UpdateStatus()
180
182
  for row_batch in data_source.valid_row_batch():
181
183
  status += self._tbl_version.get().insert(
182
184
  rows=row_batch, df=None, print_stats=print_stats, fail_on_exception=fail_on_exception
@@ -189,7 +191,7 @@ class InsertableTable(Table):
189
191
 
190
192
  def _validate_input_rows(self, rows: list[dict[str, Any]]) -> None:
191
193
  """Verify that the input rows match the table schema"""
192
- valid_col_names = set(self._schema.keys())
194
+ valid_col_names = set(self._get_schema().keys())
193
195
  reqd_col_names = set(self._tbl_version_path.tbl_version.get().get_required_col_names())
194
196
  computed_col_names = set(self._tbl_version_path.tbl_version.get().get_computed_col_names())
195
197
  for row in rows:
@@ -231,11 +233,10 @@ class InsertableTable(Table):
231
233
  """
232
234
  from pixeltable.catalog import Catalog
233
235
 
234
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
236
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
235
237
  return self._tbl_version.get().delete(where=where)
236
238
 
237
- @property
238
- def _base_table(self) -> Optional['Table']:
239
+ def _get_base_table(self) -> Optional['Table']:
239
240
  return None
240
241
 
241
242
  @property
@@ -42,6 +42,12 @@ class SchemaObject:
42
42
 
43
43
  def get_metadata(self) -> dict[str, Any]:
44
44
  """Returns metadata associated with this schema object."""
45
+ from pixeltable.catalog import Catalog
46
+
47
+ with Catalog.get().begin_xact(for_write=False):
48
+ return self._get_metadata()
49
+
50
+ def _get_metadata(self) -> dict[str, Any]:
45
51
  return {'name': self._name, 'path': self._path()}
46
52
 
47
53
  @classmethod