pixeltable 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (60)
  1. pixeltable/__init__.py +1 -0
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/catalog.py +9 -2
  4. pixeltable/catalog/column.py +1 -1
  5. pixeltable/catalog/dir.py +1 -1
  6. pixeltable/catalog/table.py +1 -1
  7. pixeltable/catalog/table_version.py +12 -2
  8. pixeltable/catalog/table_version_path.py +2 -2
  9. pixeltable/catalog/view.py +64 -20
  10. pixeltable/dataframe.py +14 -14
  11. pixeltable/env.py +20 -3
  12. pixeltable/exec/component_iteration_node.py +1 -2
  13. pixeltable/exec/expr_eval/evaluators.py +4 -2
  14. pixeltable/exec/expr_eval/expr_eval_node.py +4 -1
  15. pixeltable/exprs/comparison.py +8 -4
  16. pixeltable/exprs/data_row.py +5 -3
  17. pixeltable/exprs/expr.py +9 -2
  18. pixeltable/exprs/function_call.py +155 -313
  19. pixeltable/func/aggregate_function.py +29 -15
  20. pixeltable/func/callable_function.py +11 -8
  21. pixeltable/func/expr_template_function.py +3 -9
  22. pixeltable/func/function.py +148 -74
  23. pixeltable/func/signature.py +65 -30
  24. pixeltable/func/udf.py +1 -1
  25. pixeltable/functions/__init__.py +1 -0
  26. pixeltable/functions/deepseek.py +121 -0
  27. pixeltable/functions/image.py +7 -7
  28. pixeltable/functions/openai.py +49 -10
  29. pixeltable/functions/video.py +14 -7
  30. pixeltable/globals.py +14 -3
  31. pixeltable/index/embedding_index.py +4 -13
  32. pixeltable/io/globals.py +88 -77
  33. pixeltable/io/hf_datasets.py +34 -34
  34. pixeltable/io/pandas.py +75 -87
  35. pixeltable/io/parquet.py +19 -27
  36. pixeltable/io/utils.py +115 -0
  37. pixeltable/iterators/audio.py +2 -1
  38. pixeltable/iterators/video.py +1 -1
  39. pixeltable/metadata/__init__.py +2 -1
  40. pixeltable/metadata/converters/convert_15.py +18 -8
  41. pixeltable/metadata/converters/convert_27.py +31 -0
  42. pixeltable/metadata/converters/convert_28.py +15 -0
  43. pixeltable/metadata/converters/convert_29.py +111 -0
  44. pixeltable/metadata/converters/util.py +12 -1
  45. pixeltable/metadata/notes.py +3 -0
  46. pixeltable/metadata/schema.py +8 -0
  47. pixeltable/share/__init__.py +1 -0
  48. pixeltable/share/packager.py +246 -0
  49. pixeltable/share/publish.py +97 -0
  50. pixeltable/type_system.py +87 -42
  51. pixeltable/utils/__init__.py +41 -0
  52. pixeltable/utils/arrow.py +45 -12
  53. pixeltable/utils/formatter.py +1 -1
  54. pixeltable/utils/iceberg.py +14 -0
  55. pixeltable/utils/media_store.py +1 -1
  56. {pixeltable-0.3.3.dist-info → pixeltable-0.3.5.dist-info}/METADATA +37 -50
  57. {pixeltable-0.3.3.dist-info → pixeltable-0.3.5.dist-info}/RECORD +60 -51
  58. {pixeltable-0.3.3.dist-info → pixeltable-0.3.5.dist-info}/WHEEL +1 -1
  59. {pixeltable-0.3.3.dist-info → pixeltable-0.3.5.dist-info}/LICENSE +0 -0
  60. {pixeltable-0.3.3.dist-info → pixeltable-0.3.5.dist-info}/entry_points.txt +0 -0
pixeltable/__init__.py CHANGED
@@ -19,6 +19,7 @@ from .globals import (
19
19
  list_functions,
20
20
  list_tables,
21
21
  move,
22
+ publish_snapshot,
22
23
  tool,
23
24
  tools,
24
25
  )
pixeltable/__version__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  # These version placeholders will be replaced during build.
2
- __version__ = '0.3.3'
3
- __version_tuple__ = (0, 3, 3)
2
+ __version__ = '0.3.5'
3
+ __version_tuple__ = (0, 3, 5)
pixeltable/catalog/catalog.py CHANGED
@@ -58,7 +58,7 @@ class Catalog:
58
58
  if session.query(sql.func.count(schema.Dir.id)).scalar() > 0:
59
59
  return
60
60
  # create a top-level directory, so that every schema object has a directory
61
- dir_md = schema.DirMd(name='')
61
+ dir_md = schema.DirMd(name='', user=None, additional_md={})
62
62
  dir_record = schema.Dir(parent_id=None, md=dataclasses.asdict(dir_md))
63
63
  session.add(dir_record)
64
64
  session.flush()
@@ -142,6 +142,7 @@ class Catalog:
142
142
  base_tbl_id = base_path.tbl_id()
143
143
  is_snapshot = view_md is not None and view_md.is_snapshot
144
144
  snapshot_only = is_snapshot and view_md.predicate is None and len(schema_version_md.columns) == 0
145
+ include_base_columns = view_md is None or view_md.include_base_columns
145
146
  if snapshot_only:
146
147
  # this is a pure snapshot, without a physical table backing it
147
148
  view_path = base_path
@@ -158,7 +159,13 @@ class Catalog:
158
159
  view_path = TableVersionPath(tbl_version, base=base_path)
159
160
 
160
161
  tbl: Table = View(
161
- tbl_record.id, tbl_record.dir_id, tbl_md.name, view_path, base_tbl_id, snapshot_only=snapshot_only
162
+ tbl_record.id,
163
+ tbl_record.dir_id,
164
+ tbl_md.name,
165
+ view_path,
166
+ base_tbl_id,
167
+ snapshot_only=snapshot_only,
168
+ include_base_columns=include_base_columns,
162
169
  )
163
170
  self.tbl_dependents[base_tbl_id].append(tbl)
164
171
 
pixeltable/catalog/column.py CHANGED
@@ -72,7 +72,7 @@ class Column:
72
72
  col_type is None
73
73
  - when loaded from md store: ``computed_with`` is set and col_type is set
74
74
 
75
- ``stored`` (only valid for computed image columns):
75
+ ``stored`` (only valid for computed columns):
76
76
  - if True: the column is present in the stored table
77
77
  - if False: the column is not present in the stored table and recomputed during a query
78
78
  - if None: the system chooses for you (at present, this is always False, but this may change in the future)
pixeltable/catalog/dir.py CHANGED
@@ -32,7 +32,7 @@ class Dir(SchemaObject):
32
32
  def _move(self, new_name: str, new_dir_id: UUID) -> None:
33
33
  super()._move(new_name, new_dir_id)
34
34
  with Env.get().engine.begin() as conn:
35
- dir_md = schema.DirMd(name=new_name)
35
+ dir_md = schema.DirMd(name=new_name, user=None, additional_md={})
36
36
  conn.execute(
37
37
  sql.update(schema.Dir.__table__)
38
38
  .values({schema.Dir.parent_id: self._dir_id, schema.Dir.md: dataclasses.asdict(dir_md)})
pixeltable/catalog/table.py CHANGED
@@ -569,7 +569,7 @@ class Table(SchemaObject):
569
569
 
570
570
  Args:
571
571
  kwargs: Exactly one keyword argument of the form `col_name=expression`.
572
- stored: Whether the column is materialized and stored or computed on demand. Only valid for image columns.
572
+ stored: Whether the column is materialized and stored or computed on demand.
573
573
  print_stats: If `True`, print execution metrics during evaluation.
574
574
  on_error: Determines the behavior if an error occurs while evaluating the column expression for at least one
575
575
  row.
pixeltable/catalog/table_version.py CHANGED
@@ -59,6 +59,7 @@ class TableVersion:
59
59
  schema_version: int
60
60
  view_md: Optional[schema.ViewMd]
61
61
  is_snapshot: bool
62
+ include_base_columns: bool
62
63
  effective_version: Optional[int]
63
64
  path: Optional[pxt.catalog.TableVersionPath]
64
65
  base: Optional[TableVersion]
@@ -115,6 +116,7 @@ class TableVersion:
115
116
  self.view_md = tbl_md.view_md # save this as-is, it's needed for _create_md()
116
117
  is_view = tbl_md.view_md is not None
117
118
  self.is_snapshot = (is_view and tbl_md.view_md.is_snapshot) or bool(is_snapshot)
119
+ self.include_base_columns = not is_view or tbl_md.view_md.include_base_columns
118
120
  self.media_validation = MediaValidation[schema_version_md.media_validation.upper()]
119
121
  # a mutable TableVersion doesn't have a static version
120
122
  self.effective_version = self.version if self.is_snapshot else None
@@ -228,6 +230,7 @@ class TableVersion:
228
230
  column_md = cls._create_column_md(cols)
229
231
  table_md = schema.TableMd(
230
232
  name=name,
233
+ user=None,
231
234
  current_version=0,
232
235
  current_schema_version=0,
233
236
  next_col_id=len(cols),
@@ -237,6 +240,7 @@ class TableVersion:
237
240
  index_md={},
238
241
  external_stores=[],
239
242
  view_md=view_md,
243
+ additional_md={},
240
244
  )
241
245
  # create a schema.Table here, we need it to call our c'tor;
242
246
  # don't add it to the session yet, we might add index metadata
@@ -244,7 +248,7 @@ class TableVersion:
244
248
  tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(table_md))
245
249
 
246
250
  # create schema.TableVersion
247
- table_version_md = schema.TableVersionMd(created_at=timestamp, version=0, schema_version=0)
251
+ table_version_md = schema.TableVersionMd(created_at=timestamp, version=0, schema_version=0, additional_md={})
248
252
  tbl_version_record = schema.TableVersion(
249
253
  tbl_id=tbl_record.id, version=0, md=dataclasses.asdict(table_version_md)
250
254
  )
@@ -266,6 +270,7 @@ class TableVersion:
266
270
  num_retained_versions=num_retained_versions,
267
271
  comment=comment,
268
272
  media_validation=media_validation.name.lower(),
273
+ additional_md={},
269
274
  )
270
275
  schema_version_record = schema.TableSchemaVersion(
271
276
  tbl_id=tbl_record.id, schema_version=0, md=dataclasses.asdict(schema_version_md)
@@ -1342,6 +1347,7 @@ class TableVersion:
1342
1347
  def _create_tbl_md(self) -> schema.TableMd:
1343
1348
  return schema.TableMd(
1344
1349
  name=self.name,
1350
+ user=None,
1345
1351
  current_version=self.version,
1346
1352
  current_schema_version=self.schema_version,
1347
1353
  next_col_id=self.next_col_id,
@@ -1351,10 +1357,13 @@ class TableVersion:
1351
1357
  index_md=self.idx_md,
1352
1358
  external_stores=self._create_stores_md(self.external_stores.values()),
1353
1359
  view_md=self.view_md,
1360
+ additional_md={},
1354
1361
  )
1355
1362
 
1356
1363
  def _create_version_md(self, timestamp: float) -> schema.TableVersionMd:
1357
- return schema.TableVersionMd(created_at=timestamp, version=self.version, schema_version=self.schema_version)
1364
+ return schema.TableVersionMd(
1365
+ created_at=timestamp, version=self.version, schema_version=self.schema_version, additional_md={}
1366
+ )
1358
1367
 
1359
1368
  def _create_schema_version_md(self, preceding_schema_version: int) -> schema.TableSchemaVersionMd:
1360
1369
  column_md: dict[int, schema.SchemaColumn] = {}
@@ -1372,6 +1381,7 @@ class TableVersion:
1372
1381
  num_retained_versions=self.num_retained_versions,
1373
1382
  comment=self.comment,
1374
1383
  media_validation=self.media_validation.name.lower(),
1384
+ additional_md={},
1375
1385
  )
1376
1386
 
1377
1387
  def as_dict(self) -> dict:
pixeltable/catalog/table_version_path.py CHANGED
@@ -86,7 +86,7 @@ class TableVersionPath:
86
86
  from pixeltable.exprs import ColumnRef
87
87
 
88
88
  if col_name not in self.tbl_version.cols_by_name:
89
- if self.base is None:
89
+ if self.base is None or not self.tbl_version.include_base_columns:
90
90
  raise AttributeError(f'Column {col_name} unknown')
91
91
  return self.base.get_column_ref(col_name)
92
92
  col = self.tbl_version.cols_by_name[col_name]
@@ -95,7 +95,7 @@ class TableVersionPath:
95
95
  def columns(self) -> list[Column]:
96
96
  """Return all user columns visible in this tbl version path, including columns from bases"""
97
97
  result = list(self.tbl_version.cols_by_name.values())
98
- if self.base is not None:
98
+ if self.base is not None and self.tbl_version.include_base_columns:
99
99
  base_cols = self.base.columns()
100
100
  # we only include base columns that don't conflict with one of our column names
101
101
  result.extend(c for c in base_cols if c.name not in self.tbl_version.cols_by_name)
pixeltable/catalog/view.py CHANGED
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import inspect
4
4
  import logging
5
- from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional
5
+ from typing import TYPE_CHECKING, Any, Iterable, List, Literal, Optional
6
6
  from uuid import UUID
7
7
 
8
8
  import sqlalchemy.orm as orm
@@ -37,7 +37,14 @@ class View(Table):
37
37
  """
38
38
 
39
39
  def __init__(
40
- self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath, base_id: UUID, snapshot_only: bool
40
+ self,
41
+ id: UUID,
42
+ dir_id: UUID,
43
+ name: str,
44
+ tbl_version_path: TableVersionPath,
45
+ base_id: UUID,
46
+ snapshot_only: bool,
47
+ include_base_columns: bool,
41
48
  ):
42
49
  super().__init__(id, dir_id, name, tbl_version_path)
43
50
  assert base_id in catalog.Catalog.get().tbl_dependents
@@ -48,12 +55,28 @@ class View(Table):
48
55
  def _display_name(cls) -> str:
49
56
  return 'view'
50
57
 
58
+ @classmethod
59
+ def select_list_to_additional_columns(cls, select_list: list[tuple[exprs.Expr, Optional[str]]]) -> dict[str, dict]:
60
+ """Returns a list of columns in the same format as the additional_columns parameter of View.create.
61
+ The source is the list of expressions from a select() statement on a DataFrame.
62
+ If the column is a ColumnRef to a base table column, it is marked to not be stored.
63
+ """
64
+ from pixeltable.dataframe import DataFrame
65
+
66
+ r: dict[str, dict] = {}
67
+ exps, names = DataFrame._normalize_select_list([], select_list)
68
+ for expr, name in zip(exps, names):
69
+ stored = not isinstance(expr, exprs.ColumnRef)
70
+ r[name] = {'value': expr, 'stored': stored}
71
+ return r
72
+
51
73
  @classmethod
52
74
  def _create(
53
75
  cls,
54
76
  dir_id: UUID,
55
77
  name: str,
56
78
  base: TableVersionPath,
79
+ select_list: Optional[list[tuple[exprs.Expr, Optional[str]]]],
57
80
  additional_columns: dict[str, Any],
58
81
  predicate: Optional['pxt.exprs.Expr'],
59
82
  is_snapshot: bool,
@@ -63,7 +86,15 @@ class View(Table):
63
86
  iterator_cls: Optional[type[ComponentIterator]],
64
87
  iterator_args: Optional[dict],
65
88
  ) -> View:
66
- columns = cls._create_columns(additional_columns)
89
+ # Convert select_list to more additional_columns if present
90
+ include_base_columns: bool = select_list is None
91
+ select_list_columns: List[Column] = []
92
+ if not include_base_columns:
93
+ r = cls.select_list_to_additional_columns(select_list)
94
+ select_list_columns = cls._create_columns(r)
95
+
96
+ columns_from_additional_columns = cls._create_columns(additional_columns)
97
+ columns = select_list_columns + columns_from_additional_columns
67
98
  cls._verify_schema(columns)
68
99
 
69
100
  # verify that filter can be evaluated in the context of the base
@@ -88,31 +119,34 @@ class View(Table):
88
119
 
89
120
  # validate iterator_args
90
121
  py_signature = inspect.signature(iterator_cls.__init__)
122
+
123
+ # make sure iterator_args can be used to instantiate iterator_cls
124
+ bound_args: dict[str, Any]
91
125
  try:
92
- # make sure iterator_args can be used to instantiate iterator_cls
93
126
  bound_args = py_signature.bind(None, **iterator_args).arguments # None: arg for self
94
- # we ignore 'self'
95
- first_param_name = next(iter(py_signature.parameters)) # can't guarantee it's actually 'self'
96
- del bound_args[first_param_name]
97
-
98
- # construct Signature and type-check bound_args
99
- params = [
100
- func.Parameter(param_name, param_type, kind=inspect.Parameter.POSITIONAL_OR_KEYWORD)
101
- for param_name, param_type in iterator_cls.input_schema().items()
102
- ]
103
- sig = func.Signature(ts.InvalidType(), params)
104
- from pixeltable.exprs import FunctionCall
105
-
106
- FunctionCall.normalize_args(iterator_cls.__name__, sig, bound_args)
107
127
  except TypeError as e:
108
- raise excs.Error(f'Cannot instantiate iterator with given arguments: {e}')
128
+ raise excs.Error(f'Invalid iterator arguments: {e}')
129
+ # we ignore 'self'
130
+ first_param_name = next(iter(py_signature.parameters)) # can't guarantee it's actually 'self'
131
+ del bound_args[first_param_name]
132
+
133
+ # construct Signature and type-check bound_args
134
+ params = [
135
+ func.Parameter(param_name, param_type, kind=inspect.Parameter.POSITIONAL_OR_KEYWORD)
136
+ for param_name, param_type in iterator_cls.input_schema().items()
137
+ ]
138
+ sig = func.Signature(ts.InvalidType(), params)
139
+
140
+ expr_args = {k: exprs.Expr.from_object(v) for k, v in bound_args.items()}
141
+ sig.validate_args(expr_args, context=f'in iterator {iterator_cls.__name__!r}')
142
+ literal_args = {k: v.val if isinstance(v, exprs.Literal) else v for k, v in expr_args.items()}
109
143
 
110
144
  # prepend pos and output_schema columns to cols:
111
145
  # a component view exposes the pos column of its rowid;
112
146
  # we create that column here, so it gets assigned a column id;
113
147
  # stored=False: it is not stored separately (it's already stored as part of the rowid)
114
148
  iterator_cols = [Column(_POS_COLUMN_NAME, ts.IntType(), stored=False)]
115
- output_dict, unstored_cols = iterator_cls.output_schema(**bound_args)
149
+ output_dict, unstored_cols = iterator_cls.output_schema(**literal_args)
116
150
  iterator_cols.extend(
117
151
  [
118
152
  Column(col_name, col_type, stored=col_name not in unstored_cols)
@@ -153,6 +187,7 @@ class View(Table):
153
187
 
154
188
  view_md = md_schema.ViewMd(
155
189
  is_snapshot=is_snapshot,
190
+ include_base_columns=include_base_columns,
156
191
  predicate=predicate.as_dict() if predicate is not None else None,
157
192
  base_versions=base_versions,
158
193
  iterator_class_fqn=iterator_class_fqn,
@@ -172,7 +207,15 @@ class View(Table):
172
207
  )
173
208
  if tbl_version is None:
174
209
  # this is purely a snapshot: we use the base's tbl version path
175
- view = cls(id, dir_id, name, base_version_path, base.tbl_id(), snapshot_only=True)
210
+ view = cls(
211
+ id,
212
+ dir_id,
213
+ name,
214
+ base_version_path,
215
+ base.tbl_id(),
216
+ snapshot_only=True,
217
+ include_base_columns=include_base_columns,
218
+ )
176
219
  _logger.info(f'created snapshot {name}')
177
220
  else:
178
221
  view = cls(
@@ -182,6 +225,7 @@ class View(Table):
182
225
  TableVersionPath(tbl_version, base=base_version_path),
183
226
  base.tbl_id(),
184
227
  snapshot_only=False,
228
+ include_base_columns=include_base_columns,
185
229
  )
186
230
  _logger.info(f'Created view `{name}`, id={tbl_version.id}')
187
231
 
pixeltable/dataframe.py CHANGED
@@ -578,15 +578,9 @@ class DataFrame:
578
578
  # analyze select list; wrap literals with the corresponding expressions
579
579
  select_list: list[tuple[exprs.Expr, Optional[str]]] = []
580
580
  for raw_expr, name in base_list:
581
- if isinstance(raw_expr, exprs.Expr):
582
- select_list.append((raw_expr, name))
583
- elif isinstance(raw_expr, (dict, list, tuple)):
584
- select_list.append((exprs.Expr.from_object(raw_expr), name))
585
- elif isinstance(raw_expr, np.ndarray):
586
- select_list.append((exprs.Expr.from_array(raw_expr), name))
587
- else:
588
- select_list.append((exprs.Literal(raw_expr), name))
589
- expr = select_list[-1][0]
581
+ expr = exprs.Expr.from_object(raw_expr)
582
+ if expr is None:
583
+ raise excs.Error(f'Invalid expression: {raw_expr}')
590
584
  if expr.col_type.is_invalid_type():
591
585
  raise excs.Error(f'Invalid type: {raw_expr}')
592
586
  if not expr.is_bound_by(self._from_clause.tbls):
@@ -594,6 +588,7 @@ class DataFrame:
594
588
  f"Expression '{expr}' cannot be evaluated in the context of this query's tables "
595
589
  f'({",".join(tbl.tbl_name() for tbl in self._from_clause.tbls)})'
596
590
  )
591
+ select_list.append((expr, name))
597
592
 
598
593
  # check user provided names do not conflict among themselves or with auto-generated ones
599
594
  seen: set[str] = set()
@@ -956,7 +951,7 @@ class DataFrame:
956
951
 
957
952
  >>> df = person.where(t.year == 2014).update({'age': 30})
958
953
  """
959
- self._validate_mutable('update')
954
+ self._validate_mutable('update', False)
960
955
  return self._first_tbl.tbl_version.update(value_spec, where=self.where_clause, cascade=cascade)
961
956
 
962
957
  def delete(self) -> UpdateStatus:
@@ -976,18 +971,23 @@ class DataFrame:
976
971
 
977
972
  >>> df = person.where(t.age < 18).delete()
978
973
  """
979
- self._validate_mutable('delete')
974
+ self._validate_mutable('delete', False)
980
975
  if not self._first_tbl.is_insertable():
981
976
  raise excs.Error(f'Cannot delete from view')
982
977
  return self._first_tbl.tbl_version.delete(where=self.where_clause)
983
978
 
984
- def _validate_mutable(self, op_name: str) -> None:
985
- """Tests whether this DataFrame can be mutated (such as by an update operation)."""
979
+ def _validate_mutable(self, op_name: str, allow_select: bool) -> None:
980
+ """Tests whether this DataFrame can be mutated (such as by an update operation).
981
+
982
+ Args:
983
+ op_name: The name of the operation for which the test is being performed.
984
+ allow_select: If True, allow a select() specification in the Dataframe.
985
+ """
986
986
  if self.group_by_clause is not None or self.grouping_tbl is not None:
987
987
  raise excs.Error(f'Cannot use `{op_name}` after `group_by`')
988
988
  if self.order_by_clause is not None:
989
989
  raise excs.Error(f'Cannot use `{op_name}` after `order_by`')
990
- if self.select_list is not None:
990
+ if self.select_list is not None and not allow_select:
991
991
  raise excs.Error(f'Cannot use `{op_name}` after `select`')
992
992
  if self.limit_val is not None:
993
993
  raise excs.Error(f'Cannot use `{op_name}` after `limit`')
pixeltable/env.py CHANGED
@@ -76,6 +76,8 @@ class Env:
76
76
  _module_log_level: dict[str, int] # module name -> log level
77
77
  _config_file: Optional[Path]
78
78
  _config: Optional[Config]
79
+ _file_cache_size_g: float
80
+ _pxt_api_key: Optional[str]
79
81
  _stdout_handler: logging.StreamHandler
80
82
  _initialized: bool
81
83
 
@@ -289,6 +291,7 @@ class Env:
289
291
  f'(either add a `file_cache_size_g` entry to the `pixeltable` section of {self._config_file},\n'
290
292
  'or set the PIXELTABLE_FILE_CACHE_SIZE_G environment variable)'
291
293
  )
294
+ self._pxt_api_key = self._config.get_string_value('api_key')
292
295
 
293
296
  # Disable spurious warnings
294
297
  warnings.simplefilter('ignore', category=TqdmWarning)
@@ -333,9 +336,7 @@ class Env:
333
336
  http_logger.addHandler(http_fh)
334
337
  http_logger.propagate = False
335
338
 
336
- # empty tmp dir
337
- for path in glob.glob(f'{self._tmp_dir}/*'):
338
- os.remove(path)
339
+ self.clear_tmp_dir()
339
340
 
340
341
  self._db_name = os.environ.get('PIXELTABLE_DB', 'pixeltable')
341
342
  self._pgdata_dir = Path(os.environ.get('PIXELTABLE_PGDATA', str(self._home / 'pgdata')))
@@ -461,6 +462,15 @@ class Env:
461
462
  def _upgrade_metadata(self) -> None:
462
463
  metadata.upgrade_md(self._sa_engine)
463
464
 
465
+ @property
466
+ def pxt_api_key(self) -> str:
467
+ if self._pxt_api_key is None:
468
+ raise excs.Error(
469
+ 'No API key is configured. Set the PIXELTABLE_API_KEY environment variable, or add an entry to '
470
+ f'config.toml as described here:\nhttps://pixeltable.github.io/pixeltable/config/'
471
+ )
472
+ return self._pxt_api_key
473
+
464
474
  def get_client(self, name: str) -> Any:
465
475
  """
466
476
  Gets the client with the specified name, initializing it if necessary.
@@ -628,6 +638,13 @@ class Env:
628
638
  )
629
639
  self.__optional_packages['spacy'].is_installed = False
630
640
 
641
+ def clear_tmp_dir(self) -> None:
642
+ for path in glob.glob(f'{self._tmp_dir}/*'):
643
+ if os.path.isdir(path):
644
+ shutil.rmtree(path)
645
+ else:
646
+ os.remove(path)
647
+
631
648
  def num_tmp_files(self) -> int:
632
649
  return len(glob.glob(f'{self._tmp_dir}/*'))
633
650
 
pixeltable/exec/component_iteration_node.py CHANGED
@@ -1,5 +1,4 @@
1
- import inspect
2
- from typing import AsyncIterator, Iterator, Optional
1
+ from typing import AsyncIterator
3
2
 
4
3
  import pixeltable.catalog as catalog
5
4
  import pixeltable.exceptions as excs
pixeltable/exec/expr_eval/evaluators.py CHANGED
@@ -160,8 +160,10 @@ class FnCallEvaluator(Evaluator):
160
160
 
161
161
  def _create_batch_call_args(self, call_args: list[FnCallArgs]) -> FnCallArgs:
162
162
  """Roll call_args into a single batched FnCallArgs"""
163
- batch_args: list[list[Optional[Any]]] = [[None] * len(call_args) for _ in range(len(self.fn_call.args))]
164
- batch_kwargs: dict[str, list[Optional[Any]]] = {k: [None] * len(call_args) for k in self.fn_call.kwargs.keys()}
163
+ batch_args: list[list[Optional[Any]]] = [[None] * len(call_args) for _ in range(len(self.fn_call.arg_idxs))]
164
+ batch_kwargs: dict[str, list[Optional[Any]]] = {
165
+ k: [None] * len(call_args) for k in self.fn_call.kwarg_idxs.keys()
166
+ }
165
167
  assert isinstance(self.fn, func.CallableFunction)
166
168
  for i, item in enumerate(call_args):
167
169
  for j in range(len(item.args)):
pixeltable/exec/expr_eval/expr_eval_node.py CHANGED
@@ -308,7 +308,10 @@ class ExprEvalNode(ExecNode):
308
308
  if self.exc_event.is_set():
309
309
  # we got an exception that we need to propagate through __iter__()
310
310
  _logger.debug(f'Propagating exception {self.error}')
311
- raise self.error
311
+ if isinstance(self.error, excs.ExprEvalError):
312
+ raise self.error from self.error.exc
313
+ else:
314
+ raise self.error
312
315
  if completed_aw in done:
313
316
  self._log_state('completed_aw done')
314
317
  completed_aw = None
pixeltable/exprs/comparison.py CHANGED
@@ -1,11 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Any, Optional
3
+ from typing import TYPE_CHECKING, Any, Optional
4
4
 
5
5
  import sqlalchemy as sql
6
6
 
7
7
  import pixeltable.exceptions as excs
8
- import pixeltable.index as index
9
8
  import pixeltable.type_system as ts
10
9
 
11
10
  from .column_ref import ColumnRef
@@ -16,12 +15,17 @@ from .literal import Literal
16
15
  from .row_builder import RowBuilder
17
16
  from .sql_element_cache import SqlElementCache
18
17
 
18
+ if TYPE_CHECKING:
19
+ from pixeltable import index
20
+
19
21
 
20
22
  class Comparison(Expr):
21
23
  is_search_arg_comparison: bool
22
24
  operator: ComparisonOperator
23
25
 
24
26
  def __init__(self, operator: ComparisonOperator, op1: Expr, op2: Expr):
27
+ from pixeltable import index
28
+
25
29
  super().__init__(ts.BoolType())
26
30
  self.operator = operator
27
31
 
@@ -38,8 +42,6 @@ class Comparison(Expr):
38
42
  self.is_search_arg_comparison = False
39
43
  self.components = [op1, op2]
40
44
 
41
- import pixeltable.index as index
42
-
43
45
  if (
44
46
  self.is_search_arg_comparison
45
47
  and self._op2.col_type.is_string_type()
@@ -71,6 +73,8 @@ class Comparison(Expr):
71
73
  return self.components[1]
72
74
 
73
75
  def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
76
+ from pixeltable import index
77
+
74
78
  if str(self._op1.col_type.to_sa_type()) != str(self._op2.col_type.to_sa_type()):
75
79
  # Comparing columns of different SQL types (e.g., string vs. json); this can only be done in Python
76
80
  # TODO(aaron-siegel): We may be able to handle some cases in SQL by casting one side to the other's type
pixeltable/exprs/data_row.py CHANGED
@@ -4,6 +4,7 @@ import datetime
4
4
  import io
5
5
  import urllib.parse
6
6
  import urllib.request
7
+ from pathlib import Path
7
8
  from typing import Any, Optional
8
9
 
9
10
  import numpy as np
@@ -206,9 +207,10 @@ class DataRow:
206
207
  # local file path
207
208
  assert self.file_urls[idx] is None and self.file_paths[idx] is None
208
209
  if len(parsed.scheme) <= 1:
209
- self.file_urls[idx] = urllib.parse.urljoin('file:', urllib.request.pathname2url(val))
210
- self.file_paths[idx] = val
211
- else:
210
+ path = str(Path(val).absolute()) # Ensure we're using an absolute pathname.
211
+ self.file_urls[idx] = urllib.parse.urljoin('file:', urllib.request.pathname2url(path))
212
+ self.file_paths[idx] = path
213
+ else: # file:// URL
212
214
  self.file_urls[idx] = val
213
215
  # Wrap the path in a url2pathname() call to ensure proper handling on Windows.
214
216
  self.file_paths[idx] = urllib.parse.unquote(urllib.request.url2pathname(parsed.path))
pixeltable/exprs/expr.py CHANGED
@@ -10,6 +10,7 @@ import typing
10
10
  from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, Optional, TypeVar, Union, overload
11
11
  from uuid import UUID
12
12
 
13
+ import numpy as np
13
14
  import sqlalchemy as sql
14
15
  from typing_extensions import Self, _AnnotatedAlias
15
16
 
@@ -379,6 +380,12 @@ class Expr(abc.ABC):
379
380
  @classmethod
380
381
  def from_array(cls, elements: Iterable) -> Optional[Expr]:
381
382
  from .inline_expr import InlineArray
383
+ from .literal import Literal
384
+
385
+ if isinstance(elements, np.ndarray):
386
+ pxttype = ts.ArrayType.from_literal(elements)
387
+ if pxttype is not None:
388
+ return Literal(elements, col_type=pxttype)
382
389
 
383
390
  inline_array = InlineArray(elements)
384
391
  return inline_array.maybe_literal()
@@ -576,7 +583,7 @@ class Expr(abc.ABC):
576
583
 
577
584
  def __bool__(self) -> bool:
578
585
  raise TypeError(
579
- 'Pixeltable expressions cannot be used in conjunction with Python boolean operators (and/or/not)'
586
+ f'Pixeltable expressions cannot be used in conjunction with Python boolean operators (and/or/not)\n{self!r}'
580
587
  )
581
588
 
582
589
  def __lt__(self, other: object) -> 'exprs.Comparison':
@@ -777,7 +784,7 @@ class Expr(abc.ABC):
777
784
  if (
778
785
  len(params) >= 2
779
786
  and second_param.kind not in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
780
- and second_param.default == inspect.Parameter.empty
787
+ and second_param.default is inspect.Parameter.empty
781
788
  ):
782
789
  raise excs.Error(f'Function `{fn.__name__}` has multiple required parameters.')
783
790
  except ValueError: