pixeltable 0.3.4__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of pixeltable might be problematic.

Files changed (57)
  1. pixeltable/__init__.py +1 -0
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/catalog.py +9 -2
  4. pixeltable/catalog/column.py +1 -1
  5. pixeltable/catalog/dir.py +1 -1
  6. pixeltable/catalog/table.py +1 -1
  7. pixeltable/catalog/table_version.py +12 -2
  8. pixeltable/catalog/table_version_path.py +2 -2
  9. pixeltable/catalog/view.py +64 -20
  10. pixeltable/dataframe.py +10 -5
  11. pixeltable/env.py +12 -0
  12. pixeltable/exec/expr_eval/evaluators.py +4 -2
  13. pixeltable/exec/expr_eval/expr_eval_node.py +4 -1
  14. pixeltable/exprs/comparison.py +8 -4
  15. pixeltable/exprs/data_row.py +5 -3
  16. pixeltable/exprs/expr.py +2 -2
  17. pixeltable/exprs/function_call.py +155 -313
  18. pixeltable/func/aggregate_function.py +29 -15
  19. pixeltable/func/callable_function.py +11 -8
  20. pixeltable/func/expr_template_function.py +3 -9
  21. pixeltable/func/function.py +148 -74
  22. pixeltable/func/signature.py +65 -30
  23. pixeltable/func/udf.py +1 -1
  24. pixeltable/functions/__init__.py +1 -0
  25. pixeltable/functions/deepseek.py +121 -0
  26. pixeltable/functions/image.py +7 -7
  27. pixeltable/functions/openai.py +23 -9
  28. pixeltable/functions/video.py +14 -7
  29. pixeltable/globals.py +14 -3
  30. pixeltable/index/embedding_index.py +4 -13
  31. pixeltable/io/globals.py +88 -77
  32. pixeltable/io/hf_datasets.py +34 -34
  33. pixeltable/io/pandas.py +75 -76
  34. pixeltable/io/parquet.py +19 -27
  35. pixeltable/io/utils.py +115 -0
  36. pixeltable/iterators/audio.py +2 -1
  37. pixeltable/iterators/video.py +1 -1
  38. pixeltable/metadata/__init__.py +2 -1
  39. pixeltable/metadata/converters/convert_15.py +18 -8
  40. pixeltable/metadata/converters/convert_27.py +31 -0
  41. pixeltable/metadata/converters/convert_28.py +15 -0
  42. pixeltable/metadata/converters/convert_29.py +111 -0
  43. pixeltable/metadata/converters/util.py +12 -1
  44. pixeltable/metadata/notes.py +3 -0
  45. pixeltable/metadata/schema.py +8 -0
  46. pixeltable/share/__init__.py +1 -0
  47. pixeltable/share/packager.py +41 -13
  48. pixeltable/share/publish.py +97 -0
  49. pixeltable/type_system.py +40 -14
  50. pixeltable/utils/__init__.py +41 -0
  51. pixeltable/utils/arrow.py +40 -7
  52. pixeltable/utils/formatter.py +1 -1
  53. {pixeltable-0.3.4.dist-info → pixeltable-0.3.5.dist-info}/METADATA +34 -49
  54. {pixeltable-0.3.4.dist-info → pixeltable-0.3.5.dist-info}/RECORD +57 -51
  55. {pixeltable-0.3.4.dist-info → pixeltable-0.3.5.dist-info}/WHEEL +1 -1
  56. {pixeltable-0.3.4.dist-info → pixeltable-0.3.5.dist-info}/LICENSE +0 -0
  57. {pixeltable-0.3.4.dist-info → pixeltable-0.3.5.dist-info}/entry_points.txt +0 -0
pixeltable/__init__.py CHANGED
@@ -19,6 +19,7 @@ from .globals import (
     list_functions,
     list_tables,
     move,
+    publish_snapshot,
     tool,
     tools,
 )
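The new top-level export is backed by the new pixeltable/share/publish.py module listed above; its signature is not visible in this hunk. A safe way to explore it without assuming any parameters:

    import pixeltable as pxt

    assert callable(pxt.publish_snapshot)
    help(pxt.publish_snapshot)  # prints the actual signature and docstring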
pixeltable/__version__.py CHANGED
@@ -1,3 +1,3 @@
 # These version placeholders will be replaced during build.
-__version__ = '0.3.4'
-__version_tuple__ = (0, 3, 4)
+__version__ = '0.3.5'
+__version_tuple__ = (0, 3, 5)
pixeltable/catalog/catalog.py CHANGED
@@ -58,7 +58,7 @@ class Catalog:
         if session.query(sql.func.count(schema.Dir.id)).scalar() > 0:
             return
         # create a top-level directory, so that every schema object has a directory
-        dir_md = schema.DirMd(name='')
+        dir_md = schema.DirMd(name='', user=None, additional_md={})
         dir_record = schema.Dir(parent_id=None, md=dataclasses.asdict(dir_md))
         session.add(dir_record)
         session.flush()
@@ -142,6 +142,7 @@ class Catalog:
         base_tbl_id = base_path.tbl_id()
         is_snapshot = view_md is not None and view_md.is_snapshot
         snapshot_only = is_snapshot and view_md.predicate is None and len(schema_version_md.columns) == 0
+        include_base_columns = view_md is None or view_md.include_base_columns
         if snapshot_only:
             # this is a pure snapshot, without a physical table backing it
             view_path = base_path
@@ -158,7 +159,13 @@
             view_path = TableVersionPath(tbl_version, base=base_path)
 
         tbl: Table = View(
-            tbl_record.id, tbl_record.dir_id, tbl_md.name, view_path, base_tbl_id, snapshot_only=snapshot_only
+            tbl_record.id,
+            tbl_record.dir_id,
+            tbl_md.name,
+            view_path,
+            base_tbl_id,
+            snapshot_only=snapshot_only,
+            include_base_columns=include_base_columns,
         )
         self.tbl_dependents[base_tbl_id].append(tbl)
 
pixeltable/catalog/column.py CHANGED
@@ -72,7 +72,7 @@ class Column:
         col_type is None
         - when loaded from md store: ``computed_with`` is set and col_type is set
 
-        ``stored`` (only valid for computed image columns):
+        ``stored`` (only valid for computed columns):
         - if True: the column is present in the stored table
         - if False: the column is not present in the stored table and recomputed during a query
         - if None: the system chooses for you (at present, this is always False, but this may change in the future)
pixeltable/catalog/dir.py CHANGED
@@ -32,7 +32,7 @@ class Dir(SchemaObject):
     def _move(self, new_name: str, new_dir_id: UUID) -> None:
         super()._move(new_name, new_dir_id)
         with Env.get().engine.begin() as conn:
-            dir_md = schema.DirMd(name=new_name)
+            dir_md = schema.DirMd(name=new_name, user=None, additional_md={})
             conn.execute(
                 sql.update(schema.Dir.__table__)
                 .values({schema.Dir.parent_id: self._dir_id, schema.Dir.md: dataclasses.asdict(dir_md)})
pixeltable/catalog/table.py CHANGED
@@ -569,7 +569,7 @@ class Table(SchemaObject):
 
         Args:
             kwargs: Exactly one keyword argument of the form `col_name=expression`.
-            stored: Whether the column is materialized and stored or computed on demand. Only valid for image columns.
+            stored: Whether the column is materialized and stored or computed on demand.
             print_stats: If `True`, print execution metrics during evaluation.
             on_error: Determines the behavior if an error occurs while evaluating the column expression for at least one
                 row.
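This docstring edit, together with the matching change in pixeltable/catalog/column.py above, drops the "image columns only" restriction on `stored`. A minimal sketch of what that permits, assuming this is the docstring of Table.add_computed_column (the `col_name=expression` kwargs form suggests so); the table and column names are hypothetical:

    import pixeltable as pxt

    t = pxt.create_table('frames_demo', {'width': pxt.Int, 'height': pxt.Int})
    t.add_computed_column(area=t.width * t.height, stored=True)     # materialized in the store
    t.add_computed_column(aspect=t.width / t.height, stored=False)  # recomputed at query time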
pixeltable/catalog/table_version.py CHANGED
@@ -59,6 +59,7 @@ class TableVersion:
     schema_version: int
     view_md: Optional[schema.ViewMd]
     is_snapshot: bool
+    include_base_columns: bool
     effective_version: Optional[int]
     path: Optional[pxt.catalog.TableVersionPath]
     base: Optional[TableVersion]
@@ -115,6 +116,7 @@
         self.view_md = tbl_md.view_md  # save this as-is, it's needed for _create_md()
         is_view = tbl_md.view_md is not None
         self.is_snapshot = (is_view and tbl_md.view_md.is_snapshot) or bool(is_snapshot)
+        self.include_base_columns = not is_view or tbl_md.view_md.include_base_columns
         self.media_validation = MediaValidation[schema_version_md.media_validation.upper()]
         # a mutable TableVersion doesn't have a static version
         self.effective_version = self.version if self.is_snapshot else None
@@ -228,6 +230,7 @@
         column_md = cls._create_column_md(cols)
         table_md = schema.TableMd(
             name=name,
+            user=None,
             current_version=0,
             current_schema_version=0,
             next_col_id=len(cols),
@@ -237,6 +240,7 @@
             index_md={},
             external_stores=[],
             view_md=view_md,
+            additional_md={},
         )
         # create a schema.Table here, we need it to call our c'tor;
         # don't add it to the session yet, we might add index metadata
@@ -244,7 +248,7 @@
         tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(table_md))
 
         # create schema.TableVersion
-        table_version_md = schema.TableVersionMd(created_at=timestamp, version=0, schema_version=0)
+        table_version_md = schema.TableVersionMd(created_at=timestamp, version=0, schema_version=0, additional_md={})
         tbl_version_record = schema.TableVersion(
             tbl_id=tbl_record.id, version=0, md=dataclasses.asdict(table_version_md)
         )
@@ -266,6 +270,7 @@
             num_retained_versions=num_retained_versions,
             comment=comment,
             media_validation=media_validation.name.lower(),
+            additional_md={},
         )
         schema_version_record = schema.TableSchemaVersion(
             tbl_id=tbl_record.id, schema_version=0, md=dataclasses.asdict(schema_version_md)
@@ -1342,6 +1347,7 @@
     def _create_tbl_md(self) -> schema.TableMd:
         return schema.TableMd(
             name=self.name,
+            user=None,
             current_version=self.version,
             current_schema_version=self.schema_version,
             next_col_id=self.next_col_id,
@@ -1351,10 +1357,13 @@
             index_md=self.idx_md,
             external_stores=self._create_stores_md(self.external_stores.values()),
             view_md=self.view_md,
+            additional_md={},
         )
 
     def _create_version_md(self, timestamp: float) -> schema.TableVersionMd:
-        return schema.TableVersionMd(created_at=timestamp, version=self.version, schema_version=self.schema_version)
+        return schema.TableVersionMd(
+            created_at=timestamp, version=self.version, schema_version=self.schema_version, additional_md={}
+        )
 
     def _create_schema_version_md(self, preceding_schema_version: int) -> schema.TableSchemaVersionMd:
         column_md: dict[int, schema.SchemaColumn] = {}
@@ -1372,6 +1381,7 @@
             num_retained_versions=self.num_retained_versions,
             comment=self.comment,
             media_validation=self.media_validation.name.lower(),
+            additional_md={},
         )
 
     def as_dict(self) -> dict:
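The recurring `user=None` and `additional_md={}` arguments suggest new optional metadata fields with safe defaults (the converters 27-29 in the file list presumably backfill them for existing catalogs). A generic sketch of that pattern, using a hypothetical dataclass rather than pixeltable's actual schema classes:

    from dataclasses import dataclass, field
    from typing import Any, Optional

    @dataclass
    class TableMdExample:  # hypothetical stand-in for a metadata record
        name: str
        current_version: int
        user: Optional[str] = None  # new optional field, defaulted for old records
        additional_md: dict[str, Any] = field(default_factory=dict)  # extension point

    # a record written before the new fields existed simply picks up the defaults
    print(TableMdExample(name='films', current_version=3))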
pixeltable/catalog/table_version_path.py CHANGED
@@ -86,7 +86,7 @@ class TableVersionPath:
         from pixeltable.exprs import ColumnRef
 
         if col_name not in self.tbl_version.cols_by_name:
-            if self.base is None:
+            if self.base is None or not self.tbl_version.include_base_columns:
                 raise AttributeError(f'Column {col_name} unknown')
             return self.base.get_column_ref(col_name)
         col = self.tbl_version.cols_by_name[col_name]
@@ -95,7 +95,7 @@
     def columns(self) -> list[Column]:
         """Return all user columns visible in this tbl version path, including columns from bases"""
         result = list(self.tbl_version.cols_by_name.values())
-        if self.base is not None:
+        if self.base is not None and self.tbl_version.include_base_columns:
             base_cols = self.base.columns()
             # we only include base columns that don't conflict with one of our column names
             result.extend(c for c in base_cols if c.name not in self.tbl_version.cols_by_name)
pixeltable/catalog/view.py CHANGED
@@ -2,7 +2,7 @@ from __future__ import annotations
 
 import inspect
 import logging
-from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional
+from typing import TYPE_CHECKING, Any, Iterable, List, Literal, Optional
 from uuid import UUID
 
 import sqlalchemy.orm as orm
@@ -37,7 +37,14 @@ class View(Table):
     """
 
     def __init__(
-        self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath, base_id: UUID, snapshot_only: bool
+        self,
+        id: UUID,
+        dir_id: UUID,
+        name: str,
+        tbl_version_path: TableVersionPath,
+        base_id: UUID,
+        snapshot_only: bool,
+        include_base_columns: bool,
     ):
         super().__init__(id, dir_id, name, tbl_version_path)
         assert base_id in catalog.Catalog.get().tbl_dependents
@@ -48,12 +55,28 @@ class View(Table):
     def _display_name(cls) -> str:
         return 'view'
 
+    @classmethod
+    def select_list_to_additional_columns(cls, select_list: list[tuple[exprs.Expr, Optional[str]]]) -> dict[str, dict]:
+        """Returns a list of columns in the same format as the additional_columns parameter of View.create.
+        The source is the list of expressions from a select() statement on a DataFrame.
+        If the column is a ColumnRef, to a base table column, it is marked to not be stored.sy
+        """
+        from pixeltable.dataframe import DataFrame
+
+        r: dict[str, dict] = {}
+        exps, names = DataFrame._normalize_select_list([], select_list)
+        for expr, name in zip(exps, names):
+            stored = not isinstance(expr, exprs.ColumnRef)
+            r[name] = {'value': expr, 'stored': stored}
+        return r
+
     @classmethod
     def _create(
         cls,
         dir_id: UUID,
         name: str,
         base: TableVersionPath,
+        select_list: Optional[list[tuple[exprs.Expr, Optional[str]]]],
         additional_columns: dict[str, Any],
         predicate: Optional['pxt.exprs.Expr'],
         is_snapshot: bool,
@@ -63,7 +86,15 @@ class View(Table):
         iterator_cls: Optional[type[ComponentIterator]],
         iterator_args: Optional[dict],
     ) -> View:
-        columns = cls._create_columns(additional_columns)
+        # Convert select_list to more additional_columns if present
+        include_base_columns: bool = select_list is None
+        select_list_columns: List[Column] = []
+        if not include_base_columns:
+            r = cls.select_list_to_additional_columns(select_list)
+            select_list_columns = cls._create_columns(r)
+
+        columns_from_additional_columns = cls._create_columns(additional_columns)
+        columns = select_list_columns + columns_from_additional_columns
         cls._verify_schema(columns)
 
         # verify that filter can be evaluated in the context of the base
@@ -88,31 +119,34 @@ class View(Table):
 
         # validate iterator_args
         py_signature = inspect.signature(iterator_cls.__init__)
+
+        # make sure iterator_args can be used to instantiate iterator_cls
+        bound_args: dict[str, Any]
         try:
-            # make sure iterator_args can be used to instantiate iterator_cls
             bound_args = py_signature.bind(None, **iterator_args).arguments  # None: arg for self
-            # we ignore 'self'
-            first_param_name = next(iter(py_signature.parameters))  # can't guarantee it's actually 'self'
-            del bound_args[first_param_name]
-
-            # construct Signature and type-check bound_args
-            params = [
-                func.Parameter(param_name, param_type, kind=inspect.Parameter.POSITIONAL_OR_KEYWORD)
-                for param_name, param_type in iterator_cls.input_schema().items()
-            ]
-            sig = func.Signature(ts.InvalidType(), params)
-            from pixeltable.exprs import FunctionCall
-
-            FunctionCall.normalize_args(iterator_cls.__name__, sig, bound_args)
         except TypeError as e:
-            raise excs.Error(f'Cannot instantiate iterator with given arguments: {e}')
+            raise excs.Error(f'Invalid iterator arguments: {e}')
+        # we ignore 'self'
+        first_param_name = next(iter(py_signature.parameters))  # can't guarantee it's actually 'self'
+        del bound_args[first_param_name]
+
+        # construct Signature and type-check bound_args
+        params = [
+            func.Parameter(param_name, param_type, kind=inspect.Parameter.POSITIONAL_OR_KEYWORD)
+            for param_name, param_type in iterator_cls.input_schema().items()
+        ]
+        sig = func.Signature(ts.InvalidType(), params)
+
+        expr_args = {k: exprs.Expr.from_object(v) for k, v in bound_args.items()}
+        sig.validate_args(expr_args, context=f'in iterator {iterator_cls.__name__!r}')
+        literal_args = {k: v.val if isinstance(v, exprs.Literal) else v for k, v in expr_args.items()}
 
         # prepend pos and output_schema columns to cols:
         # a component view exposes the pos column of its rowid;
         # we create that column here, so it gets assigned a column id;
         # stored=False: it is not stored separately (it's already stored as part of the rowid)
         iterator_cols = [Column(_POS_COLUMN_NAME, ts.IntType(), stored=False)]
-        output_dict, unstored_cols = iterator_cls.output_schema(**bound_args)
+        output_dict, unstored_cols = iterator_cls.output_schema(**literal_args)
         iterator_cols.extend(
             [
                 Column(col_name, col_type, stored=col_name not in unstored_cols)
@@ -153,6 +187,7 @@ class View(Table):
 
         view_md = md_schema.ViewMd(
             is_snapshot=is_snapshot,
+            include_base_columns=include_base_columns,
             predicate=predicate.as_dict() if predicate is not None else None,
             base_versions=base_versions,
             iterator_class_fqn=iterator_class_fqn,
@@ -172,7 +207,15 @@ class View(Table):
         )
         if tbl_version is None:
             # this is purely a snapshot: we use the base's tbl version path
-            view = cls(id, dir_id, name, base_version_path, base.tbl_id(), snapshot_only=True)
+            view = cls(
+                id,
+                dir_id,
+                name,
+                base_version_path,
+                base.tbl_id(),
+                snapshot_only=True,
+                include_base_columns=include_base_columns,
+            )
             _logger.info(f'created snapshot {name}')
         else:
             view = cls(
@@ -182,6 +225,7 @@ class View(Table):
                 TableVersionPath(tbl_version, base=base_version_path),
                 base.tbl_id(),
                 snapshot_only=False,
+                include_base_columns=include_base_columns,
             )
             _logger.info(f'Created view `{name}`, id={tbl_version.id}')
 
pixeltable/dataframe.py CHANGED
@@ -951,7 +951,7 @@
 
         >>> df = person.where(t.year == 2014).update({'age': 30})
         """
-        self._validate_mutable('update')
+        self._validate_mutable('update', False)
         return self._first_tbl.tbl_version.update(value_spec, where=self.where_clause, cascade=cascade)
 
     def delete(self) -> UpdateStatus:
@@ -971,18 +971,23 @@
 
         >>> df = person.where(t.age < 18).delete()
         """
-        self._validate_mutable('delete')
+        self._validate_mutable('delete', False)
         if not self._first_tbl.is_insertable():
             raise excs.Error(f'Cannot delete from view')
         return self._first_tbl.tbl_version.delete(where=self.where_clause)
 
-    def _validate_mutable(self, op_name: str) -> None:
-        """Tests whether this DataFrame can be mutated (such as by an update operation)."""
+    def _validate_mutable(self, op_name: str, allow_select: bool) -> None:
+        """Tests whether this DataFrame can be mutated (such as by an update operation).
+
+        Args:
+            op_name: The name of the operation for which the test is being performed.
+            allow_select: If True, allow a select() specification in the Dataframe.
+        """
         if self.group_by_clause is not None or self.grouping_tbl is not None:
             raise excs.Error(f'Cannot use `{op_name}` after `group_by`')
         if self.order_by_clause is not None:
             raise excs.Error(f'Cannot use `{op_name}` after `order_by`')
-        if self.select_list is not None:
+        if self.select_list is not None and not allow_select:
             raise excs.Error(f'Cannot use `{op_name}` after `select`')
         if self.limit_val is not None:
             raise excs.Error(f'Cannot use `{op_name}` after `limit`')
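Taken together with the select_list plumbing in pixeltable/catalog/view.py above, the new allow_select flag points at views built from a DataFrame that carries a column selection (the call site passing True is not shown in this diff). A sketch of how that might look from the public API, assuming create_view() accepts a DataFrame with a select() clause, consistent with this diff; table and column names are hypothetical:

    import pixeltable as pxt

    t = pxt.get_table('films_demo')
    v = pxt.create_view(
        'recent_films',
        t.where(t.year >= 2020).select(t.title, years_since_2020=t.year - 2020),
    )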
pixeltable/env.py CHANGED
@@ -76,6 +76,8 @@ class Env:
     _module_log_level: dict[str, int]  # module name -> log level
     _config_file: Optional[Path]
     _config: Optional[Config]
+    _file_cache_size_g: float
+    _pxt_api_key: Optional[str]
     _stdout_handler: logging.StreamHandler
     _initialized: bool
@@ -289,6 +291,7 @@
                 f'(either add a `file_cache_size_g` entry to the `pixeltable` section of {self._config_file},\n'
                 'or set the PIXELTABLE_FILE_CACHE_SIZE_G environment variable)'
             )
+        self._pxt_api_key = self._config.get_string_value('api_key')
 
         # Disable spurious warnings
         warnings.simplefilter('ignore', category=TqdmWarning)
@@ -459,6 +462,15 @@
     def _upgrade_metadata(self) -> None:
         metadata.upgrade_md(self._sa_engine)
 
+    @property
+    def pxt_api_key(self) -> str:
+        if self._pxt_api_key is None:
+            raise excs.Error(
+                'No API key is configured. Set the PIXELTABLE_API_KEY environment variable, or add an entry to '
+                f'config.toml as described here:\nhttps://pixeltable.github.io/pixeltable/config/'
+            )
+        return self._pxt_api_key
+
     def get_client(self, name: str) -> Any:
         """
         Gets the client with the specified name, initializing it if necessary.
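The new property turns a missing key into an actionable error. One way to satisfy it, per the error message itself, is the environment variable; the config-file alternative is an `api_key` entry read via get_string_value('api_key'). A minimal sketch with a placeholder value, assuming the config layer consults environment variables as the file-cache message above indicates:

    import os

    os.environ['PIXELTABLE_API_KEY'] = 'my-key'  # placeholder value

    import pixeltable as pxt  # the key is picked up when the environment is initialized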
pixeltable/exec/expr_eval/evaluators.py CHANGED
@@ -160,8 +160,10 @@ class FnCallEvaluator(Evaluator):
 
     def _create_batch_call_args(self, call_args: list[FnCallArgs]) -> FnCallArgs:
         """Roll call_args into a single batched FnCallArgs"""
-        batch_args: list[list[Optional[Any]]] = [[None] * len(call_args) for _ in range(len(self.fn_call.args))]
-        batch_kwargs: dict[str, list[Optional[Any]]] = {k: [None] * len(call_args) for k in self.fn_call.kwargs.keys()}
+        batch_args: list[list[Optional[Any]]] = [[None] * len(call_args) for _ in range(len(self.fn_call.arg_idxs))]
+        batch_kwargs: dict[str, list[Optional[Any]]] = {
+            k: [None] * len(call_args) for k in self.fn_call.kwarg_idxs.keys()
+        }
         assert isinstance(self.fn, func.CallableFunction)
         for i, item in enumerate(call_args):
             for j in range(len(item.args)):
pixeltable/exec/expr_eval/expr_eval_node.py CHANGED
@@ -308,7 +308,10 @@ class ExprEvalNode(ExecNode):
             if self.exc_event.is_set():
                 # we got an exception that we need to propagate through __iter__()
                 _logger.debug(f'Propagating exception {self.error}')
-                raise self.error
+                if isinstance(self.error, excs.ExprEvalError):
+                    raise self.error from self.error.exc
+                else:
+                    raise self.error
             if completed_aw in done:
                 self._log_state('completed_aw done')
                 completed_aw = None
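Re-raising with `from` preserves the original exception as `__cause__`, so the traceback shows where evaluation actually failed rather than only the propagation point. A self-contained illustration (not pixeltable code):

    class EvalError(Exception):
        # stand-in for excs.ExprEvalError: wraps the original exception in .exc
        def __init__(self, msg: str, exc: Exception):
            super().__init__(msg)
            self.exc = exc

    def udf(x: int) -> int:
        return 1 // x  # ZeroDivisionError for x == 0

    try:
        try:
            udf(0)
        except Exception as e:
            raise EvalError('error evaluating udf(0)', e) from e
    except EvalError as err:
        # the original failure is chained, so a full traceback shows both frames
        assert err.__cause__ is err.exc
        print(type(err.__cause__).__name__)  # ZeroDivisionError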
pixeltable/exprs/comparison.py CHANGED
@@ -1,11 +1,10 @@
 from __future__ import annotations
 
-from typing import Any, Optional
+from typing import TYPE_CHECKING, Any, Optional
 
 import sqlalchemy as sql
 
 import pixeltable.exceptions as excs
-import pixeltable.index as index
 import pixeltable.type_system as ts
 
 from .column_ref import ColumnRef
@@ -16,12 +15,17 @@ from .literal import Literal
 from .row_builder import RowBuilder
 from .sql_element_cache import SqlElementCache
 
+if TYPE_CHECKING:
+    from pixeltable import index
+
 
 class Comparison(Expr):
     is_search_arg_comparison: bool
     operator: ComparisonOperator
 
     def __init__(self, operator: ComparisonOperator, op1: Expr, op2: Expr):
+        from pixeltable import index
+
         super().__init__(ts.BoolType())
         self.operator = operator
 
@@ -38,8 +42,6 @@
         self.is_search_arg_comparison = False
         self.components = [op1, op2]
 
-        import pixeltable.index as index
-
         if (
             self.is_search_arg_comparison
             and self._op2.col_type.is_string_type()
@@ -71,6 +73,8 @@
         return self.components[1]
 
     def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
+        from pixeltable import index
+
         if str(self._op1.col_type.to_sa_type()) != str(self._op2.col_type.to_sa_type()):
             # Comparing columns of different SQL types (e.g., string vs. json); this can only be done in Python
             # TODO(aaron-siegel): We may be able to handle some cases in SQL by casting one side to the other's type
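Moving the `index` import under TYPE_CHECKING plus local imports at the call sites keeps the name available to type checkers without importing the module at load time, presumably to avoid a circular import between pixeltable.exprs and pixeltable.index. The mechanics, shown with a stdlib module as a stand-in:

    from __future__ import annotations

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # visible to static type checkers only; nothing is imported at module load time
        import decimal

    def to_decimal(s: str) -> decimal.Decimal:
        import decimal  # deferred to call time, like the local `index` imports above
        return decimal.Decimal(s)

    print(to_decimal('1.5'))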
pixeltable/exprs/data_row.py CHANGED
@@ -4,6 +4,7 @@ import datetime
 import io
 import urllib.parse
 import urllib.request
+from pathlib import Path
 from typing import Any, Optional
 
 import numpy as np
@@ -206,9 +207,10 @@
             # local file path
             assert self.file_urls[idx] is None and self.file_paths[idx] is None
             if len(parsed.scheme) <= 1:
-                self.file_urls[idx] = urllib.parse.urljoin('file:', urllib.request.pathname2url(val))
-                self.file_paths[idx] = val
-            else:
+                path = str(Path(val).absolute())  # Ensure we're using an absolute pathname.
+                self.file_urls[idx] = urllib.parse.urljoin('file:', urllib.request.pathname2url(path))
+                self.file_paths[idx] = path
+            else:  # file:// URL
                 self.file_urls[idx] = val
                 # Wrap the path in a url2pathname() call to ensure proper handling on Windows.
                 self.file_paths[idx] = urllib.parse.unquote(urllib.request.url2pathname(parsed.path))
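Building the `file:` URL from an absolute path avoids URLs that silently depend on the current working directory. A quick stdlib-only illustration (hypothetical path, not pixeltable code):

    import urllib.parse
    import urllib.request
    from pathlib import Path

    rel = 'media/frame.jpg'  # hypothetical relative path
    print(urllib.parse.urljoin('file:', urllib.request.pathname2url(rel)))

    abs_path = str(Path(rel).absolute())  # what DataRow now does before building the URL
    print(urllib.parse.urljoin('file:', urllib.request.pathname2url(abs_path)))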
pixeltable/exprs/expr.py CHANGED
@@ -583,7 +583,7 @@
 
     def __bool__(self) -> bool:
         raise TypeError(
-            'Pixeltable expressions cannot be used in conjunction with Python boolean operators (and/or/not)'
+            f'Pixeltable expressions cannot be used in conjunction with Python boolean operators (and/or/not)\n{self!r}'
         )
 
     def __lt__(self, other: object) -> 'exprs.Comparison':
@@ -784,7 +784,7 @@
             if (
                 len(params) >= 2
                 and second_param.kind not in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
-                and second_param.default == inspect.Parameter.empty
+                and second_param.default is inspect.Parameter.empty
             ):
                 raise excs.Error(f'Function `{fn.__name__}` has multiple required parameters.')
         except ValueError:
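Comparing against the inspect.Parameter.empty sentinel with `is` avoids invoking a user-defined `__eq__` on the parameter's default value, which matters when the default is an object like a Pixeltable expression (whose `__bool__`, per the first hunk in this file, refuses truth-testing). A small stand-in example:

    import inspect

    class FancyDefault:
        # stand-in for a default value (e.g. an expression object) whose __eq__
        # does not return a plain bool
        def __eq__(self, other):
            return self

    def f(a, b=FancyDefault()):
        pass

    param = inspect.signature(f).parameters['b']
    print(param.default is inspect.Parameter.empty)  # False, without ever calling __eq__
    # `param.default == inspect.Parameter.empty` would call FancyDefault.__eq__ instead,
    # and its result would not behave like a plain bool in an `if` test.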