pixeltable-0.3.2-py3-none-any.whl → pixeltable-0.3.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (150)
  1. pixeltable/__init__.py +64 -11
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -1
  4. pixeltable/catalog/catalog.py +50 -27
  5. pixeltable/catalog/column.py +27 -11
  6. pixeltable/catalog/dir.py +6 -4
  7. pixeltable/catalog/globals.py +8 -1
  8. pixeltable/catalog/insertable_table.py +22 -12
  9. pixeltable/catalog/named_function.py +10 -6
  10. pixeltable/catalog/path.py +3 -2
  11. pixeltable/catalog/path_dict.py +8 -6
  12. pixeltable/catalog/schema_object.py +2 -1
  13. pixeltable/catalog/table.py +121 -101
  14. pixeltable/catalog/table_version.py +291 -142
  15. pixeltable/catalog/table_version_path.py +8 -5
  16. pixeltable/catalog/view.py +67 -26
  17. pixeltable/dataframe.py +106 -81
  18. pixeltable/env.py +28 -24
  19. pixeltable/exec/__init__.py +2 -2
  20. pixeltable/exec/aggregation_node.py +10 -4
  21. pixeltable/exec/cache_prefetch_node.py +5 -3
  22. pixeltable/exec/component_iteration_node.py +9 -9
  23. pixeltable/exec/data_row_batch.py +21 -10
  24. pixeltable/exec/exec_context.py +10 -3
  25. pixeltable/exec/exec_node.py +23 -12
  26. pixeltable/exec/expr_eval/evaluators.py +13 -7
  27. pixeltable/exec/expr_eval/expr_eval_node.py +24 -15
  28. pixeltable/exec/expr_eval/globals.py +30 -7
  29. pixeltable/exec/expr_eval/row_buffer.py +5 -6
  30. pixeltable/exec/expr_eval/schedulers.py +151 -31
  31. pixeltable/exec/in_memory_data_node.py +8 -7
  32. pixeltable/exec/row_update_node.py +15 -5
  33. pixeltable/exec/sql_node.py +56 -27
  34. pixeltable/exprs/__init__.py +2 -2
  35. pixeltable/exprs/arithmetic_expr.py +57 -26
  36. pixeltable/exprs/array_slice.py +1 -1
  37. pixeltable/exprs/column_property_ref.py +2 -1
  38. pixeltable/exprs/column_ref.py +20 -15
  39. pixeltable/exprs/comparison.py +6 -2
  40. pixeltable/exprs/compound_predicate.py +1 -3
  41. pixeltable/exprs/data_row.py +2 -2
  42. pixeltable/exprs/expr.py +108 -72
  43. pixeltable/exprs/expr_dict.py +2 -1
  44. pixeltable/exprs/expr_set.py +3 -1
  45. pixeltable/exprs/function_call.py +39 -41
  46. pixeltable/exprs/globals.py +1 -0
  47. pixeltable/exprs/in_predicate.py +2 -2
  48. pixeltable/exprs/inline_expr.py +20 -17
  49. pixeltable/exprs/json_mapper.py +4 -2
  50. pixeltable/exprs/json_path.py +12 -18
  51. pixeltable/exprs/literal.py +5 -9
  52. pixeltable/exprs/method_ref.py +1 -0
  53. pixeltable/exprs/object_ref.py +1 -1
  54. pixeltable/exprs/row_builder.py +32 -17
  55. pixeltable/exprs/rowid_ref.py +14 -5
  56. pixeltable/exprs/similarity_expr.py +11 -6
  57. pixeltable/exprs/sql_element_cache.py +1 -1
  58. pixeltable/exprs/type_cast.py +24 -9
  59. pixeltable/ext/__init__.py +1 -0
  60. pixeltable/ext/functions/__init__.py +1 -0
  61. pixeltable/ext/functions/whisperx.py +2 -2
  62. pixeltable/ext/functions/yolox.py +11 -11
  63. pixeltable/func/aggregate_function.py +17 -13
  64. pixeltable/func/callable_function.py +6 -6
  65. pixeltable/func/expr_template_function.py +15 -14
  66. pixeltable/func/function.py +16 -16
  67. pixeltable/func/function_registry.py +11 -8
  68. pixeltable/func/globals.py +4 -2
  69. pixeltable/func/query_template_function.py +12 -13
  70. pixeltable/func/signature.py +18 -9
  71. pixeltable/func/tools.py +10 -17
  72. pixeltable/func/udf.py +106 -11
  73. pixeltable/functions/__init__.py +21 -2
  74. pixeltable/functions/anthropic.py +16 -12
  75. pixeltable/functions/fireworks.py +63 -5
  76. pixeltable/functions/gemini.py +13 -3
  77. pixeltable/functions/globals.py +18 -6
  78. pixeltable/functions/huggingface.py +20 -38
  79. pixeltable/functions/image.py +7 -3
  80. pixeltable/functions/json.py +1 -0
  81. pixeltable/functions/llama_cpp.py +1 -4
  82. pixeltable/functions/mistralai.py +31 -20
  83. pixeltable/functions/ollama.py +4 -18
  84. pixeltable/functions/openai.py +231 -113
  85. pixeltable/functions/replicate.py +11 -10
  86. pixeltable/functions/string.py +70 -7
  87. pixeltable/functions/timestamp.py +21 -8
  88. pixeltable/functions/together.py +66 -52
  89. pixeltable/functions/video.py +1 -0
  90. pixeltable/functions/vision.py +14 -11
  91. pixeltable/functions/whisper.py +2 -1
  92. pixeltable/globals.py +60 -26
  93. pixeltable/index/__init__.py +1 -1
  94. pixeltable/index/btree.py +5 -3
  95. pixeltable/index/embedding_index.py +15 -14
  96. pixeltable/io/__init__.py +1 -1
  97. pixeltable/io/external_store.py +30 -25
  98. pixeltable/io/fiftyone.py +6 -14
  99. pixeltable/io/globals.py +33 -27
  100. pixeltable/io/hf_datasets.py +2 -1
  101. pixeltable/io/label_studio.py +77 -68
  102. pixeltable/io/pandas.py +36 -23
  103. pixeltable/io/parquet.py +9 -12
  104. pixeltable/iterators/__init__.py +1 -0
  105. pixeltable/iterators/audio.py +205 -0
  106. pixeltable/iterators/document.py +19 -8
  107. pixeltable/iterators/image.py +6 -24
  108. pixeltable/iterators/string.py +3 -6
  109. pixeltable/iterators/video.py +1 -7
  110. pixeltable/metadata/__init__.py +7 -1
  111. pixeltable/metadata/converters/convert_10.py +2 -2
  112. pixeltable/metadata/converters/convert_15.py +1 -5
  113. pixeltable/metadata/converters/convert_16.py +2 -4
  114. pixeltable/metadata/converters/convert_17.py +2 -4
  115. pixeltable/metadata/converters/convert_18.py +2 -4
  116. pixeltable/metadata/converters/convert_19.py +2 -5
  117. pixeltable/metadata/converters/convert_20.py +1 -4
  118. pixeltable/metadata/converters/convert_21.py +4 -6
  119. pixeltable/metadata/converters/convert_22.py +1 -0
  120. pixeltable/metadata/converters/convert_23.py +5 -5
  121. pixeltable/metadata/converters/convert_24.py +12 -13
  122. pixeltable/metadata/converters/convert_26.py +23 -0
  123. pixeltable/metadata/converters/util.py +3 -4
  124. pixeltable/metadata/notes.py +1 -0
  125. pixeltable/metadata/schema.py +13 -2
  126. pixeltable/plan.py +173 -98
  127. pixeltable/share/__init__.py +0 -0
  128. pixeltable/share/packager.py +218 -0
  129. pixeltable/store.py +42 -26
  130. pixeltable/type_system.py +102 -75
  131. pixeltable/utils/arrow.py +7 -8
  132. pixeltable/utils/coco.py +16 -17
  133. pixeltable/utils/code.py +1 -1
  134. pixeltable/utils/console_output.py +6 -3
  135. pixeltable/utils/description_helper.py +7 -7
  136. pixeltable/utils/documents.py +3 -1
  137. pixeltable/utils/filecache.py +12 -7
  138. pixeltable/utils/http_server.py +9 -8
  139. pixeltable/utils/iceberg.py +14 -0
  140. pixeltable/utils/media_store.py +3 -2
  141. pixeltable/utils/pytorch.py +11 -14
  142. pixeltable/utils/s3.py +1 -0
  143. pixeltable/utils/sql.py +1 -0
  144. pixeltable/utils/transactional_directory.py +2 -2
  145. {pixeltable-0.3.2.dist-info → pixeltable-0.3.4.dist-info}/METADATA +9 -9
  146. pixeltable-0.3.4.dist-info/RECORD +166 -0
  147. pixeltable-0.3.2.dist-info/RECORD +0 -161
  148. {pixeltable-0.3.2.dist-info → pixeltable-0.3.4.dist-info}/LICENSE +0 -0
  149. {pixeltable-0.3.2.dist-info → pixeltable-0.3.4.dist-info}/WHEEL +0 -0
  150. {pixeltable-0.3.2.dist-info → pixeltable-0.3.4.dist-info}/entry_points.txt +0 -0
@@ -84,6 +84,7 @@ class TableVersionPath:
84
84
  def get_column_ref(self, col_name: str) -> exprs.ColumnRef:
85
85
  """Return a ColumnRef for the given column name."""
86
86
  from pixeltable.exprs import ColumnRef
87
+
87
88
  if col_name not in self.tbl_version.cols_by_name:
88
89
  if self.base is None:
89
90
  raise AttributeError(f'Column {col_name} unknown')
@@ -121,11 +122,13 @@ class TableVersionPath:
121
122
  return None
122
123
 
123
124
  def has_column(self, col: Column, include_bases: bool = True) -> bool:
124
- """Return True if this table has the given column.
125
- """
125
+ """Return True if this table has the given column."""
126
126
  assert col.tbl is not None
127
- if col.tbl.id == self.tbl_version.id and col.tbl.effective_version == self.tbl_version.effective_version \
128
- and col.id in self.tbl_version.cols_by_id:
127
+ if (
128
+ col.tbl.id == self.tbl_version.id
129
+ and col.tbl.effective_version == self.tbl_version.effective_version
130
+ and col.id in self.tbl_version.cols_by_id
131
+ ):
129
132
  # the column is visible in this table version
130
133
  return True
131
134
  elif self.base is not None and include_bases:
@@ -136,7 +139,7 @@ class TableVersionPath:
136
139
  def as_dict(self) -> dict:
137
140
  return {
138
141
  'tbl_version': self.tbl_version.as_dict(),
139
- 'base': self.base.as_dict() if self.base is not None else None
142
+ 'base': self.base.as_dict() if self.base is not None else None,
140
143
  }
141
144
 
142
145
  @classmethod
@@ -35,9 +35,10 @@ class View(Table):
35
35
  The exception is a snapshot view without a predicate and without additional columns: in that case, the view
36
36
  is simply a reference to a specific set of base versions.
37
37
  """
38
+
38
39
  def __init__(
39
- self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath, base_id: UUID,
40
- snapshot_only: bool):
40
+ self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath, base_id: UUID, snapshot_only: bool
41
+ ):
41
42
  super().__init__(id, dir_id, name, tbl_version_path)
42
43
  assert base_id in catalog.Catalog.get().tbl_dependents
43
44
  self._base_id = base_id # keep a reference to the base Table ID, so that we can keep track of its dependents
@@ -49,10 +50,18 @@ class View(Table):
49
50
 
50
51
  @classmethod
51
52
  def _create(
52
- cls, dir_id: UUID, name: str, base: TableVersionPath, additional_columns: dict[str, Any],
53
- predicate: Optional['pxt.exprs.Expr'], is_snapshot: bool, num_retained_versions: int, comment: str,
54
- media_validation: MediaValidation,
55
- iterator_cls: Optional[type[ComponentIterator]], iterator_args: Optional[dict]
53
+ cls,
54
+ dir_id: UUID,
55
+ name: str,
56
+ base: TableVersionPath,
57
+ additional_columns: dict[str, Any],
58
+ predicate: Optional['pxt.exprs.Expr'],
59
+ is_snapshot: bool,
60
+ num_retained_versions: int,
61
+ comment: str,
62
+ media_validation: MediaValidation,
63
+ iterator_cls: Optional[type[ComponentIterator]],
64
+ iterator_args: Optional[dict],
56
65
  ) -> View:
57
66
  columns = cls._create_columns(additional_columns)
58
67
  cls._verify_schema(columns)
@@ -71,7 +80,8 @@ class View(Table):
71
80
  # make sure that the value can be computed in the context of the base
72
81
  if col.value_expr is not None and not col.value_expr.is_bound_by([base]):
73
82
  raise excs.Error(
74
- f'Column {col.name}: value expression cannot be computed in the context of the base {base.tbl_name()}')
83
+ f'Column {col.name}: value expression cannot be computed in the context of the base {base.tbl_name()}'
84
+ )
75
85
 
76
86
  if iterator_cls is not None:
77
87
  assert iterator_args is not None
@@ -92,6 +102,7 @@ class View(Table):
92
102
  ]
93
103
  sig = func.Signature(ts.InvalidType(), params)
94
104
  from pixeltable.exprs import FunctionCall
105
+
95
106
  FunctionCall.normalize_args(iterator_cls.__name__, sig, bound_args)
96
107
  except TypeError as e:
97
108
  raise excs.Error(f'Cannot instantiate iterator with given arguments: {e}')
@@ -102,22 +113,28 @@ class View(Table):
102
113
  # stored=False: it is not stored separately (it's already stored as part of the rowid)
103
114
  iterator_cols = [Column(_POS_COLUMN_NAME, ts.IntType(), stored=False)]
104
115
  output_dict, unstored_cols = iterator_cls.output_schema(**bound_args)
105
- iterator_cols.extend([
106
- Column(col_name, col_type, stored=col_name not in unstored_cols)
107
- for col_name, col_type in output_dict.items()
108
- ])
116
+ iterator_cols.extend(
117
+ [
118
+ Column(col_name, col_type, stored=col_name not in unstored_cols)
119
+ for col_name, col_type in output_dict.items()
120
+ ]
121
+ )
109
122
 
110
123
  iterator_col_names = {col.name for col in iterator_cols}
111
124
  for col in columns:
112
125
  if col.name in iterator_col_names:
113
- raise excs.Error(f'Duplicate name: column {col.name} is already present in the iterator output schema')
126
+ raise excs.Error(
127
+ f'Duplicate name: column {col.name} is already present in the iterator output schema'
128
+ )
114
129
  columns = iterator_cols + columns
115
130
 
116
131
  with orm.Session(Env.get().engine, future=True) as session:
117
132
  from pixeltable.exprs import InlineDict
133
+
118
134
  iterator_args_expr: exprs.Expr = InlineDict(iterator_args) if iterator_args is not None else None
119
- iterator_class_fqn = f'{iterator_cls.__module__}.{iterator_cls.__name__}' if iterator_cls is not None \
120
- else None
135
+ iterator_class_fqn = (
136
+ f'{iterator_cls.__module__}.{iterator_cls.__name__}' if iterator_cls is not None else None
137
+ )
121
138
  base_version_path = cls._get_snapshot_path(base) if is_snapshot else base
122
139
  base_versions = [
123
140
  (tbl_version.id.hex, tbl_version.version if is_snapshot or tbl_version.is_snapshot else None)
@@ -127,35 +144,53 @@ class View(Table):
127
144
  # if this is a snapshot, we need to retarget all exprs to the snapshot tbl versions
128
145
  if is_snapshot:
129
146
  predicate = predicate.retarget(base_version_path) if predicate is not None else None
130
- iterator_args_expr = iterator_args_expr.retarget(base_version_path) \
131
- if iterator_args_expr is not None else None
147
+ iterator_args_expr = (
148
+ iterator_args_expr.retarget(base_version_path) if iterator_args_expr is not None else None
149
+ )
132
150
  for col in columns:
133
151
  if col.value_expr is not None:
134
152
  col.set_value_expr(col.value_expr.retarget(base_version_path))
135
153
 
136
154
  view_md = md_schema.ViewMd(
137
- is_snapshot=is_snapshot, predicate=predicate.as_dict() if predicate is not None else None,
155
+ is_snapshot=is_snapshot,
156
+ predicate=predicate.as_dict() if predicate is not None else None,
138
157
  base_versions=base_versions,
139
158
  iterator_class_fqn=iterator_class_fqn,
140
- iterator_args=iterator_args_expr.as_dict() if iterator_args_expr is not None else None)
159
+ iterator_args=iterator_args_expr.as_dict() if iterator_args_expr is not None else None,
160
+ )
141
161
 
142
162
  id, tbl_version = TableVersion.create(
143
- session, dir_id, name, columns, num_retained_versions, comment, media_validation=media_validation,
144
- base_path=base_version_path, view_md=view_md)
163
+ session,
164
+ dir_id,
165
+ name,
166
+ columns,
167
+ num_retained_versions,
168
+ comment,
169
+ media_validation=media_validation,
170
+ base_path=base_version_path,
171
+ view_md=view_md,
172
+ )
145
173
  if tbl_version is None:
146
174
  # this is purely a snapshot: we use the base's tbl version path
147
175
  view = cls(id, dir_id, name, base_version_path, base.tbl_id(), snapshot_only=True)
148
176
  _logger.info(f'created snapshot {name}')
149
177
  else:
150
178
  view = cls(
151
- id, dir_id, name, TableVersionPath(tbl_version, base=base_version_path), base.tbl_id(),
152
- snapshot_only=False)
179
+ id,
180
+ dir_id,
181
+ name,
182
+ TableVersionPath(tbl_version, base=base_version_path),
183
+ base.tbl_id(),
184
+ snapshot_only=False,
185
+ )
153
186
  _logger.info(f'Created view `{name}`, id={tbl_version.id}')
154
187
 
155
188
  from pixeltable.plan import Planner
189
+
156
190
  plan, num_values_per_row = Planner.create_view_load_plan(view._tbl_version_path)
157
191
  num_rows, num_excs, cols_with_excs = tbl_version.store_tbl.insert_rows(
158
- plan, session.connection(), v_min=tbl_version.version)
192
+ plan, session.connection(), v_min=tbl_version.version
193
+ )
159
194
  Env.get().console_logger.info(f'Created view `{name}` with {num_rows} rows, {num_excs} exceptions.')
160
195
 
161
196
  session.commit()
@@ -188,7 +223,8 @@ class View(Table):
188
223
 
189
224
  return TableVersionPath(
190
225
  tbl_version,
191
- base=cls._get_snapshot_path(tbl_version_path.base) if tbl_version_path.base is not None else None)
226
+ base=cls._get_snapshot_path(tbl_version_path.base) if tbl_version_path.base is not None else None,
227
+ )
192
228
 
193
229
  def _drop(self) -> None:
194
230
  cat = catalog.Catalog.get()
@@ -216,8 +252,13 @@ class View(Table):
216
252
  return md
217
253
 
218
254
  def insert(
219
- self, rows: Optional[Iterable[dict[str, Any]]] = None, /, *, print_stats: bool = False,
220
- on_error: Literal['abort', 'ignore'] = 'abort', **kwargs: Any
255
+ self,
256
+ rows: Optional[Iterable[dict[str, Any]]] = None,
257
+ /,
258
+ *,
259
+ print_stats: bool = False,
260
+ on_error: Literal['abort', 'ignore'] = 'abort',
261
+ **kwargs: Any,
221
262
  ) -> UpdateStatus:
222
263
  raise excs.Error(f'{self._display_name()} {self._name!r}: cannot insert into view')
223
264
 
pixeltable/dataframe.py CHANGED
@@ -8,7 +8,7 @@ import json
8
8
  import logging
9
9
  import traceback
10
10
  from pathlib import Path
11
- from typing import TYPE_CHECKING, Any, Callable, Hashable, Iterator, Optional, Sequence, Union, AsyncIterator, NoReturn
11
+ from typing import TYPE_CHECKING, Any, AsyncIterator, Callable, Hashable, Iterator, NoReturn, Optional, Sequence, Union
12
12
 
13
13
  import numpy as np
14
14
  import pandas as pd
@@ -138,7 +138,7 @@ class DataFrame:
138
138
  group_by_clause: Optional[list[exprs.Expr]]
139
139
  grouping_tbl: Optional[catalog.TableVersion]
140
140
  order_by_clause: Optional[list[tuple[exprs.Expr, bool]]]
141
- limit_val: Optional[int]
141
+ limit_val: Optional[exprs.Expr]
142
142
 
143
143
  def __init__(
144
144
  self,
@@ -148,7 +148,7 @@ class DataFrame:
148
148
  group_by_clause: Optional[list[exprs.Expr]] = None,
149
149
  grouping_tbl: Optional[catalog.TableVersion] = None,
150
150
  order_by_clause: Optional[list[tuple[exprs.Expr, bool]]] = None, # list[(expr, asc)]
151
- limit: Optional[int] = None,
151
+ limit: Optional[exprs.Expr] = None,
152
152
  ):
153
153
  self._from_clause = from_clause
154
154
 
@@ -171,9 +171,7 @@ class DataFrame:
171
171
 
172
172
  @classmethod
173
173
  def _normalize_select_list(
174
- cls,
175
- tbls: list[catalog.TableVersionPath],
176
- select_list: Optional[list[tuple[exprs.Expr, Optional[str]]]],
174
+ cls, tbls: list[catalog.TableVersionPath], select_list: Optional[list[tuple[exprs.Expr, Optional[str]]]]
177
175
  ) -> tuple[list[exprs.Expr], list[str]]:
178
176
  """
179
177
  Expand select list information with all columns and their names
@@ -227,6 +225,8 @@ class DataFrame:
227
225
  all_exprs.extend(self.group_by_clause)
228
226
  if self.order_by_clause is not None:
229
227
  all_exprs.extend([expr for expr, _ in self.order_by_clause])
228
+ if self.limit_val is not None:
229
+ all_exprs.append(self.limit_val)
230
230
  vars = exprs.Expr.list_subexprs(all_exprs, expr_class=exprs.Variable)
231
231
  unique_vars: dict[str, exprs.Variable] = {}
232
232
  for var in vars:
@@ -301,7 +301,7 @@ class DataFrame:
301
301
  where_clause=self.where_clause,
302
302
  group_by_clause=group_by_clause,
303
303
  order_by_clause=self.order_by_clause if self.order_by_clause is not None else [],
304
- limit=self.limit_val
304
+ limit=self.limit_val,
305
305
  )
306
306
 
307
307
  def _has_joins(self) -> bool:
@@ -369,8 +369,12 @@ class DataFrame:
369
369
  select_list_exprs = copy.deepcopy(self._select_list_exprs)
370
370
  where_clause = copy.deepcopy(self.where_clause)
371
371
  group_by_clause = copy.deepcopy(self.group_by_clause)
372
- order_by_exprs = [copy.deepcopy(order_by_expr) for order_by_expr, _ in self.order_by_clause] \
373
- if self.order_by_clause is not None else None
372
+ order_by_exprs = (
373
+ [copy.deepcopy(order_by_expr) for order_by_expr, _ in self.order_by_clause]
374
+ if self.order_by_clause is not None
375
+ else None
376
+ )
377
+ limit_val = copy.deepcopy(self.limit_val)
374
378
 
375
379
  var_exprs: dict[exprs.Expr, exprs.Expr] = {}
376
380
  vars = self._vars()
@@ -386,7 +390,7 @@ class DataFrame:
386
390
 
387
391
  exprs.Expr.list_substitute(select_list_exprs, var_exprs)
388
392
  if where_clause is not None:
389
- where_clause.substitute(var_exprs)
393
+ where_clause = where_clause.substitute(var_exprs)
390
394
  if group_by_clause is not None:
391
395
  exprs.Expr.list_substitute(group_by_clause, var_exprs)
392
396
  if order_by_exprs is not None:
@@ -398,14 +402,23 @@ class DataFrame:
398
402
  order_by_clause = [
399
403
  (expr, asc) for expr, asc in zip(order_by_exprs, [asc for _, asc in self.order_by_clause])
400
404
  ]
405
+ if limit_val is not None:
406
+ limit_val = limit_val.substitute(var_exprs)
407
+ if limit_val is not None and not isinstance(limit_val, exprs.Literal):
408
+ raise excs.Error(f'limit(): parameter must be a constant, but got {limit_val}')
401
409
 
402
410
  return DataFrame(
403
- from_clause=self._from_clause, select_list=select_list, where_clause=where_clause,
404
- group_by_clause=group_by_clause, grouping_tbl=self.grouping_tbl,
405
- order_by_clause=order_by_clause, limit=self.limit_val)
411
+ from_clause=self._from_clause,
412
+ select_list=select_list,
413
+ where_clause=where_clause,
414
+ group_by_clause=group_by_clause,
415
+ grouping_tbl=self.grouping_tbl,
416
+ order_by_clause=order_by_clause,
417
+ limit=limit_val,
418
+ )
406
419
 
407
420
  def _raise_expr_eval_err(self, e: excs.ExprEvalError) -> NoReturn:
408
- msg = f'In row {e.row_num} the {e.expr_msg} encountered exception ' f'{type(e.exc).__name__}:\n{str(e.exc)}'
421
+ msg = f'In row {e.row_num} the {e.expr_msg} encountered exception {type(e.exc).__name__}:\n{str(e.exc)}'
409
422
  if len(e.input_vals) > 0:
410
423
  input_msgs = [
411
424
  f"'{d}' = {d.col_type.print_value(e.input_vals[i])}" for i, d in enumerate(e.expr.dependencies())
@@ -419,7 +432,7 @@ class DataFrame:
419
432
  nl = '\n'
420
433
  # [-1:0:-1]: leave out entry 0 and reverse order, so that the most recent frame is at the top
421
434
  msg += f'\nStack:\n{nl.join(stack_trace[-1:1:-1])}'
422
- raise excs.Error(msg)
435
+ raise excs.Error(msg) from e
423
436
 
424
437
  def _output_row_iterator(self, conn: Optional[sql.engine.Connection] = None) -> Iterator[list]:
425
438
  try:
@@ -438,10 +451,7 @@ class DataFrame:
438
451
 
439
452
  async def _acollect(self, conn: sql.engine.Connection) -> DataFrameResultSet:
440
453
  try:
441
- result = [
442
- [row[e.slot_idx] for e in self._select_list_exprs]
443
- async for row in self._aexec(conn)
444
- ]
454
+ result = [[row[e.slot_idx] for e in self._select_list_exprs] async for row in self._aexec(conn)]
445
455
  return DataFrameResultSet(result, self.schema)
446
456
  except excs.ExprEvalError as e:
447
457
  self._raise_expr_eval_err(e)
@@ -471,14 +481,16 @@ class DataFrame:
471
481
  return helper
472
482
 
473
483
  def _col_descriptor(self) -> pd.DataFrame:
474
- return pd.DataFrame([
475
- {
476
- 'Name': name,
477
- 'Type': expr.col_type._to_str(as_schema=True),
478
- 'Expression': expr.display_str(inline=False),
479
- }
480
- for name, expr in zip(self.schema.keys(), self._select_list_exprs)
481
- ])
484
+ return pd.DataFrame(
485
+ [
486
+ {
487
+ 'Name': name,
488
+ 'Type': expr.col_type._to_str(as_schema=True),
489
+ 'Expression': expr.display_str(inline=False),
490
+ }
491
+ for name, expr in zip(self.schema.keys(), self._select_list_exprs)
492
+ ]
493
+ )
482
494
 
483
495
  def _query_descriptor(self) -> pd.DataFrame:
484
496
  heading_vals: list[str] = []
@@ -500,7 +512,7 @@ class DataFrame:
500
512
  )
501
513
  if self.limit_val is not None:
502
514
  heading_vals.append('Limit')
503
- info_vals.append(str(self.limit_val))
515
+ info_vals.append(self.limit_val.display_str(inline=False))
504
516
  assert len(heading_vals) == len(info_vals)
505
517
  return pd.DataFrame(info_vals, index=heading_vals)
506
518
 
@@ -512,6 +524,7 @@ class DataFrame:
512
524
  """
513
525
  if getattr(builtins, '__IPYTHON__', False):
514
526
  from IPython.display import display
527
+
515
528
  display(self._repr_html_())
516
529
  else:
517
530
  print(repr(self))
@@ -523,7 +536,7 @@ class DataFrame:
523
536
  return self._descriptors().to_html()
524
537
 
525
538
  def select(self, *items: Any, **named_items: Any) -> DataFrame:
526
- """ Select columns or expressions from the DataFrame.
539
+ """Select columns or expressions from the DataFrame.
527
540
 
528
541
  Args:
529
542
  items: expressions to be selected
@@ -565,21 +578,17 @@ class DataFrame:
565
578
  # analyze select list; wrap literals with the corresponding expressions
566
579
  select_list: list[tuple[exprs.Expr, Optional[str]]] = []
567
580
  for raw_expr, name in base_list:
568
- if isinstance(raw_expr, exprs.Expr):
569
- select_list.append((raw_expr, name))
570
- elif isinstance(raw_expr, (dict, list, tuple)):
571
- select_list.append((exprs.Expr.from_object(raw_expr), name))
572
- elif isinstance(raw_expr, np.ndarray):
573
- select_list.append((exprs.Expr.from_array(raw_expr), name))
574
- else:
575
- select_list.append((exprs.Literal(raw_expr), name))
576
- expr = select_list[-1][0]
581
+ expr = exprs.Expr.from_object(raw_expr)
582
+ if expr is None:
583
+ raise excs.Error(f'Invalid expression: {raw_expr}')
577
584
  if expr.col_type.is_invalid_type():
578
585
  raise excs.Error(f'Invalid type: {raw_expr}')
579
586
  if not expr.is_bound_by(self._from_clause.tbls):
580
587
  raise excs.Error(
581
588
  f"Expression '{expr}' cannot be evaluated in the context of this query's tables "
582
- f"({','.join(tbl.tbl_name() for tbl in self._from_clause.tbls)})")
589
+ f'({",".join(tbl.tbl_name() for tbl in self._from_clause.tbls)})'
590
+ )
591
+ select_list.append((expr, name))
583
592
 
584
593
  # check user provided names do not conflict among themselves or with auto-generated ones
585
594
  seen: set[str] = set()
@@ -640,7 +649,7 @@ class DataFrame:
640
649
  )
641
650
 
642
651
  def _create_join_predicate(
643
- self, other: catalog.TableVersionPath, on: Union[exprs.Expr, Sequence[exprs.ColumnRef]]
652
+ self, other: catalog.TableVersionPath, on: Union[exprs.Expr, Sequence[exprs.ColumnRef]]
644
653
  ) -> exprs.Expr:
645
654
  """Verifies user-specified 'on' argument and converts it into a join predicate."""
646
655
  col_refs: list[exprs.ColumnRef] = []
@@ -656,14 +665,12 @@ class DataFrame:
656
665
  return on
657
666
  else:
658
667
  if not isinstance(on, Sequence) or len(on) == 0:
659
- raise excs.Error(
660
- f"'on': must be a sequence of column references or a boolean expression")
668
+ raise excs.Error(f"'on': must be a sequence of column references or a boolean expression")
661
669
 
662
670
  assert isinstance(on, Sequence)
663
671
  for col_ref in on:
664
672
  if not isinstance(col_ref, exprs.ColumnRef):
665
- raise excs.Error(
666
- f"'on': must be a sequence of column references or a boolean expression")
673
+ raise excs.Error(f"'on': must be a sequence of column references or a boolean expression")
667
674
  if not col_ref.is_bound_by(joined_tbls):
668
675
  raise excs.Error(f"'on': expression cannot be evaluated in the context of the joined tables: {col_ref}")
669
676
  col_refs.append(col_ref)
@@ -693,8 +700,7 @@ class DataFrame:
693
700
  lhs_col_ref = exprs.ColumnRef(col)
694
701
  if lhs_col_ref is None:
695
702
  tbl_names = [tbl.tbl_name() for tbl in self._from_clause.tbls]
696
- raise excs.Error(
697
- f"'on': column {col_ref.col.name!r} not found in any of: {' '.join(tbl_names)}")
703
+ raise excs.Error(f"'on': column {col_ref.col.name!r} not found in any of: {' '.join(tbl_names)}")
698
704
  pred = exprs.Comparison(exprs.ComparisonOperator.EQ, lhs_col_ref, rhs_col_ref)
699
705
  predicates.append(pred)
700
706
 
@@ -705,8 +711,10 @@ class DataFrame:
705
711
  return exprs.CompoundPredicate(operator=exprs.LogicalOperator.AND, operands=predicates)
706
712
 
707
713
  def join(
708
- self, other: catalog.Table, on: Optional[Union[exprs.Expr, Sequence[exprs.ColumnRef]]] = None,
709
- how: plan.JoinType.LiteralType = 'inner'
714
+ self,
715
+ other: catalog.Table,
716
+ on: Optional[Union[exprs.Expr, Sequence[exprs.ColumnRef]]] = None,
717
+ how: plan.JoinType.LiteralType = 'inner',
710
718
  ) -> DataFrame:
711
719
  """
712
720
  Join this DataFrame with a table.
@@ -766,16 +774,20 @@ class DataFrame:
766
774
  join_clause = plan.JoinClause(join_type=plan.JoinType.validated(how, "'how'"), join_predicate=join_pred)
767
775
  from_clause = plan.FromClause(
768
776
  tbls=[*self._from_clause.tbls, other._tbl_version_path],
769
- join_clauses=[*self._from_clause.join_clauses, join_clause])
777
+ join_clauses=[*self._from_clause.join_clauses, join_clause],
778
+ )
770
779
  return DataFrame(
771
780
  from_clause=from_clause,
772
- select_list=self.select_list, where_clause=self.where_clause,
773
- group_by_clause=self.group_by_clause, grouping_tbl=self.grouping_tbl,
774
- order_by_clause=self.order_by_clause, limit=self.limit_val,
781
+ select_list=self.select_list,
782
+ where_clause=self.where_clause,
783
+ group_by_clause=self.group_by_clause,
784
+ grouping_tbl=self.grouping_tbl,
785
+ order_by_clause=self.order_by_clause,
786
+ limit=self.limit_val,
775
787
  )
776
788
 
777
789
  def group_by(self, *grouping_items: Any) -> DataFrame:
778
- """ Add a group-by clause to this DataFrame.
790
+ """Add a group-by clause to this DataFrame.
779
791
 
780
792
  Variants:
781
793
  - group_by(<base table>): group a component view by their respective base table rows
@@ -846,7 +858,7 @@ class DataFrame:
846
858
  )
847
859
 
848
860
  def order_by(self, *expr_list: exprs.Expr, asc: bool = True) -> DataFrame:
849
- """ Add an order-by clause to this DataFrame.
861
+ """Add an order-by clause to this DataFrame.
850
862
 
851
863
  Args:
852
864
  expr_list: expressions to order by
@@ -891,7 +903,7 @@ class DataFrame:
891
903
  )
892
904
 
893
905
  def limit(self, n: int) -> DataFrame:
894
- """ Limit the number of rows in the DataFrame.
906
+ """Limit the number of rows in the DataFrame.
895
907
 
896
908
  Args:
897
909
  n: Number of rows to select.
@@ -899,8 +911,10 @@ class DataFrame:
899
911
  Returns:
900
912
  A new DataFrame with the specified limited rows.
901
913
  """
902
- # TODO: allow n to be a Variable that can be substituted in bind()
903
- assert n is not None and isinstance(n, int)
914
+ assert n is not None
915
+ n = exprs.Expr.from_object(n)
916
+ if not n.col_type.is_int_type():
917
+ raise excs.Error(f'limit(): parameter must be of type int, instead of {n.col_type}')
904
918
  return DataFrame(
905
919
  from_clause=self._from_clause,
906
920
  select_list=self.select_list,
@@ -912,7 +926,7 @@ class DataFrame:
912
926
  )
913
927
 
914
928
  def update(self, value_spec: dict[str, Any], cascade: bool = True) -> UpdateStatus:
915
- """ Update rows in the underlying table of the DataFrame.
929
+ """Update rows in the underlying table of the DataFrame.
916
930
 
917
931
  Update rows in the table with the specified value_spec.
918
932
 
@@ -941,7 +955,7 @@ class DataFrame:
941
955
  return self._first_tbl.tbl_version.update(value_spec, where=self.where_clause, cascade=cascade)
942
956
 
943
957
  def delete(self) -> UpdateStatus:
944
- """ Delete rows form the underlying table of the DataFrame.
958
+ """Delete rows form the underlying table of the DataFrame.
945
959
 
946
960
  The delete operation is only allowed for DataFrames on base tables.
947
961
 
@@ -982,17 +996,20 @@ class DataFrame:
982
996
  '_classname': 'DataFrame',
983
997
  'from_clause': {
984
998
  'tbls': [tbl.as_dict() for tbl in self._from_clause.tbls],
985
- 'join_clauses': [dataclasses.asdict(clause) for clause in self._from_clause.join_clauses]
999
+ 'join_clauses': [dataclasses.asdict(clause) for clause in self._from_clause.join_clauses],
986
1000
  },
987
- 'select_list':
988
- [(e.as_dict(), name) for (e, name) in self.select_list] if self.select_list is not None else None,
1001
+ 'select_list': [(e.as_dict(), name) for (e, name) in self.select_list]
1002
+ if self.select_list is not None
1003
+ else None,
989
1004
  'where_clause': self.where_clause.as_dict() if self.where_clause is not None else None,
990
- 'group_by_clause':
991
- [e.as_dict() for e in self.group_by_clause] if self.group_by_clause is not None else None,
1005
+ 'group_by_clause': [e.as_dict() for e in self.group_by_clause]
1006
+ if self.group_by_clause is not None
1007
+ else None,
992
1008
  'grouping_tbl': self.grouping_tbl.as_dict() if self.grouping_tbl is not None else None,
993
- 'order_by_clause':
994
- [(e.as_dict(), asc) for (e,asc) in self.order_by_clause] if self.order_by_clause is not None else None,
995
- 'limit_val': self.limit_val,
1009
+ 'order_by_clause': [(e.as_dict(), asc) for (e, asc) in self.order_by_clause]
1010
+ if self.order_by_clause is not None
1011
+ else None,
1012
+ 'limit_val': self.limit_val.as_dict() if self.limit_val is not None else None,
996
1013
  }
997
1014
  return d
998
1015
 
@@ -1001,21 +1018,29 @@ class DataFrame:
1001
1018
  tbls = [catalog.TableVersionPath.from_dict(tbl_dict) for tbl_dict in d['from_clause']['tbls']]
1002
1019
  join_clauses = [plan.JoinClause(**clause_dict) for clause_dict in d['from_clause']['join_clauses']]
1003
1020
  from_clause = plan.FromClause(tbls=tbls, join_clauses=join_clauses)
1004
- select_list = [(exprs.Expr.from_dict(e), name) for e, name in d['select_list']] \
1005
- if d['select_list'] is not None else None
1006
- where_clause = exprs.Expr.from_dict(d['where_clause']) \
1007
- if d['where_clause'] is not None else None
1008
- group_by_clause = [exprs.Expr.from_dict(e) for e in d['group_by_clause']] \
1009
- if d['group_by_clause'] is not None else None
1010
- grouping_tbl = catalog.TableVersion.from_dict(d['grouping_tbl']) \
1011
- if d['grouping_tbl'] is not None else None
1012
- order_by_clause = [(exprs.Expr.from_dict(e), asc) for e, asc in d['order_by_clause']] \
1013
- if d['order_by_clause'] is not None else None
1014
- limit_val = d['limit_val']
1021
+ select_list = (
1022
+ [(exprs.Expr.from_dict(e), name) for e, name in d['select_list']] if d['select_list'] is not None else None
1023
+ )
1024
+ where_clause = exprs.Expr.from_dict(d['where_clause']) if d['where_clause'] is not None else None
1025
+ group_by_clause = (
1026
+ [exprs.Expr.from_dict(e) for e in d['group_by_clause']] if d['group_by_clause'] is not None else None
1027
+ )
1028
+ grouping_tbl = catalog.TableVersion.from_dict(d['grouping_tbl']) if d['grouping_tbl'] is not None else None
1029
+ order_by_clause = (
1030
+ [(exprs.Expr.from_dict(e), asc) for e, asc in d['order_by_clause']]
1031
+ if d['order_by_clause'] is not None
1032
+ else None
1033
+ )
1034
+ limit_val = exprs.Expr.from_dict(d['limit_val']) if d['limit_val'] is not None else None
1015
1035
  return DataFrame(
1016
- from_clause=from_clause, select_list=select_list, where_clause=where_clause,
1017
- group_by_clause=group_by_clause, grouping_tbl=grouping_tbl, order_by_clause=order_by_clause,
1018
- limit=limit_val)
1036
+ from_clause=from_clause,
1037
+ select_list=select_list,
1038
+ where_clause=where_clause,
1039
+ group_by_clause=group_by_clause,
1040
+ grouping_tbl=grouping_tbl,
1041
+ order_by_clause=order_by_clause,
1042
+ limit=limit_val,
1043
+ )
1019
1044
 
1020
1045
  def _hash_result_set(self) -> str:
1021
1046
  """Return a hash that changes when the result set changes."""