pixeltable 0.4.18__py3-none-any.whl → 0.4.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (152)
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/catalog.py +119 -100
  4. pixeltable/catalog/column.py +104 -115
  5. pixeltable/catalog/globals.py +1 -2
  6. pixeltable/catalog/insertable_table.py +44 -49
  7. pixeltable/catalog/path.py +3 -4
  8. pixeltable/catalog/schema_object.py +4 -4
  9. pixeltable/catalog/table.py +118 -122
  10. pixeltable/catalog/table_metadata.py +6 -6
  11. pixeltable/catalog/table_version.py +322 -257
  12. pixeltable/catalog/table_version_handle.py +4 -4
  13. pixeltable/catalog/table_version_path.py +9 -10
  14. pixeltable/catalog/tbl_ops.py +9 -3
  15. pixeltable/catalog/view.py +34 -28
  16. pixeltable/config.py +14 -10
  17. pixeltable/dataframe.py +68 -77
  18. pixeltable/env.py +74 -64
  19. pixeltable/exec/aggregation_node.py +6 -6
  20. pixeltable/exec/cache_prefetch_node.py +10 -10
  21. pixeltable/exec/data_row_batch.py +3 -3
  22. pixeltable/exec/exec_context.py +4 -5
  23. pixeltable/exec/exec_node.py +5 -5
  24. pixeltable/exec/expr_eval/evaluators.py +6 -6
  25. pixeltable/exec/expr_eval/expr_eval_node.py +8 -7
  26. pixeltable/exec/expr_eval/globals.py +6 -6
  27. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  28. pixeltable/exec/expr_eval/schedulers.py +11 -11
  29. pixeltable/exec/in_memory_data_node.py +2 -2
  30. pixeltable/exec/object_store_save_node.py +14 -17
  31. pixeltable/exec/sql_node.py +25 -25
  32. pixeltable/exprs/arithmetic_expr.py +4 -4
  33. pixeltable/exprs/array_slice.py +2 -2
  34. pixeltable/exprs/column_property_ref.py +3 -3
  35. pixeltable/exprs/column_ref.py +61 -74
  36. pixeltable/exprs/comparison.py +5 -5
  37. pixeltable/exprs/compound_predicate.py +3 -3
  38. pixeltable/exprs/data_row.py +12 -12
  39. pixeltable/exprs/expr.py +41 -31
  40. pixeltable/exprs/expr_dict.py +3 -3
  41. pixeltable/exprs/expr_set.py +3 -3
  42. pixeltable/exprs/function_call.py +14 -14
  43. pixeltable/exprs/in_predicate.py +4 -4
  44. pixeltable/exprs/inline_expr.py +8 -8
  45. pixeltable/exprs/is_null.py +1 -3
  46. pixeltable/exprs/json_mapper.py +8 -8
  47. pixeltable/exprs/json_path.py +6 -6
  48. pixeltable/exprs/literal.py +5 -5
  49. pixeltable/exprs/method_ref.py +2 -2
  50. pixeltable/exprs/object_ref.py +2 -2
  51. pixeltable/exprs/row_builder.py +14 -14
  52. pixeltable/exprs/rowid_ref.py +8 -8
  53. pixeltable/exprs/similarity_expr.py +50 -25
  54. pixeltable/exprs/sql_element_cache.py +4 -4
  55. pixeltable/exprs/string_op.py +2 -2
  56. pixeltable/exprs/type_cast.py +3 -5
  57. pixeltable/func/aggregate_function.py +8 -8
  58. pixeltable/func/callable_function.py +9 -9
  59. pixeltable/func/expr_template_function.py +3 -3
  60. pixeltable/func/function.py +15 -17
  61. pixeltable/func/function_registry.py +6 -7
  62. pixeltable/func/globals.py +2 -3
  63. pixeltable/func/mcp.py +2 -2
  64. pixeltable/func/query_template_function.py +16 -16
  65. pixeltable/func/signature.py +14 -14
  66. pixeltable/func/tools.py +11 -11
  67. pixeltable/func/udf.py +16 -18
  68. pixeltable/functions/__init__.py +1 -0
  69. pixeltable/functions/anthropic.py +7 -7
  70. pixeltable/functions/audio.py +76 -0
  71. pixeltable/functions/bedrock.py +6 -6
  72. pixeltable/functions/deepseek.py +4 -4
  73. pixeltable/functions/fireworks.py +2 -2
  74. pixeltable/functions/gemini.py +6 -6
  75. pixeltable/functions/globals.py +12 -12
  76. pixeltable/functions/groq.py +4 -4
  77. pixeltable/functions/huggingface.py +18 -20
  78. pixeltable/functions/image.py +7 -10
  79. pixeltable/functions/llama_cpp.py +7 -7
  80. pixeltable/functions/math.py +2 -3
  81. pixeltable/functions/mistralai.py +3 -3
  82. pixeltable/functions/ollama.py +9 -9
  83. pixeltable/functions/openai.py +21 -21
  84. pixeltable/functions/openrouter.py +7 -7
  85. pixeltable/functions/string.py +21 -28
  86. pixeltable/functions/timestamp.py +7 -8
  87. pixeltable/functions/together.py +4 -6
  88. pixeltable/functions/twelvelabs.py +92 -0
  89. pixeltable/functions/video.py +2 -24
  90. pixeltable/functions/vision.py +6 -6
  91. pixeltable/functions/whisper.py +7 -7
  92. pixeltable/functions/whisperx.py +16 -16
  93. pixeltable/globals.py +52 -36
  94. pixeltable/index/base.py +12 -8
  95. pixeltable/index/btree.py +19 -22
  96. pixeltable/index/embedding_index.py +30 -39
  97. pixeltable/io/datarows.py +3 -3
  98. pixeltable/io/external_store.py +13 -16
  99. pixeltable/io/fiftyone.py +5 -5
  100. pixeltable/io/globals.py +5 -5
  101. pixeltable/io/hf_datasets.py +4 -4
  102. pixeltable/io/label_studio.py +12 -12
  103. pixeltable/io/pandas.py +6 -6
  104. pixeltable/io/parquet.py +2 -2
  105. pixeltable/io/table_data_conduit.py +12 -12
  106. pixeltable/io/utils.py +2 -2
  107. pixeltable/iterators/audio.py +2 -2
  108. pixeltable/iterators/video.py +8 -13
  109. pixeltable/metadata/converters/convert_18.py +2 -2
  110. pixeltable/metadata/converters/convert_19.py +2 -2
  111. pixeltable/metadata/converters/convert_20.py +2 -2
  112. pixeltable/metadata/converters/convert_21.py +2 -2
  113. pixeltable/metadata/converters/convert_22.py +2 -2
  114. pixeltable/metadata/converters/convert_24.py +2 -2
  115. pixeltable/metadata/converters/convert_25.py +2 -2
  116. pixeltable/metadata/converters/convert_26.py +2 -2
  117. pixeltable/metadata/converters/convert_29.py +4 -4
  118. pixeltable/metadata/converters/convert_34.py +2 -2
  119. pixeltable/metadata/converters/convert_36.py +2 -2
  120. pixeltable/metadata/converters/convert_38.py +2 -2
  121. pixeltable/metadata/converters/convert_39.py +1 -2
  122. pixeltable/metadata/converters/util.py +11 -13
  123. pixeltable/metadata/schema.py +22 -21
  124. pixeltable/metadata/utils.py +2 -6
  125. pixeltable/mypy/mypy_plugin.py +5 -5
  126. pixeltable/plan.py +30 -28
  127. pixeltable/share/packager.py +7 -7
  128. pixeltable/share/publish.py +3 -3
  129. pixeltable/store.py +125 -61
  130. pixeltable/type_system.py +43 -46
  131. pixeltable/utils/__init__.py +1 -2
  132. pixeltable/utils/arrow.py +4 -4
  133. pixeltable/utils/av.py +8 -0
  134. pixeltable/utils/azure_store.py +305 -0
  135. pixeltable/utils/code.py +1 -2
  136. pixeltable/utils/dbms.py +15 -19
  137. pixeltable/utils/description_helper.py +2 -3
  138. pixeltable/utils/documents.py +5 -6
  139. pixeltable/utils/exception_handler.py +2 -2
  140. pixeltable/utils/filecache.py +5 -5
  141. pixeltable/utils/formatter.py +4 -6
  142. pixeltable/utils/gcs_store.py +9 -9
  143. pixeltable/utils/local_store.py +17 -17
  144. pixeltable/utils/object_stores.py +59 -43
  145. pixeltable/utils/s3_store.py +35 -30
  146. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/METADATA +1 -1
  147. pixeltable-0.4.19.dist-info/RECORD +213 -0
  148. pixeltable/__version__.py +0 -3
  149. pixeltable-0.4.18.dist-info/RECORD +0 -211
  150. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/WHEEL +0 -0
  151. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/entry_points.txt +0 -0
  152. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/licenses/LICENSE +0 -0
pixeltable/dataframe.py CHANGED
@@ -8,18 +8,7 @@ import json
8
8
  import logging
9
9
  import traceback
10
10
  from pathlib import Path
11
- from typing import (
12
- TYPE_CHECKING,
13
- Any,
14
- AsyncIterator,
15
- Callable,
16
- Hashable,
17
- Iterator,
18
- NoReturn,
19
- Optional,
20
- Sequence,
21
- TypeVar,
22
- )
11
+ from typing import TYPE_CHECKING, Any, AsyncIterator, Callable, Hashable, Iterator, NoReturn, Sequence, TypeVar
23
12
 
24
13
  import pandas as pd
25
14
  import pydantic
@@ -162,14 +151,14 @@ class DataFrameResultSet:
162
151
  # # output of the agg stage
163
152
  # self.agg_output_exprs: list[exprs.Expr] = []
164
153
  # # Where clause of the Select stmt of the SQL scan stage
165
- # self.sql_where_clause: Optional[sql.ClauseElement] = None
154
+ # self.sql_where_clause: sql.ClauseElement | None = None
166
155
  # # filter predicate applied to input rows of the SQL scan stage
167
- # self.filter: Optional[exprs.Predicate] = None
168
- # self.similarity_clause: Optional[exprs.ImageSimilarityPredicate] = None
156
+ # self.filter: exprs.Predicate | None = None
157
+ # self.similarity_clause: exprs.ImageSimilarityPredicate | None = None
169
158
  # self.agg_fn_calls: list[exprs.FunctionCall] = [] # derived from unique_exprs
170
159
  # self.has_frame_col: bool = False # True if we're referencing the frame col
171
160
  #
172
- # self.evaluator: Optional[exprs.Evaluator] = None
161
+ # self.evaluator: exprs.Evaluator | None = None
173
162
  # self.sql_scan_eval_ctx: list[exprs.Expr] = [] # needed to materialize output of SQL scan stage
174
163
  # self.agg_eval_ctx: list[exprs.Expr] = [] # needed to materialize output of agg stage
175
164
  # self.filter_eval_ctx: list[exprs.Expr] = []
@@ -191,24 +180,24 @@ class DataFrame:
191
180
  _from_clause: plan.FromClause
192
181
  _select_list_exprs: list[exprs.Expr]
193
182
  _schema: dict[str, ts.ColumnType]
194
- select_list: Optional[list[tuple[exprs.Expr, Optional[str]]]]
195
- where_clause: Optional[exprs.Expr]
196
- group_by_clause: Optional[list[exprs.Expr]]
197
- grouping_tbl: Optional[catalog.TableVersion]
198
- order_by_clause: Optional[list[tuple[exprs.Expr, bool]]]
199
- limit_val: Optional[exprs.Expr]
200
- sample_clause: Optional[SampleClause]
183
+ select_list: list[tuple[exprs.Expr, str | None]] | None
184
+ where_clause: exprs.Expr | None
185
+ group_by_clause: list[exprs.Expr] | None
186
+ grouping_tbl: catalog.TableVersion | None
187
+ order_by_clause: list[tuple[exprs.Expr, bool]] | None
188
+ limit_val: exprs.Expr | None
189
+ sample_clause: SampleClause | None
201
190
 
202
191
  def __init__(
203
192
  self,
204
- from_clause: Optional[plan.FromClause] = None,
205
- select_list: Optional[list[tuple[exprs.Expr, Optional[str]]]] = None,
206
- where_clause: Optional[exprs.Expr] = None,
207
- group_by_clause: Optional[list[exprs.Expr]] = None,
208
- grouping_tbl: Optional[catalog.TableVersion] = None,
209
- order_by_clause: Optional[list[tuple[exprs.Expr, bool]]] = None, # list[(expr, asc)]
210
- limit: Optional[exprs.Expr] = None,
211
- sample_clause: Optional[SampleClause] = None,
193
+ from_clause: plan.FromClause | None = None,
194
+ select_list: list[tuple[exprs.Expr, str | None]] | None = None,
195
+ where_clause: exprs.Expr | None = None,
196
+ group_by_clause: list[exprs.Expr] | None = None,
197
+ grouping_tbl: catalog.TableVersion | None = None,
198
+ order_by_clause: list[tuple[exprs.Expr, bool]] | None = None, # list[(expr, asc)]
199
+ limit: exprs.Expr | None = None,
200
+ sample_clause: SampleClause | None = None,
212
201
  ):
213
202
  self._from_clause = from_clause
214
203
 
@@ -232,7 +221,7 @@ class DataFrame:
232
221
 
233
222
  @classmethod
234
223
  def _normalize_select_list(
235
- cls, tbls: list[catalog.TableVersionPath], select_list: Optional[list[tuple[exprs.Expr, Optional[str]]]]
224
+ cls, tbls: list[catalog.TableVersionPath], select_list: list[tuple[exprs.Expr, str | None]] | None
236
225
  ) -> tuple[list[exprs.Expr], list[str]]:
237
226
  """
238
227
  Expand select list information with all columns and their names
@@ -293,23 +282,23 @@ class DataFrame:
293
282
  if var.name not in unique_vars:
294
283
  unique_vars[var.name] = var
295
284
  elif unique_vars[var.name].col_type != var.col_type:
296
- raise excs.Error(f'Multiple definitions of parameter {var.name}')
285
+ raise excs.Error(f'Multiple definitions of parameter {var.name!r}')
297
286
  return unique_vars
298
287
 
299
288
  @classmethod
300
289
  def _convert_param_to_typed_expr(
301
- cls, v: Any, required_type: ts.ColumnType, required: bool, name: str, range: Optional[tuple[Any, Any]] = None
302
- ) -> Optional[exprs.Expr]:
290
+ cls, v: Any, required_type: ts.ColumnType, required: bool, name: str, range: tuple[Any, Any] | None = None
291
+ ) -> exprs.Expr | None:
303
292
  if v is None:
304
293
  if required:
305
294
  raise excs.Error(f'{name!r} parameter must be present')
306
295
  return v
307
296
  v_expr = exprs.Expr.from_object(v)
308
297
  if not v_expr.col_type.matches(required_type):
309
- raise excs.Error(f'{name!r} parameter must be of type {required_type!r}, instead of {v_expr.col_type}')
298
+ raise excs.Error(f'{name!r} parameter must be of type `{required_type}`; got `{v_expr.col_type}`')
310
299
  if range is not None:
311
300
  if not isinstance(v_expr, exprs.Literal):
312
- raise excs.Error(f'{name!r} parameter must be a constant, not {v_expr}')
301
+ raise excs.Error(f'{name!r} parameter must be a constant; got: {v_expr}')
313
302
  if range[0] is not None and not (v_expr.val >= range[0]):
314
303
  raise excs.Error(f'{name!r} parameter must be >= {range[0]}')
315
304
  if range[1] is not None and not (v_expr.val <= range[1]):
@@ -318,7 +307,7 @@ class DataFrame:
318
307
 
319
308
  @classmethod
320
309
  def validate_constant_type_range(
321
- cls, v: Any, required_type: ts.ColumnType, required: bool, name: str, range: Optional[tuple[Any, Any]] = None
310
+ cls, v: Any, required_type: ts.ColumnType, required: bool, name: str, range: tuple[Any, Any] | None = None
322
311
  ) -> Any:
323
312
  """Validate that the given named parameter is a constant of the required type and within the specified range."""
324
313
  v_expr = cls._convert_param_to_typed_expr(v, required_type, required, name, range)
@@ -364,7 +353,7 @@ class DataFrame:
364
353
 
365
354
  def _create_query_plan(self) -> exec.ExecNode:
366
355
  # construct a group-by clause if we're grouping by a table
367
- group_by_clause: Optional[list[exprs.Expr]] = None
356
+ group_by_clause: list[exprs.Expr] | None = None
368
357
  if self.grouping_tbl is not None:
369
358
  assert self.group_by_clause is None
370
359
  num_rowid_cols = len(self.grouping_tbl.store_tbl.rowid_columns())
@@ -387,7 +376,7 @@ class DataFrame:
387
376
  sample_clause=self.sample_clause,
388
377
  )
389
378
 
390
- def __rowid_columns(self, num_rowid_cols: Optional[int] = None) -> list[exprs.Expr]:
379
+ def __rowid_columns(self, num_rowid_cols: int | None = None) -> list[exprs.Expr]:
391
380
  """Return list of RowidRef for the given number of associated rowids"""
392
381
  return Planner.rowid_columns(self._first_tbl.tbl_version, num_rowid_cols)
393
382
 
@@ -483,7 +472,7 @@ class DataFrame:
483
472
  var_expr = vars[arg_name]
484
473
  arg_expr = exprs.Expr.from_object(arg_val)
485
474
  if arg_expr is None:
486
- raise excs.Error(f'Cannot convert argument {arg_val} to a Pixeltable expression')
475
+ raise excs.Error(f'That argument cannot be converted to a Pixeltable expression: {arg_val}')
487
476
  var_exprs[var_expr] = arg_expr
488
477
 
489
478
  exprs.Expr.list_substitute(select_list_exprs, var_exprs)
@@ -495,7 +484,7 @@ class DataFrame:
495
484
  exprs.Expr.list_substitute(order_by_exprs, var_exprs)
496
485
 
497
486
  select_list = list(zip(select_list_exprs, self.schema.keys()))
498
- order_by_clause: Optional[list[tuple[exprs.Expr, bool]]] = None
487
+ order_by_clause: list[tuple[exprs.Expr, bool]] | None = None
499
488
  if order_by_exprs is not None:
500
489
  order_by_clause = [
501
490
  (expr, asc) for expr, asc in zip(order_by_exprs, [asc for _, asc in self.order_by_clause])
@@ -503,7 +492,7 @@ class DataFrame:
503
492
  if limit_val is not None:
504
493
  limit_val = limit_val.substitute(var_exprs)
505
494
  if limit_val is not None and not isinstance(limit_val, exprs.Literal):
506
- raise excs.Error(f'limit(): parameter must be a constant, but got {limit_val}')
495
+ raise excs.Error(f'limit(): parameter must be a constant; got: {limit_val}')
507
496
 
508
497
  return DataFrame(
509
498
  from_clause=self._from_clause,
@@ -683,7 +672,7 @@ class DataFrame:
683
672
  return self
684
673
 
685
674
  # analyze select list; wrap literals with the corresponding expressions
686
- select_list: list[tuple[exprs.Expr, Optional[str]]] = []
675
+ select_list: list[tuple[exprs.Expr, str | None]] = []
687
676
  for raw_expr, name in base_list:
688
677
  expr = exprs.Expr.from_object(raw_expr)
689
678
  if expr is None:
@@ -703,8 +692,8 @@ class DataFrame:
703
692
  pass
704
693
  if not expr.is_bound_by(self._from_clause.tbls):
705
694
  raise excs.Error(
706
- f"Expression '{expr}' cannot be evaluated in the context of this query's tables "
707
- f'({",".join(tbl.tbl_version.get().versioned_name for tbl in self._from_clause.tbls)})'
695
+ f"That expression cannot be evaluated in the context of this query's tables "
696
+ f'({",".join(tbl.tbl_version.get().versioned_name for tbl in self._from_clause.tbls)}): {expr}'
708
697
  )
709
698
  select_list.append((expr, name))
710
699
 
@@ -715,7 +704,7 @@ class DataFrame:
715
704
  if name in seen:
716
705
  repeated_names = [j for j, x in enumerate(names) if x == name]
717
706
  pretty = ', '.join(map(str, repeated_names))
718
- raise excs.Error(f'Repeated column name "{name}" in select() at positions: {pretty}')
707
+ raise excs.Error(f'Repeated column name {name!r} in select() at positions: {pretty}')
719
708
  seen.add(name)
720
709
 
721
710
  return DataFrame(
@@ -753,13 +742,13 @@ class DataFrame:
753
742
  >>> df = person.where(t.age > 30)
754
743
  """
755
744
  if self.where_clause is not None:
756
- raise excs.Error('Where clause already specified')
745
+ raise excs.Error('where() clause already specified')
757
746
  if self.sample_clause is not None:
758
- raise excs.Error('where cannot be used after sample()')
747
+ raise excs.Error('where() cannot be used after sample()')
759
748
  if not isinstance(pred, exprs.Expr):
760
- raise excs.Error(f'Where() requires a Pixeltable expression, but instead got {type(pred)}')
749
+ raise excs.Error(f'where() expects a Pixeltable expression; got: {pred}')
761
750
  if not pred.col_type.is_bool_type():
762
- raise excs.Error(f'Where(): expression needs to return bool, but instead returns {pred.col_type}')
751
+ raise excs.Error(f'where() expression needs to return `Bool`, but instead returns `{pred.col_type}`')
763
752
  return DataFrame(
764
753
  from_clause=self._from_clause,
765
754
  select_list=self.select_list,
@@ -781,19 +770,21 @@ class DataFrame:
781
770
  on = [on]
782
771
  elif isinstance(on, exprs.Expr):
783
772
  if not on.is_bound_by(joined_tbls):
784
- raise excs.Error(f"'on': expression cannot be evaluated in the context of the joined tables: {on}")
773
+ raise excs.Error(f'`on` expression cannot be evaluated in the context of the joined tables: {on}')
785
774
  if not on.col_type.is_bool_type():
786
- raise excs.Error(f"'on': boolean expression expected, but got {on.col_type}: {on}")
775
+ raise excs.Error(
776
+ f'`on` expects an expression of type `Bool`, but got one of type `{on.col_type}`: {on}'
777
+ )
787
778
  return on
788
779
  elif not isinstance(on, Sequence) or len(on) == 0:
789
- raise excs.Error("'on': must be a sequence of column references or a boolean expression")
780
+ raise excs.Error('`on` must be a sequence of column references or a boolean expression')
790
781
 
791
782
  assert isinstance(on, Sequence)
792
783
  for col_ref in on:
793
784
  if not isinstance(col_ref, exprs.ColumnRef):
794
- raise excs.Error("'on': must be a sequence of column references or a boolean expression")
785
+ raise excs.Error('`on` must be a sequence of column references or a boolean expression')
795
786
  if not col_ref.is_bound_by(joined_tbls):
796
- raise excs.Error(f"'on': expression cannot be evaluated in the context of the joined tables: {col_ref}")
787
+ raise excs.Error(f'`on` expression cannot be evaluated in the context of the joined tables: {col_ref}')
797
788
  col_refs.append(col_ref)
798
789
 
799
790
  predicates: list[exprs.Expr] = []
@@ -803,10 +794,10 @@ class DataFrame:
803
794
  # identify the referenced column by name in 'other'
804
795
  rhs_col = other.get_column(col_ref.col.name)
805
796
  if rhs_col is None:
806
- raise excs.Error(f"'on': column {col_ref.col.name!r} not found in joined table")
797
+ raise excs.Error(f'`on` column {col_ref.col.name!r} not found in joined table')
807
798
  rhs_col_ref = exprs.ColumnRef(rhs_col)
808
799
 
809
- lhs_col_ref: Optional[exprs.ColumnRef] = None
800
+ lhs_col_ref: exprs.ColumnRef | None = None
810
801
  if any(tbl.has_column(col_ref.col) for tbl in self._from_clause.tbls):
811
802
  # col_ref comes from the existing from_clause, we use that directly
812
803
  lhs_col_ref = col_ref
@@ -817,11 +808,11 @@ class DataFrame:
817
808
  if col is None:
818
809
  continue
819
810
  if lhs_col_ref is not None:
820
- raise excs.Error(f"'on': ambiguous column reference: {col_ref.col.name!r}")
811
+ raise excs.Error(f'`on`: ambiguous column reference: {col_ref.col.name}')
821
812
  lhs_col_ref = exprs.ColumnRef(col)
822
813
  if lhs_col_ref is None:
823
814
  tbl_names = [tbl.tbl_name() for tbl in self._from_clause.tbls]
824
- raise excs.Error(f"'on': column {col_ref.col.name!r} not found in any of: {' '.join(tbl_names)}")
815
+ raise excs.Error(f'`on`: column {col_ref.col.name!r} not found in any of: {" ".join(tbl_names)}')
825
816
  pred = exprs.Comparison(exprs.ComparisonOperator.EQ, lhs_col_ref, rhs_col_ref)
826
817
  predicates.append(pred)
827
818
 
@@ -885,16 +876,16 @@ class DataFrame:
885
876
  """
886
877
  if self.sample_clause is not None:
887
878
  raise excs.Error('join() cannot be used with sample()')
888
- join_pred: Optional[exprs.Expr]
879
+ join_pred: exprs.Expr | None
889
880
  if how == 'cross':
890
881
  if on is not None:
891
- raise excs.Error("'on' not allowed for cross join")
882
+ raise excs.Error('`on` not allowed for cross join')
892
883
  join_pred = None
893
884
  else:
894
885
  if on is None:
895
- raise excs.Error(f"how={how!r} requires 'on'")
886
+ raise excs.Error(f'`how={how!r}` requires `on` to be present')
896
887
  join_pred = self._create_join_predicate(other._tbl_version_path, on)
897
- join_clause = plan.JoinClause(join_type=plan.JoinType.validated(how, "'how'"), join_predicate=join_pred)
888
+ join_clause = plan.JoinClause(join_type=plan.JoinType.validated(how, '`how`'), join_predicate=join_pred)
898
889
  from_clause = plan.FromClause(
899
890
  tbls=[*self._from_clause.tbls, other._tbl_version_path],
900
891
  join_clauses=[*self._from_clause.join_clauses, join_clause],
@@ -951,16 +942,16 @@ class DataFrame:
951
942
  >>> df = book.group_by(t.genre).select(t.genre, total=sum(t.price)).show()
952
943
  """
953
944
  if self.group_by_clause is not None:
954
- raise excs.Error('Group-by already specified')
945
+ raise excs.Error('group_by() already specified')
955
946
  if self.sample_clause is not None:
956
947
  raise excs.Error('group_by() cannot be used with sample()')
957
948
 
958
- grouping_tbl: Optional[catalog.TableVersion] = None
959
- group_by_clause: Optional[list[exprs.Expr]] = None
949
+ grouping_tbl: catalog.TableVersion | None = None
950
+ group_by_clause: list[exprs.Expr] | None = None
960
951
  for item in grouping_items:
961
952
  if isinstance(item, (catalog.Table, catalog.TableVersion)):
962
953
  if len(grouping_items) > 1:
963
- raise excs.Error('group_by(): only one table can be specified')
954
+ raise excs.Error('group_by(): only one Table can be specified')
964
955
  if len(self._from_clause.tbls) > 1:
965
956
  raise excs.Error('group_by() with Table not supported for joins')
966
957
  grouping_tbl = item if isinstance(item, catalog.TableVersion) else item._tbl_version.get()
@@ -968,7 +959,7 @@ class DataFrame:
968
959
  base = self._first_tbl.find_tbl_version(grouping_tbl.id)
969
960
  if base is None or base.id == self._first_tbl.tbl_id:
970
961
  raise excs.Error(
971
- f'group_by(): {grouping_tbl.name} is not a base table of {self._first_tbl.tbl_name()}'
962
+ f'group_by(): {grouping_tbl.name!r} is not a base table of {self._first_tbl.tbl_name()!r}'
972
963
  )
973
964
  break
974
965
  if not isinstance(item, exprs.Expr):
@@ -1080,10 +1071,10 @@ class DataFrame:
1080
1071
 
1081
1072
  def sample(
1082
1073
  self,
1083
- n: Optional[int] = None,
1084
- n_per_stratum: Optional[int] = None,
1085
- fraction: Optional[float] = None,
1086
- seed: Optional[int] = None,
1074
+ n: int | None = None,
1075
+ n_per_stratum: int | None = None,
1076
+ fraction: float | None = None,
1077
+ seed: int | None = None,
1087
1078
  stratify_by: Any = None,
1088
1079
  ) -> DataFrame:
1089
1080
  """
@@ -1137,7 +1128,7 @@ class DataFrame:
1137
1128
  """
1138
1129
  # Check context of usage
1139
1130
  if self.sample_clause is not None:
1140
- raise excs.Error('sample() cannot be used with sample()')
1131
+ raise excs.Error('Multiple sample() clauses not allowed')
1141
1132
  if self.group_by_clause is not None:
1142
1133
  raise excs.Error('sample() cannot be used with group_by()')
1143
1134
  if self.order_by_clause is not None:
@@ -1174,11 +1165,11 @@ class DataFrame:
1174
1165
  if expr is None or not isinstance(expr, exprs.Expr):
1175
1166
  raise excs.Error(f'Invalid expression: {expr}')
1176
1167
  if not expr.col_type.is_scalar_type():
1177
- raise excs.Error(f'Invalid type: expression must be a scalar type (not {expr.col_type})')
1168
+ raise excs.Error(f'Invalid type: expression must be a scalar type (not `{expr.col_type}`)')
1178
1169
  if not expr.is_bound_by(self._from_clause.tbls):
1179
1170
  raise excs.Error(
1180
- f"Expression '{expr}' cannot be evaluated in the context of this query's tables "
1181
- f'({",".join(tbl.tbl_name() for tbl in self._from_clause.tbls)})'
1171
+ f"That expression cannot be evaluated in the context of this query's tables "
1172
+ f'({",".join(tbl.tbl_name() for tbl in self._from_clause.tbls)}): {expr}'
1182
1173
  )
1183
1174
  stratify_exprs.append(expr)
1184
1175