pixeltable-0.4.17-py3-none-any.whl → pixeltable-0.4.19-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (153)
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/catalog.py +144 -118
  4. pixeltable/catalog/column.py +104 -115
  5. pixeltable/catalog/globals.py +1 -2
  6. pixeltable/catalog/insertable_table.py +44 -49
  7. pixeltable/catalog/path.py +3 -4
  8. pixeltable/catalog/schema_object.py +4 -4
  9. pixeltable/catalog/table.py +139 -124
  10. pixeltable/catalog/table_metadata.py +6 -6
  11. pixeltable/catalog/table_version.py +315 -246
  12. pixeltable/catalog/table_version_handle.py +4 -4
  13. pixeltable/catalog/table_version_path.py +9 -10
  14. pixeltable/catalog/tbl_ops.py +9 -3
  15. pixeltable/catalog/view.py +34 -28
  16. pixeltable/config.py +14 -10
  17. pixeltable/dataframe.py +69 -78
  18. pixeltable/env.py +78 -64
  19. pixeltable/exec/aggregation_node.py +6 -6
  20. pixeltable/exec/cache_prefetch_node.py +10 -10
  21. pixeltable/exec/data_row_batch.py +3 -3
  22. pixeltable/exec/exec_context.py +16 -4
  23. pixeltable/exec/exec_node.py +5 -5
  24. pixeltable/exec/expr_eval/evaluators.py +6 -6
  25. pixeltable/exec/expr_eval/expr_eval_node.py +8 -7
  26. pixeltable/exec/expr_eval/globals.py +6 -6
  27. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  28. pixeltable/exec/expr_eval/schedulers.py +11 -11
  29. pixeltable/exec/in_memory_data_node.py +2 -2
  30. pixeltable/exec/object_store_save_node.py +14 -17
  31. pixeltable/exec/sql_node.py +28 -27
  32. pixeltable/exprs/arithmetic_expr.py +4 -4
  33. pixeltable/exprs/array_slice.py +2 -2
  34. pixeltable/exprs/column_property_ref.py +3 -3
  35. pixeltable/exprs/column_ref.py +61 -74
  36. pixeltable/exprs/comparison.py +5 -5
  37. pixeltable/exprs/compound_predicate.py +3 -3
  38. pixeltable/exprs/data_row.py +12 -12
  39. pixeltable/exprs/expr.py +41 -31
  40. pixeltable/exprs/expr_dict.py +3 -3
  41. pixeltable/exprs/expr_set.py +3 -3
  42. pixeltable/exprs/function_call.py +14 -14
  43. pixeltable/exprs/in_predicate.py +4 -4
  44. pixeltable/exprs/inline_expr.py +8 -8
  45. pixeltable/exprs/is_null.py +1 -3
  46. pixeltable/exprs/json_mapper.py +8 -8
  47. pixeltable/exprs/json_path.py +6 -6
  48. pixeltable/exprs/literal.py +5 -5
  49. pixeltable/exprs/method_ref.py +2 -2
  50. pixeltable/exprs/object_ref.py +2 -2
  51. pixeltable/exprs/row_builder.py +14 -14
  52. pixeltable/exprs/rowid_ref.py +8 -8
  53. pixeltable/exprs/similarity_expr.py +50 -25
  54. pixeltable/exprs/sql_element_cache.py +4 -4
  55. pixeltable/exprs/string_op.py +2 -2
  56. pixeltable/exprs/type_cast.py +3 -5
  57. pixeltable/func/aggregate_function.py +8 -8
  58. pixeltable/func/callable_function.py +9 -9
  59. pixeltable/func/expr_template_function.py +3 -3
  60. pixeltable/func/function.py +15 -17
  61. pixeltable/func/function_registry.py +6 -7
  62. pixeltable/func/globals.py +2 -3
  63. pixeltable/func/mcp.py +2 -2
  64. pixeltable/func/query_template_function.py +16 -16
  65. pixeltable/func/signature.py +14 -14
  66. pixeltable/func/tools.py +11 -11
  67. pixeltable/func/udf.py +16 -18
  68. pixeltable/functions/__init__.py +1 -0
  69. pixeltable/functions/anthropic.py +7 -7
  70. pixeltable/functions/audio.py +76 -0
  71. pixeltable/functions/bedrock.py +6 -6
  72. pixeltable/functions/deepseek.py +4 -4
  73. pixeltable/functions/fireworks.py +2 -2
  74. pixeltable/functions/gemini.py +6 -6
  75. pixeltable/functions/globals.py +12 -12
  76. pixeltable/functions/groq.py +4 -4
  77. pixeltable/functions/huggingface.py +1033 -6
  78. pixeltable/functions/image.py +7 -10
  79. pixeltable/functions/llama_cpp.py +7 -7
  80. pixeltable/functions/math.py +2 -3
  81. pixeltable/functions/mistralai.py +3 -3
  82. pixeltable/functions/ollama.py +9 -9
  83. pixeltable/functions/openai.py +21 -21
  84. pixeltable/functions/openrouter.py +7 -7
  85. pixeltable/functions/string.py +21 -28
  86. pixeltable/functions/timestamp.py +7 -8
  87. pixeltable/functions/together.py +4 -6
  88. pixeltable/functions/twelvelabs.py +92 -0
  89. pixeltable/functions/video.py +36 -31
  90. pixeltable/functions/vision.py +6 -6
  91. pixeltable/functions/whisper.py +7 -7
  92. pixeltable/functions/whisperx.py +16 -16
  93. pixeltable/globals.py +75 -40
  94. pixeltable/index/base.py +12 -8
  95. pixeltable/index/btree.py +19 -22
  96. pixeltable/index/embedding_index.py +30 -39
  97. pixeltable/io/datarows.py +3 -3
  98. pixeltable/io/external_store.py +13 -16
  99. pixeltable/io/fiftyone.py +5 -5
  100. pixeltable/io/globals.py +5 -5
  101. pixeltable/io/hf_datasets.py +4 -4
  102. pixeltable/io/label_studio.py +12 -12
  103. pixeltable/io/pandas.py +6 -6
  104. pixeltable/io/parquet.py +2 -2
  105. pixeltable/io/table_data_conduit.py +12 -12
  106. pixeltable/io/utils.py +2 -2
  107. pixeltable/iterators/audio.py +2 -2
  108. pixeltable/iterators/document.py +88 -57
  109. pixeltable/iterators/video.py +66 -37
  110. pixeltable/metadata/converters/convert_18.py +2 -2
  111. pixeltable/metadata/converters/convert_19.py +2 -2
  112. pixeltable/metadata/converters/convert_20.py +2 -2
  113. pixeltable/metadata/converters/convert_21.py +2 -2
  114. pixeltable/metadata/converters/convert_22.py +2 -2
  115. pixeltable/metadata/converters/convert_24.py +2 -2
  116. pixeltable/metadata/converters/convert_25.py +2 -2
  117. pixeltable/metadata/converters/convert_26.py +2 -2
  118. pixeltable/metadata/converters/convert_29.py +4 -4
  119. pixeltable/metadata/converters/convert_34.py +2 -2
  120. pixeltable/metadata/converters/convert_36.py +2 -2
  121. pixeltable/metadata/converters/convert_38.py +2 -2
  122. pixeltable/metadata/converters/convert_39.py +1 -2
  123. pixeltable/metadata/converters/util.py +11 -13
  124. pixeltable/metadata/schema.py +22 -21
  125. pixeltable/metadata/utils.py +2 -6
  126. pixeltable/mypy/mypy_plugin.py +5 -5
  127. pixeltable/plan.py +32 -34
  128. pixeltable/share/packager.py +7 -7
  129. pixeltable/share/publish.py +3 -3
  130. pixeltable/store.py +126 -41
  131. pixeltable/type_system.py +43 -46
  132. pixeltable/utils/__init__.py +1 -2
  133. pixeltable/utils/arrow.py +4 -4
  134. pixeltable/utils/av.py +74 -38
  135. pixeltable/utils/azure_store.py +305 -0
  136. pixeltable/utils/code.py +1 -2
  137. pixeltable/utils/dbms.py +15 -19
  138. pixeltable/utils/description_helper.py +2 -3
  139. pixeltable/utils/documents.py +5 -6
  140. pixeltable/utils/exception_handler.py +2 -2
  141. pixeltable/utils/filecache.py +5 -5
  142. pixeltable/utils/formatter.py +4 -6
  143. pixeltable/utils/gcs_store.py +9 -9
  144. pixeltable/utils/local_store.py +17 -17
  145. pixeltable/utils/object_stores.py +59 -43
  146. pixeltable/utils/s3_store.py +35 -30
  147. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/METADATA +4 -4
  148. pixeltable-0.4.19.dist-info/RECORD +213 -0
  149. pixeltable/__version__.py +0 -3
  150. pixeltable-0.4.17.dist-info/RECORD +0 -211
  151. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/WHEEL +0 -0
  152. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/entry_points.txt +0 -0
  153. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/licenses/LICENSE +0 -0
@@ -4,7 +4,7 @@ import abc
4
4
  import asyncio
5
5
  from dataclasses import dataclass
6
6
  from types import TracebackType
7
- from typing import Any, Iterable, Optional, Protocol
7
+ from typing import Any, Iterable, Protocol
8
8
 
9
9
  import numpy as np
10
10
 
@@ -18,11 +18,11 @@ class FnCallArgs:
18
18
  fn_call: exprs.FunctionCall
19
19
  rows: list[exprs.DataRow]
20
20
  # single call
21
- args: Optional[list[Any]] = None
22
- kwargs: Optional[dict[str, Any]] = None
21
+ args: list[Any] | None = None
22
+ kwargs: dict[str, Any] | None = None
23
23
  # batch call
24
- batch_args: Optional[list[list[Optional[Any]]]] = None
25
- batch_kwargs: Optional[dict[str, list[Optional[Any]]]] = None
24
+ batch_args: list[list[Any | None]] | None = None
25
+ batch_kwargs: dict[str, list[Any | None]] | None = None
26
26
 
27
27
  @property
28
28
  def pxt_fn(self) -> func.CallableFunction:
@@ -56,7 +56,7 @@ class Scheduler(abc.ABC):
56
56
  request: FnCallArgs
57
57
  num_retries: int
58
58
  exec_ctx: ExecCtx
59
- retry_after: Optional[float] = None # time.monotonic()
59
+ retry_after: float | None = None # time.monotonic()
60
60
 
61
61
  def __lt__(self, other: Scheduler.QueueItem) -> bool:
62
62
  # prioritize by number of retries (more retries = higher priority)
@@ -1,7 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
- from typing import Optional
5
4
 
6
5
  import numpy as np
7
6
 
@@ -14,7 +13,7 @@ class RowBuffer:
14
13
  """Fixed-length circular buffer of DataRows; knows how to maintain input order"""
15
14
 
16
15
  size: int
17
- row_pos_map: Optional[dict[int, int]] # id(row) -> position of row in output; None if not maintaining order
16
+ row_pos_map: dict[int, int] | None # id(row) -> position of row in output; None if not maintaining order
18
17
  num_rows: int # number of rows in the buffer
19
18
  num_ready: int # number of consecutive non-None rows at head
20
19
  buffer: np.ndarray # of object
@@ -7,7 +7,7 @@ import logging
7
7
  import re
8
8
  import sys
9
9
  import time
10
- from typing import Any, Awaitable, Collection, Optional
10
+ from typing import Any, Awaitable, Collection
11
11
 
12
12
  from pixeltable import env, func
13
13
  from pixeltable.config import Config
@@ -35,7 +35,7 @@ class RateLimitsScheduler(Scheduler):
35
35
  get_request_resources_param_names: list[str] # names of parameters of RateLimitsInfo.get_request_resources()
36
36
 
37
37
  # scheduling-related state
38
- pool_info: Optional[env.RateLimitsInfo]
38
+ pool_info: env.RateLimitsInfo | None
39
39
  est_usage: dict[str, int] # value per resource; accumulated estimates since the last util. report
40
40
 
41
41
  num_in_flight: int # unfinished tasks
@@ -77,7 +77,7 @@ class RateLimitsScheduler(Scheduler):
77
77
  self.est_usage = dict.fromkeys(self._resources, 0)
78
78
 
79
79
  async def _main_loop(self) -> None:
80
- item: Optional[RateLimitsScheduler.QueueItem] = None
80
+ item: RateLimitsScheduler.QueueItem | None = None
81
81
  while True:
82
82
  if item is None:
83
83
  item = await self.queue.get()
@@ -102,8 +102,8 @@ class RateLimitsScheduler(Scheduler):
102
102
  request_resources = self._get_request_resources(item.request)
103
103
  limits_info = self._check_resource_limits(request_resources)
104
104
  aws: list[Awaitable[None]] = []
105
- completed_aw: Optional[asyncio.Task] = None
106
- wait_for_reset: Optional[asyncio.Task] = None
105
+ completed_aw: asyncio.Task | None = None
106
+ wait_for_reset: asyncio.Task | None = None
107
107
  if limits_info is not None:
108
108
  # limits_info's resource is depleted, wait for capacity to free up
109
109
 
@@ -167,7 +167,7 @@ class RateLimitsScheduler(Scheduler):
167
167
  constant_kwargs, batch_kwargs = request.pxt_fn.create_batch_kwargs(batch_kwargs)
168
168
  return self.pool_info.get_request_resources(**constant_kwargs, **batch_kwargs)
169
169
 
170
- def _check_resource_limits(self, request_resources: dict[str, int]) -> Optional[env.RateLimitInfo]:
170
+ def _check_resource_limits(self, request_resources: dict[str, int]) -> env.RateLimitInfo | None:
171
171
  """Returns the most depleted resource, relative to its limit, or None if all resources are within limits"""
172
172
  candidates: list[tuple[env.RateLimitInfo, float]] = [] # (info, relative remaining)
173
173
  for resource, usage in request_resources.items():
@@ -405,7 +405,7 @@ class RequestRateScheduler(Scheduler):
405
405
  if is_task:
406
406
  self.num_in_flight -= 1
407
407
 
408
- def _is_rate_limit_error(self, exc: Exception) -> tuple[bool, Optional[float]]:
408
+ def _is_rate_limit_error(self, exc: Exception) -> tuple[bool, float | None]:
409
409
  """Returns True if the exception indicates a rate limit error, and the retry delay in seconds."""
410
410
  from http import HTTPStatus
411
411
 
@@ -413,7 +413,7 @@ class RequestRateScheduler(Scheduler):
413
413
  # We look for attributes that contain status codes, instead of checking the type of the exception,
414
414
  # in order to handle a wider variety of exception classes.
415
415
  is_rate_limit_error = False
416
- retry_delay: Optional[float] = None
416
+ retry_delay: float | None = None
417
417
 
418
418
  # requests.HTTPError/httpx.HTTPStatusError
419
419
  if (
@@ -443,7 +443,7 @@ class RequestRateScheduler(Scheduler):
443
443
 
444
444
  return False, None
445
445
 
446
- def _extract_retry_delay_from_headers(self, headers: Optional[Any]) -> Optional[float]:
446
+ def _extract_retry_delay_from_headers(self, headers: Any | None) -> float | None:
447
447
  """Extract retry delay from HTTP headers."""
448
448
  if headers is None:
449
449
  return None
@@ -489,7 +489,7 @@ class RequestRateScheduler(Scheduler):
489
489
 
490
490
  return None
491
491
 
492
- def _extract_retry_delay_from_message(self, msg: str) -> Optional[float]:
492
+ def _extract_retry_delay_from_message(self, msg: str) -> float | None:
493
493
  msg_lower = msg.lower()
494
494
  for pattern in self.RETRY_AFTER_PATTERNS:
495
495
  match = re.search(pattern, msg_lower)
@@ -500,7 +500,7 @@ class RequestRateScheduler(Scheduler):
500
500
  continue
501
501
  return None
502
502
 
503
- def _compute_retry_delay(self, num_retries: int, retry_after: Optional[float] = None) -> float:
503
+ def _compute_retry_delay(self, num_retries: int, retry_after: float | None = None) -> float:
504
504
  """
505
505
  Calculate exponential backoff delay for rate limit errors.
506
506
 
@@ -1,5 +1,5 @@
1
1
  import logging
2
- from typing import Any, AsyncIterator, Optional
2
+ from typing import Any, AsyncIterator
3
3
 
4
4
  from pixeltable import catalog, exprs
5
5
  from pixeltable.utils.local_store import TempStore
@@ -23,7 +23,7 @@ class InMemoryDataNode(ExecNode):
23
23
 
24
24
  input_rows: list[dict[str, Any]]
25
25
  start_row_id: int
26
- output_batch: Optional[DataRowBatch]
26
+ output_batch: DataRowBatch | None
27
27
 
28
28
  # output_exprs is declared in the superclass, but we redeclare it here with a more specific type
29
29
  output_exprs: list[exprs.ColumnRef]
@@ -6,7 +6,7 @@ import logging
6
6
  from collections import defaultdict, deque
7
7
  from concurrent import futures
8
8
  from pathlib import Path
9
- from typing import AsyncIterator, Iterator, NamedTuple, Optional
9
+ from typing import AsyncIterator, Iterator, NamedTuple
10
10
 
11
11
  from pixeltable import exprs
12
12
  from pixeltable.utils.object_stores import ObjectOps, ObjectPath, StorageTarget
@@ -44,11 +44,11 @@ class ObjectStoreSaveNode(ExecNode):
44
44
  """Specify the source and destination for a WorkItem"""
45
45
 
46
46
  src_path: str # source of the file to be processed
47
- destination: Optional[str] # destination URI for the file to be processed
47
+ destination: str # destination URI for the file to be processed
48
48
 
49
49
  class WorkItem(NamedTuple):
50
50
  src_path: Path
51
- destination: Optional[str]
51
+ destination: str | None
52
52
  info: exprs.ColumnSlotIdx # column info for the file being processed
53
53
  destination_count: int = 1 # number of unique destinations for this file
54
54
 
@@ -60,7 +60,7 @@ class ObjectStoreSaveNode(ExecNode):
60
60
 
61
61
  # ready_rows: rows that are ready to be returned, ordered by row idx;
62
62
  # the implied row idx of ready_rows[0] is num_returned_rows
63
- ready_rows: deque[Optional[exprs.DataRow]]
63
+ ready_rows: deque[exprs.DataRow | None]
64
64
 
65
65
  in_flight_rows: dict[int, ObjectStoreSaveNode.RowState] # rows with in-flight work; id(row) -> RowState
66
66
  in_flight_requests: dict[
@@ -71,12 +71,12 @@ class ObjectStoreSaveNode(ExecNode):
71
71
  ] # WorkDesignator -> [(row, info)]
72
72
 
73
73
  input_finished: bool
74
- row_idx: Iterator[Optional[int]]
74
+ row_idx: Iterator[int | None]
75
75
 
76
76
  @dataclasses.dataclass
77
77
  class RowState:
78
78
  row: exprs.DataRow
79
- idx: Optional[int] # position in input stream; None if we don't retain input order
79
+ idx: int | None # position in input stream; None if we don't retain input order
80
80
  num_missing: int # number of references to media files in this row
81
81
  delete_destinations: list[Path] # paths to delete after all copies are complete
82
82
 
@@ -99,7 +99,7 @@ class ObjectStoreSaveNode(ExecNode):
99
99
  def queued_work(self) -> int:
100
100
  return len(self.in_flight_requests)
101
101
 
102
- async def get_input_batch(self, input_iter: AsyncIterator[DataRowBatch]) -> Optional[DataRowBatch]:
102
+ async def get_input_batch(self, input_iter: AsyncIterator[DataRowBatch]) -> DataRowBatch | None:
103
103
  """Get the next batch of input rows, or None if there are no more rows"""
104
104
  try:
105
105
  input_batch = await anext(input_iter)
@@ -148,7 +148,7 @@ class ObjectStoreSaveNode(ExecNode):
148
148
  sum(int(row is not None) for row in itertools.islice(self.ready_rows, self.BATCH_SIZE)) == self.BATCH_SIZE
149
149
  )
150
150
 
151
- def __add_ready_row(self, row: exprs.DataRow, row_idx: Optional[int]) -> None:
151
+ def __add_ready_row(self, row: exprs.DataRow, row_idx: int | None) -> None:
152
152
  if row_idx is None:
153
153
  self.ready_rows.append(row)
154
154
  else:
@@ -209,14 +209,11 @@ class ObjectStoreSaveNode(ExecNode):
209
209
  assert col.col_type.is_media_type()
210
210
 
211
211
  destination = info.col.destination
212
- soa = None if destination is None else ObjectPath.parse_object_storage_addr(destination, False)
213
- if (
214
- soa is not None
215
- and soa.storage_target == StorageTarget.LOCAL_STORE
216
- and LocalStore(soa).resolve_url(url) is not None
217
- ):
218
- # A local non-default destination was specified, and the url already points there
219
- continue
212
+ if destination is not None:
213
+ soa = ObjectPath.parse_object_storage_addr(destination, False)
214
+ if soa.storage_target == StorageTarget.LOCAL_STORE and LocalStore(soa).resolve_url(url) is not None:
215
+ # A local non-default destination was specified, and the url already points there
216
+ continue
220
217
 
221
218
  src_path = LocalStore.file_url_to_path(url)
222
219
  if src_path is None:
@@ -283,7 +280,7 @@ class ObjectStoreSaveNode(ExecNode):
283
280
  )
284
281
  _logger.debug(f'submitted {work_item}')
285
282
 
286
- def __persist_media_file(self, work_item: WorkItem) -> tuple[Optional[str], Optional[Exception]]:
283
+ def __persist_media_file(self, work_item: WorkItem) -> tuple[str | None, Exception | None]:
287
284
  """Move data from the TempStore to another location"""
288
285
  src_path = work_item.src_path
289
286
  col = work_item.info.col
@@ -2,7 +2,7 @@ import datetime
2
2
  import logging
3
3
  import warnings
4
4
  from decimal import Decimal
5
- from typing import TYPE_CHECKING, AsyncIterator, Iterable, NamedTuple, Optional, Sequence
5
+ from typing import TYPE_CHECKING, AsyncIterator, Iterable, NamedTuple, Sequence
6
6
  from uuid import UUID
7
7
 
8
8
  import sqlalchemy as sql
@@ -22,13 +22,13 @@ _logger = logging.getLogger('pixeltable')
22
22
 
23
23
  class OrderByItem(NamedTuple):
24
24
  expr: exprs.Expr
25
- asc: Optional[bool]
25
+ asc: bool | None
26
26
 
27
27
 
28
28
  OrderByClause = list[OrderByItem]
29
29
 
30
30
 
31
- def combine_order_by_clauses(clauses: Iterable[OrderByClause]) -> Optional[OrderByClause]:
31
+ def combine_order_by_clauses(clauses: Iterable[OrderByClause]) -> OrderByClause | None:
32
32
  """Returns a clause that's compatible with 'clauses', or None if that doesn't exist.
33
33
  Two clauses are compatible if for each of their respective items c1[i] and c2[i]
34
34
  a) the exprs are identical and
@@ -81,15 +81,15 @@ class SqlNode(ExecNode):
81
81
  set_pk: if True, sets the primary for each DataRow
82
82
  """
83
83
 
84
- tbl: Optional[catalog.TableVersionPath]
84
+ tbl: catalog.TableVersionPath | None
85
85
  select_list: exprs.ExprSet
86
86
  columns: list[catalog.Column] # for which columns to populate DataRow.cell_vals/cell_md
87
87
  cell_md_refs: list[exprs.ColumnPropertyRef] # of ColumnRefs which also need DataRow.slot_cellmd for evaluation
88
88
  set_pk: bool
89
89
  num_pk_cols: int
90
- py_filter: Optional[exprs.Expr] # a predicate that can only be run in Python
91
- py_filter_eval_ctx: Optional[exprs.RowBuilder.EvalCtx]
92
- cte: Optional[sql.CTE]
90
+ py_filter: exprs.Expr | None # a predicate that can only be run in Python
91
+ py_filter_eval_ctx: exprs.RowBuilder.EvalCtx | None
92
+ cte: sql.CTE | None
93
93
  sql_elements: exprs.SqlElementCache
94
94
 
95
95
  # execution state
@@ -99,15 +99,15 @@ class SqlNode(ExecNode):
99
99
  result_cursor: sql.engine.CursorResult | None
100
100
 
101
101
  # where_clause/-_element: allow subclass to set one or the other (but not both)
102
- where_clause: Optional[exprs.Expr]
103
- where_clause_element: Optional[sql.ColumnElement]
102
+ where_clause: exprs.Expr | None
103
+ where_clause_element: sql.ColumnElement | None
104
104
 
105
105
  order_by_clause: OrderByClause
106
- limit: Optional[int]
106
+ limit: int | None
107
107
 
108
108
  def __init__(
109
109
  self,
110
- tbl: Optional[catalog.TableVersionPath],
110
+ tbl: catalog.TableVersionPath | None,
111
111
  row_builder: exprs.RowBuilder,
112
112
  select_list: Iterable[exprs.Expr],
113
113
  columns: list[catalog.Column],
@@ -216,7 +216,7 @@ class SqlNode(ExecNode):
216
216
  def _ordering_tbl_ids(self) -> set[UUID]:
217
217
  return exprs.Expr.all_tbl_ids(e for e, _ in self.order_by_clause)
218
218
 
219
- def to_cte(self, keep_pk: bool = False) -> Optional[tuple[sql.CTE, exprs.ExprDict[sql.ColumnElement]]]:
219
+ def to_cte(self, keep_pk: bool = False) -> tuple[sql.CTE, exprs.ExprDict[sql.ColumnElement]] | None:
220
220
  """
221
221
  Creates a CTE that materializes the output of this node plus a mapping from select list expr to output column.
222
222
  keep_pk: if True, the PK columns are included in the CTE Select statement
@@ -245,8 +245,8 @@ class SqlNode(ExecNode):
245
245
  cls,
246
246
  tbl: catalog.TableVersionPath,
247
247
  stmt: sql.Select,
248
- refd_tbl_ids: Optional[set[UUID]] = None,
249
- exact_version_only: Optional[set[UUID]] = None,
248
+ refd_tbl_ids: set[UUID] | None = None,
249
+ exact_version_only: set[UUID] | None = None,
250
250
  ) -> sql.Select:
251
251
  """Add From clause to stmt for tables/views referenced by materialized_exprs
252
252
  Args:
@@ -270,7 +270,7 @@ class SqlNode(ExecNode):
270
270
  joined_tbls.append(t)
271
271
 
272
272
  first = True
273
- prev_tv: Optional[catalog.TableVersion] = None
273
+ prev_tv: catalog.TableVersion | None = None
274
274
  for t in joined_tbls[::-1]:
275
275
  tv = t.get()
276
276
  # _logger.debug(f'create_from_clause: tbl_id={tv.id} {id(tv.store_tbl.sa_tbl)}')
@@ -347,7 +347,7 @@ class SqlNode(ExecNode):
347
347
  pass
348
348
 
349
349
  output_batch = DataRowBatch(self.row_builder)
350
- output_row: Optional[exprs.DataRow] = None
350
+ output_row: exprs.DataRow | None = None
351
351
  num_rows_returned = 0
352
352
  is_using_cockroachdb = Env.get().is_using_cockroachdb
353
353
  tzinfo = Env.get().default_time_zone
@@ -450,7 +450,7 @@ class SqlScanNode(SqlNode):
450
450
  columns: list[catalog.Column],
451
451
  cell_md_col_refs: list[exprs.ColumnRef] | None = None,
452
452
  set_pk: bool = False,
453
- exact_version_only: Optional[list[catalog.TableVersionHandle]] = None,
453
+ exact_version_only: list[catalog.TableVersionHandle] | None = None,
454
454
  ):
455
455
  sql_elements = exprs.SqlElementCache()
456
456
  super().__init__(
@@ -528,17 +528,17 @@ class SqlAggregationNode(SqlNode):
528
528
  limit: max number of rows to return: None = no limit
529
529
  """
530
530
 
531
- group_by_items: Optional[list[exprs.Expr]]
532
- input_cte: Optional[sql.CTE]
531
+ group_by_items: list[exprs.Expr] | None
532
+ input_cte: sql.CTE | None
533
533
 
534
534
  def __init__(
535
535
  self,
536
536
  row_builder: exprs.RowBuilder,
537
537
  input: SqlNode,
538
538
  select_list: Iterable[exprs.Expr],
539
- group_by_items: Optional[list[exprs.Expr]] = None,
540
- limit: Optional[int] = None,
541
- exact_version_only: Optional[list[catalog.TableVersion]] = None,
539
+ group_by_items: list[exprs.Expr] | None = None,
540
+ limit: int | None = None,
541
+ exact_version_only: list[catalog.TableVersion] | None = None,
542
542
  ):
543
543
  assert len(input.cell_md_refs) == 0 # there's no aggregation over json or arrays in SQL
544
544
  self.input_cte, input_col_map = input.to_cte()
@@ -617,9 +617,9 @@ class SqlSampleNode(SqlNode):
617
617
  stratify_exprs: Analyzer processed list of expressions to stratify by.
618
618
  """
619
619
 
620
- input_cte: Optional[sql.CTE]
620
+ input_cte: sql.CTE | None
621
621
  pk_count: int
622
- stratify_exprs: Optional[list[exprs.Expr]]
622
+ stratify_exprs: list[exprs.Expr] | None
623
623
  sample_clause: 'SampleClause'
624
624
 
625
625
  def __init__(
@@ -648,7 +648,6 @@ class SqlSampleNode(SqlNode):
648
648
  )
649
649
  self.stratify_exprs = stratify_exprs
650
650
  self.sample_clause = sample_clause
651
- assert isinstance(self.sample_clause.seed, int)
652
651
 
653
652
  @classmethod
654
653
  def key_sql_expr(cls, seed: sql.ColumnElement, sql_cols: Iterable[sql.ColumnElement]) -> sql.ColumnElement:
@@ -667,7 +666,9 @@ class SqlSampleNode(SqlNode):
667
666
  """Create an expression for randomly ordering rows with a given seed"""
668
667
  rowid_cols = [*cte.c[-self.pk_count : -1]] # exclude the version column
669
668
  assert len(rowid_cols) > 0
670
- return self.key_sql_expr(sql.literal_column(str(self.sample_clause.seed)), rowid_cols)
669
+ # If seed is not set in the sample clause, use the random seed given by the execution context
670
+ seed = self.sample_clause.seed if self.sample_clause.seed is not None else self.ctx.random_seed
671
+ return self.key_sql_expr(sql.literal_column(str(seed)), rowid_cols)
671
672
 
672
673
  def _create_stmt(self) -> sql.Select:
673
674
  from pixeltable.plan import SampleClause
@@ -691,7 +692,7 @@ class SqlSampleNode(SqlNode):
691
692
 
692
693
  return self._create_stmt_stratified_n(self.sample_clause.n, self.sample_clause.n_per_stratum)
693
694
 
694
- def _create_stmt_stratified_n(self, n: Optional[int], n_per_stratum: Optional[int]) -> sql.Select:
695
+ def _create_stmt_stratified_n(self, n: int | None, n_per_stratum: int | None) -> sql.Select:
695
696
  """Create a Select stmt that returns n samples across all strata or n_per_stratum samples per stratum"""
696
697
 
697
698
  sql_strata_exprs = [self.sql_elements.get(e) for e in self.stratify_exprs]
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Any, Optional
3
+ from typing import Any
4
4
 
5
5
  import sqlalchemy as sql
6
6
 
@@ -58,7 +58,7 @@ class ArithmeticExpr(Expr):
58
58
  def _id_attrs(self) -> list[tuple[str, Any]]:
59
59
  return [*super()._id_attrs(), ('operator', self.operator.value)]
60
60
 
61
- def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
61
+ def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
62
62
  assert self.col_type.is_int_type() or self.col_type.is_float_type() or self.col_type.is_json_type()
63
63
  left = sql_elements.get(self._op1)
64
64
  right = sql_elements.get(self._op2)
@@ -118,7 +118,7 @@ class ArithmeticExpr(Expr):
118
118
 
119
119
  data_row[self.slot_idx] = self.eval_nullable(op1_val, op2_val)
120
120
 
121
- def eval_nullable(self, op1_val: Optional[float], op2_val: Optional[float]) -> Optional[float]:
121
+ def eval_nullable(self, op1_val: float | None, op2_val: float | None) -> float | None:
122
122
  """
123
123
  Return the result of evaluating the expression on two nullable int/float operands,
124
124
  None is interpreted as SQL NULL
@@ -144,7 +144,7 @@ class ArithmeticExpr(Expr):
144
144
  elif self.operator == ArithmeticOperator.FLOORDIV:
145
145
  return op1_val // op2_val
146
146
 
147
- def as_literal(self) -> Optional[Literal]:
147
+ def as_literal(self) -> Literal | None:
148
148
  op1_lit = self._op1.as_literal()
149
149
  if op1_lit is None:
150
150
  return None
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Any, Optional
3
+ from typing import Any
4
4
 
5
5
  import sqlalchemy as sql
6
6
 
@@ -43,7 +43,7 @@ class ArraySlice(Expr):
43
43
  def _id_attrs(self) -> list[tuple[str, Any]]:
44
44
  return [*super()._id_attrs(), ('index', self.index)]
45
45
 
46
- def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
46
+ def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
47
47
  return None
48
48
 
49
49
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import enum
4
- from typing import Any, Optional
4
+ from typing import Any
5
5
 
6
6
  import sqlalchemy as sql
7
7
 
@@ -34,7 +34,7 @@ class ColumnPropertyRef(Expr):
34
34
  self.prop = prop
35
35
  self.id = self._create_id()
36
36
 
37
- def default_column_name(self) -> Optional[str]:
37
+ def default_column_name(self) -> str | None:
38
38
  return str(self).replace('.', '_')
39
39
 
40
40
  def _equals(self, other: ColumnPropertyRef) -> bool:
@@ -55,7 +55,7 @@ class ColumnPropertyRef(Expr):
55
55
  def is_cellmd_prop(self) -> bool:
56
56
  return self.prop in (self.Property.ERRORTYPE, self.Property.ERRORMSG, self.Property.CELLMD)
57
57
 
58
- def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
58
+ def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
59
59
  if not self.col_ref.col_handle.get().is_stored:
60
60
  return None
61
61
  col = self.col_ref.col_handle.get()