pixeltable 0.4.18__py3-none-any.whl → 0.4.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic.

Files changed (152)
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/catalog.py +119 -100
  4. pixeltable/catalog/column.py +104 -115
  5. pixeltable/catalog/globals.py +1 -2
  6. pixeltable/catalog/insertable_table.py +44 -49
  7. pixeltable/catalog/path.py +3 -4
  8. pixeltable/catalog/schema_object.py +4 -4
  9. pixeltable/catalog/table.py +118 -122
  10. pixeltable/catalog/table_metadata.py +6 -6
  11. pixeltable/catalog/table_version.py +322 -257
  12. pixeltable/catalog/table_version_handle.py +4 -4
  13. pixeltable/catalog/table_version_path.py +9 -10
  14. pixeltable/catalog/tbl_ops.py +9 -3
  15. pixeltable/catalog/view.py +34 -28
  16. pixeltable/config.py +14 -10
  17. pixeltable/dataframe.py +68 -77
  18. pixeltable/env.py +74 -64
  19. pixeltable/exec/aggregation_node.py +6 -6
  20. pixeltable/exec/cache_prefetch_node.py +10 -10
  21. pixeltable/exec/data_row_batch.py +3 -3
  22. pixeltable/exec/exec_context.py +4 -5
  23. pixeltable/exec/exec_node.py +5 -5
  24. pixeltable/exec/expr_eval/evaluators.py +6 -6
  25. pixeltable/exec/expr_eval/expr_eval_node.py +8 -7
  26. pixeltable/exec/expr_eval/globals.py +6 -6
  27. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  28. pixeltable/exec/expr_eval/schedulers.py +11 -11
  29. pixeltable/exec/in_memory_data_node.py +2 -2
  30. pixeltable/exec/object_store_save_node.py +14 -17
  31. pixeltable/exec/sql_node.py +25 -25
  32. pixeltable/exprs/arithmetic_expr.py +4 -4
  33. pixeltable/exprs/array_slice.py +2 -2
  34. pixeltable/exprs/column_property_ref.py +3 -3
  35. pixeltable/exprs/column_ref.py +61 -74
  36. pixeltable/exprs/comparison.py +5 -5
  37. pixeltable/exprs/compound_predicate.py +3 -3
  38. pixeltable/exprs/data_row.py +12 -12
  39. pixeltable/exprs/expr.py +41 -31
  40. pixeltable/exprs/expr_dict.py +3 -3
  41. pixeltable/exprs/expr_set.py +3 -3
  42. pixeltable/exprs/function_call.py +14 -14
  43. pixeltable/exprs/in_predicate.py +4 -4
  44. pixeltable/exprs/inline_expr.py +8 -8
  45. pixeltable/exprs/is_null.py +1 -3
  46. pixeltable/exprs/json_mapper.py +8 -8
  47. pixeltable/exprs/json_path.py +6 -6
  48. pixeltable/exprs/literal.py +5 -5
  49. pixeltable/exprs/method_ref.py +2 -2
  50. pixeltable/exprs/object_ref.py +2 -2
  51. pixeltable/exprs/row_builder.py +14 -14
  52. pixeltable/exprs/rowid_ref.py +8 -8
  53. pixeltable/exprs/similarity_expr.py +50 -25
  54. pixeltable/exprs/sql_element_cache.py +4 -4
  55. pixeltable/exprs/string_op.py +2 -2
  56. pixeltable/exprs/type_cast.py +3 -5
  57. pixeltable/func/aggregate_function.py +8 -8
  58. pixeltable/func/callable_function.py +9 -9
  59. pixeltable/func/expr_template_function.py +3 -3
  60. pixeltable/func/function.py +15 -17
  61. pixeltable/func/function_registry.py +6 -7
  62. pixeltable/func/globals.py +2 -3
  63. pixeltable/func/mcp.py +2 -2
  64. pixeltable/func/query_template_function.py +16 -16
  65. pixeltable/func/signature.py +14 -14
  66. pixeltable/func/tools.py +11 -11
  67. pixeltable/func/udf.py +16 -18
  68. pixeltable/functions/__init__.py +1 -0
  69. pixeltable/functions/anthropic.py +7 -7
  70. pixeltable/functions/audio.py +76 -0
  71. pixeltable/functions/bedrock.py +6 -6
  72. pixeltable/functions/deepseek.py +4 -4
  73. pixeltable/functions/fireworks.py +2 -2
  74. pixeltable/functions/gemini.py +6 -6
  75. pixeltable/functions/globals.py +12 -12
  76. pixeltable/functions/groq.py +4 -4
  77. pixeltable/functions/huggingface.py +18 -20
  78. pixeltable/functions/image.py +7 -10
  79. pixeltable/functions/llama_cpp.py +7 -7
  80. pixeltable/functions/math.py +2 -3
  81. pixeltable/functions/mistralai.py +3 -3
  82. pixeltable/functions/ollama.py +9 -9
  83. pixeltable/functions/openai.py +21 -21
  84. pixeltable/functions/openrouter.py +7 -7
  85. pixeltable/functions/string.py +21 -28
  86. pixeltable/functions/timestamp.py +7 -8
  87. pixeltable/functions/together.py +4 -6
  88. pixeltable/functions/twelvelabs.py +92 -0
  89. pixeltable/functions/video.py +2 -24
  90. pixeltable/functions/vision.py +6 -6
  91. pixeltable/functions/whisper.py +7 -7
  92. pixeltable/functions/whisperx.py +16 -16
  93. pixeltable/globals.py +52 -36
  94. pixeltable/index/base.py +12 -8
  95. pixeltable/index/btree.py +19 -22
  96. pixeltable/index/embedding_index.py +30 -39
  97. pixeltable/io/datarows.py +3 -3
  98. pixeltable/io/external_store.py +13 -16
  99. pixeltable/io/fiftyone.py +5 -5
  100. pixeltable/io/globals.py +5 -5
  101. pixeltable/io/hf_datasets.py +4 -4
  102. pixeltable/io/label_studio.py +12 -12
  103. pixeltable/io/pandas.py +6 -6
  104. pixeltable/io/parquet.py +2 -2
  105. pixeltable/io/table_data_conduit.py +12 -12
  106. pixeltable/io/utils.py +2 -2
  107. pixeltable/iterators/audio.py +2 -2
  108. pixeltable/iterators/video.py +8 -13
  109. pixeltable/metadata/converters/convert_18.py +2 -2
  110. pixeltable/metadata/converters/convert_19.py +2 -2
  111. pixeltable/metadata/converters/convert_20.py +2 -2
  112. pixeltable/metadata/converters/convert_21.py +2 -2
  113. pixeltable/metadata/converters/convert_22.py +2 -2
  114. pixeltable/metadata/converters/convert_24.py +2 -2
  115. pixeltable/metadata/converters/convert_25.py +2 -2
  116. pixeltable/metadata/converters/convert_26.py +2 -2
  117. pixeltable/metadata/converters/convert_29.py +4 -4
  118. pixeltable/metadata/converters/convert_34.py +2 -2
  119. pixeltable/metadata/converters/convert_36.py +2 -2
  120. pixeltable/metadata/converters/convert_38.py +2 -2
  121. pixeltable/metadata/converters/convert_39.py +1 -2
  122. pixeltable/metadata/converters/util.py +11 -13
  123. pixeltable/metadata/schema.py +22 -21
  124. pixeltable/metadata/utils.py +2 -6
  125. pixeltable/mypy/mypy_plugin.py +5 -5
  126. pixeltable/plan.py +30 -28
  127. pixeltable/share/packager.py +7 -7
  128. pixeltable/share/publish.py +3 -3
  129. pixeltable/store.py +125 -61
  130. pixeltable/type_system.py +43 -46
  131. pixeltable/utils/__init__.py +1 -2
  132. pixeltable/utils/arrow.py +4 -4
  133. pixeltable/utils/av.py +8 -0
  134. pixeltable/utils/azure_store.py +305 -0
  135. pixeltable/utils/code.py +1 -2
  136. pixeltable/utils/dbms.py +15 -19
  137. pixeltable/utils/description_helper.py +2 -3
  138. pixeltable/utils/documents.py +5 -6
  139. pixeltable/utils/exception_handler.py +2 -2
  140. pixeltable/utils/filecache.py +5 -5
  141. pixeltable/utils/formatter.py +4 -6
  142. pixeltable/utils/gcs_store.py +9 -9
  143. pixeltable/utils/local_store.py +17 -17
  144. pixeltable/utils/object_stores.py +59 -43
  145. pixeltable/utils/s3_store.py +35 -30
  146. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/METADATA +1 -1
  147. pixeltable-0.4.19.dist-info/RECORD +213 -0
  148. pixeltable/__version__.py +0 -3
  149. pixeltable-0.4.18.dist-info/RECORD +0 -211
  150. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/WHEEL +0 -0
  151. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/entry_points.txt +0 -0
  152. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/licenses/LICENSE +0 -0
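The packaging-level changes are mechanical: pixeltable/__version__.py is removed in favor of a new pixeltable/_version.py, and the dist-info METADATA and RECORD files are regenerated accordingly. A minimal sketch for confirming the installed version after upgrading, assuming a standard pip-based install and that the top-level package continues to expose a __version__ string:

    # Hypothetical check; assumes `pip install --upgrade pixeltable==0.4.19` has been run.
    import pixeltable as pxt

    # The top-level __version__ attribute should report the new release.
    print(pxt.__version__)  # expected: 0.4.19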
pixeltable/env.py CHANGED
@@ -21,7 +21,7 @@ from contextlib import contextmanager
  from dataclasses import dataclass, field
  from pathlib import Path
  from sys import stdout
- from typing import TYPE_CHECKING, Any, Callable, Iterator, Literal, Optional, TypeVar
+ from typing import TYPE_CHECKING, Any, Callable, Iterator, TypeVar
  from zoneinfo import ZoneInfo, ZoneInfoNotFoundError

  import nest_asyncio # type: ignore[import-untyped]
@@ -38,7 +38,7 @@ from pixeltable.config import Config
  from pixeltable.utils.console_output import ConsoleLogger, ConsoleMessageFilter, ConsoleOutputHandler, map_level
  from pixeltable.utils.dbms import CockroachDbms, Dbms, PostgresqlDbms
  from pixeltable.utils.http_server import make_server
- from pixeltable.utils.object_stores import ObjectPath, StorageObjectAddress
+ from pixeltable.utils.object_stores import ObjectPath

  if TYPE_CHECKING:
  import spacy
@@ -56,47 +56,50 @@ class Env:
  For a non-local environment, Pixeltable uses a connection string to the externally managed database.
  """

- _instance: Optional[Env] = None
+ SERIALIZABLE_ISOLATION_LEVEL = 'SERIALIZABLE'
+
+ _instance: Env | None = None
  __initializing: bool = False
  _log_fmt_str = '%(asctime)s %(levelname)s %(name)s %(filename)s:%(lineno)d: %(message)s'

- _media_dir: Optional[Path]
- _object_soa: Optional[StorageObjectAddress]
- _file_cache_dir: Optional[Path] # cached object files with external URL
- _dataset_cache_dir: Optional[Path] # cached datasets (eg, pytorch or COCO)
- _log_dir: Optional[Path] # log files
- _tmp_dir: Optional[Path] # any tmp files
- _sa_engine: Optional[sql.engine.base.Engine]
- _pgdata_dir: Optional[Path]
- _db_name: Optional[str]
- _db_server: Optional[pixeltable_pgserver.PostgresServer] # set only when running in local environment
- _db_url: Optional[str]
- _default_time_zone: Optional[ZoneInfo]
+ _media_dir: Path | None
+ _file_cache_dir: Path | None # cached object files with external URL
+ _dataset_cache_dir: Path | None # cached datasets (eg, pytorch or COCO)
+ _log_dir: Path | None # log files
+ _tmp_dir: Path | None # any tmp files
+ _sa_engine: sql.engine.base.Engine | None
+ _pgdata_dir: Path | None
+ _db_name: str | None
+ _db_server: pixeltable_pgserver.PostgresServer | None # set only when running in local environment
+ _db_url: str | None
+ _default_time_zone: ZoneInfo | None
  _verbosity: int

  # info about optional packages that are utilized by some parts of the code
  __optional_packages: dict[str, PackageInfo]

- _spacy_nlp: Optional[spacy.Language]
- _httpd: Optional[http.server.HTTPServer]
- _http_address: Optional[str]
+ _spacy_nlp: spacy.Language | None
+ _httpd: http.server.HTTPServer | None
+ _http_address: str | None
  _logger: logging.Logger
  _default_log_level: int
- _logfilename: Optional[str]
+ _logfilename: str | None
  _log_to_stdout: bool
  _module_log_level: dict[str, int] # module name -> log level
  _file_cache_size_g: float
- _pxt_api_key: Optional[str]
+ _default_input_media_dest: str | None
+ _default_output_media_dest: str | None
+ _pxt_api_key: str | None
  _stdout_handler: logging.StreamHandler
  _default_video_encoder: str | None
  _initialized: bool

  _resource_pool_info: dict[str, Any]
- _current_conn: Optional[sql.Connection]
- _current_session: Optional[orm.Session]
- _current_isolation_level: Optional[Literal['REPEATABLE_READ', 'SERIALIZABLE']]
- _dbms: Optional[Dbms]
- _event_loop: Optional[asyncio.AbstractEventLoop] # event loop for ExecNode
+ _current_conn: sql.Connection | None
+ _current_session: orm.Session | None
+ _current_isolation_level: str | None
+ _dbms: Dbms | None
+ _event_loop: asyncio.AbstractEventLoop | None # event loop for ExecNode

  @classmethod
  def get(cls) -> Env:
@@ -125,7 +128,6 @@ class Env:
  assert self._instance is None, 'Env is a singleton; use Env.get() to access the instance'

  self._media_dir = None # computed media files
- self._object_soa = None # computed object files in StorageObjectAddress format
  self._file_cache_dir = None # cached object files with external URL
  self._dataset_cache_dir = None # cached datasets (eg, pytorch or COCO)
  self._log_dir = None # log files
@@ -200,11 +202,11 @@ class Env:
  return self._http_address

  @property
- def user(self) -> Optional[str]:
+ def user(self) -> str | None:
  return Config.get().get_string_value('user')

  @user.setter
- def user(self, user: Optional[str]) -> None:
+ def user(self, user: str | None) -> None:
  if user is None:
  if 'PIXELTABLE_USER' in os.environ:
  del os.environ['PIXELTABLE_USER']
@@ -212,11 +214,11 @@ class Env:
  os.environ['PIXELTABLE_USER'] = user

  @property
- def default_time_zone(self) -> Optional[ZoneInfo]:
+ def default_time_zone(self) -> ZoneInfo | None:
  return self._default_time_zone

  @default_time_zone.setter
- def default_time_zone(self, tz: Optional[ZoneInfo]) -> None:
+ def default_time_zone(self, tz: ZoneInfo | None) -> None:
  """
  This is not a publicly visible setter; it is only for testing purposes.
  """
@@ -233,17 +235,17 @@ class Env:
  return self._verbosity

  @property
- def conn(self) -> Optional[sql.Connection]:
+ def conn(self) -> sql.Connection | None:
  assert self._current_conn is not None
  return self._current_conn

  @property
- def session(self) -> Optional[orm.Session]:
+ def session(self) -> orm.Session | None:
  assert self._current_session is not None
  return self._current_session

  @property
- def dbms(self) -> Optional[Dbms]:
+ def dbms(self) -> Dbms | None:
  assert self._dbms is not None
  return self._dbms

@@ -274,7 +276,7 @@ class Env:
  if self._current_conn is None:
  assert self._current_session is None
  try:
- self._current_isolation_level = 'SERIALIZABLE'
+ self._current_isolation_level = self.SERIALIZABLE_ISOLATION_LEVEL
  with (
  self.engine.connect().execution_options(isolation_level=self._current_isolation_level) as conn,
  orm.Session(conn) as session,
@@ -289,16 +291,16 @@ class Env:
  self._current_isolation_level = None
  else:
  assert self._current_session is not None
- assert for_write == (self._current_isolation_level == 'serializable')
+ assert self._current_isolation_level == self.SERIALIZABLE_ISOLATION_LEVEL or not for_write
  yield self._current_conn

  def configure_logging(
  self,
  *,
- to_stdout: Optional[bool] = None,
- level: Optional[int] = None,
- add: Optional[str] = None,
- remove: Optional[str] = None,
+ to_stdout: bool | None = None,
+ level: int | None = None,
+ add: str | None = None,
+ remove: str | None = None,
  ) -> None:
  """Configure logging.

@@ -340,7 +342,7 @@ class Env:
  def set_log_level(self, level: int) -> None:
  self._default_log_level = level

- def set_module_log_level(self, module: str, level: Optional[int]) -> None:
+ def set_module_log_level(self, module: str, level: int | None) -> None:
  if level is None:
  self._module_log_level.pop(module, None)
  else:
@@ -396,23 +398,18 @@ class Env:
  config = Config.get()

  self._initialized = True
+
  self._media_dir = Config.get().home / 'media'
  self._file_cache_dir = Config.get().home / 'file_cache'
  self._dataset_cache_dir = Config.get().home / 'dataset_cache'
  self._log_dir = Config.get().home / 'logs'
  self._tmp_dir = Config.get().home / 'tmp'

- if not self._media_dir.exists():
- self._media_dir.mkdir()
- self._object_soa = ObjectPath.parse_object_storage_addr(str(self._media_dir), may_contain_object_name=False)
- if not self._file_cache_dir.exists():
- self._file_cache_dir.mkdir()
- if not self._dataset_cache_dir.exists():
- self._dataset_cache_dir.mkdir()
- if not self._log_dir.exists():
- self._log_dir.mkdir()
- if not self._tmp_dir.exists():
- self._tmp_dir.mkdir()
+ self._media_dir.mkdir(exist_ok=True)
+ self._file_cache_dir.mkdir(exist_ok=True)
+ self._dataset_cache_dir.mkdir(exist_ok=True)
+ self._log_dir.mkdir(exist_ok=True)
+ self._tmp_dir.mkdir(exist_ok=True)

  self._file_cache_size_g = config.get_float_value('file_cache_size_g')
  if self._file_cache_size_g is None:
@@ -421,6 +418,16 @@ class Env:
  f'(either add a `file_cache_size_g` entry to the `pixeltable` section of {Config.get().config_file},\n'
  'or set the PIXELTABLE_FILE_CACHE_SIZE_G environment variable)'
  )
+
+ self._default_input_media_dest = config.get_string_value('input_media_dest')
+ self._default_output_media_dest = config.get_string_value('output_media_dest')
+ for mode, uri in (('input', self._default_input_media_dest), ('output', self._default_output_media_dest)):
+ if uri is not None:
+ try:
+ _ = ObjectPath.parse_object_storage_addr(uri, False)
+ except Exception as e:
+ raise excs.Error(f'Invalid {mode} media destination URI: {uri}') from e
+
  self._pxt_api_key = config.get_string_value('api_key')

  # Disable spurious warnings
@@ -647,7 +654,7 @@ class Env:
  metadata.upgrade_md(self._sa_engine)

  @property
- def pxt_api_key(self) -> Optional[str]:
+ def pxt_api_key(self) -> str | None:
  return self._pxt_api_key

  def get_client(self, name: str) -> Any:
@@ -669,7 +676,7 @@ class Env:
  # Determine the type of the parameter for proper config parsing.
  pname = param.name
  t = param.annotation
- # Deference Optional[T]
+ # Deference T | None
  if typing.get_origin(t) in (typing.Union, types.UnionType):
  args = typing.get_args(t)
  if args[0] is type(None):
@@ -767,6 +774,7 @@ class Env:
  self.__register_package('datasets')
  self.__register_package('diffusers')
  self.__register_package('fiftyone')
+ self.__register_package('twelvelabs')
  self.__register_package('fireworks', library_name='fireworks-ai')
  self.__register_package('google.cloud.storage', library_name='google-cloud-storage')
  self.__register_package('google.genai', library_name='google-genai')
@@ -799,7 +807,7 @@ class Env:
  self.__register_package('yolox', library_name='pixeltable-yolox')
  self.__register_package('lancedb')

- def __register_package(self, package_name: str, library_name: Optional[str] = None) -> None:
+ def __register_package(self, package_name: str, library_name: str | None = None) -> None:
  is_installed: bool
  try:
  is_installed = importlib.util.find_spec(package_name) is not None
@@ -815,7 +823,7 @@ class Env:
  if not shutil.which(binary_name):
  raise excs.Error(f'{binary_name} is not installed or not in PATH. Please install it to use this feature.')

- def require_package(self, package_name: str, min_version: Optional[list[int]] = None) -> None:
+ def require_package(self, package_name: str, min_version: list[int] | None = None) -> None:
  """
  Checks whether the specified optional package is available. If not, raises an exception
  with an error message informing the user how to install it.
@@ -859,8 +867,8 @@ class Env:
  else:
  os.remove(path)

- # def get_resource_pool_info(self, pool_id: str, pool_info_cls: Optional[Type[T]]) -> T:
- def get_resource_pool_info(self, pool_id: str, make_pool_info: Optional[Callable[[], T]] = None) -> T:
+ # def get_resource_pool_info(self, pool_id: str, pool_info_cls: Type[T] | None) -> T:
+ def get_resource_pool_info(self, pool_id: str, make_pool_info: Callable[[], T] | None = None) -> T:
  """Returns the info object for the given id, creating it if necessary."""
  info = self._resource_pool_info.get(pool_id)
  if info is None and make_pool_info is not None:
@@ -874,10 +882,12 @@ class Env:
  return self._media_dir

  @property
- def object_soa(self) -> StorageObjectAddress:
- assert self._media_dir is not None
- assert self._object_soa is not None
- return self._object_soa
+ def default_input_media_dest(self) -> str | None:
+ return self._default_input_media_dest
+
+ @property
+ def default_output_media_dest(self) -> str | None:
+ return self._default_output_media_dest

  @property
  def file_cache_dir(self) -> Path:
@@ -1025,14 +1035,14 @@ _registered_clients: dict[str, ApiClient] = {}
  class ApiClient:
  init_fn: Callable
  params: dict[str, inspect.Parameter]
- client_obj: Optional[Any] = None
+ client_obj: Any | None = None


  @dataclass
  class PackageInfo:
  is_installed: bool
  library_name: str # pypi library name (may be different from package name)
- version: Optional[list[int]] = None # installed version, as a list of components (such as [3,0,2] for "3.0.2")
+ version: list[int] | None = None # installed version, as a list of components (such as [3,0,2] for "3.0.2")


  TIME_FORMAT = '%H:%M.%S %f'
@@ -1093,7 +1103,7 @@ class RateLimitsInfo:
  """Update self.resource_limits based on the exception headers"""
  self.has_exc = True

- def get_retry_delay(self, exc: Exception) -> Optional[float]:
+ def get_retry_delay(self, exc: Exception) -> float | None:
  """Returns number of seconds to wait before retry, or None if not retryable"""
  if len(self.resource_limits) == 0:
  return 1.0
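The env.py hunks above introduce two new config keys, input_media_dest and output_media_dest: they are read via config.get_string_value(), validated at startup with ObjectPath.parse_object_storage_addr(), and exposed through the new default_input_media_dest / default_output_media_dest properties, replacing the removed object_soa property. A minimal usage sketch, assuming Pixeltable 0.4.19; the example URIs and the config file location are assumptions, not taken from the diff:

    # Hypothetical config entries, placed in the `pixeltable` section of the config
    # file referenced by Config.get().config_file (typically ~/.pixeltable/config.toml):
    #
    #   [pixeltable]
    #   input_media_dest = "s3://my-bucket/pxt-input"    # assumed example URI
    #   output_media_dest = "s3://my-bucket/pxt-output"  # assumed example URI

    import pixeltable as pxt
    from pixeltable.env import Env

    pxt.init()  # environment setup validates both URIs and raises excs.Error on a bad one

    env = Env.get()
    print(env.default_input_media_dest)   # None if the key is not configured
    print(env.default_output_media_dest)  # None if the key is not configured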
pixeltable/exec/aggregation_node.py CHANGED
@@ -2,7 +2,7 @@ from __future__ import annotations

  import logging
  import sys
- from typing import Any, AsyncIterator, Iterable, Optional, cast
+ from typing import Any, AsyncIterator, Iterable, cast

  from pixeltable import catalog, exceptions as excs, exprs

@@ -19,18 +19,18 @@ class AggregationNode(ExecNode):
  At the moment, this returns all results in a single DataRowBatch.
  """

- group_by: Optional[list[exprs.Expr]]
+ group_by: list[exprs.Expr] | None
  input_exprs: list[exprs.Expr]
  agg_fn_eval_ctx: exprs.RowBuilder.EvalCtx
  agg_fn_calls: list[exprs.FunctionCall]
  output_batch: DataRowBatch
- limit: Optional[int]
+ limit: int | None

  def __init__(
  self,
  tbl: catalog.TableVersionHandle,
  row_builder: exprs.RowBuilder,
- group_by: Optional[list[exprs.Expr]],
+ group_by: list[exprs.Expr] | None,
  agg_fn_calls: list[exprs.FunctionCall],
  input_exprs: Iterable[exprs.Expr],
  input: ExecNode,
@@ -72,8 +72,8 @@ class AggregationNode(ExecNode):
  raise excs.ExprEvalError(fn_call, expr_msg, exc, exc_tb, input_vals, row_num) from exc

  async def __aiter__(self) -> AsyncIterator[DataRowBatch]:
- prev_row: Optional[exprs.DataRow] = None
- current_group: Optional[list[Any]] = None # the values of the group-by exprs
+ prev_row: exprs.DataRow | None = None
+ current_group: list[Any] | None = None # the values of the group-by exprs
  num_input_rows = 0
  num_output_rows = 0
  async for row_batch in self.input:
pixeltable/exec/cache_prefetch_node.py CHANGED
@@ -9,7 +9,7 @@ import urllib.request
  from collections import deque
  from concurrent import futures
  from pathlib import Path
- from typing import AsyncIterator, Iterator, Optional
+ from typing import AsyncIterator, Iterator
  from uuid import UUID

  from pixeltable import exceptions as excs, exprs
@@ -43,18 +43,18 @@ class CachePrefetchNode(ExecNode):

  # ready_rows: rows that are ready to be returned, ordered by row idx;
  # the implied row idx of ready_rows[0] is num_returned_rows
- ready_rows: deque[Optional[exprs.DataRow]]
+ ready_rows: deque[exprs.DataRow | None]

  in_flight_rows: dict[int, CachePrefetchNode.RowState] # rows with in-flight urls; id(row) -> RowState
  in_flight_requests: dict[futures.Future, str] # in-flight requests for urls; future -> URL
  in_flight_urls: dict[str, list[tuple[exprs.DataRow, exprs.ColumnSlotIdx]]] # URL -> [(row, info)]
  input_finished: bool
- row_idx: Iterator[Optional[int]]
+ row_idx: Iterator[int | None]

  @dataclasses.dataclass
  class RowState:
  row: exprs.DataRow
- idx: Optional[int] # position in input stream; None if we don't retain input order
+ idx: int | None # position in input stream; None if we don't retain input order
  num_missing: int # number of missing URLs in this row

  def __init__(
@@ -78,7 +78,7 @@ class CachePrefetchNode(ExecNode):
  def queued_work(self) -> int:
  return len(self.in_flight_requests)

- async def get_input_batch(self, input_iter: AsyncIterator[DataRowBatch]) -> Optional[DataRowBatch]:
+ async def get_input_batch(self, input_iter: AsyncIterator[DataRowBatch]) -> DataRowBatch | None:
  """Get the next batch of input rows, or None if there are no more rows"""
  try:
  input_batch = await anext(input_iter)
@@ -127,7 +127,7 @@ class CachePrefetchNode(ExecNode):
  sum(int(row is not None) for row in itertools.islice(self.ready_rows, self.BATCH_SIZE)) == self.BATCH_SIZE
  )

- def __add_ready_row(self, row: exprs.DataRow, row_idx: Optional[int]) -> None:
+ def __add_ready_row(self, row: exprs.DataRow, row_idx: int | None) -> None:
  if row_idx is None:
  self.ready_rows.append(row)
  else:
@@ -144,12 +144,12 @@ class CachePrefetchNode(ExecNode):
  tmp_path, exc = f.result()
  if exc is not None and not ignore_errors:
  raise exc
- local_path: Optional[Path] = None
+ local_path: Path | None = None
  if tmp_path is not None:
  # register the file with the cache for the first column in which it's missing
  assert url in self.in_flight_urls
  _, info = self.in_flight_urls[url][0]
- local_path = file_cache.add(info.col.tbl.id, info.col.id, url, tmp_path)
+ local_path = file_cache.add(info.col.get_tbl().id, info.col.id, url, tmp_path)
  _logger.debug(f'cached {url} as {local_path}')

  # add the local path/exception to the slots that reference the url
@@ -174,7 +174,7 @@ class CachePrefetchNode(ExecNode):
  # the time it takes to get the next batch together
  cache_misses: list[str] = []

- url_pos: dict[str, Optional[int]] = {} # url -> row_idx; used for logging
+ url_pos: dict[str, int | None] = {} # url -> row_idx; used for logging
  for row in input_batch:
  # identify missing local files in input batch, or fill in their paths if they're already cached
  num_missing = 0
@@ -213,7 +213,7 @@ class CachePrefetchNode(ExecNode):
  _logger.debug(f'submitted {url} for idx {url_pos[url]}')
  self.in_flight_requests[f] = url

- def __fetch_url(self, url: str) -> tuple[Optional[Path], Optional[Exception]]:
+ def __fetch_url(self, url: str) -> tuple[Path | None, Exception | None]:
  """Fetches a remote URL into the TempStore and returns its path"""
  from pixeltable.utils.local_store import TempStore

pixeltable/exec/data_row_batch.py CHANGED
@@ -1,7 +1,7 @@
  from __future__ import annotations

  import logging
- from typing import Iterator, Optional
+ from typing import Iterator

  from pixeltable import exprs

@@ -19,11 +19,11 @@ class DataRowBatch:
  row_builder: exprs.RowBuilder
  rows: list[exprs.DataRow]

- def __init__(self, row_builder: exprs.RowBuilder, rows: Optional[list[exprs.DataRow]] = None):
+ def __init__(self, row_builder: exprs.RowBuilder, rows: list[exprs.DataRow] | None = None):
  self.row_builder = row_builder
  self.rows = [] if rows is None else rows

- def add_row(self, row: Optional[exprs.DataRow]) -> exprs.DataRow:
+ def add_row(self, row: exprs.DataRow | None) -> exprs.DataRow:
  if row is None:
  row = self.row_builder.make_row()
  self.rows.append(row)
pixeltable/exec/exec_context.py CHANGED
@@ -1,5 +1,4 @@
  import random
- from typing import Optional

  import sqlalchemy as sql

@@ -13,9 +12,9 @@ class ExecContext:
  profile: exprs.ExecProfile
  show_pbar: bool
  batch_size: int
- num_rows: Optional[int]
- conn: Optional[sql.engine.Connection]
- pk_clause: Optional[list[sql.ClauseElement]]
+ num_rows: int | None
+ conn: sql.engine.Connection | None
+ pk_clause: list[sql.ClauseElement] | None
  num_computed_exprs: int
  ignore_errors: bool
  random_seed: int # general-purpose source of randomness with execution scope
@@ -26,7 +25,7 @@ class ExecContext:
  *,
  show_pbar: bool = False,
  batch_size: int = 0,
- pk_clause: Optional[list[sql.ClauseElement]] = None,
+ pk_clause: list[sql.ClauseElement] | None = None,
  num_computed_exprs: int = 0,
  ignore_errors: bool = False,
  ):
pixeltable/exec/exec_node.py CHANGED
@@ -2,7 +2,7 @@ from __future__ import annotations

  import abc
  import logging
- from typing import AsyncIterator, Iterable, Iterator, Optional, TypeVar
+ from typing import AsyncIterator, Iterable, Iterator, TypeVar

  from pixeltable import exprs
  from pixeltable.env import Env
@@ -18,16 +18,16 @@ class ExecNode(abc.ABC):

  output_exprs: Iterable[exprs.Expr]
  row_builder: exprs.RowBuilder
- input: Optional[ExecNode]
+ input: ExecNode | None
  flushed_img_slots: list[int] # idxs of image slots of our output_exprs dependencies
- ctx: Optional[ExecContext]
+ ctx: ExecContext | None

  def __init__(
  self,
  row_builder: exprs.RowBuilder,
  output_exprs: Iterable[exprs.Expr],
  input_exprs: Iterable[exprs.Expr],
- input: Optional[ExecNode] = None,
+ input: ExecNode | None = None,
  ):
  assert all(expr.is_valid for expr in output_exprs)
  self.output_exprs = output_exprs
@@ -85,7 +85,7 @@ class ExecNode(abc.ABC):

  T = TypeVar('T', bound='ExecNode')

- def get_node(self, node_class: type[T]) -> Optional[T]:
+ def get_node(self, node_class: type[T]) -> T | None:
  if isinstance(self, node_class):
  return self
  if self.input is not None:
pixeltable/exec/expr_eval/evaluators.py CHANGED
@@ -5,7 +5,7 @@ import datetime
  import itertools
  import logging
  import sys
- from typing import Any, Callable, Iterator, Optional, cast
+ from typing import Any, Callable, Iterator, cast

  from pixeltable import exprs, func

@@ -64,11 +64,11 @@ class FnCallEvaluator(Evaluator):

  fn_call: exprs.FunctionCall
  fn: func.CallableFunction
- scalar_py_fn: Optional[Callable] # only set for non-batching CallableFunctions
+ scalar_py_fn: Callable | None # only set for non-batching CallableFunctions

  # only set if fn.is_batched
- call_args_queue: Optional[asyncio.Queue[FnCallArgs]] # FnCallArgs waiting for execution
- batch_size: Optional[int]
+ call_args_queue: asyncio.Queue[FnCallArgs] | None # FnCallArgs waiting for execution
+ batch_size: int | None

  def __init__(self, fn_call: exprs.FunctionCall, dispatcher: Dispatcher, exec_ctx: ExecCtx):
  super().__init__(dispatcher, exec_ctx)
@@ -160,8 +160,8 @@ class FnCallEvaluator(Evaluator):

  def _create_batch_call_args(self, call_args: list[FnCallArgs]) -> FnCallArgs:
  """Roll call_args into a single batched FnCallArgs"""
- batch_args: list[list[Optional[Any]]] = [[None] * len(call_args) for _ in range(len(self.fn_call.arg_idxs))]
- batch_kwargs: dict[str, list[Optional[Any]]] = {k: [None] * len(call_args) for k in self.fn_call.kwarg_idxs}
+ batch_args: list[list[Any | None]] = [[None] * len(call_args) for _ in range(len(self.fn_call.arg_idxs))]
+ batch_kwargs: dict[str, list[Any | None]] = {k: [None] * len(call_args) for k in self.fn_call.kwarg_idxs}
  assert isinstance(self.fn, func.CallableFunction)
  for i, item in enumerate(call_args):
  for j in range(len(item.args)):
pixeltable/exec/expr_eval/expr_eval_node.py CHANGED
@@ -4,7 +4,7 @@ import asyncio
  import logging
  import traceback
  from types import TracebackType
- from typing import AsyncIterator, Iterable, Optional
+ from typing import AsyncIterator, Iterable

  import numpy as np

@@ -49,17 +49,17 @@ class ExprEvalNode(ExecNode):
  # execution state
  tasks: set[asyncio.Task] # collects all running tasks to prevent them from getting gc'd
  exc_event: asyncio.Event # set if an exception needs to be propagated
- error: Optional[Exception] # exception that needs to be propagated
+ error: Exception | None # exception that needs to be propagated
  completed_rows: asyncio.Queue[exprs.DataRow] # rows that have completed evaluation
  completed_event: asyncio.Event # set when completed_rows is non-empty
  input_iter: AsyncIterator[DataRowBatch]
- current_input_batch: Optional[DataRowBatch] # batch from which we're currently consuming rows
+ current_input_batch: DataRowBatch | None # batch from which we're currently consuming rows
  input_row_idx: int # next row to consume from current_input_batch
- next_input_batch: Optional[DataRowBatch] # read-ahead input batch
+ next_input_batch: DataRowBatch | None # read-ahead input batch
  avail_input_rows: int # total number across both current_/next_input_batch
  input_complete: bool # True if we've received all input batches
  num_in_flight: int # number of dispatched rows that haven't completed
- row_pos_map: Optional[dict[int, int]] # id(row) -> position of row in input; only set if maintain_input_order
+ row_pos_map: dict[int, int] | None # id(row) -> position of row in input; only set if maintain_input_order
  output_buffer: RowBuffer # holds rows that are ready to be returned, in order

  # debugging
@@ -217,9 +217,10 @@ class ExprEvalNode(ExecNode):

  row: exprs.DataRow
  exc_event_aw = asyncio.create_task(self.exc_event.wait(), name='exc_event.wait()')
- input_batch_aw: Optional[asyncio.Task] = None
- completed_aw: Optional[asyncio.Task] = None
+ input_batch_aw: asyncio.Task | None = None
+ completed_aw: asyncio.Task | None = None
  closed_evaluators = False # True after calling Evaluator.close()
+ exprs.Expr.prepare_list(self.exec_ctx.all_exprs)

  try:
  while True:
pixeltable/exec/expr_eval/globals.py CHANGED
@@ -4,7 +4,7 @@ import abc
  import asyncio
  from dataclasses import dataclass
  from types import TracebackType
- from typing import Any, Iterable, Optional, Protocol
+ from typing import Any, Iterable, Protocol

  import numpy as np

@@ -18,11 +18,11 @@ class FnCallArgs:
  fn_call: exprs.FunctionCall
  rows: list[exprs.DataRow]
  # single call
- args: Optional[list[Any]] = None
- kwargs: Optional[dict[str, Any]] = None
+ args: list[Any] | None = None
+ kwargs: dict[str, Any] | None = None
  # batch call
- batch_args: Optional[list[list[Optional[Any]]]] = None
- batch_kwargs: Optional[dict[str, list[Optional[Any]]]] = None
+ batch_args: list[list[Any | None]] | None = None
+ batch_kwargs: dict[str, list[Any | None]] | None = None

  @property
  def pxt_fn(self) -> func.CallableFunction:
@@ -56,7 +56,7 @@ class Scheduler(abc.ABC):
  request: FnCallArgs
  num_retries: int
  exec_ctx: ExecCtx
- retry_after: Optional[float] = None # time.monotonic()
+ retry_after: float | None = None # time.monotonic()

  def __lt__(self, other: Scheduler.QueueItem) -> bool:
  # prioritize by number of retries (more retries = higher priority)