pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pixeltable might be problematic.

Files changed (202)
  1. pixeltable/__init__.py +23 -5
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/__init__.py +5 -3
  4. pixeltable/catalog/catalog.py +1318 -404
  5. pixeltable/catalog/column.py +186 -115
  6. pixeltable/catalog/dir.py +1 -2
  7. pixeltable/catalog/globals.py +11 -43
  8. pixeltable/catalog/insertable_table.py +167 -79
  9. pixeltable/catalog/path.py +61 -23
  10. pixeltable/catalog/schema_object.py +9 -10
  11. pixeltable/catalog/table.py +626 -308
  12. pixeltable/catalog/table_metadata.py +101 -0
  13. pixeltable/catalog/table_version.py +713 -569
  14. pixeltable/catalog/table_version_handle.py +37 -6
  15. pixeltable/catalog/table_version_path.py +42 -29
  16. pixeltable/catalog/tbl_ops.py +50 -0
  17. pixeltable/catalog/update_status.py +191 -0
  18. pixeltable/catalog/view.py +108 -94
  19. pixeltable/config.py +128 -22
  20. pixeltable/dataframe.py +188 -100
  21. pixeltable/env.py +407 -136
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +3 -0
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +231 -0
  27. pixeltable/exec/cell_reconstruction_node.py +135 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +7 -6
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +190 -30
  37. pixeltable/exec/globals.py +32 -0
  38. pixeltable/exec/in_memory_data_node.py +18 -18
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +206 -101
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +34 -30
  46. pixeltable/exprs/column_ref.py +92 -96
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +152 -55
  50. pixeltable/exprs/expr.py +62 -43
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +75 -37
  54. pixeltable/exprs/globals.py +1 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +10 -27
  57. pixeltable/exprs/is_null.py +1 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +5 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +127 -53
  64. pixeltable/exprs/rowid_ref.py +8 -12
  65. pixeltable/exprs/similarity_expr.py +50 -25
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +10 -10
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +20 -18
  78. pixeltable/func/signature.py +43 -16
  79. pixeltable/func/tools.py +23 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +6 -0
  82. pixeltable/functions/anthropic.py +93 -33
  83. pixeltable/functions/audio.py +114 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +1 -1
  86. pixeltable/functions/deepseek.py +20 -9
  87. pixeltable/functions/fireworks.py +2 -2
  88. pixeltable/functions/gemini.py +28 -11
  89. pixeltable/functions/globals.py +13 -13
  90. pixeltable/functions/groq.py +108 -0
  91. pixeltable/functions/huggingface.py +1046 -23
  92. pixeltable/functions/image.py +9 -18
  93. pixeltable/functions/llama_cpp.py +23 -8
  94. pixeltable/functions/math.py +3 -4
  95. pixeltable/functions/mistralai.py +4 -15
  96. pixeltable/functions/ollama.py +16 -9
  97. pixeltable/functions/openai.py +104 -82
  98. pixeltable/functions/openrouter.py +143 -0
  99. pixeltable/functions/replicate.py +2 -2
  100. pixeltable/functions/reve.py +250 -0
  101. pixeltable/functions/string.py +21 -28
  102. pixeltable/functions/timestamp.py +13 -14
  103. pixeltable/functions/together.py +4 -6
  104. pixeltable/functions/twelvelabs.py +92 -0
  105. pixeltable/functions/util.py +6 -1
  106. pixeltable/functions/video.py +1388 -106
  107. pixeltable/functions/vision.py +7 -7
  108. pixeltable/functions/whisper.py +15 -7
  109. pixeltable/functions/whisperx.py +179 -0
  110. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  111. pixeltable/globals.py +332 -105
  112. pixeltable/index/base.py +13 -22
  113. pixeltable/index/btree.py +23 -22
  114. pixeltable/index/embedding_index.py +32 -44
  115. pixeltable/io/__init__.py +4 -2
  116. pixeltable/io/datarows.py +7 -6
  117. pixeltable/io/external_store.py +49 -77
  118. pixeltable/io/fiftyone.py +11 -11
  119. pixeltable/io/globals.py +29 -28
  120. pixeltable/io/hf_datasets.py +17 -9
  121. pixeltable/io/label_studio.py +70 -66
  122. pixeltable/io/lancedb.py +3 -0
  123. pixeltable/io/pandas.py +12 -11
  124. pixeltable/io/parquet.py +13 -93
  125. pixeltable/io/table_data_conduit.py +71 -47
  126. pixeltable/io/utils.py +3 -3
  127. pixeltable/iterators/__init__.py +2 -1
  128. pixeltable/iterators/audio.py +21 -11
  129. pixeltable/iterators/document.py +116 -55
  130. pixeltable/iterators/image.py +5 -2
  131. pixeltable/iterators/video.py +293 -13
  132. pixeltable/metadata/__init__.py +4 -2
  133. pixeltable/metadata/converters/convert_18.py +2 -2
  134. pixeltable/metadata/converters/convert_19.py +2 -2
  135. pixeltable/metadata/converters/convert_20.py +2 -2
  136. pixeltable/metadata/converters/convert_21.py +2 -2
  137. pixeltable/metadata/converters/convert_22.py +2 -2
  138. pixeltable/metadata/converters/convert_24.py +2 -2
  139. pixeltable/metadata/converters/convert_25.py +2 -2
  140. pixeltable/metadata/converters/convert_26.py +2 -2
  141. pixeltable/metadata/converters/convert_29.py +4 -4
  142. pixeltable/metadata/converters/convert_34.py +2 -2
  143. pixeltable/metadata/converters/convert_36.py +2 -2
  144. pixeltable/metadata/converters/convert_37.py +15 -0
  145. pixeltable/metadata/converters/convert_38.py +39 -0
  146. pixeltable/metadata/converters/convert_39.py +124 -0
  147. pixeltable/metadata/converters/convert_40.py +73 -0
  148. pixeltable/metadata/converters/util.py +13 -12
  149. pixeltable/metadata/notes.py +4 -0
  150. pixeltable/metadata/schema.py +79 -42
  151. pixeltable/metadata/utils.py +74 -0
  152. pixeltable/mypy/__init__.py +3 -0
  153. pixeltable/mypy/mypy_plugin.py +123 -0
  154. pixeltable/plan.py +274 -223
  155. pixeltable/share/__init__.py +1 -1
  156. pixeltable/share/packager.py +259 -129
  157. pixeltable/share/protocol/__init__.py +34 -0
  158. pixeltable/share/protocol/common.py +170 -0
  159. pixeltable/share/protocol/operation_types.py +33 -0
  160. pixeltable/share/protocol/replica.py +109 -0
  161. pixeltable/share/publish.py +213 -57
  162. pixeltable/store.py +238 -175
  163. pixeltable/type_system.py +104 -63
  164. pixeltable/utils/__init__.py +2 -3
  165. pixeltable/utils/arrow.py +108 -13
  166. pixeltable/utils/av.py +298 -0
  167. pixeltable/utils/azure_store.py +305 -0
  168. pixeltable/utils/code.py +3 -3
  169. pixeltable/utils/console_output.py +4 -1
  170. pixeltable/utils/coroutine.py +6 -23
  171. pixeltable/utils/dbms.py +31 -5
  172. pixeltable/utils/description_helper.py +4 -5
  173. pixeltable/utils/documents.py +5 -6
  174. pixeltable/utils/exception_handler.py +7 -30
  175. pixeltable/utils/filecache.py +6 -6
  176. pixeltable/utils/formatter.py +4 -6
  177. pixeltable/utils/gcs_store.py +283 -0
  178. pixeltable/utils/http_server.py +2 -3
  179. pixeltable/utils/iceberg.py +1 -2
  180. pixeltable/utils/image.py +17 -0
  181. pixeltable/utils/lancedb.py +88 -0
  182. pixeltable/utils/local_store.py +316 -0
  183. pixeltable/utils/misc.py +5 -0
  184. pixeltable/utils/object_stores.py +528 -0
  185. pixeltable/utils/pydantic.py +60 -0
  186. pixeltable/utils/pytorch.py +5 -6
  187. pixeltable/utils/s3_store.py +392 -0
  188. pixeltable-0.4.20.dist-info/METADATA +587 -0
  189. pixeltable-0.4.20.dist-info/RECORD +218 -0
  190. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info}/WHEEL +1 -1
  191. pixeltable-0.4.20.dist-info/entry_points.txt +2 -0
  192. pixeltable/__version__.py +0 -3
  193. pixeltable/ext/__init__.py +0 -17
  194. pixeltable/ext/functions/__init__.py +0 -11
  195. pixeltable/ext/functions/whisperx.py +0 -77
  196. pixeltable/utils/media_store.py +0 -77
  197. pixeltable/utils/s3.py +0 -17
  198. pixeltable/utils/sample.py +0 -25
  199. pixeltable-0.4.0rc3.dist-info/METADATA +0 -435
  200. pixeltable-0.4.0rc3.dist-info/RECORD +0 -189
  201. pixeltable-0.4.0rc3.dist-info/entry_points.txt +0 -3
  202. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info/licenses}/LICENSE +0 -0
pixeltable/env.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import asyncio
3
4
  import datetime
4
5
  import glob
5
6
  import http.server
@@ -13,19 +14,23 @@ import shutil
13
14
  import subprocess
14
15
  import sys
15
16
  import threading
16
- import uuid
17
+ import types
18
+ import typing
17
19
  import warnings
18
- from abc import abstractmethod
19
20
  from contextlib import contextmanager
20
21
  from dataclasses import dataclass, field
21
22
  from pathlib import Path
22
23
  from sys import stdout
23
- from typing import TYPE_CHECKING, Any, Callable, Iterator, Optional, TypeVar
24
+ from typing import TYPE_CHECKING, Any, Callable, Iterator, TypeVar
24
25
  from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
25
26
 
27
+ import nest_asyncio # type: ignore[import-untyped]
26
28
  import pixeltable_pgserver
27
29
  import sqlalchemy as sql
30
+ import tzlocal
28
31
  from pillow_heif import register_heif_opener # type: ignore[import-untyped]
32
+ from sqlalchemy import orm
33
+ from tenacity import retry, stop_after_attempt, wait_exponential_jitter
29
34
  from tqdm import TqdmWarning
30
35
 
31
36
  from pixeltable import exceptions as excs
@@ -33,6 +38,7 @@ from pixeltable.config import Config
33
38
  from pixeltable.utils.console_output import ConsoleLogger, ConsoleMessageFilter, ConsoleOutputHandler, map_level
34
39
  from pixeltable.utils.dbms import CockroachDbms, Dbms, PostgresqlDbms
35
40
  from pixeltable.utils.http_server import make_server
41
+ from pixeltable.utils.object_stores import ObjectPath
36
42
 
37
43
  if TYPE_CHECKING:
38
44
  import spacy
@@ -50,42 +56,50 @@ class Env:
50
56
  For a non-local environment, Pixeltable uses a connection string to the externally managed database.
51
57
  """
52
58
 
53
- _instance: Optional[Env] = None
59
+ SERIALIZABLE_ISOLATION_LEVEL = 'SERIALIZABLE'
60
+
61
+ _instance: Env | None = None
54
62
  __initializing: bool = False
55
63
  _log_fmt_str = '%(asctime)s %(levelname)s %(name)s %(filename)s:%(lineno)d: %(message)s'
56
64
 
57
- _media_dir: Optional[Path]
58
- _file_cache_dir: Optional[Path] # cached media files with external URL
59
- _dataset_cache_dir: Optional[Path] # cached datasets (eg, pytorch or COCO)
60
- _log_dir: Optional[Path] # log files
61
- _tmp_dir: Optional[Path] # any tmp files
62
- _sa_engine: Optional[sql.engine.base.Engine]
63
- _pgdata_dir: Optional[Path]
64
- _db_name: Optional[str]
65
- _db_server: Optional[pixeltable_pgserver.PostgresServer] # set only when running in local environment
66
- _db_url: Optional[str]
67
- _default_time_zone: Optional[ZoneInfo]
65
+ _media_dir: Path | None
66
+ _file_cache_dir: Path | None # cached object files with external URL
67
+ _dataset_cache_dir: Path | None # cached datasets (eg, pytorch or COCO)
68
+ _log_dir: Path | None # log files
69
+ _tmp_dir: Path | None # any tmp files
70
+ _sa_engine: sql.engine.base.Engine | None
71
+ _pgdata_dir: Path | None
72
+ _db_name: str | None
73
+ _db_server: pixeltable_pgserver.PostgresServer | None # set only when running in local environment
74
+ _db_url: str | None
75
+ _default_time_zone: ZoneInfo | None
76
+ _verbosity: int
68
77
 
69
78
  # info about optional packages that are utilized by some parts of the code
70
79
  __optional_packages: dict[str, PackageInfo]
71
80
 
72
- _spacy_nlp: Optional[spacy.Language]
73
- _httpd: Optional[http.server.HTTPServer]
74
- _http_address: Optional[str]
81
+ _spacy_nlp: spacy.Language | None
82
+ _httpd: http.server.HTTPServer | None
83
+ _http_address: str | None
75
84
  _logger: logging.Logger
76
85
  _default_log_level: int
77
- _logfilename: Optional[str]
86
+ _logfilename: str | None
78
87
  _log_to_stdout: bool
79
88
  _module_log_level: dict[str, int] # module name -> log level
80
89
  _file_cache_size_g: float
81
- _pxt_api_key: Optional[str]
90
+ _default_input_media_dest: str | None
91
+ _default_output_media_dest: str | None
92
+ _pxt_api_key: str | None
82
93
  _stdout_handler: logging.StreamHandler
94
+ _default_video_encoder: str | None
83
95
  _initialized: bool
84
96
 
85
97
  _resource_pool_info: dict[str, Any]
86
- _current_conn: Optional[sql.Connection]
87
- _current_session: Optional[sql.orm.Session]
88
- _dbms: Optional[Dbms]
98
+ _current_conn: sql.Connection | None
99
+ _current_session: orm.Session | None
100
+ _current_isolation_level: str | None
101
+ _dbms: Dbms | None
102
+ _event_loop: asyncio.AbstractEventLoop | None # event loop for ExecNode
89
103
 
90
104
  @classmethod
91
105
  def get(cls) -> Env:
@@ -97,17 +111,24 @@ class Env:
97
111
  def _init_env(cls, reinit_db: bool = False) -> None:
98
112
  assert not cls.__initializing, 'Circular env initialization detected.'
99
113
  cls.__initializing = True
114
+ if cls._instance is not None:
115
+ cls._instance._clean_up()
116
+ cls._instance = None
100
117
  env = Env()
101
- env._set_up(reinit_db=reinit_db)
102
- env._upgrade_metadata()
103
- cls._instance = env
104
- cls.__initializing = False
118
+ try:
119
+ env._set_up(reinit_db=reinit_db)
120
+ env._upgrade_metadata()
121
+ cls._instance = env
122
+ finally:
123
+ # Reset the initializing flag, even if setup fails.
124
+ # This prevents the environment from being left in a broken state.
125
+ cls.__initializing = False
105
126
 
106
127
  def __init__(self) -> None:
107
128
  assert self._instance is None, 'Env is a singleton; use Env.get() to access the instance'
108
129
 
109
130
  self._media_dir = None # computed media files
110
- self._file_cache_dir = None # cached media files with external URL
131
+ self._file_cache_dir = None # cached object files with external URL
111
132
  self._dataset_cache_dir = None # cached datasets (eg, pytorch or COCO)
112
133
  self._log_dir = None # log files
113
134
  self._tmp_dir = None # any tmp files
@@ -121,6 +142,7 @@ class Env:
121
142
  self._spacy_nlp = None
122
143
  self._httpd = None
123
144
  self._http_address = None
145
+ self._default_video_encoder = None
124
146
 
125
147
  # logging-related state
126
148
  self._logger = logging.getLogger('pixeltable')
@@ -140,7 +162,34 @@ class Env:
140
162
  self._resource_pool_info = {}
141
163
  self._current_conn = None
142
164
  self._current_session = None
165
+ self._current_isolation_level = None
143
166
  self._dbms = None
167
+ self._event_loop = None
168
+
169
+ def _init_event_loop(self) -> None:
170
+ try:
171
+ # check if we are already in an event loop (eg, Jupyter's); if so, patch it to allow
172
+ # multiple run_until_complete()
173
+ running_loop = asyncio.get_running_loop()
174
+ self._event_loop = running_loop
175
+ _logger.debug('Patched running loop')
176
+ except RuntimeError:
177
+ self._event_loop = asyncio.new_event_loop()
178
+ asyncio.set_event_loop(self._event_loop)
179
+ # we set a deliberately long duration to avoid warnings getting printed to the console in debug mode
180
+ self._event_loop.slow_callback_duration = 3600
181
+
182
+ # always allow nested event loops, we need that to run async udfs synchronously (eg, for SimilarityExpr);
183
+ # see run_coroutine_synchronously()
184
+ nest_asyncio.apply()
185
+ if _logger.isEnabledFor(logging.DEBUG):
186
+ self._event_loop.set_debug(True)
187
+
188
+ @property
189
+ def event_loop(self) -> asyncio.AbstractEventLoop:
190
+ if self._event_loop is None:
191
+ self._init_event_loop()
192
+ return self._event_loop
144
193
 
145
194
  @property
146
195
  def db_url(self) -> str:
@@ -153,11 +202,11 @@ class Env:
153
202
  return self._http_address
154
203
 
155
204
  @property
156
- def user(self) -> Optional[str]:
205
+ def user(self) -> str | None:
157
206
  return Config.get().get_string_value('user')
158
207
 
159
208
  @user.setter
160
- def user(self, user: Optional[str]) -> None:
209
+ def user(self, user: str | None) -> None:
161
210
  if user is None:
162
211
  if 'PIXELTABLE_USER' in os.environ:
163
212
  del os.environ['PIXELTABLE_USER']
@@ -165,33 +214,46 @@ class Env:
165
214
  os.environ['PIXELTABLE_USER'] = user
166
215
 
167
216
  @property
168
- def default_time_zone(self) -> Optional[ZoneInfo]:
217
+ def default_time_zone(self) -> ZoneInfo | None:
169
218
  return self._default_time_zone
170
219
 
171
220
  @default_time_zone.setter
172
- def default_time_zone(self, tz: Optional[ZoneInfo]) -> None:
221
+ def default_time_zone(self, tz: ZoneInfo | None) -> None:
173
222
  """
174
223
  This is not a publicly visible setter; it is only for testing purposes.
175
224
  """
176
- tz_name = None if tz is None else tz.key
225
+ if tz is None:
226
+ tz_name = self._get_tz_name()
227
+ else:
228
+ assert isinstance(tz, ZoneInfo)
229
+ tz_name = tz.key
177
230
  self.engine.dispose()
178
231
  self._create_engine(time_zone_name=tz_name)
179
232
 
180
233
  @property
181
- def conn(self) -> Optional[sql.Connection]:
234
+ def verbosity(self) -> int:
235
+ return self._verbosity
236
+
237
+ @property
238
+ def conn(self) -> sql.Connection | None:
182
239
  assert self._current_conn is not None
183
240
  return self._current_conn
184
241
 
185
242
  @property
186
- def session(self) -> Optional[sql.orm.Session]:
243
+ def session(self) -> orm.Session | None:
187
244
  assert self._current_session is not None
188
245
  return self._current_session
189
246
 
190
247
  @property
191
- def dbms(self) -> Optional[Dbms]:
248
+ def dbms(self) -> Dbms | None:
192
249
  assert self._dbms is not None
193
250
  return self._dbms
194
251
 
252
+ @property
253
+ def is_using_cockroachdb(self) -> bool:
254
+ assert self._dbms is not None
255
+ return isinstance(self._dbms, CockroachDbms)
256
+
195
257
  @property
196
258
  def in_xact(self) -> bool:
197
259
  return self._current_conn is not None
@@ -202,29 +264,43 @@ class Env:
202
264
  return self._db_server is not None
203
265
 
204
266
  @contextmanager
205
- def begin_xact(self) -> Iterator[sql.Connection]:
206
- """Call Catalog.begin_xact() instead, unless there is a specific reason to call this directly."""
267
+ def begin_xact(self, *, for_write: bool = False) -> Iterator[sql.Connection]:
268
+ """
269
+ Call Catalog.begin_xact() instead, unless there is a specific reason to call this directly.
270
+
271
+ for_write: if True, uses serializable isolation; if False, uses repeatable_read
272
+
273
+ TODO: repeatable read is not available in Cockroachdb; instead, run queries against a snapshot TVP
274
+ that avoids tripping over any pending ops
275
+ """
207
276
  if self._current_conn is None:
208
277
  assert self._current_session is None
209
278
  try:
210
- with self.engine.begin() as conn, sql.orm.Session(conn) as session:
279
+ self._current_isolation_level = self.SERIALIZABLE_ISOLATION_LEVEL
280
+ with (
281
+ self.engine.connect().execution_options(isolation_level=self._current_isolation_level) as conn,
282
+ orm.Session(conn) as session,
283
+ conn.begin(),
284
+ ):
211
285
  self._current_conn = conn
212
286
  self._current_session = session
213
287
  yield conn
214
288
  finally:
215
289
  self._current_session = None
216
290
  self._current_conn = None
291
+ self._current_isolation_level = None
217
292
  else:
218
293
  assert self._current_session is not None
294
+ assert self._current_isolation_level == self.SERIALIZABLE_ISOLATION_LEVEL or not for_write
219
295
  yield self._current_conn
220
296
 
221
297
  def configure_logging(
222
298
  self,
223
299
  *,
224
- to_stdout: Optional[bool] = None,
225
- level: Optional[int] = None,
226
- add: Optional[str] = None,
227
- remove: Optional[str] = None,
300
+ to_stdout: bool | None = None,
301
+ level: int | None = None,
302
+ add: str | None = None,
303
+ remove: str | None = None,
228
304
  ) -> None:
229
305
  """Configure logging.
230
306
 
@@ -266,7 +342,7 @@ class Env:
266
342
  def set_log_level(self, level: int) -> None:
267
343
  self._default_log_level = level
268
344
 
269
- def set_module_log_level(self, module: str, level: Optional[int]) -> None:
345
+ def set_module_log_level(self, module: str, level: int | None) -> None:
270
346
  if level is None:
271
347
  self._module_log_level.pop(module, None)
272
348
  else:
@@ -281,6 +357,8 @@ class Env:
281
357
  # accept log messages from a configured pixeltable module (at any level of the module hierarchy)
282
358
  path_parts = list(Path(record.pathname).parts)
283
359
  path_parts.reverse()
360
+ if 'pixeltable' not in path_parts:
361
+ return False
284
362
  max_idx = path_parts.index('pixeltable')
285
363
  for module_name in path_parts[:max_idx]:
286
364
  if module_name in self._module_log_level and record.levelno >= self._module_log_level[module_name]:
@@ -291,6 +369,26 @@ class Env:
291
369
  def console_logger(self) -> ConsoleLogger:
292
370
  return self._console_logger
293
371
 
372
+ def _get_tz_name(self) -> str:
373
+ """Get the time zone name from the configuration, or the system local time zone if not specified.
374
+
375
+ Returns:
376
+ str: The time zone name.
377
+ """
378
+ tz_name = Config.get().get_string_value('time_zone')
379
+ if tz_name is not None:
380
+ # Validate tzname
381
+ if not isinstance(tz_name, str):
382
+ self._logger.error('Invalid time zone specified in configuration.')
383
+ else:
384
+ try:
385
+ _ = ZoneInfo(tz_name)
386
+ except ZoneInfoNotFoundError:
387
+ self._logger.error(f'Invalid time zone specified in configuration: {tz_name}')
388
+ else:
389
+ tz_name = tzlocal.get_localzone_name()
390
+ return tz_name
391
+
294
392
  def _set_up(self, echo: bool = False, reinit_db: bool = False) -> None:
295
393
  if self._initialized:
296
394
  return
@@ -300,22 +398,18 @@ class Env:
300
398
  config = Config.get()
301
399
 
302
400
  self._initialized = True
401
+
303
402
  self._media_dir = Config.get().home / 'media'
304
403
  self._file_cache_dir = Config.get().home / 'file_cache'
305
404
  self._dataset_cache_dir = Config.get().home / 'dataset_cache'
306
405
  self._log_dir = Config.get().home / 'logs'
307
406
  self._tmp_dir = Config.get().home / 'tmp'
308
407
 
309
- if not self._media_dir.exists():
310
- self._media_dir.mkdir()
311
- if not self._file_cache_dir.exists():
312
- self._file_cache_dir.mkdir()
313
- if not self._dataset_cache_dir.exists():
314
- self._dataset_cache_dir.mkdir()
315
- if not self._log_dir.exists():
316
- self._log_dir.mkdir()
317
- if not self._tmp_dir.exists():
318
- self._tmp_dir.mkdir()
408
+ self._media_dir.mkdir(exist_ok=True)
409
+ self._file_cache_dir.mkdir(exist_ok=True)
410
+ self._dataset_cache_dir.mkdir(exist_ok=True)
411
+ self._log_dir.mkdir(exist_ok=True)
412
+ self._tmp_dir.mkdir(exist_ok=True)
319
413
 
320
414
  self._file_cache_size_g = config.get_float_value('file_cache_size_g')
321
415
  if self._file_cache_size_g is None:
@@ -324,6 +418,16 @@ class Env:
324
418
  f'(either add a `file_cache_size_g` entry to the `pixeltable` section of {Config.get().config_file},\n'
325
419
  'or set the PIXELTABLE_FILE_CACHE_SIZE_G environment variable)'
326
420
  )
421
+
422
+ self._default_input_media_dest = config.get_string_value('input_media_dest')
423
+ self._default_output_media_dest = config.get_string_value('output_media_dest')
424
+ for mode, uri in (('input', self._default_input_media_dest), ('output', self._default_output_media_dest)):
425
+ if uri is not None:
426
+ try:
427
+ _ = ObjectPath.parse_object_storage_addr(uri, False)
428
+ except Exception as e:
429
+ raise excs.Error(f'Invalid {mode} media destination URI: {uri}') from e
430
+
327
431
  self._pxt_api_key = config.get_string_value('api_key')
328
432
 
329
433
  # Disable spurious warnings
@@ -333,10 +437,12 @@ class Env:
333
437
  warnings.simplefilter('ignore', category=UserWarning)
334
438
  warnings.simplefilter('ignore', category=FutureWarning)
335
439
 
336
- # Set verbose level for user visible console messages
337
- verbosity = map_level(config.get_int_value('verbosity'))
440
+ # Set verbosity level for user visible console messages
441
+ self._verbosity = config.get_int_value('verbosity')
442
+ if self._verbosity is None:
443
+ self._verbosity = 1
338
444
  stdout_handler = ConsoleOutputHandler(stream=stdout)
339
- stdout_handler.setLevel(verbosity)
445
+ stdout_handler.setLevel(map_level(self._verbosity))
340
446
  stdout_handler.addFilter(ConsoleMessageFilter())
341
447
  self._logger.addHandler(stdout_handler)
342
448
  self._console_logger = ConsoleLogger(self._logger)
@@ -370,6 +476,7 @@ class Env:
370
476
  http_logger.propagate = False
371
477
 
372
478
  self.clear_tmp_dir()
479
+ tz_name = self._get_tz_name()
373
480
 
374
481
  # configure pixeltable database
375
482
  self._init_db(config)
@@ -379,22 +486,10 @@ class Env:
379
486
  'Reinitializing pixeltable database is not supported when running in non-local environment'
380
487
  )
381
488
 
382
- tz_name = config.get_string_value('time_zone')
383
- if tz_name is not None:
384
- # Validate tzname
385
- if not isinstance(tz_name, str):
386
- self._logger.error('Invalid time zone specified in configuration.')
387
- else:
388
- try:
389
- _ = ZoneInfo(tz_name)
390
- except ZoneInfoNotFoundError:
391
- self._logger.error(f'Invalid time zone specified in configuration: {tz_name}')
392
-
393
489
  if reinit_db and self._store_db_exists():
394
490
  self._drop_store_db()
395
491
 
396
492
  create_db = not self._store_db_exists()
397
-
398
493
  if create_db:
399
494
  self._logger.info(f'creating database at: {self.db_url}')
400
495
  self._create_store_db()
@@ -439,7 +534,7 @@ class Env:
439
534
  raise excs.Error(error)
440
535
  self._logger.info(f'Using database at: {self.db_url}')
441
536
  else:
442
- self._db_name = os.environ.get('PIXELTABLE_DB', 'pixeltable')
537
+ self._db_name = config.get_string_value('db') or 'pixeltable'
443
538
  self._pgdata_dir = Path(os.environ.get('PIXELTABLE_PGDATA', str(Config.get().home / 'pgdata')))
444
539
  # cleanup_mode=None will leave the postgres process running after Python exits
445
540
  # cleanup_mode='stop' will terminate the postgres process when Python exits
@@ -453,30 +548,49 @@ class Env:
453
548
  assert self._db_url is not None
454
549
  assert self._db_name is not None
455
550
 
551
+ @retry(
552
+ stop=stop_after_attempt(3), # Stop after 3 attempts
553
+ wait=wait_exponential_jitter(initial=0.2, max=1.0, jitter=0.2), # Exponential backoff with jitter
554
+ )
456
555
  def _init_metadata(self) -> None:
457
556
  """
458
557
  Create pixeltable metadata tables and system metadata.
459
558
  This is an idempotent operation.
559
+
560
+ Retry logic handles race conditions when multiple Pixeltable processes
561
+ attempt to initialize metadata tables simultaneously. The first process may succeed
562
+ in creating tables while others encounter database constraints (e.g., "table already exists").
563
+ Exponential backoff with jitter reduces contention between competing processes.
460
564
  """
461
565
  assert self._sa_engine is not None
462
566
  from pixeltable import metadata
463
567
 
568
+ self._logger.debug('Creating pixeltable metadata')
464
569
  metadata.schema.base_metadata.create_all(self._sa_engine, checkfirst=True)
465
570
  metadata.create_system_info(self._sa_engine)
466
571
 
467
- def _create_engine(self, time_zone_name: Optional[str], echo: bool = False) -> None:
468
- connect_args = {} if time_zone_name is None else {'options': f'-c timezone={time_zone_name}'}
572
+ def _create_engine(self, time_zone_name: str, echo: bool = False) -> None:
573
+ connect_args = {'options': f'-c timezone={time_zone_name}'}
574
+ self._logger.info(f'Creating SQLAlchemy engine with connection arguments: {connect_args}')
469
575
  self._sa_engine = sql.create_engine(
470
576
  self.db_url, echo=echo, isolation_level=self._dbms.transaction_isolation_level, connect_args=connect_args
471
577
  )
472
578
 
473
579
  self._logger.info(f'Created SQLAlchemy engine at: {self.db_url}')
580
+ self._logger.info(f'Engine dialect: {self._sa_engine.dialect.name}')
581
+ self._logger.info(f'Engine driver : {self._sa_engine.dialect.driver}')
474
582
 
475
583
  with self.engine.begin() as conn:
476
584
  tz_name = conn.execute(sql.text('SHOW TIME ZONE')).scalar()
477
585
  assert isinstance(tz_name, str)
478
586
  self._logger.info(f'Database time zone is now: {tz_name}')
479
587
  self._default_time_zone = ZoneInfo(tz_name)
588
+ if self.is_using_cockroachdb:
589
+ # This could be set when the database is created, but we set it now
590
+ conn.execute(sql.text('SET null_ordered_last = true;'))
591
+ null_ordered_last = conn.execute(sql.text('SHOW null_ordered_last')).scalar()
592
+ assert isinstance(null_ordered_last, str)
593
+ self._logger.info(f'Database null_ordered_last is now: {null_ordered_last}')
480
594
 
481
595
  def _store_db_exists(self) -> bool:
482
596
  assert self._db_name is not None
@@ -511,6 +625,14 @@ class Env:
511
625
  finally:
512
626
  engine.dispose()
513
627
 
628
+ def _pgserver_terminate_connections_stmt(self) -> str:
629
+ return f"""
630
+ SELECT pg_terminate_backend(pg_stat_activity.pid)
631
+ FROM pg_stat_activity
632
+ WHERE pg_stat_activity.datname = '{self._db_name}'
633
+ AND pid <> pg_backend_pid()
634
+ """
635
+
514
636
  def _drop_store_db(self) -> None:
515
637
  assert self._db_name is not None
516
638
  engine = sql.create_engine(self._dbms.default_system_db_url(), future=True, isolation_level='AUTOCOMMIT')
@@ -519,13 +641,7 @@ class Env:
519
641
  with engine.begin() as conn:
520
642
  # terminate active connections
521
643
  if self._db_server is not None:
522
- stmt = f"""
523
- SELECT pg_terminate_backend(pg_stat_activity.pid)
524
- FROM pg_stat_activity
525
- WHERE pg_stat_activity.datname = '{self._db_name}'
526
- AND pid <> pg_backend_pid()
527
- """
528
- conn.execute(sql.text(stmt))
644
+ conn.execute(sql.text(self._pgserver_terminate_connections_stmt()))
529
645
  # drop db
530
646
  stmt = self._dbms.drop_db_stmt(preparer.quote(self._db_name))
531
647
  conn.execute(sql.text(stmt))
@@ -538,12 +654,7 @@ class Env:
538
654
  metadata.upgrade_md(self._sa_engine)
539
655
 
540
656
  @property
541
- def pxt_api_key(self) -> str:
542
- if self._pxt_api_key is None:
543
- raise excs.Error(
544
- 'No API key is configured. Set the PIXELTABLE_API_KEY environment variable, or add an entry to '
545
- 'config.toml as described here:\nhttps://pixeltable.github.io/pixeltable/config/'
546
- )
657
+ def pxt_api_key(self) -> str | None:
547
658
  return self._pxt_api_key
548
659
 
549
660
  def get_client(self, name: str) -> Any:
@@ -553,35 +664,51 @@ class Env:
553
664
  Args:
554
665
  - name: The name of the client
555
666
  """
556
- cl = _registered_clients[name]
557
- if cl.client_obj is not None:
558
- return cl.client_obj # Already initialized
559
-
560
- # Construct a client, retrieving each parameter from config.
561
-
562
- init_kwargs: dict[str, str] = {}
563
- for param in cl.param_names:
564
- arg = Config.get().get_string_value(param, section=name)
565
- if arg is not None and len(arg) > 0:
566
- init_kwargs[param] = arg
567
- else:
667
+ # Return the existing client if it has already been constructed
668
+ with _registered_clients_lock:
669
+ cl = _registered_clients[name]
670
+ if cl.client_obj is not None:
671
+ return cl.client_obj # Already initialized
672
+
673
+ # Retrieve parameters required to construct the requested client.
674
+ init_kwargs: dict[str, Any] = {}
675
+ for param in cl.params.values():
676
+ # Determine the type of the parameter for proper config parsing.
677
+ pname = param.name
678
+ t = param.annotation
679
+ # Deference T | None
680
+ if typing.get_origin(t) in (typing.Union, types.UnionType):
681
+ args = typing.get_args(t)
682
+ if args[0] is type(None):
683
+ t = args[1]
684
+ elif args[1] is type(None):
685
+ t = args[0]
686
+ assert isinstance(t, type), t
687
+ arg: Any = Config.get().get_value(pname, t, section=name)
688
+ if arg is not None:
689
+ init_kwargs[pname] = arg
690
+ elif param.default is inspect.Parameter.empty:
568
691
  raise excs.Error(
569
- f'`{name}` client not initialized: parameter `{param}` is not configured.\n'
570
- f'To fix this, specify the `{name.upper()}_{param.upper()}` environment variable, '
571
- f'or put `{param.lower()}` in the `{name.lower()}` section of $PIXELTABLE_HOME/config.toml.'
692
+ f'`{name}` client not initialized: parameter `{pname}` is not configured.\n'
693
+ f'To fix this, specify the `{name.upper()}_{pname.upper()}` environment variable, '
694
+ f'or put `{pname.lower()}` in the `{name.lower()}` section of $PIXELTABLE_HOME/config.toml.'
572
695
  )
573
696
 
574
- cl.client_obj = cl.init_fn(**init_kwargs)
575
- self._logger.info(f'Initialized `{name}` client.')
576
- return cl.client_obj
697
+ # Construct the requested client
698
+ with _registered_clients_lock:
699
+ if cl.client_obj is not None:
700
+ return cl.client_obj # Already initialized
701
+ cl.client_obj = cl.init_fn(**init_kwargs)
702
+ self._logger.info(f'Initialized `{name}` client with parameters: {init_kwargs}.')
703
+ return cl.client_obj
577
704
 
578
705
  def _start_web_server(self) -> None:
579
706
  """
580
707
  The http server root is the file system root.
581
708
  eg: /home/media/foo.mp4 is located at http://127.0.0.1:{port}/home/media/foo.mp4
582
- in windows, the server will translate paths like http://127.0.0.1:{port}/c:/media/foo.mp4
583
- This arrangement enables serving media hosted within _home,
584
- as well as external media inserted into pixeltable or produced by pixeltable.
709
+ On Windows, the server will translate paths like http://127.0.0.1:{port}/c:/media/foo.mp4
710
+ This arrangement enables serving objects hosted within _home,
711
+ as well as external objects inserted into pixeltable or produced by pixeltable.
585
712
  The port is chosen dynamically to prevent conflicts.
586
713
  """
587
714
  # Port 0 means OS picks one for us.
@@ -603,17 +730,60 @@ class Env:
603
730
  self._start_web_server()
604
731
  self.__register_packages()
605
732
 
733
+ @property
734
+ def default_video_encoder(self) -> str | None:
735
+ if self._default_video_encoder is None:
736
+ self._default_video_encoder = self._determine_default_video_encoder()
737
+ return self._default_video_encoder
738
+
739
+ def _determine_default_video_encoder(self) -> str | None:
740
+ """
741
+ Returns the first available encoder from a list of candidates.
742
+
743
+ TODO:
744
+ - the user might prefer a hardware-accelerated encoder (eg, h264_nvenc or h264_videotoolbox)
745
+ - allow user override via a config option 'video_encoder'
746
+ """
747
+ # look for available encoders, in this order
748
+ candidates = [
749
+ 'libx264', # GPL, best quality
750
+ 'libopenh264', # BSD
751
+ ]
752
+
753
+ try:
754
+ # Get list of available encoders
755
+ result = subprocess.run(['ffmpeg', '-encoders'], capture_output=True, text=True, timeout=10, check=True)
756
+
757
+ if result.returncode == 0:
758
+ available_encoders = result.stdout
759
+ for encoder in candidates:
760
+ # ffmpeg -encoders output format: " V..... encoder_name description"
761
+ if f' {encoder} ' in available_encoders:
762
+ _logger.debug(f'Using H.264 encoder: {encoder}')
763
+ return encoder
764
+ except Exception:
765
+ pass
766
+ return None
767
+
606
768
  def __register_packages(self) -> None:
607
769
  """Declare optional packages that are utilized by some parts of the code."""
770
+ self.__register_package('accelerate')
608
771
  self.__register_package('anthropic')
772
+ self.__register_package('azure.storage.blob', library_name='azure-storage-blob')
609
773
  self.__register_package('boto3')
610
774
  self.__register_package('datasets')
775
+ self.__register_package('diffusers')
611
776
  self.__register_package('fiftyone')
777
+ self.__register_package('twelvelabs')
612
778
  self.__register_package('fireworks', library_name='fireworks-ai')
779
+ self.__register_package('google.cloud.storage', library_name='google-cloud-storage')
613
780
  self.__register_package('google.genai', library_name='google-genai')
781
+ self.__register_package('groq')
614
782
  self.__register_package('huggingface_hub', library_name='huggingface-hub')
615
783
  self.__register_package('label_studio_sdk', library_name='label-studio-sdk')
784
+ self.__register_package('librosa')
616
785
  self.__register_package('llama_cpp', library_name='llama-cpp-python')
786
+ self.__register_package('mcp')
617
787
  self.__register_package('mistralai')
618
788
  self.__register_package('mistune')
619
789
  self.__register_package('ollama')
@@ -622,8 +792,10 @@ class Env:
622
792
  self.__register_package('pyarrow')
623
793
  self.__register_package('pydantic')
624
794
  self.__register_package('replicate')
795
+ self.__register_package('reve')
625
796
  self.__register_package('sentencepiece')
626
797
  self.__register_package('sentence_transformers', library_name='sentence-transformers')
798
+ self.__register_package('soundfile')
627
799
  self.__register_package('spacy')
628
800
  self.__register_package('tiktoken')
629
801
  self.__register_package('together')
@@ -634,8 +806,10 @@ class Env:
634
806
  self.__register_package('whisper', library_name='openai-whisper')
635
807
  self.__register_package('whisperx')
636
808
  self.__register_package('yolox', library_name='pixeltable-yolox')
809
+ self.__register_package('lancedb')
810
+ self.__register_package('scenedetect')
637
811
 
638
- def __register_package(self, package_name: str, library_name: Optional[str] = None) -> None:
812
+ def __register_package(self, package_name: str, library_name: str | None = None) -> None:
639
813
  is_installed: bool
640
814
  try:
641
815
  is_installed = importlib.util.find_spec(package_name) is not None
@@ -647,7 +821,11 @@ class Env:
647
821
  library_name=library_name or package_name, # defaults to package_name unless specified otherwise
648
822
  )
649
823
 
650
- def require_package(self, package_name: str, min_version: Optional[list[int]] = None) -> None:
824
+ def require_binary(self, binary_name: str) -> None:
825
+ if not shutil.which(binary_name):
826
+ raise excs.Error(f'{binary_name} is not installed or not in PATH. Please install it to use this feature.')
827
+
828
+ def require_package(self, package_name: str, min_version: list[int] | None = None) -> None:
651
829
  """
652
830
  Checks whether the specified optional package is available. If not, raises an exception
653
831
  with an error message informing the user how to install it.
@@ -691,14 +869,8 @@ class Env:
691
869
  else:
692
870
  os.remove(path)
693
871
 
694
- def num_tmp_files(self) -> int:
695
- return len(glob.glob(f'{self._tmp_dir}/*'))
696
-
697
- def create_tmp_path(self, extension: str = '') -> Path:
698
- return self._tmp_dir / f'{uuid.uuid4()}{extension}'
699
-
700
- # def get_resource_pool_info(self, pool_id: str, pool_info_cls: Optional[Type[T]]) -> T:
701
- def get_resource_pool_info(self, pool_id: str, make_pool_info: Optional[Callable[[], T]] = None) -> T:
872
+ # def get_resource_pool_info(self, pool_id: str, pool_info_cls: Type[T] | None) -> T:
873
+ def get_resource_pool_info(self, pool_id: str, make_pool_info: Callable[[], T] | None = None) -> T:
702
874
  """Returns the info object for the given id, creating it if necessary."""
703
875
  info = self._resource_pool_info.get(pool_id)
704
876
  if info is None and make_pool_info is not None:
@@ -711,6 +883,14 @@ class Env:
711
883
  assert self._media_dir is not None
712
884
  return self._media_dir
713
885
 
886
+ @property
887
+ def default_input_media_dest(self) -> str | None:
888
+ return self._default_input_media_dest
889
+
890
+ @property
891
+ def default_output_media_dest(self) -> str | None:
892
+ return self._default_output_media_dest
893
+
714
894
  @property
715
895
  def file_cache_dir(self) -> Path:
716
896
  assert self._file_cache_dir is not None
@@ -746,24 +926,74 @@ class Env:
746
926
  have no sub-dependencies (in fact, this is how spaCy normally manages its model resources).
747
927
  """
748
928
  import spacy
749
- from spacy.cli.download import get_model_filename
929
+ from spacy.cli.download import download
750
930
 
751
931
  spacy_model = 'en_core_web_sm'
752
- spacy_model_version = '3.7.1'
753
- filename = get_model_filename(spacy_model, spacy_model_version, sdist=False)
754
- url = f'{spacy.about.__download_url__}/{filename}'
755
- # Try to `pip install` the model. We set check=False; if the pip command fails, it's not necessarily
756
- # a problem, because the model might have been installed on a previous attempt.
757
- self._logger.info(f'Ensuring spaCy model is installed: {filename}')
758
- ret = subprocess.run([sys.executable, '-m', 'pip', 'install', '-qU', url], check=False)
759
- if ret.returncode != 0:
760
- self._logger.warning(f'pip install failed for spaCy model: {filename}')
932
+ self._logger.info(f'Ensuring spaCy model is installed: {spacy_model}')
933
+ download(spacy_model)
761
934
  self._logger.info(f'Loading spaCy model: {spacy_model}')
762
935
  try:
763
936
  self._spacy_nlp = spacy.load(spacy_model)
764
937
  except Exception as exc:
765
938
  raise excs.Error(f'Failed to load spaCy model: {spacy_model}') from exc
766
939
 
940
+ def _clean_up(self) -> None:
941
+ """
942
+ Internal cleanup method that properly closes all resources and resets state.
943
+ This is called before destroying the singleton instance.
944
+ """
945
+ assert self._current_session is None
946
+ assert self._current_conn is None
947
+
948
+ # Stop HTTP server
949
+ if self._httpd is not None:
950
+ try:
951
+ self._httpd.shutdown()
952
+ self._httpd.server_close()
953
+ except Exception as e:
954
+ _logger.warning(f'Error stopping HTTP server: {e}')
955
+
956
+ # First terminate all connections to the database
957
+ if self._db_server is not None:
958
+ assert self._dbms is not None
959
+ assert self._db_name is not None
960
+ try:
961
+ temp_engine = sql.create_engine(self._dbms.default_system_db_url(), isolation_level='AUTOCOMMIT')
962
+ try:
963
+ with temp_engine.begin() as conn:
964
+ conn.execute(sql.text(self._pgserver_terminate_connections_stmt()))
965
+ _logger.info(f"Terminated all connections to database '{self._db_name}'")
966
+ except Exception as e:
967
+ _logger.warning(f'Error terminating database connections: {e}')
968
+ finally:
969
+ temp_engine.dispose()
970
+ except Exception as e:
971
+ _logger.warning(f'Error stopping database server: {e}')
972
+
973
+ # Dispose of SQLAlchemy engine (after stopping db server)
974
+ if self._sa_engine is not None:
975
+ try:
976
+ self._sa_engine.dispose()
977
+ except Exception as e:
978
+ _logger.warning(f'Error disposing engine: {e}')
979
+
980
+ # Close event loop
981
+ if self._event_loop is not None:
982
+ try:
983
+ if self._event_loop.is_running():
984
+ self._event_loop.stop()
985
+ self._event_loop.close()
986
+ except Exception as e:
987
+ _logger.warning(f'Error closing event loop: {e}')
988
+
989
+ # Remove logging handlers
990
+ for handler in self._logger.handlers[:]:
991
+ try:
992
+ handler.close()
993
+ self._logger.removeHandler(handler)
994
+ except Exception as e:
995
+ _logger.warning(f'Error removing handler: {e}')
996
+
767
997
 
768
998
  def register_client(name: str) -> Callable:
769
999
  """Decorator that registers a third-party API client for use by Pixeltable.
@@ -792,27 +1022,29 @@ def register_client(name: str) -> Callable:
792
1022
 
793
1023
  def decorator(fn: Callable) -> None:
794
1024
  sig = inspect.signature(fn)
795
- param_names = list(sig.parameters.keys())
796
- _registered_clients[name] = ApiClient(init_fn=fn, param_names=param_names)
1025
+ params = dict(sig.parameters)
1026
+ with _registered_clients_lock:
1027
+ _registered_clients[name] = ApiClient(init_fn=fn, params=params)
797
1028
 
798
1029
  return decorator
799
1030
 
800
1031
 
1032
+ _registered_clients_lock: threading.Lock = threading.Lock()
801
1033
  _registered_clients: dict[str, ApiClient] = {}
802
1034
 
803
1035
 
804
1036
  @dataclass
805
1037
  class ApiClient:
806
1038
  init_fn: Callable
807
- param_names: list[str]
808
- client_obj: Optional[Any] = None
1039
+ params: dict[str, inspect.Parameter]
1040
+ client_obj: Any | None = None
809
1041
 
810
1042
 
811
1043
  @dataclass
812
1044
  class PackageInfo:
813
1045
  is_installed: bool
814
1046
  library_name: str # pypi library name (may be different from package name)
815
- version: Optional[list[int]] = None # installed version, as a list of components (such as [3,0,2] for "3.0.2")
1047
+ version: list[int] | None = None # installed version, as a list of components (such as [3,0,2] for "3.0.2")
816
1048
 
817
1049
 
818
1050
  TIME_FORMAT = '%H:%M.%S %f'
@@ -838,6 +1070,10 @@ class RateLimitsInfo:
838
1070
  get_request_resources: Callable[..., dict[str, int]]
839
1071
 
840
1072
  resource_limits: dict[str, RateLimitInfo] = field(default_factory=dict)
1073
+ has_exc: bool = False
1074
+
1075
+ def debug_str(self) -> str:
1076
+ return ','.join(info.debug_str() for info in self.resource_limits.values())
841
1077
 
842
1078
  def is_initialized(self) -> bool:
843
1079
  return len(self.resource_limits) > 0
@@ -845,7 +1081,7 @@ class RateLimitsInfo:
845
1081
  def reset(self) -> None:
846
1082
  self.resource_limits.clear()
847
1083
 
848
- def record(self, **kwargs: Any) -> None:
1084
+ def record(self, reset_exc: bool = False, **kwargs: Any) -> None:
849
1085
  now = datetime.datetime.now(tz=datetime.timezone.utc)
850
1086
  if len(self.resource_limits) == 0:
851
1087
  self.resource_limits = {k: RateLimitInfo(k, now, *v) for k, v in kwargs.items() if v is not None}
@@ -856,14 +1092,30 @@ class RateLimitsInfo:
856
1092
  f'reset={info.reset_at.strftime(TIME_FORMAT)} delta={(info.reset_at - now).total_seconds()}'
857
1093
  )
858
1094
  else:
1095
+ if self.has_exc and not reset_exc:
1096
+ # ignore updates until we're asked to reset
1097
+ _logger.debug(f'rate_limits.record(): ignoring update {kwargs}')
1098
+ return
1099
+ self.has_exc = False
859
1100
  for k, v in kwargs.items():
860
1101
  if v is not None:
861
1102
  self.resource_limits[k].update(now, *v)
862
1103
 
863
- @abstractmethod
864
- def get_retry_delay(self, exc: Exception) -> Optional[float]:
1104
+ def record_exc(self, exc: Exception) -> None:
1105
+ """Update self.resource_limits based on the exception headers"""
1106
+ self.has_exc = True
1107
+
1108
+ def get_retry_delay(self, exc: Exception) -> float | None:
865
1109
  """Returns number of seconds to wait before retry, or None if not retryable"""
866
- pass
1110
+ if len(self.resource_limits) == 0:
1111
+ return 1.0
1112
+ # we're looking for the maximum delay across all depleted resources
1113
+ max_delay = 0.0
1114
+ now = datetime.datetime.now(tz=datetime.timezone.utc)
1115
+ for limit_info in self.resource_limits.values():
1116
+ if limit_info.remaining < 0.05 * limit_info.limit:
1117
+ max_delay = max(max_delay, (limit_info.reset_at - now).total_seconds())
1118
+ return max_delay if max_delay > 0 else None
867
1119
 
868
1120
 
869
1121
  @dataclass
@@ -876,9 +1128,15 @@ class RateLimitInfo:
876
1128
  remaining: int
877
1129
  reset_at: datetime.datetime
878
1130
 
1131
+ def debug_str(self) -> str:
1132
+ return (
1133
+ f'{self.resource}@{self.recorded_at.strftime(TIME_FORMAT)}: '
1134
+ f'{self.limit}/{self.remaining}/{self.reset_at.strftime(TIME_FORMAT)}'
1135
+ )
1136
+
879
1137
  def update(self, recorded_at: datetime.datetime, limit: int, remaining: int, reset_at: datetime.datetime) -> None:
880
1138
  # we always update everything, even though responses may come back out-of-order: we can't use reset_at to
881
- # determine order, because it doesn't increase monotonically (the reeset duration shortens as output_tokens
1139
+ # determine order, because it doesn't increase monotonically (the reset duration shortens as output_tokens
882
1140
  # are freed up - going from max to actual)
883
1141
  self.recorded_at = recorded_at
884
1142
  self.limit = limit
@@ -890,3 +1148,16 @@ class RateLimitInfo:
890
1148
  f'Update {self.resource} rate limit: rem={self.remaining} reset={self.reset_at.strftime(TIME_FORMAT)} '
891
1149
  f'reset_delta={reset_delta.total_seconds()} recorded_delta={(self.reset_at - recorded_at).total_seconds()}'
892
1150
  )
1151
+
1152
+
1153
@dataclass
class RuntimeCtx:
    """
    Runtime data that the execution system hands to udfs.

    A udf opts in by declaring the special _runtime_ctx parameter; it is then passed an instance of this class.
    """

    # True when this call is a retry after a rate limit error (error code: 429); requires a 'rate-limits'
    # resource pool. When set, RateLimitsInfo.record() should be called with reset_exc=True.
    is_retry: bool = False