pixeltable 0.4.13__py3-none-any.whl → 0.4.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +2 -1
- pixeltable/catalog/catalog.py +187 -63
- pixeltable/catalog/column.py +24 -20
- pixeltable/catalog/table.py +24 -8
- pixeltable/catalog/table_metadata.py +1 -0
- pixeltable/catalog/table_version.py +16 -34
- pixeltable/catalog/update_status.py +12 -0
- pixeltable/catalog/view.py +22 -22
- pixeltable/config.py +2 -0
- pixeltable/dataframe.py +4 -2
- pixeltable/env.py +46 -21
- pixeltable/exec/__init__.py +1 -0
- pixeltable/exec/aggregation_node.py +0 -1
- pixeltable/exec/cache_prefetch_node.py +74 -98
- pixeltable/exec/data_row_batch.py +2 -18
- pixeltable/exec/expr_eval/expr_eval_node.py +11 -0
- pixeltable/exec/in_memory_data_node.py +1 -1
- pixeltable/exec/object_store_save_node.py +299 -0
- pixeltable/exec/sql_node.py +28 -33
- pixeltable/exprs/data_row.py +31 -25
- pixeltable/exprs/json_path.py +6 -5
- pixeltable/exprs/row_builder.py +6 -12
- pixeltable/functions/gemini.py +1 -1
- pixeltable/functions/openai.py +1 -1
- pixeltable/functions/video.py +128 -15
- pixeltable/functions/whisperx.py +2 -0
- pixeltable/functions/yolox.py +2 -0
- pixeltable/globals.py +49 -30
- pixeltable/index/embedding_index.py +5 -8
- pixeltable/io/__init__.py +1 -0
- pixeltable/io/fiftyone.py +1 -1
- pixeltable/io/label_studio.py +4 -5
- pixeltable/iterators/__init__.py +1 -0
- pixeltable/iterators/audio.py +1 -1
- pixeltable/iterators/document.py +10 -12
- pixeltable/iterators/video.py +1 -1
- pixeltable/metadata/schema.py +7 -0
- pixeltable/plan.py +26 -1
- pixeltable/share/packager.py +8 -2
- pixeltable/share/publish.py +3 -10
- pixeltable/store.py +1 -1
- pixeltable/type_system.py +1 -3
- pixeltable/utils/dbms.py +31 -5
- pixeltable/utils/gcs_store.py +283 -0
- pixeltable/utils/local_store.py +316 -0
- pixeltable/utils/object_stores.py +497 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +354 -0
- {pixeltable-0.4.13.dist-info → pixeltable-0.4.15.dist-info}/METADATA +1 -1
- {pixeltable-0.4.13.dist-info → pixeltable-0.4.15.dist-info}/RECORD +53 -50
- pixeltable/utils/media_store.py +0 -248
- pixeltable/utils/s3.py +0 -17
- {pixeltable-0.4.13.dist-info → pixeltable-0.4.15.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.13.dist-info → pixeltable-0.4.15.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.13.dist-info → pixeltable-0.4.15.dist-info}/licenses/LICENSE +0 -0
|
@@ -18,9 +18,8 @@ from pixeltable import exprs, index
|
|
|
18
18
|
from pixeltable.env import Env
|
|
19
19
|
from pixeltable.iterators import ComponentIterator
|
|
20
20
|
from pixeltable.metadata import schema
|
|
21
|
-
from pixeltable.utils.exception_handler import run_cleanup_on_exception
|
|
22
21
|
from pixeltable.utils.filecache import FileCache
|
|
23
|
-
from pixeltable.utils.
|
|
22
|
+
from pixeltable.utils.object_stores import ObjectOps
|
|
24
23
|
|
|
25
24
|
from .tbl_ops import TableOp
|
|
26
25
|
|
|
@@ -327,7 +326,7 @@ class TableVersion:
|
|
|
327
326
|
from .table_version_path import TableVersionPath
|
|
328
327
|
|
|
329
328
|
# clear out any remaining media files from an aborted previous attempt
|
|
330
|
-
|
|
329
|
+
self.delete_media()
|
|
331
330
|
view_path = TableVersionPath.from_dict(op.load_view_op.view_path)
|
|
332
331
|
plan, _ = Planner.create_view_load_plan(view_path)
|
|
333
332
|
_, row_counts = self.store_tbl.insert_rows(plan, v_min=self.version)
|
|
@@ -356,14 +355,23 @@ class TableVersion:
|
|
|
356
355
|
cat = pxt.catalog.Catalog.get()
|
|
357
356
|
# We're creating a new TableVersion replica, so we should never have seen this particular
|
|
358
357
|
# TableVersion instance before.
|
|
359
|
-
|
|
360
|
-
|
|
358
|
+
# Actually this isn't true, because we might be re-creating a dropped replica.
|
|
359
|
+
# TODO: Understand why old TableVersions are kept around even for a dropped table.
|
|
360
|
+
# assert tbl_version.effective_version is not None
|
|
361
|
+
# assert (tbl_version.id, tbl_version.effective_version) not in cat._tbl_versions
|
|
361
362
|
cat._tbl_versions[tbl_version.id, tbl_version.effective_version] = tbl_version
|
|
362
363
|
tbl_version.init()
|
|
363
364
|
tbl_version.store_tbl.create()
|
|
364
365
|
tbl_version.store_tbl.ensure_columns_exist(col for col in tbl_version.cols if col.is_stored)
|
|
365
366
|
return tbl_version
|
|
366
367
|
|
|
368
|
+
def delete_media(self, tbl_version: Optional[int] = None) -> None:
|
|
369
|
+
# Assemble a set of column destinations and delete objects from all of them
|
|
370
|
+
# None is a valid column destination which refers to the default object location
|
|
371
|
+
destinations = {col.destination for col in self.cols if col.is_stored}
|
|
372
|
+
for dest in destinations:
|
|
373
|
+
ObjectOps.delete(dest, self.id, tbl_version=tbl_version)
|
|
374
|
+
|
|
367
375
|
def drop(self) -> None:
|
|
368
376
|
# if self.is_view and self.is_mutable:
|
|
369
377
|
# # update mutable_views
|
|
@@ -374,7 +382,7 @@ class TableVersion:
|
|
|
374
382
|
# if self.base.get().is_mutable:
|
|
375
383
|
# self.base.get().mutable_views.remove(TableVersionHandle.create(self))
|
|
376
384
|
|
|
377
|
-
|
|
385
|
+
self.delete_media()
|
|
378
386
|
FileCache.get().clear(tbl_id=self.id)
|
|
379
387
|
self.store_tbl.drop()
|
|
380
388
|
|
|
@@ -595,18 +603,7 @@ class TableVersion:
|
|
|
595
603
|
idx_info = self.IndexInfo(id=idx_id, name=idx_name, idx=idx, col=col, val_col=val_col, undo_col=undo_col)
|
|
596
604
|
self._tbl_md.index_md[idx_id] = idx_md
|
|
597
605
|
self.idxs_by_name[idx_name] = idx_info
|
|
598
|
-
|
|
599
|
-
idx.create_index(self._store_idx_name(idx_id), val_col)
|
|
600
|
-
finally:
|
|
601
|
-
|
|
602
|
-
def cleanup_index() -> None:
|
|
603
|
-
"""Delete the newly added in-memory index structure"""
|
|
604
|
-
del self.idxs_by_name[idx_name]
|
|
605
|
-
del self._tbl_md.index_md[idx_id]
|
|
606
|
-
self.next_idx_id = idx_id
|
|
607
|
-
|
|
608
|
-
# Run cleanup only if there has been an exception; otherwise, skip cleanup.
|
|
609
|
-
run_cleanup_on_exception(cleanup_index)
|
|
606
|
+
idx.create_index(self._store_idx_name(idx_id), val_col)
|
|
610
607
|
|
|
611
608
|
def _add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
|
|
612
609
|
val_col, undo_vol = self._create_index_columns(idx)
|
|
@@ -741,21 +738,6 @@ class TableVersion:
|
|
|
741
738
|
num_excs += excs_per_col
|
|
742
739
|
computed_values += plan.ctx.num_computed_exprs * row_count
|
|
743
740
|
finally:
|
|
744
|
-
# Ensure cleanup occurs if an exception or keyboard interruption happens during `load_column()`.
|
|
745
|
-
def cleanup_on_error() -> None:
|
|
746
|
-
"""Delete columns that are added as part of current add_columns operation and re-initialize
|
|
747
|
-
the sqlalchemy schema"""
|
|
748
|
-
self.cols = [col for col in self.cols if col not in cols_to_add]
|
|
749
|
-
for col in cols_to_add:
|
|
750
|
-
# remove columns that we already added
|
|
751
|
-
if col.id in self.cols_by_id:
|
|
752
|
-
del self.cols_by_id[col.id]
|
|
753
|
-
if col.name is not None and col.name in self.cols_by_name:
|
|
754
|
-
del self.cols_by_name[col.name]
|
|
755
|
-
self.store_tbl.create_sa_tbl()
|
|
756
|
-
|
|
757
|
-
# Run cleanup only if there has been an exception; otherwise, skip cleanup.
|
|
758
|
-
run_cleanup_on_exception(cleanup_on_error)
|
|
759
741
|
plan.close()
|
|
760
742
|
|
|
761
743
|
pxt.catalog.Catalog.get().record_column_dependencies(self)
|
|
@@ -1236,7 +1218,7 @@ class TableVersion:
|
|
|
1236
1218
|
)
|
|
1237
1219
|
|
|
1238
1220
|
# delete newly-added data
|
|
1239
|
-
|
|
1221
|
+
self.delete_media(tbl_version=self.version)
|
|
1240
1222
|
conn.execute(sql.delete(self.store_tbl.sa_tbl).where(self.store_tbl.sa_tbl.c.v_min == self.version))
|
|
1241
1223
|
|
|
1242
1224
|
# revert new deletions
|
|
@@ -57,27 +57,35 @@ class UpdateStatus:
|
|
|
57
57
|
"""
|
|
58
58
|
|
|
59
59
|
updated_cols: list[str] = field(default_factory=list)
|
|
60
|
+
"""Columns that were updated."""
|
|
60
61
|
cols_with_excs: list[str] = field(default_factory=list)
|
|
62
|
+
"""Columns that encountered exceptions."""
|
|
61
63
|
|
|
62
64
|
# stats for the rows affected by the operation
|
|
63
65
|
row_count_stats: RowCountStats = field(default_factory=RowCountStats)
|
|
66
|
+
"""Row count statistics for rows affected by this operation."""
|
|
64
67
|
|
|
65
68
|
# stats for changes cascaded to other tables
|
|
66
69
|
cascade_row_count_stats: RowCountStats = field(default_factory=RowCountStats)
|
|
70
|
+
"""Row count statistics for changes cascaded to other tables."""
|
|
67
71
|
|
|
68
72
|
# stats for the rows affected by the operation in an external store
|
|
69
73
|
ext_row_count_stats: RowCountStats = field(default_factory=RowCountStats)
|
|
74
|
+
"""Row count statistics for rows affected in an external store."""
|
|
70
75
|
|
|
71
76
|
@property
|
|
72
77
|
def num_rows(self) -> int:
|
|
78
|
+
"""Total number of rows affected (including cascaded changes)."""
|
|
73
79
|
return self.row_count_stats.num_rows + self.cascade_row_count_stats.num_rows
|
|
74
80
|
|
|
75
81
|
@property
|
|
76
82
|
def num_excs(self) -> int:
|
|
83
|
+
"""Total number of exceptions encountered (including cascaded changes)."""
|
|
77
84
|
return self.row_count_stats.num_excs + self.cascade_row_count_stats.num_excs
|
|
78
85
|
|
|
79
86
|
@property
|
|
80
87
|
def num_computed_values(self) -> int:
|
|
88
|
+
"""Total number of computed values affected (including cascaded changes)."""
|
|
81
89
|
return self.row_count_stats.computed_values + self.cascade_row_count_stats.computed_values
|
|
82
90
|
|
|
83
91
|
def insert_to_update(self) -> 'UpdateStatus':
|
|
@@ -164,16 +172,20 @@ class UpdateStatus:
|
|
|
164
172
|
|
|
165
173
|
@property
|
|
166
174
|
def external_rows_updated(self) -> int:
|
|
175
|
+
"""Number of rows updated in an external store."""
|
|
167
176
|
return self.ext_row_count_stats.upd_rows
|
|
168
177
|
|
|
169
178
|
@property
|
|
170
179
|
def external_rows_created(self) -> int:
|
|
180
|
+
"""Number of rows created in an external store."""
|
|
171
181
|
return self.ext_row_count_stats.ins_rows
|
|
172
182
|
|
|
173
183
|
@property
|
|
174
184
|
def external_rows_deleted(self) -> int:
|
|
185
|
+
"""Number of rows deleted from an external store."""
|
|
175
186
|
return self.ext_row_count_stats.del_rows
|
|
176
187
|
|
|
177
188
|
@property
|
|
178
189
|
def ext_num_rows(self) -> int:
|
|
190
|
+
"""Total number of rows affected in an external store."""
|
|
179
191
|
return self.ext_row_count_stats.num_rows
|
pixeltable/catalog/view.py
CHANGED
|
@@ -47,17 +47,13 @@ class View(Table):
|
|
|
47
47
|
self._tbl_version = tbl_version_path.tbl_version
|
|
48
48
|
|
|
49
49
|
def _display_name(self) -> str:
|
|
50
|
-
name: str
|
|
51
|
-
if self._tbl_version_path.is_snapshot():
|
|
52
|
-
name = 'snapshot'
|
|
53
|
-
elif self._tbl_version_path.is_view():
|
|
54
|
-
name = 'view'
|
|
55
|
-
else:
|
|
56
|
-
assert self._tbl_version_path.is_replica()
|
|
57
|
-
name = 'table'
|
|
58
50
|
if self._tbl_version_path.is_replica():
|
|
59
|
-
|
|
60
|
-
|
|
51
|
+
return 'replica'
|
|
52
|
+
if self._tbl_version_path.is_snapshot():
|
|
53
|
+
return 'snapshot'
|
|
54
|
+
if self._tbl_version_path.is_view():
|
|
55
|
+
return 'view'
|
|
56
|
+
return 'table'
|
|
61
57
|
|
|
62
58
|
@classmethod
|
|
63
59
|
def select_list_to_additional_columns(cls, select_list: list[tuple[exprs.Expr, Optional[str]]]) -> dict[str, dict]:
|
|
@@ -270,12 +266,12 @@ class View(Table):
|
|
|
270
266
|
# Update name and path with version qualifiers.
|
|
271
267
|
md['name'] = f'{self._name}:{self._tbl_version_path.version()}'
|
|
272
268
|
md['path'] = f'{self._path()}:{self._tbl_version_path.version()}'
|
|
273
|
-
|
|
274
|
-
if
|
|
275
|
-
|
|
276
|
-
|
|
269
|
+
base_tbl_id = self._base_tbl_id
|
|
270
|
+
if base_tbl_id is not None:
|
|
271
|
+
base_tbl = self._get_base_table()
|
|
272
|
+
base_path = '<anonymous base table>' if base_tbl is None else base_tbl._path()
|
|
277
273
|
base_version = self._effective_base_versions[0]
|
|
278
|
-
md['base'] =
|
|
274
|
+
md['base'] = base_path if base_version is None else f'{base_path}:{base_version}'
|
|
279
275
|
return md
|
|
280
276
|
|
|
281
277
|
def insert(
|
|
@@ -294,17 +290,21 @@ class View(Table):
|
|
|
294
290
|
def delete(self, where: Optional[exprs.Expr] = None) -> UpdateStatus:
|
|
295
291
|
raise excs.Error(f'{self._display_str()}: Cannot delete from a {self._display_name()}.')
|
|
296
292
|
|
|
297
|
-
|
|
293
|
+
@property
|
|
294
|
+
def _base_tbl_id(self) -> Optional[UUID]:
|
|
298
295
|
if self._tbl_version_path.tbl_id != self._id:
|
|
299
296
|
# _tbl_version_path represents a different schema object from this one. This can only happen if this is a
|
|
300
297
|
# named pure snapshot.
|
|
301
|
-
|
|
302
|
-
|
|
298
|
+
return self._tbl_version_path.tbl_id
|
|
299
|
+
if self._tbl_version_path.base is None:
|
|
303
300
|
return None
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
301
|
+
return self._tbl_version_path.base.tbl_id
|
|
302
|
+
|
|
303
|
+
def _get_base_table(self) -> Optional['Table']:
|
|
304
|
+
"""Returns None if there is no base table, or if the base table is hidden."""
|
|
305
|
+
base_tbl_id = self._base_tbl_id
|
|
306
|
+
with catalog.Catalog.get().begin_xact(tbl_id=base_tbl_id, for_write=False):
|
|
307
|
+
return catalog.Catalog.get().get_table_by_id(base_tbl_id)
|
|
308
308
|
|
|
309
309
|
@property
|
|
310
310
|
def _effective_base_versions(self) -> list[Optional[int]]:
|
pixeltable/config.py
CHANGED
|
@@ -161,6 +161,8 @@ KNOWN_CONFIG_OPTIONS = {
|
|
|
161
161
|
'hide_warnings': 'Hide warnings from the console',
|
|
162
162
|
'verbosity': 'Verbosity level for console output',
|
|
163
163
|
'api_key': 'API key for Pixeltable cloud',
|
|
164
|
+
'r2_profile': 'AWS config profile name used to access R2 storage',
|
|
165
|
+
's3_profile': 'AWS config profile name used to access S3 storage',
|
|
164
166
|
},
|
|
165
167
|
'anthropic': {'api_key': 'Anthropic API key'},
|
|
166
168
|
'bedrock': {'api_key': 'AWS Bedrock API key'},
|
pixeltable/dataframe.py
CHANGED
|
@@ -456,6 +456,7 @@ class DataFrame:
|
|
|
456
456
|
|
|
457
457
|
@property
|
|
458
458
|
def schema(self) -> dict[str, ColumnType]:
|
|
459
|
+
"""Column names and types in this DataFrame."""
|
|
459
460
|
return self._schema
|
|
460
461
|
|
|
461
462
|
def bind(self, args: dict[str, Any]) -> DataFrame:
|
|
@@ -1276,10 +1277,11 @@ class DataFrame:
|
|
|
1276
1277
|
|
|
1277
1278
|
# TODO: Reconcile these with Table.__check_mutable()
|
|
1278
1279
|
assert len(self._from_clause.tbls) == 1
|
|
1279
|
-
if
|
|
1280
|
-
raise excs.Error(f'Cannot use `{op_name}` on a snapshot.')
|
|
1280
|
+
# First check if it's a replica, since every replica handle is also a snapshot
|
|
1281
1281
|
if self._first_tbl.is_replica():
|
|
1282
1282
|
raise excs.Error(f'Cannot use `{op_name}` on a replica.')
|
|
1283
|
+
if self._first_tbl.is_snapshot():
|
|
1284
|
+
raise excs.Error(f'Cannot use `{op_name}` on a snapshot.')
|
|
1283
1285
|
|
|
1284
1286
|
def _validate_mutable_op_sequence(self, op_name: str, allow_select: bool) -> None:
|
|
1285
1287
|
"""Tests whether the sequence of operations on this DataFrame is valid for a mutation operation."""
|
pixeltable/env.py
CHANGED
|
@@ -28,6 +28,7 @@ import nest_asyncio # type: ignore[import-untyped]
|
|
|
28
28
|
import pixeltable_pgserver
|
|
29
29
|
import sqlalchemy as sql
|
|
30
30
|
from pillow_heif import register_heif_opener # type: ignore[import-untyped]
|
|
31
|
+
from sqlalchemy import orm
|
|
31
32
|
from tenacity import retry, stop_after_attempt, wait_exponential_jitter
|
|
32
33
|
from tqdm import TqdmWarning
|
|
33
34
|
|
|
@@ -36,6 +37,7 @@ from pixeltable.config import Config
|
|
|
36
37
|
from pixeltable.utils.console_output import ConsoleLogger, ConsoleMessageFilter, ConsoleOutputHandler, map_level
|
|
37
38
|
from pixeltable.utils.dbms import CockroachDbms, Dbms, PostgresqlDbms
|
|
38
39
|
from pixeltable.utils.http_server import make_server
|
|
40
|
+
from pixeltable.utils.object_stores import ObjectPath, StorageObjectAddress
|
|
39
41
|
|
|
40
42
|
if TYPE_CHECKING:
|
|
41
43
|
import spacy
|
|
@@ -58,7 +60,8 @@ class Env:
|
|
|
58
60
|
_log_fmt_str = '%(asctime)s %(levelname)s %(name)s %(filename)s:%(lineno)d: %(message)s'
|
|
59
61
|
|
|
60
62
|
_media_dir: Optional[Path]
|
|
61
|
-
|
|
63
|
+
_object_soa: Optional[StorageObjectAddress]
|
|
64
|
+
_file_cache_dir: Optional[Path] # cached object files with external URL
|
|
62
65
|
_dataset_cache_dir: Optional[Path] # cached datasets (eg, pytorch or COCO)
|
|
63
66
|
_log_dir: Optional[Path] # log files
|
|
64
67
|
_tmp_dir: Optional[Path] # any tmp files
|
|
@@ -88,7 +91,7 @@ class Env:
|
|
|
88
91
|
|
|
89
92
|
_resource_pool_info: dict[str, Any]
|
|
90
93
|
_current_conn: Optional[sql.Connection]
|
|
91
|
-
_current_session: Optional[
|
|
94
|
+
_current_session: Optional[orm.Session]
|
|
92
95
|
_current_isolation_level: Optional[Literal['REPEATABLE_READ', 'SERIALIZABLE']]
|
|
93
96
|
_dbms: Optional[Dbms]
|
|
94
97
|
_event_loop: Optional[asyncio.AbstractEventLoop] # event loop for ExecNode
|
|
@@ -120,7 +123,8 @@ class Env:
|
|
|
120
123
|
assert self._instance is None, 'Env is a singleton; use Env.get() to access the instance'
|
|
121
124
|
|
|
122
125
|
self._media_dir = None # computed media files
|
|
123
|
-
self.
|
|
126
|
+
self._object_soa = None # computed object files in StorageObjectAddress format
|
|
127
|
+
self._file_cache_dir = None # cached object files with external URL
|
|
124
128
|
self._dataset_cache_dir = None # cached datasets (eg, pytorch or COCO)
|
|
125
129
|
self._log_dir = None # log files
|
|
126
130
|
self._tmp_dir = None # any tmp files
|
|
@@ -224,7 +228,7 @@ class Env:
|
|
|
224
228
|
return self._current_conn
|
|
225
229
|
|
|
226
230
|
@property
|
|
227
|
-
def session(self) -> Optional[
|
|
231
|
+
def session(self) -> Optional[orm.Session]:
|
|
228
232
|
assert self._current_session is not None
|
|
229
233
|
return self._current_session
|
|
230
234
|
|
|
@@ -258,7 +262,7 @@ class Env:
|
|
|
258
262
|
self._current_isolation_level = 'SERIALIZABLE'
|
|
259
263
|
with (
|
|
260
264
|
self.engine.connect().execution_options(isolation_level=self._current_isolation_level) as conn,
|
|
261
|
-
|
|
265
|
+
orm.Session(conn) as session,
|
|
262
266
|
conn.begin(),
|
|
263
267
|
):
|
|
264
268
|
self._current_conn = conn
|
|
@@ -363,6 +367,7 @@ class Env:
|
|
|
363
367
|
|
|
364
368
|
if not self._media_dir.exists():
|
|
365
369
|
self._media_dir.mkdir()
|
|
370
|
+
self._object_soa = ObjectPath.parse_object_storage_addr(str(self._media_dir), may_contain_object_name=False)
|
|
366
371
|
if not self._file_cache_dir.exists():
|
|
367
372
|
self._file_cache_dir.mkdir()
|
|
368
373
|
if not self._dataset_cache_dir.exists():
|
|
@@ -615,15 +620,17 @@ class Env:
|
|
|
615
620
|
Args:
|
|
616
621
|
- name: The name of the client
|
|
617
622
|
"""
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
+
# Return the existing client if it has already been constructed
|
|
624
|
+
with _registered_clients_lock:
|
|
625
|
+
cl = _registered_clients[name]
|
|
626
|
+
if cl.client_obj is not None:
|
|
627
|
+
return cl.client_obj # Already initialized
|
|
623
628
|
|
|
629
|
+
# Retrieve parameters required to construct the requested client.
|
|
624
630
|
init_kwargs: dict[str, Any] = {}
|
|
625
631
|
for param in cl.params.values():
|
|
626
632
|
# Determine the type of the parameter for proper config parsing.
|
|
633
|
+
pname = param.name
|
|
627
634
|
t = param.annotation
|
|
628
635
|
# Deference Optional[T]
|
|
629
636
|
if typing.get_origin(t) in (typing.Union, types.UnionType):
|
|
@@ -633,27 +640,31 @@ class Env:
|
|
|
633
640
|
elif args[1] is type(None):
|
|
634
641
|
t = args[0]
|
|
635
642
|
assert isinstance(t, type), t
|
|
636
|
-
arg: Any = Config.get().get_value(
|
|
643
|
+
arg: Any = Config.get().get_value(pname, t, section=name)
|
|
637
644
|
if arg is not None:
|
|
638
|
-
init_kwargs[
|
|
645
|
+
init_kwargs[pname] = arg
|
|
639
646
|
elif param.default is inspect.Parameter.empty:
|
|
640
647
|
raise excs.Error(
|
|
641
|
-
f'`{name}` client not initialized: parameter `{
|
|
642
|
-
f'To fix this, specify the `{name.upper()}_{
|
|
643
|
-
f'or put `{
|
|
648
|
+
f'`{name}` client not initialized: parameter `{pname}` is not configured.\n'
|
|
649
|
+
f'To fix this, specify the `{name.upper()}_{pname.upper()}` environment variable, '
|
|
650
|
+
f'or put `{pname.lower()}` in the `{name.lower()}` section of $PIXELTABLE_HOME/config.toml.'
|
|
644
651
|
)
|
|
645
652
|
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
653
|
+
# Construct the requested client
|
|
654
|
+
with _registered_clients_lock:
|
|
655
|
+
if cl.client_obj is not None:
|
|
656
|
+
return cl.client_obj # Already initialized
|
|
657
|
+
cl.client_obj = cl.init_fn(**init_kwargs)
|
|
658
|
+
self._logger.info(f'Initialized `{name}` client with parameters: {init_kwargs}.')
|
|
659
|
+
return cl.client_obj
|
|
649
660
|
|
|
650
661
|
def _start_web_server(self) -> None:
|
|
651
662
|
"""
|
|
652
663
|
The http server root is the file system root.
|
|
653
664
|
eg: /home/media/foo.mp4 is located at http://127.0.0.1:{port}/home/media/foo.mp4
|
|
654
665
|
On Windows, the server will translate paths like http://127.0.0.1:{port}/c:/media/foo.mp4
|
|
655
|
-
This arrangement enables serving
|
|
656
|
-
as well as external
|
|
666
|
+
This arrangement enables serving objects hosted within _home,
|
|
667
|
+
as well as external objects inserted into pixeltable or produced by pixeltable.
|
|
657
668
|
The port is chosen dynamically to prevent conflicts.
|
|
658
669
|
"""
|
|
659
670
|
# Port 0 means OS picks one for us.
|
|
@@ -713,10 +724,12 @@ class Env:
|
|
|
713
724
|
def __register_packages(self) -> None:
|
|
714
725
|
"""Declare optional packages that are utilized by some parts of the code."""
|
|
715
726
|
self.__register_package('anthropic')
|
|
727
|
+
self.__register_package('azure.storage.blob', library_name='azure-storage-blob')
|
|
716
728
|
self.__register_package('boto3')
|
|
717
729
|
self.__register_package('datasets')
|
|
718
730
|
self.__register_package('fiftyone')
|
|
719
731
|
self.__register_package('fireworks', library_name='fireworks-ai')
|
|
732
|
+
self.__register_package('google.cloud.storage', library_name='google-cloud-storage')
|
|
720
733
|
self.__register_package('google.genai', library_name='google-genai')
|
|
721
734
|
self.__register_package('groq')
|
|
722
735
|
self.__register_package('huggingface_hub', library_name='huggingface-hub')
|
|
@@ -757,6 +770,10 @@ class Env:
|
|
|
757
770
|
library_name=library_name or package_name, # defaults to package_name unless specified otherwise
|
|
758
771
|
)
|
|
759
772
|
|
|
773
|
+
def require_binary(self, binary_name: str) -> None:
|
|
774
|
+
if not shutil.which(binary_name):
|
|
775
|
+
raise excs.Error(f'{binary_name} is not installed or not in PATH. Please install it to use this feature.')
|
|
776
|
+
|
|
760
777
|
def require_package(self, package_name: str, min_version: Optional[list[int]] = None) -> None:
|
|
761
778
|
"""
|
|
762
779
|
Checks whether the specified optional package is available. If not, raises an exception
|
|
@@ -815,6 +832,12 @@ class Env:
|
|
|
815
832
|
assert self._media_dir is not None
|
|
816
833
|
return self._media_dir
|
|
817
834
|
|
|
835
|
+
@property
|
|
836
|
+
def object_soa(self) -> StorageObjectAddress:
|
|
837
|
+
assert self._media_dir is not None
|
|
838
|
+
assert self._object_soa is not None
|
|
839
|
+
return self._object_soa
|
|
840
|
+
|
|
818
841
|
@property
|
|
819
842
|
def file_cache_dir(self) -> Path:
|
|
820
843
|
assert self._file_cache_dir is not None
|
|
@@ -947,11 +970,13 @@ def register_client(name: str) -> Callable:
|
|
|
947
970
|
def decorator(fn: Callable) -> None:
|
|
948
971
|
sig = inspect.signature(fn)
|
|
949
972
|
params = dict(sig.parameters)
|
|
950
|
-
|
|
973
|
+
with _registered_clients_lock:
|
|
974
|
+
_registered_clients[name] = ApiClient(init_fn=fn, params=params)
|
|
951
975
|
|
|
952
976
|
return decorator
|
|
953
977
|
|
|
954
978
|
|
|
979
|
+
_registered_clients_lock: threading.Lock = threading.Lock()
|
|
955
980
|
_registered_clients: dict[str, ApiClient] = {}
|
|
956
981
|
|
|
957
982
|
|
pixeltable/exec/__init__.py
CHANGED
|
@@ -8,5 +8,6 @@ from .exec_context import ExecContext
|
|
|
8
8
|
from .exec_node import ExecNode
|
|
9
9
|
from .expr_eval import ExprEvalNode
|
|
10
10
|
from .in_memory_data_node import InMemoryDataNode
|
|
11
|
+
from .object_store_save_node import ObjectStoreSaveNode
|
|
11
12
|
from .row_update_node import RowUpdateNode
|
|
12
13
|
from .sql_node import SqlAggregationNode, SqlJoinNode, SqlLookupNode, SqlNode, SqlSampleNode, SqlScanNode
|
|
@@ -103,6 +103,5 @@ class AggregationNode(ExecNode):
|
|
|
103
103
|
self.row_builder.eval(prev_row, self.agg_fn_eval_ctx, profile=self.ctx.profile)
|
|
104
104
|
self.output_batch.add_row(prev_row)
|
|
105
105
|
|
|
106
|
-
self.output_batch.flush_imgs(None, self.row_builder.stored_img_cols, self.flushed_img_slots)
|
|
107
106
|
_logger.debug(f'AggregateNode: consumed {num_input_rows} rows, returning {len(self.output_batch.rows)} rows')
|
|
108
107
|
yield self.output_batch
|