pixeltable 0.3.9__py3-none-any.whl → 0.3.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +2 -3
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +2 -1
- pixeltable/catalog/catalog.py +63 -36
- pixeltable/catalog/column.py +11 -4
- pixeltable/catalog/dir.py +5 -5
- pixeltable/catalog/globals.py +28 -14
- pixeltable/catalog/insertable_table.py +81 -43
- pixeltable/catalog/path.py +2 -2
- pixeltable/catalog/table.py +140 -109
- pixeltable/catalog/table_version.py +60 -43
- pixeltable/catalog/table_version_handle.py +3 -0
- pixeltable/catalog/table_version_path.py +1 -1
- pixeltable/catalog/view.py +17 -9
- pixeltable/dataframe.py +5 -3
- pixeltable/env.py +109 -43
- pixeltable/exec/__init__.py +2 -0
- pixeltable/exec/aggregation_node.py +6 -8
- pixeltable/exec/cache_prefetch_node.py +4 -7
- pixeltable/exec/component_iteration_node.py +1 -3
- pixeltable/exec/data_row_batch.py +1 -2
- pixeltable/exec/exec_context.py +1 -1
- pixeltable/exec/exec_node.py +2 -3
- pixeltable/exec/expr_eval/__init__.py +2 -0
- pixeltable/exec/expr_eval/evaluators.py +137 -20
- pixeltable/exec/expr_eval/expr_eval_node.py +43 -64
- pixeltable/exec/expr_eval/globals.py +68 -7
- pixeltable/exec/expr_eval/schedulers.py +25 -23
- pixeltable/exec/in_memory_data_node.py +8 -6
- pixeltable/exec/row_update_node.py +3 -4
- pixeltable/exec/sql_node.py +16 -17
- pixeltable/exprs/__init__.py +3 -2
- pixeltable/exprs/arithmetic_expr.py +2 -0
- pixeltable/exprs/column_property_ref.py +1 -1
- pixeltable/exprs/column_ref.py +39 -3
- pixeltable/exprs/compound_predicate.py +1 -1
- pixeltable/exprs/data_row.py +17 -1
- pixeltable/exprs/expr.py +51 -21
- pixeltable/exprs/function_call.py +34 -2
- pixeltable/exprs/globals.py +12 -0
- pixeltable/exprs/json_mapper.py +95 -48
- pixeltable/exprs/json_path.py +3 -10
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +33 -6
- pixeltable/exprs/similarity_expr.py +6 -21
- pixeltable/exprs/sql_element_cache.py +1 -1
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/ext/__init__.py +1 -1
- pixeltable/ext/functions/__init__.py +1 -1
- pixeltable/ext/functions/whisperx.py +1 -1
- pixeltable/ext/functions/yolox.py +22 -65
- pixeltable/func/aggregate_function.py +1 -1
- pixeltable/func/callable_function.py +2 -5
- pixeltable/func/expr_template_function.py +22 -2
- pixeltable/func/function.py +4 -5
- pixeltable/func/function_registry.py +1 -1
- pixeltable/func/signature.py +1 -1
- pixeltable/func/tools.py +2 -2
- pixeltable/func/udf.py +2 -2
- pixeltable/functions/__init__.py +2 -2
- pixeltable/functions/anthropic.py +2 -2
- pixeltable/functions/audio.py +1 -1
- pixeltable/functions/deepseek.py +1 -1
- pixeltable/functions/fireworks.py +1 -1
- pixeltable/functions/globals.py +22 -11
- pixeltable/functions/huggingface.py +1 -1
- pixeltable/functions/image.py +1 -1
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +1 -1
- pixeltable/functions/math.py +1 -1
- pixeltable/functions/mistralai.py +1 -1
- pixeltable/functions/ollama.py +1 -1
- pixeltable/functions/openai.py +2 -2
- pixeltable/functions/replicate.py +1 -1
- pixeltable/functions/string.py +1 -1
- pixeltable/functions/timestamp.py +1 -1
- pixeltable/functions/together.py +1 -1
- pixeltable/functions/util.py +1 -1
- pixeltable/functions/video.py +2 -2
- pixeltable/functions/vision.py +2 -2
- pixeltable/globals.py +85 -33
- pixeltable/index/embedding_index.py +12 -1
- pixeltable/io/__init__.py +8 -5
- pixeltable/io/datarows.py +138 -0
- pixeltable/io/external_store.py +8 -5
- pixeltable/io/fiftyone.py +6 -7
- pixeltable/io/globals.py +7 -160
- pixeltable/io/hf_datasets.py +21 -98
- pixeltable/io/label_studio.py +21 -20
- pixeltable/io/pandas.py +35 -48
- pixeltable/io/parquet.py +17 -42
- pixeltable/io/table_data_conduit.py +569 -0
- pixeltable/io/utils.py +6 -21
- pixeltable/iterators/__init__.py +1 -1
- pixeltable/metadata/__init__.py +6 -4
- pixeltable/metadata/converters/convert_24.py +3 -3
- pixeltable/metadata/converters/convert_25.py +1 -1
- pixeltable/metadata/converters/convert_29.py +1 -1
- pixeltable/metadata/converters/convert_30.py +50 -0
- pixeltable/metadata/converters/util.py +26 -1
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +3 -0
- pixeltable/store.py +2 -2
- pixeltable/type_system.py +19 -7
- pixeltable/utils/arrow.py +32 -7
- pixeltable/utils/console_output.py +3 -2
- pixeltable/utils/coroutine.py +3 -3
- pixeltable/utils/dbms.py +66 -0
- pixeltable/utils/documents.py +61 -67
- pixeltable/utils/filecache.py +1 -1
- pixeltable/utils/http_server.py +3 -2
- pixeltable/utils/pytorch.py +1 -1
- pixeltable/utils/sql.py +1 -1
- pixeltable-0.3.11.dist-info/METADATA +436 -0
- pixeltable-0.3.11.dist-info/RECORD +179 -0
- {pixeltable-0.3.9.dist-info → pixeltable-0.3.11.dist-info}/WHEEL +1 -1
- pixeltable/catalog/path_dict.py +0 -169
- pixeltable-0.3.9.dist-info/METADATA +0 -382
- pixeltable-0.3.9.dist-info/RECORD +0 -175
- {pixeltable-0.3.9.dist-info → pixeltable-0.3.11.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.9.dist-info → pixeltable-0.3.11.dist-info}/entry_points.txt +0 -0
pixeltable/env.py
CHANGED
|
@@ -30,6 +30,7 @@ from tqdm import TqdmWarning
|
|
|
30
30
|
from pixeltable import exceptions as excs
|
|
31
31
|
from pixeltable.config import Config
|
|
32
32
|
from pixeltable.utils.console_output import ConsoleLogger, ConsoleMessageFilter, ConsoleOutputHandler, map_level
|
|
33
|
+
from pixeltable.utils.dbms import CockroachDbms, Dbms, PostgresqlDbms
|
|
33
34
|
from pixeltable.utils.http_server import make_server
|
|
34
35
|
|
|
35
36
|
if TYPE_CHECKING:
|
|
@@ -43,7 +44,9 @@ T = TypeVar('T')
|
|
|
43
44
|
|
|
44
45
|
class Env:
|
|
45
46
|
"""
|
|
46
|
-
Store for
|
|
47
|
+
Store runtime globals for both local and non-local environments.
|
|
48
|
+
For a local environment, Pixeltable uses an embedded PostgreSQL server that runs locally in a separate process.
|
|
49
|
+
For a non-local environment, Pixeltable uses a connection string to the externally managed database.
|
|
47
50
|
"""
|
|
48
51
|
|
|
49
52
|
_instance: Optional[Env] = None
|
|
@@ -58,7 +61,7 @@ class Env:
|
|
|
58
61
|
_sa_engine: Optional[sql.engine.base.Engine]
|
|
59
62
|
_pgdata_dir: Optional[Path]
|
|
60
63
|
_db_name: Optional[str]
|
|
61
|
-
_db_server: Optional[pixeltable_pgserver.PostgresServer]
|
|
64
|
+
_db_server: Optional[pixeltable_pgserver.PostgresServer] # set only when running in local environment
|
|
62
65
|
_db_url: Optional[str]
|
|
63
66
|
_default_time_zone: Optional[ZoneInfo]
|
|
64
67
|
|
|
@@ -81,6 +84,7 @@ class Env:
|
|
|
81
84
|
_resource_pool_info: dict[str, Any]
|
|
82
85
|
_current_conn: Optional[sql.Connection]
|
|
83
86
|
_current_session: Optional[sql.orm.Session]
|
|
87
|
+
_dbms: Optional[Dbms]
|
|
84
88
|
|
|
85
89
|
@classmethod
|
|
86
90
|
def get(cls) -> Env:
|
|
@@ -98,7 +102,7 @@ class Env:
|
|
|
98
102
|
cls._instance = env
|
|
99
103
|
cls.__initializing = False
|
|
100
104
|
|
|
101
|
-
def __init__(self):
|
|
105
|
+
def __init__(self) -> None:
|
|
102
106
|
assert self._instance is None, 'Env is a singleton; use Env.get() to access the instance'
|
|
103
107
|
|
|
104
108
|
self._media_dir = None # computed media files
|
|
@@ -112,7 +116,6 @@ class Env:
|
|
|
112
116
|
self._db_server = None
|
|
113
117
|
self._db_url = None
|
|
114
118
|
self._default_time_zone = None
|
|
115
|
-
|
|
116
119
|
self.__optional_packages = {}
|
|
117
120
|
self._spacy_nlp = None
|
|
118
121
|
self._httpd = None
|
|
@@ -136,6 +139,7 @@ class Env:
|
|
|
136
139
|
self._resource_pool_info = {}
|
|
137
140
|
self._current_conn = None
|
|
138
141
|
self._current_session = None
|
|
142
|
+
self._dbms = None
|
|
139
143
|
|
|
140
144
|
@property
|
|
141
145
|
def db_url(self) -> str:
|
|
@@ -147,6 +151,18 @@ class Env:
|
|
|
147
151
|
assert self._http_address is not None
|
|
148
152
|
return self._http_address
|
|
149
153
|
|
|
154
|
+
@property
|
|
155
|
+
def user(self) -> Optional[str]:
|
|
156
|
+
return Config.get().get_string_value('user')
|
|
157
|
+
|
|
158
|
+
@user.setter
|
|
159
|
+
def user(self, user: Optional[str]) -> None:
|
|
160
|
+
if user is None:
|
|
161
|
+
if 'PIXELTABLE_USER' in os.environ:
|
|
162
|
+
del os.environ['PIXELTABLE_USER']
|
|
163
|
+
else:
|
|
164
|
+
os.environ['PIXELTABLE_USER'] = user
|
|
165
|
+
|
|
150
166
|
@property
|
|
151
167
|
def default_time_zone(self) -> Optional[ZoneInfo]:
|
|
152
168
|
return self._default_time_zone
|
|
@@ -170,9 +186,19 @@ class Env:
|
|
|
170
186
|
assert self._current_session is not None
|
|
171
187
|
return self._current_session
|
|
172
188
|
|
|
189
|
+
@property
|
|
190
|
+
def dbms(self) -> Optional[Dbms]:
|
|
191
|
+
assert self._dbms is not None
|
|
192
|
+
return self._dbms
|
|
193
|
+
|
|
173
194
|
def in_xact(self) -> bool:
|
|
174
195
|
return self._current_conn is not None
|
|
175
196
|
|
|
197
|
+
@property
|
|
198
|
+
def is_local(self) -> bool:
|
|
199
|
+
assert self._db_url is not None # is_local should be called only after db initialization
|
|
200
|
+
return self._db_server is not None
|
|
201
|
+
|
|
176
202
|
@contextmanager
|
|
177
203
|
def begin_xact(self) -> Iterator[sql.Connection]:
|
|
178
204
|
"""Return a context manager that yields a connection to the database. Idempotent."""
|
|
@@ -346,16 +372,13 @@ class Env:
|
|
|
346
372
|
|
|
347
373
|
self.clear_tmp_dir()
|
|
348
374
|
|
|
349
|
-
|
|
350
|
-
self.
|
|
375
|
+
# configure pixeltable database
|
|
376
|
+
self._init_db(config)
|
|
351
377
|
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
cleanup_mode = 'stop' if platform.system() == 'Windows' else None
|
|
357
|
-
self._db_server = pixeltable_pgserver.get_server(self._pgdata_dir, cleanup_mode=cleanup_mode)
|
|
358
|
-
self._db_url = self._db_server.get_uri(database=self._db_name, driver='psycopg')
|
|
378
|
+
if reinit_db and not self.is_local:
|
|
379
|
+
raise excs.Error(
|
|
380
|
+
'Reinitializing pixeltable database is not supported when running in non-local environment'
|
|
381
|
+
)
|
|
359
382
|
|
|
360
383
|
tz_name = config.get_string_value('time_zone')
|
|
361
384
|
if tz_name is not None:
|
|
@@ -382,11 +405,8 @@ class Env:
|
|
|
382
405
|
# Create the SQLAlchemy engine. This will also set the default time zone.
|
|
383
406
|
self._create_engine(time_zone_name=tz_name, echo=echo)
|
|
384
407
|
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
metadata.schema.base_metadata.create_all(self._sa_engine)
|
|
389
|
-
metadata.create_system_info(self._sa_engine)
|
|
408
|
+
# Create catalog tables and system metadata
|
|
409
|
+
self._init_metadata()
|
|
390
410
|
|
|
391
411
|
self.console_logger.info(f'Connected to Pixeltable database at: {self.db_url}')
|
|
392
412
|
|
|
@@ -394,12 +414,65 @@ class Env:
|
|
|
394
414
|
self._set_up_runtime()
|
|
395
415
|
self.log_to_stdout(False)
|
|
396
416
|
|
|
417
|
+
def _init_db(self, config: Config) -> None:
|
|
418
|
+
"""
|
|
419
|
+
Initialize the pixeltable database along with its associated DBMS.
|
|
420
|
+
"""
|
|
421
|
+
db_connect_str = config.get_string_value('DB_CONNECT_STR')
|
|
422
|
+
if db_connect_str is not None:
|
|
423
|
+
try:
|
|
424
|
+
db_url = sql.make_url(db_connect_str)
|
|
425
|
+
except sql.exc.ArgumentError as e:
|
|
426
|
+
error = f'Invalid db connection string {db_connect_str}: {e}'
|
|
427
|
+
self._logger.error(error)
|
|
428
|
+
raise excs.Error(error) from e
|
|
429
|
+
self._db_url = db_url.render_as_string(hide_password=False)
|
|
430
|
+
self._db_name = db_url.database # use the dbname given in connect string
|
|
431
|
+
dialect = db_url.get_dialect().name
|
|
432
|
+
if dialect == 'cockroachdb':
|
|
433
|
+
self._dbms = CockroachDbms(db_url)
|
|
434
|
+
else:
|
|
435
|
+
raise excs.Error(f'Unsupported DBMS {dialect}')
|
|
436
|
+
# Check if database exists
|
|
437
|
+
if not self._store_db_exists():
|
|
438
|
+
error = f'Database {self._db_name!r} does not exist'
|
|
439
|
+
self._logger.error(error)
|
|
440
|
+
raise excs.Error(error)
|
|
441
|
+
self._logger.info(f'Using database at: {self.db_url}')
|
|
442
|
+
else:
|
|
443
|
+
self._db_name = os.environ.get('PIXELTABLE_DB', 'pixeltable')
|
|
444
|
+
self._pgdata_dir = Path(os.environ.get('PIXELTABLE_PGDATA', str(Config.get().home / 'pgdata')))
|
|
445
|
+
# cleanup_mode=None will leave the postgres process running after Python exits
|
|
446
|
+
# cleanup_mode='stop' will terminate the postgres process when Python exits
|
|
447
|
+
# On Windows, we need cleanup_mode='stop' because child processes are killed automatically when the parent
|
|
448
|
+
# process (such as Terminal or VSCode) exits, potentially leaving it in an unusable state.
|
|
449
|
+
cleanup_mode = 'stop' if platform.system() == 'Windows' else None
|
|
450
|
+
self._db_server = pixeltable_pgserver.get_server(self._pgdata_dir, cleanup_mode=cleanup_mode)
|
|
451
|
+
self._db_url = self._db_server.get_uri(database=self._db_name, driver='psycopg')
|
|
452
|
+
self._dbms = PostgresqlDbms(sql.make_url(self._db_url))
|
|
453
|
+
assert self._dbms is not None
|
|
454
|
+
assert self._db_url is not None
|
|
455
|
+
assert self._db_name is not None
|
|
456
|
+
|
|
457
|
+
def _init_metadata(self) -> None:
|
|
458
|
+
"""
|
|
459
|
+
Create pixeltable metadata tables and system metadata.
|
|
460
|
+
This is an idempotent operation.
|
|
461
|
+
"""
|
|
462
|
+
assert self._sa_engine is not None
|
|
463
|
+
from pixeltable import metadata
|
|
464
|
+
|
|
465
|
+
metadata.schema.base_metadata.create_all(self._sa_engine, checkfirst=True)
|
|
466
|
+
metadata.create_system_info(self._sa_engine)
|
|
467
|
+
|
|
397
468
|
def _create_engine(self, time_zone_name: Optional[str], echo: bool = False) -> None:
|
|
398
469
|
connect_args = {} if time_zone_name is None else {'options': f'-c timezone={time_zone_name}'}
|
|
399
470
|
self._sa_engine = sql.create_engine(
|
|
400
|
-
self.db_url, echo=echo, isolation_level=
|
|
471
|
+
self.db_url, echo=echo, isolation_level=self._dbms.transaction_isolation_level, connect_args=connect_args
|
|
401
472
|
)
|
|
473
|
+
|
|
402
474
|
self._logger.info(f'Created SQLAlchemy engine at: {self.db_url}')
|
|
475
|
+
|
|
403
476
|
with self.engine.begin() as conn:
|
|
404
477
|
tz_name = conn.execute(sql.text('SHOW TIME ZONE')).scalar()
|
|
405
478
|
assert isinstance(tz_name, str)
|
|
@@ -409,8 +482,7 @@ class Env:
|
|
|
409
482
|
def _store_db_exists(self) -> bool:
|
|
410
483
|
assert self._db_name is not None
|
|
411
484
|
# don't try to connect to self.db_name, it may not exist
|
|
412
|
-
|
|
413
|
-
engine = sql.create_engine(db_url, future=True)
|
|
485
|
+
engine = sql.create_engine(self._dbms.default_system_db_url(), future=True)
|
|
414
486
|
try:
|
|
415
487
|
with engine.begin() as conn:
|
|
416
488
|
stmt = f"SELECT COUNT(*) FROM pg_database WHERE datname = '{self._db_name}'"
|
|
@@ -423,23 +495,17 @@ class Env:
|
|
|
423
495
|
def _create_store_db(self) -> None:
|
|
424
496
|
assert self._db_name is not None
|
|
425
497
|
# create the db
|
|
426
|
-
|
|
427
|
-
engine = sql.create_engine(pg_db_url, future=True, isolation_level='AUTOCOMMIT')
|
|
498
|
+
engine = sql.create_engine(self._dbms.default_system_db_url(), future=True, isolation_level='AUTOCOMMIT')
|
|
428
499
|
preparer = engine.dialect.identifier_preparer
|
|
429
500
|
try:
|
|
430
501
|
with engine.begin() as conn:
|
|
431
|
-
|
|
432
|
-
stmt = (
|
|
433
|
-
f'CREATE DATABASE {preparer.quote(self._db_name)} '
|
|
434
|
-
"ENCODING 'utf-8' LC_COLLATE 'C' LC_CTYPE 'C' TEMPLATE template0"
|
|
435
|
-
)
|
|
502
|
+
stmt = self._dbms.create_db_stmt(preparer.quote(self._db_name))
|
|
436
503
|
conn.execute(sql.text(stmt))
|
|
437
504
|
finally:
|
|
438
505
|
engine.dispose()
|
|
439
506
|
|
|
440
507
|
# enable pgvector
|
|
441
|
-
|
|
442
|
-
engine = sql.create_engine(store_db_url, future=True, isolation_level='AUTOCOMMIT')
|
|
508
|
+
engine = sql.create_engine(self.db_url, future=True, isolation_level='AUTOCOMMIT')
|
|
443
509
|
try:
|
|
444
510
|
with engine.begin() as conn:
|
|
445
511
|
conn.execute(sql.text('CREATE EXTENSION vector'))
|
|
@@ -448,21 +514,21 @@ class Env:
|
|
|
448
514
|
|
|
449
515
|
def _drop_store_db(self) -> None:
|
|
450
516
|
assert self._db_name is not None
|
|
451
|
-
|
|
452
|
-
engine = sql.create_engine(db_url, future=True, isolation_level='AUTOCOMMIT')
|
|
517
|
+
engine = sql.create_engine(self._dbms.default_system_db_url(), future=True, isolation_level='AUTOCOMMIT')
|
|
453
518
|
preparer = engine.dialect.identifier_preparer
|
|
454
519
|
try:
|
|
455
520
|
with engine.begin() as conn:
|
|
456
521
|
# terminate active connections
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
522
|
+
if self._db_server is not None:
|
|
523
|
+
stmt = f"""
|
|
524
|
+
SELECT pg_terminate_backend(pg_stat_activity.pid)
|
|
525
|
+
FROM pg_stat_activity
|
|
526
|
+
WHERE pg_stat_activity.datname = '{self._db_name}'
|
|
527
|
+
AND pid <> pg_backend_pid()
|
|
528
|
+
"""
|
|
529
|
+
conn.execute(sql.text(stmt))
|
|
464
530
|
# drop db
|
|
465
|
-
stmt =
|
|
531
|
+
stmt = self._dbms.drop_db_stmt(preparer.quote(self._db_name))
|
|
466
532
|
conn.execute(sql.text(stmt))
|
|
467
533
|
finally:
|
|
468
534
|
engine.dispose()
|
|
@@ -524,7 +590,7 @@ class Env:
|
|
|
524
590
|
port = self._httpd.server_address[1]
|
|
525
591
|
self._http_address = f'http://127.0.0.1:{port}'
|
|
526
592
|
|
|
527
|
-
def run_server():
|
|
593
|
+
def run_server() -> None:
|
|
528
594
|
logging.log(logging.INFO, f'running web server at {self._http_address}')
|
|
529
595
|
self._httpd.serve_forever()
|
|
530
596
|
|
|
@@ -567,7 +633,7 @@ class Env:
|
|
|
567
633
|
self.__register_package('transformers')
|
|
568
634
|
self.__register_package('whisper', library_name='openai-whisper')
|
|
569
635
|
self.__register_package('whisperx')
|
|
570
|
-
self.__register_package('yolox', library_name='
|
|
636
|
+
self.__register_package('yolox', library_name='pixeltable-yolox')
|
|
571
637
|
|
|
572
638
|
def __register_package(self, package_name: str, library_name: Optional[str] = None) -> None:
|
|
573
639
|
is_installed: bool
|
|
@@ -779,7 +845,7 @@ class RateLimitsInfo:
|
|
|
779
845
|
def reset(self) -> None:
|
|
780
846
|
self.resource_limits.clear()
|
|
781
847
|
|
|
782
|
-
def record(self, **kwargs) -> None:
|
|
848
|
+
def record(self, **kwargs: Any) -> None:
|
|
783
849
|
now = datetime.datetime.now(tz=datetime.timezone.utc)
|
|
784
850
|
if len(self.resource_limits) == 0:
|
|
785
851
|
self.resource_limits = {k: RateLimitInfo(k, now, *v) for k, v in kwargs.items() if v is not None}
|
pixeltable/exec/__init__.py
CHANGED
|
@@ -2,11 +2,9 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
import sys
|
|
5
|
-
from typing import Any, AsyncIterator, Iterable,
|
|
5
|
+
from typing import Any, AsyncIterator, Iterable, Optional, cast
|
|
6
6
|
|
|
7
|
-
import
|
|
8
|
-
import pixeltable.exceptions as excs
|
|
9
|
-
import pixeltable.exprs as exprs
|
|
7
|
+
from pixeltable import catalog, exceptions as excs, exprs
|
|
10
8
|
|
|
11
9
|
from .data_row_batch import DataRowBatch
|
|
12
10
|
from .exec_node import ExecNode
|
|
@@ -52,20 +50,20 @@ class AggregationNode(ExecNode):
|
|
|
52
50
|
for fn_call in self.agg_fn_calls:
|
|
53
51
|
try:
|
|
54
52
|
fn_call.reset_agg()
|
|
55
|
-
except Exception as
|
|
53
|
+
except Exception as exc:
|
|
56
54
|
_, _, exc_tb = sys.exc_info()
|
|
57
55
|
expr_msg = f'init() function of the aggregate {fn_call}'
|
|
58
|
-
raise excs.ExprEvalError(fn_call, expr_msg,
|
|
56
|
+
raise excs.ExprEvalError(fn_call, expr_msg, exc, exc_tb, [], row_num) from exc
|
|
59
57
|
|
|
60
58
|
def _update_agg_state(self, row: exprs.DataRow, row_num: int) -> None:
|
|
61
59
|
for fn_call in self.agg_fn_calls:
|
|
62
60
|
try:
|
|
63
61
|
fn_call.update(row)
|
|
64
|
-
except Exception as
|
|
62
|
+
except Exception as exc:
|
|
65
63
|
_, _, exc_tb = sys.exc_info()
|
|
66
64
|
expr_msg = f'update() function of the aggregate {fn_call}'
|
|
67
65
|
input_vals = [row[d.slot_idx] for d in fn_call.dependencies()]
|
|
68
|
-
raise excs.ExprEvalError(fn_call, expr_msg,
|
|
66
|
+
raise excs.ExprEvalError(fn_call, expr_msg, exc, exc_tb, input_vals, row_num) from exc
|
|
69
67
|
|
|
70
68
|
async def __aiter__(self) -> AsyncIterator[DataRowBatch]:
|
|
71
69
|
prev_row: Optional[exprs.DataRow] = None
|
|
@@ -12,10 +12,7 @@ from pathlib import Path
|
|
|
12
12
|
from typing import Any, AsyncIterator, Iterator, Optional
|
|
13
13
|
from uuid import UUID
|
|
14
14
|
|
|
15
|
-
import
|
|
16
|
-
import pixeltable.exceptions as excs
|
|
17
|
-
import pixeltable.exprs as exprs
|
|
18
|
-
from pixeltable import catalog
|
|
15
|
+
from pixeltable import catalog, env, exceptions as excs, exprs
|
|
19
16
|
from pixeltable.utils.filecache import FileCache
|
|
20
17
|
|
|
21
18
|
from .data_row_batch import DataRowBatch
|
|
@@ -234,7 +231,7 @@ class CachePrefetchNode(ExecNode):
|
|
|
234
231
|
assert len(parsed.scheme) > 1 and parsed.scheme != 'file'
|
|
235
232
|
# preserve the file extension, if there is one
|
|
236
233
|
extension = ''
|
|
237
|
-
if parsed.path
|
|
234
|
+
if parsed.path:
|
|
238
235
|
p = Path(urllib.parse.unquote(urllib.request.url2pathname(parsed.path)))
|
|
239
236
|
extension = p.suffix
|
|
240
237
|
tmp_path = env.Env.get().create_tmp_path(extension=extension)
|
|
@@ -253,12 +250,12 @@ class CachePrefetchNode(ExecNode):
|
|
|
253
250
|
}
|
|
254
251
|
self.boto_client = get_client(**config)
|
|
255
252
|
self.boto_client.download_file(parsed.netloc, parsed.path.lstrip('/'), str(tmp_path))
|
|
256
|
-
elif parsed.scheme
|
|
253
|
+
elif parsed.scheme in ('http', 'https'):
|
|
257
254
|
with urllib.request.urlopen(url) as resp, open(tmp_path, 'wb') as f:
|
|
258
255
|
data = resp.read()
|
|
259
256
|
f.write(data)
|
|
260
257
|
else:
|
|
261
|
-
|
|
258
|
+
raise AssertionError(f'Unsupported URL scheme: {parsed.scheme}')
|
|
262
259
|
_logger.debug(f'Downloaded {url} to {tmp_path}')
|
|
263
260
|
return tmp_path, None
|
|
264
261
|
except Exception as e:
|
|
@@ -1,8 +1,6 @@
|
|
|
1
1
|
from typing import AsyncIterator
|
|
2
2
|
|
|
3
|
-
import
|
|
4
|
-
import pixeltable.exceptions as excs
|
|
5
|
-
import pixeltable.exprs as exprs
|
|
3
|
+
from pixeltable import catalog, exceptions as excs, exprs
|
|
6
4
|
|
|
7
5
|
from .data_row_batch import DataRowBatch
|
|
8
6
|
from .exec_node import ExecNode
|
|
@@ -3,8 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import logging
|
|
4
4
|
from typing import Iterator, Optional
|
|
5
5
|
|
|
6
|
-
|
|
7
|
-
import pixeltable.exprs as exprs
|
|
6
|
+
from pixeltable import catalog, exprs
|
|
8
7
|
from pixeltable.utils.media_store import MediaStore
|
|
9
8
|
|
|
10
9
|
_logger = logging.getLogger('pixeltable')
|
pixeltable/exec/exec_context.py
CHANGED
pixeltable/exec/exec_node.py
CHANGED
|
@@ -3,7 +3,6 @@ from __future__ import annotations
|
|
|
3
3
|
import abc
|
|
4
4
|
import asyncio
|
|
5
5
|
import logging
|
|
6
|
-
import sys
|
|
7
6
|
from typing import AsyncIterator, Iterable, Iterator, Optional, TypeVar
|
|
8
7
|
|
|
9
8
|
from pixeltable import exprs
|
|
@@ -70,12 +69,12 @@ class ExecNode(abc.ABC):
|
|
|
70
69
|
|
|
71
70
|
nest_asyncio.apply()
|
|
72
71
|
loop = running_loop
|
|
73
|
-
_logger.debug(
|
|
72
|
+
_logger.debug('Patched running loop')
|
|
74
73
|
except RuntimeError:
|
|
75
74
|
loop = asyncio.new_event_loop()
|
|
76
75
|
asyncio.set_event_loop(loop)
|
|
77
76
|
|
|
78
|
-
if
|
|
77
|
+
if _logger.isEnabledFor(logging.DEBUG):
|
|
79
78
|
loop.set_debug(True)
|
|
80
79
|
|
|
81
80
|
aiter = self.__aiter__()
|