pixeltable 0.4.18__py3-none-any.whl → 0.4.19__py3-none-any.whl
This diff represents the changes between publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Potentially problematic release: this version of pixeltable might be problematic.
- pixeltable/__init__.py +1 -1
- pixeltable/_version.py +1 -0
- pixeltable/catalog/catalog.py +119 -100
- pixeltable/catalog/column.py +104 -115
- pixeltable/catalog/globals.py +1 -2
- pixeltable/catalog/insertable_table.py +44 -49
- pixeltable/catalog/path.py +3 -4
- pixeltable/catalog/schema_object.py +4 -4
- pixeltable/catalog/table.py +118 -122
- pixeltable/catalog/table_metadata.py +6 -6
- pixeltable/catalog/table_version.py +322 -257
- pixeltable/catalog/table_version_handle.py +4 -4
- pixeltable/catalog/table_version_path.py +9 -10
- pixeltable/catalog/tbl_ops.py +9 -3
- pixeltable/catalog/view.py +34 -28
- pixeltable/config.py +14 -10
- pixeltable/dataframe.py +68 -77
- pixeltable/env.py +74 -64
- pixeltable/exec/aggregation_node.py +6 -6
- pixeltable/exec/cache_prefetch_node.py +10 -10
- pixeltable/exec/data_row_batch.py +3 -3
- pixeltable/exec/exec_context.py +4 -5
- pixeltable/exec/exec_node.py +5 -5
- pixeltable/exec/expr_eval/evaluators.py +6 -6
- pixeltable/exec/expr_eval/expr_eval_node.py +8 -7
- pixeltable/exec/expr_eval/globals.py +6 -6
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +11 -11
- pixeltable/exec/in_memory_data_node.py +2 -2
- pixeltable/exec/object_store_save_node.py +14 -17
- pixeltable/exec/sql_node.py +25 -25
- pixeltable/exprs/arithmetic_expr.py +4 -4
- pixeltable/exprs/array_slice.py +2 -2
- pixeltable/exprs/column_property_ref.py +3 -3
- pixeltable/exprs/column_ref.py +61 -74
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +3 -3
- pixeltable/exprs/data_row.py +12 -12
- pixeltable/exprs/expr.py +41 -31
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +3 -3
- pixeltable/exprs/function_call.py +14 -14
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +8 -8
- pixeltable/exprs/is_null.py +1 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +6 -6
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +14 -14
- pixeltable/exprs/rowid_ref.py +8 -8
- pixeltable/exprs/similarity_expr.py +50 -25
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +2 -2
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +3 -3
- pixeltable/func/function.py +15 -17
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +2 -2
- pixeltable/func/query_template_function.py +16 -16
- pixeltable/func/signature.py +14 -14
- pixeltable/func/tools.py +11 -11
- pixeltable/func/udf.py +16 -18
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/anthropic.py +7 -7
- pixeltable/functions/audio.py +76 -0
- pixeltable/functions/bedrock.py +6 -6
- pixeltable/functions/deepseek.py +4 -4
- pixeltable/functions/fireworks.py +2 -2
- pixeltable/functions/gemini.py +6 -6
- pixeltable/functions/globals.py +12 -12
- pixeltable/functions/groq.py +4 -4
- pixeltable/functions/huggingface.py +18 -20
- pixeltable/functions/image.py +7 -10
- pixeltable/functions/llama_cpp.py +7 -7
- pixeltable/functions/math.py +2 -3
- pixeltable/functions/mistralai.py +3 -3
- pixeltable/functions/ollama.py +9 -9
- pixeltable/functions/openai.py +21 -21
- pixeltable/functions/openrouter.py +7 -7
- pixeltable/functions/string.py +21 -28
- pixeltable/functions/timestamp.py +7 -8
- pixeltable/functions/together.py +4 -6
- pixeltable/functions/twelvelabs.py +92 -0
- pixeltable/functions/video.py +2 -24
- pixeltable/functions/vision.py +6 -6
- pixeltable/functions/whisper.py +7 -7
- pixeltable/functions/whisperx.py +16 -16
- pixeltable/globals.py +52 -36
- pixeltable/index/base.py +12 -8
- pixeltable/index/btree.py +19 -22
- pixeltable/index/embedding_index.py +30 -39
- pixeltable/io/datarows.py +3 -3
- pixeltable/io/external_store.py +13 -16
- pixeltable/io/fiftyone.py +5 -5
- pixeltable/io/globals.py +5 -5
- pixeltable/io/hf_datasets.py +4 -4
- pixeltable/io/label_studio.py +12 -12
- pixeltable/io/pandas.py +6 -6
- pixeltable/io/parquet.py +2 -2
- pixeltable/io/table_data_conduit.py +12 -12
- pixeltable/io/utils.py +2 -2
- pixeltable/iterators/audio.py +2 -2
- pixeltable/iterators/video.py +8 -13
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_36.py +2 -2
- pixeltable/metadata/converters/convert_38.py +2 -2
- pixeltable/metadata/converters/convert_39.py +1 -2
- pixeltable/metadata/converters/util.py +11 -13
- pixeltable/metadata/schema.py +22 -21
- pixeltable/metadata/utils.py +2 -6
- pixeltable/mypy/mypy_plugin.py +5 -5
- pixeltable/plan.py +30 -28
- pixeltable/share/packager.py +7 -7
- pixeltable/share/publish.py +3 -3
- pixeltable/store.py +125 -61
- pixeltable/type_system.py +43 -46
- pixeltable/utils/__init__.py +1 -2
- pixeltable/utils/arrow.py +4 -4
- pixeltable/utils/av.py +8 -0
- pixeltable/utils/azure_store.py +305 -0
- pixeltable/utils/code.py +1 -2
- pixeltable/utils/dbms.py +15 -19
- pixeltable/utils/description_helper.py +2 -3
- pixeltable/utils/documents.py +5 -6
- pixeltable/utils/exception_handler.py +2 -2
- pixeltable/utils/filecache.py +5 -5
- pixeltable/utils/formatter.py +4 -6
- pixeltable/utils/gcs_store.py +9 -9
- pixeltable/utils/local_store.py +17 -17
- pixeltable/utils/object_stores.py +59 -43
- pixeltable/utils/s3_store.py +35 -30
- {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/METADATA +1 -1
- pixeltable-0.4.19.dist-info/RECORD +213 -0
- pixeltable/__version__.py +0 -3
- pixeltable-0.4.18.dist-info/RECORD +0 -211
- {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/licenses/LICENSE +0 -0
pixeltable/store.py
CHANGED
@@ -3,8 +3,9 @@ from __future__ import annotations
 import abc
 import logging
 import sys
+import time
 import warnings
-from typing import Any, Iterable, Iterator, Optional
+from typing import Any, Iterable, Iterator

 import more_itertools
 import psycopg
@@ -33,11 +34,11 @@ class StoreBase:

     tbl_version: catalog.TableVersionHandle
     sa_md: sql.MetaData
-    sa_tbl: Optional[sql.Table]
+    sa_tbl: sql.Table | None
     _pk_cols: list[sql.Column]
     v_min_col: sql.Column
     v_max_col: sql.Column
-    base: Optional[StoreBase]
+    base: StoreBase | None

     # In my cursory experiments this was the optimal batch size: it was an improvement over 5_000 and there was no real
     # benefit to going higher.
@@ -79,12 +80,13 @@ class StoreBase:
         self._pk_cols = [*rowid_cols, self.v_min_col]
         return [*rowid_cols, self.v_min_col, self.v_max_col]

-    def create_sa_tbl(self, tbl_version: Optional[catalog.TableVersion] = None) -> None:
+    def create_sa_tbl(self, tbl_version: catalog.TableVersion | None = None) -> None:
         """Create self.sa_tbl from self.tbl_version."""
         if tbl_version is None:
             tbl_version = self.tbl_version.get()
         system_cols = self._create_system_columns()
         all_cols = system_cols.copy()
+        # we captured all columns, including dropped ones: they're still part of the physical table
         for col in [c for c in tbl_version.cols if c.is_stored]:
             # re-create sql.Column for each column, regardless of whether it already has sa_col set: it was bound
             # to the last sql.Table version we created and cannot be reused
@@ -111,7 +113,10 @@ class StoreBase:
         idx_name = f'vmax_idx_{tbl_version.id.hex}'
         idxs.append(sql.Index(idx_name, self.v_max_col, postgresql_using=Env.get().dbms.version_index_type))

-        #
+        # we only capture indices visible in this version
+        for idx_info in tbl_version.idxs.values():
+            idx = idx_info.idx.sa_index(tbl_version._store_idx_name(idx_info.id), idx_info.val_col)
+            idxs.append(idx)

         self.sa_tbl = sql.Table(self._storage_name(), self.sa_md, *all_cols, *idxs)
         # _logger.debug(f'created sa tbl for {tbl_version.id!s} (sa_tbl={id(self.sa_tbl):x}, tv={id(tbl_version):x})')
@@ -137,35 +142,122 @@ class StoreBase:
         assert isinstance(result, int)
         return result

+    def _exec_if_not_exists(self, stmt: str, wait_for_table: bool) -> None:
+        """
+        Execute a statement containing 'IF NOT EXISTS' and ignore any duplicate object-related errors.
+
+        The statement needs to run in a separate transaction, because the expected error conditions will abort the
+        enclosing transaction (and the ability to run additional statements in that same transaction).
+        """
+        while True:
+            with Env.get().begin_xact(for_write=True) as conn:
+                try:
+                    if wait_for_table:
+                        # Try to lock the table to make sure that it exists. This needs to run in the same transaction
+                        # as 'stmt' to avoid a race condition.
+                        # TODO: adapt this for CockroachDB
+                        lock_stmt = f'LOCK TABLE {self._storage_name()} IN ACCESS EXCLUSIVE MODE'
+                        conn.execute(sql.text(lock_stmt))
+                    conn.execute(sql.text(stmt))
+                    return
+                except (sql.exc.IntegrityError, sql.exc.ProgrammingError) as e:
+                    Env.get().console_logger.info(f'{stmt} failed with: {e}')
+                    if (
+                        isinstance(e.orig, psycopg.errors.UniqueViolation)
+                        and 'duplicate key value violates unique constraint' in str(e.orig)
+                    ) or (
+                        isinstance(e.orig, (psycopg.errors.DuplicateObject, psycopg.errors.DuplicateTable))
+                        and 'already exists' in str(e.orig)
+                    ):
+                        # table already exists
+                        return
+                    elif isinstance(e.orig, psycopg.errors.UndefinedTable):
+                        # the Lock Table failed because the table doesn't exist yet; try again
+                        time.sleep(1)
+                        continue
+                    else:
+                        raise
+
+    def _store_tbl_exists(self) -> bool:
+        """Returns True if the store table exists, False otherwise."""
+        with Env.get().begin_xact(for_write=False) as conn:
+            q = (
+                'SELECT COUNT(*) FROM pg_catalog.pg_tables '
+                f"WHERE schemaname = 'public' AND tablename = {self._storage_name()!r}"
+            )
+            res = conn.execute(sql.text(q)).scalar_one()
+            return res == 1
+
     def create(self) -> None:
-        """
+        """
+        Create or update store table to bring it in sync with self.sa_tbl. Idempotent.
+
+        This runs a sequence of DDL statements (Create Table, Alter Table Add Column, Create Index), each of which
+        is run in its own transaction.
+
+        The exception to that are local replicas, for which TableRestorer creates an enclosing transaction. In theory,
+        this should avoid the potential for race conditions that motivate the error handling present in
+        _exec_if_not_exists() (meaning: we shouldn't see those errors when creating local replicas).
+        TODO: remove the special case for local replicas in order to make the logic easier to reason about.
+        """
+        postgres_dialect = sql.dialects.postgresql.dialect()
+
+        if not self._store_tbl_exists():
+            # run Create Table If Not Exists; we always need If Not Exists to avoid race conditions between concurrent
+            # Pixeltable processes
+            create_stmt = sql.schema.CreateTable(self.sa_tbl, if_not_exists=True).compile(dialect=postgres_dialect)
+            self._exec_if_not_exists(str(create_stmt), wait_for_table=False)
+        else:
+            # ensure that all columns exist by running Alter Table Add Column If Not Exists for all columns
+            for col in self.sa_tbl.columns:
+                stmt = self._add_column_stmt(col)
+                self._exec_if_not_exists(stmt, wait_for_table=True)
+            # TODO: do we also need to ensure that these columns are now visible (ie, is there another potential race
+            # condition here?)
+
+        # ensure that all visible indices exist by running Create Index If Not Exists
+        for index in self.sa_tbl.indexes:
+            create_stmt = sql.schema.CreateIndex(index, if_not_exists=True).compile(dialect=postgres_dialect)
+            self._exec_if_not_exists(str(create_stmt), wait_for_table=True)
+
+    def create_index(self, idx_id: int) -> None:
+        """Create If Not Exists for this index"""
+        idx_info = self.tbl_version.get().idxs[idx_id]
+        sa_idx = idx_info.idx.sa_index(self.tbl_version.get()._store_idx_name(idx_id), idx_info.val_col)
         conn = Env.get().conn
-        stmt = sql.schema.
+        stmt = sql.schema.CreateIndex(sa_idx, if_not_exists=True).compile(conn)
         create_stmt = str(stmt)
-
-
-
-
-
-
-
-
-        if
-
-
-
-
-
-
-
-
-
+        self._exec_if_not_exists(create_stmt, wait_for_table=True)
+
+    def validate(self) -> None:
+        """Validate store table against self.table_version"""
+        with Env.get().begin_xact() as conn:
+            # check that all columns are present
+            q = f'SELECT column_name FROM information_schema.columns WHERE table_name = {self._storage_name()!r}'
+            store_col_info = {row[0] for row in conn.execute(sql.text(q)).fetchall()}
+            tbl_col_info = {col.store_name() for col in self.tbl_version.get().cols if col.is_stored}
+            assert tbl_col_info.issubset(store_col_info)
+
+            # check that all visible indices are present
+            q = f'SELECT indexname FROM pg_indexes WHERE tablename = {self._storage_name()!r}'
+            store_idx_names = {row[0] for row in conn.execute(sql.text(q)).fetchall()}
+            tbl_index_names = {
+                self.tbl_version.get()._store_idx_name(info.id) for info in self.tbl_version.get().idxs.values()
+            }
+            assert tbl_index_names.issubset(store_idx_names)

     def drop(self) -> None:
         """Drop store table"""
         conn = Env.get().conn
         self.sa_md.drop_all(bind=conn)

+    def _add_column_stmt(self, sa_col: sql.Column) -> str:
+        col_type_str = sa_col.type.compile(dialect=sql.dialects.postgresql.dialect())
+        return (
+            f'ALTER TABLE {self._storage_name()} ADD COLUMN IF NOT EXISTS '
+            f'{sa_col.name} {col_type_str} {"NOT " if not sa_col.nullable else ""} NULL'
+        )
+
     def add_column(self, col: catalog.Column) -> None:
         """Add column(s) to the store-resident table based on a catalog column

@@ -174,7 +266,7 @@ class StoreBase:
         """
         assert col.is_stored
         conn = Env.get().conn
-        col_type_str = col.
+        col_type_str = col.sa_col_type.compile(dialect=conn.dialect)
         s_txt = f'ALTER TABLE {self._storage_name()} ADD COLUMN {col.store_name()} {col_type_str} NULL'
         added_storage_cols = [col.store_name()]
         if col.stores_cellmd:
@@ -197,34 +289,6 @@ class StoreBase:
         log_stmt(_logger, stmt)
         Env.get().conn.execute(stmt)

-    def ensure_updated_schema(self) -> None:
-        from pixeltable.utils.dbms import PostgresqlDbms
-
-        # This should only be called during replica creation where the underlying DBMS is Postgres.
-        assert isinstance(Env.get().dbms, PostgresqlDbms)
-
-        conn = Env.get().conn
-        tv = self.tbl_version.get()
-
-        # Ensure columns exist
-        sql_text = f'SELECT column_name FROM information_schema.columns WHERE table_name = {self._storage_name()!r}'
-        result = conn.execute(sql.text(sql_text))
-        existing_cols = {row[0] for row in result}
-        for col in tv.cols:
-            if col.is_stored and col.store_name() not in existing_cols:
-                _logger.debug(f'Adding missing column {col.store_name()!r} to store table {self._storage_name()!r}')
-                self.add_column(col)
-
-        # Ensure indices exist
-        sql_text = f'SELECT indexname FROM pg_indexes WHERE tablename = {self._storage_name()!r}'
-        result = conn.execute(sql.text(sql_text))
-        existing_idxs = {row[0] for row in result}
-        for idx_name, idx_info in tv.all_idxs.items():
-            store_name = tv._store_idx_name(idx_info.id)
-            if store_name not in existing_idxs:
-                _logger.debug(f'Creating missing index {idx_name!r} on store table {self._storage_name()!r}')
-                idx_info.idx.create_index(store_name, idx_info.val_col)
-
     def load_column(self, col: catalog.Column, exec_plan: ExecNode, abort_on_exc: bool) -> int:
         """Update store column of a computed column with values produced by an execution plan

@@ -234,7 +298,7 @@ class StoreBase:
         sql.exc.DBAPIError if there was a SQL error during execution
         excs.Error if on_error='abort' and there was an exception during row evaluation
         """
-        assert col.
+        assert col.get_tbl().id == self.tbl_version.id
         num_excs = 0
         num_rows = 0
         # create temp table to store output of exec_plan, with the same primary key as the store table
@@ -304,7 +368,7 @@ class StoreBase:
         exec_plan: ExecNode,
         v_min: int,
         show_progress: bool = True,
-        rowids: Optional[Iterator[int]] = None,
+        rowids: Iterator[int] | None = None,
         abort_on_exc: bool = False,
     ) -> tuple[set[int], RowCountStats]:
         """Insert rows into the store table and update the catalog table's md
@@ -316,7 +380,7 @@ class StoreBase:
         num_excs = 0
         num_rows = 0
         cols_with_excs: set[int] = set()
-        progress_bar: Optional[tqdm] = None  # create this only after we started executing
+        progress_bar: tqdm | None = None  # create this only after we started executing
         row_builder = exec_plan.row_builder

         store_col_names = row_builder.store_column_names()
@@ -389,7 +453,7 @@ class StoreBase:
         # stmt_text = f'INSERT INTO {self.sa_tbl.name} ({col_names_str}) VALUES ({placeholders_str})'
         # conn.exec_driver_sql(stmt_text, table_rows)

-    def _versions_clause(self, versions: list[Optional[int]], match_on_vmin: bool) -> sql.ColumnElement[bool]:
+    def _versions_clause(self, versions: list[int | None], match_on_vmin: bool) -> sql.ColumnElement[bool]:
         """Return filter for base versions"""
         v = versions[0]
         if v is None:
@@ -407,9 +471,9 @@ class StoreBase:
     def delete_rows(
         self,
         current_version: int,
-        base_versions: list[Optional[int]],
+        base_versions: list[int | None],
         match_on_vmin: bool,
-        where_clause: Optional[sql.ColumnElement[bool]],
+        where_clause: sql.ColumnElement[bool] | None,
     ) -> int:
         """Mark rows as deleted that are live and were created prior to current_version.
         Also: populate the undo columns
@@ -535,7 +599,7 @@ class StoreComponentView(StoreView):
         self.rowid_cols.append(self.pos_col)
         return self.rowid_cols

-    def create_sa_tbl(self, tbl_version: Optional[catalog.TableVersion] = None) -> None:
+    def create_sa_tbl(self, tbl_version: catalog.TableVersion | None = None) -> None:
         if tbl_version is None:
             tbl_version = self.tbl_version.get()
         super().create_sa_tbl(tbl_version)
pixeltable/type_system.py
CHANGED
@@ -10,7 +10,7 @@ import typing
 import urllib.parse
 import urllib.request
 from pathlib import Path
-from typing import Any, ClassVar, Iterable, Literal, Mapping, Optional, Sequence, Union
+from typing import Any, ClassVar, Iterable, Literal, Mapping, Sequence, Union

 from typing import _GenericAlias  # type: ignore[attr-defined]  # isort: skip

@@ -51,11 +51,11 @@ class ColumnType:
     @classmethod
     def supertype(
         cls,
-        type1: Optional['ColumnType.Type'],
-        type2: Optional['ColumnType.Type'],
+        type1: 'ColumnType.Type' | None,
+        type2: 'ColumnType.Type' | None,
         # we need to pass this in because we can't easily append it as a class member
         common_supertypes: dict[tuple['ColumnType.Type', 'ColumnType.Type'], 'ColumnType.Type'],
-    ) -> Optional['ColumnType.Type']:
+    ) -> 'ColumnType.Type' | None:
         if type1 == type2:
             return type1
         t = common_supertypes.get((type1, type2))
@@ -188,7 +188,7 @@ class ColumnType:
         if as_schema:
             return base_str if self.nullable else f'Required[{base_str}]'
         else:
-            return f'Optional[{base_str}]' if self.nullable else base_str
+            return f'{base_str} | None' if self.nullable else base_str

     def _to_base_str(self) -> str:
         """
@@ -217,7 +217,7 @@ class ColumnType:
         # Default: just compare base types (this works for all types whose only parameter is nullable)
         return self._type == other._type

-    def supertype(self, other: ColumnType) -> Optional[ColumnType]:
+    def supertype(self, other: ColumnType) -> ColumnType | None:
         if self == other:
             return self
         if self.matches(other):
@@ -237,7 +237,7 @@ class ColumnType:
         return None

     @classmethod
-    def infer_literal_type(cls, val: Any, nullable: bool = False) -> Optional[ColumnType]:
+    def infer_literal_type(cls, val: Any, nullable: bool = False) -> ColumnType | None:
         if val is None:
             return InvalidType(nullable=True)
         if isinstance(val, str):
@@ -271,7 +271,7 @@ class ColumnType:
         return None

     @classmethod
-    def infer_common_literal_type(cls, vals: Iterable[Any]) -> Optional[ColumnType]:
+    def infer_common_literal_type(cls, vals: Iterable[Any]) -> ColumnType | None:
         """
         Returns the most specific type that is a supertype of all literals in `vals`. If no such type
         exists, returns None.
@@ -279,7 +279,7 @@ class ColumnType:
         Args:
             vals: A collection of literals.
         """
-        inferred_type: Optional[ColumnType] = None
+        inferred_type: ColumnType | None = None
         for val in vals:
             val_type = cls.infer_literal_type(val)
             if inferred_type is None:
@@ -299,7 +299,7 @@ class ColumnType:
         nullable_default: bool = False,
         allow_builtin_types: bool = True,
         infer_pydantic_json: bool = False,
-    ) -> Optional[ColumnType]:
+    ) -> ColumnType | None:
         """
         Convert a Python type into a Pixeltable `ColumnType` instance.

@@ -317,9 +317,9 @@ class ColumnType:
         origin = typing.get_origin(t)
         type_args = typing.get_args(t)
         if origin in (typing.Union, types.UnionType):
-            # Check if `t` has the form
+            # Check if `t` has the form T | None.
             if len(type_args) == 2 and type(None) in type_args:
-                # `t` is a type of the form
+                # `t` is a type of the form T | None (equivalently, T | None or None | T).
                 # We treat it as the underlying type but with nullable=True.
                 underlying_py_type = type_args[0] if type_args[1] is type(None) else type_args[1]
                 underlying = cls.from_python_type(
@@ -338,7 +338,7 @@ class ColumnType:
         if isinstance(parameters, ColumnType):
             return parameters.copy(nullable=nullable_default)
         else:
-            # It's something other than
+            # It's something other than T | None, Required[T], or an explicitly annotated type.
            if origin is not None:
                 # Discard type parameters to ensure that parameterized types such as `list[T]`
                 # are correctly mapped to Pixeltable types.
@@ -411,7 +411,7 @@ class ColumnType:
         raise excs.Error(f'Unknown type: {t}')

     @classmethod
-    def from_json_schema(cls, schema: dict[str, Any]) -> Optional[ColumnType]:
+    def from_json_schema(cls, schema: dict[str, Any]) -> ColumnType | None:
         # We first express the JSON schema as a Python type, and then convert it to a Pixeltable type.
         # TODO: Is there a meaningful fallback if one of these operations fails? (Maybe another use case for a pxt Any
         # type?)
@@ -704,10 +704,10 @@ class DateType(ColumnType):


 class JsonType(ColumnType):
-    json_schema: Optional[dict[str, Any]]
-    __validator: Optional[jsonschema.protocols.Validator]
+    json_schema: dict[str, Any] | None
+    __validator: jsonschema.protocols.Validator | None

-    def __init__(self, json_schema: Optional[dict[str, Any]] = None, nullable: bool = False):
+    def __init__(self, json_schema: dict[str, Any] | None = None, nullable: bool = False):
         super().__init__(self.Type.JSON, nullable=nullable)
         self.json_schema = json_schema
         if json_schema is None:
@@ -777,7 +777,7 @@ class JsonType(ColumnType):
             return val.model_dump()
         return val

-    def supertype(self, other: ColumnType) -> Optional[JsonType]:
+    def supertype(self, other: ColumnType) -> JsonType | None:
         # Try using the (much faster) supertype logic in ColumnType first. That will work if, for example, the types
         # are identical except for nullability. If that doesn't work and both types are JsonType, then we will need to
         # merge their schemas.
@@ -799,7 +799,7 @@ class JsonType(ColumnType):
         )

     @classmethod
-    def __superschema(cls, a: dict[str, Any], b: dict[str, Any]) -> Optional[dict[str, Any]]:
+    def __superschema(cls, a: dict[str, Any], b: dict[str, Any]) -> dict[str, Any] | None:
         # Defining a general type hierarchy over all JSON schemas would be a challenging problem. In order to keep
         # things manageable, we only define a hierarchy among "conforming" schemas, which provides enough generality
         # for the most important use cases (unions for type inference, validation of inline exprs). A schema is
@@ -859,7 +859,7 @@ class JsonType(ColumnType):
         return {}  # Unresolvable type conflict; the supertype is an unrestricted JsonType.

     @classmethod
-    def __superschema_with_nulls(cls, a: dict[str, Any], b: dict[str, Any]) -> Optional[dict[str, Any]]:
+    def __superschema_with_nulls(cls, a: dict[str, Any], b: dict[str, Any]) -> dict[str, Any] | None:
         a, a_nullable = cls.__unpack_null_from_schema(a)
         b, b_nullable = cls.__unpack_null_from_schema(b)

@@ -888,15 +888,12 @@ class JsonType(ColumnType):


 class ArrayType(ColumnType):
-    shape: Optional[tuple[Optional[int], ...]]
-    pxt_dtype: Optional[ColumnType]
-    dtype: Optional[ColumnType.Type]
+    shape: tuple[int | None, ...] | None
+    pxt_dtype: ColumnType | None
+    dtype: ColumnType.Type | None

     def __init__(
-        self,
-        shape: Optional[tuple[Optional[int], ...]] = None,
-        dtype: Optional[ColumnType] = None,
-        nullable: bool = False,
+        self, shape: tuple[int | None, ...] | None = None, dtype: ColumnType | None = None, nullable: bool = False
     ):
         super().__init__(self.Type.ARRAY, nullable=nullable)
         assert shape is None or dtype is not None, (shape, dtype)  # cannot specify a shape without a dtype
@@ -921,7 +918,7 @@ class ArrayType(ColumnType):
     def __hash__(self) -> int:
         return hash((self._type, self.nullable, self.shape, self.dtype))

-    def supertype(self, other: ColumnType) -> Optional[ArrayType]:
+    def supertype(self, other: ColumnType) -> ArrayType | None:
         basic_supertype = super().supertype(other)
         if basic_supertype is not None:
             assert isinstance(basic_supertype, ArrayType)
@@ -934,7 +931,7 @@ class ArrayType(ColumnType):
         if super_dtype is None:
             # if the dtypes are incompatible, then the supertype is a fully general array
             return ArrayType(nullable=(self.nullable or other.nullable))
-        super_shape: Optional[tuple[Optional[int], ...]]
+        super_shape: tuple[int | None, ...] | None
         if self.shape is None or other.shape is None or len(self.shape) != len(other.shape):
             super_shape = None
         else:
@@ -965,7 +962,7 @@ class ArrayType(ColumnType):
         return cls(shape, dtype, nullable=d['nullable'])

     @classmethod
-    def from_np_dtype(cls, dtype: np.dtype, nullable: bool) -> Optional[ColumnType]:
+    def from_np_dtype(cls, dtype: np.dtype, nullable: bool) -> ColumnType | None:
         """
         Return pixeltable type corresponding to a given simple numpy dtype
         """
@@ -994,10 +991,10 @@ class ArrayType(ColumnType):
         return None

     @classmethod
-    def from_literal(cls, val: np.ndarray, nullable: bool = False) -> Optional[ArrayType]:
+    def from_literal(cls, val: np.ndarray, nullable: bool = False) -> ArrayType | None:
         # determine our dtype
         assert isinstance(val, np.ndarray)
-        pxttype: Optional[ColumnType] = cls.from_np_dtype(val.dtype, nullable)
+        pxttype: ColumnType | None = cls.from_np_dtype(val.dtype, nullable)
         if pxttype is None:
             return None
         return cls(val.shape, dtype=pxttype, nullable=nullable)
@@ -1060,7 +1057,7 @@ class ArrayType(ColumnType):
     def to_sa_type(cls) -> sql.types.TypeEngine:
         return sql.LargeBinary()

-    def numpy_dtype(self) -> Optional[np.dtype]:
+    def numpy_dtype(self) -> np.dtype | None:
         if self.dtype is None:
             return None
         if self.dtype == self.Type.INT:
@@ -1077,10 +1074,10 @@ class ArrayType(ColumnType):
 class ImageType(ColumnType):
     def __init__(
         self,
-        width: Optional[int] = None,
-        height: Optional[int] = None,
-        size: Optional[tuple[int, int]] = None,
-        mode: Optional[str] = None,
+        width: int | None = None,
+        height: int | None = None,
+        size: tuple[int, int] | None = None,
+        mode: str | None = None,
         nullable: bool = False,
     ):
         # TODO: does it make sense to specify only width or height?
@@ -1121,7 +1118,7 @@ class ImageType(ColumnType):
     def __hash__(self) -> int:
         return hash((self._type, self.nullable, self.size, self.mode))

-    def supertype(self, other: ColumnType) -> Optional[ImageType]:
+    def supertype(self, other: ColumnType) -> ImageType | None:
         basic_supertype = super().supertype(other)
         if basic_supertype is not None:
             assert isinstance(basic_supertype, ImageType)
@@ -1136,7 +1133,7 @@ class ImageType(ColumnType):
         return ImageType(width=width, height=height, mode=mode, nullable=(self.nullable or other.nullable))

     @property
-    def size(self) -> Optional[tuple[int, int]]:
+    def size(self) -> tuple[int, int] | None:
         if self.width is None or self.height is None:
             return None
         return (self.width, self.height)
@@ -1255,7 +1252,7 @@ class DocumentType(ColumnType):
         TXT = 4

         @classmethod
-        def from_extension(cls, ext: str) -> Optional['DocumentType.DocumentFormat']:
+        def from_extension(cls, ext: str) -> 'DocumentType.DocumentFormat' | None:
             if ext in ('.htm', '.html'):
                 return cls.HTML
             if ext == '.md':
@@ -1268,7 +1265,7 @@ class DocumentType(ColumnType):
                 return cls.TXT
             return None

-    def __init__(self, nullable: bool = False, doc_formats: Optional[str] = None):
+    def __init__(self, nullable: bool = False, doc_formats: str | None = None):
         super().__init__(self.Type.DOCUMENT, nullable=nullable)
         self.doc_formats = doc_formats
         if doc_formats is not None:
@@ -1365,13 +1362,13 @@ class Array(np.ndarray, _PxtType):
     def __class_getitem__(cls, item: Any) -> _AnnotatedAlias:
         """
         `item` (the type subscript) must be a tuple with exactly two elements (in any order):
-        - A tuple of `Optional[int]`s, specifying the shape of the array
+        - A tuple of `int | None`s, specifying the shape of the array
         - A type, specifying the dtype of the array
         Example: Array[(3, None, 2), pxt.Float]
         """
         params = item if isinstance(item, tuple) else (item,)
-        shape: Optional[tuple] = None
-        dtype: Optional[ColumnType] = None
+        shape: tuple | None = None
+        dtype: ColumnType | None = None
         if not any(isinstance(param, (type, _AnnotatedAlias)) for param in params):
             raise TypeError('Array type parameter must include a dtype.')
         for param in params:
@@ -1411,8 +1408,8 @@ class Image(PIL.Image.Image, _PxtType):
         else:
             # Not a tuple (single arg)
             params = (item,)
-        size: Optional[tuple] = None
-        mode: Optional[str] = None
+        size: tuple | None = None
+        mode: str | None = None
         for param in params:
             if isinstance(param, tuple):
                 if (
pixeltable/utils/__init__.py
CHANGED
@@ -2,7 +2,6 @@ import hashlib
 import urllib.parse
 import urllib.request
 from pathlib import Path
-from typing import Optional


 def print_perf_counter_delta(delta: float) -> str:
@@ -39,7 +38,7 @@ def sha256sum(path: Path | str) -> str:
     return h.hexdigest()


-def parse_local_file_path(file_or_url: str) -> Optional[Path]:
+def parse_local_file_path(file_or_url: str) -> Path | None:
     """
     Parses a string that may be either a URL or a local file path.

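
Only the signature of parse_local_file_path() changed here; its body isn't shown in this diff. For context, a plausible reconstruction of what such a function does, based on its docstring (the scheme-length heuristic for Windows drive letters is an assumption, not something this diff confirms):

import urllib.parse
import urllib.request
from pathlib import Path

def parse_local_file_path(file_or_url: str) -> Path | None:
    """Return a Path if the string denotes a local file (plain path or file:// URL), else None."""
    parsed = urllib.parse.urlparse(file_or_url)
    if len(parsed.scheme) <= 1:
        # no scheme, or a single letter that is really a Windows drive (e.g. 'C:'): a local path
        return Path(file_or_url)
    if parsed.scheme == 'file':
        return Path(urllib.request.url2pathname(parsed.path))
    return None  # http://, s3://, etc. are not local files

assert parse_local_file_path('/tmp/x.png') == Path('/tmp/x.png')
assert parse_local_file_path('file:///tmp/x.png') == Path('/tmp/x.png')
assert parse_local_file_path('https://example.com/x.png') is None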
pixeltable/utils/arrow.py
CHANGED
@@ -1,7 +1,7 @@
 import datetime
 import io
 import json
-from typing import TYPE_CHECKING, Any, Iterator, Optional, cast
+from typing import TYPE_CHECKING, Any, Iterator, cast

 import numpy as np
 import PIL.Image
@@ -48,7 +48,7 @@ PXT_TO_PA_TYPES: dict[type[ts.ColumnType], pa.DataType] = {
 }


-def to_pixeltable_type(arrow_type: pa.DataType, nullable: bool) -> Optional[ts.ColumnType]:
+def to_pixeltable_type(arrow_type: pa.DataType, nullable: bool) -> ts.ColumnType | None:
     """Convert a pyarrow DataType to a pixeltable ColumnType if one is defined.
     Returns None if no conversion is currently implemented.
     """
@@ -66,7 +66,7 @@ def to_pixeltable_type(arrow_type: pa.DataType, nullable: bool) -> Optional[ts.ColumnType]:
     return None


-def to_arrow_type(pixeltable_type: ts.ColumnType) -> Optional[pa.DataType]:
+def to_arrow_type(pixeltable_type: ts.ColumnType) -> pa.DataType | None:
     """Convert a pixeltable DataType to a pyarrow datatype if one is defined.
     Returns None if no conversion is currently implemented.
     """
@@ -240,7 +240,7 @@ def _ar_val_to_pxt_val(val: Any, pxt_type: ts.ColumnType) -> Any:


 def iter_tuples2(
-    batch: pa.Table | pa.RecordBatch, col_mapping: Optional[dict[str, str]], schema: dict[str, ts.ColumnType]
+    batch: pa.Table | pa.RecordBatch, col_mapping: dict[str, str] | None, schema: dict[str, ts.ColumnType]
 ) -> Iterator[dict[str, Any]]:
     """Convert a RecordBatch to an iterator of dictionaries. also works with pa.Table and pa.RowGroup"""
     pydict = to_pydict(batch)
pixeltable/utils/av.py
CHANGED
@@ -5,6 +5,14 @@ import av.stream

 from pixeltable.env import Env

+# format -> (codec, extension)
+AUDIO_FORMATS: dict[str, tuple[str, str]] = {
+    'wav': ('pcm_s16le', 'wav'),
+    'mp3': ('libmp3lame', 'mp3'),
+    'flac': ('flac', 'flac'),
+    'mp4': ('aac', 'm4a'),
+}
+

 def get_metadata(path: str) -> dict:
     with av.open(path) as container: