pixeltable-0.4.13-py3-none-any.whl → pixeltable-0.4.15-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +2 -1
- pixeltable/catalog/catalog.py +187 -63
- pixeltable/catalog/column.py +24 -20
- pixeltable/catalog/table.py +24 -8
- pixeltable/catalog/table_metadata.py +1 -0
- pixeltable/catalog/table_version.py +16 -34
- pixeltable/catalog/update_status.py +12 -0
- pixeltable/catalog/view.py +22 -22
- pixeltable/config.py +2 -0
- pixeltable/dataframe.py +4 -2
- pixeltable/env.py +46 -21
- pixeltable/exec/__init__.py +1 -0
- pixeltable/exec/aggregation_node.py +0 -1
- pixeltable/exec/cache_prefetch_node.py +74 -98
- pixeltable/exec/data_row_batch.py +2 -18
- pixeltable/exec/expr_eval/expr_eval_node.py +11 -0
- pixeltable/exec/in_memory_data_node.py +1 -1
- pixeltable/exec/object_store_save_node.py +299 -0
- pixeltable/exec/sql_node.py +28 -33
- pixeltable/exprs/data_row.py +31 -25
- pixeltable/exprs/json_path.py +6 -5
- pixeltable/exprs/row_builder.py +6 -12
- pixeltable/functions/gemini.py +1 -1
- pixeltable/functions/openai.py +1 -1
- pixeltable/functions/video.py +128 -15
- pixeltable/functions/whisperx.py +2 -0
- pixeltable/functions/yolox.py +2 -0
- pixeltable/globals.py +49 -30
- pixeltable/index/embedding_index.py +5 -8
- pixeltable/io/__init__.py +1 -0
- pixeltable/io/fiftyone.py +1 -1
- pixeltable/io/label_studio.py +4 -5
- pixeltable/iterators/__init__.py +1 -0
- pixeltable/iterators/audio.py +1 -1
- pixeltable/iterators/document.py +10 -12
- pixeltable/iterators/video.py +1 -1
- pixeltable/metadata/schema.py +7 -0
- pixeltable/plan.py +26 -1
- pixeltable/share/packager.py +8 -2
- pixeltable/share/publish.py +3 -10
- pixeltable/store.py +1 -1
- pixeltable/type_system.py +1 -3
- pixeltable/utils/dbms.py +31 -5
- pixeltable/utils/gcs_store.py +283 -0
- pixeltable/utils/local_store.py +316 -0
- pixeltable/utils/object_stores.py +497 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +354 -0
- {pixeltable-0.4.13.dist-info → pixeltable-0.4.15.dist-info}/METADATA +1 -1
- {pixeltable-0.4.13.dist-info → pixeltable-0.4.15.dist-info}/RECORD +53 -50
- pixeltable/utils/media_store.py +0 -248
- pixeltable/utils/s3.py +0 -17
- {pixeltable-0.4.13.dist-info → pixeltable-0.4.15.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.13.dist-info → pixeltable-0.4.15.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.13.dist-info → pixeltable-0.4.15.dist-info}/licenses/LICENSE +0 -0
pixeltable/exec/sql_node.py
CHANGED
@@ -71,6 +71,13 @@ class SqlNode(ExecNode):
     If set_pk is True, they are added to the end of the result set when creating the SQL statement
     so they can always be referenced as cols[-num_pk_cols:] in the result set.
     The pk_columns consist of the rowid columns of the target table followed by the version number.
+
+    If row_builder contains references to unstored iter columns, expands the select list to include their
+    SQL-materializable subexpressions.
+
+    Args:
+        select_list: output of the query
+        set_pk: if True, sets the primary for each DataRow
     """

     tbl: Optional[catalog.TableVersionPath]
@@ -97,14 +104,6 @@ class SqlNode(ExecNode):
         sql_elements: exprs.SqlElementCache,
         set_pk: bool = False,
     ):
-        """
-        If row_builder contains references to unstored iter columns, expands the select list to include their
-        SQL-materializable subexpressions.
-
-        Args:
-            select_list: output of the query
-            set_pk: if True, sets the primary for each DataRow
-        """
         # create Select stmt
         self.sql_elements = sql_elements
         self.tbl = tbl
@@ -374,6 +373,11 @@ class SqlScanNode(SqlNode):
     Materializes data from the store via a Select stmt.

     Supports filtering and ordering.
+
+    Args:
+        select_list: output of the query
+        set_pk: if True, sets the primary for each DataRow
+        exact_version_only: tables for which we only want to see rows created at the current version
     """

     exact_version_only: list[catalog.TableVersionHandle]
@@ -386,12 +390,6 @@
         set_pk: bool = False,
         exact_version_only: Optional[list[catalog.TableVersionHandle]] = None,
     ):
-        """
-        Args:
-            select_list: output of the query
-            set_pk: if True, sets the primary for each DataRow
-            exact_version_only: tables for which we only want to see rows created at the current version
-        """
         sql_elements = exprs.SqlElementCache()
         super().__init__(tbl, row_builder, select_list, sql_elements, set_pk=set_pk)
         # create Select stmt
@@ -413,6 +411,11 @@
 class SqlLookupNode(SqlNode):
     """
     Materializes data from the store via a Select stmt with a WHERE clause that matches a list of key values
+
+    Args:
+        select_list: output of the query
+        sa_key_cols: list of key columns in the store table
+        key_vals: list of key values to look up
     """

     def __init__(
@@ -423,12 +426,6 @@ class SqlLookupNode(SqlNode):
         sa_key_cols: list[sql.Column],
         key_vals: list[tuple],
     ):
-        """
-        Args:
-            select_list: output of the query
-            sa_key_cols: list of key columns in the store table
-            key_vals: list of key values to look up
-        """
         sql_elements = exprs.SqlElementCache()
         super().__init__(tbl, row_builder, select_list, sql_elements, set_pk=True)
         # Where clause: (key-col-1, key-col-2, ...) IN ((val-1, val-2, ...), ...)
@@ -444,6 +441,11 @@
 class SqlAggregationNode(SqlNode):
     """
     Materializes data from the store via a Select stmt with a WHERE clause that matches a list of key values
+
+    Args:
+        select_list: can contain calls to AggregateFunctions
+        group_by_items: list of expressions to group by
+        limit: max number of rows to return: None = no limit
     """

     group_by_items: Optional[list[exprs.Expr]]
@@ -458,12 +460,6 @@ class SqlAggregationNode(SqlNode):
         limit: Optional[int] = None,
         exact_version_only: Optional[list[catalog.TableVersion]] = None,
     ):
-        """
-        Args:
-            select_list: can contain calls to AggregateFunctions
-            group_by_items: list of expressions to group by
-            limit: max number of rows to return: None = no limit
-        """
         self.input_cte, input_col_map = input.to_cte()
         sql_elements = exprs.SqlElementCache(input_col_map)
         super().__init__(None, row_builder, select_list, sql_elements)
@@ -529,6 +525,12 @@ class SqlJoinNode(SqlNode):
 class SqlSampleNode(SqlNode):
     """
     Returns rows sampled from the input node.
+
+    Args:
+        input: SqlNode to sample from
+        select_list: can contain calls to AggregateFunctions
+        sample_clause: specifies the sampling method
+        stratify_exprs: Analyzer processed list of expressions to stratify by.
     """

     input_cte: Optional[sql.CTE]
@@ -544,13 +546,6 @@
         sample_clause: 'SampleClause',
         stratify_exprs: list[exprs.Expr],
     ):
-        """
-        Args:
-            input: SqlNode to sample from
-            select_list: can contain calls to AggregateFunctions
-            sample_clause: specifies the sampling method
-            stratify_exprs: Analyzer processed list of expressions to stratify by.
-        """
         assert isinstance(input, SqlNode)
         self.input_cte, input_col_map = input.to_cte(keep_pk=True)
         self.pk_count = input.num_pk_cols
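The pattern across these sql_node.py hunks is consistent: constructor documentation moves from the `__init__` docstring to the class docstring, so the `Args:` section is visible on the class itself. A minimal sketch of the convention; the class and arguments here are hypothetical, not part of the diff:

```python
# Hypothetical illustration of the docstring move applied throughout sql_node.py:
# the Args: section now lives in the class docstring rather than in __init__.

class ExampleNode:
    """
    Materializes rows from an input.

    Args:
        select_list: output of the query
        set_pk: if True, sets the primary key for each DataRow
    """

    def __init__(self, select_list: list, set_pk: bool = False):
        # __init__ no longer carries its own docstring; help() and doc
        # generators surface the Args: section from the class docstring.
        self.select_list = select_list
        self.set_pk = set_pk
```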
pixeltable/exprs/data_row.py
CHANGED
@@ -14,7 +14,7 @@ import PIL.Image
 import sqlalchemy as sql

 from pixeltable import catalog, env
-from pixeltable.utils.
+from pixeltable.utils.local_store import TempStore


 class DataRow:
@@ -257,42 +257,48 @@ class DataRow:
         self.vals[idx] = val
         self.has_val[idx] = True

-    def
-        """
+    def prepare_col_val_for_save(self, index: int, col: Optional[catalog.Column] = None) -> bool:
+        """
+        Prepare to save a column's value into the appropriate store. Discard unneeded values.
+
+        Return:
+            True if the media object in the column needs to be saved.
+        """
         if self.vals[index] is None:
-            return
+            return False
+
+        if self.file_urls[index] is not None:
+            return False
+
         assert self.excs[index] is None
         if self.file_paths[index] is None:
             if col is not None:
-
-
-                if isinstance(image, PIL.Image.Image):
-                    # Default to JPEG unless the image has a transparency layer (which isn't supported by JPEG).
-                    # In that case, use WebP instead.
-                    format = 'webp' if image.has_transparency_data else 'jpeg'
-                filepath, url = MediaStore.get().save_media_object(image, col, format=format)
-                self.file_paths[index] = str(filepath)
-                self.file_urls[index] = url
+                # This is a media object that needs to be saved
+                return True
             else:
-                # we
+                # This is a media object that we don't care about, so we discard it
                 self.has_val[index] = False
         else:
             # we already have a file for this image, nothing left to do
             pass
+
         self.vals[index] = None
+        return False

-    def
-        """
-
-            return
-        assert self.excs[index] is None
+    def save_media_to_temp(self, index: int, col: catalog.Column) -> str:
+        """Save the media object in the column to the TempStore.
+        Objects cannot be saved directly to general destinations."""
         assert col.col_type.is_media_type()
-
-
-
-
-
-
+        val = self.vals[index]
+        format = None
+        if isinstance(val, PIL.Image.Image):
+            # Default to JPEG unless the image has a transparency layer (which isn't supported by JPEG).
+            # In that case, use WebP instead.
+            format = 'webp' if val.has_transparency_data else 'jpeg'
+        filepath, url = TempStore.save_media_object(val, col, format=format)
+        self.file_paths[index] = str(filepath) if filepath is not None else None
+        self.vals[index] = None
+        return url

     @property
     def rowid(self) -> tuple[int, ...]:
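The old single-step flush (save straight to the MediaStore) is split into a check step and a staging step, which lets a downstream operator decide where media ultimately lands. A rough sketch of how a caller might drive the two new methods; the real orchestration presumably lives in the new exec/object_store_save_node.py listed above, whose contents this diff view doesn't expand:

```python
# Hypothetical driver for the new two-step media flow; every name here other
# than prepare_col_val_for_save/save_media_to_temp is illustrative.

def stage_media(row, media_slots) -> list[tuple[int, str]]:
    staged: list[tuple[int, str]] = []
    for slot_idx, col in media_slots:
        # Step 1: returns True only for an in-memory media object that still
        # needs saving (None values, already-stored URLs, and unneeded values
        # return False and are discarded).
        if row.prepare_col_val_for_save(slot_idx, col):
            # Step 2: write it to the local TempStore; a later node can copy
            # the file from there to the destination object store.
            url = row.save_media_to_temp(slot_idx, col)
            staged.append((slot_idx, url))
    return staged
```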
pixeltable/exprs/json_path.py
CHANGED
@@ -17,14 +17,15 @@ from .sql_element_cache import SqlElementCache


 class JsonPath(Expr):
+    """
+    anchor can be None, in which case this is a relative JsonPath and the anchor is set later via set_anchor().
+    scope_idx: for relative paths, index of referenced JsonMapper
+        (0: indicates the immediately preceding JsonMapper, -1: the parent of the immediately preceding mapper, ...)
+    """
+
     def __init__(
         self, anchor: Optional[Expr], path_elements: Optional[list[str | int | slice]] = None, scope_idx: int = 0
     ) -> None:
-        """
-        anchor can be None, in which case this is a relative JsonPath and the anchor is set later via set_anchor().
-        scope_idx: for relative paths, index of referenced JsonMapper
-            (0: indicates the immediately preceding JsonMapper, -1: the parent of the immediately preceding mapper, ...)
-        """
         if path_elements is None:
             path_elements = []
         super().__init__(ts.JsonType(nullable=True))  # JsonPath expressions are always nullable
pixeltable/exprs/row_builder.py
CHANGED
@@ -48,6 +48,12 @@ class RowBuilder:

     For ColumnRefs to unstored iterator columns:
     - in order for them to be executable, we also record the iterator args and pass them to the ColumnRef
+
+    Args:
+        output_exprs: list of Exprs to be evaluated
+        columns: list of columns to be materialized
+        input_exprs: list of Exprs that are excluded from evaluation (because they're already materialized)
+    TODO: enforce that output_exprs doesn't overlap with input_exprs?
     """

     unique_exprs: ExprSet
@@ -105,13 +111,6 @@
         input_exprs: Iterable[Expr],
         tbl: Optional[catalog.TableVersion] = None,
     ):
-        """
-        Args:
-            output_exprs: list of Exprs to be evaluated
-            columns: list of columns to be materialized
-            input_exprs: list of Exprs that are excluded from evaluation (because they're already materialized)
-        TODO: enforce that output_exprs doesn't overlap with input_exprs?
-        """
         self.unique_exprs: ExprSet[Expr] = ExprSet()  # dependencies precede their dependents
         self.next_slot_idx = 0
         self.stored_img_cols = []
@@ -474,11 +473,6 @@
             # exceptions get stored in the errortype/-msg properties of the cellmd column
             table_row.append(ColumnPropertyRef.create_cellmd_exc(exc))
         else:
-            if col.col_type.is_media_type():
-                if col.col_type.is_image_type() and data_row.file_urls[slot_idx] is None:
-                    # we have yet to store this image
-                    data_row.flush_img(slot_idx, col)
-                data_row.move_tmp_media_file(slot_idx, col)
             val = data_row.get_stored_val(slot_idx, col.get_sa_col_type())
             table_row.append(val)
             if col.stores_cellmd:
pixeltable/functions/gemini.py
CHANGED
@@ -15,7 +15,7 @@ import PIL.Image
 import pixeltable as pxt
 from pixeltable import env, exceptions as excs, exprs
 from pixeltable.utils.code import local_public_names
-from pixeltable.utils.
+from pixeltable.utils.local_store import TempStore

 if TYPE_CHECKING:
     from google import genai
pixeltable/functions/openai.py
CHANGED
@@ -23,7 +23,7 @@ import pixeltable as pxt
 from pixeltable import env, exprs, type_system as ts
 from pixeltable.func import Batch, Tools
 from pixeltable.utils.code import local_public_names
-from pixeltable.utils.
+from pixeltable.utils.local_store import TempStore

 if TYPE_CHECKING:
     import openai
pixeltable/functions/video.py
CHANGED
@@ -4,7 +4,6 @@ Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)

 import logging
 import pathlib
-import shutil
 import subprocess
 from typing import Literal, NoReturn

@@ -17,7 +16,7 @@
 import pixeltable.utils.av as av_utils
 from pixeltable.env import Env
 from pixeltable.utils.code import local_public_names
-from pixeltable.utils.
+from pixeltable.utils.local_store import TempStore

 _logger = logging.getLogger('pixeltable')
 _format_defaults: dict[str, tuple[str, str]] = {  # format -> (codec, ext)
@@ -49,6 +48,10 @@ class make_video(pxt.Aggregator):
     """
     Aggregator that creates a video from a sequence of images, using the default video encoder and yuv420p pixel format.

+    Follows https://pyav.org/docs/develop/cookbook/numpy.html#generating-video
+
+    TODO: provide parameters for video_encoder and pix_fmt
+
     Args:
         fps: Frames per second for the output video.

@@ -98,11 +101,6 @@ class make_video(pxt.Aggregator):
     fps: int

     def __init__(self, fps: int = 25):
-        """
-        Follows https://pyav.org/docs/develop/cookbook/numpy.html#generating-video
-
-        TODO: provide parameters for video_encoder and pix_fmt
-        """
         self.container = None
         self.stream = None
         self.fps = fps
@@ -328,6 +326,7 @@ def clip(
     Returns:
         New video containing only the specified time range or None if start_time is beyond the end of the video.
     """
+    Env.get().require_binary('ffmpeg')
    if start_time < 0:
         raise pxt.Error(f'start_time must be non-negative, got {start_time}')
     if end_time is not None and end_time <= start_time:
@@ -336,8 +335,6 @@ def clip(
         raise pxt.Error(f'duration must be positive, got {duration}')
     if end_time is not None and duration is not None:
         raise pxt.Error('end_time and duration cannot both be specified')
-    if not shutil.which('ffmpeg'):
-        raise pxt.Error('ffmpeg is not installed or not in PATH. Please install ffmpeg to use get_clip().')

     video_duration = av_utils.get_video_duration(video)
     if video_duration is not None and start_time > video_duration:
@@ -389,10 +386,9 @@ def segment_video(video: pxt.Video, *, duration: float) -> list[str]:
     >>> duration = tbl.video.get_duration()
     >>> tbl.select(segment_paths=tbl.video.segment_video(duration=duration / 2 + 1)).collect()
     """
+    Env.get().require_binary('ffmpeg')
     if duration <= 0:
         raise pxt.Error(f'duration must be positive, got {duration}')
-    if not shutil.which('ffmpeg'):
-        raise pxt.Error('ffmpeg is not installed or not in PATH. Please install ffmpeg to use segment_video().')

     base_path = TempStore.create_path(extension='')

@@ -437,10 +433,9 @@ def concat_videos(videos: list[pxt.Video]) -> pxt.Video:
     Returns:
         A new video containing the merged videos.
     """
+    Env.get().require_binary('ffmpeg')
     if len(videos) == 0:
         raise pxt.Error('concat_videos(): empty argument list')
-    if not shutil.which('ffmpeg'):
-        raise pxt.Error('ffmpeg is not installed or not in PATH. Please install ffmpeg to use concat_videos().')

     # Check that all videos have the same resolution
     resolutions: list[tuple[int, int]] = []
@@ -529,6 +524,125 @@ def concat_videos(videos: list[pxt.Video]) -> pxt.Video:
         filelist_path.unlink()


+@pxt.udf
+def with_audio(
+    video: pxt.Video,
+    audio: pxt.Audio,
+    *,
+    video_start_time: float = 0.0,
+    video_duration: float | None = None,
+    audio_start_time: float = 0.0,
+    audio_duration: float | None = None,
+) -> pxt.Video:
+    """
+    Creates a new video that combines the video stream from `video` and the audio stream from `audio`.
+    The `start_time` and `duration` parameters can be used to select a specific time range from each input.
+    If the audio input (or selected time range) is longer than the video, the audio will be truncated.
+
+
+    __Requirements:__
+
+    - `ffmpeg` needs to be installed and in PATH
+
+    Args:
+        video: Input video.
+        audio: Input audio.
+        video_start_time: Start time in the video input (in seconds).
+        video_duration: Duration of video segment (in seconds). If None, uses the remainder of the video after
+            `video_start_time`. `video_duration` determines the duration of the output video.
+        audio_start_time: Start time in the audio input (in seconds).
+        audio_duration: Duration of audio segment (in seconds). If None, uses the remainder of the audio after
+            `audio_start_time`. If the audio is longer than the output video, it will be truncated.
+
+    Returns:
+        A new video file with the audio track added.
+
+    Examples:
+        Add background music to a video:
+
+        >>> tbl.select(tbl.video.with_audio(tbl.music_track)).collect()
+
+        Add audio starting 5 seconds into both files:
+
+        >>> tbl.select(
+        ...     tbl.video.with_audio(
+        ...         tbl.music_track,
+        ...         video_start_time=5.0,
+        ...         audio_start_time=5.0
+        ...     )
+        ... ).collect()
+
+        Use a 10-second clip from the middle of both files:
+
+        >>> tbl.select(
+        ...     tbl.video.with_audio(
+        ...         tbl.music_track,
+        ...         video_start_time=30.0,
+        ...         video_duration=10.0,
+        ...         audio_start_time=15.0,
+        ...         audio_duration=10.0
+        ...     )
+        ... ).collect()
+    """
+    Env.get().require_binary('ffmpeg')
+    if video_start_time < 0:
+        raise pxt.Error(f'video_offset must be non-negative, got {video_start_time}')
+    if audio_start_time < 0:
+        raise pxt.Error(f'audio_offset must be non-negative, got {audio_start_time}')
+    if video_duration is not None and video_duration <= 0:
+        raise pxt.Error(f'video_duration must be positive, got {video_duration}')
+    if audio_duration is not None and audio_duration <= 0:
+        raise pxt.Error(f'audio_duration must be positive, got {audio_duration}')
+
+    output_path = str(TempStore.create_path(extension='.mp4'))
+
+    cmd = ['ffmpeg']
+    if video_start_time > 0:
+        # fast seek, must precede -i
+        cmd.extend(['-ss', str(video_start_time)])
+    if video_duration is not None:
+        cmd.extend(['-t', str(video_duration)])
+    else:
+        video_duration = av_utils.get_video_duration(video)
+    cmd.extend(['-i', str(video)])
+
+    if audio_start_time > 0:
+        cmd.extend(['-ss', str(audio_start_time)])
+    if audio_duration is not None:
+        cmd.extend(['-t', str(audio_duration)])
+    cmd.extend(['-i', str(audio)])
+
+    cmd.extend(
+        [
+            '-map',
+            '0:v:0',  # video from first input
+            '-map',
+            '1:a:0',  # audio from second input
+            '-c:v',
+            'copy',  # avoid re-encoding
+            '-c:a',
+            'copy',  # avoid re-encoding
+            '-t',
+            str(video_duration),  # limit output duration to video duration
+            '-loglevel',
+            'error',  # only show errors
+            output_path,
+        ]
+    )
+
+    _logger.debug(f'with_audio(): {" ".join(cmd)}')
+
+    try:
+        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+        output_file = pathlib.Path(output_path)
+        if not output_file.exists() or output_file.stat().st_size == 0:
+            stderr_output = result.stderr.strip() if result.stderr is not None else ''
+            raise pxt.Error(f'ffmpeg failed to create output file for commandline: {" ".join(cmd)}\n{stderr_output}')
+        return output_path
+    except subprocess.CalledProcessError as e:
+        _handle_ffmpeg_error(e)
+
+
 @pxt.udf(is_method=True)
 def overlay_text(
     video: pxt.Video,
@@ -615,8 +729,7 @@ def overlay_text(
     ...     )
     ... ).collect()
     """
-    if not shutil.which('ffmpeg'):
-        raise pxt.Error('ffmpeg is not installed or not in PATH. Please install ffmpeg to use overlay_text().')
+    Env.get().require_binary('ffmpeg')
     if font_size <= 0:
         raise pxt.Error(f'font_size must be positive, got {font_size}')
     if opacity < 0.0 or opacity > 1.0:
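The four copies of the ffmpeg PATH check are replaced by a single `Env.get().require_binary('ffmpeg')` call; env.py grows by 46 lines in this release, but its diff isn't expanded on this page. A sketch of what such a helper plausibly looks like, assuming it wraps the same `shutil.which` check the old code used:

```python
# Hypothetical sketch of require_binary(); the actual implementation lives in
# pixeltable/env.py, which this page does not show.
import shutil


class Env:
    _instance = None

    @classmethod
    def get(cls) -> 'Env':
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    def require_binary(self, name: str) -> None:
        # One uniform check and error message instead of a per-UDF
        # shutil.which() + raise, as in the four call sites above.
        if shutil.which(name) is None:
            raise RuntimeError(f'{name} is not installed or not in PATH. Please install {name}.')
```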
pixeltable/functions/whisperx.py
CHANGED
pixeltable/functions/yolox.py
CHANGED
pixeltable/globals.py
CHANGED
@@ -397,40 +397,54 @@ def create_snapshot(
     )


-def
-    destination: str,
+def publish(
     source: str | catalog.Table,
+    destination_uri: str,
     bucket_name: str | None = None,
     access: Literal['public', 'private'] = 'private',
-) ->
+) -> None:
     """
-
-
+    Publishes a replica of a local Pixeltable table to Pixeltable cloud. A given table can be published to at most one
+    URI per Pixeltable cloud database.

     Args:
-
-
-
-
-
+        source: Path or table handle of the local table to be published.
+        destination_uri: Remote URI where the replica will be published, such as `'pxt://org_name/my_dir/my_table'`.
+        bucket_name: The name of the bucket to use to store replica's data. The bucket must be registered with
+            Pixeltable cloud. If no `bucket_name` is provided, the default storage bucket for the destination
+            database will be used.
         access: Access control for the replica.

             - `'public'`: Anyone can access this replica.
-            - `'private'`: Only the
+            - `'private'`: Only the host organization can access.
     """
-
-
-
-
-
-
-
-
-
-
-
-
-
+    if not destination_uri.startswith('pxt://'):
+        raise excs.Error("`destination_uri` must be a remote Pixeltable URI with the prefix 'pxt://'")
+
+    if isinstance(source, str):
+        source = get_table(source)
+
+    share.push_replica(destination_uri, source, bucket_name, access)
+
+
+def replicate(remote_uri: str, local_path: str) -> catalog.Table:
+    """
+    Retrieve a replica from Pixeltable cloud as a local table. This will create a full local copy of the replica in a
+    way that preserves the table structure of the original source data. Once replicated, the local table can be
+    queried offline just as any other Pixeltable table.
+
+    Args:
+        remote_uri: Remote URI of the table to be replicated, such as `'pxt://org_name/my_dir/my_table'`.
+        local_path: Local table path where the replica will be created, such as `'my_new_dir.my_new_tbl'`. It can be
+            the same or different from the cloud table name.
+
+    Returns:
+        A handle to the newly created local replica table.
+    """
+    if not remote_uri.startswith('pxt://'):
+        raise excs.Error("`remote_uri` must be a remote Pixeltable URI with the prefix 'pxt://'")
+
+    return share.pull_replica(local_path, remote_uri)


 def get_table(path: str) -> catalog.Table:
@@ -498,10 +512,11 @@ def move(path: str, new_path: str) -> None:
 def drop_table(
     table: str | catalog.Table, force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error'
 ) -> None:
-    """Drop a table, view, or
+    """Drop a table, view, snapshot, or replica.

     Args:
-        table: Fully qualified name
+        table: Fully qualified name or table handle of the table to be dropped; or a remote URI of a cloud replica to
+            be deleted.
         force: If `True`, will also drop all views and sub-views of this table.
         if_not_exists: Directive regarding how to handle if the path does not exist.
             Must be one of the following:
@@ -541,13 +556,17 @@ def drop_table(
     assert isinstance(table, str)
     tbl_path = table

+    if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
+
     if tbl_path.startswith('pxt://'):
         # Remote table
+        if force:
+            raise excs.Error('Cannot use `force=True` with a cloud replica URI.')
+        # TODO: Handle if_not_exists properly
         share.delete_replica(tbl_path)
     else:
         # Local table
         path_obj = catalog.Path.parse(tbl_path)
-        if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
         Catalog.get().drop_table(path_obj, force=force, if_not_exists=if_not_exists_)


@@ -763,15 +782,15 @@ def ls(path: str = '') -> pd.DataFrame:
         base = md['base'] or ''
         if base.startswith('_'):
             base = '<anonymous base table>'
-        if md['
+        if md['is_replica']:
+            kind = 'replica'
+        elif md['is_snapshot']:
             kind = 'snapshot'
         elif md['is_view']:
             kind = 'view'
         else:
             kind = 'table'
         version = '' if kind == 'snapshot' else str(md['version'])
-        if md['is_replica']:
-            kind = f'{kind}-replica'
         rows.append([name, kind, version, base])
     return rows

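Taken together, the new `publish`/`replicate` pair plus the `drop_table` changes give a round-trip workflow for cloud replicas. A usage sketch, assuming both functions are exported at the package level (the `__init__.py +2 -1` change in the file list suggests new top-level exports; `org_name` and the table paths are placeholders):

```python
import pixeltable as pxt

tbl = pxt.get_table('my_dir.my_table')

# Push a replica to Pixeltable cloud; bucket_name is optional and defaults to
# the destination database's default storage bucket.
pxt.publish(tbl, 'pxt://org_name/my_dir/my_table', access='private')

# Pull the replica back down as a local table (the local path may differ).
replica = pxt.replicate('pxt://org_name/my_dir/my_table', 'my_new_dir.my_new_tbl')

# drop_table() now accepts a pxt:// URI to delete the cloud replica;
# force=True is rejected for remote URIs in this release.
pxt.drop_table('pxt://org_name/my_dir/my_table')
```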