buildai-cli 0.3.95__tar.gz → 0.3.97__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/PKG-INFO +1 -1
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/ego_frame_search.py +428 -6
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/pyproject.toml +1 -1
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/.gitignore +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/AGENTS.md +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/CLAUDE.md +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/buildai_bootstrap.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/__init__.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/_has_core.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/auth_local.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/__init__.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/api_proxy.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/auth.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/db/__init__.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/db/broker.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/db/common.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/db/migrate.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/db/query.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/db/schema.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/db/status.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/db/tunnel.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/dev.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/doctor.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/egoexo.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/gigcamera.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/grid.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/ingest.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/ingest_docs.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/processing.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/spec.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/spec_pr.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/config.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/console.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/context.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/db_broker.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/guard.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/internal_api.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/main.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/nl_query/__init__.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/nl_query/dataset_tools.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/ops_init.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/output.py +0 -0
- {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/pagination.py +0 -0
|
@@ -7,6 +7,7 @@ import json
|
|
|
7
7
|
import os
|
|
8
8
|
import subprocess
|
|
9
9
|
from dataclasses import asdict, is_dataclass
|
|
10
|
+
from pathlib import Path
|
|
10
11
|
from typing import Any
|
|
11
12
|
from urllib.parse import urlparse
|
|
12
13
|
from uuid import UUID
|
|
@@ -86,6 +87,37 @@ def _settings_for_command(ctx: typer.Context, *, write: bool) -> object:
|
|
|
86
87
|
return init_ops_context(ctx)
|
|
87
88
|
|
|
88
89
|
|
|
90
|
+
def _clip_ids_from_options(
|
|
91
|
+
values: list[UUID],
|
|
92
|
+
file_path: Path | None,
|
|
93
|
+
*,
|
|
94
|
+
limit: int | None = None,
|
|
95
|
+
) -> list[str]:
|
|
96
|
+
"""Return unique clip UUID strings from repeatable CLI values and a newline file."""
|
|
97
|
+
ordered: list[UUID] = list(values)
|
|
98
|
+
if file_path is not None:
|
|
99
|
+
for line_number, raw_line in enumerate(file_path.read_text().splitlines(), start=1):
|
|
100
|
+
text = raw_line.strip()
|
|
101
|
+
if not text or text.startswith("#"):
|
|
102
|
+
continue
|
|
103
|
+
try:
|
|
104
|
+
ordered.append(UUID(text))
|
|
105
|
+
except ValueError as exc:
|
|
106
|
+
raise typer.BadParameter(
|
|
107
|
+
f"invalid UUID on line {line_number} of {file_path}: {text}"
|
|
108
|
+
) from exc
|
|
109
|
+
seen: set[UUID] = set()
|
|
110
|
+
deduped: list[str] = []
|
|
111
|
+
for clip_id in ordered:
|
|
112
|
+
if clip_id in seen:
|
|
113
|
+
continue
|
|
114
|
+
seen.add(clip_id)
|
|
115
|
+
deduped.append(str(clip_id))
|
|
116
|
+
if limit is not None and len(deduped) >= limit:
|
|
117
|
+
break
|
|
118
|
+
return deduped
|
|
119
|
+
|
|
120
|
+
|
|
89
121
|
def _require_internal_admin_for_write(ctx: typer.Context, *, write: bool) -> None:
|
|
90
122
|
"""Require the explicit admin profile before direct DB writes."""
|
|
91
123
|
if not write:
|
|
@@ -869,6 +901,47 @@ def membership_program(
|
|
|
869
901
|
asyncio.run(run())
|
|
870
902
|
|
|
871
903
|
|
|
904
|
+
@app.command("membership-bucket")
|
|
905
|
+
def membership_bucket(
|
|
906
|
+
ctx: typer.Context,
|
|
907
|
+
corpus_key: str = typer.Option(..., "--corpus-key", help="Target corpus key."),
|
|
908
|
+
bucket_uri_prefix: str = typer.Option(..., "--bucket-uri-prefix", help="GCS URI prefix."),
|
|
909
|
+
run_id: str = typer.Option("manual", "--run-id", help="Membership materialization run id."),
|
|
910
|
+
min_source_video_size_bytes: int | None = typer.Option(
|
|
911
|
+
None,
|
|
912
|
+
"--min-source-video-size-bytes",
|
|
913
|
+
min=1,
|
|
914
|
+
help="Require source videos to be larger than this byte count.",
|
|
915
|
+
),
|
|
916
|
+
limit: int | None = typer.Option(None, "--limit", min=1, help="Cap candidate clips."),
|
|
917
|
+
write: bool = typer.Option(False, "--write", help="Insert membership rows."),
|
|
918
|
+
format: Format = format_option(),
|
|
919
|
+
) -> None:
|
|
920
|
+
"""Dry-run or materialize corpus membership from video assets in a GCS bucket prefix."""
|
|
921
|
+
_require_internal_admin_for_write(ctx, write=write)
|
|
922
|
+
settings = _settings_for_command(ctx, write=write)
|
|
923
|
+
|
|
924
|
+
async def run() -> None:
|
|
925
|
+
from dal.embeddings import ego_frame_search as ego_search_dal
|
|
926
|
+
|
|
927
|
+
async with get_cli_context(settings, profile=(ctx.obj or {}).get("cli_profile")) as (
|
|
928
|
+
_db,
|
|
929
|
+
dal_ctx,
|
|
930
|
+
):
|
|
931
|
+
summary = await ego_search_dal.materialize_bucket_membership(
|
|
932
|
+
dal_ctx,
|
|
933
|
+
corpus_key=corpus_key,
|
|
934
|
+
bucket_uri_prefix=bucket_uri_prefix,
|
|
935
|
+
membership_run_id=run_id,
|
|
936
|
+
min_source_video_size_bytes=min_source_video_size_bytes,
|
|
937
|
+
limit=limit,
|
|
938
|
+
write=write,
|
|
939
|
+
)
|
|
940
|
+
render(_plain(summary), format=format)
|
|
941
|
+
|
|
942
|
+
asyncio.run(run())
|
|
943
|
+
|
|
944
|
+
|
|
872
945
|
@app.command("membership-index-space")
|
|
873
946
|
def membership_index_space(
|
|
874
947
|
ctx: typer.Context,
|
|
@@ -935,6 +1008,65 @@ def membership_gigcamera(
|
|
|
935
1008
|
asyncio.run(run())
|
|
936
1009
|
|
|
937
1010
|
|
|
1011
|
+
@app.command("sampled-frames-from-vectors")
|
|
1012
|
+
def sampled_frames_from_vectors(
|
|
1013
|
+
ctx: typer.Context,
|
|
1014
|
+
frame_uri_prefix: str = typer.Option(
|
|
1015
|
+
...,
|
|
1016
|
+
"--frame-uri-prefix",
|
|
1017
|
+
help="Frame-jpg GCS URI prefix to materialize as sampled frames.",
|
|
1018
|
+
),
|
|
1019
|
+
clip_id: list[UUID] = typer.Option(
|
|
1020
|
+
[],
|
|
1021
|
+
"--clip-id",
|
|
1022
|
+
help="Core clip id to include. Repeatable.",
|
|
1023
|
+
),
|
|
1024
|
+
clip_id_file: Path | None = typer.Option(
|
|
1025
|
+
None,
|
|
1026
|
+
"--clip-id-file",
|
|
1027
|
+
exists=True,
|
|
1028
|
+
file_okay=True,
|
|
1029
|
+
dir_okay=False,
|
|
1030
|
+
readable=True,
|
|
1031
|
+
help="Newline-delimited core clip ids to include.",
|
|
1032
|
+
),
|
|
1033
|
+
space_id: int | None = typer.Option(None, "--space-id", help="Target embedding space id."),
|
|
1034
|
+
space_key: str | None = typer.Option(None, "--space-key"),
|
|
1035
|
+
selection_kind: str = typer.Option("near_90s", "--selection-kind"),
|
|
1036
|
+
limit: int | None = typer.Option(None, "--limit", min=1, help="Cap candidate clips."),
|
|
1037
|
+
write: bool = typer.Option(False, "--write", help="Insert missing sampled-frame rows."),
|
|
1038
|
+
format: Format = format_option(),
|
|
1039
|
+
) -> None:
|
|
1040
|
+
"""Dry-run or materialize sampled-frame anchors from existing frame vectors."""
|
|
1041
|
+
_require_internal_admin_for_write(ctx, write=write)
|
|
1042
|
+
settings = _settings_for_command(ctx, write=write)
|
|
1043
|
+
|
|
1044
|
+
async def run() -> None:
|
|
1045
|
+
from dal.embeddings import ego_frame_search as ego_search_dal
|
|
1046
|
+
|
|
1047
|
+
async with get_cli_context(settings, profile=(ctx.obj or {}).get("cli_profile")) as (
|
|
1048
|
+
_db,
|
|
1049
|
+
dal_ctx,
|
|
1050
|
+
):
|
|
1051
|
+
target_space_id = await _resolve_space_id(
|
|
1052
|
+
dal_ctx,
|
|
1053
|
+
space_id=space_id,
|
|
1054
|
+
space_key=space_key,
|
|
1055
|
+
)
|
|
1056
|
+
summary = await ego_search_dal.materialize_sampled_frames_from_existing_vectors(
|
|
1057
|
+
dal_ctx,
|
|
1058
|
+
target_space_id=target_space_id,
|
|
1059
|
+
frame_uri_prefix=frame_uri_prefix,
|
|
1060
|
+
clip_ids=_clip_ids_from_options(clip_id, clip_id_file, limit=None),
|
|
1061
|
+
selection_kind=selection_kind,
|
|
1062
|
+
limit=limit,
|
|
1063
|
+
write=write,
|
|
1064
|
+
)
|
|
1065
|
+
render(_plain(summary), format=format)
|
|
1066
|
+
|
|
1067
|
+
asyncio.run(run())
|
|
1068
|
+
|
|
1069
|
+
|
|
938
1070
|
@app.command("membership-dataset")
|
|
939
1071
|
def membership_dataset(
|
|
940
1072
|
ctx: typer.Context,
|
|
@@ -1165,6 +1297,11 @@ def queue_missing_label_text_embeddings(
|
|
|
1165
1297
|
),
|
|
1166
1298
|
model_config_hash: str | None = typer.Option(None, "--model-config-hash"),
|
|
1167
1299
|
limit: int | None = typer.Option(None, "--limit", min=1, help="Cap selected labels."),
|
|
1300
|
+
required_asset_uri_prefix: list[str] = typer.Option(
|
|
1301
|
+
[],
|
|
1302
|
+
"--required-asset-uri-prefix",
|
|
1303
|
+
help="Require selected frame assets to start with this GCS URI prefix. Repeatable.",
|
|
1304
|
+
),
|
|
1168
1305
|
idempotency_key: str = typer.Option(..., "--idempotency-key", help="Manifest idempotency key."),
|
|
1169
1306
|
max_parallel_requests: int = typer.Option(4, "--max-parallel-requests", min=1),
|
|
1170
1307
|
write: bool = typer.Option(False, "--write", help="Create the processing manifest."),
|
|
@@ -1204,6 +1341,8 @@ def queue_missing_label_text_embeddings(
|
|
|
1204
1341
|
"only_missing_text_embeddings": True,
|
|
1205
1342
|
"max_parallel_requests_per_worker": max_parallel_requests,
|
|
1206
1343
|
}
|
|
1344
|
+
if required_asset_uri_prefix:
|
|
1345
|
+
selection_spec["required_asset_uri_prefixes"] = required_asset_uri_prefix
|
|
1207
1346
|
sink_overrides = {
|
|
1208
1347
|
"embedding": {
|
|
1209
1348
|
"space_id": resolved_text_space_id,
|
|
@@ -1373,11 +1512,35 @@ def build_scann(
|
|
|
1373
1512
|
@app.command("queue-missing-embeddings")
|
|
1374
1513
|
def queue_missing_embeddings(
|
|
1375
1514
|
ctx: typer.Context,
|
|
1376
|
-
corpus_key: list[str] = typer.Option(
|
|
1515
|
+
corpus_key: list[str] = typer.Option([], "--corpus-key", help="Corpus key. Repeatable."),
|
|
1516
|
+
clip_id: list[UUID] = typer.Option(
|
|
1517
|
+
[],
|
|
1518
|
+
"--clip-id",
|
|
1519
|
+
help="Core clip id to include. Repeatable; bypasses corpus membership when set.",
|
|
1520
|
+
),
|
|
1521
|
+
clip_id_file: Path | None = typer.Option(
|
|
1522
|
+
None,
|
|
1523
|
+
"--clip-id-file",
|
|
1524
|
+
exists=True,
|
|
1525
|
+
dir_okay=False,
|
|
1526
|
+
readable=True,
|
|
1527
|
+
help="Newline-delimited core clip ids to include.",
|
|
1528
|
+
),
|
|
1377
1529
|
space_id: int | None = typer.Option(None, "--space-id", help="Target embedding space id."),
|
|
1378
1530
|
space_key: str | None = typer.Option(_INDEX_QWEN_EGO_SPACE_KEY, "--space-key"),
|
|
1379
1531
|
selection_kind: str = typer.Option("near_90s", "--selection-kind"),
|
|
1380
1532
|
limit: int | None = typer.Option(None, "--limit", min=1, help="Cap selected frame rows."),
|
|
1533
|
+
required_asset_uri_prefix: list[str] = typer.Option(
|
|
1534
|
+
[],
|
|
1535
|
+
"--required-asset-uri-prefix",
|
|
1536
|
+
help="Require selected frame assets to start with this GCS URI prefix. Repeatable.",
|
|
1537
|
+
),
|
|
1538
|
+
min_source_video_size_bytes: int | None = typer.Option(
|
|
1539
|
+
None,
|
|
1540
|
+
"--min-source-video-size-bytes",
|
|
1541
|
+
min=1,
|
|
1542
|
+
help="Require selected clips to have a source video larger than this byte count.",
|
|
1543
|
+
),
|
|
1381
1544
|
idempotency_key: str = typer.Option(..., "--idempotency-key", help="Manifest idempotency key."),
|
|
1382
1545
|
cost_cap_usd: float | None = typer.Option(None, "--cost-cap-usd", min=0.0),
|
|
1383
1546
|
write: bool = typer.Option(False, "--write", help="Create the processing manifest."),
|
|
@@ -1387,6 +1550,93 @@ def queue_missing_embeddings(
|
|
|
1387
1550
|
_require_internal_admin_for_write(ctx, write=write)
|
|
1388
1551
|
settings = _settings_for_command(ctx, write=write)
|
|
1389
1552
|
|
|
1553
|
+
async def run() -> None:
|
|
1554
|
+
from dal.processing import media_jobs
|
|
1555
|
+
|
|
1556
|
+
async with get_cli_context(settings, profile=(ctx.obj or {}).get("cli_profile")) as (
|
|
1557
|
+
_db,
|
|
1558
|
+
dal_ctx,
|
|
1559
|
+
):
|
|
1560
|
+
target_space_id = await _resolve_space_id(
|
|
1561
|
+
dal_ctx,
|
|
1562
|
+
space_id=space_id,
|
|
1563
|
+
space_key=space_key,
|
|
1564
|
+
)
|
|
1565
|
+
clip_ids = _clip_ids_from_options(clip_id, clip_id_file, limit=limit)
|
|
1566
|
+
if clip_ids:
|
|
1567
|
+
selection_spec = {
|
|
1568
|
+
"kind": "by_clip_ids",
|
|
1569
|
+
"clip_ids": clip_ids,
|
|
1570
|
+
"selection_kind": selection_kind,
|
|
1571
|
+
"limit": None,
|
|
1572
|
+
"only_missing_vectors": True,
|
|
1573
|
+
}
|
|
1574
|
+
else:
|
|
1575
|
+
if not corpus_key:
|
|
1576
|
+
raise typer.BadParameter("provide --corpus-key or --clip-id/--clip-id-file")
|
|
1577
|
+
selection_spec = {
|
|
1578
|
+
"kind": "by_corpus",
|
|
1579
|
+
"corpus_keys": corpus_key,
|
|
1580
|
+
"selection_kind": selection_kind,
|
|
1581
|
+
"limit": limit,
|
|
1582
|
+
"only_missing_vectors": True,
|
|
1583
|
+
}
|
|
1584
|
+
if required_asset_uri_prefix:
|
|
1585
|
+
selection_spec["required_asset_uri_prefixes"] = required_asset_uri_prefix
|
|
1586
|
+
if min_source_video_size_bytes is not None:
|
|
1587
|
+
selection_spec["min_source_video_size_bytes"] = min_source_video_size_bytes
|
|
1588
|
+
sink_overrides = {"embedding": {"space_id": target_space_id}}
|
|
1589
|
+
resource_overrides = {"cost_cap_usd": cost_cap_usd} if cost_cap_usd is not None else {}
|
|
1590
|
+
if write:
|
|
1591
|
+
manifest = await media_jobs.queue_processor_job(
|
|
1592
|
+
dal_ctx,
|
|
1593
|
+
processor_ref=media_jobs.EMBED_FRAMES_PROCESSOR_REF,
|
|
1594
|
+
selection_spec=selection_spec,
|
|
1595
|
+
sink_overrides=sink_overrides,
|
|
1596
|
+
resource_overrides=resource_overrides,
|
|
1597
|
+
submitted_by_principal=_SUBMITTED_BY,
|
|
1598
|
+
idempotency_key=idempotency_key,
|
|
1599
|
+
)
|
|
1600
|
+
else:
|
|
1601
|
+
manifest = {
|
|
1602
|
+
"dry_run": True,
|
|
1603
|
+
"processor_ref": media_jobs.EMBED_FRAMES_PROCESSOR_REF,
|
|
1604
|
+
"selection_spec": selection_spec,
|
|
1605
|
+
"sink_overrides": sink_overrides,
|
|
1606
|
+
"resource_overrides": resource_overrides,
|
|
1607
|
+
}
|
|
1608
|
+
render(manifest, format=format)
|
|
1609
|
+
|
|
1610
|
+
asyncio.run(run())
|
|
1611
|
+
|
|
1612
|
+
|
|
1613
|
+
@app.command("queue-bucket-missing-embeddings")
|
|
1614
|
+
def queue_bucket_missing_embeddings(
|
|
1615
|
+
ctx: typer.Context,
|
|
1616
|
+
required_asset_uri_prefix: list[str] = typer.Option(
|
|
1617
|
+
...,
|
|
1618
|
+
"--required-asset-uri-prefix",
|
|
1619
|
+
help="Require selected frame assets to start with this GCS URI prefix. Repeatable.",
|
|
1620
|
+
),
|
|
1621
|
+
space_id: int | None = typer.Option(None, "--space-id", help="Target embedding space id."),
|
|
1622
|
+
space_key: str | None = typer.Option(_INDEX_QWEN_EGO_SPACE_KEY, "--space-key"),
|
|
1623
|
+
selection_kind: str = typer.Option("near_90s", "--selection-kind"),
|
|
1624
|
+
min_source_video_size_bytes: int | None = typer.Option(
|
|
1625
|
+
None,
|
|
1626
|
+
"--min-source-video-size-bytes",
|
|
1627
|
+
min=1,
|
|
1628
|
+
help="Require selected clips to have a source video larger than this byte count.",
|
|
1629
|
+
),
|
|
1630
|
+
limit: int | None = typer.Option(None, "--limit", min=1, help="Cap selected frame rows."),
|
|
1631
|
+
idempotency_key: str = typer.Option(..., "--idempotency-key", help="Manifest idempotency key."),
|
|
1632
|
+
cost_cap_usd: float | None = typer.Option(None, "--cost-cap-usd", min=0.0),
|
|
1633
|
+
write: bool = typer.Option(False, "--write", help="Create the processing manifest."),
|
|
1634
|
+
format: Format = format_option(),
|
|
1635
|
+
) -> None:
|
|
1636
|
+
"""Queue missing-only frame embeddings directly from a bucket-scoped frame set."""
|
|
1637
|
+
_require_internal_admin_for_write(ctx, write=write)
|
|
1638
|
+
settings = _settings_for_command(ctx, write=write)
|
|
1639
|
+
|
|
1390
1640
|
async def run() -> None:
|
|
1391
1641
|
from dal.processing import media_jobs
|
|
1392
1642
|
|
|
@@ -1400,12 +1650,14 @@ def queue_missing_embeddings(
|
|
|
1400
1650
|
space_key=space_key,
|
|
1401
1651
|
)
|
|
1402
1652
|
selection_spec = {
|
|
1403
|
-
"kind": "by_corpus",
|
|
1404
|
-
"corpus_keys": corpus_key,
|
|
1653
|
+
"kind": "by_bucket",
|
|
1405
1654
|
"selection_kind": selection_kind,
|
|
1406
1655
|
"limit": limit,
|
|
1407
1656
|
"only_missing_vectors": True,
|
|
1657
|
+
"required_asset_uri_prefixes": required_asset_uri_prefix,
|
|
1408
1658
|
}
|
|
1659
|
+
if min_source_video_size_bytes is not None:
|
|
1660
|
+
selection_spec["min_source_video_size_bytes"] = min_source_video_size_bytes
|
|
1409
1661
|
sink_overrides = {"embedding": {"space_id": target_space_id}}
|
|
1410
1662
|
resource_overrides = {"cost_cap_usd": cost_cap_usd} if cost_cap_usd is not None else {}
|
|
1411
1663
|
if write:
|
|
@@ -1434,9 +1686,55 @@ def queue_missing_embeddings(
|
|
|
1434
1686
|
@app.command("queue-missing-sampled-frames")
|
|
1435
1687
|
def queue_missing_sampled_frames(
|
|
1436
1688
|
ctx: typer.Context,
|
|
1437
|
-
corpus_key: list[str] = typer.Option(
|
|
1689
|
+
corpus_key: list[str] = typer.Option([], "--corpus-key", help="Corpus key. Repeatable."),
|
|
1690
|
+
clip_id: list[UUID] = typer.Option(
|
|
1691
|
+
[],
|
|
1692
|
+
"--clip-id",
|
|
1693
|
+
help="Core clip id to include. Repeatable; bypasses corpus membership when set.",
|
|
1694
|
+
),
|
|
1695
|
+
clip_id_file: Path | None = typer.Option(
|
|
1696
|
+
None,
|
|
1697
|
+
"--clip-id-file",
|
|
1698
|
+
exists=True,
|
|
1699
|
+
dir_okay=False,
|
|
1700
|
+
readable=True,
|
|
1701
|
+
help="Newline-delimited core clip ids to include.",
|
|
1702
|
+
),
|
|
1438
1703
|
selection_kind: str = typer.Option("near_90s", "--selection-kind"),
|
|
1439
1704
|
limit: int | None = typer.Option(None, "--limit", min=1, help="Cap selected clips."),
|
|
1705
|
+
required_asset_uri_prefix: list[str] = typer.Option(
|
|
1706
|
+
[],
|
|
1707
|
+
"--required-asset-uri-prefix",
|
|
1708
|
+
help="Require selected video assets to start with this GCS URI prefix. Repeatable.",
|
|
1709
|
+
),
|
|
1710
|
+
min_source_video_size_bytes: int | None = typer.Option(
|
|
1711
|
+
None,
|
|
1712
|
+
"--min-source-video-size-bytes",
|
|
1713
|
+
min=1,
|
|
1714
|
+
help="Require selected source videos to be larger than this byte count.",
|
|
1715
|
+
),
|
|
1716
|
+
num_frames: int | None = typer.Option(
|
|
1717
|
+
1,
|
|
1718
|
+
"--num-frames",
|
|
1719
|
+
min=1,
|
|
1720
|
+
help="Override extractor frame count for this manifest.",
|
|
1721
|
+
),
|
|
1722
|
+
single_frame_index: int | None = typer.Option(
|
|
1723
|
+
5,
|
|
1724
|
+
"--single-frame-index",
|
|
1725
|
+
min=0,
|
|
1726
|
+
help="Frame index to use when extracting one midpoint frame.",
|
|
1727
|
+
),
|
|
1728
|
+
use_remote_gcs_seek: bool = typer.Option(
|
|
1729
|
+
True,
|
|
1730
|
+
"--remote-gcs-seek/--full-download",
|
|
1731
|
+
help="Use authenticated GCS HTTP range reads for ffmpeg input.",
|
|
1732
|
+
),
|
|
1733
|
+
ffmpeg_scale_flags: str | None = typer.Option(
|
|
1734
|
+
"fast_bilinear",
|
|
1735
|
+
"--ffmpeg-scale-flags",
|
|
1736
|
+
help="ffmpeg scale filter flags for p104-compatible frame pixels.",
|
|
1737
|
+
),
|
|
1440
1738
|
idempotency_key: str = typer.Option(..., "--idempotency-key", help="Manifest idempotency key."),
|
|
1441
1739
|
write: bool = typer.Option(False, "--write", help="Create the processing manifest."),
|
|
1442
1740
|
format: Format = format_option(),
|
|
@@ -1445,6 +1743,113 @@ def queue_missing_sampled_frames(
|
|
|
1445
1743
|
_require_internal_admin_for_write(ctx, write=write)
|
|
1446
1744
|
settings = _settings_for_command(ctx, write=write)
|
|
1447
1745
|
|
|
1746
|
+
async def run() -> None:
|
|
1747
|
+
from dal.processing import media_jobs
|
|
1748
|
+
|
|
1749
|
+
async with get_cli_context(settings, profile=(ctx.obj or {}).get("cli_profile")) as (
|
|
1750
|
+
_db,
|
|
1751
|
+
dal_ctx,
|
|
1752
|
+
):
|
|
1753
|
+
clip_ids = _clip_ids_from_options(clip_id, clip_id_file, limit=limit)
|
|
1754
|
+
if clip_ids:
|
|
1755
|
+
selection_spec = {
|
|
1756
|
+
"kind": "by_ids",
|
|
1757
|
+
"ids": clip_ids,
|
|
1758
|
+
"selection_kind": selection_kind,
|
|
1759
|
+
"limit": None,
|
|
1760
|
+
"only_missing_sampled_frames": True,
|
|
1761
|
+
"require_zero_frames": False,
|
|
1762
|
+
}
|
|
1763
|
+
else:
|
|
1764
|
+
if not corpus_key:
|
|
1765
|
+
raise typer.BadParameter("provide --corpus-key or --clip-id/--clip-id-file")
|
|
1766
|
+
selection_spec = {
|
|
1767
|
+
"kind": "by_corpus",
|
|
1768
|
+
"corpus_keys": corpus_key,
|
|
1769
|
+
"selection_kind": selection_kind,
|
|
1770
|
+
"limit": limit,
|
|
1771
|
+
"only_missing_sampled_frames": True,
|
|
1772
|
+
"require_zero_frames": False,
|
|
1773
|
+
}
|
|
1774
|
+
if required_asset_uri_prefix:
|
|
1775
|
+
selection_spec["required_asset_uri_prefixes"] = required_asset_uri_prefix
|
|
1776
|
+
if min_source_video_size_bytes is not None:
|
|
1777
|
+
selection_spec["min_source_video_size_bytes"] = min_source_video_size_bytes
|
|
1778
|
+
if num_frames is not None:
|
|
1779
|
+
selection_spec["num_frames"] = num_frames
|
|
1780
|
+
if single_frame_index is not None:
|
|
1781
|
+
selection_spec["single_frame_index"] = single_frame_index
|
|
1782
|
+
if use_remote_gcs_seek:
|
|
1783
|
+
selection_spec["use_remote_gcs_seek"] = True
|
|
1784
|
+
if ffmpeg_scale_flags:
|
|
1785
|
+
selection_spec["ffmpeg_scale_flags"] = ffmpeg_scale_flags
|
|
1786
|
+
if write:
|
|
1787
|
+
manifest = await media_jobs.queue_processor_job(
|
|
1788
|
+
dal_ctx,
|
|
1789
|
+
processor_ref=media_jobs.EXTRACT_FRAMES_PROCESSOR_REF,
|
|
1790
|
+
selection_spec=selection_spec,
|
|
1791
|
+
sink_overrides=None,
|
|
1792
|
+
resource_overrides=None,
|
|
1793
|
+
submitted_by_principal=_SUBMITTED_BY,
|
|
1794
|
+
idempotency_key=idempotency_key,
|
|
1795
|
+
)
|
|
1796
|
+
else:
|
|
1797
|
+
manifest = {
|
|
1798
|
+
"dry_run": True,
|
|
1799
|
+
"processor_ref": media_jobs.EXTRACT_FRAMES_PROCESSOR_REF,
|
|
1800
|
+
"selection_spec": selection_spec,
|
|
1801
|
+
}
|
|
1802
|
+
render(manifest, format=format)
|
|
1803
|
+
|
|
1804
|
+
asyncio.run(run())
|
|
1805
|
+
|
|
1806
|
+
|
|
1807
|
+
@app.command("queue-bucket-missing-sampled-frames")
|
|
1808
|
+
def queue_bucket_missing_sampled_frames(
|
|
1809
|
+
ctx: typer.Context,
|
|
1810
|
+
required_asset_uri_prefix: list[str] = typer.Option(
|
|
1811
|
+
...,
|
|
1812
|
+
"--required-asset-uri-prefix",
|
|
1813
|
+
help="Require selected video assets to start with this GCS URI prefix. Repeatable.",
|
|
1814
|
+
),
|
|
1815
|
+
selection_kind: str = typer.Option("near_90s", "--selection-kind"),
|
|
1816
|
+
min_source_video_size_bytes: int | None = typer.Option(
|
|
1817
|
+
None,
|
|
1818
|
+
"--min-source-video-size-bytes",
|
|
1819
|
+
min=1,
|
|
1820
|
+
help="Require selected source videos to be larger than this byte count.",
|
|
1821
|
+
),
|
|
1822
|
+
num_frames: int | None = typer.Option(
|
|
1823
|
+
1,
|
|
1824
|
+
"--num-frames",
|
|
1825
|
+
min=1,
|
|
1826
|
+
help="Override extractor frame count for this manifest.",
|
|
1827
|
+
),
|
|
1828
|
+
single_frame_index: int | None = typer.Option(
|
|
1829
|
+
5,
|
|
1830
|
+
"--single-frame-index",
|
|
1831
|
+
min=0,
|
|
1832
|
+
help="Frame index to use when extracting one midpoint frame.",
|
|
1833
|
+
),
|
|
1834
|
+
use_remote_gcs_seek: bool = typer.Option(
|
|
1835
|
+
True,
|
|
1836
|
+
"--remote-gcs-seek/--full-download",
|
|
1837
|
+
help="Use authenticated GCS HTTP range reads for ffmpeg input.",
|
|
1838
|
+
),
|
|
1839
|
+
ffmpeg_scale_flags: str | None = typer.Option(
|
|
1840
|
+
"fast_bilinear",
|
|
1841
|
+
"--ffmpeg-scale-flags",
|
|
1842
|
+
help="ffmpeg scale filter flags for p104-compatible frame pixels.",
|
|
1843
|
+
),
|
|
1844
|
+
limit: int | None = typer.Option(None, "--limit", min=1, help="Cap selected clips."),
|
|
1845
|
+
idempotency_key: str = typer.Option(..., "--idempotency-key", help="Manifest idempotency key."),
|
|
1846
|
+
write: bool = typer.Option(False, "--write", help="Create the processing manifest."),
|
|
1847
|
+
format: Format = format_option(),
|
|
1848
|
+
) -> None:
|
|
1849
|
+
"""Queue missing sampled-frame extraction directly from a bucket-scoped video set."""
|
|
1850
|
+
_require_internal_admin_for_write(ctx, write=write)
|
|
1851
|
+
settings = _settings_for_command(ctx, write=write)
|
|
1852
|
+
|
|
1448
1853
|
async def run() -> None:
|
|
1449
1854
|
from dal.processing import media_jobs
|
|
1450
1855
|
|
|
@@ -1453,13 +1858,23 @@ def queue_missing_sampled_frames(
|
|
|
1453
1858
|
dal_ctx,
|
|
1454
1859
|
):
|
|
1455
1860
|
selection_spec = {
|
|
1456
|
-
"kind": "by_corpus",
|
|
1457
|
-
"corpus_keys": corpus_key,
|
|
1861
|
+
"kind": "by_bucket",
|
|
1458
1862
|
"selection_kind": selection_kind,
|
|
1459
1863
|
"limit": limit,
|
|
1460
1864
|
"only_missing_sampled_frames": True,
|
|
1461
1865
|
"require_zero_frames": False,
|
|
1866
|
+
"required_asset_uri_prefixes": required_asset_uri_prefix,
|
|
1462
1867
|
}
|
|
1868
|
+
if min_source_video_size_bytes is not None:
|
|
1869
|
+
selection_spec["min_source_video_size_bytes"] = min_source_video_size_bytes
|
|
1870
|
+
if num_frames is not None:
|
|
1871
|
+
selection_spec["num_frames"] = num_frames
|
|
1872
|
+
if single_frame_index is not None:
|
|
1873
|
+
selection_spec["single_frame_index"] = single_frame_index
|
|
1874
|
+
if use_remote_gcs_seek:
|
|
1875
|
+
selection_spec["use_remote_gcs_seek"] = True
|
|
1876
|
+
if ffmpeg_scale_flags:
|
|
1877
|
+
selection_spec["ffmpeg_scale_flags"] = ffmpeg_scale_flags
|
|
1463
1878
|
if write:
|
|
1464
1879
|
manifest = await media_jobs.queue_processor_job(
|
|
1465
1880
|
dal_ctx,
|
|
@@ -1498,6 +1913,11 @@ def queue_missing_demian_labels(
|
|
|
1498
1913
|
source_id: str = typer.Option("demian", "--source-id", help="Non-secret label source alias."),
|
|
1499
1914
|
endpoint_url: str | None = typer.Option(None, "--endpoint-url"),
|
|
1500
1915
|
limit: int | None = typer.Option(None, "--limit", min=1, help="Cap selected frames."),
|
|
1916
|
+
required_asset_uri_prefix: list[str] = typer.Option(
|
|
1917
|
+
[],
|
|
1918
|
+
"--required-asset-uri-prefix",
|
|
1919
|
+
help="Require selected frame assets to start with this GCS URI prefix. Repeatable.",
|
|
1920
|
+
),
|
|
1501
1921
|
idempotency_key: str = typer.Option(..., "--idempotency-key", help="Manifest idempotency key."),
|
|
1502
1922
|
cost_cap_usd: float | None = typer.Option(None, "--cost-cap-usd", min=0.0),
|
|
1503
1923
|
estimated_cost_per_label_usd: float = typer.Option(
|
|
@@ -1549,6 +1969,8 @@ def queue_missing_demian_labels(
|
|
|
1549
1969
|
"endpoint_timeout_sec": endpoint_timeout_sec,
|
|
1550
1970
|
"allow_external_bearer": allow_external_bearer,
|
|
1551
1971
|
}
|
|
1972
|
+
if required_asset_uri_prefix:
|
|
1973
|
+
selection_spec["required_asset_uri_prefixes"] = required_asset_uri_prefix
|
|
1552
1974
|
sink_overrides = {
|
|
1553
1975
|
"observation": {
|
|
1554
1976
|
"table": "observations.frame_observations",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|