buildai-cli 0.3.95__tar.gz → 0.3.97__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/PKG-INFO +1 -1
  2. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/ego_frame_search.py +428 -6
  3. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/pyproject.toml +1 -1
  4. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/.gitignore +0 -0
  5. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/AGENTS.md +0 -0
  6. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/CLAUDE.md +0 -0
  7. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/buildai_bootstrap.py +0 -0
  8. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/__init__.py +0 -0
  9. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/_has_core.py +0 -0
  10. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/auth_local.py +0 -0
  11. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/__init__.py +0 -0
  12. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/api_proxy.py +0 -0
  13. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/auth.py +0 -0
  14. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/db/__init__.py +0 -0
  15. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/db/broker.py +0 -0
  16. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/db/common.py +0 -0
  17. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/db/migrate.py +0 -0
  18. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/db/query.py +0 -0
  19. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/db/schema.py +0 -0
  20. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/db/status.py +0 -0
  21. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/db/tunnel.py +0 -0
  22. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/dev.py +0 -0
  23. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/doctor.py +0 -0
  24. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/egoexo.py +0 -0
  25. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/gigcamera.py +0 -0
  26. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/grid.py +0 -0
  27. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/ingest.py +0 -0
  28. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/ingest_docs.py +0 -0
  29. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/processing.py +0 -0
  30. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/spec.py +0 -0
  31. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/commands/spec_pr.py +0 -0
  32. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/config.py +0 -0
  33. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/console.py +0 -0
  34. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/context.py +0 -0
  35. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/db_broker.py +0 -0
  36. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/guard.py +0 -0
  37. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/internal_api.py +0 -0
  38. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/main.py +0 -0
  39. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/nl_query/__init__.py +0 -0
  40. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/nl_query/dataset_tools.py +0 -0
  41. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/ops_init.py +0 -0
  42. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/output.py +0 -0
  43. {buildai_cli-0.3.95 → buildai_cli-0.3.97}/cli/pagination.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: buildai-cli
3
- Version: 0.3.95
3
+ Version: 0.3.97
4
4
  Summary: Build AI CLI (Typer)
5
5
  Requires-Python: >=3.11
6
6
  Requires-Dist: httpx>=0.27.0
@@ -7,6 +7,7 @@ import json
7
7
  import os
8
8
  import subprocess
9
9
  from dataclasses import asdict, is_dataclass
10
+ from pathlib import Path
10
11
  from typing import Any
11
12
  from urllib.parse import urlparse
12
13
  from uuid import UUID
@@ -86,6 +87,37 @@ def _settings_for_command(ctx: typer.Context, *, write: bool) -> object:
86
87
  return init_ops_context(ctx)
87
88
 
88
89
 
90
+ def _clip_ids_from_options(
91
+ values: list[UUID],
92
+ file_path: Path | None,
93
+ *,
94
+ limit: int | None = None,
95
+ ) -> list[str]:
96
+ """Return unique clip UUID strings from repeatable CLI values and a newline file."""
97
+ ordered: list[UUID] = list(values)
98
+ if file_path is not None:
99
+ for line_number, raw_line in enumerate(file_path.read_text().splitlines(), start=1):
100
+ text = raw_line.strip()
101
+ if not text or text.startswith("#"):
102
+ continue
103
+ try:
104
+ ordered.append(UUID(text))
105
+ except ValueError as exc:
106
+ raise typer.BadParameter(
107
+ f"invalid UUID on line {line_number} of {file_path}: {text}"
108
+ ) from exc
109
+ seen: set[UUID] = set()
110
+ deduped: list[str] = []
111
+ for clip_id in ordered:
112
+ if clip_id in seen:
113
+ continue
114
+ seen.add(clip_id)
115
+ deduped.append(str(clip_id))
116
+ if limit is not None and len(deduped) >= limit:
117
+ break
118
+ return deduped
119
+
120
+
89
121
  def _require_internal_admin_for_write(ctx: typer.Context, *, write: bool) -> None:
90
122
  """Require the explicit admin profile before direct DB writes."""
91
123
  if not write:
@@ -869,6 +901,47 @@ def membership_program(
869
901
  asyncio.run(run())
870
902
 
871
903
 
904
+ @app.command("membership-bucket")
905
+ def membership_bucket(
906
+ ctx: typer.Context,
907
+ corpus_key: str = typer.Option(..., "--corpus-key", help="Target corpus key."),
908
+ bucket_uri_prefix: str = typer.Option(..., "--bucket-uri-prefix", help="GCS URI prefix."),
909
+ run_id: str = typer.Option("manual", "--run-id", help="Membership materialization run id."),
910
+ min_source_video_size_bytes: int | None = typer.Option(
911
+ None,
912
+ "--min-source-video-size-bytes",
913
+ min=1,
914
+ help="Require source videos to be larger than this byte count.",
915
+ ),
916
+ limit: int | None = typer.Option(None, "--limit", min=1, help="Cap candidate clips."),
917
+ write: bool = typer.Option(False, "--write", help="Insert membership rows."),
918
+ format: Format = format_option(),
919
+ ) -> None:
920
+ """Dry-run or materialize corpus membership from video assets in a GCS bucket prefix."""
921
+ _require_internal_admin_for_write(ctx, write=write)
922
+ settings = _settings_for_command(ctx, write=write)
923
+
924
+ async def run() -> None:
925
+ from dal.embeddings import ego_frame_search as ego_search_dal
926
+
927
+ async with get_cli_context(settings, profile=(ctx.obj or {}).get("cli_profile")) as (
928
+ _db,
929
+ dal_ctx,
930
+ ):
931
+ summary = await ego_search_dal.materialize_bucket_membership(
932
+ dal_ctx,
933
+ corpus_key=corpus_key,
934
+ bucket_uri_prefix=bucket_uri_prefix,
935
+ membership_run_id=run_id,
936
+ min_source_video_size_bytes=min_source_video_size_bytes,
937
+ limit=limit,
938
+ write=write,
939
+ )
940
+ render(_plain(summary), format=format)
941
+
942
+ asyncio.run(run())
943
+
944
+
872
945
  @app.command("membership-index-space")
873
946
  def membership_index_space(
874
947
  ctx: typer.Context,
@@ -935,6 +1008,65 @@ def membership_gigcamera(
935
1008
  asyncio.run(run())
936
1009
 
937
1010
 
1011
+ @app.command("sampled-frames-from-vectors")
1012
+ def sampled_frames_from_vectors(
1013
+ ctx: typer.Context,
1014
+ frame_uri_prefix: str = typer.Option(
1015
+ ...,
1016
+ "--frame-uri-prefix",
1017
+ help="Frame-jpg GCS URI prefix to materialize as sampled frames.",
1018
+ ),
1019
+ clip_id: list[UUID] = typer.Option(
1020
+ [],
1021
+ "--clip-id",
1022
+ help="Core clip id to include. Repeatable.",
1023
+ ),
1024
+ clip_id_file: Path | None = typer.Option(
1025
+ None,
1026
+ "--clip-id-file",
1027
+ exists=True,
1028
+ file_okay=True,
1029
+ dir_okay=False,
1030
+ readable=True,
1031
+ help="Newline-delimited core clip ids to include.",
1032
+ ),
1033
+ space_id: int | None = typer.Option(None, "--space-id", help="Target embedding space id."),
1034
+ space_key: str | None = typer.Option(None, "--space-key"),
1035
+ selection_kind: str = typer.Option("near_90s", "--selection-kind"),
1036
+ limit: int | None = typer.Option(None, "--limit", min=1, help="Cap candidate clips."),
1037
+ write: bool = typer.Option(False, "--write", help="Insert missing sampled-frame rows."),
1038
+ format: Format = format_option(),
1039
+ ) -> None:
1040
+ """Dry-run or materialize sampled-frame anchors from existing frame vectors."""
1041
+ _require_internal_admin_for_write(ctx, write=write)
1042
+ settings = _settings_for_command(ctx, write=write)
1043
+
1044
+ async def run() -> None:
1045
+ from dal.embeddings import ego_frame_search as ego_search_dal
1046
+
1047
+ async with get_cli_context(settings, profile=(ctx.obj or {}).get("cli_profile")) as (
1048
+ _db,
1049
+ dal_ctx,
1050
+ ):
1051
+ target_space_id = await _resolve_space_id(
1052
+ dal_ctx,
1053
+ space_id=space_id,
1054
+ space_key=space_key,
1055
+ )
1056
+ summary = await ego_search_dal.materialize_sampled_frames_from_existing_vectors(
1057
+ dal_ctx,
1058
+ target_space_id=target_space_id,
1059
+ frame_uri_prefix=frame_uri_prefix,
1060
+ clip_ids=_clip_ids_from_options(clip_id, clip_id_file, limit=None),
1061
+ selection_kind=selection_kind,
1062
+ limit=limit,
1063
+ write=write,
1064
+ )
1065
+ render(_plain(summary), format=format)
1066
+
1067
+ asyncio.run(run())
1068
+
1069
+
938
1070
  @app.command("membership-dataset")
939
1071
  def membership_dataset(
940
1072
  ctx: typer.Context,
@@ -1165,6 +1297,11 @@ def queue_missing_label_text_embeddings(
1165
1297
  ),
1166
1298
  model_config_hash: str | None = typer.Option(None, "--model-config-hash"),
1167
1299
  limit: int | None = typer.Option(None, "--limit", min=1, help="Cap selected labels."),
1300
+ required_asset_uri_prefix: list[str] = typer.Option(
1301
+ [],
1302
+ "--required-asset-uri-prefix",
1303
+ help="Require selected frame assets to start with this GCS URI prefix. Repeatable.",
1304
+ ),
1168
1305
  idempotency_key: str = typer.Option(..., "--idempotency-key", help="Manifest idempotency key."),
1169
1306
  max_parallel_requests: int = typer.Option(4, "--max-parallel-requests", min=1),
1170
1307
  write: bool = typer.Option(False, "--write", help="Create the processing manifest."),
@@ -1204,6 +1341,8 @@ def queue_missing_label_text_embeddings(
1204
1341
  "only_missing_text_embeddings": True,
1205
1342
  "max_parallel_requests_per_worker": max_parallel_requests,
1206
1343
  }
1344
+ if required_asset_uri_prefix:
1345
+ selection_spec["required_asset_uri_prefixes"] = required_asset_uri_prefix
1207
1346
  sink_overrides = {
1208
1347
  "embedding": {
1209
1348
  "space_id": resolved_text_space_id,
@@ -1373,11 +1512,35 @@ def build_scann(
1373
1512
  @app.command("queue-missing-embeddings")
1374
1513
  def queue_missing_embeddings(
1375
1514
  ctx: typer.Context,
1376
- corpus_key: list[str] = typer.Option(..., "--corpus-key", help="Corpus key. Repeatable."),
1515
+ corpus_key: list[str] = typer.Option([], "--corpus-key", help="Corpus key. Repeatable."),
1516
+ clip_id: list[UUID] = typer.Option(
1517
+ [],
1518
+ "--clip-id",
1519
+ help="Core clip id to include. Repeatable; bypasses corpus membership when set.",
1520
+ ),
1521
+ clip_id_file: Path | None = typer.Option(
1522
+ None,
1523
+ "--clip-id-file",
1524
+ exists=True,
1525
+ dir_okay=False,
1526
+ readable=True,
1527
+ help="Newline-delimited core clip ids to include.",
1528
+ ),
1377
1529
  space_id: int | None = typer.Option(None, "--space-id", help="Target embedding space id."),
1378
1530
  space_key: str | None = typer.Option(_INDEX_QWEN_EGO_SPACE_KEY, "--space-key"),
1379
1531
  selection_kind: str = typer.Option("near_90s", "--selection-kind"),
1380
1532
  limit: int | None = typer.Option(None, "--limit", min=1, help="Cap selected frame rows."),
1533
+ required_asset_uri_prefix: list[str] = typer.Option(
1534
+ [],
1535
+ "--required-asset-uri-prefix",
1536
+ help="Require selected frame assets to start with this GCS URI prefix. Repeatable.",
1537
+ ),
1538
+ min_source_video_size_bytes: int | None = typer.Option(
1539
+ None,
1540
+ "--min-source-video-size-bytes",
1541
+ min=1,
1542
+ help="Require selected clips to have a source video larger than this byte count.",
1543
+ ),
1381
1544
  idempotency_key: str = typer.Option(..., "--idempotency-key", help="Manifest idempotency key."),
1382
1545
  cost_cap_usd: float | None = typer.Option(None, "--cost-cap-usd", min=0.0),
1383
1546
  write: bool = typer.Option(False, "--write", help="Create the processing manifest."),
@@ -1387,6 +1550,93 @@ def queue_missing_embeddings(
1387
1550
  _require_internal_admin_for_write(ctx, write=write)
1388
1551
  settings = _settings_for_command(ctx, write=write)
1389
1552
 
1553
+ async def run() -> None:
1554
+ from dal.processing import media_jobs
1555
+
1556
+ async with get_cli_context(settings, profile=(ctx.obj or {}).get("cli_profile")) as (
1557
+ _db,
1558
+ dal_ctx,
1559
+ ):
1560
+ target_space_id = await _resolve_space_id(
1561
+ dal_ctx,
1562
+ space_id=space_id,
1563
+ space_key=space_key,
1564
+ )
1565
+ clip_ids = _clip_ids_from_options(clip_id, clip_id_file, limit=limit)
1566
+ if clip_ids:
1567
+ selection_spec = {
1568
+ "kind": "by_clip_ids",
1569
+ "clip_ids": clip_ids,
1570
+ "selection_kind": selection_kind,
1571
+ "limit": None,
1572
+ "only_missing_vectors": True,
1573
+ }
1574
+ else:
1575
+ if not corpus_key:
1576
+ raise typer.BadParameter("provide --corpus-key or --clip-id/--clip-id-file")
1577
+ selection_spec = {
1578
+ "kind": "by_corpus",
1579
+ "corpus_keys": corpus_key,
1580
+ "selection_kind": selection_kind,
1581
+ "limit": limit,
1582
+ "only_missing_vectors": True,
1583
+ }
1584
+ if required_asset_uri_prefix:
1585
+ selection_spec["required_asset_uri_prefixes"] = required_asset_uri_prefix
1586
+ if min_source_video_size_bytes is not None:
1587
+ selection_spec["min_source_video_size_bytes"] = min_source_video_size_bytes
1588
+ sink_overrides = {"embedding": {"space_id": target_space_id}}
1589
+ resource_overrides = {"cost_cap_usd": cost_cap_usd} if cost_cap_usd is not None else {}
1590
+ if write:
1591
+ manifest = await media_jobs.queue_processor_job(
1592
+ dal_ctx,
1593
+ processor_ref=media_jobs.EMBED_FRAMES_PROCESSOR_REF,
1594
+ selection_spec=selection_spec,
1595
+ sink_overrides=sink_overrides,
1596
+ resource_overrides=resource_overrides,
1597
+ submitted_by_principal=_SUBMITTED_BY,
1598
+ idempotency_key=idempotency_key,
1599
+ )
1600
+ else:
1601
+ manifest = {
1602
+ "dry_run": True,
1603
+ "processor_ref": media_jobs.EMBED_FRAMES_PROCESSOR_REF,
1604
+ "selection_spec": selection_spec,
1605
+ "sink_overrides": sink_overrides,
1606
+ "resource_overrides": resource_overrides,
1607
+ }
1608
+ render(manifest, format=format)
1609
+
1610
+ asyncio.run(run())
1611
+
1612
+
1613
+ @app.command("queue-bucket-missing-embeddings")
1614
+ def queue_bucket_missing_embeddings(
1615
+ ctx: typer.Context,
1616
+ required_asset_uri_prefix: list[str] = typer.Option(
1617
+ ...,
1618
+ "--required-asset-uri-prefix",
1619
+ help="Require selected frame assets to start with this GCS URI prefix. Repeatable.",
1620
+ ),
1621
+ space_id: int | None = typer.Option(None, "--space-id", help="Target embedding space id."),
1622
+ space_key: str | None = typer.Option(_INDEX_QWEN_EGO_SPACE_KEY, "--space-key"),
1623
+ selection_kind: str = typer.Option("near_90s", "--selection-kind"),
1624
+ min_source_video_size_bytes: int | None = typer.Option(
1625
+ None,
1626
+ "--min-source-video-size-bytes",
1627
+ min=1,
1628
+ help="Require selected clips to have a source video larger than this byte count.",
1629
+ ),
1630
+ limit: int | None = typer.Option(None, "--limit", min=1, help="Cap selected frame rows."),
1631
+ idempotency_key: str = typer.Option(..., "--idempotency-key", help="Manifest idempotency key."),
1632
+ cost_cap_usd: float | None = typer.Option(None, "--cost-cap-usd", min=0.0),
1633
+ write: bool = typer.Option(False, "--write", help="Create the processing manifest."),
1634
+ format: Format = format_option(),
1635
+ ) -> None:
1636
+ """Queue missing-only frame embeddings directly from a bucket-scoped frame set."""
1637
+ _require_internal_admin_for_write(ctx, write=write)
1638
+ settings = _settings_for_command(ctx, write=write)
1639
+
1390
1640
  async def run() -> None:
1391
1641
  from dal.processing import media_jobs
1392
1642
 
@@ -1400,12 +1650,14 @@ def queue_missing_embeddings(
1400
1650
  space_key=space_key,
1401
1651
  )
1402
1652
  selection_spec = {
1403
- "kind": "by_corpus",
1404
- "corpus_keys": corpus_key,
1653
+ "kind": "by_bucket",
1405
1654
  "selection_kind": selection_kind,
1406
1655
  "limit": limit,
1407
1656
  "only_missing_vectors": True,
1657
+ "required_asset_uri_prefixes": required_asset_uri_prefix,
1408
1658
  }
1659
+ if min_source_video_size_bytes is not None:
1660
+ selection_spec["min_source_video_size_bytes"] = min_source_video_size_bytes
1409
1661
  sink_overrides = {"embedding": {"space_id": target_space_id}}
1410
1662
  resource_overrides = {"cost_cap_usd": cost_cap_usd} if cost_cap_usd is not None else {}
1411
1663
  if write:
@@ -1434,9 +1686,55 @@ def queue_missing_embeddings(
1434
1686
  @app.command("queue-missing-sampled-frames")
1435
1687
  def queue_missing_sampled_frames(
1436
1688
  ctx: typer.Context,
1437
- corpus_key: list[str] = typer.Option(..., "--corpus-key", help="Corpus key. Repeatable."),
1689
+ corpus_key: list[str] = typer.Option([], "--corpus-key", help="Corpus key. Repeatable."),
1690
+ clip_id: list[UUID] = typer.Option(
1691
+ [],
1692
+ "--clip-id",
1693
+ help="Core clip id to include. Repeatable; bypasses corpus membership when set.",
1694
+ ),
1695
+ clip_id_file: Path | None = typer.Option(
1696
+ None,
1697
+ "--clip-id-file",
1698
+ exists=True,
1699
+ dir_okay=False,
1700
+ readable=True,
1701
+ help="Newline-delimited core clip ids to include.",
1702
+ ),
1438
1703
  selection_kind: str = typer.Option("near_90s", "--selection-kind"),
1439
1704
  limit: int | None = typer.Option(None, "--limit", min=1, help="Cap selected clips."),
1705
+ required_asset_uri_prefix: list[str] = typer.Option(
1706
+ [],
1707
+ "--required-asset-uri-prefix",
1708
+ help="Require selected video assets to start with this GCS URI prefix. Repeatable.",
1709
+ ),
1710
+ min_source_video_size_bytes: int | None = typer.Option(
1711
+ None,
1712
+ "--min-source-video-size-bytes",
1713
+ min=1,
1714
+ help="Require selected source videos to be larger than this byte count.",
1715
+ ),
1716
+ num_frames: int | None = typer.Option(
1717
+ 1,
1718
+ "--num-frames",
1719
+ min=1,
1720
+ help="Override extractor frame count for this manifest.",
1721
+ ),
1722
+ single_frame_index: int | None = typer.Option(
1723
+ 5,
1724
+ "--single-frame-index",
1725
+ min=0,
1726
+ help="Frame index to use when extracting one midpoint frame.",
1727
+ ),
1728
+ use_remote_gcs_seek: bool = typer.Option(
1729
+ True,
1730
+ "--remote-gcs-seek/--full-download",
1731
+ help="Use authenticated GCS HTTP range reads for ffmpeg input.",
1732
+ ),
1733
+ ffmpeg_scale_flags: str | None = typer.Option(
1734
+ "fast_bilinear",
1735
+ "--ffmpeg-scale-flags",
1736
+ help="ffmpeg scale filter flags for p104-compatible frame pixels.",
1737
+ ),
1440
1738
  idempotency_key: str = typer.Option(..., "--idempotency-key", help="Manifest idempotency key."),
1441
1739
  write: bool = typer.Option(False, "--write", help="Create the processing manifest."),
1442
1740
  format: Format = format_option(),
@@ -1445,6 +1743,113 @@ def queue_missing_sampled_frames(
1445
1743
  _require_internal_admin_for_write(ctx, write=write)
1446
1744
  settings = _settings_for_command(ctx, write=write)
1447
1745
 
1746
+ async def run() -> None:
1747
+ from dal.processing import media_jobs
1748
+
1749
+ async with get_cli_context(settings, profile=(ctx.obj or {}).get("cli_profile")) as (
1750
+ _db,
1751
+ dal_ctx,
1752
+ ):
1753
+ clip_ids = _clip_ids_from_options(clip_id, clip_id_file, limit=limit)
1754
+ if clip_ids:
1755
+ selection_spec = {
1756
+ "kind": "by_ids",
1757
+ "ids": clip_ids,
1758
+ "selection_kind": selection_kind,
1759
+ "limit": None,
1760
+ "only_missing_sampled_frames": True,
1761
+ "require_zero_frames": False,
1762
+ }
1763
+ else:
1764
+ if not corpus_key:
1765
+ raise typer.BadParameter("provide --corpus-key or --clip-id/--clip-id-file")
1766
+ selection_spec = {
1767
+ "kind": "by_corpus",
1768
+ "corpus_keys": corpus_key,
1769
+ "selection_kind": selection_kind,
1770
+ "limit": limit,
1771
+ "only_missing_sampled_frames": True,
1772
+ "require_zero_frames": False,
1773
+ }
1774
+ if required_asset_uri_prefix:
1775
+ selection_spec["required_asset_uri_prefixes"] = required_asset_uri_prefix
1776
+ if min_source_video_size_bytes is not None:
1777
+ selection_spec["min_source_video_size_bytes"] = min_source_video_size_bytes
1778
+ if num_frames is not None:
1779
+ selection_spec["num_frames"] = num_frames
1780
+ if single_frame_index is not None:
1781
+ selection_spec["single_frame_index"] = single_frame_index
1782
+ if use_remote_gcs_seek:
1783
+ selection_spec["use_remote_gcs_seek"] = True
1784
+ if ffmpeg_scale_flags:
1785
+ selection_spec["ffmpeg_scale_flags"] = ffmpeg_scale_flags
1786
+ if write:
1787
+ manifest = await media_jobs.queue_processor_job(
1788
+ dal_ctx,
1789
+ processor_ref=media_jobs.EXTRACT_FRAMES_PROCESSOR_REF,
1790
+ selection_spec=selection_spec,
1791
+ sink_overrides=None,
1792
+ resource_overrides=None,
1793
+ submitted_by_principal=_SUBMITTED_BY,
1794
+ idempotency_key=idempotency_key,
1795
+ )
1796
+ else:
1797
+ manifest = {
1798
+ "dry_run": True,
1799
+ "processor_ref": media_jobs.EXTRACT_FRAMES_PROCESSOR_REF,
1800
+ "selection_spec": selection_spec,
1801
+ }
1802
+ render(manifest, format=format)
1803
+
1804
+ asyncio.run(run())
1805
+
1806
+
1807
+ @app.command("queue-bucket-missing-sampled-frames")
1808
+ def queue_bucket_missing_sampled_frames(
1809
+ ctx: typer.Context,
1810
+ required_asset_uri_prefix: list[str] = typer.Option(
1811
+ ...,
1812
+ "--required-asset-uri-prefix",
1813
+ help="Require selected video assets to start with this GCS URI prefix. Repeatable.",
1814
+ ),
1815
+ selection_kind: str = typer.Option("near_90s", "--selection-kind"),
1816
+ min_source_video_size_bytes: int | None = typer.Option(
1817
+ None,
1818
+ "--min-source-video-size-bytes",
1819
+ min=1,
1820
+ help="Require selected source videos to be larger than this byte count.",
1821
+ ),
1822
+ num_frames: int | None = typer.Option(
1823
+ 1,
1824
+ "--num-frames",
1825
+ min=1,
1826
+ help="Override extractor frame count for this manifest.",
1827
+ ),
1828
+ single_frame_index: int | None = typer.Option(
1829
+ 5,
1830
+ "--single-frame-index",
1831
+ min=0,
1832
+ help="Frame index to use when extracting one midpoint frame.",
1833
+ ),
1834
+ use_remote_gcs_seek: bool = typer.Option(
1835
+ True,
1836
+ "--remote-gcs-seek/--full-download",
1837
+ help="Use authenticated GCS HTTP range reads for ffmpeg input.",
1838
+ ),
1839
+ ffmpeg_scale_flags: str | None = typer.Option(
1840
+ "fast_bilinear",
1841
+ "--ffmpeg-scale-flags",
1842
+ help="ffmpeg scale filter flags for p104-compatible frame pixels.",
1843
+ ),
1844
+ limit: int | None = typer.Option(None, "--limit", min=1, help="Cap selected clips."),
1845
+ idempotency_key: str = typer.Option(..., "--idempotency-key", help="Manifest idempotency key."),
1846
+ write: bool = typer.Option(False, "--write", help="Create the processing manifest."),
1847
+ format: Format = format_option(),
1848
+ ) -> None:
1849
+ """Queue missing sampled-frame extraction directly from a bucket-scoped video set."""
1850
+ _require_internal_admin_for_write(ctx, write=write)
1851
+ settings = _settings_for_command(ctx, write=write)
1852
+
1448
1853
  async def run() -> None:
1449
1854
  from dal.processing import media_jobs
1450
1855
 
@@ -1453,13 +1858,23 @@ def queue_missing_sampled_frames(
1453
1858
  dal_ctx,
1454
1859
  ):
1455
1860
  selection_spec = {
1456
- "kind": "by_corpus",
1457
- "corpus_keys": corpus_key,
1861
+ "kind": "by_bucket",
1458
1862
  "selection_kind": selection_kind,
1459
1863
  "limit": limit,
1460
1864
  "only_missing_sampled_frames": True,
1461
1865
  "require_zero_frames": False,
1866
+ "required_asset_uri_prefixes": required_asset_uri_prefix,
1462
1867
  }
1868
+ if min_source_video_size_bytes is not None:
1869
+ selection_spec["min_source_video_size_bytes"] = min_source_video_size_bytes
1870
+ if num_frames is not None:
1871
+ selection_spec["num_frames"] = num_frames
1872
+ if single_frame_index is not None:
1873
+ selection_spec["single_frame_index"] = single_frame_index
1874
+ if use_remote_gcs_seek:
1875
+ selection_spec["use_remote_gcs_seek"] = True
1876
+ if ffmpeg_scale_flags:
1877
+ selection_spec["ffmpeg_scale_flags"] = ffmpeg_scale_flags
1463
1878
  if write:
1464
1879
  manifest = await media_jobs.queue_processor_job(
1465
1880
  dal_ctx,
@@ -1498,6 +1913,11 @@ def queue_missing_demian_labels(
1498
1913
  source_id: str = typer.Option("demian", "--source-id", help="Non-secret label source alias."),
1499
1914
  endpoint_url: str | None = typer.Option(None, "--endpoint-url"),
1500
1915
  limit: int | None = typer.Option(None, "--limit", min=1, help="Cap selected frames."),
1916
+ required_asset_uri_prefix: list[str] = typer.Option(
1917
+ [],
1918
+ "--required-asset-uri-prefix",
1919
+ help="Require selected frame assets to start with this GCS URI prefix. Repeatable.",
1920
+ ),
1501
1921
  idempotency_key: str = typer.Option(..., "--idempotency-key", help="Manifest idempotency key."),
1502
1922
  cost_cap_usd: float | None = typer.Option(None, "--cost-cap-usd", min=0.0),
1503
1923
  estimated_cost_per_label_usd: float = typer.Option(
@@ -1549,6 +1969,8 @@ def queue_missing_demian_labels(
1549
1969
  "endpoint_timeout_sec": endpoint_timeout_sec,
1550
1970
  "allow_external_bearer": allow_external_bearer,
1551
1971
  }
1972
+ if required_asset_uri_prefix:
1973
+ selection_spec["required_asset_uri_prefixes"] = required_asset_uri_prefix
1552
1974
  sink_overrides = {
1553
1975
  "observation": {
1554
1976
  "table": "observations.frame_observations",
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "buildai-cli"
7
- version = "0.3.95"
7
+ version = "0.3.97"
8
8
  description = "Build AI CLI (Typer)"
9
9
  requires-python = ">=3.11"
10
10
  dependencies = [
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes