open-research-protocol 0.4.8 → 0.4.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -0
- package/cli/orp.py +490 -0
- package/docs/AGENT_LOOP.md +3 -0
- package/docs/ORP_YOUTUBE_INSPECT.md +97 -0
- package/llms.txt +3 -0
- package/package.json +1 -1
- package/spec/v1/youtube-source.schema.json +151 -0
package/README.md
CHANGED
|
@@ -33,6 +33,7 @@ verification remains independent of framing. See `modules/instruments/README.md`
|
|
|
33
33
|
- `docs/ORP_REASONING_KERNEL_CANONICAL_CONTINUATION_PILOT.md` — harder live downstream benchmark where the agent must produce the next canonical task artifact
|
|
34
34
|
- `docs/ORP_REASONING_KERNEL_EVIDENCE_MATRIX.md` — honest map of what the kernel proves, only suggests, or still leaves unproven
|
|
35
35
|
- `docs/ORP_REASONING_KERNEL_EVALUATION_PLAN.md` — comparative experiment plan for upgrading kernel evidence beyond implementation validity
|
|
36
|
+
- `docs/ORP_YOUTUBE_INSPECT.md` — first-class YouTube metadata/transcript ingestion surface for agent-readable external source context
|
|
36
37
|
- `docs/EXTERNAL_CONTRIBUTION_GOVERNANCE.md` — canonical local-first workflow for external OSS PR work
|
|
37
38
|
- `docs/OSS_CONTRIBUTION_AGENT_LOOP.md` — agent operating rhythm for external contribution workflows
|
|
38
39
|
- `templates/` — claim, verification, failure, and issue templates
|
|
@@ -51,6 +52,7 @@ verification remains independent of framing. See `modules/instruments/README.md`
|
|
|
51
52
|
ORP should feel like one CLI with built-in abilities:
|
|
52
53
|
|
|
53
54
|
- `workspace` for hosted auth, idea, feature, world, checkpoint, and worker operations
|
|
55
|
+
- `youtube` for public video metadata and transcript ingestion
|
|
54
56
|
- `governance` for local-first repo initialization, branch safety, checkpoint commits, backup refs, readiness, repair, and cleanup
|
|
55
57
|
- `discover` for profile-based GitHub scanning and opportunity selection
|
|
56
58
|
- `collaborate` for repository collaboration setup and workflow execution
|
|
@@ -118,6 +120,8 @@ orp home --json
|
|
|
118
120
|
orp about --json
|
|
119
121
|
orp auth login
|
|
120
122
|
orp whoami --json
|
|
123
|
+
orp youtube inspect https://www.youtube.com/watch?v=<video_id> --json
|
|
124
|
+
orp youtube inspect https://www.youtube.com/watch?v=<video_id> --save --json
|
|
121
125
|
orp ideas list --json
|
|
122
126
|
orp world bind --idea-id <idea-id> --project-root /abs/path --codex-session-id <session-id> --json
|
|
123
127
|
orp checkpoint queue --idea-id <idea-id> --json
|
|
@@ -149,6 +153,7 @@ These surfaces are meant to help automated systems discover ORP quickly:
|
|
|
149
153
|
- bare `orp` opens a home screen with repo/runtime status, available packs, and next commands
|
|
150
154
|
- `orp home --json` returns the same landing context in machine-readable form
|
|
151
155
|
- `orp auth ...`, `orp ideas ...`, `orp world ...`, `orp checkpoint ...`, `orp runner ...`, and `orp agent ...` expose the hosted workspace surface directly through ORP
|
|
156
|
+
- `orp youtube inspect ...` exposes public YouTube metadata and transcript retrieval through a stable ORP artifact shape for agent use
|
|
152
157
|
- `orp init`, `orp status`, `orp branch start`, `orp checkpoint create`, `orp backup`, `orp ready`, `orp doctor`, and `orp cleanup` expose the local-first repo governance surface directly through ORP
|
|
153
158
|
- `orp discover ...` exposes profile-based GitHub scanning as a built-in ORP ability
|
|
154
159
|
- `orp collaborate ...` exposes built-in collaboration setup and workflow execution without asking users to think in terms of separate governance packs
|
|
@@ -212,6 +217,7 @@ Minimal CLI skeleton:
|
|
|
212
217
|
|
|
213
218
|
```bash
|
|
214
219
|
orp auth login
|
|
220
|
+
orp youtube inspect https://www.youtube.com/watch?v=<video_id> --json
|
|
215
221
|
orp ideas list --json
|
|
216
222
|
orp world bind --idea-id <idea-id> --project-root /abs/path --codex-session-id <session-id> --json
|
|
217
223
|
orp checkpoint queue --idea-id <idea-id> --json
|
package/cli/orp.py
CHANGED
|
@@ -30,6 +30,7 @@ import argparse
|
|
|
30
30
|
import datetime as dt
|
|
31
31
|
import getpass
|
|
32
32
|
import hashlib
|
|
33
|
+
import html
|
|
33
34
|
import json
|
|
34
35
|
import os
|
|
35
36
|
import platform
|
|
@@ -45,6 +46,7 @@ import uuid
|
|
|
45
46
|
from urllib import error as urlerror
|
|
46
47
|
from urllib import parse as urlparse
|
|
47
48
|
from urllib import request as urlrequest
|
|
49
|
+
import xml.etree.ElementTree as ET
|
|
48
50
|
|
|
49
51
|
RUNNER_LEASE_STALE_SECONDS = 120
|
|
50
52
|
|
|
@@ -112,6 +114,7 @@ DEFAULT_DISCOVER_PROFILE = "orp.profile.default.json"
|
|
|
112
114
|
DEFAULT_DISCOVER_SCAN_ROOT = "orp/discovery/github"
|
|
113
115
|
DEFAULT_HOSTED_BASE_URL = "https://orp.earth"
|
|
114
116
|
KERNEL_SCHEMA_VERSION = "1.0.0"
|
|
117
|
+
YOUTUBE_SOURCE_SCHEMA_VERSION = "1.0.0"
|
|
115
118
|
|
|
116
119
|
|
|
117
120
|
class HostedApiError(RuntimeError):
|
|
@@ -336,6 +339,442 @@ def _request_hosted_sse_event(
|
|
|
336
339
|
) from exc
|
|
337
340
|
|
|
338
341
|
|
|
342
|
+
def _http_get_text(url: str, *, headers: dict[str, str] | None = None, timeout_sec: int = 20) -> str:
|
|
343
|
+
request = urlrequest.Request(url, headers=headers or {}, method="GET")
|
|
344
|
+
try:
|
|
345
|
+
with urlrequest.urlopen(request, timeout=timeout_sec) as response:
|
|
346
|
+
return response.read().decode("utf-8", errors="replace")
|
|
347
|
+
except urlerror.HTTPError as exc:
|
|
348
|
+
body = exc.read().decode("utf-8", errors="replace").strip()
|
|
349
|
+
raise RuntimeError(f"HTTP {exc.code} while fetching {url}: {body or exc.reason}") from exc
|
|
350
|
+
except urlerror.URLError as exc:
|
|
351
|
+
raise RuntimeError(f"Could not reach {url}: {exc.reason}") from exc
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
def _http_get_json(url: str, *, headers: dict[str, str] | None = None, timeout_sec: int = 20) -> dict[str, Any]:
    """Fetch ``url`` and return its body parsed as a JSON object.

    Args:
        url: Absolute URL to fetch.
        headers: Optional request headers forwarded to ``_http_get_text``.
        timeout_sec: Timeout in seconds forwarded to ``_http_get_text``.

    Returns:
        The decoded JSON object.

    Raises:
        RuntimeError: When the body is not valid JSON, or is valid JSON whose
            top-level value is not an object. Errors raised by
            ``_http_get_text`` propagate unchanged.
    """
    raw_text = _http_get_text(url, headers=headers, timeout_sec=timeout_sec)
    try:
        decoded = json.loads(raw_text)
    except Exception as exc:
        raise RuntimeError(f"Response from {url} was not valid JSON.") from exc
    # Guard clause: anything other than a JSON object is rejected.
    if not isinstance(decoded, dict):
        raise RuntimeError(f"Response from {url} was not a JSON object.")
    return decoded
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def _youtube_request_headers() -> dict[str, str]:
    """Return the request headers sent with every YouTube fetch.

    Returns:
        A fresh dict holding a desktop-browser ``User-Agent`` and an English
        ``Accept-Language`` preference.
    """
    browser_user_agent = (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0 Safari/537.36"
    )
    request_headers: dict[str, str] = {}
    request_headers["User-Agent"] = browser_user_agent
    request_headers["Accept-Language"] = "en-US,en;q=0.9"
    return request_headers
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def _youtube_source_schema_path() -> Path:
    """Return the path of the YouTube source schema shipped with the package.

    The path is built relative to this module's resolved location: up two
    directory levels, then ``spec/v1/youtube-source.schema.json``.
    """
    module_dir = Path(__file__).resolve().parent
    package_root = module_dir.parent
    return package_root.joinpath("spec", "v1", "youtube-source.schema.json")
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def _youtube_video_id_from_url(raw_url: str) -> str:
    """Extract the 11-character video id from a YouTube URL or bare id.

    Accepted inputs:
      * a bare 11-character id,
      * ``youtu.be/<id>`` share links,
      * ``/watch?v=<id>`` on youtube.com, youtube-nocookie.com and their
        subdomains (``www.``, ``m.``, ``music.`` ...),
      * ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>`` and ``/v/<id>``.

    URLs written without a scheme (``youtube.com/watch?v=...``) are accepted.

    Args:
        raw_url: The URL or id supplied by the user.

    Returns:
        The video id, always matching ``[A-Za-z0-9_-]{11}``.

    Raises:
        RuntimeError: When the input is empty or no valid id can be extracted.
    """
    text = str(raw_url or "").strip()
    if not text:
        raise RuntimeError("YouTube URL is required.")
    id_pattern = r"[A-Za-z0-9_-]{11}"
    if re.fullmatch(id_pattern, text):
        return text

    # Without a scheme urlparse treats the host as part of the path, so add
    # one for inputs such as "youtube.com/watch?v=...".
    has_scheme = re.match(r"[A-Za-z][A-Za-z0-9+.-]*://", text) is not None
    try:
        parsed = urlparse.urlparse(text if has_scheme else "https://" + text)
        host = (parsed.hostname or "").lower().rstrip(".")
    except ValueError as exc:
        raise RuntimeError(f"Could not extract a YouTube video id from: {text}") from exc
    path_parts = [part for part in parsed.path.split("/") if part]

    def _host_is(domain: str) -> bool:
        # Match the domain itself or a true subdomain. A plain suffix test
        # would also accept look-alikes such as "notyoutu.be".
        return host == domain or host.endswith("." + domain)

    candidate = ""
    if _host_is("youtu.be"):
        if path_parts:
            candidate = path_parts[0]
    elif _host_is("youtube.com") or _host_is("youtube-nocookie.com"):
        if path_parts == ["watch"]:
            candidate = urlparse.parse_qs(parsed.query).get("v", [""])[0].strip()
        elif len(path_parts) >= 2 and path_parts[0] in {"embed", "shorts", "live", "v"}:
            candidate = path_parts[1]

    # The id later becomes part of URLs and of the default artifact file
    # name, so anything that is not a well-formed id is rejected here.
    if re.fullmatch(id_pattern, candidate):
        return candidate
    raise RuntimeError(f"Could not extract a YouTube video id from: {text}")
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
def _youtube_canonical_url(video_id: str) -> str:
    """Return the canonical ``www.youtube.com`` watch URL for ``video_id``."""
    watch_url_template = "https://www.youtube.com/watch?v={}"
    return watch_url_template.format(video_id)
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
def _extract_json_object_after_marker(text: str, marker: str) -> dict[str, Any] | None:
|
|
407
|
+
index = text.find(marker)
|
|
408
|
+
if index < 0:
|
|
409
|
+
return None
|
|
410
|
+
start = text.find("{", index)
|
|
411
|
+
if start < 0:
|
|
412
|
+
return None
|
|
413
|
+
depth = 0
|
|
414
|
+
in_string = False
|
|
415
|
+
escaped = False
|
|
416
|
+
for pos in range(start, len(text)):
|
|
417
|
+
ch = text[pos]
|
|
418
|
+
if in_string:
|
|
419
|
+
if escaped:
|
|
420
|
+
escaped = False
|
|
421
|
+
elif ch == "\\":
|
|
422
|
+
escaped = True
|
|
423
|
+
elif ch == '"':
|
|
424
|
+
in_string = False
|
|
425
|
+
continue
|
|
426
|
+
if ch == '"':
|
|
427
|
+
in_string = True
|
|
428
|
+
continue
|
|
429
|
+
if ch == "{":
|
|
430
|
+
depth += 1
|
|
431
|
+
continue
|
|
432
|
+
if ch == "}":
|
|
433
|
+
depth -= 1
|
|
434
|
+
if depth == 0:
|
|
435
|
+
candidate = text[start : pos + 1]
|
|
436
|
+
try:
|
|
437
|
+
payload = json.loads(candidate)
|
|
438
|
+
except Exception:
|
|
439
|
+
return None
|
|
440
|
+
return payload if isinstance(payload, dict) else None
|
|
441
|
+
return None
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
def _youtube_track_label(track: dict[str, Any]) -> str:
    """Return a human-readable label for a caption track.

    Preference order: ``name.simpleText``, then the concatenated text of
    ``name.runs``, then the track's ``languageCode``.

    Args:
        track: One caption-track mapping.

    Returns:
        The label with surrounding whitespace removed, or ``""`` when the
        track carries none of the fields above.
    """
    name = track.get("name")
    if isinstance(name, dict):
        # ``or ""`` keeps a null value from being rendered as "None".
        simple = str(name.get("simpleText") or "").strip()
        if simple:
            return simple
        runs = name.get("runs")
        if isinstance(runs, list):
            # Join first, strip once: stripping each run separately would
            # delete the spaces that separate adjacent runs.
            joined = "".join(
                str(row.get("text") or "") for row in runs if isinstance(row, dict)
            ).strip()
            if joined:
                return joined
    return str(track.get("languageCode") or "").strip()
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
def _pick_youtube_caption_track(tracks: list[dict[str, Any]], preferred_lang: str = "") -> dict[str, Any] | None:
|
|
463
|
+
if not tracks:
|
|
464
|
+
return None
|
|
465
|
+
preferred = str(preferred_lang or "").strip().lower()
|
|
466
|
+
|
|
467
|
+
def score(track: dict[str, Any]) -> tuple[int, int]:
|
|
468
|
+
code = str(track.get("languageCode", "")).strip().lower()
|
|
469
|
+
kind = str(track.get("kind", "")).strip().lower()
|
|
470
|
+
auto = 1 if kind == "asr" else 0
|
|
471
|
+
exact = 1 if preferred and code == preferred else 0
|
|
472
|
+
prefix = 1 if preferred and code.startswith(preferred + "-") else 0
|
|
473
|
+
english = 1 if code.startswith("en") else 0
|
|
474
|
+
return (exact * 100 + prefix * 80 + english * 20 - auto * 5, -auto)
|
|
475
|
+
|
|
476
|
+
ranked = sorted(tracks, key=score, reverse=True)
|
|
477
|
+
return ranked[0] if ranked else None
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
def _youtube_add_query_param(url: str, key: str, value: str) -> str:
    """Return ``url`` with query parameter ``key`` set to ``value``.

    An existing ``key`` is replaced in place; any further occurrences of it
    are dropped. Every other parameter, including repeated ones and ones
    with blank values, is preserved in its original order, as is the
    fragment.

    Args:
        url: URL to modify.
        key: Query parameter name.
        value: New value for the parameter.

    Returns:
        The rebuilt URL.
    """
    parsed = urlparse.urlsplit(url)
    pairs: list[tuple[str, str]] = []
    replaced = False
    # Work on the pair list rather than a dict: a dict would silently
    # collapse parameters that legitimately appear more than once.
    for name, current in urlparse.parse_qsl(parsed.query, keep_blank_values=True):
        if name != key:
            pairs.append((name, current))
        elif not replaced:
            pairs.append((key, value))
            replaced = True
    if not replaced:
        pairs.append((key, value))
    return urlparse.urlunsplit(
        (
            parsed.scheme,
            parsed.netloc,
            parsed.path,
            urlparse.urlencode(pairs),
            parsed.fragment,
        )
    )
|
|
493
|
+
|
|
494
|
+
|
|
495
|
+
def _parse_youtube_transcript_json3(payload: dict[str, Any]) -> tuple[str, list[dict[str, Any]]]:
    """Convert a ``json3`` caption payload into text and timed segments.

    Each entry of ``payload["events"]`` that has a ``segs`` list becomes one
    segment: the ``utf8`` strings of its segs are concatenated,
    HTML-unescaped and whitespace-collapsed. Events whose text ends up empty
    are skipped.

    Args:
        payload: Decoded ``json3`` document.

    Returns:
        ``(transcript_text, segments)``. ``segments`` holds dicts with
        ``start_ms``, ``duration_ms`` and ``text``; ``transcript_text`` is
        the segment texts joined by newlines. Both are empty when
        ``events`` is missing or not a list.
    """
    events = payload.get("events")
    if not isinstance(events, list):
        return ("", [])

    def _as_ms(value: Any) -> int:
        # One malformed timing value should not discard the whole
        # transcript; treat it as 0 instead.
        try:
            return int(value or 0)
        except (TypeError, ValueError, OverflowError):
            return 0

    segments: list[dict[str, Any]] = []
    for event in events:
        if not isinstance(event, dict):
            continue
        segs = event.get("segs")
        if not isinstance(segs, list):
            continue
        # Only real strings count as text, so a null ``utf8`` can no longer
        # leak into the transcript as the word "None".
        pieces = [
            html.unescape(seg["utf8"])
            for seg in segs
            if isinstance(seg, dict) and isinstance(seg.get("utf8"), str)
        ]
        merged = re.sub(r"\s+", " ", "".join(pieces)).strip()
        if not merged:
            continue
        segments.append(
            {
                "start_ms": _as_ms(event.get("tStartMs")),
                "duration_ms": _as_ms(event.get("dDurationMs")),
                "text": merged,
            }
        )
    transcript_text = "\n".join(row["text"] for row in segments)
    return transcript_text, segments
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
def _parse_youtube_transcript_xml(text: str) -> tuple[str, list[dict[str, Any]]]:
    """Convert an XML caption document into text and timed segments.

    Every ``<text>`` element becomes one segment. Its ``start`` and ``dur``
    attributes are read as seconds and converted to milliseconds; its text
    content is HTML-unescaped and whitespace-collapsed. Elements whose text
    ends up empty are skipped.

    Args:
        text: The XML document as a string.

    Returns:
        ``(transcript_text, segments)``, both empty when ``text`` cannot be
        parsed as XML.
    """
    try:
        root = ET.fromstring(text)
    except (ET.ParseError, ValueError):
        return ("", [])

    def _seconds_to_ms(raw: Any) -> int:
        # A malformed attribute should cost one timestamp, not the whole
        # transcript. round() avoids float truncation losing a millisecond.
        try:
            return int(round(float(raw or "0") * 1000))
        except (TypeError, ValueError, OverflowError):
            return 0

    segments: list[dict[str, Any]] = []
    for node in root.findall(".//text"):
        # The XML parser has already decoded one level of entities; the
        # extra unescape handles payloads that are escaped twice.
        body = html.unescape("".join(node.itertext()))
        body = re.sub(r"\s+", " ", body).strip()
        if not body:
            continue
        segments.append(
            {
                "start_ms": _seconds_to_ms(node.attrib.get("start")),
                "duration_ms": _seconds_to_ms(node.attrib.get("dur")),
                "text": body,
            }
        )
    transcript_text = "\n".join(row["text"] for row in segments)
    return transcript_text, segments
|
|
549
|
+
|
|
550
|
+
|
|
551
|
+
def _youtube_fetch_oembed(canonical_url: str) -> dict[str, Any]:
    """Fetch oEmbed metadata for a video on a best-effort basis.

    Args:
        canonical_url: Watch URL passed to the oEmbed endpoint as ``url``.

    Returns:
        The decoded oEmbed JSON object, or an empty dict when the request or
        the decoding fails for any reason.
    """
    query_string = urlparse.urlencode({"url": canonical_url, "format": "json"})
    endpoint = "https://www.youtube.com/oembed?" + query_string
    try:
        oembed = _http_get_json(endpoint, headers=_youtube_request_headers(), timeout_sec=20)
    except Exception:
        # Deliberately swallowed: the function degrades to "no metadata"
        # rather than failing.
        return {}
    return oembed
|
|
557
|
+
|
|
558
|
+
|
|
559
|
+
def _youtube_fetch_watch_state(video_id: str) -> dict[str, Any]:
    """Fetch a watch page and extract its embedded player response.

    The page is requested with English UI parameters appended, and the
    ``ytInitialPlayerResponse`` object is pulled out of the returned HTML.

    Args:
        video_id: Video id used to build the watch URL.

    Returns:
        A dict with these keys:
          * ``player_response``: the full decoded object;
          * ``video_details``: its ``videoDetails`` dict, or ``{}``;
          * ``microformat``: its ``microformat.playerMicroformatRenderer``
            dict, or ``{}``;
          * ``playability_status``: its ``playabilityStatus`` dict, or ``{}``;
          * ``caption_tracks``: the ``captionTracks`` list, or ``[]``.

    Raises:
        RuntimeError: When no player response can be parsed from the page.
            Errors raised by ``_http_get_text`` propagate unchanged.
    """
    url = _youtube_canonical_url(video_id) + "&hl=en&persist_hl=1"
    html_text = _http_get_text(url, headers=_youtube_request_headers(), timeout_sec=25)
    markers = (
        "var ytInitialPlayerResponse = ",
        "ytInitialPlayerResponse = ",
        "window['ytInitialPlayerResponse'] = ",
        'window["ytInitialPlayerResponse"] = ',
    )
    player_response: dict[str, Any] | None = None
    for marker in markers:
        player_response = _extract_json_object_after_marker(html_text, marker)
        if player_response:
            break
    if not player_response:
        raise RuntimeError("Could not parse YouTube player response from the watch page.")

    def _dict_at(container: Any, key: str) -> dict[str, Any]:
        # Type-checked lookup: a null or otherwise non-dict value yields {}
        # so chained lookups never raise AttributeError.
        value = container.get(key) if isinstance(container, dict) else None
        return value if isinstance(value, dict) else {}

    captions = _dict_at(
        _dict_at(player_response, "captions"), "playerCaptionsTracklistRenderer"
    ).get("captionTracks", [])
    return {
        "player_response": player_response,
        "video_details": _dict_at(player_response, "videoDetails"),
        "microformat": _dict_at(_dict_at(player_response, "microformat"), "playerMicroformatRenderer"),
        "playability_status": _dict_at(player_response, "playabilityStatus"),
        "caption_tracks": captions if isinstance(captions, list) else [],
    }
|
|
595
|
+
|
|
596
|
+
|
|
597
|
+
def _youtube_fetch_transcript_from_track(track: dict[str, Any]) -> tuple[str, list[dict[str, Any]], str]:
    """Download and parse the transcript behind one caption track.

    The track's ``baseUrl`` is first requested with ``fmt=json3``. If that
    yields no text, the unmodified ``baseUrl`` is requested and parsed as
    XML.

    Args:
        track: Caption-track mapping; only ``baseUrl`` is read.

    Returns:
        ``(transcript_text, segments, fetch_mode)`` where ``fetch_mode`` is
        ``"json3"`` or ``"xml"`` on success, ``"missing_track_url"`` when the
        track has no usable ``baseUrl``, and ``"unavailable"`` when both
        attempts fail or return no text.
    """
    # ``or ""`` keeps a null baseUrl from turning into the literal URL "None".
    base_url = str(track.get("baseUrl") or "").strip()
    if not base_url:
        return ("", [], "missing_track_url")
    json3_url = _youtube_add_query_param(base_url, "fmt", "json3")
    try:
        payload = _http_get_json(json3_url, headers=_youtube_request_headers(), timeout_sec=25)
        transcript_text, segments = _parse_youtube_transcript_json3(payload)
        if transcript_text:
            return transcript_text, segments, "json3"
    except Exception:
        # Best-effort: any json3 failure falls through to the XML attempt.
        pass
    try:
        xml_text = _http_get_text(base_url, headers=_youtube_request_headers(), timeout_sec=25)
        transcript_text, segments = _parse_youtube_transcript_xml(xml_text)
        if transcript_text:
            return transcript_text, segments, "xml"
    except Exception:
        # Best-effort: failure is reported through the "unavailable" mode.
        pass
    return ("", [], "unavailable")
|
|
617
|
+
|
|
618
|
+
|
|
619
|
+
def _youtube_text_bundle(payload: dict[str, Any]) -> str:
    """Assemble the ``text_bundle`` summary string for a source payload.

    Sections appear in a fixed order (title, author, duration, description,
    transcript) and are separated by blank lines. A section is left out when
    its value is empty, or for the duration, when it is not a positive int.

    Args:
        payload: Source payload; the keys read are ``title``,
            ``author_name``, ``duration_seconds``, ``description`` and
            ``transcript_text``.

    Returns:
        The assembled text, or ``""`` when no section has content.
    """

    def _clean(key: str) -> str:
        return str(payload.get(key, "")).strip()

    sections: list[str] = []

    heading = _clean("title")
    if heading:
        sections.append(f"Title: {heading}")

    creator = _clean("author_name")
    if creator:
        sections.append(f"Author: {creator}")

    length = payload.get("duration_seconds")
    if isinstance(length, int) and length > 0:
        sections.append(f"Duration seconds: {length}")

    summary = _clean("description")
    if summary:
        sections.append("Description:\n" + summary)

    spoken = _clean("transcript_text")
    if spoken:
        sections.append("Transcript:\n" + spoken)

    return "\n\n".join(sections)
|
|
637
|
+
|
|
638
|
+
|
|
639
|
+
def _youtube_inspect_payload(raw_url: str, preferred_lang: str = "") -> dict[str, Any]:
    """Build the ``youtube_source`` payload for one video.

    oEmbed metadata and the watch-page state are fetched, a caption track is
    chosen, and its transcript is downloaded when one exists. A watch-page
    failure is recorded in ``warnings`` rather than raised, so a payload is
    still returned with whatever metadata was available.

    Args:
        raw_url: YouTube URL or bare video id supplied by the user.
        preferred_lang: Preferred caption language code; empty for none.

    Returns:
        The payload dict, including a derived ``text_bundle`` field.

    Raises:
        RuntimeError: When no video id can be extracted from ``raw_url``.
    """
    video_id = _youtube_video_id_from_url(raw_url)
    canonical_url = _youtube_canonical_url(video_id)
    warnings: list[str] = []
    oembed = _youtube_fetch_oembed(canonical_url)

    watch_state: dict[str, Any] = {}
    try:
        watch_state = _youtube_fetch_watch_state(video_id)
    except Exception as exc:
        # Best-effort: keep going with oEmbed data and surface the reason.
        warnings.append(str(exc))

    video_details = watch_state.get("video_details", {}) if isinstance(watch_state.get("video_details"), dict) else {}
    microformat = watch_state.get("microformat", {}) if isinstance(watch_state.get("microformat"), dict) else {}
    playability = watch_state.get("playability_status", {}) if isinstance(watch_state.get("playability_status"), dict) else {}
    tracks = [row for row in (watch_state.get("caption_tracks") or []) if isinstance(row, dict)]
    chosen_track = _pick_youtube_caption_track(tracks, preferred_lang)
    transcript_text = ""
    transcript_segments: list[dict[str, Any]] = []
    transcript_fetch_mode = "none"
    transcript_available = False
    transcript_language = ""
    transcript_track_name = ""
    transcript_kind = "none"
    if chosen_track is not None:
        transcript_text, transcript_segments, transcript_fetch_mode = _youtube_fetch_transcript_from_track(chosen_track)
        transcript_available = bool(transcript_text.strip())
        transcript_language = str(chosen_track.get("languageCode", "")).strip()
        transcript_track_name = _youtube_track_label(chosen_track)
        transcript_kind = "auto" if str(chosen_track.get("kind", "")).strip().lower() == "asr" else "manual"
        if not transcript_available:
            warnings.append("A caption track was found, but transcript text could not be fetched.")
    elif watch_state:
        warnings.append("No caption tracks were available for this video.")

    title = str(video_details.get("title") or oembed.get("title") or "").strip()
    author_name = str(video_details.get("author") or oembed.get("author_name") or "").strip()
    author_url = str(oembed.get("author_url") or "").strip()
    thumbnail_url = str(oembed.get("thumbnail_url") or "").strip()
    # The microformat description is only used as a fallback, and only when
    # it has the expected mapping shape; any other shape is ignored.
    microformat_description = microformat.get("description")
    fallback_description = (
        microformat_description.get("simpleText", "")
        if isinstance(microformat_description, dict)
        else ""
    )
    description = str(video_details.get("shortDescription") or fallback_description or "").strip()
    channel_id = str(video_details.get("channelId") or "").strip()
    duration_seconds = 0
    raw_duration = video_details.get("lengthSeconds")
    # isdecimal() admits exactly the digits int() can parse; isdigit() also
    # admits characters such as superscripts that make int() raise.
    if isinstance(raw_duration, str) and raw_duration.isdecimal():
        duration_seconds = int(raw_duration)
    elif isinstance(raw_duration, int) and not isinstance(raw_duration, bool) and raw_duration > 0:
        duration_seconds = raw_duration
    published_at = str(microformat.get("publishDate") or "").strip()
    payload = {
        "schema_version": YOUTUBE_SOURCE_SCHEMA_VERSION,
        "kind": "youtube_source",
        "retrieved_at_utc": _now_utc(),
        "source_url": str(raw_url).strip(),
        "canonical_url": canonical_url,
        "video_id": video_id,
        "title": title,
        "author_name": author_name,
        "author_url": author_url,
        "thumbnail_url": thumbnail_url,
        "channel_id": channel_id,
        "description": description,
        "duration_seconds": duration_seconds or None,
        "published_at": published_at,
        "playability_status": str(playability.get("status", "")).strip(),
        "transcript_available": transcript_available,
        "transcript_language": transcript_language,
        "transcript_track_name": transcript_track_name,
        "transcript_kind": transcript_kind,
        "transcript_fetch_mode": transcript_fetch_mode,
        "transcript_text": transcript_text,
        "transcript_segments": transcript_segments,
        "warnings": _unique_strings(warnings),
    }
    payload["text_bundle"] = _youtube_text_bundle(payload)
    return payload
|
|
712
|
+
|
|
713
|
+
|
|
714
|
+
def _default_youtube_artifact_path(repo_root: Path, video_id: str) -> Path:
    """Return the default save location for a video's source artifact.

    Args:
        repo_root: Repository root directory.
        video_id: Video id used as the file stem.

    Returns:
        ``<repo_root>/orp/external/youtube/<video_id>.json``.
    """
    artifact_name = "{}.json".format(video_id)
    return repo_root.joinpath("orp", "external", "youtube", artifact_name)
|
|
716
|
+
|
|
717
|
+
|
|
718
|
+
def cmd_youtube_inspect(args: argparse.Namespace) -> int:
    """Run ``orp youtube inspect``: build the payload, optionally save, print.

    The payload is written to disk when ``--save`` or ``--out`` is given;
    ``--out`` chooses the path, otherwise the default artifact path under
    the repo is used. Output is JSON when ``args.json_output`` is set, and
    key/value pairs followed by the text bundle and warnings otherwise.

    Args:
        args: Parsed CLI arguments. ``repo_root``, ``url`` and
            ``json_output`` are read directly; ``lang``, ``out``, ``save``,
            ``force`` and ``format`` are optional.

    Returns:
        ``0`` on success.

    Raises:
        RuntimeError: When the target file exists and ``--force`` is not set.
    """
    repo_root = Path(args.repo_root).resolve()
    lang_choice = str(getattr(args, "lang", "") or "").strip()
    payload = _youtube_inspect_payload(args.url, preferred_lang=lang_choice)

    explicit_out = str(getattr(args, "out", "") or "").strip()
    out_path: Path | None = None
    emitted_format = ""
    if bool(getattr(args, "save", False) or explicit_out):
        if explicit_out:
            out_path = _resolve_cli_path(explicit_out, repo_root)
        else:
            _ensure_dirs(repo_root)
            out_path = _default_youtube_artifact_path(repo_root, str(payload.get("video_id", "")).strip())
        if out_path.exists() and not bool(getattr(args, "force", False)):
            raise RuntimeError(
                f"output path already exists: {_path_for_state(out_path, repo_root)}. Use --force to overwrite."
            )
        emitted_format = _write_structured_payload(out_path, payload, format_hint=str(getattr(args, "format", "") or ""))

    was_saved = out_path is not None

    def _shown_path() -> str:
        # Empty string when nothing was written.
        return _path_for_state(out_path, repo_root) if out_path is not None else ""

    result = {
        "ok": True,
        "saved": was_saved,
        "path": _shown_path(),
        "format": emitted_format,
        "schema_path": "spec/v1/youtube-source.schema.json",
        "source": payload,
    }
    if args.json_output:
        _print_json(result)
        return 0

    def _field(key: str) -> str:
        return str(payload.get(key, "")).strip()

    _print_pairs(
        [
            ("ok", "true"),
            ("video.id", _field("video_id")),
            ("video.title", _field("title")),
            ("video.author", _field("author_name")),
            ("video.duration_seconds", payload.get("duration_seconds") or ""),
            ("transcript.available", str(bool(payload.get("transcript_available", False))).lower()),
            ("transcript.language", _field("transcript_language")),
            ("transcript.kind", _field("transcript_kind")),
            ("saved", str(bool(was_saved)).lower()),
            ("path", _shown_path()),
        ]
    )
    bundle = _field("text_bundle")
    raw_warnings = payload.get("warnings")
    warning_rows = raw_warnings if isinstance(raw_warnings, list) else []
    if bundle:
        print("")
        print(bundle)
    if warning_rows:
        print("")
        for row in warning_rows:
            message = str(row).strip()
            if message:
                print(f"warning={message}")
    return 0
|
|
776
|
+
|
|
777
|
+
|
|
339
778
|
def _runner_transport_mode(args: argparse.Namespace) -> str:
|
|
340
779
|
mode = str(getattr(args, "transport", "auto") or "auto").strip().lower()
|
|
341
780
|
if mode in {"poll", "sse"}:
|
|
@@ -5146,6 +5585,7 @@ def _about_payload() -> dict[str, Any]:
|
|
|
5146
5585
|
"kernel": "spec/v1/kernel.schema.json",
|
|
5147
5586
|
"kernel_proposal": "spec/v1/kernel-proposal.schema.json",
|
|
5148
5587
|
"kernel_extension": "spec/v1/kernel-extension.schema.json",
|
|
5588
|
+
"youtube_source": "spec/v1/youtube-source.schema.json",
|
|
5149
5589
|
"profile_pack": "spec/v1/profile-pack.schema.json",
|
|
5150
5590
|
"link_project": "spec/v1/link-project.schema.json",
|
|
5151
5591
|
"link_session": "spec/v1/link-session.schema.json",
|
|
@@ -5164,6 +5604,13 @@ def _about_payload() -> dict[str, Any]:
|
|
|
5164
5604
|
["kernel", "migrate"],
|
|
5165
5605
|
],
|
|
5166
5606
|
},
|
|
5607
|
+
{
|
|
5608
|
+
"id": "youtube",
|
|
5609
|
+
"description": "Public YouTube metadata and transcript ingestion for agent-readable external source context.",
|
|
5610
|
+
"entrypoints": [
|
|
5611
|
+
["youtube", "inspect"],
|
|
5612
|
+
],
|
|
5613
|
+
},
|
|
5167
5614
|
{
|
|
5168
5615
|
"id": "workspace",
|
|
5169
5616
|
"description": "Hosted workspace auth, ideas, features, worlds, checkpoints, and worker operations.",
|
|
@@ -5257,6 +5704,7 @@ def _about_payload() -> dict[str, Any]:
|
|
|
5257
5704
|
{"name": "kernel_stats", "path": ["kernel", "stats"], "json_output": True},
|
|
5258
5705
|
{"name": "kernel_propose", "path": ["kernel", "propose"], "json_output": True},
|
|
5259
5706
|
{"name": "kernel_migrate", "path": ["kernel", "migrate"], "json_output": True},
|
|
5707
|
+
{"name": "youtube_inspect", "path": ["youtube", "inspect"], "json_output": True},
|
|
5260
5708
|
{"name": "auth_login", "path": ["auth", "login"], "json_output": True},
|
|
5261
5709
|
{"name": "auth_verify", "path": ["auth", "verify"], "json_output": True},
|
|
5262
5710
|
{"name": "auth_logout", "path": ["auth", "logout"], "json_output": True},
|
|
@@ -5326,6 +5774,7 @@ def _about_payload() -> dict[str, Any]:
|
|
|
5326
5774
|
"Default CLI output is human-readable; listed commands with json_output=true also support --json.",
|
|
5327
5775
|
"Reasoning-kernel artifacts shape promotable repository truth for tasks, decisions, hypotheses, experiments, checkpoints, policies, and results.",
|
|
5328
5776
|
"Kernel evolution in ORP should stay explicit: observe real usage, propose changes, and migrate artifacts through versioned CLI surfaces rather than silent agent mutation.",
|
|
5777
|
+
"YouTube inspection is a built-in ORP ability exposed through `orp youtube inspect`, returning public metadata and caption transcript text when available.",
|
|
5329
5778
|
"Discovery profiles in ORP are portable search-intent files managed directly by ORP.",
|
|
5330
5779
|
"Collaboration is a built-in ORP ability exposed through `orp collaborate ...`.",
|
|
5331
5780
|
"Project/session linking is a built-in ORP ability exposed through `orp link ...` and stored machine-locally under `.git/orp/link/`.",
|
|
@@ -5435,6 +5884,10 @@ def _home_payload(repo_root: Path, config_arg: str) -> dict[str, Any]:
|
|
|
5435
5884
|
"label": "Inspect the current hosted workspace identity",
|
|
5436
5885
|
"command": "orp whoami --json",
|
|
5437
5886
|
},
|
|
5887
|
+
{
|
|
5888
|
+
"label": "Inspect a YouTube video and public transcript for agent context",
|
|
5889
|
+
"command": "orp youtube inspect https://www.youtube.com/watch?v=<video_id> --json",
|
|
5890
|
+
},
|
|
5438
5891
|
{
|
|
5439
5892
|
"label": "List hosted ideas in the current workspace",
|
|
5440
5893
|
"command": "orp ideas list --json",
|
|
@@ -12257,6 +12710,43 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
12257
12710
|
add_json_flag(s_world_bind)
|
|
12258
12711
|
s_world_bind.set_defaults(func=cmd_world_bind, json_output=False)
|
|
12259
12712
|
|
|
12713
|
+
s_youtube = sub.add_parser("youtube", help="Public YouTube metadata and transcript inspection")
|
|
12714
|
+
youtube_sub = s_youtube.add_subparsers(dest="youtube_cmd", required=True)
|
|
12715
|
+
|
|
12716
|
+
s_youtube_inspect = youtube_sub.add_parser(
|
|
12717
|
+
"inspect",
|
|
12718
|
+
help="Inspect a YouTube video and fetch public metadata plus transcript text when captions are available",
|
|
12719
|
+
)
|
|
12720
|
+
s_youtube_inspect.add_argument("url", help="YouTube watch/share URL or 11-character video id")
|
|
12721
|
+
s_youtube_inspect.add_argument(
|
|
12722
|
+
"--lang",
|
|
12723
|
+
default="",
|
|
12724
|
+
help="Preferred caption language code, for example en or es",
|
|
12725
|
+
)
|
|
12726
|
+
s_youtube_inspect.add_argument(
|
|
12727
|
+
"--save",
|
|
12728
|
+
action="store_true",
|
|
12729
|
+
help="Save the inspected source artifact under orp/external/youtube/<video_id>.json",
|
|
12730
|
+
)
|
|
12731
|
+
s_youtube_inspect.add_argument(
|
|
12732
|
+
"--out",
|
|
12733
|
+
default="",
|
|
12734
|
+
help="Optional output path for the source artifact (.json, .yml, or .yaml)",
|
|
12735
|
+
)
|
|
12736
|
+
s_youtube_inspect.add_argument(
|
|
12737
|
+
"--format",
|
|
12738
|
+
default="",
|
|
12739
|
+
choices=["", "json", "yaml"],
|
|
12740
|
+
help="Optional explicit output format when saving",
|
|
12741
|
+
)
|
|
12742
|
+
s_youtube_inspect.add_argument(
|
|
12743
|
+
"--force",
|
|
12744
|
+
action="store_true",
|
|
12745
|
+
help="Overwrite an existing saved artifact",
|
|
12746
|
+
)
|
|
12747
|
+
add_json_flag(s_youtube_inspect)
|
|
12748
|
+
s_youtube_inspect.set_defaults(func=cmd_youtube_inspect, json_output=False)
|
|
12749
|
+
|
|
12260
12750
|
s_secrets = sub.add_parser("secrets", help="Hosted secret store and project binding operations")
|
|
12261
12751
|
secrets_sub = s_secrets.add_subparsers(dest="secrets_cmd", required=True)
|
|
12262
12752
|
|
package/docs/AGENT_LOOP.md
CHANGED
|
@@ -21,6 +21,9 @@ Use this loop when an AI agent is the primary operator of an ORP-enabled repo.
|
|
|
21
21
|
- or `orp pack fetch --source <git-url> --pack-id <pack-id> --install-target . --json`
|
|
22
22
|
- If the workflow depends on public Erdos data, sync it first:
|
|
23
23
|
- `orp erdos sync --problem-id <id> --out-problem-dir <dir> --json`
|
|
24
|
+
- If the task begins from a public YouTube link, normalize it first:
|
|
25
|
+
- `orp youtube inspect <youtube-url> --json`
|
|
26
|
+
- or `orp youtube inspect <youtube-url> --save --json` when the source artifact should stay with the repo
|
|
24
27
|
|
|
25
28
|
## 3. Run
|
|
26
29
|
|
|
package/docs/ORP_YOUTUBE_INSPECT.md
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# ORP YouTube Inspect
|
|
2
|
+
|
|
3
|
+
`orp youtube inspect` is ORP's first-class public-source ingestion surface for
|
|
4
|
+
YouTube videos.
|
|
5
|
+
|
|
6
|
+
It gives agents and users a stable way to turn a YouTube link into:
|
|
7
|
+
|
|
8
|
+
- normalized video metadata,
|
|
9
|
+
- public caption transcript text when available,
|
|
10
|
+
- segment-level timing rows,
|
|
11
|
+
- and one agent-friendly `text_bundle` field that can be handed directly into
|
|
12
|
+
summarization, extraction, comparison, or kernel-shaped artifact creation.
|
|
13
|
+
|
|
14
|
+
## Why this exists
|
|
15
|
+
|
|
16
|
+
Agents often receive a raw YouTube URL and are asked to:
|
|
17
|
+
|
|
18
|
+
- explain what the video is about,
|
|
19
|
+
- summarize it,
|
|
20
|
+
- extract claims,
|
|
21
|
+
- capture action items,
|
|
22
|
+
- compare it against repo work,
|
|
23
|
+
- or turn it into a canonical ORP artifact.
|
|
24
|
+
|
|
25
|
+
Without a built-in surface, each agent has to improvise scraping, transcript
|
|
26
|
+
discovery, and output shape. ORP now treats this as a real protocol ability.
|
|
27
|
+
|
|
28
|
+
## Command
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
orp youtube inspect https://www.youtube.com/watch?v=<video_id> --json
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
Optional persistence:
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
orp youtube inspect https://www.youtube.com/watch?v=<video_id> --save --json
|
|
38
|
+
orp youtube inspect https://www.youtube.com/watch?v=<video_id> --out analysis/source.youtube.json --json
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Output shape
|
|
42
|
+
|
|
43
|
+
The canonical artifact schema is:
|
|
44
|
+
|
|
45
|
+
- `spec/v1/youtube-source.schema.json`
|
|
46
|
+
|
|
47
|
+
The command returns:
|
|
48
|
+
|
|
49
|
+
- source identity:
|
|
50
|
+
- `source_url`
|
|
51
|
+
- `canonical_url`
|
|
52
|
+
- `video_id`
|
|
53
|
+
- metadata:
|
|
54
|
+
- `title`
|
|
55
|
+
- `author_name`
|
|
56
|
+
- `author_url`
|
|
57
|
+
- `thumbnail_url`
|
|
58
|
+
- `channel_id`
|
|
59
|
+
- `description`
|
|
60
|
+
- `duration_seconds`
|
|
61
|
+
- `published_at`
|
|
62
|
+
- `playability_status`
|
|
63
|
+
- transcript fields:
|
|
64
|
+
- `transcript_available`
|
|
65
|
+
- `transcript_language`
|
|
66
|
+
- `transcript_track_name`
|
|
67
|
+
- `transcript_kind`
|
|
68
|
+
- `transcript_fetch_mode`
|
|
69
|
+
- `transcript_text`
|
|
70
|
+
- `transcript_segments`
|
|
71
|
+
- agent-ready bundle:
|
|
72
|
+
- `text_bundle`
|
|
73
|
+
- capture notes:
|
|
74
|
+
- `warnings`
|
|
75
|
+
|
|
76
|
+
## Save behavior
|
|
77
|
+
|
|
78
|
+
`--save` writes the artifact to the default location (use `--out <path>` to write to an explicit path instead):
|
|
79
|
+
|
|
80
|
+
```text
|
|
81
|
+
orp/external/youtube/<video_id>.json
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
This keeps YouTube ingestion consistent with ORP's larger local-first artifact
|
|
85
|
+
discipline while staying outside the evidence boundary by default.
|
|
86
|
+
|
|
87
|
+
## Important boundary
|
|
88
|
+
|
|
89
|
+
`orp youtube inspect` returns public source context. It does **not** make the
|
|
90
|
+
result canonical evidence by itself.
|
|
91
|
+
|
|
92
|
+
If a video matters for repo truth, the agent should still:
|
|
93
|
+
|
|
94
|
+
1. inspect the video,
|
|
95
|
+
2. summarize or structure the relevant claims,
|
|
96
|
+
3. promote that into a typed ORP artifact when appropriate,
|
|
97
|
+
4. and cite the saved source artifact path alongside any downstream result.
|
package/llms.txt
CHANGED
|
@@ -13,6 +13,7 @@ ORP (Open Research Protocol) is a docs-first, local-first, agent-friendly protoc
|
|
|
13
13
|
## Fast Machine Discovery
|
|
14
14
|
|
|
15
15
|
- Run `orp about --json` for machine-readable tool metadata, artifact paths, schemas, supported commands, and bundled packs.
|
|
16
|
+
- Run `orp youtube inspect <youtube-url> --json` to normalize a public YouTube video into ORP's source artifact shape, including transcript text when public captions are fetchable.
|
|
16
17
|
- Run `orp erdos sync --json` for machine-readable Erdos catalog sync results.
|
|
17
18
|
- Run `orp pack list --json` for machine-readable bundled pack inventory.
|
|
18
19
|
- Core runtime commands also support `--json`:
|
|
@@ -37,10 +38,12 @@ ORP (Open Research Protocol) is a docs-first, local-first, agent-friendly protoc
|
|
|
37
38
|
- `spec/v1/orp.config.schema.json`
|
|
38
39
|
- `spec/v1/packet.schema.json`
|
|
39
40
|
- `spec/v1/profile-pack.schema.json`
|
|
41
|
+
- `spec/v1/youtube-source.schema.json`
|
|
40
42
|
|
|
41
43
|
## Key Commands
|
|
42
44
|
|
|
43
45
|
- `orp init`
|
|
46
|
+
- `orp youtube inspect <youtube-url> --json`
|
|
44
47
|
- `orp gate run --profile <profile>`
|
|
45
48
|
- `orp packet emit --profile <profile>`
|
|
46
49
|
- `orp report summary`
|
package/package.json
CHANGED
|
package/spec/v1/youtube-source.schema.json
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://openresearchprotocol.com/spec/v1/youtube-source.schema.json",
|
|
4
|
+
"title": "ORP YouTube Source Artifact",
|
|
5
|
+
"type": "object",
|
|
6
|
+
"additionalProperties": false,
|
|
7
|
+
"required": [
|
|
8
|
+
"schema_version",
|
|
9
|
+
"kind",
|
|
10
|
+
"retrieved_at_utc",
|
|
11
|
+
"source_url",
|
|
12
|
+
"canonical_url",
|
|
13
|
+
"video_id",
|
|
14
|
+
"title",
|
|
15
|
+
"author_name",
|
|
16
|
+
"author_url",
|
|
17
|
+
"thumbnail_url",
|
|
18
|
+
"channel_id",
|
|
19
|
+
"description",
|
|
20
|
+
"duration_seconds",
|
|
21
|
+
"published_at",
|
|
22
|
+
"playability_status",
|
|
23
|
+
"transcript_available",
|
|
24
|
+
"transcript_language",
|
|
25
|
+
"transcript_track_name",
|
|
26
|
+
"transcript_kind",
|
|
27
|
+
"transcript_fetch_mode",
|
|
28
|
+
"transcript_text",
|
|
29
|
+
"transcript_segments",
|
|
30
|
+
"warnings",
|
|
31
|
+
"text_bundle"
|
|
32
|
+
],
|
|
33
|
+
"properties": {
|
|
34
|
+
"schema_version": {
|
|
35
|
+
"type": "string",
|
|
36
|
+
"const": "1.0.0"
|
|
37
|
+
},
|
|
38
|
+
"kind": {
|
|
39
|
+
"type": "string",
|
|
40
|
+
"const": "youtube_source"
|
|
41
|
+
},
|
|
42
|
+
"retrieved_at_utc": {
|
|
43
|
+
"type": "string"
|
|
44
|
+
},
|
|
45
|
+
"source_url": {
|
|
46
|
+
"type": "string"
|
|
47
|
+
},
|
|
48
|
+
"canonical_url": {
|
|
49
|
+
"type": "string"
|
|
50
|
+
},
|
|
51
|
+
"video_id": {
|
|
52
|
+
"type": "string",
|
|
53
|
+
"pattern": "^[A-Za-z0-9_-]{11}$"
|
|
54
|
+
},
|
|
55
|
+
"title": {
|
|
56
|
+
"type": "string"
|
|
57
|
+
},
|
|
58
|
+
"author_name": {
|
|
59
|
+
"type": "string"
|
|
60
|
+
},
|
|
61
|
+
"author_url": {
|
|
62
|
+
"type": "string"
|
|
63
|
+
},
|
|
64
|
+
"thumbnail_url": {
|
|
65
|
+
"type": "string"
|
|
66
|
+
},
|
|
67
|
+
"channel_id": {
|
|
68
|
+
"type": "string"
|
|
69
|
+
},
|
|
70
|
+
"description": {
|
|
71
|
+
"type": "string"
|
|
72
|
+
},
|
|
73
|
+
"duration_seconds": {
|
|
74
|
+
"type": [
|
|
75
|
+
"integer",
|
|
76
|
+
"null"
|
|
77
|
+
],
|
|
78
|
+
"minimum": 0
|
|
79
|
+
},
|
|
80
|
+
"published_at": {
|
|
81
|
+
"type": "string"
|
|
82
|
+
},
|
|
83
|
+
"playability_status": {
|
|
84
|
+
"type": "string"
|
|
85
|
+
},
|
|
86
|
+
"transcript_available": {
|
|
87
|
+
"type": "boolean"
|
|
88
|
+
},
|
|
89
|
+
"transcript_language": {
|
|
90
|
+
"type": "string"
|
|
91
|
+
},
|
|
92
|
+
"transcript_track_name": {
|
|
93
|
+
"type": "string"
|
|
94
|
+
},
|
|
95
|
+
"transcript_kind": {
|
|
96
|
+
"type": "string",
|
|
97
|
+
"enum": [
|
|
98
|
+
"manual",
|
|
99
|
+
"auto",
|
|
100
|
+
"none"
|
|
101
|
+
]
|
|
102
|
+
},
|
|
103
|
+
"transcript_fetch_mode": {
|
|
104
|
+
"type": "string",
|
|
105
|
+
"enum": [
|
|
106
|
+
"json3",
|
|
107
|
+
"xml",
|
|
108
|
+
"unavailable",
|
|
109
|
+
"none",
|
|
110
|
+
"missing_track_url"
|
|
111
|
+
]
|
|
112
|
+
},
|
|
113
|
+
"transcript_text": {
|
|
114
|
+
"type": "string"
|
|
115
|
+
},
|
|
116
|
+
"transcript_segments": {
|
|
117
|
+
"type": "array",
|
|
118
|
+
"items": {
|
|
119
|
+
"type": "object",
|
|
120
|
+
"additionalProperties": false,
|
|
121
|
+
"required": [
|
|
122
|
+
"start_ms",
|
|
123
|
+
"duration_ms",
|
|
124
|
+
"text"
|
|
125
|
+
],
|
|
126
|
+
"properties": {
|
|
127
|
+
"start_ms": {
|
|
128
|
+
"type": "integer",
|
|
129
|
+
"minimum": 0
|
|
130
|
+
},
|
|
131
|
+
"duration_ms": {
|
|
132
|
+
"type": "integer",
|
|
133
|
+
"minimum": 0
|
|
134
|
+
},
|
|
135
|
+
"text": {
|
|
136
|
+
"type": "string"
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
},
|
|
141
|
+
"warnings": {
|
|
142
|
+
"type": "array",
|
|
143
|
+
"items": {
|
|
144
|
+
"type": "string"
|
|
145
|
+
}
|
|
146
|
+
},
|
|
147
|
+
"text_bundle": {
|
|
148
|
+
"type": "string"
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
}
|