npm - open-research-protocol - Versions diffs - 0.4.10 → 0.4.12 - Mend

open-research-protocol 0.4.10 → 0.4.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/README.md +3 -0
package/bin/orp-compute.mjs +82 -9
package/cli/orp.py +242 -36
package/docs/ORP_YOUTUBE_INSPECT.md +10 -1
package/llms.txt +1 -1
package/package.json +2 -2
package/scripts/orp-kernel-benchmark.py +1 -1
package/spec/v1/youtube-source.schema.json +49 -8

package/README.md CHANGED

@@ -147,7 +147,9 @@ orp pack fetch --source <git-url> --pack-id <pack-id> --install-target . --json
 orp gate run --profile default --json
 orp packet emit --profile default --json
 orp compute decide --input orp.compute.json --json
+orp compute decide --project-map orp.compute-map.json --point-id adult-vs-developmental-rgc-opponent --json
 orp compute run-local --input orp.compute.json --task orp.compute.task.json --json
+orp compute run-local --project-map orp.compute-map.json --point-id adult-vs-developmental-rgc-opponent --task orp.compute.task.json --json
 orp report summary --json
 ```
@@ -157,6 +159,7 @@ These surfaces are meant to help automated systems discover ORP quickly:
 - `orp home --json` returns the same landing context in machine-readable form
 - `orp auth ...`, `orp ideas ...`, `orp world ...`, `orp checkpoint ...`, `orp runner ...`, and `orp agent ...` expose the hosted workspace surface directly through ORP
 - `orp compute ...` exposes targeted-compute admission, local execution, and paid-approval gating through a stable ORP wrapper surface
+- `orp compute ...` can now consume either a raw compute packet input or a repo-declared `breakthroughs` project compute map plus a compute-point id
 - `orp youtube inspect ...` exposes public YouTube metadata plus full transcript ingestion through a stable ORP artifact shape for agent use when caption tracks are available
 - `orp init`, `orp status`, `orp branch start`, `orp checkpoint create`, `orp backup`, `orp ready`, `orp doctor`, and `orp cleanup` expose the local-first repo governance surface directly through ORP
 - `orp discover ...` exposes profile-based GitHub scanning as a built-in ORP ability

package/bin/orp-compute.mjs CHANGED

@@ -4,12 +4,14 @@ import fs from "node:fs/promises";
 import path from "node:path";
 import process from "node:process";
 import {
+  buildComputePointDecisionPacket,
   buildOrpComputeGateResult,
   buildOrpComputePacket,
   defineComputePacket,
   defineDecision,
   defineImpactRead,
   definePolicy,
+  defineProjectComputeMap,
   defineResultBundle,
   defineRung,
   evaluateDispatch,
@@ -21,7 +23,9 @@ function printHelp() {
 Usage:
   orp compute decide --input <path> [--packet-out <path>] [--json]
+  orp compute decide --project-map <path> --point-id <id> [--rung-id <id>] [--success-bar <path>] [--packet-out <path>] [--json]
   orp compute run-local --input <path> --task <path> [--receipt-out <path>] [--packet-out <path>] [--json]
+  orp compute run-local --project-map <path> --point-id <id> --task <path> [--rung-id <id>] [--success-bar <path>] [--receipt-out <path>] [--packet-out <path>] [--json]
 Input JSON shape:
   {
@@ -40,6 +44,22 @@ Input JSON shape:
     }
   }
+Project-map mode:
+  {
+    "projectId": "longevity-controller",
+    "repoRoots": ["/abs/path"],
+    "rungs": [...],
+    "defaultPolicy": {...},
+    "computePoints": [...]
+  }
+Project-map mode options:
+- --project-map <path> points to a repo compute catalog
+- --point-id <id> selects the compute point
+- --rung-id <id> optionally overrides the point default rung
+- --success-bar <path> optionally points to a JSON object merged into the packet success bar
+- repo/orp context is derived from the project map unless overridden with --repo-root, --board-id, --problem-id, or --artifact-root
 Task JSON shape for run-local:
   {
     "command": "node",
@@ -116,11 +136,71 @@ function buildContext(raw) {
   };
 }
+async function loadContext(options) {
+  if (options.input && options.projectMap) {
+    throw new Error("use either --input or --project-map, not both");
+  }
+  if (options.projectMap) {
+    if (!options.pointId) {
+      throw new Error("project-map mode requires --point-id <id>");
+    }
+    const projectMap = defineProjectComputeMap(await readJson(options.projectMap));
+    const successBar = options.successBar
+      ? await readJson(options.successBar)
+      : undefined;
+    const template = buildComputePointDecisionPacket({
+      projectComputeMap: projectMap,
+      pointId: options.pointId,
+      rungId: options.rungId,
+      successBar,
+    });
+    return {
+      raw: {
+        projectMap,
+        repo: {
+          rootPath: options.repoRoot || projectMap.repoRoots[0] || process.cwd(),
+        },
+        orp: {
+          boardId: options.boardId || "targeted_compute",
+          problemId: options.problemId || template.computePoint.id,
+          artifactRoot:
+            options.artifactRoot ||
+            `orp/artifacts/compute/${template.computePoint.id}`,
+        },
+      },
+      projectMap,
+      computePoint: template.computePoint,
+      decision: template.decision,
+      rung: template.rung,
+      policy: template.policy,
+      packet: template.packet,
+    };
+  }
+  if (!options.input) {
+    throw new Error("compute command requires --input <path> or --project-map <path>");
+  }
+  return buildContext(await readJson(options.input));
+}
 function commandLabel(subcommand, options) {
   const parts = ["orp", "compute", subcommand];
   if (options.input) {
     parts.push("--input", options.input);
   }
+  if (options.projectMap) {
+    parts.push("--project-map", options.projectMap);
+  }
+  if (options.pointId) {
+    parts.push("--point-id", options.pointId);
+  }
+  if (options.rungId) {
+    parts.push("--rung-id", options.rungId);
+  }
   if (options.task) {
     parts.push("--task", options.task);
   }
@@ -148,11 +228,7 @@ function summarizeDispatch(dispatchResult) {
 }
 async function runDecide(options) {
-  if (!options.input) {
-    throw new Error("compute decide requires --input <path>");
-  }
-  const context = buildContext(await readJson(options.input));
+  const context = await loadContext(options);
   const dispatchResult = evaluateDispatch(context);
   const gateResult = buildOrpComputeGateResult({
     gateId: context.packet.rungId,
@@ -195,14 +271,11 @@ async function runDecide(options) {
 }
 async function runLocal(options) {
-  if (!options.input) {
-    throw new Error("compute run-local requires --input <path>");
-  }
   if (!options.task) {
     throw new Error("compute run-local requires --task <path>");
   }
-  const context = buildContext(await readJson(options.input));
+  const context = await loadContext(options);
   const task = await readJson(options.task);
   const dispatchResult = evaluateDispatch(context);

package/cli/orp.py CHANGED

@@ -115,6 +115,10 @@ DEFAULT_DISCOVER_SCAN_ROOT = "orp/discovery/github"
 DEFAULT_HOSTED_BASE_URL = "https://orp.earth"
 KERNEL_SCHEMA_VERSION = "1.0.0"
 YOUTUBE_SOURCE_SCHEMA_VERSION = "1.0.0"
+YOUTUBE_ANDROID_CLIENT_VERSION = "20.10.38"
+YOUTUBE_ANDROID_USER_AGENT = (
+    f"com.google.android.youtube/{YOUTUBE_ANDROID_CLIENT_VERSION} (Linux; U; Android 14)"
+)
 class HostedApiError(RuntimeError):
@@ -362,6 +366,35 @@ def _http_get_json(url: str, *, headers: dict[str, str] | None = None, timeout_s
     raise RuntimeError(f"Response from {url} was not a JSON object.")
+def _http_post_json(
+    url: str,
+    payload: dict[str, Any],
+    *,
+    headers: dict[str, str] | None = None,
+    timeout_sec: int = 20,
+) -> dict[str, Any]:
+    body = json.dumps(payload).encode("utf-8")
+    merged_headers = {"Content-Type": "application/json"}
+    if headers:
+        merged_headers.update(headers)
+    request = urlrequest.Request(url, data=body, headers=merged_headers, method="POST")
+    try:
+        with urlrequest.urlopen(request, timeout=timeout_sec) as response:
+            text = response.read().decode("utf-8", errors="replace")
+    except urlerror.HTTPError as exc:
+        body_text = exc.read().decode("utf-8", errors="replace").strip()
+        raise RuntimeError(f"HTTP {exc.code} while fetching {url}: {body_text or exc.reason}") from exc
+    except urlerror.URLError as exc:
+        raise RuntimeError(f"Could not reach {url}: {exc.reason}") from exc
+    try:
+        parsed = json.loads(text)
+    except Exception as exc:
+        raise RuntimeError(f"Response from {url} was not valid JSON.") from exc
+    if isinstance(parsed, dict):
+        return parsed
+    raise RuntimeError(f"Response from {url} was not a JSON object.")
 def _youtube_request_headers() -> dict[str, str]:
     return {
         "User-Agent": (
@@ -372,6 +405,13 @@ def _youtube_request_headers() -> dict[str, str]:
     }
+def _youtube_android_request_headers() -> dict[str, str]:
+    return {
+        "User-Agent": YOUTUBE_ANDROID_USER_AGENT,
+        "Accept-Language": "en-US,en;q=0.9",
+    }
 def _youtube_source_schema_path() -> Path:
     return Path(__file__).resolve().parent.parent / "spec" / "v1" / "youtube-source.schema.json"
@@ -459,21 +499,52 @@ def _youtube_track_label(track: dict[str, Any]) -> str:
     return str(track.get("languageCode", "")).strip()
-def _pick_youtube_caption_track(tracks: list[dict[str, Any]], preferred_lang: str = "") -> dict[str, Any] | None:
-    if not tracks:
-        return None
+def _youtube_track_source(track: dict[str, Any]) -> str:
+    return str(track.get("_orp_source", "") or "unknown").strip()
+def _youtube_track_inventory(tracks: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    inventory: list[dict[str, Any]] = []
+    seen: set[tuple[str, str, str, str]] = set()
+    for track in tracks:
+        if not isinstance(track, dict):
+            continue
+        language_code = str(track.get("languageCode", "")).strip()
+        label = _youtube_track_label(track)
+        kind = "auto" if str(track.get("kind", "")).strip().lower() == "asr" else "manual"
+        source = _youtube_track_source(track)
+        key = (language_code, label, kind, source)
+        if key in seen:
+            continue
+        seen.add(key)
+        inventory.append(
+            {
+                "language_code": language_code,
+                "name": label,
+                "kind": kind,
+                "source": source,
+            }
+        )
+    return inventory
+def _youtube_caption_track_sort_key(track: dict[str, Any], preferred_lang: str = "") -> tuple[int, int]:
     preferred = str(preferred_lang or "").strip().lower()
+    code = str(track.get("languageCode", "")).strip().lower()
+    kind = str(track.get("kind", "")).strip().lower()
+    auto = 1 if kind == "asr" else 0
+    source = _youtube_track_source(track)
+    source_bias = 15 if source == "android_player" else 0
+    exact = 1 if preferred and code == preferred else 0
+    prefix = 1 if preferred and code.startswith(preferred + "-") else 0
+    english = 1 if code.startswith("en") else 0
+    return (exact * 100 + prefix * 80 + english * 20 + source_bias - auto * 5, -auto)
-    def score(track: dict[str, Any]) -> tuple[int, int]:
-        code = str(track.get("languageCode", "")).strip().lower()
-        kind = str(track.get("kind", "")).strip().lower()
-        auto = 1 if kind == "asr" else 0
-        exact = 1 if preferred and code == preferred else 0
-        prefix = 1 if preferred and code.startswith(preferred + "-") else 0
-        english = 1 if code.startswith("en") else 0
-        return (exact * 100 + prefix * 80 + english * 20 - auto * 5, -auto)
-    ranked = sorted(tracks, key=score, reverse=True)
+def _pick_youtube_caption_track(tracks: list[dict[str, Any]], preferred_lang: str = "") -> dict[str, Any] | None:
+    if not tracks:
+        return None
+    ranked = sorted(tracks, key=lambda track: _youtube_caption_track_sort_key(track, preferred_lang), reverse=True)
     return ranked[0] if ranked else None
@@ -544,6 +615,19 @@ def _parse_youtube_transcript_xml(text: str) -> tuple[str, list[dict[str, Any]]]
                 "text": body,
             }
         )
+    if not segments:
+        for node in root.findall(".//p"):
+            body = html.unescape("".join(node.itertext() or []))
+            body = re.sub(r"\s+", " ", body).strip()
+            if not body:
+                continue
+            segments.append(
+                {
+                    "start_ms": int(node.attrib.get("t", "0") or "0"),
+                    "duration_ms": int(node.attrib.get("d", "0") or "0"),
+                    "text": body,
+                }
+            )
     transcript_text = "\n".join(str(row["text"]) for row in segments)
     return transcript_text, segments
@@ -577,6 +661,8 @@ def _youtube_fetch_watch_state(video_id: str) -> dict[str, Any]:
         .get("playerCaptionsTracklistRenderer", {})
         .get("captionTracks", [])
     )
+    tracks = captions if isinstance(captions, list) else []
+    normalized_tracks = [{**row, "_orp_source": "watch_page"} for row in tracks if isinstance(row, dict)]
     return {
         "player_response": player_response,
         "video_details": player_response.get("videoDetails", {}) if isinstance(player_response.get("videoDetails"), dict) else {},
@@ -590,29 +676,110 @@ def _youtube_fetch_watch_state(video_id: str) -> dict[str, Any]:
             if isinstance(player_response.get("playabilityStatus"), dict)
             else {}
         ),
-        "caption_tracks": captions if isinstance(captions, list) else [],
+        "caption_tracks": normalized_tracks,
+    }
+def _youtube_fetch_android_player_state(video_id: str) -> dict[str, Any]:
+    payload = _http_post_json(
+        "https://www.youtube.com/youtubei/v1/player?prettyPrint=false",
+        {
+            "context": {
+                "client": {
+                    "clientName": "ANDROID",
+                    "clientVersion": YOUTUBE_ANDROID_CLIENT_VERSION,
+                }
+            },
+            "videoId": video_id,
+        },
+        headers=_youtube_android_request_headers(),
+        timeout_sec=25,
+    )
+    captions = (
+        payload.get("captions", {})
+        .get("playerCaptionsTracklistRenderer", {})
+        .get("captionTracks", [])
+    )
+    tracks = captions if isinstance(captions, list) else []
+    normalized_tracks = [{**row, "_orp_source": "android_player"} for row in tracks if isinstance(row, dict)]
+    return {
+        "player_response": payload,
+        "video_details": payload.get("videoDetails", {}) if isinstance(payload.get("videoDetails"), dict) else {},
+        "microformat": {},
+        "playability_status": payload.get("playabilityStatus", {}) if isinstance(payload.get("playabilityStatus"), dict) else {},
+        "caption_tracks": normalized_tracks,
     }
+def _youtube_ranked_caption_tracks(
+    watch_tracks: list[dict[str, Any]],
+    android_tracks: list[dict[str, Any]],
+    preferred_lang: str = "",
+) -> list[dict[str, Any]]:
+    ranked = sorted(
+        [track for track in android_tracks if isinstance(track, dict)]
+        + [track for track in watch_tracks if isinstance(track, dict)],
+        key=lambda track: _youtube_caption_track_sort_key(track, preferred_lang),
+        reverse=True,
+    )
+    unique: list[dict[str, Any]] = []
+    seen: set[tuple[str, str, str, str]] = set()
+    for track in ranked:
+        key = (
+            str(track.get("languageCode", "")).strip(),
+            _youtube_track_label(track),
+            str(track.get("kind", "")).strip().lower(),
+            _youtube_track_source(track),
+        )
+        if key in seen:
+            continue
+        seen.add(key)
+        unique.append(track)
+    return unique
+def _youtube_parse_transcript_response(text: str) -> tuple[str, list[dict[str, Any]], str]:
+    stripped = str(text or "").lstrip()
+    if not stripped:
+        return ("", [], "empty")
+    if stripped.startswith("{"):
+        try:
+            payload = json.loads(text)
+        except Exception:
+            payload = None
+        if isinstance(payload, dict):
+            transcript_text, segments = _parse_youtube_transcript_json3(payload)
+            if transcript_text:
+                return (transcript_text, segments, "json3")
+    transcript_text, segments = _parse_youtube_transcript_xml(text)
+    if transcript_text:
+        return (transcript_text, segments, "xml")
+    return ("", [], "unparsed")
 def _youtube_fetch_transcript_from_track(track: dict[str, Any]) -> tuple[str, list[dict[str, Any]], str]:
     base_url = str(track.get("baseUrl", "")).strip()
     if not base_url:
         return ("", [], "missing_track_url")
-    json3_url = _youtube_add_query_param(base_url, "fmt", "json3")
-    try:
-        payload = _http_get_json(json3_url, headers=_youtube_request_headers(), timeout_sec=25)
-        transcript_text, segments = _parse_youtube_transcript_json3(payload)
-        if transcript_text:
-            return transcript_text, segments, "json3"
-    except Exception:
-        pass
-    try:
-        xml_text = _http_get_text(base_url, headers=_youtube_request_headers(), timeout_sec=25)
-        transcript_text, segments = _parse_youtube_transcript_xml(xml_text)
+    source = _youtube_track_source(track) or "unknown"
+    candidate_urls = [
+        ("base", base_url),
+        ("json3", _youtube_add_query_param(base_url, "fmt", "json3")),
+        ("srv3", _youtube_add_query_param(base_url, "fmt", "srv3")),
+    ]
+    seen_urls: set[str] = set()
+    for mode, candidate_url in candidate_urls:
+        if candidate_url in seen_urls:
+            continue
+        seen_urls.add(candidate_url)
+        try:
+            response_text = _http_get_text(candidate_url, headers=_youtube_request_headers(), timeout_sec=25)
+        except Exception:
+            continue
+        transcript_text, segments, parsed_mode = _youtube_parse_transcript_response(response_text)
         if transcript_text:
-            return transcript_text, segments, "xml"
-    except Exception:
-        pass
+            final_mode = parsed_mode if mode == "base" else f"{mode}_{parsed_mode}"
+            return transcript_text, segments, f"{source}_{final_mode}"
     return ("", [], "unavailable")
@@ -647,28 +814,61 @@ def _youtube_inspect_payload(raw_url: str, preferred_lang: str = "") -> dict[str
         watch_state = _youtube_fetch_watch_state(video_id)
     except Exception as exc:
         warnings.append(str(exc))
+    android_state: dict[str, Any] = {}
+    try:
+        android_state = _youtube_fetch_android_player_state(video_id)
+    except Exception as exc:
+        warnings.append(str(exc))
-    video_details = watch_state.get("video_details", {}) if isinstance(watch_state.get("video_details"), dict) else {}
+    watch_video_details = watch_state.get("video_details", {}) if isinstance(watch_state.get("video_details"), dict) else {}
+    android_video_details = (
+        android_state.get("video_details", {}) if isinstance(android_state.get("video_details"), dict) else {}
+    )
+    video_details = watch_video_details or android_video_details
     microformat = watch_state.get("microformat", {}) if isinstance(watch_state.get("microformat"), dict) else {}
     playability = watch_state.get("playability_status", {}) if isinstance(watch_state.get("playability_status"), dict) else {}
-    tracks = [row for row in watch_state.get("caption_tracks", []) if isinstance(row, dict)]
-    chosen_track = _pick_youtube_caption_track(tracks, preferred_lang)
+    if not playability:
+        playability = android_state.get("playability_status", {}) if isinstance(android_state.get("playability_status"), dict) else {}
+    watch_tracks = [row for row in watch_state.get("caption_tracks", []) if isinstance(row, dict)]
+    android_tracks = [row for row in android_state.get("caption_tracks", []) if isinstance(row, dict)]
+    tracks = _youtube_ranked_caption_tracks(watch_tracks, android_tracks, preferred_lang)
+    available_tracks = _youtube_track_inventory(tracks)
     transcript_text = ""
     transcript_segments: list[dict[str, Any]] = []
     transcript_fetch_mode = "none"
     transcript_available = False
     transcript_language = ""
     transcript_track_name = ""
+    transcript_track_source = ""
     transcript_kind = "none"
+    transcript_sources_tried: list[str] = []
+    chosen_track: dict[str, Any] | None = None
+    for candidate in tracks:
+        transcript_sources_tried.append(
+            ":".join(
+                part
+                for part in [
+                    _youtube_track_source(candidate),
+                    str(candidate.get("languageCode", "")).strip(),
+                    _youtube_track_label(candidate),
+                ]
+                if part
+            )
+        )
+        transcript_text, transcript_segments, transcript_fetch_mode = _youtube_fetch_transcript_from_track(candidate)
+        if transcript_text.strip():
+            transcript_available = True
+            chosen_track = candidate
+            break
     if chosen_track is not None:
-        transcript_text, transcript_segments, transcript_fetch_mode = _youtube_fetch_transcript_from_track(chosen_track)
-        transcript_available = bool(transcript_text.strip())
         transcript_language = str(chosen_track.get("languageCode", "")).strip()
         transcript_track_name = _youtube_track_label(chosen_track)
+        transcript_track_source = _youtube_track_source(chosen_track)
         transcript_kind = "auto" if str(chosen_track.get("kind", "")).strip().lower() == "asr" else "manual"
+    if tracks:
         if not transcript_available:
             warnings.append("A caption track was found, but transcript text could not be fetched.")
-    elif watch_state:
+    elif watch_state or android_state:
         warnings.append("No caption tracks were available for this video.")
     title = str(video_details.get("title") or oembed.get("title") or "").strip()
@@ -698,13 +898,17 @@ def _youtube_inspect_payload(raw_url: str, preferred_lang: str = "") -> dict[str
         "duration_seconds": duration_seconds or None,
         "published_at": published_at,
         "playability_status": str(playability.get("status", "")).strip(),
+        "transcript_track_count": len(available_tracks),
+        "available_transcript_tracks": available_tracks,
         "transcript_available": transcript_available,
         "transcript_language": transcript_language,
         "transcript_track_name": transcript_track_name,
+        "transcript_track_source": transcript_track_source,
         "transcript_kind": transcript_kind,
         "transcript_fetch_mode": transcript_fetch_mode,
         "transcript_text": transcript_text,
         "transcript_segments": transcript_segments,
+        "transcript_sources_tried": transcript_sources_tried,
         "warnings": _unique_strings(warnings),
     }
     payload["text_bundle"] = _youtube_text_bundle(payload)
@@ -754,8 +958,10 @@ def cmd_youtube_inspect(args: argparse.Namespace) -> int:
                 ("video.title", str(payload.get("title", "")).strip()),
                 ("video.author", str(payload.get("author_name", "")).strip()),
                 ("video.duration_seconds", payload.get("duration_seconds") or ""),
+                ("transcript.track_count", payload.get("transcript_track_count") or 0),
                 ("transcript.available", str(bool(payload.get("transcript_available", False))).lower()),
                 ("transcript.language", str(payload.get("transcript_language", "")).strip()),
+                ("transcript.track_source", str(payload.get("transcript_track_source", "")).strip()),
                 ("transcript.kind", str(payload.get("transcript_kind", "")).strip()),
                 ("saved", str(bool(out_path is not None)).lower()),
                 ("path", _path_for_state(out_path, repo_root) if out_path is not None else ""),
@@ -5774,7 +5980,7 @@ def _about_payload() -> dict[str, Any]:
             "Default CLI output is human-readable; listed commands with json_output=true also support --json.",
             "Reasoning-kernel artifacts shape promotable repository truth for tasks, decisions, hypotheses, experiments, checkpoints, policies, and results.",
             "Kernel evolution in ORP should stay explicit: observe real usage, propose changes, and migrate artifacts through versioned CLI surfaces rather than silent agent mutation.",
-            "YouTube inspection is a built-in ORP ability exposed through `orp youtube inspect`, returning public metadata and caption transcript text when available.",
+            "YouTube inspection is a built-in ORP ability exposed through `orp youtube inspect`, returning public metadata plus full transcript text and segments whenever public caption tracks are available.",
             "Discovery profiles in ORP are portable search-intent files managed directly by ORP.",
             "Collaboration is a built-in ORP ability exposed through `orp collaborate ...`.",
             "Project/session linking is a built-in ORP ability exposed through `orp link ...` and stored machine-locally under `.git/orp/link/`.",
@@ -5885,7 +6091,7 @@ def _home_payload(repo_root: Path, config_arg: str) -> dict[str, Any]:
             "command": "orp whoami --json",
         },
         {
-            "label": "Inspect a YouTube video and public transcript for agent context",
+            "label": "Inspect a YouTube video and ingest full public transcript context",
             "command": "orp youtube inspect https://www.youtube.com/watch?v=<video_id> --json",
         },
         {
@@ -12715,7 +12921,7 @@ def build_parser() -> argparse.ArgumentParser:
     s_youtube_inspect = youtube_sub.add_parser(
         "inspect",
-        help="Inspect a YouTube video and fetch public metadata plus transcript text when captions are available",
+        help="Inspect a YouTube video and fetch public metadata plus full transcript text and segments when caption tracks are available",
     )
     s_youtube_inspect.add_argument("url", help="YouTube watch/share URL or 11-character video id")
     s_youtube_inspect.add_argument(

package/docs/ORP_YOUTUBE_INSPECT.md CHANGED

@@ -6,7 +6,7 @@ YouTube videos.
 It gives agents and users a stable way to turn a YouTube link into:
 - normalized video metadata,
-- public caption transcript text when available,
+- full public transcript text and segment timing when caption tracks are available,
 - segment-level timing rows,
 - and one agent-friendly `text_bundle` field that can be handed directly into
   summarization, extraction, comparison, or kernel-shaped artifact creation.
@@ -61,13 +61,17 @@ The command returns:
   - `published_at`
   - `playability_status`
 - transcript fields:
+  - `transcript_track_count`
+  - `available_transcript_tracks`
   - `transcript_available`
   - `transcript_language`
   - `transcript_track_name`
+  - `transcript_track_source`
   - `transcript_kind`
   - `transcript_fetch_mode`
   - `transcript_text`
   - `transcript_segments`
+  - `transcript_sources_tried`
 - agent-ready bundle:
   - `text_bundle`
 - capture notes:
@@ -89,6 +93,11 @@ discipline while staying outside the evidence boundary by default.
 `orp youtube inspect` returns public source context. It does **not** make the
 result canonical evidence by itself.
+When public caption tracks exist, ORP now attempts full transcript ingestion
+across multiple retrieval strategies and records which track/source succeeded.
+If a video has no accessible caption tracks, ORP reports that honestly instead
+of silently fabricating a transcript.
 If a video matters for repo truth, the agent should still:
 1. inspect the video,

package/llms.txt CHANGED

@@ -13,7 +13,7 @@ ORP (Open Research Protocol) is a docs-first, local-first, agent-friendly protoc
 ## Fast Machine Discovery
 - Run `orp about --json` for machine-readable tool metadata, artifact paths, schemas, supported commands, and bundled packs.
-- Run `orp youtube inspect <youtube-url> --json` to normalize a public YouTube video into ORP's source artifact shape, including transcript text when public captions are fetchable.
+- Run `orp youtube inspect <youtube-url> --json` to normalize a public YouTube video into ORP's source artifact shape, including full transcript text and timing segments when public caption tracks are available.
 - Run `orp erdos sync --json` for machine-readable Erdos catalog sync results.
 - Run `orp pack list --json` for machine-readable bundled pack inventory.
 - Core runtime commands also support `--json`:

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "open-research-protocol",
-  "version": "0.4.10",
+  "version": "0.4.12",
   "description": "ORP CLI (Open Research Protocol): agent-friendly research workflows, runtime, reports, and pack tooling.",
   "license": "MIT",
   "repository": {
@@ -36,7 +36,7 @@
     "node": ">=18"
   },
   "dependencies": {
-    "breakthroughs": "^0.1.0"
+    "breakthroughs": "^0.1.1"
   },
   "scripts": {
     "postinstall": "node scripts/npm-postinstall-check.js",

package/scripts/orp-kernel-benchmark.py CHANGED

@@ -202,7 +202,7 @@ def _benchmark_init_starter(iterations: int) -> dict[str, Any]:
     targets = {
         "init_mean_lt_ms": 375.0,
-        "validate_mean_lt_ms": 210.0,
+        "validate_mean_lt_ms": 250.0,
         "gate_mean_lt_ms": 350.0,
     }
     observed = {

package/spec/v1/youtube-source.schema.json CHANGED

@@ -20,13 +20,17 @@
     "duration_seconds",
     "published_at",
     "playability_status",
+    "transcript_track_count",
+    "available_transcript_tracks",
     "transcript_available",
     "transcript_language",
     "transcript_track_name",
+    "transcript_track_source",
     "transcript_kind",
     "transcript_fetch_mode",
     "transcript_text",
     "transcript_segments",
+    "transcript_sources_tried",
     "warnings",
     "text_bundle"
   ],
@@ -83,6 +87,41 @@
     "playability_status": {
       "type": "string"
     },
+    "transcript_track_count": {
+      "type": "integer",
+      "minimum": 0
+    },
+    "available_transcript_tracks": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "additionalProperties": false,
+        "required": [
+          "language_code",
+          "name",
+          "kind",
+          "source"
+        ],
+        "properties": {
+          "language_code": {
+            "type": "string"
+          },
+          "name": {
+            "type": "string"
+          },
+          "kind": {
+            "type": "string",
+            "enum": [
+              "manual",
+              "auto"
+            ]
+          },
+          "source": {
+            "type": "string"
+          }
+        }
+      }
+    },
     "transcript_available": {
       "type": "boolean"
     },
@@ -92,6 +131,9 @@
     "transcript_track_name": {
       "type": "string"
     },
+    "transcript_track_source": {
+      "type": "string"
+    },
     "transcript_kind": {
       "type": "string",
       "enum": [
@@ -101,14 +143,7 @@
       ]
     },
     "transcript_fetch_mode": {
-      "type": "string",
-      "enum": [
-        "json3",
-        "xml",
-        "unavailable",
-        "none",
-        "missing_track_url"
-      ]
+      "type": "string"
     },
     "transcript_text": {
       "type": "string"
@@ -138,6 +173,12 @@
         }
       }
     },
+    "transcript_sources_tried": {
+      "type": "array",
+      "items": {
+        "type": "string"
+      }
+    },
     "warnings": {
       "type": "array",
       "items": {