open-research-protocol 0.4.7 → 0.4.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/README.md +15 -0
  2. package/cli/orp.py +1158 -43
  3. package/docs/AGENT_LOOP.md +3 -0
  4. package/docs/ORP_REASONING_KERNEL_AGENT_PILOT.md +125 -0
  5. package/docs/ORP_REASONING_KERNEL_AGENT_REPLICATION.md +97 -0
  6. package/docs/ORP_REASONING_KERNEL_CANONICAL_CONTINUATION_PILOT.md +100 -0
  7. package/docs/ORP_REASONING_KERNEL_COMPARISON_PILOT.md +116 -0
  8. package/docs/ORP_REASONING_KERNEL_CONTINUATION_PILOT.md +86 -0
  9. package/docs/ORP_REASONING_KERNEL_EVALUATION_PLAN.md +261 -0
  10. package/docs/ORP_REASONING_KERNEL_EVIDENCE_MATRIX.md +131 -0
  11. package/docs/ORP_REASONING_KERNEL_EVOLUTION.md +123 -0
  12. package/docs/ORP_REASONING_KERNEL_PICKUP_PILOT.md +107 -0
  13. package/docs/ORP_REASONING_KERNEL_TECHNICAL_VALIDATION.md +140 -22
  14. package/docs/ORP_REASONING_KERNEL_V0_1.md +11 -0
  15. package/docs/ORP_YOUTUBE_INSPECT.md +97 -0
  16. package/docs/benchmarks/orp_reasoning_kernel_agent_pilot_v0_1.json +796 -0
  17. package/docs/benchmarks/orp_reasoning_kernel_agent_replication_task_smoke.json +487 -0
  18. package/docs/benchmarks/orp_reasoning_kernel_agent_replication_v0_1.json +1927 -0
  19. package/docs/benchmarks/orp_reasoning_kernel_agent_replication_v0_2.json +10217 -0
  20. package/docs/benchmarks/orp_reasoning_kernel_canonical_continuation_task_smoke.json +174 -0
  21. package/docs/benchmarks/orp_reasoning_kernel_canonical_continuation_v0_1.json +598 -0
  22. package/docs/benchmarks/orp_reasoning_kernel_comparison_v0_1.json +688 -0
  23. package/docs/benchmarks/orp_reasoning_kernel_continuation_task_smoke.json +150 -0
  24. package/docs/benchmarks/orp_reasoning_kernel_continuation_v0_1.json +448 -0
  25. package/docs/benchmarks/orp_reasoning_kernel_pickup_v0_1.json +594 -0
  26. package/docs/benchmarks/orp_reasoning_kernel_v0_1_validation.json +769 -41
  27. package/examples/README.md +2 -0
  28. package/examples/kernel/comparison/comparison-corpus.json +337 -0
  29. package/examples/kernel/comparison/next-task-continuation.json +55 -0
  30. package/examples/kernel/corpus/operations/habanero-routing.checkpoint.kernel.yml +12 -0
  31. package/examples/kernel/corpus/operations/runner-routing.policy.kernel.yml +9 -0
  32. package/examples/kernel/corpus/product/project-home.decision.kernel.yml +11 -0
  33. package/examples/kernel/corpus/research/kernel-handoff.experiment.kernel.yml +16 -0
  34. package/examples/kernel/corpus/research/lane-drift.hypothesis.kernel.yml +11 -0
  35. package/examples/kernel/corpus/software/trace-widget.task.kernel.yml +13 -0
  36. package/examples/kernel/corpus/writing/kernel-launch.result.kernel.yml +12 -0
  37. package/llms.txt +3 -0
  38. package/package.json +4 -1
  39. package/scripts/orp-kernel-agent-pilot.py +673 -0
  40. package/scripts/orp-kernel-agent-replication.py +307 -0
  41. package/scripts/orp-kernel-benchmark.py +471 -2
  42. package/scripts/orp-kernel-canonical-continuation.py +381 -0
  43. package/scripts/orp-kernel-ci-check.py +138 -0
  44. package/scripts/orp-kernel-comparison.py +592 -0
  45. package/scripts/orp-kernel-continuation-pilot.py +384 -0
  46. package/scripts/orp-kernel-pickup.py +401 -0
  47. package/spec/v1/kernel-extension.schema.json +96 -0
  48. package/spec/v1/kernel-proposal.schema.json +115 -0
  49. package/spec/v1/kernel.schema.json +2 -1
  50. package/spec/v1/youtube-source.schema.json +151 -0
package/cli/orp.py CHANGED
@@ -30,6 +30,7 @@ import argparse
30
30
  import datetime as dt
31
31
  import getpass
32
32
  import hashlib
33
+ import html
33
34
  import json
34
35
  import os
35
36
  import platform
@@ -45,6 +46,7 @@ import uuid
45
46
  from urllib import error as urlerror
46
47
  from urllib import parse as urlparse
47
48
  from urllib import request as urlrequest
49
+ import xml.etree.ElementTree as ET
48
50
 
49
51
  RUNNER_LEASE_STALE_SECONDS = 120
50
52
 
@@ -111,6 +113,8 @@ ORP_PACKAGE_NAME = _tool_package_name()
111
113
  DEFAULT_DISCOVER_PROFILE = "orp.profile.default.json"
112
114
  DEFAULT_DISCOVER_SCAN_ROOT = "orp/discovery/github"
113
115
  DEFAULT_HOSTED_BASE_URL = "https://orp.earth"
116
+ KERNEL_SCHEMA_VERSION = "1.0.0"
117
+ YOUTUBE_SOURCE_SCHEMA_VERSION = "1.0.0"
114
118
 
115
119
 
116
120
  class HostedApiError(RuntimeError):
@@ -335,6 +339,442 @@ def _request_hosted_sse_event(
335
339
  ) from exc
336
340
 
337
341
 
342
+ def _http_get_text(url: str, *, headers: dict[str, str] | None = None, timeout_sec: int = 20) -> str:
343
+ request = urlrequest.Request(url, headers=headers or {}, method="GET")
344
+ try:
345
+ with urlrequest.urlopen(request, timeout=timeout_sec) as response:
346
+ return response.read().decode("utf-8", errors="replace")
347
+ except urlerror.HTTPError as exc:
348
+ body = exc.read().decode("utf-8", errors="replace").strip()
349
+ raise RuntimeError(f"HTTP {exc.code} while fetching {url}: {body or exc.reason}") from exc
350
+ except urlerror.URLError as exc:
351
+ raise RuntimeError(f"Could not reach {url}: {exc.reason}") from exc
352
+
353
+
354
def _http_get_json(url: str, *, headers: dict[str, str] | None = None, timeout_sec: int = 20) -> dict[str, Any]:
    """Fetch ``url`` and decode the response body as a JSON object.

    Args:
        url: URL to fetch.
        headers: Optional request headers forwarded to ``_http_get_text``.
        timeout_sec: Timeout in seconds forwarded to ``_http_get_text``.

    Returns:
        The decoded JSON object.

    Raises:
        RuntimeError: If the body is not valid JSON or does not decode to an
            object. Errors raised by ``_http_get_text`` propagate unchanged.
    """
    raw_body = _http_get_text(url, headers=headers, timeout_sec=timeout_sec)
    try:
        decoded = json.loads(raw_body)
    except Exception as exc:
        raise RuntimeError(f"Response from {url} was not valid JSON.") from exc
    if not isinstance(decoded, dict):
        raise RuntimeError(f"Response from {url} was not a JSON object.")
    return decoded
363
+
364
+
365
+ def _youtube_request_headers() -> dict[str, str]:
366
+ return {
367
+ "User-Agent": (
368
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
369
+ "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0 Safari/537.36"
370
+ ),
371
+ "Accept-Language": "en-US,en;q=0.9",
372
+ }
373
+
374
+
375
+ def _youtube_source_schema_path() -> Path:
376
+ return Path(__file__).resolve().parent.parent / "spec" / "v1" / "youtube-source.schema.json"
377
+
378
+
379
+ def _youtube_video_id_from_url(raw_url: str) -> str:
380
+ text = str(raw_url or "").strip()
381
+ if not text:
382
+ raise RuntimeError("YouTube URL is required.")
383
+ if re.fullmatch(r"[\w-]{11}", text):
384
+ return text
385
+
386
+ parsed = urlparse.urlparse(text)
387
+ host = parsed.netloc.lower()
388
+ path_parts = [part for part in parsed.path.split("/") if part]
389
+ if host.endswith("youtu.be"):
390
+ if path_parts:
391
+ return path_parts[0]
392
+ if any(host.endswith(suffix) for suffix in ("youtube.com", "youtube-nocookie.com", "music.youtube.com")):
393
+ if parsed.path == "/watch":
394
+ video_id = urlparse.parse_qs(parsed.query).get("v", [""])[0].strip()
395
+ if video_id:
396
+ return video_id
397
+ if len(path_parts) >= 2 and path_parts[0] in {"embed", "shorts", "live", "v"}:
398
+ return path_parts[1]
399
+ raise RuntimeError(f"Could not extract a YouTube video id from: {text}")
400
+
401
+
402
+ def _youtube_canonical_url(video_id: str) -> str:
403
+ return f"https://www.youtube.com/watch?v={video_id}"
404
+
405
+
406
+ def _extract_json_object_after_marker(text: str, marker: str) -> dict[str, Any] | None:
407
+ index = text.find(marker)
408
+ if index < 0:
409
+ return None
410
+ start = text.find("{", index)
411
+ if start < 0:
412
+ return None
413
+ depth = 0
414
+ in_string = False
415
+ escaped = False
416
+ for pos in range(start, len(text)):
417
+ ch = text[pos]
418
+ if in_string:
419
+ if escaped:
420
+ escaped = False
421
+ elif ch == "\\":
422
+ escaped = True
423
+ elif ch == '"':
424
+ in_string = False
425
+ continue
426
+ if ch == '"':
427
+ in_string = True
428
+ continue
429
+ if ch == "{":
430
+ depth += 1
431
+ continue
432
+ if ch == "}":
433
+ depth -= 1
434
+ if depth == 0:
435
+ candidate = text[start : pos + 1]
436
+ try:
437
+ payload = json.loads(candidate)
438
+ except Exception:
439
+ return None
440
+ return payload if isinstance(payload, dict) else None
441
+ return None
442
+
443
+
444
+ def _youtube_track_label(track: dict[str, Any]) -> str:
445
+ name = track.get("name")
446
+ if isinstance(name, dict):
447
+ simple = str(name.get("simpleText", "")).strip()
448
+ if simple:
449
+ return simple
450
+ runs = name.get("runs")
451
+ if isinstance(runs, list):
452
+ pieces = [
453
+ str(row.get("text", "")).strip()
454
+ for row in runs
455
+ if isinstance(row, dict) and str(row.get("text", "")).strip()
456
+ ]
457
+ if pieces:
458
+ return "".join(pieces)
459
+ return str(track.get("languageCode", "")).strip()
460
+
461
+
462
+ def _pick_youtube_caption_track(tracks: list[dict[str, Any]], preferred_lang: str = "") -> dict[str, Any] | None:
463
+ if not tracks:
464
+ return None
465
+ preferred = str(preferred_lang or "").strip().lower()
466
+
467
+ def score(track: dict[str, Any]) -> tuple[int, int]:
468
+ code = str(track.get("languageCode", "")).strip().lower()
469
+ kind = str(track.get("kind", "")).strip().lower()
470
+ auto = 1 if kind == "asr" else 0
471
+ exact = 1 if preferred and code == preferred else 0
472
+ prefix = 1 if preferred and code.startswith(preferred + "-") else 0
473
+ english = 1 if code.startswith("en") else 0
474
+ return (exact * 100 + prefix * 80 + english * 20 - auto * 5, -auto)
475
+
476
+ ranked = sorted(tracks, key=score, reverse=True)
477
+ return ranked[0] if ranked else None
478
+
479
+
480
+ def _youtube_add_query_param(url: str, key: str, value: str) -> str:
481
+ parsed = urlparse.urlsplit(url)
482
+ query = dict(urlparse.parse_qsl(parsed.query, keep_blank_values=True))
483
+ query[key] = value
484
+ return urlparse.urlunsplit(
485
+ (
486
+ parsed.scheme,
487
+ parsed.netloc,
488
+ parsed.path,
489
+ urlparse.urlencode(query),
490
+ parsed.fragment,
491
+ )
492
+ )
493
+
494
+
495
+ def _parse_youtube_transcript_json3(payload: dict[str, Any]) -> tuple[str, list[dict[str, Any]]]:
496
+ events = payload.get("events")
497
+ if not isinstance(events, list):
498
+ return ("", [])
499
+ segments: list[dict[str, Any]] = []
500
+ for event in events:
501
+ if not isinstance(event, dict):
502
+ continue
503
+ segs = event.get("segs")
504
+ if not isinstance(segs, list):
505
+ continue
506
+ pieces: list[str] = []
507
+ for seg in segs:
508
+ if not isinstance(seg, dict):
509
+ continue
510
+ text = html.unescape(str(seg.get("utf8", "")))
511
+ if text:
512
+ pieces.append(text)
513
+ merged = re.sub(r"\s+", " ", "".join(pieces)).strip()
514
+ if not merged:
515
+ continue
516
+ segments.append(
517
+ {
518
+ "start_ms": int(event.get("tStartMs", 0) or 0),
519
+ "duration_ms": int(event.get("dDurationMs", 0) or 0),
520
+ "text": merged,
521
+ }
522
+ )
523
+ transcript_text = "\n".join(str(row["text"]) for row in segments)
524
+ return transcript_text, segments
525
+
526
+
527
+ def _parse_youtube_transcript_xml(text: str) -> tuple[str, list[dict[str, Any]]]:
528
+ try:
529
+ root = ET.fromstring(text)
530
+ except Exception:
531
+ return ("", [])
532
+ segments: list[dict[str, Any]] = []
533
+ for node in root.findall(".//text"):
534
+ body = html.unescape("".join(node.itertext() or []))
535
+ body = re.sub(r"\s+", " ", body).strip()
536
+ if not body:
537
+ continue
538
+ start = float(node.attrib.get("start", "0") or "0")
539
+ duration = float(node.attrib.get("dur", "0") or "0")
540
+ segments.append(
541
+ {
542
+ "start_ms": int(start * 1000),
543
+ "duration_ms": int(duration * 1000),
544
+ "text": body,
545
+ }
546
+ )
547
+ transcript_text = "\n".join(str(row["text"]) for row in segments)
548
+ return transcript_text, segments
549
+
550
+
551
def _youtube_fetch_oembed(canonical_url: str) -> dict[str, Any]:
    """Fetch oEmbed metadata for ``canonical_url``; return ``{}`` on any failure."""
    query = urlparse.urlencode({"url": canonical_url, "format": "json"})
    endpoint = "https://www.youtube.com/oembed?" + query
    try:
        metadata = _http_get_json(endpoint, headers=_youtube_request_headers(), timeout_sec=20)
    except Exception:
        # oEmbed is optional enrichment, so a failed lookup is not an error.
        return {}
    return metadata
557
+
558
+
559
def _youtube_fetch_watch_state(video_id: str) -> dict[str, Any]:
    """Download the watch page for ``video_id`` and extract its player state.

    The page's embedded ``ytInitialPlayerResponse`` object is located by
    trying several assignment markers in turn.

    Args:
        video_id: YouTube video id.

    Returns:
        A dict with ``player_response`` (the full decoded object) plus
        ``video_details``, ``microformat`` and ``playability_status`` (each a
        dict, empty when absent or malformed) and ``caption_tracks`` (a list,
        empty when absent or malformed).

    Raises:
        RuntimeError: If the page cannot be fetched or no player response can
            be decoded from it.
    """
    url = _youtube_canonical_url(video_id) + "&hl=en&persist_hl=1"
    html_text = _http_get_text(url, headers=_youtube_request_headers(), timeout_sec=25)
    markers = [
        "var ytInitialPlayerResponse = ",
        "ytInitialPlayerResponse = ",
        "window['ytInitialPlayerResponse'] = ",
        'window["ytInitialPlayerResponse"] = ',
    ]
    player_response: dict[str, Any] | None = None
    for marker in markers:
        player_response = _extract_json_object_after_marker(html_text, marker)
        if player_response:
            break
    if not player_response:
        raise RuntimeError("Could not parse YouTube player response from the watch page.")

    def as_dict(value: Any) -> dict[str, Any]:
        # The payload is untrusted: a present-but-null or otherwise non-object
        # value is treated the same as a missing one instead of crashing.
        return value if isinstance(value, dict) else {}

    tracklist = as_dict(as_dict(player_response.get("captions")).get("playerCaptionsTracklistRenderer"))
    captions = tracklist.get("captionTracks", [])
    return {
        "player_response": player_response,
        "video_details": as_dict(player_response.get("videoDetails")),
        "microformat": as_dict(as_dict(player_response.get("microformat")).get("playerMicroformatRenderer")),
        "playability_status": as_dict(player_response.get("playabilityStatus")),
        "caption_tracks": captions if isinstance(captions, list) else [],
    }
595
+
596
+
597
+ def _youtube_fetch_transcript_from_track(track: dict[str, Any]) -> tuple[str, list[dict[str, Any]], str]:
598
+ base_url = str(track.get("baseUrl", "")).strip()
599
+ if not base_url:
600
+ return ("", [], "missing_track_url")
601
+ json3_url = _youtube_add_query_param(base_url, "fmt", "json3")
602
+ try:
603
+ payload = _http_get_json(json3_url, headers=_youtube_request_headers(), timeout_sec=25)
604
+ transcript_text, segments = _parse_youtube_transcript_json3(payload)
605
+ if transcript_text:
606
+ return transcript_text, segments, "json3"
607
+ except Exception:
608
+ pass
609
+ try:
610
+ xml_text = _http_get_text(base_url, headers=_youtube_request_headers(), timeout_sec=25)
611
+ transcript_text, segments = _parse_youtube_transcript_xml(xml_text)
612
+ if transcript_text:
613
+ return transcript_text, segments, "xml"
614
+ except Exception:
615
+ pass
616
+ return ("", [], "unavailable")
617
+
618
+
619
+ def _youtube_text_bundle(payload: dict[str, Any]) -> str:
620
+ parts: list[str] = []
621
+ title = str(payload.get("title", "")).strip()
622
+ if title:
623
+ parts.append(f"Title: {title}")
624
+ author_name = str(payload.get("author_name", "")).strip()
625
+ if author_name:
626
+ parts.append(f"Author: {author_name}")
627
+ duration_seconds = payload.get("duration_seconds")
628
+ if isinstance(duration_seconds, int) and duration_seconds > 0:
629
+ parts.append(f"Duration seconds: {duration_seconds}")
630
+ description = str(payload.get("description", "")).strip()
631
+ if description:
632
+ parts.append("Description:\n" + description)
633
+ transcript_text = str(payload.get("transcript_text", "")).strip()
634
+ if transcript_text:
635
+ parts.append("Transcript:\n" + transcript_text)
636
+ return "\n\n".join(parts)
637
+
638
+
639
def _youtube_inspect_payload(raw_url: str, preferred_lang: str = "") -> dict[str, Any]:
    """Collect public metadata and transcript text for a YouTube video.

    oEmbed metadata and the watch-page player state are fetched on a
    best-effort basis; a watch-page failure is recorded in ``warnings``
    instead of being raised. When caption tracks exist, the best one for
    ``preferred_lang`` is downloaded.

    Args:
        raw_url: YouTube URL or bare video id.
        preferred_lang: Optional caption language code to prefer.

    Returns:
        A ``youtube_source`` payload dict including metadata, transcript
        fields, ``warnings`` and a rendered ``text_bundle``.

    Raises:
        RuntimeError: If no video id can be extracted from ``raw_url``.
    """
    video_id = _youtube_video_id_from_url(raw_url)
    canonical_url = _youtube_canonical_url(video_id)
    warnings: list[str] = []
    oembed = _youtube_fetch_oembed(canonical_url)

    watch_state: dict[str, Any] = {}
    try:
        watch_state = _youtube_fetch_watch_state(video_id)
    except Exception as exc:
        # Keep going with whatever oEmbed returned; surface the reason as a warning.
        warnings.append(str(exc))

    video_details = watch_state.get("video_details", {}) if isinstance(watch_state.get("video_details"), dict) else {}
    microformat = watch_state.get("microformat", {}) if isinstance(watch_state.get("microformat"), dict) else {}
    playability = watch_state.get("playability_status", {}) if isinstance(watch_state.get("playability_status"), dict) else {}
    tracks = [row for row in watch_state.get("caption_tracks", []) if isinstance(row, dict)]
    chosen_track = _pick_youtube_caption_track(tracks, preferred_lang)
    transcript_text = ""
    transcript_segments: list[dict[str, Any]] = []
    transcript_fetch_mode = "none"
    transcript_available = False
    transcript_language = ""
    transcript_track_name = ""
    transcript_kind = "none"
    if chosen_track is not None:
        transcript_text, transcript_segments, transcript_fetch_mode = _youtube_fetch_transcript_from_track(chosen_track)
        transcript_available = bool(transcript_text.strip())
        transcript_language = str(chosen_track.get("languageCode", "")).strip()
        transcript_track_name = _youtube_track_label(chosen_track)
        transcript_kind = "auto" if str(chosen_track.get("kind", "")).strip().lower() == "asr" else "manual"
        if not transcript_available:
            warnings.append("A caption track was found, but transcript text could not be fetched.")
    elif watch_state:
        warnings.append("No caption tracks were available for this video.")

    title = str(video_details.get("title") or oembed.get("title") or "").strip()
    author_name = str(video_details.get("author") or oembed.get("author_name") or "").strip()
    author_url = str(oembed.get("author_url") or "").strip()
    thumbnail_url = str(oembed.get("thumbnail_url") or "").strip()
    # The microformat description is only a fallback and comes from untrusted
    # page data, so tolerate it being something other than an object.
    microformat_description = microformat.get("description")
    fallback_description = (
        microformat_description.get("simpleText", "") if isinstance(microformat_description, dict) else ""
    )
    description = str(video_details.get("shortDescription") or fallback_description or "").strip()
    channel_id = str(video_details.get("channelId") or "").strip()
    duration_seconds = 0
    raw_duration = video_details.get("lengthSeconds")
    if isinstance(raw_duration, str) and raw_duration.isdigit():
        duration_seconds = int(raw_duration)
    published_at = str(microformat.get("publishDate") or "").strip()
    payload = {
        "schema_version": YOUTUBE_SOURCE_SCHEMA_VERSION,
        "kind": "youtube_source",
        "retrieved_at_utc": _now_utc(),
        "source_url": str(raw_url).strip(),
        "canonical_url": canonical_url,
        "video_id": video_id,
        "title": title,
        "author_name": author_name,
        "author_url": author_url,
        "thumbnail_url": thumbnail_url,
        "channel_id": channel_id,
        "description": description,
        # An unknown duration is emitted as None rather than 0.
        "duration_seconds": duration_seconds or None,
        "published_at": published_at,
        "playability_status": str(playability.get("status", "")).strip(),
        "transcript_available": transcript_available,
        "transcript_language": transcript_language,
        "transcript_track_name": transcript_track_name,
        "transcript_kind": transcript_kind,
        "transcript_fetch_mode": transcript_fetch_mode,
        "transcript_text": transcript_text,
        "transcript_segments": transcript_segments,
        "warnings": _unique_strings(warnings),
    }
    payload["text_bundle"] = _youtube_text_bundle(payload)
    return payload
712
+
713
+
714
+ def _default_youtube_artifact_path(repo_root: Path, video_id: str) -> Path:
715
+ return repo_root / "orp" / "external" / "youtube" / f"{video_id}.json"
716
+
717
+
718
def cmd_youtube_inspect(args: argparse.Namespace) -> int:
    """Run ``orp youtube inspect``: fetch a video's source payload and report it.

    The payload is written to disk when ``--save`` or ``--out`` is given
    (refusing to overwrite an existing file without ``--force``), then printed
    either as JSON or as key/value pairs followed by the text bundle and any
    warnings.

    Returns:
        ``0`` on success.

    Raises:
        RuntimeError: If the output path already exists and ``--force`` was
            not given.
    """
    repo_root = Path(args.repo_root).resolve()
    preferred_lang = str(getattr(args, "lang", "") or "").strip()
    payload = _youtube_inspect_payload(args.url, preferred_lang=preferred_lang)

    out_raw = str(getattr(args, "out", "") or "").strip()
    should_save = bool(getattr(args, "save", False) or out_raw)
    out_path: Path | None = None
    emitted_format = ""
    if should_save:
        if not out_raw:
            _ensure_dirs(repo_root)
            out_path = _default_youtube_artifact_path(repo_root, str(payload.get("video_id", "")).strip())
        else:
            out_path = _resolve_cli_path(out_raw, repo_root)
        overwrite_allowed = bool(getattr(args, "force", False))
        if out_path.exists() and not overwrite_allowed:
            raise RuntimeError(
                f"output path already exists: {_path_for_state(out_path, repo_root)}. Use --force to overwrite."
            )
        emitted_format = _write_structured_payload(out_path, payload, format_hint=str(getattr(args, "format", "") or ""))

    was_saved = out_path is not None
    shown_path = _path_for_state(out_path, repo_root) if was_saved else ""
    result = {
        "ok": True,
        "saved": was_saved,
        "path": shown_path,
        "format": emitted_format,
        "schema_path": "spec/v1/youtube-source.schema.json",
        "source": payload,
    }
    if args.json_output:
        _print_json(result)
        return 0

    def field(key: str) -> str:
        return str(payload.get(key, "")).strip()

    _print_pairs(
        [
            ("ok", "true"),
            ("video.id", field("video_id")),
            ("video.title", field("title")),
            ("video.author", field("author_name")),
            ("video.duration_seconds", payload.get("duration_seconds") or ""),
            ("transcript.available", str(bool(payload.get("transcript_available", False))).lower()),
            ("transcript.language", field("transcript_language")),
            ("transcript.kind", field("transcript_kind")),
            ("saved", str(bool(was_saved)).lower()),
            ("path", shown_path),
        ]
    )
    bundle = field("text_bundle")
    notices = payload.get("warnings", []) if isinstance(payload.get("warnings"), list) else []
    if bundle:
        print("")
        print(bundle)
    if notices:
        print("")
        for notice in notices:
            message = str(notice).strip()
            if message:
                print(f"warning={message}")
    return 0
776
+
777
+
338
778
  def _runner_transport_mode(args: argparse.Namespace) -> str:
339
779
  mode = str(getattr(args, "transport", "auto") or "auto").strip().lower()
340
780
  if mode in {"poll", "sse"}:
@@ -4802,6 +5242,11 @@ def _unique_strings(values: list[str]) -> list[str]:
4802
5242
  return out
4803
5243
 
4804
5244
 
5245
+ def _slug_token(text: str, *, fallback: str = "item") -> str:
5246
+ token = re.sub(r"[^a-z0-9]+", "-", str(text or "").strip().lower()).strip("-")
5247
+ return token or fallback
5248
+
5249
+
4805
5250
  def _resolve_config_paths(raw_paths: Any, repo_root: Path, vars_map: dict[str, str]) -> list[str]:
4806
5251
  out: list[str] = []
4807
5252
  if not isinstance(raw_paths, list):
@@ -5138,6 +5583,9 @@ def _about_payload() -> dict[str, Any]:
5138
5583
  "config": "spec/v1/orp.config.schema.json",
5139
5584
  "packet": "spec/v1/packet.schema.json",
5140
5585
  "kernel": "spec/v1/kernel.schema.json",
5586
+ "kernel_proposal": "spec/v1/kernel-proposal.schema.json",
5587
+ "kernel_extension": "spec/v1/kernel-extension.schema.json",
5588
+ "youtube_source": "spec/v1/youtube-source.schema.json",
5141
5589
  "profile_pack": "spec/v1/profile-pack.schema.json",
5142
5590
  "link_project": "spec/v1/link-project.schema.json",
5143
5591
  "link_session": "spec/v1/link-session.schema.json",
@@ -5147,10 +5595,20 @@ def _about_payload() -> dict[str, Any]:
5147
5595
  "abilities": [
5148
5596
  {
5149
5597
  "id": "kernel",
5150
- "description": "Reasoning-kernel artifact scaffolding and validation for promotable repository truth.",
5598
+ "description": "Reasoning-kernel artifact scaffolding, validation, observation, proposal, and migration for promotable repository truth.",
5151
5599
  "entrypoints": [
5152
5600
  ["kernel", "validate"],
5153
5601
  ["kernel", "scaffold"],
5602
+ ["kernel", "stats"],
5603
+ ["kernel", "propose"],
5604
+ ["kernel", "migrate"],
5605
+ ],
5606
+ },
5607
+ {
5608
+ "id": "youtube",
5609
+ "description": "Public YouTube metadata and transcript ingestion for agent-readable external source context.",
5610
+ "entrypoints": [
5611
+ ["youtube", "inspect"],
5154
5612
  ],
5155
5613
  },
5156
5614
  {
@@ -5243,6 +5701,10 @@ def _about_payload() -> dict[str, Any]:
5243
5701
  {"name": "about", "path": ["about"], "json_output": True},
5244
5702
  {"name": "kernel_validate", "path": ["kernel", "validate"], "json_output": True},
5245
5703
  {"name": "kernel_scaffold", "path": ["kernel", "scaffold"], "json_output": True},
5704
+ {"name": "kernel_stats", "path": ["kernel", "stats"], "json_output": True},
5705
+ {"name": "kernel_propose", "path": ["kernel", "propose"], "json_output": True},
5706
+ {"name": "kernel_migrate", "path": ["kernel", "migrate"], "json_output": True},
5707
+ {"name": "youtube_inspect", "path": ["youtube", "inspect"], "json_output": True},
5246
5708
  {"name": "auth_login", "path": ["auth", "login"], "json_output": True},
5247
5709
  {"name": "auth_verify", "path": ["auth", "verify"], "json_output": True},
5248
5710
  {"name": "auth_logout", "path": ["auth", "logout"], "json_output": True},
@@ -5311,6 +5773,8 @@ def _about_payload() -> dict[str, Any]:
5311
5773
  "Canonical evidence lives in repo artifact paths outside ORP docs.",
5312
5774
  "Default CLI output is human-readable; listed commands with json_output=true also support --json.",
5313
5775
  "Reasoning-kernel artifacts shape promotable repository truth for tasks, decisions, hypotheses, experiments, checkpoints, policies, and results.",
5776
+ "Kernel evolution in ORP should stay explicit: observe real usage, propose changes, and migrate artifacts through versioned CLI surfaces rather than silent agent mutation.",
5777
+ "YouTube inspection is a built-in ORP ability exposed through `orp youtube inspect`, returning public metadata and caption transcript text when available.",
5314
5778
  "Discovery profiles in ORP are portable search-intent files managed directly by ORP.",
5315
5779
  "Collaboration is a built-in ORP ability exposed through `orp collaborate ...`.",
5316
5780
  "Project/session linking is a built-in ORP ability exposed through `orp link ...` and stored machine-locally under `.git/orp/link/`.",
@@ -5420,6 +5884,10 @@ def _home_payload(repo_root: Path, config_arg: str) -> dict[str, Any]:
5420
5884
  "label": "Inspect the current hosted workspace identity",
5421
5885
  "command": "orp whoami --json",
5422
5886
  },
5887
+ {
5888
+ "label": "Inspect a YouTube video and public transcript for agent context",
5889
+ "command": "orp youtube inspect https://www.youtube.com/watch?v=<video_id> --json",
5890
+ },
5423
5891
  {
5424
5892
  "label": "List hosted ideas in the current workspace",
5425
5893
  "command": "orp ideas list --json",
@@ -5507,13 +5975,20 @@ def _home_payload(repo_root: Path, config_arg: str) -> dict[str, Any]:
5507
5975
  )
5508
5976
  quick_actions.insert(
5509
5977
  5,
5978
+ {
5979
+ "label": "Inspect kernel validation pressure across recorded runs",
5980
+ "command": "orp kernel stats --json",
5981
+ },
5982
+ )
5983
+ quick_actions.insert(
5984
+ 6,
5510
5985
  {
5511
5986
  "label": "Mark the repo locally ready after validation",
5512
5987
  "command": "orp ready --json",
5513
5988
  },
5514
5989
  )
5515
5990
  quick_actions.insert(
5516
- 6,
5991
+ 7,
5517
5992
  {
5518
5993
  "label": "Inspect local project/session link state",
5519
5994
  "command": "orp link status --json",
@@ -7256,27 +7731,280 @@ def _gate_map(config: dict[str, Any]) -> dict[str, dict[str, Any]]:
7256
7731
  return out
7257
7732
 
7258
7733
 
7259
- KERNEL_ARTIFACT_CLASS_REQUIREMENTS: dict[str, list[str]] = {
7260
- "task": ["object", "goal", "boundary", "constraints", "success_criteria"],
7261
- "decision": ["question", "chosen_path", "rejected_alternatives", "rationale", "consequences"],
7262
- "hypothesis": ["claim", "boundary", "assumptions", "test_path", "falsifiers"],
7263
- "experiment": ["objective", "method", "inputs", "outputs", "evidence_expectations", "interpretation_limits"],
7264
- "checkpoint": ["completed_unit", "current_state", "risks", "next_handoff_target", "artifact_refs"],
7265
- "policy": ["scope", "rule", "rationale", "invariants", "enforcement_surface"],
7266
- "result": ["claim", "evidence_paths", "status", "interpretation_limits", "next_follow_up"],
7267
- }
7734
+ def _kernel_schema_path() -> Path:
7735
+ return Path(__file__).resolve().parent.parent / "spec" / "v1" / "kernel.schema.json"
7268
7736
 
7269
7737
 
7270
- def _kernel_field_present(value: Any) -> bool:
7271
- if isinstance(value, str):
7272
- return bool(value.strip())
7273
- if isinstance(value, list):
7274
- return any(_kernel_field_present(item) for item in value)
7275
- if isinstance(value, dict):
7276
- return len(value) > 0
7738
+ def _kernel_proposal_schema_path() -> Path:
7739
+ return Path(__file__).resolve().parent.parent / "spec" / "v1" / "kernel-proposal.schema.json"
7740
+
7741
+
7742
+ def _kernel_extension_schema_path() -> Path:
7743
+ return Path(__file__).resolve().parent.parent / "spec" / "v1" / "kernel-extension.schema.json"
7744
+
7745
+
7746
def _load_kernel_schema() -> dict[str, Any]:
    """Read and parse the kernel JSON schema from disk.

    Returns:
        The parsed schema document.

    Raises:
        RuntimeError: if the schema file does not exist, or if the parsed
            JSON root is not an object.
    """
    schema_file = _kernel_schema_path()
    if not schema_file.exists():
        raise RuntimeError(f"kernel schema is missing: {schema_file}")
    raw_text = schema_file.read_text(encoding="utf-8")
    schema = json.loads(raw_text)
    if isinstance(schema, dict):
        return schema
    raise RuntimeError("kernel schema root must be an object")
7754
+
7755
+
7756
def _kernel_schema_metadata() -> tuple[dict[str, list[str]], dict[str, dict[str, Any]], set[str], list[str]]:
    """Derive validation lookup tables from the kernel JSON schema.

    Returns:
        A 4-tuple ``(requirements, field_kinds, allowed_fields, ordered_fields)``:

        * ``requirements`` maps an artifact class to its required field names,
          taken from every ``allOf`` clause shaped like
          ``{"if": {"properties": {"artifact_class": {"const": X}}},
          "then": {"required": [...]}}``.
        * ``field_kinds`` maps a property name to ``{"kind": "const", "value": ...}``,
          ``{"kind": "enum", "value": [...]}``, or ``{"kind": <name>}`` where
          ``<name>`` is the last segment of a local ``#/$defs/...`` reference.
          Properties matching none of these are left out.
        * ``allowed_fields`` is the set of keys of ``field_kinds``.
        * ``ordered_fields`` lists the non-blank property names in schema order.

    Raises:
        RuntimeError: if the schema has no object-valued ``properties``.
    """
    schema = _load_kernel_schema()
    properties = schema.get("properties")
    if not isinstance(properties, dict):
        raise RuntimeError("kernel schema is missing object properties")
    ordered_fields = [str(field).strip() for field in properties.keys() if str(field).strip()]

    field_kinds: dict[str, dict[str, Any]] = {}
    for field, raw in properties.items():
        if not isinstance(raw, dict):
            continue
        if "const" in raw:
            field_kinds[field] = {"kind": "const", "value": raw.get("const")}
        elif isinstance(raw.get("enum"), list):
            field_kinds[field] = {"kind": "enum", "value": list(raw["enum"])}
        else:
            ref = raw.get("$ref")
            if isinstance(ref, str) and ref.startswith("#/$defs/"):
                field_kinds[field] = {"kind": ref.split("/")[-1]}

    requirements: dict[str, list[str]] = {}
    raw_all_of = schema.get("allOf")
    for clause in raw_all_of if isinstance(raw_all_of, list) else []:
        if not isinstance(clause, dict):
            continue
        raw_if = clause.get("if")
        raw_then = clause.get("then")
        if not isinstance(raw_if, dict) or not isinstance(raw_then, dict):
            continue
        # Guard each level: this function runs at import time, so a clause
        # whose `properties` / `artifact_class` is not an object must be
        # skipped rather than raise AttributeError and break the whole CLI.
        if_properties = raw_if.get("properties")
        class_rule = if_properties.get("artifact_class") if isinstance(if_properties, dict) else None
        const = class_rule.get("const") if isinstance(class_rule, dict) else None
        required_fields = raw_then.get("required")
        if isinstance(const, str) and isinstance(required_fields, list):
            requirements[const] = [
                field.strip()
                for field in required_fields
                if isinstance(field, str) and field.strip()
            ]
    return requirements, field_kinds, set(field_kinds), ordered_fields
7800
+
7801
+
7802
# Schema-derived lookup tables, computed once when this module is imported.
# The target order must match the tuple returned by _kernel_schema_metadata().
(KERNEL_ARTIFACT_CLASS_REQUIREMENTS, KERNEL_FIELD_KINDS,
 KERNEL_ALLOWED_FIELDS, KERNEL_FIELD_ORDER) = _kernel_schema_metadata()
7808
+
7809
+
7810
def _kernel_ordered_fields_for_class(artifact_class: str, present_fields: Sequence[str] | None = None) -> list[str]:
    """Return the canonical field order for an artifact class.

    The order is ``schema_version``, ``artifact_class``, then the class's
    required fields (from KERNEL_ARTIFACT_CLASS_REQUIREMENTS), then every
    remaining name in KERNEL_FIELD_ORDER; the first occurrence of a name wins.

    Args:
        artifact_class: Class name; surrounding whitespace is ignored. An
            unknown class contributes no required fields.
        present_fields: When given, the result is restricted to these names
            (blank entries ignored) while keeping canonical order.
    """
    class_key = str(artifact_class).strip()
    candidates = [
        "schema_version",
        "artifact_class",
        *KERNEL_ARTIFACT_CLASS_REQUIREMENTS.get(class_key, []),
        *KERNEL_FIELD_ORDER,
    ]
    # dict.fromkeys drops repeats while keeping first-seen order.
    sequence = list(dict.fromkeys(candidates))
    if present_fields is None:
        return sequence
    keep = {str(name).strip() for name in present_fields if str(name).strip()}
    return [name for name in sequence if name in keep]
7823
+
7824
+
7825
+ def _kernel_text_valid(value: Any) -> bool:
7826
+ return isinstance(value, str) and bool(value.strip())
7827
+
7828
+
7829
+ def _kernel_text_list_valid(value: Any) -> bool:
7830
+ return isinstance(value, list) and len(value) > 0 and all(_kernel_text_valid(item) for item in value)
7831
+
7832
+
7833
def _kernel_field_present(field: str, value: Any) -> bool:
    """Report whether ``value`` counts as a supplied value for ``field``.

    Text-like kinds recorded in KERNEL_FIELD_KINDS must satisfy the matching
    text check; every other kind (``const``, ``enum``, or a field with no
    recorded kind) is considered present as long as the value is not None.
    """
    kind = str(KERNEL_FIELD_KINDS.get(field, {}).get("kind", ""))
    if kind == "non_empty_text":
        return _kernel_text_valid(value)
    if kind == "text_list":
        return _kernel_text_list_valid(value)
    if kind == "text_or_text_list":
        return _kernel_text_valid(value) or _kernel_text_list_valid(value)
    # const, enum, and unrecognised kinds share the same presence rule.
    return value is not None
7278
7846
 
7279
7847
 
7848
def _kernel_field_shape_issues(field: str, value: Any) -> list[str]:
    """Describe how ``value`` violates the shape recorded for ``field``.

    Returns:
        Message fragments written to follow "field `<name>` ". The list is
        empty when the value conforms, or when KERNEL_FIELD_KINDS records no
        recognised kind for the field.
    """
    meta = KERNEL_FIELD_KINDS.get(field, {})
    kind = str(meta.get("kind", ""))
    if kind == "const":
        expected = meta.get("value")
        return [] if value == expected else [f"must equal `{expected}`."]
    if kind == "enum":
        # Compare against the raw enum values: stringifying them first made a
        # non-string enum member impossible to match and let a string
        # look-alike (e.g. "1" for 1) pass. Stringify only for the message.
        allowed = list(meta.get("value", []))
        if value in allowed:
            return []
        return [f"must be one of: {', '.join(str(x) for x in allowed)}."]
    if kind == "non_empty_text":
        return [] if _kernel_text_valid(value) else ["must be a non-empty string."]
    if kind == "text_list":
        return [] if _kernel_text_list_valid(value) else ["must be a non-empty list of non-empty strings."]
    if kind == "text_or_text_list":
        if _kernel_text_valid(value) or _kernel_text_list_valid(value):
            return []
        return ["must be a non-empty string or a non-empty list of non-empty strings."]
    return []
7866
+
7867
+
7868
def _validate_kernel_payload(
    payload: dict[str, Any],
    *,
    expected_class: str = "",
    extra_required_fields: Sequence[str] = (),
) -> dict[str, Any]:
    """Validate a kernel artifact payload against the schema-derived tables.

    Args:
        payload: Parsed artifact mapping.
        expected_class: Artifact class the caller expects; a differing class
            is reported, and it supplies the required-field list when the
            payload names no class.
        extra_required_fields: Additional field names required on top of the
            class requirements (blank entries ignored).

    Returns:
        A dict with ``artifact_class``, ``expected_artifact_class``, ``valid``
        (True when no issues were found), ``missing_fields``, and ``issues``.
    """
    artifact_issues: list[str] = []
    missing_fields: list[str] = []
    identity_fields = ("schema_version", "artifact_class")

    for field in sorted(str(key) for key in payload.keys() if str(key) not in KERNEL_ALLOWED_FIELDS):
        artifact_issues.append(f"unexpected field: `{field}`.")

    # The identity fields are checked explicitly so that they are reported
    # even when absent from the payload.
    for identity_field in identity_fields:
        artifact_issues.extend(
            f"field `{identity_field}` {issue}"
            for issue in _kernel_field_shape_issues(identity_field, payload.get(identity_field))
        )

    # Treat an explicit null the same as a missing key instead of letting it
    # turn into the literal string "None".
    raw_class = payload.get("artifact_class")
    actual_class = "" if raw_class is None else str(raw_class).strip()
    if actual_class not in KERNEL_ARTIFACT_CLASS_REQUIREMENTS:
        artifact_issues.append(f"unsupported artifact_class: {actual_class or '(missing)'}.")

    if expected_class and actual_class and expected_class != actual_class:
        artifact_issues.append(
            f"artifact_class mismatch: expected `{expected_class}`, found `{actual_class}`."
        )

    field_class = actual_class or expected_class
    required_fields = list(KERNEL_ARTIFACT_CLASS_REQUIREMENTS.get(field_class, []))
    for field in _unique_strings([str(x).strip() for x in extra_required_fields if str(x).strip()]):
        if field not in required_fields:
            required_fields.append(field)

    for field, value in payload.items():
        if not isinstance(field, str) or field not in KERNEL_ALLOWED_FIELDS:
            continue
        if field in identity_fields:
            # Already reported above; checking again would duplicate the issue.
            continue
        for issue in _kernel_field_shape_issues(field, value):
            artifact_issues.append(f"field `{field}` {issue}")

    for field in required_fields:
        if not _kernel_field_present(field, payload.get(field)):
            missing_fields.append(field)
    if missing_fields:
        artifact_issues.append("missing required fields: " + ", ".join(missing_fields))

    return {
        "artifact_class": actual_class,
        "expected_artifact_class": expected_class,
        "valid": not artifact_issues,
        "missing_fields": missing_fields,
        "issues": artifact_issues,
    }
7920
+
7921
+
7922
def _kernel_canonical_payload(
    payload: dict[str, Any],
    *,
    drop_unknown_fields: bool,
) -> tuple[dict[str, Any], list[str]]:
    """Rebuild a kernel artifact in canonical field order.

    The result keeps only fields named in KERNEL_ALLOWED_FIELDS, sets
    ``schema_version`` to KERNEL_SCHEMA_VERSION, and orders keys using
    _kernel_ordered_fields_for_class.

    Returns:
        ``(canonical_payload, unknown_fields)``, where ``unknown_fields`` is
        the sorted list of payload keys outside KERNEL_ALLOWED_FIELDS.

    Raises:
        RuntimeError: if unknown fields exist and ``drop_unknown_fields`` is
            false, or if ``artifact_class`` is not a supported class.
    """
    stray = sorted(name for name in map(str, payload.keys()) if name not in KERNEL_ALLOWED_FIELDS)
    if stray and not drop_unknown_fields:
        raise RuntimeError(
            "kernel artifact has unknown fields: " + ", ".join(stray) + ". Re-run with --drop-unknown-fields to discard them."
        )

    class_name = str(payload.get("artifact_class", "")).strip()
    if class_name not in KERNEL_ARTIFACT_CLASS_REQUIREMENTS:
        raise RuntimeError(f"unsupported artifact_class: {class_name or '(missing)'}")

    retained: dict[str, Any] = {}
    for key, value in payload.items():
        name = str(key)
        if name in KERNEL_ALLOWED_FIELDS:
            retained[name] = value
    # Normalise the two identity fields regardless of what the input carried.
    retained["schema_version"] = KERNEL_SCHEMA_VERSION
    retained["artifact_class"] = class_name

    ordering = _kernel_ordered_fields_for_class(class_name, present_fields=list(retained.keys()))
    canonical: dict[str, Any] = {name: retained[name] for name in ordering if name in retained}
    return canonical, stray
7951
+
7952
+
7953
def _kernel_proposal_template(
    *,
    proposal_kind: str,
    title: str,
    target_artifact_classes: Sequence[str],
    target_fields: Sequence[str],
) -> dict[str, Any]:
    """Build a draft kernel-evolution proposal with placeholder guidance text.

    Target classes and fields are stripped, blank entries removed, and the
    remainder passed through _unique_strings before being stored under
    ``target_scope``. All narrative sections are pre-filled with prompts for
    the author to replace.
    """

    def _cleaned(values: Sequence[str]) -> list[str]:
        # Strip each entry, drop blanks, then de-duplicate.
        return _unique_strings([str(item).strip() for item in values if str(item).strip()])

    scope = {
        "artifact_classes": _cleaned(target_artifact_classes),
        "fields": _cleaned(target_fields),
    }
    template: dict[str, Any] = {
        "schema_version": KERNEL_SCHEMA_VERSION,
        "proposal_kind": proposal_kind,
        "title": title,
        "status": "draft",
        "summary": "describe the kernel evolution being proposed",
        "target_scope": scope,
    }
    template["proposed_change"] = ["describe the exact structural change"]
    template["rationale"] = ["describe why the current kernel is insufficient"]
    template["evidence_refs"] = ["docs/ORP_REASONING_KERNEL_EVIDENCE_MATRIX.md"]
    template["compatibility_notes"] = ["describe backward-compatibility expectations"]
    template["migration_plan"] = ["describe how existing artifacts will be preserved or migrated"]
    template["evaluation_plan"] = [
        "describe what new evidence should justify promotion into the core kernel"
    ]
    return template
7991
+
7992
+
7993
+ def _kernel_observation_stats_from_run(run: dict[str, Any]) -> dict[str, Any]:
7994
+ results = run.get("results", [])
7995
+ if not isinstance(results, list):
7996
+ results = []
7997
+ kernel_rows = [
7998
+ row.get("kernel_validation")
7999
+ for row in results
8000
+ if isinstance(row, dict) and isinstance(row.get("kernel_validation"), dict)
8001
+ ]
8002
+ return {
8003
+ "run_id": str(run.get("run_id", "")).strip(),
8004
+ "kernel_validations": kernel_rows,
8005
+ }
8006
+
8007
+
7280
8008
  def _kernel_validation_mode(gate: dict[str, Any]) -> str:
7281
8009
  kernel_cfg = gate.get("kernel") if isinstance(gate.get("kernel"), dict) else {}
7282
8010
  default_mode = "hard" if str(gate.get("phase", "")).strip() == "structure_kernel" else "soft"
@@ -7371,31 +8099,14 @@ def _validate_kernel_gate(
7371
8099
 
7372
8100
  actual_class = ""
7373
8101
  if payload:
7374
- schema_version = str(payload.get("schema_version", "")).strip()
7375
- if schema_version != "1.0.0":
7376
- artifact_issues.append("schema_version must be `1.0.0`.")
7377
-
7378
- actual_class = str(payload.get("artifact_class", "")).strip()
7379
- if actual_class not in KERNEL_ARTIFACT_CLASS_REQUIREMENTS:
7380
- artifact_issues.append(
7381
- f"unsupported artifact_class: {actual_class or '(missing)'}."
7382
- )
7383
-
7384
- if expected_class and actual_class and expected_class != actual_class:
7385
- artifact_issues.append(
7386
- f"artifact_class mismatch: expected `{expected_class}`, found `{actual_class}`."
7387
- )
7388
-
7389
- field_class = actual_class or expected_class
7390
- required_fields = list(KERNEL_ARTIFACT_CLASS_REQUIREMENTS.get(field_class, []))
7391
- for field in extra_required_fields:
7392
- if field not in required_fields:
7393
- required_fields.append(field)
7394
- for field in required_fields:
7395
- if not _kernel_field_present(payload.get(field)):
7396
- missing_fields.append(field)
7397
- if missing_fields:
7398
- artifact_issues.append("missing required fields: " + ", ".join(missing_fields))
8102
+ validation = _validate_kernel_payload(
8103
+ payload,
8104
+ expected_class=expected_class,
8105
+ extra_required_fields=extra_required_fields,
8106
+ )
8107
+ actual_class = str(validation.get("artifact_class", "")).strip()
8108
+ missing_fields = list(validation.get("missing_fields", []))
8109
+ artifact_issues.extend([str(issue) for issue in validation.get("issues", []) if isinstance(issue, str)])
7399
8110
 
7400
8111
  valid = optional_skipped or (exists and not artifact_issues)
7401
8112
  path_state = _path_for_state(path, repo_root)
@@ -7576,6 +8287,273 @@ def cmd_kernel_scaffold(args: argparse.Namespace) -> int:
7576
8287
  return 0
7577
8288
 
7578
8289
 
8290
+ def _resolve_kernel_run_json_paths(
8291
+ *,
8292
+ repo_root: Path,
8293
+ run_ids: Sequence[str],
8294
+ run_jsons: Sequence[str],
8295
+ ) -> list[Path]:
8296
+ resolved: list[Path] = []
8297
+ if run_jsons:
8298
+ for raw in run_jsons:
8299
+ if not str(raw).strip():
8300
+ continue
8301
+ _, path = _resolve_run_json_path(repo_root=repo_root, run_id_arg="", run_json_arg=str(raw))
8302
+ resolved.append(path)
8303
+ return resolved
8304
+ if run_ids:
8305
+ for raw in run_ids:
8306
+ if not str(raw).strip():
8307
+ continue
8308
+ _, path = _resolve_run_json_path(repo_root=repo_root, run_id_arg=str(raw), run_json_arg="")
8309
+ resolved.append(path)
8310
+ return resolved
8311
+
8312
+ seen: set[Path] = set()
8313
+ state_path = repo_root / "orp" / "state.json"
8314
+ if state_path.exists():
8315
+ try:
8316
+ state = _read_json(state_path)
8317
+ except Exception:
8318
+ state = {}
8319
+ runs = state.get("runs")
8320
+ if isinstance(runs, dict):
8321
+ for value in runs.values():
8322
+ if not isinstance(value, str) or not value.strip():
8323
+ continue
8324
+ candidate = (repo_root / value).resolve()
8325
+ if candidate.exists() and candidate not in seen:
8326
+ seen.add(candidate)
8327
+ resolved.append(candidate)
8328
+ artifacts_root = repo_root / "orp" / "artifacts"
8329
+ if artifacts_root.exists():
8330
+ for candidate in sorted(artifacts_root.glob("*/RUN.json")):
8331
+ candidate = candidate.resolve()
8332
+ if candidate not in seen:
8333
+ seen.add(candidate)
8334
+ resolved.append(candidate)
8335
+ return resolved
8336
+
8337
+
8338
+ def _kernel_stats_payload(
8339
+ repo_root: Path,
8340
+ run_json_paths: Sequence[Path],
8341
+ ) -> dict[str, Any]:
8342
+ runs_scanned = 0
8343
+ runs_with_kernel_validation = 0
8344
+ gate_rows_total = 0
8345
+ artifacts_total = 0
8346
+ artifacts_valid = 0
8347
+ artifacts_invalid = 0
8348
+ mode_counts: dict[str, int] = {}
8349
+ artifact_class_counts: dict[str, int] = {}
8350
+ missing_field_counts: dict[str, int] = {}
8351
+ issue_counts: dict[str, int] = {}
8352
+ path_counts: dict[str, int] = {}
8353
+ per_run: list[dict[str, Any]] = []
8354
+
8355
+ for run_json in run_json_paths:
8356
+ run = _read_json(run_json)
8357
+ stats = _kernel_observation_stats_from_run(run)
8358
+ kernel_rows = stats["kernel_validations"]
8359
+ runs_scanned += 1
8360
+ if kernel_rows:
8361
+ runs_with_kernel_validation += 1
8362
+ per_run.append(
8363
+ {
8364
+ "run_id": stats["run_id"] or run_json.parent.name,
8365
+ "run_json": _path_for_state(run_json, repo_root),
8366
+ "kernel_validations": len(kernel_rows),
8367
+ }
8368
+ )
8369
+ for row in kernel_rows:
8370
+ if not isinstance(row, dict):
8371
+ continue
8372
+ gate_rows_total += 1
8373
+ mode = str(row.get("mode", "")).strip() or "unknown"
8374
+ mode_counts[mode] = mode_counts.get(mode, 0) + 1
8375
+ for artifact in row.get("artifacts", []) if isinstance(row.get("artifacts"), list) else []:
8376
+ if not isinstance(artifact, dict):
8377
+ continue
8378
+ artifacts_total += 1
8379
+ if artifact.get("valid"):
8380
+ artifacts_valid += 1
8381
+ else:
8382
+ artifacts_invalid += 1
8383
+ artifact_class = str(
8384
+ artifact.get("artifact_class") or artifact.get("expected_artifact_class") or "unknown"
8385
+ ).strip() or "unknown"
8386
+ artifact_class_counts[artifact_class] = artifact_class_counts.get(artifact_class, 0) + 1
8387
+ artifact_path = str(artifact.get("path", "")).strip()
8388
+ if artifact_path:
8389
+ path_counts[artifact_path] = path_counts.get(artifact_path, 0) + 1
8390
+ for field in artifact.get("missing_fields", []) if isinstance(artifact.get("missing_fields"), list) else []:
8391
+ key = str(field).strip()
8392
+ if key:
8393
+ missing_field_counts[key] = missing_field_counts.get(key, 0) + 1
8394
+ for issue in artifact.get("issues", []) if isinstance(artifact.get("issues"), list) else []:
8395
+ key = str(issue).strip()
8396
+ if key:
8397
+ issue_counts[key] = issue_counts.get(key, 0) + 1
8398
+
8399
+ top_missing_fields = [
8400
+ {"field": key, "count": count}
8401
+ for key, count in sorted(missing_field_counts.items(), key=lambda item: (-item[1], item[0]))[:10]
8402
+ ]
8403
+ top_issue_signals = [
8404
+ {"issue": key, "count": count}
8405
+ for key, count in sorted(issue_counts.items(), key=lambda item: (-item[1], item[0]))[:10]
8406
+ ]
8407
+ top_paths = [
8408
+ {"path": key, "count": count}
8409
+ for key, count in sorted(path_counts.items(), key=lambda item: (-item[1], item[0]))[:10]
8410
+ ]
8411
+ observations: list[str] = []
8412
+ if runs_scanned == 0:
8413
+ observations.append("No RUN.json artifacts were found. Run `orp gate run` with a structure_kernel gate to collect kernel observations.")
8414
+ elif runs_with_kernel_validation == 0:
8415
+ observations.append("RUN.json artifacts exist, but none recorded kernel_validation. Add a structure_kernel gate with a kernel.artifacts block.")
8416
+ else:
8417
+ if top_missing_fields:
8418
+ focus = ", ".join(f"{row['field']} ({row['count']})" for row in top_missing_fields[:5])
8419
+ observations.append(f"Most repeated missing fields: {focus}.")
8420
+ if artifacts_invalid == 0:
8421
+ observations.append("All observed kernel artifacts validated successfully across scanned runs.")
8422
+ else:
8423
+ observations.append(
8424
+ f"{artifacts_invalid} of {artifacts_total} observed kernel artifacts failed validation."
8425
+ )
8426
+ return {
8427
+ "ok": True,
8428
+ "repo_root": str(repo_root),
8429
+ "runs_scanned": runs_scanned,
8430
+ "runs_with_kernel_validation": runs_with_kernel_validation,
8431
+ "kernel_validation_rows": gate_rows_total,
8432
+ "artifacts_total": artifacts_total,
8433
+ "artifacts_valid": artifacts_valid,
8434
+ "artifacts_invalid": artifacts_invalid,
8435
+ "artifact_validation_rate": round((artifacts_valid / artifacts_total), 3) if artifacts_total else None,
8436
+ "mode_counts": mode_counts,
8437
+ "artifact_class_counts": artifact_class_counts,
8438
+ "top_missing_fields": top_missing_fields,
8439
+ "top_issue_signals": top_issue_signals,
8440
+ "top_paths": top_paths,
8441
+ "observations": observations,
8442
+ "runs": per_run,
8443
+ }
8444
+
8445
+
8446
def cmd_kernel_stats(args: argparse.Namespace) -> int:
    """Handle `orp kernel stats`: summarise kernel validation across runs.

    Selects RUN.json files from the optional ``run_id`` / ``run_json``
    arguments (or discovery when neither is given), builds the stats payload,
    and prints it as JSON or as ``key=value`` lines. Always returns 0.
    """
    repo_root = Path(args.repo_root).resolve()
    selected_runs = _resolve_kernel_run_json_paths(
        repo_root=repo_root,
        run_ids=list(getattr(args, "run_id", []) or []),
        run_jsons=list(getattr(args, "run_json", []) or []),
    )
    payload = _kernel_stats_payload(repo_root, selected_runs)
    if args.json_output:
        _print_json(payload)
        return 0

    headline_keys = (
        "runs_scanned",
        "runs_with_kernel_validation",
        "artifacts_total",
        "artifacts_valid",
        "artifacts_invalid",
    )
    for key in headline_keys:
        print(f"{key}={payload[key]}")
    for entry in payload.get("top_missing_fields", []):
        print(f"missing_field={entry['field']} count={entry['count']}")
    for note in payload.get("observations", []):
        print(f"note={note}")
    return 0
8467
+
8468
+
8469
def cmd_kernel_propose(args: argparse.Namespace) -> int:
    """Handle `orp kernel propose`: scaffold a kernel-evolution proposal file.

    The output path is ``--out`` when given, otherwise
    ``analysis/kernel-proposals/<slug>.yml`` under the repo root, with the slug
    derived from ``--slug`` or the title. Returns 0 after writing.

    Raises:
        RuntimeError: if the title is blank, or if the output path already
            exists and ``--force`` was not passed.
    """
    repo_root = Path(args.repo_root).resolve()
    title = str(args.title or "").strip()
    if not title:
        raise RuntimeError("proposal title is required.")

    slug = _slug_token(getattr(args, "slug", "") or title, fallback="kernel-proposal")
    requested_out = str(getattr(args, "out", "") or "").strip()
    out_path = (
        _resolve_cli_path(requested_out, repo_root)
        if requested_out
        else repo_root / "analysis" / "kernel-proposals" / f"{slug}.yml"
    )
    if out_path.exists() and not args.force:
        raise RuntimeError(
            f"kernel proposal already exists: {_path_for_state(out_path, repo_root)}. Use --force to overwrite."
        )

    proposal = _kernel_proposal_template(
        proposal_kind=str(args.kind).strip(),
        title=title,
        target_artifact_classes=list(getattr(args, "artifact_class", []) or []),
        target_fields=list(getattr(args, "field", []) or []),
    )
    emitted_format = _write_structured_payload(out_path, proposal, format_hint=args.format)
    result = {
        "ok": True,
        "path": _path_for_state(out_path, repo_root),
        "format": emitted_format,
        "proposal_kind": proposal["proposal_kind"],
        "title": proposal["title"],
    }
    if args.json_output:
        _print_json(result)
        return 0

    for key in ("path", "proposal_kind", "title", "format"):
        print(f"{key}={result[key]}")
    return 0
8506
+
8507
+
8508
def cmd_kernel_migrate(args: argparse.Namespace) -> int:
    """Handle `orp kernel migrate`: rewrite an artifact in canonical form.

    Loads the artifact, canonicalises it, writes the result to ``--out`` (or
    back over the source file), then validates what was written and reports
    the outcome. The file is written before validation runs, and the command
    returns 0 even when validation reports issues.

    Raises:
        RuntimeError: if the artifact is missing, its root is not an object,
            or a distinct ``--out`` path already exists without ``--force``.
            Canonicalisation may raise RuntimeError as well.
    """
    repo_root = Path(args.repo_root).resolve()
    source_path = _resolve_cli_path(args.artifact, repo_root)
    if not source_path.exists():
        raise RuntimeError(f"kernel artifact not found: {_path_for_state(source_path, repo_root)}")
    loaded = _load_config(source_path)
    if not isinstance(loaded, dict):
        raise RuntimeError("kernel artifact root must be an object.")

    requested_out = str(getattr(args, "out", "") or "").strip()
    target_path = source_path
    if requested_out:
        target_path = _resolve_cli_path(requested_out, repo_root)
    # Rewriting the source in place never needs --force; only a different,
    # pre-existing destination does.
    if target_path.exists() and target_path != source_path and not args.force:
        raise RuntimeError(
            f"output path already exists: {_path_for_state(target_path, repo_root)}. Use --force to overwrite."
        )

    version_before = str(loaded.get("schema_version", "") or "").strip()
    canonical, dropped = _kernel_canonical_payload(
        loaded,
        drop_unknown_fields=bool(getattr(args, "drop_unknown_fields", False)),
    )
    emitted_format = _write_structured_payload(target_path, canonical, format_hint=args.format)
    class_name = str(canonical.get("artifact_class", ""))
    validation = _validate_kernel_payload(canonical, expected_class=class_name.strip())
    version_after = str(canonical.get("schema_version", ""))

    result = {
        "ok": True,
        "artifact": _path_for_state(source_path, repo_root),
        "path": _path_for_state(target_path, repo_root),
        "format": emitted_format,
        "schema_version_before": version_before or "(missing)",
        "schema_version_after": version_after,
        "schema_version_updated": version_before != version_after,
        "artifact_class": class_name,
        "dropped_unknown_fields": dropped,
        "validation": validation,
    }
    if args.json_output:
        _print_json(result)
        return 0

    for key in ("path", "artifact_class", "schema_version_before", "schema_version_after"):
        print(f"{key}={result[key]}")
    if dropped:
        print("dropped_unknown_fields=" + ",".join(dropped))
    print(f"valid={'true' if validation.get('valid') else 'false'}")
    for issue in validation.get("issues", []):
        print(f"issue={issue}")
    return 0
8555
+
8556
+
7579
8557
  def cmd_gate_run(args: argparse.Namespace) -> int:
7580
8558
  repo_root = Path(args.repo_root).resolve()
7581
8559
  _ensure_dirs(repo_root)
@@ -11732,6 +12710,43 @@ def build_parser() -> argparse.ArgumentParser:
11732
12710
  add_json_flag(s_world_bind)
11733
12711
  s_world_bind.set_defaults(func=cmd_world_bind, json_output=False)
11734
12712
 
12713
+ s_youtube = sub.add_parser("youtube", help="Public YouTube metadata and transcript inspection")
12714
+ youtube_sub = s_youtube.add_subparsers(dest="youtube_cmd", required=True)
12715
+
12716
+ s_youtube_inspect = youtube_sub.add_parser(
12717
+ "inspect",
12718
+ help="Inspect a YouTube video and fetch public metadata plus transcript text when captions are available",
12719
+ )
12720
+ s_youtube_inspect.add_argument("url", help="YouTube watch/share URL or 11-character video id")
12721
+ s_youtube_inspect.add_argument(
12722
+ "--lang",
12723
+ default="",
12724
+ help="Preferred caption language code, for example en or es",
12725
+ )
12726
+ s_youtube_inspect.add_argument(
12727
+ "--save",
12728
+ action="store_true",
12729
+ help="Save the inspected source artifact under orp/external/youtube/<video_id>.json",
12730
+ )
12731
+ s_youtube_inspect.add_argument(
12732
+ "--out",
12733
+ default="",
12734
+ help="Optional output path for the source artifact (.json, .yml, or .yaml)",
12735
+ )
12736
+ s_youtube_inspect.add_argument(
12737
+ "--format",
12738
+ default="",
12739
+ choices=["", "json", "yaml"],
12740
+ help="Optional explicit output format when saving",
12741
+ )
12742
+ s_youtube_inspect.add_argument(
12743
+ "--force",
12744
+ action="store_true",
12745
+ help="Overwrite an existing saved artifact",
12746
+ )
12747
+ add_json_flag(s_youtube_inspect)
12748
+ s_youtube_inspect.set_defaults(func=cmd_youtube_inspect, json_output=False)
12749
+
11735
12750
  s_secrets = sub.add_parser("secrets", help="Hosted secret store and project binding operations")
11736
12751
  secrets_sub = s_secrets.add_subparsers(dest="secrets_cmd", required=True)
11737
12752
 
@@ -12658,6 +13673,106 @@ def build_parser() -> argparse.ArgumentParser:
12658
13673
  add_json_flag(s_kernel_scaffold)
12659
13674
  s_kernel_scaffold.set_defaults(func=cmd_kernel_scaffold, json_output=False)
12660
13675
 
13676
+ s_kernel_stats = kernel_sub.add_parser(
13677
+ "stats",
13678
+ help="Summarize observed kernel validation pressure from RUN.json artifacts",
13679
+ )
13680
+ s_kernel_stats.add_argument(
13681
+ "--run-id",
13682
+ action="append",
13683
+ default=[],
13684
+ help="Specific run id to include (repeatable). Defaults to all discovered runs.",
13685
+ )
13686
+ s_kernel_stats.add_argument(
13687
+ "--run-json",
13688
+ action="append",
13689
+ default=[],
13690
+ help="Explicit RUN.json path to include (repeatable). Defaults to all discovered runs.",
13691
+ )
13692
+ add_json_flag(s_kernel_stats)
13693
+ s_kernel_stats.set_defaults(func=cmd_kernel_stats, json_output=False)
13694
+
13695
+ s_kernel_propose = kernel_sub.add_parser(
13696
+ "propose",
13697
+ help="Scaffold a governed kernel-evolution proposal artifact",
13698
+ )
13699
+ s_kernel_propose.add_argument(
13700
+ "--kind",
13701
+ required=True,
13702
+ choices=["add_field", "new_class", "requirement_change", "deprecate_field"],
13703
+ help="Type of kernel evolution proposal",
13704
+ )
13705
+ s_kernel_propose.add_argument(
13706
+ "--title",
13707
+ required=True,
13708
+ help="Proposal title",
13709
+ )
13710
+ s_kernel_propose.add_argument(
13711
+ "--artifact-class",
13712
+ action="append",
13713
+ default=[],
13714
+ choices=sorted(KERNEL_ARTIFACT_CLASS_REQUIREMENTS.keys()),
13715
+ help="Affected kernel artifact class (repeatable)",
13716
+ )
13717
+ s_kernel_propose.add_argument(
13718
+ "--field",
13719
+ action="append",
13720
+ default=[],
13721
+ help="Affected kernel field name (repeatable)",
13722
+ )
13723
+ s_kernel_propose.add_argument(
13724
+ "--slug",
13725
+ default="",
13726
+ help="Optional output slug override",
13727
+ )
13728
+ s_kernel_propose.add_argument(
13729
+ "--out",
13730
+ default="",
13731
+ help="Optional output path (default: analysis/kernel-proposals/<slug>.yml)",
13732
+ )
13733
+ s_kernel_propose.add_argument(
13734
+ "--format",
13735
+ default="",
13736
+ choices=["", "yaml", "json"],
13737
+ help="Optional explicit output format",
13738
+ )
13739
+ s_kernel_propose.add_argument(
13740
+ "--force",
13741
+ action="store_true",
13742
+ help="Overwrite an existing proposal at the output path",
13743
+ )
13744
+ add_json_flag(s_kernel_propose)
13745
+ s_kernel_propose.set_defaults(func=cmd_kernel_propose, json_output=False)
13746
+
13747
+ s_kernel_migrate = kernel_sub.add_parser(
13748
+ "migrate",
13749
+ help="Rewrite a kernel artifact into the current canonical field order and schema version",
13750
+ )
13751
+ s_kernel_migrate.add_argument("artifact", help="Kernel artifact path (.yml, .yaml, or .json)")
13752
+ s_kernel_migrate.add_argument(
13753
+ "--out",
13754
+ default="",
13755
+ help="Optional output path (default: rewrite in place)",
13756
+ )
13757
+ s_kernel_migrate.add_argument(
13758
+ "--format",
13759
+ default="",
13760
+ choices=["", "yaml", "json"],
13761
+ help="Optional explicit output format",
13762
+ )
13763
+ s_kernel_migrate.add_argument(
13764
+ "--drop-unknown-fields",
13765
+ action="store_true",
13766
+ help="Drop unknown fields instead of failing migration",
13767
+ )
13768
+ s_kernel_migrate.add_argument(
13769
+ "--force",
13770
+ action="store_true",
13771
+ help="Allow overwriting an existing --out path",
13772
+ )
13773
+ add_json_flag(s_kernel_migrate)
13774
+ s_kernel_migrate.set_defaults(func=cmd_kernel_migrate, json_output=False)
13775
+
12661
13776
  s_gate = sub.add_parser("gate", help="Gate operations")
12662
13777
  gate_sub = s_gate.add_subparsers(dest="gate_cmd", required=True)
12663
13778
  s_run = gate_sub.add_parser("run", help="Run configured gates for a profile")