open-research-protocol 0.4.7 → 0.4.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -0
- package/cli/orp.py +1158 -43
- package/docs/AGENT_LOOP.md +3 -0
- package/docs/ORP_REASONING_KERNEL_AGENT_PILOT.md +125 -0
- package/docs/ORP_REASONING_KERNEL_AGENT_REPLICATION.md +97 -0
- package/docs/ORP_REASONING_KERNEL_CANONICAL_CONTINUATION_PILOT.md +100 -0
- package/docs/ORP_REASONING_KERNEL_COMPARISON_PILOT.md +116 -0
- package/docs/ORP_REASONING_KERNEL_CONTINUATION_PILOT.md +86 -0
- package/docs/ORP_REASONING_KERNEL_EVALUATION_PLAN.md +261 -0
- package/docs/ORP_REASONING_KERNEL_EVIDENCE_MATRIX.md +131 -0
- package/docs/ORP_REASONING_KERNEL_EVOLUTION.md +123 -0
- package/docs/ORP_REASONING_KERNEL_PICKUP_PILOT.md +107 -0
- package/docs/ORP_REASONING_KERNEL_TECHNICAL_VALIDATION.md +140 -22
- package/docs/ORP_REASONING_KERNEL_V0_1.md +11 -0
- package/docs/ORP_YOUTUBE_INSPECT.md +97 -0
- package/docs/benchmarks/orp_reasoning_kernel_agent_pilot_v0_1.json +796 -0
- package/docs/benchmarks/orp_reasoning_kernel_agent_replication_task_smoke.json +487 -0
- package/docs/benchmarks/orp_reasoning_kernel_agent_replication_v0_1.json +1927 -0
- package/docs/benchmarks/orp_reasoning_kernel_agent_replication_v0_2.json +10217 -0
- package/docs/benchmarks/orp_reasoning_kernel_canonical_continuation_task_smoke.json +174 -0
- package/docs/benchmarks/orp_reasoning_kernel_canonical_continuation_v0_1.json +598 -0
- package/docs/benchmarks/orp_reasoning_kernel_comparison_v0_1.json +688 -0
- package/docs/benchmarks/orp_reasoning_kernel_continuation_task_smoke.json +150 -0
- package/docs/benchmarks/orp_reasoning_kernel_continuation_v0_1.json +448 -0
- package/docs/benchmarks/orp_reasoning_kernel_pickup_v0_1.json +594 -0
- package/docs/benchmarks/orp_reasoning_kernel_v0_1_validation.json +769 -41
- package/examples/README.md +2 -0
- package/examples/kernel/comparison/comparison-corpus.json +337 -0
- package/examples/kernel/comparison/next-task-continuation.json +55 -0
- package/examples/kernel/corpus/operations/habanero-routing.checkpoint.kernel.yml +12 -0
- package/examples/kernel/corpus/operations/runner-routing.policy.kernel.yml +9 -0
- package/examples/kernel/corpus/product/project-home.decision.kernel.yml +11 -0
- package/examples/kernel/corpus/research/kernel-handoff.experiment.kernel.yml +16 -0
- package/examples/kernel/corpus/research/lane-drift.hypothesis.kernel.yml +11 -0
- package/examples/kernel/corpus/software/trace-widget.task.kernel.yml +13 -0
- package/examples/kernel/corpus/writing/kernel-launch.result.kernel.yml +12 -0
- package/llms.txt +3 -0
- package/package.json +4 -1
- package/scripts/orp-kernel-agent-pilot.py +673 -0
- package/scripts/orp-kernel-agent-replication.py +307 -0
- package/scripts/orp-kernel-benchmark.py +471 -2
- package/scripts/orp-kernel-canonical-continuation.py +381 -0
- package/scripts/orp-kernel-ci-check.py +138 -0
- package/scripts/orp-kernel-comparison.py +592 -0
- package/scripts/orp-kernel-continuation-pilot.py +384 -0
- package/scripts/orp-kernel-pickup.py +401 -0
- package/spec/v1/kernel-extension.schema.json +96 -0
- package/spec/v1/kernel-proposal.schema.json +115 -0
- package/spec/v1/kernel.schema.json +2 -1
- package/spec/v1/youtube-source.schema.json +151 -0
package/cli/orp.py
CHANGED
|
@@ -30,6 +30,7 @@ import argparse
|
|
|
30
30
|
import datetime as dt
|
|
31
31
|
import getpass
|
|
32
32
|
import hashlib
|
|
33
|
+
import html
|
|
33
34
|
import json
|
|
34
35
|
import os
|
|
35
36
|
import platform
|
|
@@ -45,6 +46,7 @@ import uuid
|
|
|
45
46
|
from urllib import error as urlerror
|
|
46
47
|
from urllib import parse as urlparse
|
|
47
48
|
from urllib import request as urlrequest
|
|
49
|
+
import xml.etree.ElementTree as ET
|
|
48
50
|
|
|
49
51
|
RUNNER_LEASE_STALE_SECONDS = 120
|
|
50
52
|
|
|
@@ -111,6 +113,8 @@ ORP_PACKAGE_NAME = _tool_package_name()
|
|
|
111
113
|
DEFAULT_DISCOVER_PROFILE = "orp.profile.default.json"
|
|
112
114
|
DEFAULT_DISCOVER_SCAN_ROOT = "orp/discovery/github"
|
|
113
115
|
DEFAULT_HOSTED_BASE_URL = "https://orp.earth"
|
|
116
|
+
KERNEL_SCHEMA_VERSION = "1.0.0"
|
|
117
|
+
YOUTUBE_SOURCE_SCHEMA_VERSION = "1.0.0"
|
|
114
118
|
|
|
115
119
|
|
|
116
120
|
class HostedApiError(RuntimeError):
|
|
@@ -335,6 +339,442 @@ def _request_hosted_sse_event(
|
|
|
335
339
|
) from exc
|
|
336
340
|
|
|
337
341
|
|
|
342
|
+
def _http_get_text(url: str, *, headers: dict[str, str] | None = None, timeout_sec: int = 20) -> str:
|
|
343
|
+
request = urlrequest.Request(url, headers=headers or {}, method="GET")
|
|
344
|
+
try:
|
|
345
|
+
with urlrequest.urlopen(request, timeout=timeout_sec) as response:
|
|
346
|
+
return response.read().decode("utf-8", errors="replace")
|
|
347
|
+
except urlerror.HTTPError as exc:
|
|
348
|
+
body = exc.read().decode("utf-8", errors="replace").strip()
|
|
349
|
+
raise RuntimeError(f"HTTP {exc.code} while fetching {url}: {body or exc.reason}") from exc
|
|
350
|
+
except urlerror.URLError as exc:
|
|
351
|
+
raise RuntimeError(f"Could not reach {url}: {exc.reason}") from exc
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
def _http_get_json(url: str, *, headers: dict[str, str] | None = None, timeout_sec: int = 20) -> dict[str, Any]:
    """Fetch ``url`` via ``_http_get_text`` and decode the body as a JSON object.

    Args:
        url: URL to fetch.
        headers: Optional request headers forwarded to ``_http_get_text``.
        timeout_sec: Timeout in seconds forwarded to ``_http_get_text``.

    Returns:
        The decoded JSON object.

    Raises:
        RuntimeError: If the body is not valid JSON, or its top-level value is
            not an object. Errors from ``_http_get_text`` propagate unchanged.
    """
    body = _http_get_text(url, headers=headers, timeout_sec=timeout_sec)
    try:
        decoded = json.loads(body)
    except Exception as exc:
        raise RuntimeError(f"Response from {url} was not valid JSON.") from exc
    if not isinstance(decoded, dict):
        raise RuntimeError(f"Response from {url} was not a JSON object.")
    return decoded
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def _youtube_request_headers() -> dict[str, str]:
    """Return the browser-like headers used for the YouTube HTTP requests."""
    user_agent = (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0 Safari/537.36"
    )
    headers = {"User-Agent": user_agent}
    headers["Accept-Language"] = "en-US,en;q=0.9"
    return headers
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def _youtube_source_schema_path() -> Path:
    """Return the path of ``spec/v1/youtube-source.schema.json``.

    The path is resolved relative to the directory two levels above this file.
    """
    base_dir = Path(__file__).resolve().parent.parent
    return base_dir.joinpath("spec", "v1", "youtube-source.schema.json")
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def _youtube_video_id_from_url(raw_url: str) -> str:
    """Extract the 11-character YouTube video id from a URL or a bare id.

    Accepted inputs are a bare id, ``youtu.be/<id>`` links, and
    ``/watch?v=<id>``, ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>`` and
    ``/v/<id>`` paths on ``youtube.com`` / ``youtube-nocookie.com`` or any of
    their subdomains. URLs given without a scheme are accepted as well.

    Args:
        raw_url: URL or bare video id supplied by the user.

    Returns:
        The video id, guaranteed to match ``[A-Za-z0-9_-]{11}``.

    Raises:
        RuntimeError: If the input is empty or no valid id can be extracted.
    """
    text = str(raw_url or "").strip()
    if not text:
        raise RuntimeError("YouTube URL is required.")
    id_pattern = r"[A-Za-z0-9_-]{11}"
    if re.fullmatch(id_pattern, text):
        return text

    # Tolerate scheme-less input such as "youtu.be/<id>"; without a scheme
    # urlparse would put the host into the path.
    candidate = text if "://" in text else "https://" + text.lstrip("/")
    try:
        parsed = urlparse.urlparse(candidate)
        # ``hostname`` drops any port or userinfo that ``netloc`` would carry.
        host = (parsed.hostname or "").lower().rstrip(".")
    except ValueError as exc:
        raise RuntimeError(f"Could not extract a YouTube video id from: {text}") from exc

    def host_is(domain: str) -> bool:
        # Exact domain or a true subdomain; a plain suffix test would also
        # accept look-alike hosts such as "evilyoutube.com".
        return host == domain or host.endswith("." + domain)

    path_parts = [part for part in parsed.path.split("/") if part]
    video_id = ""
    if host_is("youtu.be"):
        if path_parts:
            video_id = path_parts[0]
    elif host_is("youtube.com") or host_is("youtube-nocookie.com"):
        if path_parts == ["watch"]:
            video_id = urlparse.parse_qs(parsed.query).get("v", [""])[0].strip()
        elif len(path_parts) >= 2 and path_parts[0] in {"embed", "shorts", "live", "v"}:
            video_id = path_parts[1]

    # The id is later used to build URLs and file names, so only return
    # something that really looks like a video id.
    if re.fullmatch(id_pattern, video_id):
        return video_id
    raise RuntimeError(f"Could not extract a YouTube video id from: {text}")
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
def _youtube_canonical_url(video_id: str) -> str:
    """Build the ``www.youtube.com`` watch URL for ``video_id``."""
    return "https://www.youtube.com/watch?v={}".format(video_id)
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
def _extract_json_object_after_marker(text: str, marker: str) -> dict[str, Any] | None:
|
|
407
|
+
index = text.find(marker)
|
|
408
|
+
if index < 0:
|
|
409
|
+
return None
|
|
410
|
+
start = text.find("{", index)
|
|
411
|
+
if start < 0:
|
|
412
|
+
return None
|
|
413
|
+
depth = 0
|
|
414
|
+
in_string = False
|
|
415
|
+
escaped = False
|
|
416
|
+
for pos in range(start, len(text)):
|
|
417
|
+
ch = text[pos]
|
|
418
|
+
if in_string:
|
|
419
|
+
if escaped:
|
|
420
|
+
escaped = False
|
|
421
|
+
elif ch == "\\":
|
|
422
|
+
escaped = True
|
|
423
|
+
elif ch == '"':
|
|
424
|
+
in_string = False
|
|
425
|
+
continue
|
|
426
|
+
if ch == '"':
|
|
427
|
+
in_string = True
|
|
428
|
+
continue
|
|
429
|
+
if ch == "{":
|
|
430
|
+
depth += 1
|
|
431
|
+
continue
|
|
432
|
+
if ch == "}":
|
|
433
|
+
depth -= 1
|
|
434
|
+
if depth == 0:
|
|
435
|
+
candidate = text[start : pos + 1]
|
|
436
|
+
try:
|
|
437
|
+
payload = json.loads(candidate)
|
|
438
|
+
except Exception:
|
|
439
|
+
return None
|
|
440
|
+
return payload if isinstance(payload, dict) else None
|
|
441
|
+
return None
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
def _youtube_track_label(track: dict[str, Any]) -> str:
    """Return a display label for a caption track.

    Preference order: the ``simpleText`` of the track's ``name``, then the
    concatenated non-empty ``text`` values of ``name["runs"]``, and finally
    the track's ``languageCode`` (an empty string when that is missing too).
    """
    label_info = track.get("name")
    if isinstance(label_info, dict):
        plain = str(label_info.get("simpleText", "")).strip()
        if plain:
            return plain
        run_rows = label_info.get("runs")
        if isinstance(run_rows, list):
            fragments: list[str] = []
            for entry in run_rows:
                if not isinstance(entry, dict):
                    continue
                fragment = str(entry.get("text", "")).strip()
                if fragment:
                    fragments.append(fragment)
            if fragments:
                return "".join(fragments)
    return str(track.get("languageCode", "")).strip()
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
def _pick_youtube_caption_track(tracks: list[dict[str, Any]], preferred_lang: str = "") -> dict[str, Any] | None:
|
|
463
|
+
if not tracks:
|
|
464
|
+
return None
|
|
465
|
+
preferred = str(preferred_lang or "").strip().lower()
|
|
466
|
+
|
|
467
|
+
def score(track: dict[str, Any]) -> tuple[int, int]:
|
|
468
|
+
code = str(track.get("languageCode", "")).strip().lower()
|
|
469
|
+
kind = str(track.get("kind", "")).strip().lower()
|
|
470
|
+
auto = 1 if kind == "asr" else 0
|
|
471
|
+
exact = 1 if preferred and code == preferred else 0
|
|
472
|
+
prefix = 1 if preferred and code.startswith(preferred + "-") else 0
|
|
473
|
+
english = 1 if code.startswith("en") else 0
|
|
474
|
+
return (exact * 100 + prefix * 80 + english * 20 - auto * 5, -auto)
|
|
475
|
+
|
|
476
|
+
ranked = sorted(tracks, key=score, reverse=True)
|
|
477
|
+
return ranked[0] if ranked else None
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
def _youtube_add_query_param(url: str, key: str, value: str) -> str:
    """Return ``url`` with the query parameter ``key`` set to ``value``.

    If ``key`` is already present, its first occurrence is replaced in place
    and any further occurrences of ``key`` are dropped; otherwise the pair is
    appended. All other parameters, including repeated ones and blank values,
    are kept in their original order.

    Args:
        url: URL whose query string should be updated.
        key: Query parameter name to set.
        value: New value for ``key``.

    Returns:
        The rebuilt URL with the query string re-encoded by ``urlencode``.
    """
    parsed = urlparse.urlsplit(url)
    pairs: list[tuple[str, str]] = []
    replaced = False
    # Work on the list of pairs instead of a dict so that repeated parameters
    # other than ``key`` survive the round trip.
    for name, current in urlparse.parse_qsl(parsed.query, keep_blank_values=True):
        if name != key:
            pairs.append((name, current))
        elif not replaced:
            pairs.append((key, value))
            replaced = True
    if not replaced:
        pairs.append((key, value))
    return urlparse.urlunsplit(
        (
            parsed.scheme,
            parsed.netloc,
            parsed.path,
            urlparse.urlencode(pairs),
            parsed.fragment,
        )
    )
|
|
493
|
+
|
|
494
|
+
|
|
495
|
+
def _parse_youtube_transcript_json3(payload: dict[str, Any]) -> tuple[str, list[dict[str, Any]]]:
    """Convert a ``json3`` caption payload into transcript text and segments.

    Each entry of ``payload["events"]`` that carries a ``segs`` list becomes
    one segment: the ``utf8`` pieces are HTML-unescaped, concatenated and
    whitespace-collapsed. Events without text are skipped.

    Returns:
        ``(transcript_text, segments)``. Each segment is a dict with
        ``start_ms``, ``duration_ms`` and ``text``; ``transcript_text`` joins
        the segment texts with newlines. ``("", [])`` is returned when
        ``events`` is not a list.
    """
    raw_events = payload.get("events")
    if not isinstance(raw_events, list):
        return ("", [])
    rows: list[dict[str, Any]] = []
    for entry in raw_events:
        chunks = entry.get("segs") if isinstance(entry, dict) else None
        if not isinstance(chunks, list):
            continue
        joined = "".join(
            html.unescape(str(chunk.get("utf8", "")))
            for chunk in chunks
            if isinstance(chunk, dict)
        )
        line = re.sub(r"\s+", " ", joined).strip()
        if line:
            rows.append(
                {
                    "start_ms": int(entry.get("tStartMs", 0) or 0),
                    "duration_ms": int(entry.get("dDurationMs", 0) or 0),
                    "text": line,
                }
            )
    return "\n".join(str(row["text"]) for row in rows), rows
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
def _parse_youtube_transcript_xml(text: str) -> tuple[str, list[dict[str, Any]]]:
    """Convert an XML caption document into transcript text and segments.

    Every ``<text>`` element with non-blank content becomes one segment. Its
    content is HTML-unescaped (on top of the XML parser's own unescaping) and
    whitespace-collapsed; the ``start`` and ``dur`` attributes, given in
    seconds, are converted to integer milliseconds.

    Returns:
        ``(transcript_text, segments)``, or ``("", [])`` when ``text`` cannot
        be parsed as XML.
    """
    try:
        document = ET.fromstring(text)
    except Exception:
        return ("", [])
    rows: list[dict[str, Any]] = []
    for element in document.findall(".//text"):
        content = html.unescape("".join(element.itertext()))
        content = re.sub(r"\s+", " ", content).strip()
        if not content:
            continue
        begin_sec = float(element.attrib.get("start", "0") or "0")
        length_sec = float(element.attrib.get("dur", "0") or "0")
        rows.append(
            {
                "start_ms": int(begin_sec * 1000),
                "duration_ms": int(length_sec * 1000),
                "text": content,
            }
        )
    return "\n".join(str(row["text"]) for row in rows), rows
|
|
549
|
+
|
|
550
|
+
|
|
551
|
+
def _youtube_fetch_oembed(canonical_url: str) -> dict[str, Any]:
    """Fetch oEmbed metadata for ``canonical_url`` from YouTube.

    The lookup is best-effort: any failure while fetching or decoding the
    response yields an empty dict instead of an exception.
    """
    query = urlparse.urlencode({"url": canonical_url, "format": "json"})
    endpoint = "https://www.youtube.com/oembed?" + query
    try:
        metadata = _http_get_json(endpoint, headers=_youtube_request_headers(), timeout_sec=20)
    except Exception:
        return {}
    return metadata
|
|
557
|
+
|
|
558
|
+
|
|
559
|
+
def _youtube_fetch_watch_state(video_id: str) -> dict[str, Any]:
    """Fetch the watch page for ``video_id`` and extract its player state.

    The page is requested with ``hl=en`` and searched for the embedded
    ``ytInitialPlayerResponse`` JSON object.

    Args:
        video_id: YouTube video id.

    Returns:
        A dict with the raw ``player_response`` plus ``video_details``,
        ``microformat``, ``playability_status`` (each a dict, empty when the
        source value is missing or not a dict) and ``caption_tracks`` (a list,
        empty when missing or not a list).

    Raises:
        RuntimeError: If the page cannot be fetched or no player response can
            be parsed from it.
    """
    url = _youtube_canonical_url(video_id) + "&hl=en&persist_hl=1"
    html_text = _http_get_text(url, headers=_youtube_request_headers(), timeout_sec=25)
    markers = [
        "var ytInitialPlayerResponse = ",
        "ytInitialPlayerResponse = ",
        "window['ytInitialPlayerResponse'] = ",
        'window["ytInitialPlayerResponse"] = ',
    ]
    player_response: dict[str, Any] | None = None
    for marker in markers:
        player_response = _extract_json_object_after_marker(html_text, marker)
        if player_response:
            break
    if not player_response:
        raise RuntimeError("Could not parse YouTube player response from the watch page.")

    def as_dict(value: Any) -> dict[str, Any]:
        # Any level of the response may be null or have an unexpected type;
        # normalise to an empty dict so chained lookups cannot raise.
        return value if isinstance(value, dict) else {}

    captions_renderer = as_dict(as_dict(player_response.get("captions")).get("playerCaptionsTracklistRenderer"))
    captions = captions_renderer.get("captionTracks")
    return {
        "player_response": player_response,
        "video_details": as_dict(player_response.get("videoDetails")),
        "microformat": as_dict(as_dict(player_response.get("microformat")).get("playerMicroformatRenderer")),
        "playability_status": as_dict(player_response.get("playabilityStatus")),
        "caption_tracks": captions if isinstance(captions, list) else [],
    }
|
|
595
|
+
|
|
596
|
+
|
|
597
|
+
def _youtube_fetch_transcript_from_track(track: dict[str, Any]) -> tuple[str, list[dict[str, Any]], str]:
    """Download the transcript for a caption track.

    The track's ``baseUrl`` is first requested with ``fmt=json3``; if that
    fails or yields no text, the unmodified URL is fetched and parsed as XML.
    Fetch and parse errors in either attempt are swallowed on purpose so the
    next option can be tried.

    Returns:
        ``(transcript_text, segments, mode)`` where ``mode`` is ``"json3"``,
        ``"xml"``, ``"missing_track_url"`` (no ``baseUrl``) or
        ``"unavailable"`` (both attempts produced no text).
    """
    base_url = str(track.get("baseUrl", "")).strip()
    if not base_url:
        return ("", [], "missing_track_url")

    json3_url = _youtube_add_query_param(base_url, "fmt", "json3")
    body: str = ""
    rows: list[dict[str, Any]] = []
    try:
        json_payload = _http_get_json(json3_url, headers=_youtube_request_headers(), timeout_sec=25)
        body, rows = _parse_youtube_transcript_json3(json_payload)
    except Exception:
        body, rows = "", []
    if body:
        return body, rows, "json3"

    try:
        xml_body = _http_get_text(base_url, headers=_youtube_request_headers(), timeout_sec=25)
        body, rows = _parse_youtube_transcript_xml(xml_body)
    except Exception:
        body, rows = "", []
    if body:
        return body, rows, "xml"

    return ("", [], "unavailable")
|
|
617
|
+
|
|
618
|
+
|
|
619
|
+
def _youtube_text_bundle(payload: dict[str, Any]) -> str:
    """Render the source payload as one plain-text block.

    Sections appear in the order title, author, duration, description and
    transcript; empty ones are omitted and the rest are separated by blank
    lines. The duration is included only when it is a positive ``int``.
    """

    def field_text(name: str) -> str:
        return str(payload.get(name, "")).strip()

    sections: list[str] = []

    heading = field_text("title")
    if heading:
        sections.append(f"Title: {heading}")

    creator = field_text("author_name")
    if creator:
        sections.append(f"Author: {creator}")

    length = payload.get("duration_seconds")
    if isinstance(length, int) and length > 0:
        sections.append(f"Duration seconds: {length}")

    summary = field_text("description")
    if summary:
        sections.append("Description:\n" + summary)

    spoken = field_text("transcript_text")
    if spoken:
        sections.append("Transcript:\n" + spoken)

    return "\n\n".join(sections)
|
|
637
|
+
|
|
638
|
+
|
|
639
|
+
def _youtube_inspect_payload(raw_url: str, preferred_lang: str = "") -> dict[str, Any]:
    """Collect public metadata and caption text for a YouTube video.

    oEmbed metadata and the watch-page player state are fetched and merged;
    the watch page wins for title and author when both are available. If a
    caption track exists, the best one for ``preferred_lang`` is downloaded.
    A watch-page or transcript failure is recorded in ``warnings`` instead of
    aborting.

    Args:
        raw_url: YouTube URL or bare video id.
        preferred_lang: Preferred caption language code; empty for no
            preference.

    Returns:
        The ``youtube_source`` payload dict, including ``text_bundle``.

    Raises:
        RuntimeError: If no video id can be extracted from ``raw_url``.
    """
    video_id = _youtube_video_id_from_url(raw_url)
    canonical_url = _youtube_canonical_url(video_id)
    warnings: list[str] = []
    oembed = _youtube_fetch_oembed(canonical_url)

    watch_state: dict[str, Any] = {}
    try:
        watch_state = _youtube_fetch_watch_state(video_id)
    except Exception as exc:
        # Continue with whatever oEmbed returned; the failure becomes a warning.
        warnings.append(str(exc))

    def as_dict(value: Any) -> dict[str, Any]:
        return value if isinstance(value, dict) else {}

    video_details = as_dict(watch_state.get("video_details"))
    microformat = as_dict(watch_state.get("microformat"))
    playability = as_dict(watch_state.get("playability_status"))
    raw_tracks = watch_state.get("caption_tracks")
    tracks = [row for row in raw_tracks if isinstance(row, dict)] if isinstance(raw_tracks, list) else []
    chosen_track = _pick_youtube_caption_track(tracks, preferred_lang)

    transcript_text = ""
    transcript_segments: list[dict[str, Any]] = []
    transcript_fetch_mode = "none"
    transcript_available = False
    transcript_language = ""
    transcript_track_name = ""
    transcript_kind = "none"
    if chosen_track is not None:
        transcript_text, transcript_segments, transcript_fetch_mode = _youtube_fetch_transcript_from_track(chosen_track)
        transcript_available = bool(transcript_text.strip())
        transcript_language = str(chosen_track.get("languageCode", "")).strip()
        transcript_track_name = _youtube_track_label(chosen_track)
        transcript_kind = "auto" if str(chosen_track.get("kind", "")).strip().lower() == "asr" else "manual"
        if not transcript_available:
            warnings.append("A caption track was found, but transcript text could not be fetched.")
    elif watch_state:
        # Only claim "no captions" when the watch page was actually read.
        warnings.append("No caption tracks were available for this video.")

    title = str(video_details.get("title") or oembed.get("title") or "").strip()
    author_name = str(video_details.get("author") or oembed.get("author_name") or "").strip()
    author_url = str(oembed.get("author_url") or "").strip()
    thumbnail_url = str(oembed.get("thumbnail_url") or "").strip()
    # ``description`` in the microformat may be missing or not a dict; guard the
    # nested lookup so an odd response cannot raise AttributeError.
    description = str(
        video_details.get("shortDescription")
        or as_dict(microformat.get("description")).get("simpleText")
        or ""
    ).strip()
    channel_id = str(video_details.get("channelId") or "").strip()
    duration_seconds = 0
    raw_duration = video_details.get("lengthSeconds")
    if isinstance(raw_duration, str) and raw_duration.isdigit():
        duration_seconds = int(raw_duration)
    published_at = str(microformat.get("publishDate") or "").strip()
    payload = {
        "schema_version": YOUTUBE_SOURCE_SCHEMA_VERSION,
        "kind": "youtube_source",
        "retrieved_at_utc": _now_utc(),
        "source_url": str(raw_url).strip(),
        "canonical_url": canonical_url,
        "video_id": video_id,
        "title": title,
        "author_name": author_name,
        "author_url": author_url,
        "thumbnail_url": thumbnail_url,
        "channel_id": channel_id,
        "description": description,
        "duration_seconds": duration_seconds or None,
        "published_at": published_at,
        "playability_status": str(playability.get("status", "")).strip(),
        "transcript_available": transcript_available,
        "transcript_language": transcript_language,
        "transcript_track_name": transcript_track_name,
        "transcript_kind": transcript_kind,
        "transcript_fetch_mode": transcript_fetch_mode,
        "transcript_text": transcript_text,
        "transcript_segments": transcript_segments,
        "warnings": _unique_strings(warnings),
    }
    payload["text_bundle"] = _youtube_text_bundle(payload)
    return payload
|
|
712
|
+
|
|
713
|
+
|
|
714
|
+
def _default_youtube_artifact_path(repo_root: Path, video_id: str) -> Path:
    """Return ``<repo_root>/orp/external/youtube/<video_id>.json``."""
    file_name = f"{video_id}.json"
    return repo_root.joinpath("orp", "external", "youtube", file_name)
|
|
716
|
+
|
|
717
|
+
|
|
718
|
+
def cmd_youtube_inspect(args: argparse.Namespace) -> int:
    """Run ``orp youtube inspect``: fetch a video's source payload and report it.

    The payload is written to disk when ``--save`` or ``--out`` is given:
    ``--out`` is resolved through ``_resolve_cli_path``, otherwise the default
    artifact path under the repo root is used. With ``--json`` the result is
    printed as JSON; otherwise key/value pairs are printed, followed by the
    text bundle and any warnings.

    Returns:
        ``0`` on success.

    Raises:
        RuntimeError: If the output path already exists and ``--force`` was
            not given.
    """
    repo_root = Path(args.repo_root).resolve()
    lang_pref = str(getattr(args, "lang", "") or "").strip()
    payload = _youtube_inspect_payload(args.url, preferred_lang=lang_pref)

    explicit_out = str(getattr(args, "out", "") or "").strip()
    out_path: Path | None = None
    emitted_format = ""
    if bool(getattr(args, "save", False) or explicit_out):
        if explicit_out:
            out_path = _resolve_cli_path(explicit_out, repo_root)
        else:
            _ensure_dirs(repo_root)
            out_path = _default_youtube_artifact_path(repo_root, str(payload.get("video_id", "")).strip())
        if out_path.exists() and not bool(getattr(args, "force", False)):
            raise RuntimeError(
                f"output path already exists: {_path_for_state(out_path, repo_root)}. Use --force to overwrite."
            )
        emitted_format = _write_structured_payload(out_path, payload, format_hint=str(getattr(args, "format", "") or ""))

    was_saved = out_path is not None
    result = {
        "ok": True,
        "saved": was_saved,
        "path": _path_for_state(out_path, repo_root) if was_saved else "",
        "format": emitted_format,
        "schema_path": "spec/v1/youtube-source.schema.json",
        "source": payload,
    }
    if args.json_output:
        _print_json(result)
        return 0

    def shown(field: str) -> str:
        return str(payload.get(field, "")).strip()

    _print_pairs(
        [
            ("ok", "true"),
            ("video.id", shown("video_id")),
            ("video.title", shown("title")),
            ("video.author", shown("author_name")),
            ("video.duration_seconds", payload.get("duration_seconds") or ""),
            ("transcript.available", str(bool(payload.get("transcript_available", False))).lower()),
            ("transcript.language", shown("transcript_language")),
            ("transcript.kind", shown("transcript_kind")),
            ("saved", str(bool(was_saved)).lower()),
            ("path", _path_for_state(out_path, repo_root) if was_saved else ""),
        ]
    )
    bundle = shown("text_bundle")
    if bundle:
        print("")
        print(bundle)
    notices = payload.get("warnings", []) if isinstance(payload.get("warnings"), list) else []
    if notices:
        print("")
        for notice in notices:
            message = str(notice).strip()
            if message:
                print(f"warning={message}")
    return 0
|
|
776
|
+
|
|
777
|
+
|
|
338
778
|
def _runner_transport_mode(args: argparse.Namespace) -> str:
|
|
339
779
|
mode = str(getattr(args, "transport", "auto") or "auto").strip().lower()
|
|
340
780
|
if mode in {"poll", "sse"}:
|
|
@@ -4802,6 +5242,11 @@ def _unique_strings(values: list[str]) -> list[str]:
|
|
|
4802
5242
|
return out
|
|
4803
5243
|
|
|
4804
5244
|
|
|
5245
|
+
def _slug_token(text: str, *, fallback: str = "item") -> str:
    """Turn ``text`` into a lowercase, hyphen-separated slug.

    Runs of characters other than ``a-z`` and ``0-9`` collapse to a single
    hyphen, and leading/trailing hyphens are removed. ``fallback`` is returned
    when nothing is left.
    """
    lowered = str(text or "").strip().lower()
    slug = re.sub(r"[^a-z0-9]+", "-", lowered).strip("-")
    if slug:
        return slug
    return fallback
|
|
5248
|
+
|
|
5249
|
+
|
|
4805
5250
|
def _resolve_config_paths(raw_paths: Any, repo_root: Path, vars_map: dict[str, str]) -> list[str]:
|
|
4806
5251
|
out: list[str] = []
|
|
4807
5252
|
if not isinstance(raw_paths, list):
|
|
@@ -5138,6 +5583,9 @@ def _about_payload() -> dict[str, Any]:
|
|
|
5138
5583
|
"config": "spec/v1/orp.config.schema.json",
|
|
5139
5584
|
"packet": "spec/v1/packet.schema.json",
|
|
5140
5585
|
"kernel": "spec/v1/kernel.schema.json",
|
|
5586
|
+
"kernel_proposal": "spec/v1/kernel-proposal.schema.json",
|
|
5587
|
+
"kernel_extension": "spec/v1/kernel-extension.schema.json",
|
|
5588
|
+
"youtube_source": "spec/v1/youtube-source.schema.json",
|
|
5141
5589
|
"profile_pack": "spec/v1/profile-pack.schema.json",
|
|
5142
5590
|
"link_project": "spec/v1/link-project.schema.json",
|
|
5143
5591
|
"link_session": "spec/v1/link-session.schema.json",
|
|
@@ -5147,10 +5595,20 @@ def _about_payload() -> dict[str, Any]:
|
|
|
5147
5595
|
"abilities": [
|
|
5148
5596
|
{
|
|
5149
5597
|
"id": "kernel",
|
|
5150
|
-
"description": "Reasoning-kernel artifact scaffolding and
|
|
5598
|
+
"description": "Reasoning-kernel artifact scaffolding, validation, observation, proposal, and migration for promotable repository truth.",
|
|
5151
5599
|
"entrypoints": [
|
|
5152
5600
|
["kernel", "validate"],
|
|
5153
5601
|
["kernel", "scaffold"],
|
|
5602
|
+
["kernel", "stats"],
|
|
5603
|
+
["kernel", "propose"],
|
|
5604
|
+
["kernel", "migrate"],
|
|
5605
|
+
],
|
|
5606
|
+
},
|
|
5607
|
+
{
|
|
5608
|
+
"id": "youtube",
|
|
5609
|
+
"description": "Public YouTube metadata and transcript ingestion for agent-readable external source context.",
|
|
5610
|
+
"entrypoints": [
|
|
5611
|
+
["youtube", "inspect"],
|
|
5154
5612
|
],
|
|
5155
5613
|
},
|
|
5156
5614
|
{
|
|
@@ -5243,6 +5701,10 @@ def _about_payload() -> dict[str, Any]:
|
|
|
5243
5701
|
{"name": "about", "path": ["about"], "json_output": True},
|
|
5244
5702
|
{"name": "kernel_validate", "path": ["kernel", "validate"], "json_output": True},
|
|
5245
5703
|
{"name": "kernel_scaffold", "path": ["kernel", "scaffold"], "json_output": True},
|
|
5704
|
+
{"name": "kernel_stats", "path": ["kernel", "stats"], "json_output": True},
|
|
5705
|
+
{"name": "kernel_propose", "path": ["kernel", "propose"], "json_output": True},
|
|
5706
|
+
{"name": "kernel_migrate", "path": ["kernel", "migrate"], "json_output": True},
|
|
5707
|
+
{"name": "youtube_inspect", "path": ["youtube", "inspect"], "json_output": True},
|
|
5246
5708
|
{"name": "auth_login", "path": ["auth", "login"], "json_output": True},
|
|
5247
5709
|
{"name": "auth_verify", "path": ["auth", "verify"], "json_output": True},
|
|
5248
5710
|
{"name": "auth_logout", "path": ["auth", "logout"], "json_output": True},
|
|
@@ -5311,6 +5773,8 @@ def _about_payload() -> dict[str, Any]:
|
|
|
5311
5773
|
"Canonical evidence lives in repo artifact paths outside ORP docs.",
|
|
5312
5774
|
"Default CLI output is human-readable; listed commands with json_output=true also support --json.",
|
|
5313
5775
|
"Reasoning-kernel artifacts shape promotable repository truth for tasks, decisions, hypotheses, experiments, checkpoints, policies, and results.",
|
|
5776
|
+
"Kernel evolution in ORP should stay explicit: observe real usage, propose changes, and migrate artifacts through versioned CLI surfaces rather than silent agent mutation.",
|
|
5777
|
+
"YouTube inspection is a built-in ORP ability exposed through `orp youtube inspect`, returning public metadata and caption transcript text when available.",
|
|
5314
5778
|
"Discovery profiles in ORP are portable search-intent files managed directly by ORP.",
|
|
5315
5779
|
"Collaboration is a built-in ORP ability exposed through `orp collaborate ...`.",
|
|
5316
5780
|
"Project/session linking is a built-in ORP ability exposed through `orp link ...` and stored machine-locally under `.git/orp/link/`.",
|
|
@@ -5420,6 +5884,10 @@ def _home_payload(repo_root: Path, config_arg: str) -> dict[str, Any]:
|
|
|
5420
5884
|
"label": "Inspect the current hosted workspace identity",
|
|
5421
5885
|
"command": "orp whoami --json",
|
|
5422
5886
|
},
|
|
5887
|
+
{
|
|
5888
|
+
"label": "Inspect a YouTube video and public transcript for agent context",
|
|
5889
|
+
"command": "orp youtube inspect https://www.youtube.com/watch?v=<video_id> --json",
|
|
5890
|
+
},
|
|
5423
5891
|
{
|
|
5424
5892
|
"label": "List hosted ideas in the current workspace",
|
|
5425
5893
|
"command": "orp ideas list --json",
|
|
@@ -5507,13 +5975,20 @@ def _home_payload(repo_root: Path, config_arg: str) -> dict[str, Any]:
|
|
|
5507
5975
|
)
|
|
5508
5976
|
quick_actions.insert(
|
|
5509
5977
|
5,
|
|
5978
|
+
{
|
|
5979
|
+
"label": "Inspect kernel validation pressure across recorded runs",
|
|
5980
|
+
"command": "orp kernel stats --json",
|
|
5981
|
+
},
|
|
5982
|
+
)
|
|
5983
|
+
quick_actions.insert(
|
|
5984
|
+
6,
|
|
5510
5985
|
{
|
|
5511
5986
|
"label": "Mark the repo locally ready after validation",
|
|
5512
5987
|
"command": "orp ready --json",
|
|
5513
5988
|
},
|
|
5514
5989
|
)
|
|
5515
5990
|
quick_actions.insert(
|
|
5516
|
-
|
|
5991
|
+
7,
|
|
5517
5992
|
{
|
|
5518
5993
|
"label": "Inspect local project/session link state",
|
|
5519
5994
|
"command": "orp link status --json",
|
|
@@ -7256,27 +7731,280 @@ def _gate_map(config: dict[str, Any]) -> dict[str, dict[str, Any]]:
|
|
|
7256
7731
|
return out
|
|
7257
7732
|
|
|
7258
7733
|
|
|
7259
|
-
|
|
7260
|
-
|
|
7261
|
-
"decision": ["question", "chosen_path", "rejected_alternatives", "rationale", "consequences"],
|
|
7262
|
-
"hypothesis": ["claim", "boundary", "assumptions", "test_path", "falsifiers"],
|
|
7263
|
-
"experiment": ["objective", "method", "inputs", "outputs", "evidence_expectations", "interpretation_limits"],
|
|
7264
|
-
"checkpoint": ["completed_unit", "current_state", "risks", "next_handoff_target", "artifact_refs"],
|
|
7265
|
-
"policy": ["scope", "rule", "rationale", "invariants", "enforcement_surface"],
|
|
7266
|
-
"result": ["claim", "evidence_paths", "status", "interpretation_limits", "next_follow_up"],
|
|
7267
|
-
}
|
|
7734
|
+
def _kernel_schema_path() -> Path:
    """Return the path of ``spec/v1/kernel.schema.json``.

    The path is resolved relative to the directory two levels above this file.
    """
    base_dir = Path(__file__).resolve().parent.parent
    return base_dir.joinpath("spec", "v1", "kernel.schema.json")
|
|
7268
7736
|
|
|
7269
7737
|
|
|
7270
|
-
def
|
|
7271
|
-
|
|
7272
|
-
|
|
7273
|
-
|
|
7274
|
-
|
|
7275
|
-
|
|
7276
|
-
|
|
7738
|
+
def _kernel_proposal_schema_path() -> Path:
    """Return the path of ``spec/v1/kernel-proposal.schema.json``.

    The path is resolved relative to the directory two levels above this file.
    """
    base_dir = Path(__file__).resolve().parent.parent
    return base_dir.joinpath("spec", "v1", "kernel-proposal.schema.json")
|
|
7740
|
+
|
|
7741
|
+
|
|
7742
|
+
def _kernel_extension_schema_path() -> Path:
    """Return the path of ``spec/v1/kernel-extension.schema.json``.

    The path is resolved relative to the directory two levels above this file.
    """
    base_dir = Path(__file__).resolve().parent.parent
    return base_dir.joinpath("spec", "v1", "kernel-extension.schema.json")
|
|
7744
|
+
|
|
7745
|
+
|
|
7746
|
+
def _load_kernel_schema() -> dict[str, Any]:
    """Read and decode the kernel schema file.

    Returns:
        The decoded JSON document.

    Raises:
        RuntimeError: If the schema file does not exist, or if its root
            value is not a JSON object.
    """
    schema_file = _kernel_schema_path()
    if not schema_file.exists():
        raise RuntimeError(f"kernel schema is missing: {schema_file}")
    with schema_file.open(encoding="utf-8") as handle:
        document = json.load(handle)
    if isinstance(document, dict):
        return document
    raise RuntimeError("kernel schema root must be an object")
|
|
7754
|
+
|
|
7755
|
+
|
|
7756
|
+
def _kernel_schema_metadata() -> tuple[dict[str, list[str]], dict[str, dict[str, Any]], set[str], list[str]]:
    """Derive validation metadata from the kernel JSON schema.

    Returns:
        A 4-tuple of:

        * required fields per artifact class, taken from each ``allOf``
          clause whose ``if`` pins ``artifact_class`` to a string ``const``
          and whose ``then`` carries a ``required`` list;
        * a field-kind table: ``const`` and ``enum`` properties keep their
          schema value, and a ``#/$defs/<name>`` reference is recorded as
          kind ``<name>``;
        * the set of field names that have an entry in the kind table;
        * the non-blank property names in schema declaration order.

    Raises:
        RuntimeError: If the schema cannot be loaded or has no ``properties``
            object.
    """
    schema = _load_kernel_schema()
    properties = schema.get("properties")
    if not isinstance(properties, dict):
        raise RuntimeError("kernel schema is missing object properties")
    ordered_fields = [str(field).strip() for field in properties if str(field).strip()]

    field_kinds: dict[str, dict[str, Any]] = {}
    for field, raw in properties.items():
        if not isinstance(raw, dict):
            continue
        if "const" in raw:
            field_kinds[field] = {"kind": "const", "value": raw.get("const")}
        elif isinstance(raw.get("enum"), list):
            field_kinds[field] = {"kind": "enum", "value": list(raw["enum"])}
        else:
            ref = raw.get("$ref")
            if isinstance(ref, str) and ref.startswith("#/$defs/"):
                field_kinds[field] = {"kind": ref.split("/")[-1]}

    requirements: dict[str, list[str]] = {}
    raw_all_of = schema.get("allOf")
    for clause in raw_all_of if isinstance(raw_all_of, list) else []:
        if not isinstance(clause, dict):
            continue
        raw_if = clause.get("if")
        raw_then = clause.get("then")
        if not isinstance(raw_if, dict) or not isinstance(raw_then, dict):
            continue
        # Walk the nested lookup step by step so a malformed clause (for
        # example a non-object `properties`) is skipped instead of raising
        # AttributeError.
        if_properties = raw_if.get("properties")
        class_rule = if_properties.get("artifact_class") if isinstance(if_properties, dict) else None
        const = class_rule.get("const") if isinstance(class_rule, dict) else None
        required_fields = raw_then.get("required")
        if isinstance(const, str) and isinstance(required_fields, list):
            requirements[const] = [
                field.strip()
                for field in required_fields
                if isinstance(field, str) and field.strip()
            ]
    return requirements, field_kinds, set(field_kinds.keys()), ordered_fields
|
|
7800
|
+
|
|
7801
|
+
|
|
7802
|
+
# Schema-derived lookup tables, computed once when the module is loaded:
# required fields per artifact class, per-field kind metadata, the set of
# fields with a known kind, and the schema's property declaration order.
KERNEL_ARTIFACT_CLASS_REQUIREMENTS, KERNEL_FIELD_KINDS, KERNEL_ALLOWED_FIELDS, KERNEL_FIELD_ORDER = (
    _kernel_schema_metadata()
)
|
|
7808
|
+
|
|
7809
|
+
|
|
7810
|
+
def _kernel_ordered_fields_for_class(artifact_class: str, present_fields: Sequence[str] | None = None) -> list[str]:
    """Return the canonical field order for an artifact class.

    The order is ``schema_version``, ``artifact_class``, then the class's
    required fields, then every remaining field in ``KERNEL_FIELD_ORDER``;
    each name appears once, at its first position.

    Args:
        artifact_class: Class name used to look up required fields; unknown
            classes contribute no required fields.
        present_fields: When given, the result is restricted to names in this
            collection (blank entries are ignored).

    Returns:
        The ordered field names.
    """
    class_key = str(artifact_class).strip()
    candidates = [
        "schema_version",
        "artifact_class",
        *KERNEL_ARTIFACT_CLASS_REQUIREMENTS.get(class_key, []),
        *KERNEL_FIELD_ORDER,
    ]
    # dict.fromkeys keeps the first occurrence of each name, in order.
    ordered = list(dict.fromkeys(candidates))
    if present_fields is None:
        return ordered
    wanted = {str(name).strip() for name in present_fields if str(name).strip()}
    return [name for name in ordered if name in wanted]
|
|
7823
|
+
|
|
7824
|
+
|
|
7825
|
+
def _kernel_text_valid(value: Any) -> bool:
|
|
7826
|
+
return isinstance(value, str) and bool(value.strip())
|
|
7827
|
+
|
|
7828
|
+
|
|
7829
|
+
def _kernel_text_list_valid(value: Any) -> bool:
    """Return True when *value* is a non-empty list of non-blank strings."""
    if not isinstance(value, list) or not value:
        return False
    for item in value:
        if not _kernel_text_valid(item):
            return False
    return True
|
|
7831
|
+
|
|
7832
|
+
|
|
7833
|
+
def _kernel_field_present(field: str, value: Any) -> bool:
    """Report whether *value* counts as a filled-in value for *field*.

    Text-like kinds from ``KERNEL_FIELD_KINDS`` must satisfy their text
    checks; ``const``, ``enum`` and any other or unknown kind only need to
    be something other than ``None``.
    """
    kind = str(KERNEL_FIELD_KINDS.get(field, {}).get("kind", ""))
    text_checks = {
        "non_empty_text": _kernel_text_valid,
        "text_list": _kernel_text_list_valid,
        "text_or_text_list": lambda item: _kernel_text_valid(item) or _kernel_text_list_valid(item),
    }
    check = text_checks.get(kind)
    if check is None:
        return value is not None
    return check(value)
|
|
7278
7846
|
|
|
7279
7847
|
|
|
7848
|
+
def _kernel_field_shape_issues(field: str, value: Any) -> list[str]:
    """Return shape problems for *value* as a value of *field*.

    The expected shape comes from ``KERNEL_FIELD_KINDS``. Fields with no
    entry there, or with a kind not handled below, produce no issues.

    Args:
        field: Field name to look up.
        value: Candidate value taken from an artifact payload.

    Returns:
        A list of message fragments such as ``"must be a non-empty string."``;
        empty when the value is acceptable.
    """
    meta = KERNEL_FIELD_KINDS.get(field, {})
    kind = str(meta.get("kind", ""))
    if kind == "const":
        expected = meta.get("value")
        return [] if value == expected else [f"must equal `{expected}`."]
    if kind == "enum":
        # Compare against the raw enum members; stringify only for the
        # message. Matching against str() of each member would accept "1"
        # for an enum containing 1 and reject the real member.
        allowed = list(meta.get("value", []))
        if value in allowed:
            return []
        return [f"must be one of: {', '.join(str(x) for x in allowed)}."]
    if kind == "non_empty_text":
        return [] if _kernel_text_valid(value) else ["must be a non-empty string."]
    if kind == "text_list":
        return [] if _kernel_text_list_valid(value) else ["must be a non-empty list of non-empty strings."]
    if kind == "text_or_text_list":
        if _kernel_text_valid(value) or _kernel_text_list_valid(value):
            return []
        return ["must be a non-empty string or a non-empty list of non-empty strings."]
    return []
|
|
7866
|
+
|
|
7867
|
+
|
|
7868
|
+
def _validate_kernel_payload(
    payload: dict[str, Any],
    *,
    expected_class: str = "",
    extra_required_fields: Sequence[str] = (),
) -> dict[str, Any]:
    """Validate a kernel artifact payload against the schema-derived tables.

    Issues are collected in this order: unexpected fields, ``schema_version``
    shape, ``artifact_class`` shape, unsupported class, class mismatch, shape
    issues of the remaining known fields, and finally missing required fields.

    Args:
        payload: Decoded artifact mapping.
        expected_class: Class the caller expects; a differing non-empty
            ``artifact_class`` in the payload is reported as a mismatch.
        extra_required_fields: Additional field names to require on top of
            the class's own required fields.

    Returns:
        A dict with ``artifact_class``, ``expected_artifact_class``, ``valid``
        (True when no issues were found), ``missing_fields`` and ``issues``.
    """
    artifact_issues: list[str] = []
    missing_fields: list[str] = []

    for field in sorted(str(key) for key in payload.keys() if str(key) not in KERNEL_ALLOWED_FIELDS):
        artifact_issues.append(f"unexpected field: `{field}`.")

    schema_version = payload.get("schema_version")
    artifact_issues.extend(
        f"field `schema_version` {issue}"
        for issue in _kernel_field_shape_issues("schema_version", schema_version)
    )

    actual_class = str(payload.get("artifact_class", "")).strip()
    artifact_issues.extend(
        f"field `artifact_class` {issue}"
        for issue in _kernel_field_shape_issues("artifact_class", payload.get("artifact_class"))
    )
    if actual_class not in KERNEL_ARTIFACT_CLASS_REQUIREMENTS:
        artifact_issues.append(f"unsupported artifact_class: {actual_class or '(missing)'}.")

    if expected_class and actual_class and expected_class != actual_class:
        artifact_issues.append(
            f"artifact_class mismatch: expected `{expected_class}`, found `{actual_class}`."
        )

    field_class = actual_class or expected_class
    required_fields = list(KERNEL_ARTIFACT_CLASS_REQUIREMENTS.get(field_class, []))
    for field in _unique_strings([str(x).strip() for x in extra_required_fields if str(x).strip()]):
        if field not in required_fields:
            required_fields.append(field)

    # The two header fields were shape-checked above (so that a missing key is
    # reported too); skip them here so each problem is listed only once.
    already_checked = {"schema_version", "artifact_class"}
    for field, value in payload.items():
        if not isinstance(field, str) or field not in KERNEL_ALLOWED_FIELDS:
            continue
        if field in already_checked:
            continue
        for issue in _kernel_field_shape_issues(field, value):
            artifact_issues.append(f"field `{field}` {issue}")

    for field in required_fields:
        if not _kernel_field_present(field, payload.get(field)):
            missing_fields.append(field)
    if missing_fields:
        artifact_issues.append("missing required fields: " + ", ".join(missing_fields))

    return {
        "artifact_class": actual_class,
        "expected_artifact_class": expected_class,
        "valid": not artifact_issues,
        "missing_fields": missing_fields,
        "issues": artifact_issues,
    }
|
|
7920
|
+
|
|
7921
|
+
|
|
7922
|
+
def _kernel_canonical_payload(
    payload: dict[str, Any],
    *,
    drop_unknown_fields: bool,
) -> tuple[dict[str, Any], list[str]]:
    """Rebuild *payload* with known fields only, in canonical order.

    ``schema_version`` is always set to ``KERNEL_SCHEMA_VERSION`` and
    ``artifact_class`` to its stripped string form.

    Args:
        payload: Decoded artifact mapping.
        drop_unknown_fields: When False, any field outside
            ``KERNEL_ALLOWED_FIELDS`` aborts the conversion.

    Returns:
        ``(canonical_payload, unknown_fields)`` where ``unknown_fields`` is
        the sorted list of field names that were left out.

    Raises:
        RuntimeError: If unknown fields are present and may not be dropped,
            or if ``artifact_class`` is not a supported class.
    """
    unknown_fields = sorted(name for name in map(str, payload.keys()) if name not in KERNEL_ALLOWED_FIELDS)
    if unknown_fields and not drop_unknown_fields:
        listed = ", ".join(unknown_fields)
        raise RuntimeError(
            f"kernel artifact has unknown fields: {listed}. Re-run with --drop-unknown-fields to discard them."
        )

    artifact_class = str(payload.get("artifact_class", "")).strip()
    if artifact_class not in KERNEL_ARTIFACT_CLASS_REQUIREMENTS:
        raise RuntimeError(f"unsupported artifact_class: {artifact_class or '(missing)'}")

    known_payload: dict[str, Any] = {}
    for key, value in payload.items():
        name = str(key)
        if name in KERNEL_ALLOWED_FIELDS:
            known_payload[name] = value
    known_payload["schema_version"] = KERNEL_SCHEMA_VERSION
    known_payload["artifact_class"] = artifact_class

    ordered_fields = _kernel_ordered_fields_for_class(artifact_class, present_fields=list(known_payload.keys()))
    canonical: dict[str, Any] = {
        field: known_payload[field] for field in ordered_fields if field in known_payload
    }
    return canonical, unknown_fields
|
|
7951
|
+
|
|
7952
|
+
|
|
7953
|
+
def _kernel_proposal_template(
    *,
    proposal_kind: str,
    title: str,
    target_artifact_classes: Sequence[str],
    target_fields: Sequence[str],
) -> dict[str, Any]:
    """Build a draft kernel-evolution proposal filled with placeholder text.

    Args:
        proposal_kind: Stored as-is under ``proposal_kind``.
        title: Stored as-is under ``title``.
        target_artifact_classes: Affected classes; blanks are removed and the
            rest is passed through ``_unique_strings``.
        target_fields: Affected fields; cleaned the same way.

    Returns:
        The proposal mapping with status ``"draft"``.
    """

    def _clean(values: Sequence[str]) -> list[str]:
        stripped = [str(item).strip() for item in values]
        return _unique_strings([item for item in stripped if item])

    template: dict[str, Any] = {}
    template["schema_version"] = KERNEL_SCHEMA_VERSION
    template["proposal_kind"] = proposal_kind
    template["title"] = title
    template["status"] = "draft"
    template["summary"] = "describe the kernel evolution being proposed"
    template["target_scope"] = {
        "artifact_classes": _clean(target_artifact_classes),
        "fields": _clean(target_fields),
    }
    template["proposed_change"] = ["describe the exact structural change"]
    template["rationale"] = ["describe why the current kernel is insufficient"]
    template["evidence_refs"] = ["docs/ORP_REASONING_KERNEL_EVIDENCE_MATRIX.md"]
    template["compatibility_notes"] = ["describe backward-compatibility expectations"]
    template["migration_plan"] = ["describe how existing artifacts will be preserved or migrated"]
    template["evaluation_plan"] = ["describe what new evidence should justify promotion into the core kernel"]
    return template
|
|
7991
|
+
|
|
7992
|
+
|
|
7993
|
+
def _kernel_observation_stats_from_run(run: dict[str, Any]) -> dict[str, Any]:
|
|
7994
|
+
results = run.get("results", [])
|
|
7995
|
+
if not isinstance(results, list):
|
|
7996
|
+
results = []
|
|
7997
|
+
kernel_rows = [
|
|
7998
|
+
row.get("kernel_validation")
|
|
7999
|
+
for row in results
|
|
8000
|
+
if isinstance(row, dict) and isinstance(row.get("kernel_validation"), dict)
|
|
8001
|
+
]
|
|
8002
|
+
return {
|
|
8003
|
+
"run_id": str(run.get("run_id", "")).strip(),
|
|
8004
|
+
"kernel_validations": kernel_rows,
|
|
8005
|
+
}
|
|
8006
|
+
|
|
8007
|
+
|
|
7280
8008
|
def _kernel_validation_mode(gate: dict[str, Any]) -> str:
|
|
7281
8009
|
kernel_cfg = gate.get("kernel") if isinstance(gate.get("kernel"), dict) else {}
|
|
7282
8010
|
default_mode = "hard" if str(gate.get("phase", "")).strip() == "structure_kernel" else "soft"
|
|
@@ -7371,31 +8099,14 @@ def _validate_kernel_gate(
|
|
|
7371
8099
|
|
|
7372
8100
|
actual_class = ""
|
|
7373
8101
|
if payload:
|
|
7374
|
-
|
|
7375
|
-
|
|
7376
|
-
|
|
7377
|
-
|
|
7378
|
-
|
|
7379
|
-
|
|
7380
|
-
|
|
7381
|
-
|
|
7382
|
-
)
|
|
7383
|
-
|
|
7384
|
-
if expected_class and actual_class and expected_class != actual_class:
|
|
7385
|
-
artifact_issues.append(
|
|
7386
|
-
f"artifact_class mismatch: expected `{expected_class}`, found `{actual_class}`."
|
|
7387
|
-
)
|
|
7388
|
-
|
|
7389
|
-
field_class = actual_class or expected_class
|
|
7390
|
-
required_fields = list(KERNEL_ARTIFACT_CLASS_REQUIREMENTS.get(field_class, []))
|
|
7391
|
-
for field in extra_required_fields:
|
|
7392
|
-
if field not in required_fields:
|
|
7393
|
-
required_fields.append(field)
|
|
7394
|
-
for field in required_fields:
|
|
7395
|
-
if not _kernel_field_present(payload.get(field)):
|
|
7396
|
-
missing_fields.append(field)
|
|
7397
|
-
if missing_fields:
|
|
7398
|
-
artifact_issues.append("missing required fields: " + ", ".join(missing_fields))
|
|
8102
|
+
validation = _validate_kernel_payload(
|
|
8103
|
+
payload,
|
|
8104
|
+
expected_class=expected_class,
|
|
8105
|
+
extra_required_fields=extra_required_fields,
|
|
8106
|
+
)
|
|
8107
|
+
actual_class = str(validation.get("artifact_class", "")).strip()
|
|
8108
|
+
missing_fields = list(validation.get("missing_fields", []))
|
|
8109
|
+
artifact_issues.extend([str(issue) for issue in validation.get("issues", []) if isinstance(issue, str)])
|
|
7399
8110
|
|
|
7400
8111
|
valid = optional_skipped or (exists and not artifact_issues)
|
|
7401
8112
|
path_state = _path_for_state(path, repo_root)
|
|
@@ -7576,6 +8287,273 @@ def cmd_kernel_scaffold(args: argparse.Namespace) -> int:
|
|
|
7576
8287
|
return 0
|
|
7577
8288
|
|
|
7578
8289
|
|
|
8290
|
+
def _resolve_kernel_run_json_paths(
    *,
    repo_root: Path,
    run_ids: Sequence[str],
    run_jsons: Sequence[str],
) -> list[Path]:
    """Work out which RUN.json files a kernel stats scan should read.

    Explicit ``run_jsons`` take precedence over ``run_ids``; when either is
    given, only those entries (blank ones skipped) are resolved through
    ``_resolve_run_json_path``. With no explicit selection, runs are
    discovered from the ``runs`` mapping in ``orp/state.json`` and then from
    ``orp/artifacts/*/RUN.json``, without duplicates.

    Args:
        repo_root: Repository root used for all lookups.
        run_ids: Run ids requested by the caller.
        run_jsons: Explicit RUN.json paths requested by the caller.

    Returns:
        The resolved paths, in selection or discovery order.
    """
    resolved: list[Path] = []
    if run_jsons:
        for raw in run_jsons:
            if not str(raw).strip():
                continue
            _, path = _resolve_run_json_path(repo_root=repo_root, run_id_arg="", run_json_arg=str(raw))
            resolved.append(path)
        return resolved
    if run_ids:
        for raw in run_ids:
            if not str(raw).strip():
                continue
            _, path = _resolve_run_json_path(repo_root=repo_root, run_id_arg=str(raw), run_json_arg="")
            resolved.append(path)
        return resolved

    seen: set[Path] = set()
    state_path = repo_root / "orp" / "state.json"
    if state_path.exists():
        try:
            state = _read_json(state_path)
        except Exception:
            # Best-effort discovery: an unreadable state file just means the
            # artifacts directory is the only source.
            state = {}
        # A state file whose root is not an object is treated like an
        # unreadable one rather than crashing on `.get`.
        runs = state.get("runs") if isinstance(state, dict) else None
        if isinstance(runs, dict):
            for value in runs.values():
                if not isinstance(value, str) or not value.strip():
                    continue
                candidate = (repo_root / value).resolve()
                if candidate.exists() and candidate not in seen:
                    seen.add(candidate)
                    resolved.append(candidate)
    artifacts_root = repo_root / "orp" / "artifacts"
    if artifacts_root.exists():
        for candidate in sorted(artifacts_root.glob("*/RUN.json")):
            candidate = candidate.resolve()
            if candidate not in seen:
                seen.add(candidate)
                resolved.append(candidate)
    return resolved
|
|
8336
|
+
|
|
8337
|
+
|
|
8338
|
+
def _kernel_stats_payload(
    repo_root: Path,
    run_json_paths: Sequence[Path],
) -> dict[str, Any]:
    """Aggregate kernel validation results across the given RUN.json files.

    Args:
        repo_root: Repository root, used to render paths in the output.
        run_json_paths: RUN.json files to read.

    Returns:
        A summary mapping with run and artifact totals, per-mode and
        per-class counts, the ten most frequent missing fields, issues and
        artifact paths, human-readable observations, and a per-run listing.
    """

    def _bump(counter: dict[str, int], key: str) -> None:
        counter[key] = counter.get(key, 0) + 1

    def _list_of(container: dict[str, Any], key: str) -> list[Any]:
        found = container.get(key)
        return found if isinstance(found, list) else []

    def _top_ten(counter: dict[str, int], label: str) -> list[dict[str, Any]]:
        # Highest count first; ties are broken alphabetically.
        ranked = sorted(counter.items(), key=lambda item: (-item[1], item[0]))
        return [{label: name, "count": count} for name, count in ranked[:10]]

    runs_scanned = 0
    runs_with_kernel_validation = 0
    gate_rows_total = 0
    artifacts_total = 0
    artifacts_valid = 0
    artifacts_invalid = 0
    mode_counts: dict[str, int] = {}
    artifact_class_counts: dict[str, int] = {}
    missing_field_counts: dict[str, int] = {}
    issue_counts: dict[str, int] = {}
    path_counts: dict[str, int] = {}
    per_run: list[dict[str, Any]] = []

    for run_json in run_json_paths:
        run = _read_json(run_json)
        stats = _kernel_observation_stats_from_run(run)
        kernel_rows = stats["kernel_validations"]
        runs_scanned += 1
        if kernel_rows:
            runs_with_kernel_validation += 1
        per_run.append(
            {
                "run_id": stats["run_id"] or run_json.parent.name,
                "run_json": _path_for_state(run_json, repo_root),
                "kernel_validations": len(kernel_rows),
            }
        )
        for row in kernel_rows:
            if not isinstance(row, dict):
                continue
            gate_rows_total += 1
            _bump(mode_counts, str(row.get("mode", "")).strip() or "unknown")
            for artifact in _list_of(row, "artifacts"):
                if not isinstance(artifact, dict):
                    continue
                artifacts_total += 1
                if artifact.get("valid"):
                    artifacts_valid += 1
                else:
                    artifacts_invalid += 1
                class_label = artifact.get("artifact_class") or artifact.get("expected_artifact_class") or "unknown"
                _bump(artifact_class_counts, str(class_label).strip() or "unknown")
                artifact_path = str(artifact.get("path", "")).strip()
                if artifact_path:
                    _bump(path_counts, artifact_path)
                for field in _list_of(artifact, "missing_fields"):
                    field_name = str(field).strip()
                    if field_name:
                        _bump(missing_field_counts, field_name)
                for issue in _list_of(artifact, "issues"):
                    issue_text = str(issue).strip()
                    if issue_text:
                        _bump(issue_counts, issue_text)

    top_missing_fields = _top_ten(missing_field_counts, "field")
    top_issue_signals = _top_ten(issue_counts, "issue")
    top_paths = _top_ten(path_counts, "path")

    observations: list[str] = []
    if runs_scanned == 0:
        observations.append("No RUN.json artifacts were found. Run `orp gate run` with a structure_kernel gate to collect kernel observations.")
    elif runs_with_kernel_validation == 0:
        observations.append("RUN.json artifacts exist, but none recorded kernel_validation. Add a structure_kernel gate with a kernel.artifacts block.")
    else:
        if top_missing_fields:
            focus = ", ".join(f"{row['field']} ({row['count']})" for row in top_missing_fields[:5])
            observations.append(f"Most repeated missing fields: {focus}.")
        if artifacts_invalid == 0:
            observations.append("All observed kernel artifacts validated successfully across scanned runs.")
        else:
            observations.append(
                f"{artifacts_invalid} of {artifacts_total} observed kernel artifacts failed validation."
            )

    validation_rate = round((artifacts_valid / artifacts_total), 3) if artifacts_total else None
    return {
        "ok": True,
        "repo_root": str(repo_root),
        "runs_scanned": runs_scanned,
        "runs_with_kernel_validation": runs_with_kernel_validation,
        "kernel_validation_rows": gate_rows_total,
        "artifacts_total": artifacts_total,
        "artifacts_valid": artifacts_valid,
        "artifacts_invalid": artifacts_invalid,
        "artifact_validation_rate": validation_rate,
        "mode_counts": mode_counts,
        "artifact_class_counts": artifact_class_counts,
        "top_missing_fields": top_missing_fields,
        "top_issue_signals": top_issue_signals,
        "top_paths": top_paths,
        "observations": observations,
        "runs": per_run,
    }
|
|
8444
|
+
|
|
8445
|
+
|
|
8446
|
+
def cmd_kernel_stats(args: argparse.Namespace) -> int:
    """Handle ``orp kernel stats``: summarize kernel validation results.

    Reads ``repo_root``, the optional ``run_id`` / ``run_json`` selections and
    ``json_output`` from *args*, then prints the summary either as JSON or as
    ``key=value`` lines.

    Returns:
        Always 0.
    """
    repo_root = Path(args.repo_root).resolve()
    selected_ids = list(getattr(args, "run_id", []) or [])
    selected_jsons = list(getattr(args, "run_json", []) or [])
    run_json_paths = _resolve_kernel_run_json_paths(
        repo_root=repo_root,
        run_ids=selected_ids,
        run_jsons=selected_jsons,
    )
    payload = _kernel_stats_payload(repo_root, run_json_paths)
    if args.json_output:
        _print_json(payload)
        return 0

    headline_keys = (
        "runs_scanned",
        "runs_with_kernel_validation",
        "artifacts_total",
        "artifacts_valid",
        "artifacts_invalid",
    )
    for key in headline_keys:
        print(f"{key}={payload[key]}")
    for row in payload.get("top_missing_fields", []):
        print(f"missing_field={row['field']} count={row['count']}")
    for note in payload.get("observations", []):
        print(f"note={note}")
    return 0
|
|
8467
|
+
|
|
8468
|
+
|
|
8469
|
+
def cmd_kernel_propose(args: argparse.Namespace) -> int:
    """Handle ``orp kernel propose``: write a draft proposal file.

    The output path is ``args.out`` when given, otherwise
    ``analysis/kernel-proposals/<slug>.yml`` under the repository root, with
    the slug taken from ``args.slug`` or the title.

    Returns:
        Always 0.

    Raises:
        RuntimeError: If the title is blank, or the output file already
            exists and ``args.force`` is not set.
    """
    repo_root = Path(args.repo_root).resolve()
    title = str(args.title or "").strip()
    if not title:
        raise RuntimeError("proposal title is required.")
    slug = _slug_token(getattr(args, "slug", "") or title, fallback="kernel-proposal")

    out_raw = str(getattr(args, "out", "") or "").strip()
    out_path = (
        _resolve_cli_path(out_raw, repo_root)
        if out_raw
        else repo_root / "analysis" / "kernel-proposals" / f"{slug}.yml"
    )
    if out_path.exists() and not args.force:
        raise RuntimeError(
            f"kernel proposal already exists: {_path_for_state(out_path, repo_root)}. Use --force to overwrite."
        )

    payload = _kernel_proposal_template(
        proposal_kind=str(args.kind).strip(),
        title=title,
        target_artifact_classes=list(getattr(args, "artifact_class", []) or []),
        target_fields=list(getattr(args, "field", []) or []),
    )
    emitted_format = _write_structured_payload(out_path, payload, format_hint=args.format)
    result = {
        "ok": True,
        "path": _path_for_state(out_path, repo_root),
        "format": emitted_format,
        "proposal_kind": payload["proposal_kind"],
        "title": payload["title"],
    }
    if args.json_output:
        _print_json(result)
        return 0
    for key in ("path", "proposal_kind", "title", "format"):
        print(f"{key}={result[key]}")
    return 0
|
|
8506
|
+
|
|
8507
|
+
|
|
8508
|
+
def cmd_kernel_migrate(args: argparse.Namespace) -> int:
    """Handle ``orp kernel migrate``: rewrite an artifact in canonical form.

    The artifact is loaded, canonicalized, written to ``args.out`` (or back
    over the source file when no output path is given), and then validated;
    the validation outcome is reported but does not change the exit code.

    Returns:
        Always 0.

    Raises:
        RuntimeError: If the artifact is missing or not an object, or if a
            different output file already exists and ``args.force`` is not
            set. Errors from canonicalization propagate unchanged.
    """
    repo_root = Path(args.repo_root).resolve()
    artifact_path = _resolve_cli_path(args.artifact, repo_root)
    if not artifact_path.exists():
        raise RuntimeError(f"kernel artifact not found: {_path_for_state(artifact_path, repo_root)}")
    loaded_payload = _load_config(artifact_path)
    if not isinstance(loaded_payload, dict):
        raise RuntimeError("kernel artifact root must be an object.")

    out_raw = str(getattr(args, "out", "") or "").strip()
    out_path = artifact_path
    if out_raw:
        out_path = _resolve_cli_path(out_raw, repo_root)
    # Overwriting the source artifact itself never needs --force.
    writes_elsewhere = out_path != artifact_path
    if out_path.exists() and writes_elsewhere and not args.force:
        raise RuntimeError(
            f"output path already exists: {_path_for_state(out_path, repo_root)}. Use --force to overwrite."
        )

    original_schema_version = str(loaded_payload.get("schema_version", "") or "").strip()
    allow_drop = bool(getattr(args, "drop_unknown_fields", False))
    canonical_payload, dropped_unknown_fields = _kernel_canonical_payload(
        loaded_payload,
        drop_unknown_fields=allow_drop,
    )
    emitted_format = _write_structured_payload(out_path, canonical_payload, format_hint=args.format)
    canonical_class = str(canonical_payload.get("artifact_class", ""))
    validation = _validate_kernel_payload(canonical_payload, expected_class=canonical_class.strip())

    schema_version_after = str(canonical_payload.get("schema_version", ""))
    result = {
        "ok": True,
        "artifact": _path_for_state(artifact_path, repo_root),
        "path": _path_for_state(out_path, repo_root),
        "format": emitted_format,
        "schema_version_before": original_schema_version or "(missing)",
        "schema_version_after": schema_version_after,
        "schema_version_updated": (original_schema_version or "") != schema_version_after,
        "artifact_class": canonical_class,
        "dropped_unknown_fields": dropped_unknown_fields,
        "validation": validation,
    }
    if args.json_output:
        _print_json(result)
        return 0

    for key in ("path", "artifact_class", "schema_version_before", "schema_version_after"):
        print(f"{key}={result[key]}")
    if dropped_unknown_fields:
        print("dropped_unknown_fields=" + ",".join(dropped_unknown_fields))
    print(f"valid={'true' if validation.get('valid') else 'false'}")
    for issue in validation.get("issues", []):
        print(f"issue={issue}")
    return 0
|
|
8555
|
+
|
|
8556
|
+
|
|
7579
8557
|
def cmd_gate_run(args: argparse.Namespace) -> int:
|
|
7580
8558
|
repo_root = Path(args.repo_root).resolve()
|
|
7581
8559
|
_ensure_dirs(repo_root)
|
|
@@ -11732,6 +12710,43 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
11732
12710
|
add_json_flag(s_world_bind)
|
|
11733
12711
|
s_world_bind.set_defaults(func=cmd_world_bind, json_output=False)
|
|
11734
12712
|
|
|
12713
|
+
s_youtube = sub.add_parser("youtube", help="Public YouTube metadata and transcript inspection")
|
|
12714
|
+
youtube_sub = s_youtube.add_subparsers(dest="youtube_cmd", required=True)
|
|
12715
|
+
|
|
12716
|
+
s_youtube_inspect = youtube_sub.add_parser(
|
|
12717
|
+
"inspect",
|
|
12718
|
+
help="Inspect a YouTube video and fetch public metadata plus transcript text when captions are available",
|
|
12719
|
+
)
|
|
12720
|
+
s_youtube_inspect.add_argument("url", help="YouTube watch/share URL or 11-character video id")
|
|
12721
|
+
s_youtube_inspect.add_argument(
|
|
12722
|
+
"--lang",
|
|
12723
|
+
default="",
|
|
12724
|
+
help="Preferred caption language code, for example en or es",
|
|
12725
|
+
)
|
|
12726
|
+
s_youtube_inspect.add_argument(
|
|
12727
|
+
"--save",
|
|
12728
|
+
action="store_true",
|
|
12729
|
+
help="Save the inspected source artifact under orp/external/youtube/<video_id>.json",
|
|
12730
|
+
)
|
|
12731
|
+
s_youtube_inspect.add_argument(
|
|
12732
|
+
"--out",
|
|
12733
|
+
default="",
|
|
12734
|
+
help="Optional output path for the source artifact (.json, .yml, or .yaml)",
|
|
12735
|
+
)
|
|
12736
|
+
s_youtube_inspect.add_argument(
|
|
12737
|
+
"--format",
|
|
12738
|
+
default="",
|
|
12739
|
+
choices=["", "json", "yaml"],
|
|
12740
|
+
help="Optional explicit output format when saving",
|
|
12741
|
+
)
|
|
12742
|
+
s_youtube_inspect.add_argument(
|
|
12743
|
+
"--force",
|
|
12744
|
+
action="store_true",
|
|
12745
|
+
help="Overwrite an existing saved artifact",
|
|
12746
|
+
)
|
|
12747
|
+
add_json_flag(s_youtube_inspect)
|
|
12748
|
+
s_youtube_inspect.set_defaults(func=cmd_youtube_inspect, json_output=False)
|
|
12749
|
+
|
|
11735
12750
|
s_secrets = sub.add_parser("secrets", help="Hosted secret store and project binding operations")
|
|
11736
12751
|
secrets_sub = s_secrets.add_subparsers(dest="secrets_cmd", required=True)
|
|
11737
12752
|
|
|
@@ -12658,6 +13673,106 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
12658
13673
|
add_json_flag(s_kernel_scaffold)
|
|
12659
13674
|
s_kernel_scaffold.set_defaults(func=cmd_kernel_scaffold, json_output=False)
|
|
12660
13675
|
|
|
13676
|
+
s_kernel_stats = kernel_sub.add_parser(
|
|
13677
|
+
"stats",
|
|
13678
|
+
help="Summarize observed kernel validation pressure from RUN.json artifacts",
|
|
13679
|
+
)
|
|
13680
|
+
s_kernel_stats.add_argument(
|
|
13681
|
+
"--run-id",
|
|
13682
|
+
action="append",
|
|
13683
|
+
default=[],
|
|
13684
|
+
help="Specific run id to include (repeatable). Defaults to all discovered runs.",
|
|
13685
|
+
)
|
|
13686
|
+
s_kernel_stats.add_argument(
|
|
13687
|
+
"--run-json",
|
|
13688
|
+
action="append",
|
|
13689
|
+
default=[],
|
|
13690
|
+
help="Explicit RUN.json path to include (repeatable). Defaults to all discovered runs.",
|
|
13691
|
+
)
|
|
13692
|
+
add_json_flag(s_kernel_stats)
|
|
13693
|
+
s_kernel_stats.set_defaults(func=cmd_kernel_stats, json_output=False)
|
|
13694
|
+
|
|
13695
|
+
s_kernel_propose = kernel_sub.add_parser(
|
|
13696
|
+
"propose",
|
|
13697
|
+
help="Scaffold a governed kernel-evolution proposal artifact",
|
|
13698
|
+
)
|
|
13699
|
+
s_kernel_propose.add_argument(
|
|
13700
|
+
"--kind",
|
|
13701
|
+
required=True,
|
|
13702
|
+
choices=["add_field", "new_class", "requirement_change", "deprecate_field"],
|
|
13703
|
+
help="Type of kernel evolution proposal",
|
|
13704
|
+
)
|
|
13705
|
+
s_kernel_propose.add_argument(
|
|
13706
|
+
"--title",
|
|
13707
|
+
required=True,
|
|
13708
|
+
help="Proposal title",
|
|
13709
|
+
)
|
|
13710
|
+
s_kernel_propose.add_argument(
|
|
13711
|
+
"--artifact-class",
|
|
13712
|
+
action="append",
|
|
13713
|
+
default=[],
|
|
13714
|
+
choices=sorted(KERNEL_ARTIFACT_CLASS_REQUIREMENTS.keys()),
|
|
13715
|
+
help="Affected kernel artifact class (repeatable)",
|
|
13716
|
+
)
|
|
13717
|
+
s_kernel_propose.add_argument(
|
|
13718
|
+
"--field",
|
|
13719
|
+
action="append",
|
|
13720
|
+
default=[],
|
|
13721
|
+
help="Affected kernel field name (repeatable)",
|
|
13722
|
+
)
|
|
13723
|
+
s_kernel_propose.add_argument(
|
|
13724
|
+
"--slug",
|
|
13725
|
+
default="",
|
|
13726
|
+
help="Optional output slug override",
|
|
13727
|
+
)
|
|
13728
|
+
s_kernel_propose.add_argument(
|
|
13729
|
+
"--out",
|
|
13730
|
+
default="",
|
|
13731
|
+
help="Optional output path (default: analysis/kernel-proposals/<slug>.yml)",
|
|
13732
|
+
)
|
|
13733
|
+
s_kernel_propose.add_argument(
|
|
13734
|
+
"--format",
|
|
13735
|
+
default="",
|
|
13736
|
+
choices=["", "yaml", "json"],
|
|
13737
|
+
help="Optional explicit output format",
|
|
13738
|
+
)
|
|
13739
|
+
s_kernel_propose.add_argument(
|
|
13740
|
+
"--force",
|
|
13741
|
+
action="store_true",
|
|
13742
|
+
help="Overwrite an existing proposal at the output path",
|
|
13743
|
+
)
|
|
13744
|
+
add_json_flag(s_kernel_propose)
|
|
13745
|
+
s_kernel_propose.set_defaults(func=cmd_kernel_propose, json_output=False)
|
|
13746
|
+
|
|
13747
|
+
s_kernel_migrate = kernel_sub.add_parser(
|
|
13748
|
+
"migrate",
|
|
13749
|
+
help="Rewrite a kernel artifact into the current canonical field order and schema version",
|
|
13750
|
+
)
|
|
13751
|
+
s_kernel_migrate.add_argument("artifact", help="Kernel artifact path (.yml, .yaml, or .json)")
|
|
13752
|
+
s_kernel_migrate.add_argument(
|
|
13753
|
+
"--out",
|
|
13754
|
+
default="",
|
|
13755
|
+
help="Optional output path (default: rewrite in place)",
|
|
13756
|
+
)
|
|
13757
|
+
s_kernel_migrate.add_argument(
|
|
13758
|
+
"--format",
|
|
13759
|
+
default="",
|
|
13760
|
+
choices=["", "yaml", "json"],
|
|
13761
|
+
help="Optional explicit output format",
|
|
13762
|
+
)
|
|
13763
|
+
s_kernel_migrate.add_argument(
|
|
13764
|
+
"--drop-unknown-fields",
|
|
13765
|
+
action="store_true",
|
|
13766
|
+
help="Drop unknown fields instead of failing migration",
|
|
13767
|
+
)
|
|
13768
|
+
s_kernel_migrate.add_argument(
|
|
13769
|
+
"--force",
|
|
13770
|
+
action="store_true",
|
|
13771
|
+
help="Allow overwriting an existing --out path",
|
|
13772
|
+
)
|
|
13773
|
+
add_json_flag(s_kernel_migrate)
|
|
13774
|
+
s_kernel_migrate.set_defaults(func=cmd_kernel_migrate, json_output=False)
|
|
13775
|
+
|
|
12661
13776
|
s_gate = sub.add_parser("gate", help="Gate operations")
|
|
12662
13777
|
gate_sub = s_gate.add_subparsers(dest="gate_cmd", required=True)
|
|
12663
13778
|
s_run = gate_sub.add_parser("run", help="Run configured gates for a profile")
|