open-research-protocol 0.4.7 → 0.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46):
  1. package/README.md +9 -0
  2. package/cli/orp.py +668 -43
  3. package/docs/ORP_REASONING_KERNEL_AGENT_PILOT.md +125 -0
  4. package/docs/ORP_REASONING_KERNEL_AGENT_REPLICATION.md +97 -0
  5. package/docs/ORP_REASONING_KERNEL_CANONICAL_CONTINUATION_PILOT.md +100 -0
  6. package/docs/ORP_REASONING_KERNEL_COMPARISON_PILOT.md +116 -0
  7. package/docs/ORP_REASONING_KERNEL_CONTINUATION_PILOT.md +86 -0
  8. package/docs/ORP_REASONING_KERNEL_EVALUATION_PLAN.md +261 -0
  9. package/docs/ORP_REASONING_KERNEL_EVIDENCE_MATRIX.md +131 -0
  10. package/docs/ORP_REASONING_KERNEL_EVOLUTION.md +123 -0
  11. package/docs/ORP_REASONING_KERNEL_PICKUP_PILOT.md +107 -0
  12. package/docs/ORP_REASONING_KERNEL_TECHNICAL_VALIDATION.md +140 -22
  13. package/docs/ORP_REASONING_KERNEL_V0_1.md +11 -0
  14. package/docs/benchmarks/orp_reasoning_kernel_agent_pilot_v0_1.json +796 -0
  15. package/docs/benchmarks/orp_reasoning_kernel_agent_replication_task_smoke.json +487 -0
  16. package/docs/benchmarks/orp_reasoning_kernel_agent_replication_v0_1.json +1927 -0
  17. package/docs/benchmarks/orp_reasoning_kernel_agent_replication_v0_2.json +10217 -0
  18. package/docs/benchmarks/orp_reasoning_kernel_canonical_continuation_task_smoke.json +174 -0
  19. package/docs/benchmarks/orp_reasoning_kernel_canonical_continuation_v0_1.json +598 -0
  20. package/docs/benchmarks/orp_reasoning_kernel_comparison_v0_1.json +688 -0
  21. package/docs/benchmarks/orp_reasoning_kernel_continuation_task_smoke.json +150 -0
  22. package/docs/benchmarks/orp_reasoning_kernel_continuation_v0_1.json +448 -0
  23. package/docs/benchmarks/orp_reasoning_kernel_pickup_v0_1.json +594 -0
  24. package/docs/benchmarks/orp_reasoning_kernel_v0_1_validation.json +769 -41
  25. package/examples/README.md +2 -0
  26. package/examples/kernel/comparison/comparison-corpus.json +337 -0
  27. package/examples/kernel/comparison/next-task-continuation.json +55 -0
  28. package/examples/kernel/corpus/operations/habanero-routing.checkpoint.kernel.yml +12 -0
  29. package/examples/kernel/corpus/operations/runner-routing.policy.kernel.yml +9 -0
  30. package/examples/kernel/corpus/product/project-home.decision.kernel.yml +11 -0
  31. package/examples/kernel/corpus/research/kernel-handoff.experiment.kernel.yml +16 -0
  32. package/examples/kernel/corpus/research/lane-drift.hypothesis.kernel.yml +11 -0
  33. package/examples/kernel/corpus/software/trace-widget.task.kernel.yml +13 -0
  34. package/examples/kernel/corpus/writing/kernel-launch.result.kernel.yml +12 -0
  35. package/package.json +4 -1
  36. package/scripts/orp-kernel-agent-pilot.py +673 -0
  37. package/scripts/orp-kernel-agent-replication.py +307 -0
  38. package/scripts/orp-kernel-benchmark.py +471 -2
  39. package/scripts/orp-kernel-canonical-continuation.py +381 -0
  40. package/scripts/orp-kernel-ci-check.py +138 -0
  41. package/scripts/orp-kernel-comparison.py +592 -0
  42. package/scripts/orp-kernel-continuation-pilot.py +384 -0
  43. package/scripts/orp-kernel-pickup.py +401 -0
  44. package/spec/v1/kernel-extension.schema.json +96 -0
  45. package/spec/v1/kernel-proposal.schema.json +115 -0
  46. package/spec/v1/kernel.schema.json +2 -1
@@ -2,6 +2,7 @@
2
2
  from __future__ import annotations
3
3
 
4
4
  import argparse
5
+ import importlib.util
5
6
  import json
6
7
  from pathlib import Path
7
8
  import platform
@@ -15,6 +16,7 @@ from typing import Any
15
16
 
16
17
  REPO_ROOT = Path(__file__).resolve().parents[1]
17
18
  CLI = ["node", "bin/orp.js"]
19
+ CORPUS_ROOT = REPO_ROOT / "examples" / "kernel" / "corpus"
18
20
  ARTIFACT_CLASSES = [
19
21
  "task",
20
22
  "decision",
@@ -24,6 +26,75 @@ ARTIFACT_CLASSES = [
24
26
  "policy",
25
27
  "result",
26
28
  ]
29
+ VALID_REQUIREMENT_FIXTURES: dict[str, dict[str, Any]] = {
30
+ "task": {
31
+ "schema_version": "1.0.0",
32
+ "artifact_class": "task",
33
+ "object": "terminal trace widget",
34
+ "goal": "surface lane drift",
35
+ "boundary": "terminal-first lane visibility",
36
+ "constraints": ["low friction"],
37
+ "success_criteria": ["operator spots drift quickly"],
38
+ },
39
+ "decision": {
40
+ "schema_version": "1.0.0",
41
+ "artifact_class": "decision",
42
+ "question": "what should the home screen emphasize first?",
43
+ "chosen_path": "linked projects first",
44
+ "rejected_alternatives": ["idea board default"],
45
+ "rationale": "active work should be foregrounded",
46
+ "consequences": ["idea browsing becomes secondary navigation"],
47
+ },
48
+ "hypothesis": {
49
+ "schema_version": "1.0.0",
50
+ "artifact_class": "hypothesis",
51
+ "claim": "drift summaries reduce missed stalled lanes",
52
+ "boundary": "terminal-first multi-lane workflows",
53
+ "assumptions": ["operators consult summaries while working"],
54
+ "test_path": "compare stalled-lane detection with and without summaries",
55
+ "falsifiers": ["no measurable pickup improvement"],
56
+ },
57
+ "experiment": {
58
+ "schema_version": "1.0.0",
59
+ "artifact_class": "experiment",
60
+ "objective": "measure whether kernel tasks improve handoff pickup",
61
+ "method": "run matched handoff trials",
62
+ "inputs": ["task prompts", "reviewers"],
63
+ "outputs": ["pickup scores", "clarification counts"],
64
+ "evidence_expectations": ["ratings", "artifact corpus"],
65
+ "interpretation_limits": ["small internal sample"],
66
+ },
67
+ "checkpoint": {
68
+ "schema_version": "1.0.0",
69
+ "artifact_class": "checkpoint",
70
+ "completed_unit": "restored canonical runner routing",
71
+ "current_state": "linked project and primary session are synchronized",
72
+ "risks": ["inactive machines may still need a sync"],
73
+ "next_handoff_target": "rerun runner sync on active machines",
74
+ "artifact_refs": [".git/orp/link/project.json", "orp/HANDOFF.md"],
75
+ },
76
+ "policy": {
77
+ "schema_version": "1.0.0",
78
+ "artifact_class": "policy",
79
+ "scope": "hosted runner job pickup",
80
+ "rule": "route only to linked projects with routeable local sessions",
81
+ "rationale": "prevent unroutable job claims",
82
+ "invariants": ["claimed jobs must have a real local execution target"],
83
+ "enforcement_surface": "runner sync poll and work lifecycle",
84
+ },
85
+ "result": {
86
+ "schema_version": "1.0.0",
87
+ "artifact_class": "result",
88
+ "claim": "ORP ships a real reasoning kernel with enforceable promotion semantics",
89
+ "evidence_paths": [
90
+ "docs/ORP_REASONING_KERNEL_V0_1.md",
91
+ "docs/ORP_REASONING_KERNEL_TECHNICAL_VALIDATION.md",
92
+ ],
93
+ "status": "shipped in ORP CLI",
94
+ "interpretation_limits": ["comparative superiority is not yet proven"],
95
+ "next_follow_up": "run comparative artifact and handoff studies",
96
+ },
97
+ }
27
98
 
28
99
 
29
100
  def _run(
@@ -69,6 +140,35 @@ def _stats(values: list[float]) -> dict[str, float]:
69
140
  }
70
141
 
71
142
 
143
+ def _load_cli_module() -> Any:
144
+ module_path = REPO_ROOT / "cli" / "orp.py"
145
+ spec = importlib.util.spec_from_file_location("orp_cli_kernel_benchmark", module_path)
146
+ if spec is None or spec.loader is None:
147
+ raise RuntimeError(f"failed to load CLI module from {module_path}")
148
+ module = importlib.util.module_from_spec(spec)
149
+ spec.loader.exec_module(module)
150
+ return module
151
+
152
+
153
+ def _load_kernel_schema_requirements() -> dict[str, list[str]]:
154
+ schema_path = REPO_ROOT / "spec" / "v1" / "kernel.schema.json"
155
+ payload = json.loads(schema_path.read_text(encoding="utf-8"))
156
+ out: dict[str, list[str]] = {}
157
+ for clause in payload.get("allOf", []):
158
+ if not isinstance(clause, dict):
159
+ continue
160
+ const = (
161
+ clause.get("if", {})
162
+ .get("properties", {})
163
+ .get("artifact_class", {})
164
+ .get("const")
165
+ )
166
+ required = clause.get("then", {}).get("required")
167
+ if isinstance(const, str) and isinstance(required, list):
168
+ out[const] = [str(x) for x in required if isinstance(x, str)]
169
+ return out
170
+
171
+
72
172
  def _benchmark_init_starter(iterations: int) -> dict[str, Any]:
73
173
  init_times: list[float] = []
74
174
  validate_times: list[float] = []
@@ -103,7 +203,7 @@ def _benchmark_init_starter(iterations: int) -> dict[str, Any]:
103
203
  targets = {
104
204
  "init_mean_lt_ms": 350.0,
105
205
  "validate_mean_lt_ms": 200.0,
106
- "gate_mean_lt_ms": 300.0,
206
+ "gate_mean_lt_ms": 325.0,
107
207
  }
108
208
  observed = {
109
209
  "init": _stats(init_times),
@@ -330,6 +430,307 @@ def _benchmark_gate_modes() -> dict[str, Any]:
330
430
  }
331
431
 
332
432
 
433
+ def _benchmark_schema_alignment() -> dict[str, Any]:
434
+ cli_module = _load_cli_module()
435
+ schema_requirements = _load_kernel_schema_requirements()
436
+ cli_requirements = dict(getattr(cli_module, "KERNEL_ARTIFACT_CLASS_REQUIREMENTS", {}))
437
+ schema_fields = set(json.loads((REPO_ROOT / "spec" / "v1" / "kernel.schema.json").read_text(encoding="utf-8")).get("properties", {}).keys())
438
+ cli_fields = set(getattr(cli_module, "KERNEL_ALLOWED_FIELDS", set()))
439
+ return {
440
+ "schema_requirements": schema_requirements,
441
+ "cli_requirements": cli_requirements,
442
+ "schema_fields_total": len(schema_fields),
443
+ "cli_fields_total": len(cli_fields),
444
+ "meets_expectations": {
445
+ "requirements_match": schema_requirements == cli_requirements,
446
+ "fields_match": schema_fields == cli_fields,
447
+ },
448
+ }
449
+
450
+
451
+ def _benchmark_cross_domain_corpus() -> dict[str, Any]:
452
+ if not CORPUS_ROOT.exists():
453
+ raise RuntimeError(f"kernel corpus root is missing: {CORPUS_ROOT}")
454
+
455
+ rows: list[dict[str, Any]] = []
456
+ validate_times: list[float] = []
457
+ domains: set[str] = set()
458
+ classes: set[str] = set()
459
+ files = sorted(
460
+ path for path in CORPUS_ROOT.rglob("*") if path.is_file() and path.suffix.lower() in {".yml", ".yaml", ".json"}
461
+ )
462
+ if not files:
463
+ raise RuntimeError(f"kernel corpus root has no fixtures: {CORPUS_ROOT}")
464
+
465
+ with tempfile.TemporaryDirectory(prefix="orp-kernel-bench-corpus.") as td:
466
+ root = Path(td)
467
+ for path in files:
468
+ rel = path.relative_to(CORPUS_ROOT)
469
+ domain = rel.parts[0] if len(rel.parts) > 1 else "unknown"
470
+ domains.add(domain)
471
+ target = root / "analysis" / rel.name
472
+ target.parent.mkdir(parents=True, exist_ok=True)
473
+ target.write_text(path.read_text(encoding="utf-8"), encoding="utf-8")
474
+ validate_ms, validate_proc = _timed_orp(root, "kernel", "validate", str(target.relative_to(root)), "--json")
475
+ validate_payload = json.loads(validate_proc.stdout)
476
+ if not validate_payload.get("ok"):
477
+ raise RuntimeError(f"corpus benchmark failed for fixture={rel}")
478
+ classes.add(validate_payload["artifact_result"]["artifact_class"])
479
+ validate_times.append(validate_ms)
480
+ rows.append(
481
+ {
482
+ "fixture": rel.as_posix(),
483
+ "domain": domain,
484
+ "artifact_class": validate_payload["artifact_result"]["artifact_class"],
485
+ "validate_ms": round(validate_ms, 3),
486
+ }
487
+ )
488
+
489
+ observed = {"validate": _stats(validate_times)}
490
+ targets = {
491
+ "domains_min": 5,
492
+ "fixtures_min": 7,
493
+ "validate_mean_lt_ms": 200.0,
494
+ }
495
+ return {
496
+ "fixtures_total": len(rows),
497
+ "domains_total": len(domains),
498
+ "artifact_classes_total": len(classes),
499
+ "rows": rows,
500
+ "observed": observed,
501
+ "targets": targets,
502
+ "meets_targets": {
503
+ "domains": len(domains) >= targets["domains_min"],
504
+ "fixtures": len(rows) >= targets["fixtures_min"],
505
+ "validate": observed["validate"]["mean_ms"] < targets["validate_mean_lt_ms"],
506
+ },
507
+ }
508
+
509
+
510
+ def _benchmark_requirement_enforcement() -> dict[str, Any]:
511
+ rows: list[dict[str, Any]] = []
512
+ validate_times: list[float] = []
513
+ total_missing_cases = 0
514
+ requirements = _load_kernel_schema_requirements()
515
+
516
+ with tempfile.TemporaryDirectory(prefix="orp-kernel-bench-requirements.") as td:
517
+ root = Path(td)
518
+ for artifact_class, payload in VALID_REQUIREMENT_FIXTURES.items():
519
+ for removed_field in requirements[artifact_class]:
520
+ invalid_payload = dict(payload)
521
+ invalid_payload.pop(removed_field, None)
522
+ target = root / "analysis" / f"{artifact_class}.{removed_field}.invalid.kernel.json"
523
+ _write_json(target, invalid_payload)
524
+ validate_ms, validate_proc = _timed_orp(
525
+ root,
526
+ "kernel",
527
+ "validate",
528
+ str(target.relative_to(root)),
529
+ "--artifact-class",
530
+ artifact_class,
531
+ "--json",
532
+ check=False,
533
+ )
534
+ validate_payload = json.loads(validate_proc.stdout)
535
+ validate_times.append(validate_ms)
536
+ artifact_result = validate_payload["artifact_result"]
537
+ total_missing_cases += 1 if removed_field in artifact_result.get("missing_fields", []) else 0
538
+ rows.append(
539
+ {
540
+ "artifact_class": artifact_class,
541
+ "removed_field": removed_field,
542
+ "exit_code": validate_proc.returncode,
543
+ "valid": artifact_result.get("valid", validate_payload.get("ok", False)),
544
+ "missing_fields": artifact_result.get("missing_fields", []),
545
+ "validate_ms": round(validate_ms, 3),
546
+ }
547
+ )
548
+
549
+ observed = {"validate": _stats(validate_times)}
550
+ targets = {
551
+ "all_cases_detected": sum(len(fields) for fields in requirements.values()),
552
+ "validate_mean_lt_ms": 200.0,
553
+ }
554
+ return {
555
+ "cases_total": len(rows),
556
+ "rows": rows,
557
+ "observed": observed,
558
+ "targets": targets,
559
+ "meets_targets": {
560
+ "all_cases_detected": total_missing_cases == targets["all_cases_detected"]
561
+ and all(row["exit_code"] == 1 for row in rows)
562
+ and all(row["valid"] is False for row in rows),
563
+ "validate": observed["validate"]["mean_ms"] < targets["validate_mean_lt_ms"],
564
+ },
565
+ }
566
+
567
+
568
+ def _benchmark_representation_invariance() -> dict[str, Any]:
569
+ yaml_body = (
570
+ 'schema_version: "1.0.0"\n'
571
+ "artifact_class: task\n"
572
+ "object: terminal trace widget\n"
573
+ "goal: surface lane drift\n"
574
+ "boundary:\n"
575
+ " - terminal-first lane visibility\n"
576
+ "constraints:\n"
577
+ " - low friction\n"
578
+ "success_criteria:\n"
579
+ " - operator spots drift quickly\n"
580
+ )
581
+ json_body = {
582
+ "schema_version": "1.0.0",
583
+ "artifact_class": "task",
584
+ "object": "terminal trace widget",
585
+ "goal": "surface lane drift",
586
+ "boundary": ["terminal-first lane visibility"],
587
+ "constraints": ["low friction"],
588
+ "success_criteria": ["operator spots drift quickly"],
589
+ }
590
+
591
+ with tempfile.TemporaryDirectory(prefix="orp-kernel-bench-invariance.") as td:
592
+ root = Path(td)
593
+ yaml_path = root / "analysis" / "task.kernel.yml"
594
+ json_path = root / "analysis" / "task.kernel.json"
595
+ yaml_path.parent.mkdir(parents=True, exist_ok=True)
596
+ yaml_path.write_text(yaml_body, encoding="utf-8")
597
+ _write_json(json_path, json_body)
598
+
599
+ yaml_ms, yaml_proc = _timed_orp(root, "kernel", "validate", str(yaml_path.relative_to(root)), "--json")
600
+ json_ms, json_proc = _timed_orp(root, "kernel", "validate", str(json_path.relative_to(root)), "--json")
601
+ yaml_payload = json.loads(yaml_proc.stdout)
602
+ json_payload = json.loads(json_proc.stdout)
603
+ yaml_result = yaml_payload["artifact_result"]
604
+ json_result = json_payload["artifact_result"]
605
+
606
+ comparable_yaml = {k: v for k, v in yaml_result.items() if k != "path"}
607
+ comparable_json = {k: v for k, v in json_result.items() if k != "path"}
608
+ return {
609
+ "yaml_ms": round(yaml_ms, 3),
610
+ "json_ms": round(json_ms, 3),
611
+ "yaml_result": yaml_result,
612
+ "json_result": json_result,
613
+ "meets_expectations": {
614
+ "both_valid": yaml_payload["ok"] and json_payload["ok"],
615
+ "equivalent_results": comparable_yaml == comparable_json,
616
+ },
617
+ }
618
+
619
+
620
+ def _benchmark_mutation_stress() -> dict[str, Any]:
621
+ cases = [
622
+ {
623
+ "id": "unexpected_field",
624
+ "artifact_class": "task",
625
+ "payload": {
626
+ **VALID_REQUIREMENT_FIXTURES["task"],
627
+ "mystery_field": "should not be allowed",
628
+ },
629
+ "expected_fragment": "unexpected field",
630
+ },
631
+ {
632
+ "id": "whitespace_only_text",
633
+ "artifact_class": "task",
634
+ "payload": {
635
+ **VALID_REQUIREMENT_FIXTURES["task"],
636
+ "object": " ",
637
+ },
638
+ "expected_fragment": "field `object` must be a non-empty string",
639
+ },
640
+ {
641
+ "id": "wrong_text_list_type",
642
+ "artifact_class": "task",
643
+ "payload": {
644
+ **VALID_REQUIREMENT_FIXTURES["task"],
645
+ "constraints": {"bad": True},
646
+ },
647
+ "expected_fragment": "field `constraints` must be a non-empty string or a non-empty list",
648
+ },
649
+ {
650
+ "id": "non_string_list_item",
651
+ "artifact_class": "result",
652
+ "payload": {
653
+ **VALID_REQUIREMENT_FIXTURES["result"],
654
+ "evidence_paths": ["docs/ORP_REASONING_KERNEL_V0_1.md", 42],
655
+ },
656
+ "expected_fragment": "field `evidence_paths` must be a non-empty list of non-empty strings",
657
+ },
658
+ {
659
+ "id": "unsupported_artifact_class",
660
+ "artifact_class": "task",
661
+ "payload": {
662
+ **VALID_REQUIREMENT_FIXTURES["task"],
663
+ "artifact_class": "memo",
664
+ },
665
+ "expected_fragment": "unsupported artifact_class",
666
+ },
667
+ {
668
+ "id": "wrong_schema_version",
669
+ "artifact_class": "task",
670
+ "payload": {
671
+ **VALID_REQUIREMENT_FIXTURES["task"],
672
+ "schema_version": "9.9.9",
673
+ },
674
+ "expected_fragment": "field `schema_version` must equal `1.0.0`",
675
+ },
676
+ {
677
+ "id": "empty_list",
678
+ "artifact_class": "task",
679
+ "payload": {
680
+ **VALID_REQUIREMENT_FIXTURES["task"],
681
+ "boundary": [],
682
+ },
683
+ "expected_fragment": "missing required fields: boundary",
684
+ },
685
+ ]
686
+ rows: list[dict[str, Any]] = []
687
+ validate_times: list[float] = []
688
+
689
+ with tempfile.TemporaryDirectory(prefix="orp-kernel-bench-mutations.") as td:
690
+ root = Path(td)
691
+ for case in cases:
692
+ target = root / "analysis" / f"{case['id']}.kernel.json"
693
+ _write_json(target, case["payload"])
694
+ validate_ms, validate_proc = _timed_orp(
695
+ root,
696
+ "kernel",
697
+ "validate",
698
+ str(target.relative_to(root)),
699
+ "--artifact-class",
700
+ case["artifact_class"],
701
+ "--json",
702
+ check=False,
703
+ )
704
+ validate_payload = json.loads(validate_proc.stdout)
705
+ issues = validate_payload["artifact_result"]["issues"]
706
+ validate_times.append(validate_ms)
707
+ rows.append(
708
+ {
709
+ "id": case["id"],
710
+ "exit_code": validate_proc.returncode,
711
+ "issues": issues,
712
+ "validate_ms": round(validate_ms, 3),
713
+ "matched_expected_issue": any(case["expected_fragment"] in issue for issue in issues),
714
+ }
715
+ )
716
+
717
+ observed = {"validate": _stats(validate_times)}
718
+ targets = {
719
+ "cases_total": len(cases),
720
+ "validate_mean_lt_ms": 200.0,
721
+ }
722
+ return {
723
+ "cases_total": len(rows),
724
+ "rows": rows,
725
+ "observed": observed,
726
+ "targets": targets,
727
+ "meets_targets": {
728
+ "all_cases_detected": all(row["exit_code"] == 1 and row["matched_expected_issue"] for row in rows),
729
+ "validate": observed["validate"]["mean_ms"] < targets["validate_mean_lt_ms"],
730
+ },
731
+ }
732
+
733
+
333
734
  def _gather_metadata() -> dict[str, Any]:
334
735
  package_version = json.loads((REPO_ROOT / "package.json").read_text(encoding="utf-8"))["version"]
335
736
  commit = _run(["git", "rev-parse", "HEAD"]).stdout.strip()
@@ -350,8 +751,23 @@ def build_report(iterations: int) -> dict[str, Any]:
350
751
  init_benchmark = _benchmark_init_starter(iterations)
351
752
  roundtrip_benchmark = _benchmark_artifact_roundtrip()
352
753
  gate_mode_benchmark = _benchmark_gate_modes()
754
+ schema_alignment = _benchmark_schema_alignment()
755
+ corpus_benchmark = _benchmark_cross_domain_corpus()
756
+ requirement_benchmark = _benchmark_requirement_enforcement()
757
+ representation_invariance = _benchmark_representation_invariance()
758
+ mutation_stress = _benchmark_mutation_stress()
353
759
 
354
760
  claims = [
761
+ {
762
+ "id": "schema_validator_alignment",
763
+ "claim": "The CLI kernel requirements and allowed fields stay aligned with the published kernel schema.",
764
+ "status": "pass" if all(schema_alignment["meets_expectations"].values()) else "fail",
765
+ "evidence": [
766
+ "benchmarks.schema_alignment",
767
+ "spec/v1/kernel.schema.json",
768
+ "cli/orp.py",
769
+ ],
770
+ },
355
771
  {
356
772
  "id": "starter_kernel_bootstrap",
357
773
  "claim": "orp init seeds a valid starter kernel artifact and a passing default structure_kernel gate.",
@@ -407,6 +823,50 @@ def build_report(iterations: int) -> dict[str, Any]:
407
823
  "benchmarks.artifact_roundtrip",
408
824
  ],
409
825
  },
826
+ {
827
+ "id": "cross_domain_corpus_fit",
828
+ "claim": "The current v0.1 kernel class set fits a small cross-domain reference corpus cleanly.",
829
+ "status": "pass"
830
+ if all(corpus_benchmark["meets_targets"].values())
831
+ and corpus_benchmark["artifact_classes_total"] >= 7
832
+ else "fail",
833
+ "evidence": [
834
+ "benchmarks.cross_domain_corpus",
835
+ "examples/kernel/corpus",
836
+ ],
837
+ },
838
+ {
839
+ "id": "class_specific_requirement_enforcement",
840
+ "claim": "Each kernel artifact class rejects a candidate artifact when a required field is removed.",
841
+ "status": "pass"
842
+ if all(requirement_benchmark["meets_targets"].values())
843
+ else "fail",
844
+ "evidence": [
845
+ "benchmarks.requirement_enforcement",
846
+ "spec/v1/kernel.schema.json",
847
+ ],
848
+ },
849
+ {
850
+ "id": "representation_invariance",
851
+ "claim": "Equivalent YAML and JSON kernel artifacts validate to the same semantic result.",
852
+ "status": "pass"
853
+ if all(representation_invariance["meets_expectations"].values())
854
+ else "fail",
855
+ "evidence": [
856
+ "benchmarks.representation_invariance",
857
+ ],
858
+ },
859
+ {
860
+ "id": "adversarial_mutation_detection",
861
+ "claim": "The validator rejects adversarial near-miss artifacts such as unknown fields, wrong types, whitespace-only text, and bad schema metadata.",
862
+ "status": "pass"
863
+ if all(mutation_stress["meets_targets"].values())
864
+ else "fail",
865
+ "evidence": [
866
+ "benchmarks.mutation_stress",
867
+ "spec/v1/kernel.schema.json",
868
+ ],
869
+ },
410
870
  ]
411
871
 
412
872
  return {
@@ -417,13 +877,22 @@ def build_report(iterations: int) -> dict[str, Any]:
417
877
  "init_starter_kernel": init_benchmark,
418
878
  "artifact_roundtrip": roundtrip_benchmark,
419
879
  "gate_modes": gate_mode_benchmark,
880
+ "schema_alignment": schema_alignment,
881
+ "cross_domain_corpus": corpus_benchmark,
882
+ "requirement_enforcement": requirement_benchmark,
883
+ "representation_invariance": representation_invariance,
884
+ "mutation_stress": mutation_stress,
420
885
  },
421
886
  "claims": claims,
422
887
  "summary": {
423
888
  "all_claims_pass": all(row["status"] == "pass" for row in claims),
424
889
  "artifact_classes_total": roundtrip_benchmark["artifact_classes_total"],
890
+ "cross_domain_corpus_domains_total": corpus_benchmark["domains_total"],
425
891
  "all_performance_targets_met": all(init_benchmark["meets_targets"].values())
426
- and all(roundtrip_benchmark["meets_targets"].values()),
892
+ and all(roundtrip_benchmark["meets_targets"].values())
893
+ and corpus_benchmark["meets_targets"]["validate"]
894
+ and requirement_benchmark["meets_targets"]["validate"]
895
+ and mutation_stress["meets_targets"]["validate"],
427
896
  },
428
897
  }
429
898