open-research-protocol 0.4.16 → 0.4.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli/orp.py +67 -27
- package/docs/ORP_LINK_RUNNER_PLAN.md +6 -6
- package/docs/ORP_PUBLIC_LAUNCH_CHECKLIST.md +2 -2
- package/docs/ORP_REASONING_KERNEL_AGENT_PILOT.md +3 -3
- package/docs/ORP_REASONING_KERNEL_AGENT_REPLICATION.md +2 -2
- package/docs/ORP_REASONING_KERNEL_CANONICAL_CONTINUATION_PILOT.md +2 -2
- package/docs/ORP_REASONING_KERNEL_COMPARISON_PILOT.md +4 -4
- package/docs/ORP_REASONING_KERNEL_CONTINUATION_PILOT.md +3 -3
- package/docs/ORP_REASONING_KERNEL_EVALUATION_PLAN.md +8 -8
- package/docs/ORP_REASONING_KERNEL_EVIDENCE_MATRIX.md +25 -25
- package/docs/ORP_REASONING_KERNEL_EVOLUTION.md +4 -4
- package/docs/ORP_REASONING_KERNEL_PICKUP_PILOT.md +4 -4
- package/docs/ORP_REASONING_KERNEL_TECHNICAL_VALIDATION.md +19 -19
- package/docs/ORP_REASONING_KERNEL_V0_1.md +8 -8
- package/package.json +1 -1
- package/packages/orp-workspace-launcher/README.md +5 -5
- package/packages/orp-workspace-launcher/src/ledger.js +179 -47
- package/packages/orp-workspace-launcher/src/orp-command.js +16 -16
- package/packages/orp-workspace-launcher/test/ledger.test.js +119 -0
- package/packages/orp-workspace-launcher/test/orp-command.test.js +6 -0
- package/scripts/render-terminal-demo.py +3 -3
package/cli/orp.py
CHANGED
|
@@ -8940,7 +8940,7 @@ def _about_payload() -> dict[str, Any]:
|
|
|
8940
8940
|
"Frontier control is a built-in ORP ability exposed through `orp frontier ...`, separating the exact live point, the exact active milestone, the near structured checklist, and the farther major-version stack.",
|
|
8941
8941
|
"Agent modes are lightweight optional overlays for taste, perspective shifts, and fresh movement; `orp mode nudge sleek-minimal-progressive --json` gives agents a deterministic reminder they can call on when they want a deeper, wider, top-down, or rotated lens without changing ORP's core artifact boundaries.",
|
|
8942
8942
|
"Project/session linking is a built-in ORP ability exposed through `orp link ...` and stored machine-locally under `.git/orp/link/`.",
|
|
8943
|
-
"
|
|
8943
|
+
"Secrets are easiest to understand as saved keys and tokens: humans usually run `orp secrets add ...` and paste the value at the prompt, agents usually pipe the value with `--value-stdin`, and local macOS Keychain caching plus hosted sync are optional layers on top.",
|
|
8944
8944
|
"Machine runner identity, heartbeat, hosted sync, prompt-job execution, and lease control are built into ORP through `orp runner status`, `orp runner enable`, `orp runner disable`, `orp runner heartbeat`, `orp runner sync`, `orp runner work`, `orp runner cancel`, and `orp runner retry`.",
|
|
8945
8945
|
"Repo governance is built into ORP through `orp init`, `orp status`, `orp branch start`, `orp checkpoint create`, `orp backup`, `orp ready`, `orp doctor`, and `orp cleanup`.",
|
|
8946
8946
|
"Hosted workspace operations are built directly into ORP under `orp workspaces ...`, plus the linked auth/ideas/feature/world/checkpoint/agent surfaces.",
|
|
@@ -9046,11 +9046,11 @@ def _home_payload(repo_root: Path, config_arg: str) -> dict[str, Any]:
|
|
|
9046
9046
|
"command": "orp workspace list",
|
|
9047
9047
|
},
|
|
9048
9048
|
{
|
|
9049
|
-
"label": "
|
|
9049
|
+
"label": "Inspect saved paths and exact recovery commands for the main workspace",
|
|
9050
9050
|
"command": "orp workspace tabs main",
|
|
9051
9051
|
},
|
|
9052
9052
|
{
|
|
9053
|
-
"label": "
|
|
9053
|
+
"label": "Save a new API key or token interactively when you need one",
|
|
9054
9054
|
"command": 'orp secrets add --alias <alias> --label "<label>" --provider <provider>',
|
|
9055
9055
|
},
|
|
9056
9056
|
{
|
|
@@ -9076,10 +9076,6 @@ def _home_payload(repo_root: Path, config_arg: str) -> dict[str, Any]:
|
|
|
9076
9076
|
"label": "Inspect the saved workspace ledger inventory",
|
|
9077
9077
|
"command": "orp workspace list",
|
|
9078
9078
|
},
|
|
9079
|
-
{
|
|
9080
|
-
"label": "Print exact copyable crash-recovery commands for the main workspace",
|
|
9081
|
-
"command": "orp workspace tabs main",
|
|
9082
|
-
},
|
|
9083
9079
|
{
|
|
9084
9080
|
"label": "Inspect the saved tabs in the main workspace ledger",
|
|
9085
9081
|
"command": "orp workspace tabs main",
|
|
@@ -9113,11 +9109,11 @@ def _home_payload(repo_root: Path, config_arg: str) -> dict[str, Any]:
|
|
|
9113
9109
|
"command": "orp workspaces list --json",
|
|
9114
9110
|
},
|
|
9115
9111
|
{
|
|
9116
|
-
"label": "Inspect
|
|
9112
|
+
"label": "Inspect saved keys and tokens already known to ORP",
|
|
9117
9113
|
"command": "orp secrets list --json",
|
|
9118
9114
|
},
|
|
9119
9115
|
{
|
|
9120
|
-
"label": "Reuse a saved
|
|
9116
|
+
"label": "Reuse a saved key or prompt for it and save it for this project",
|
|
9121
9117
|
"command": "orp secrets ensure --alias <alias> --provider <provider> --current-project --json",
|
|
9122
9118
|
},
|
|
9123
9119
|
{
|
|
@@ -9408,11 +9404,11 @@ def _home_payload(repo_root: Path, config_arg: str) -> dict[str, Any]:
|
|
|
9408
9404
|
},
|
|
9409
9405
|
{
|
|
9410
9406
|
"id": "secrets",
|
|
9411
|
-
"description": "Saved API keys and tokens, with
|
|
9407
|
+
"description": "Saved API keys and tokens, with an interactive human flow, a stdin agent flow, optional local macOS Keychain caching, and optional hosted sync.",
|
|
9412
9408
|
"entrypoints": [
|
|
9413
9409
|
"orp secrets list --json",
|
|
9414
9410
|
"orp secrets show <alias-or-id> --json",
|
|
9415
|
-
|
|
9411
|
+
'orp secrets add --alias <alias> --label "<label>" --provider <provider>',
|
|
9416
9412
|
"orp secrets ensure --alias <alias> --provider <provider> --current-project --json",
|
|
9417
9413
|
"orp secrets keychain-list --json",
|
|
9418
9414
|
"orp secrets keychain-show <alias-or-id> --json",
|
|
@@ -9614,14 +9610,34 @@ def _render_home_screen(payload: dict[str, Any]) -> str:
|
|
|
9614
9610
|
lines.append("")
|
|
9615
9611
|
lines.append("Command Families")
|
|
9616
9612
|
if isinstance(abilities, list) and abilities:
|
|
9617
|
-
|
|
9613
|
+
ability_map = {
|
|
9614
|
+
str(row.get("id", "")).strip(): row
|
|
9615
|
+
for row in abilities
|
|
9616
|
+
if isinstance(row, dict) and str(row.get("id", "")).strip()
|
|
9617
|
+
}
|
|
9618
|
+
visible_ability_ids = [
|
|
9619
|
+
"workspace",
|
|
9620
|
+
"secrets",
|
|
9621
|
+
"governance",
|
|
9622
|
+
"frontier",
|
|
9623
|
+
"schedule",
|
|
9624
|
+
"modes",
|
|
9625
|
+
"hosted",
|
|
9626
|
+
"discover",
|
|
9627
|
+
]
|
|
9628
|
+
shown = 0
|
|
9629
|
+
for ability_id in visible_ability_ids:
|
|
9630
|
+
row = ability_map.get(ability_id)
|
|
9618
9631
|
if not isinstance(row, dict):
|
|
9619
9632
|
continue
|
|
9620
|
-
ability_id = str(row.get("id", "")).strip()
|
|
9621
9633
|
desc = _truncate(str(row.get("description", "")).strip())
|
|
9622
9634
|
lines.append(f" - {ability_id}")
|
|
9623
9635
|
if desc:
|
|
9624
9636
|
lines.append(f" {desc}")
|
|
9637
|
+
shown += 1
|
|
9638
|
+
remaining = max(len(ability_map) - shown, 0)
|
|
9639
|
+
if remaining:
|
|
9640
|
+
lines.append(f" - ... and {remaining} more in `orp about --json`")
|
|
9625
9641
|
|
|
9626
9642
|
lines.append("")
|
|
9627
9643
|
lines.append("Collaboration")
|
|
@@ -9652,7 +9668,7 @@ def _render_home_screen(payload: dict[str, Any]) -> str:
|
|
|
9652
9668
|
lines.append("")
|
|
9653
9669
|
lines.append("Quick Actions")
|
|
9654
9670
|
if isinstance(quick_actions, list):
|
|
9655
|
-
for row in quick_actions[:
|
|
9671
|
+
for row in quick_actions[:10]:
|
|
9656
9672
|
if not isinstance(row, dict):
|
|
9657
9673
|
continue
|
|
9658
9674
|
label = str(row.get("label", "")).strip()
|
|
@@ -9661,7 +9677,7 @@ def _render_home_screen(payload: dict[str, Any]) -> str:
|
|
|
9661
9677
|
continue
|
|
9662
9678
|
lines.append(f" - {label}")
|
|
9663
9679
|
lines.append(f" {command}")
|
|
9664
|
-
remaining = max(len(quick_actions) -
|
|
9680
|
+
remaining = max(len(quick_actions) - 10, 0)
|
|
9665
9681
|
if remaining:
|
|
9666
9682
|
lines.append(f" - ... and {remaining} more in `orp home --json`")
|
|
9667
9683
|
|
|
@@ -18544,10 +18560,31 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
18544
18560
|
add_json_flag(s_youtube_inspect)
|
|
18545
18561
|
s_youtube_inspect.set_defaults(func=cmd_youtube_inspect, json_output=False)
|
|
18546
18562
|
|
|
18547
|
-
s_secrets = sub.add_parser(
|
|
18563
|
+
s_secrets = sub.add_parser(
|
|
18564
|
+
"secrets",
|
|
18565
|
+
help="Save and reuse API keys and tokens locally, with optional hosted sync",
|
|
18566
|
+
description=(
|
|
18567
|
+
"ORP secrets are easiest to understand as saved keys and tokens.\n\n"
|
|
18568
|
+
"Human flow:\n"
|
|
18569
|
+
" 1. Run `orp secrets add ...`\n"
|
|
18570
|
+
" 2. Paste the value when ORP prompts `Secret value:`\n"
|
|
18571
|
+
" 3. Later run `orp secrets list` or `orp secrets resolve ...`\n\n"
|
|
18572
|
+
"Agent flow:\n"
|
|
18573
|
+
" - Pipe the value with `--value-stdin` instead of typing it interactively.\n\n"
|
|
18574
|
+
"Local macOS Keychain caching and hosted sync are optional layers on top."
|
|
18575
|
+
),
|
|
18576
|
+
epilog=(
|
|
18577
|
+
"Examples:\n"
|
|
18578
|
+
" orp secrets add --alias openai-primary --label \"OpenAI Primary\" --provider openai\n"
|
|
18579
|
+
" printf '%s' 'sk-...' | orp secrets add --alias openai-primary --label \"OpenAI Primary\" --provider openai --value-stdin\n"
|
|
18580
|
+
" orp secrets list\n"
|
|
18581
|
+
" orp secrets resolve openai-primary --reveal"
|
|
18582
|
+
),
|
|
18583
|
+
formatter_class=argparse.RawTextHelpFormatter,
|
|
18584
|
+
)
|
|
18548
18585
|
secrets_sub = s_secrets.add_subparsers(dest="secrets_cmd", required=True)
|
|
18549
18586
|
|
|
18550
|
-
s_secrets_list = secrets_sub.add_parser("list", help="List
|
|
18587
|
+
s_secrets_list = secrets_sub.add_parser("list", help="List saved secrets known to ORP")
|
|
18551
18588
|
s_secrets_list.add_argument("--provider", default="", help="Optional provider filter")
|
|
18552
18589
|
add_secret_scope_flags(s_secrets_list)
|
|
18553
18590
|
s_secrets_list.add_argument(
|
|
@@ -18559,13 +18596,16 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
18559
18596
|
add_json_flag(s_secrets_list)
|
|
18560
18597
|
s_secrets_list.set_defaults(func=cmd_secrets_list, json_output=False)
|
|
18561
18598
|
|
|
18562
|
-
s_secrets_show = secrets_sub.add_parser("show", help="Show one
|
|
18599
|
+
s_secrets_show = secrets_sub.add_parser("show", help="Show one saved secret by alias or id")
|
|
18563
18600
|
s_secrets_show.add_argument("secret_ref", help="Secret alias or id")
|
|
18564
18601
|
add_base_url_flag(s_secrets_show)
|
|
18565
18602
|
add_json_flag(s_secrets_show)
|
|
18566
18603
|
s_secrets_show.set_defaults(func=cmd_secrets_show, json_output=False)
|
|
18567
18604
|
|
|
18568
|
-
s_secrets_add = secrets_sub.add_parser(
|
|
18605
|
+
s_secrets_add = secrets_sub.add_parser(
|
|
18606
|
+
"add",
|
|
18607
|
+
help="Save a new secret; ORP prompts for the value unless you pass --value-stdin",
|
|
18608
|
+
)
|
|
18569
18609
|
s_secrets_add.add_argument("--alias", required=True, help="Stable secret alias")
|
|
18570
18610
|
s_secrets_add.add_argument("--label", required=True, help="Human label for the secret")
|
|
18571
18611
|
s_secrets_add.add_argument("--provider", required=True, help="Provider slug, for example openai")
|
|
@@ -18596,7 +18636,7 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
18596
18636
|
|
|
18597
18637
|
s_secrets_ensure = secrets_sub.add_parser(
|
|
18598
18638
|
"ensure",
|
|
18599
|
-
help="
|
|
18639
|
+
help="Reuse a saved secret or prompt for it and save it when missing",
|
|
18600
18640
|
)
|
|
18601
18641
|
s_secrets_ensure.add_argument("--alias", required=True, help="Stable secret alias")
|
|
18602
18642
|
s_secrets_ensure.add_argument("--label", default="", help="Human label for create-if-missing flows")
|
|
@@ -18637,7 +18677,7 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
18637
18677
|
|
|
18638
18678
|
s_secrets_keychain_list = secrets_sub.add_parser(
|
|
18639
18679
|
"keychain-list",
|
|
18640
|
-
help="List
|
|
18680
|
+
help="List local macOS Keychain copies known to ORP on this machine",
|
|
18641
18681
|
)
|
|
18642
18682
|
s_secrets_keychain_list.add_argument("--provider", default="", help="Optional provider filter")
|
|
18643
18683
|
add_secret_scope_flags(s_secrets_keychain_list)
|
|
@@ -18646,7 +18686,7 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
18646
18686
|
|
|
18647
18687
|
s_secrets_keychain_show = secrets_sub.add_parser(
|
|
18648
18688
|
"keychain-show",
|
|
18649
|
-
help="Show one
|
|
18689
|
+
help="Show one local macOS Keychain copy by alias or id",
|
|
18650
18690
|
)
|
|
18651
18691
|
s_secrets_keychain_show.add_argument("secret_ref", help="Secret alias or id")
|
|
18652
18692
|
s_secrets_keychain_show.add_argument(
|
|
@@ -18659,7 +18699,7 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
18659
18699
|
|
|
18660
18700
|
s_secrets_sync_keychain = secrets_sub.add_parser(
|
|
18661
18701
|
"sync-keychain",
|
|
18662
|
-
help="
|
|
18702
|
+
help="Copy one saved secret into the local macOS Keychain",
|
|
18663
18703
|
)
|
|
18664
18704
|
s_secrets_sync_keychain.add_argument("secret_ref", nargs="?", default="", help="Optional secret alias or id")
|
|
18665
18705
|
s_secrets_sync_keychain.add_argument("--provider", default="", help="Provider slug for project-scoped sync")
|
|
@@ -18673,7 +18713,7 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
18673
18713
|
add_json_flag(s_secrets_sync_keychain)
|
|
18674
18714
|
s_secrets_sync_keychain.set_defaults(func=cmd_secrets_sync_keychain, json_output=False)
|
|
18675
18715
|
|
|
18676
|
-
s_secrets_update = secrets_sub.add_parser("update", help="Update one
|
|
18716
|
+
s_secrets_update = secrets_sub.add_parser("update", help="Update one saved secret")
|
|
18677
18717
|
s_secrets_update.add_argument("secret_ref", help="Secret alias or id")
|
|
18678
18718
|
s_secrets_update.add_argument("--alias", default=None, help="New alias")
|
|
18679
18719
|
s_secrets_update.add_argument("--label", default=None, help="New label")
|
|
@@ -18702,13 +18742,13 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
18702
18742
|
add_json_flag(s_secrets_update)
|
|
18703
18743
|
s_secrets_update.set_defaults(func=cmd_secrets_update, json_output=False)
|
|
18704
18744
|
|
|
18705
|
-
s_secrets_archive = secrets_sub.add_parser("archive", help="Archive one
|
|
18745
|
+
s_secrets_archive = secrets_sub.add_parser("archive", help="Archive one saved secret")
|
|
18706
18746
|
s_secrets_archive.add_argument("secret_ref", help="Secret alias or id")
|
|
18707
18747
|
add_base_url_flag(s_secrets_archive)
|
|
18708
18748
|
add_json_flag(s_secrets_archive)
|
|
18709
18749
|
s_secrets_archive.set_defaults(func=cmd_secrets_archive, json_output=False)
|
|
18710
18750
|
|
|
18711
|
-
s_secrets_bind = secrets_sub.add_parser("bind", help="Bind one secret to a hosted project/world")
|
|
18751
|
+
s_secrets_bind = secrets_sub.add_parser("bind", help="Bind one saved secret to a hosted project/world")
|
|
18712
18752
|
s_secrets_bind.add_argument("secret_ref", help="Secret alias or id")
|
|
18713
18753
|
add_secret_scope_flags(s_secrets_bind)
|
|
18714
18754
|
s_secrets_bind.add_argument("--purpose", default="", help="Optional project usage note")
|
|
@@ -18729,7 +18769,7 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
18729
18769
|
|
|
18730
18770
|
s_secrets_resolve = secrets_sub.add_parser(
|
|
18731
18771
|
"resolve",
|
|
18732
|
-
help="Resolve one
|
|
18772
|
+
help="Resolve one saved secret by alias/id or by provider plus project scope",
|
|
18733
18773
|
)
|
|
18734
18774
|
s_secrets_resolve.add_argument("secret_ref", nargs="?", default="", help="Optional secret alias or id")
|
|
18735
18775
|
s_secrets_resolve.add_argument("--provider", default="", help="Provider slug for project-scoped resolution")
|
|
@@ -243,10 +243,10 @@ Behavior:
|
|
|
243
243
|
|
|
244
244
|
Formal schemas live in:
|
|
245
245
|
|
|
246
|
-
- [link-project.schema.json](
|
|
247
|
-
- [link-session.schema.json](
|
|
248
|
-
- [runner-machine.schema.json](
|
|
249
|
-
- [runner-runtime.schema.json](
|
|
246
|
+
- [link-project.schema.json](../spec/v1/link-project.schema.json)
|
|
247
|
+
- [link-session.schema.json](../spec/v1/link-session.schema.json)
|
|
248
|
+
- [runner-machine.schema.json](../spec/v1/runner-machine.schema.json)
|
|
249
|
+
- [runner-runtime.schema.json](../spec/v1/runner-runtime.schema.json)
|
|
250
250
|
|
|
251
251
|
Planned file locations and schema usage:
|
|
252
252
|
|
|
@@ -454,14 +454,14 @@ The CLI and Rust app now share one client-side project/session/runner contract,
|
|
|
454
454
|
- [x] Add route/helper logging for failed poll/start/complete flows, lease mismatches, missing-routeable-session failures, and repeated retry patterns.
|
|
455
455
|
- [x] Surface runner health in the Rust desktop app so operators can see online/syncing/working/error states locally.
|
|
456
456
|
- [x] Add an internal rollout and recovery runbook:
|
|
457
|
-
- [RUNNER_INTERNAL_OPERATIONS.md](
|
|
457
|
+
- [RUNNER_INTERNAL_OPERATIONS.md](./RUNNER_INTERNAL_OPERATIONS.md)
|
|
458
458
|
- [x] Deploy the hosted runner backend changes to the real internal environment.
|
|
459
459
|
- [x] Run a live internal smoke on deployed infrastructure.
|
|
460
460
|
- Completed on March 16, 2026 against `https://orp.earth`.
|
|
461
461
|
- Verified `orp link project bind`, `orp link session register`, `orp runner enable`, `orp runner sync`, `orp checkpoint queue`, `orp runner work --once`, and `orp agent work --once`.
|
|
462
462
|
- Confirmed the production `orp` checkpoint job `78cd459a-fc0b-451b-af06-be2d27379169` completed successfully and produced checkpoint response `41087b8b-9556-4ec1-90c6-eefb69bac585`.
|
|
463
463
|
- [x] Add and verify a reusable Rust-side smoke harness for the desktop wrapper path.
|
|
464
|
-
- Implemented at
|
|
464
|
+
- Implemented in the companion Rust workspace at `orp-rust/src/bin/runner_smoke.rs`.
|
|
465
465
|
- Verified on March 16, 2026 against `https://orp.earth`.
|
|
466
466
|
- Confirmed Rust-side smoke job `853a55f9-b0e5-42f7-8f2f-cdc8db1a354c` completed successfully and produced checkpoint response `6b5aee77-f176-4249-a127-978b987da946`.
|
|
467
467
|
|
|
@@ -51,7 +51,7 @@ Use this checklist when releasing ORP as the unified public CLI and product surf
|
|
|
51
51
|
- `orp agent work --once --json` remains available as the compatibility path
|
|
52
52
|
- Confirm the checkpoint response lands back in the hosted workspace.
|
|
53
53
|
- Confirm the hosted operator console reflects the same lifecycle at `/dashboard/admin/runners`.
|
|
54
|
-
- Use [RUNNER_INTERNAL_OPERATIONS.md](
|
|
54
|
+
- Use [RUNNER_INTERNAL_OPERATIONS.md](./RUNNER_INTERNAL_OPERATIONS.md) for the internal rollout and recovery flow.
|
|
55
55
|
|
|
56
56
|
## 4. Package release
|
|
57
57
|
|
|
@@ -89,4 +89,4 @@ Use this checklist when releasing ORP as the unified public CLI and product surf
|
|
|
89
89
|
- Keep the web app and CLI rollout loosely coupled.
|
|
90
90
|
- Launch the ORP CLI first if the web app/domain transition is still in progress.
|
|
91
91
|
- Do not change domain, auth, runner, and package names all in one step unless all staging checks are green.
|
|
92
|
-
- Follow [ORP_WEB_DOMAIN_TRANSITION_PLAN.md](
|
|
92
|
+
- Follow [ORP_WEB_DOMAIN_TRANSITION_PLAN.md](./ORP_WEB_DOMAIN_TRANSITION_PLAN.md) for the hosted cutover sequence.
|
|
@@ -5,12 +5,12 @@ the ORP Reasoning Kernel.
|
|
|
5
5
|
|
|
6
6
|
Supporting artifact:
|
|
7
7
|
|
|
8
|
-
- [docs/benchmarks/orp_reasoning_kernel_agent_pilot_v0_1.json](
|
|
8
|
+
- [docs/benchmarks/orp_reasoning_kernel_agent_pilot_v0_1.json](./benchmarks/orp_reasoning_kernel_agent_pilot_v0_1.json)
|
|
9
9
|
|
|
10
10
|
Supporting corpus and harness:
|
|
11
11
|
|
|
12
|
-
- [examples/kernel/comparison/comparison-corpus.json](
|
|
13
|
-
- [scripts/orp-kernel-agent-pilot.py](
|
|
12
|
+
- [examples/kernel/comparison/comparison-corpus.json](../examples/kernel/comparison/comparison-corpus.json)
|
|
13
|
+
- [scripts/orp-kernel-agent-pilot.py](../scripts/orp-kernel-agent-pilot.py)
|
|
14
14
|
|
|
15
15
|
## What This Pilot Measures
|
|
16
16
|
|
|
@@ -5,11 +5,11 @@ pilot for the live ORP kernel agent evaluation.
|
|
|
5
5
|
|
|
6
6
|
Supporting artifact:
|
|
7
7
|
|
|
8
|
-
- [docs/benchmarks/orp_reasoning_kernel_agent_replication_v0_2.json](
|
|
8
|
+
- [docs/benchmarks/orp_reasoning_kernel_agent_replication_v0_2.json](./benchmarks/orp_reasoning_kernel_agent_replication_v0_2.json)
|
|
9
9
|
|
|
10
10
|
Supporting harness:
|
|
11
11
|
|
|
12
|
-
- [scripts/orp-kernel-agent-replication.py](
|
|
12
|
+
- [scripts/orp-kernel-agent-replication.py](../scripts/orp-kernel-agent-replication.py)
|
|
13
13
|
|
|
14
14
|
The harness now supports:
|
|
15
15
|
|
|
@@ -5,11 +5,11 @@ for the ORP Reasoning Kernel.
|
|
|
5
5
|
|
|
6
6
|
Supporting artifact:
|
|
7
7
|
|
|
8
|
-
- [docs/benchmarks/orp_reasoning_kernel_canonical_continuation_v0_1.json](
|
|
8
|
+
- [docs/benchmarks/orp_reasoning_kernel_canonical_continuation_v0_1.json](./benchmarks/orp_reasoning_kernel_canonical_continuation_v0_1.json)
|
|
9
9
|
|
|
10
10
|
Supporting harness:
|
|
11
11
|
|
|
12
|
-
- [scripts/orp-kernel-canonical-continuation.py](
|
|
12
|
+
- [scripts/orp-kernel-canonical-continuation.py](../scripts/orp-kernel-canonical-continuation.py)
|
|
13
13
|
|
|
14
14
|
## What This Pilot Measures
|
|
15
15
|
|
|
@@ -9,12 +9,12 @@ artifact styles:
|
|
|
9
9
|
|
|
10
10
|
Supporting artifact:
|
|
11
11
|
|
|
12
|
-
- [docs/benchmarks/orp_reasoning_kernel_comparison_v0_1.json](
|
|
12
|
+
- [docs/benchmarks/orp_reasoning_kernel_comparison_v0_1.json](./benchmarks/orp_reasoning_kernel_comparison_v0_1.json)
|
|
13
13
|
|
|
14
14
|
Supporting corpus and harness:
|
|
15
15
|
|
|
16
|
-
- [examples/kernel/comparison/comparison-corpus.json](
|
|
17
|
-
- [scripts/orp-kernel-comparison.py](
|
|
16
|
+
- [examples/kernel/comparison/comparison-corpus.json](../examples/kernel/comparison/comparison-corpus.json)
|
|
17
|
+
- [scripts/orp-kernel-comparison.py](../scripts/orp-kernel-comparison.py)
|
|
18
18
|
|
|
19
19
|
## What This Pilot Measures
|
|
20
20
|
|
|
@@ -91,7 +91,7 @@ This pilot does **not** prove that the kernel:
|
|
|
91
91
|
- is universally superior across all teams or domains
|
|
92
92
|
|
|
93
93
|
Those still require the larger studies in
|
|
94
|
-
[docs/ORP_REASONING_KERNEL_EVALUATION_PLAN.md](
|
|
94
|
+
[docs/ORP_REASONING_KERNEL_EVALUATION_PLAN.md](./ORP_REASONING_KERNEL_EVALUATION_PLAN.md).
|
|
95
95
|
|
|
96
96
|
## Why The Scoring Is Structured This Way
|
|
97
97
|
|
|
@@ -5,15 +5,15 @@ Reasoning Kernel.
|
|
|
5
5
|
|
|
6
6
|
Supporting artifact:
|
|
7
7
|
|
|
8
|
-
- [docs/benchmarks/orp_reasoning_kernel_continuation_v0_1.json](
|
|
8
|
+
- [docs/benchmarks/orp_reasoning_kernel_continuation_v0_1.json](./benchmarks/orp_reasoning_kernel_continuation_v0_1.json)
|
|
9
9
|
|
|
10
10
|
Supporting harness:
|
|
11
11
|
|
|
12
|
-
- [scripts/orp-kernel-continuation-pilot.py](
|
|
12
|
+
- [scripts/orp-kernel-continuation-pilot.py](../scripts/orp-kernel-continuation-pilot.py)
|
|
13
13
|
|
|
14
14
|
Related harder benchmark:
|
|
15
15
|
|
|
16
|
-
- [docs/ORP_REASONING_KERNEL_CANONICAL_CONTINUATION_PILOT.md](
|
|
16
|
+
- [docs/ORP_REASONING_KERNEL_CANONICAL_CONTINUATION_PILOT.md](./ORP_REASONING_KERNEL_CANONICAL_CONTINUATION_PILOT.md)
|
|
17
17
|
|
|
18
18
|
## What This Pilot Measures
|
|
19
19
|
|
|
@@ -13,14 +13,14 @@ to:
|
|
|
13
13
|
|
|
14
14
|
Supporting references:
|
|
15
15
|
|
|
16
|
-
- [docs/ORP_REASONING_KERNEL_COMPARISON_PILOT.md](
|
|
17
|
-
- [docs/ORP_REASONING_KERNEL_PICKUP_PILOT.md](
|
|
18
|
-
- [docs/ORP_REASONING_KERNEL_AGENT_PILOT.md](
|
|
19
|
-
- [docs/ORP_REASONING_KERNEL_AGENT_REPLICATION.md](
|
|
20
|
-
- [docs/ORP_REASONING_KERNEL_CONTINUATION_PILOT.md](
|
|
21
|
-
- [docs/ORP_REASONING_KERNEL_CANONICAL_CONTINUATION_PILOT.md](
|
|
22
|
-
- [docs/ORP_REASONING_KERNEL_EVIDENCE_MATRIX.md](
|
|
23
|
-
- [docs/ORP_REASONING_KERNEL_TECHNICAL_VALIDATION.md](
|
|
16
|
+
- [docs/ORP_REASONING_KERNEL_COMPARISON_PILOT.md](./ORP_REASONING_KERNEL_COMPARISON_PILOT.md)
|
|
17
|
+
- [docs/ORP_REASONING_KERNEL_PICKUP_PILOT.md](./ORP_REASONING_KERNEL_PICKUP_PILOT.md)
|
|
18
|
+
- [docs/ORP_REASONING_KERNEL_AGENT_PILOT.md](./ORP_REASONING_KERNEL_AGENT_PILOT.md)
|
|
19
|
+
- [docs/ORP_REASONING_KERNEL_AGENT_REPLICATION.md](./ORP_REASONING_KERNEL_AGENT_REPLICATION.md)
|
|
20
|
+
- [docs/ORP_REASONING_KERNEL_CONTINUATION_PILOT.md](./ORP_REASONING_KERNEL_CONTINUATION_PILOT.md)
|
|
21
|
+
- [docs/ORP_REASONING_KERNEL_CANONICAL_CONTINUATION_PILOT.md](./ORP_REASONING_KERNEL_CANONICAL_CONTINUATION_PILOT.md)
|
|
22
|
+
- [docs/ORP_REASONING_KERNEL_EVIDENCE_MATRIX.md](./ORP_REASONING_KERNEL_EVIDENCE_MATRIX.md)
|
|
23
|
+
- [docs/ORP_REASONING_KERNEL_TECHNICAL_VALIDATION.md](./ORP_REASONING_KERNEL_TECHNICAL_VALIDATION.md)
|
|
24
24
|
|
|
25
25
|
## Evaluation Principles
|
|
26
26
|
|
|
@@ -13,13 +13,13 @@ stronger when we can say, precisely:
|
|
|
13
13
|
|
|
14
14
|
Supporting references:
|
|
15
15
|
|
|
16
|
-
- [docs/ORP_REASONING_KERNEL_V0_1.md](
|
|
17
|
-
- [docs/ORP_REASONING_KERNEL_TECHNICAL_VALIDATION.md](
|
|
18
|
-
- [docs/benchmarks/orp_reasoning_kernel_v0_1_validation.json](
|
|
19
|
-
- [docs/ORP_REASONING_KERNEL_AGENT_PILOT.md](
|
|
20
|
-
- [docs/ORP_REASONING_KERNEL_AGENT_REPLICATION.md](
|
|
21
|
-
- [docs/ORP_REASONING_KERNEL_CONTINUATION_PILOT.md](
|
|
22
|
-
- [docs/ORP_REASONING_KERNEL_CANONICAL_CONTINUATION_PILOT.md](
|
|
16
|
+
- [docs/ORP_REASONING_KERNEL_V0_1.md](./ORP_REASONING_KERNEL_V0_1.md)
|
|
17
|
+
- [docs/ORP_REASONING_KERNEL_TECHNICAL_VALIDATION.md](./ORP_REASONING_KERNEL_TECHNICAL_VALIDATION.md)
|
|
18
|
+
- [docs/benchmarks/orp_reasoning_kernel_v0_1_validation.json](./benchmarks/orp_reasoning_kernel_v0_1_validation.json)
|
|
19
|
+
- [docs/ORP_REASONING_KERNEL_AGENT_PILOT.md](./ORP_REASONING_KERNEL_AGENT_PILOT.md)
|
|
20
|
+
- [docs/ORP_REASONING_KERNEL_AGENT_REPLICATION.md](./ORP_REASONING_KERNEL_AGENT_REPLICATION.md)
|
|
21
|
+
- [docs/ORP_REASONING_KERNEL_CONTINUATION_PILOT.md](./ORP_REASONING_KERNEL_CONTINUATION_PILOT.md)
|
|
22
|
+
- [docs/ORP_REASONING_KERNEL_CANONICAL_CONTINUATION_PILOT.md](./ORP_REASONING_KERNEL_CANONICAL_CONTINUATION_PILOT.md)
|
|
23
23
|
|
|
24
24
|
## Evidence Grades
|
|
25
25
|
|
|
@@ -41,24 +41,24 @@ the current kernel release.
|
|
|
41
41
|
|
|
42
42
|
| Claim | Grade | Current Evidence | Why It Matters |
|
|
43
43
|
| --- | --- | --- | --- |
|
|
44
|
-
| ORP has a real typed kernel artifact surface. | A | [spec/v1/kernel.schema.json](
|
|
45
|
-
| `orp init` seeds a valid starter kernel artifact and validates it in the default flow. | A | [tests/test_orp_init.py](
|
|
46
|
-
| All seven v0.1 artifact classes can scaffold and validate successfully. | A | [tests/test_orp_kernel.py](
|
|
47
|
-
| Hard mode blocks invalid promotable artifacts. | A | [tests/test_orp_kernel.py](
|
|
48
|
-
| Soft mode records invalidity without blocking work. | A | [tests/test_orp_kernel.py](
|
|
49
|
-
| Existing `structure_kernel` gates remain compatible when no explicit kernel config is present. | A | [tests/test_orp_kernel.py](
|
|
50
|
-
| One-shot local kernel CLI operations are within human-scale latency on the reference machine. | A | [scripts/orp-kernel-benchmark.py](
|
|
51
|
-
| A small cross-domain reference corpus fits the current class set cleanly. | A | [examples/kernel/corpus](
|
|
52
|
-
| Each artifact class rejects a candidate when a required field is removed. | A | [tests/test_orp_kernel_corpus.py](
|
|
53
|
-
| The CLI validator stays aligned with the published kernel schema. | A | [tests/test_orp_kernel_corpus.py](
|
|
54
|
-
| Equivalent YAML and JSON artifacts validate to the same semantic result. | A | [tests/test_orp_kernel_corpus.py](
|
|
55
|
-
| The validator rejects adversarial near-miss artifacts. | A | [tests/test_orp_kernel_corpus.py](
|
|
56
|
-
| On a matched internal comparison corpus, kernel artifacts outperform both free-form and generic checklist artifacts on structural scoring. | A | [docs/ORP_REASONING_KERNEL_COMPARISON_PILOT.md](
|
|
57
|
-
| On a matched internal pickup proxy, kernel artifacts preserve more explicit handoff-critical information than both free-form and generic checklist artifacts. | A | [docs/ORP_REASONING_KERNEL_PICKUP_PILOT.md](
|
|
58
|
-
| On a matched live Codex recoverability simulation, kernel artifacts preserve full required-field recoverability, outperform free-form artifacts on all matched cases, and outperform generic checklist artifacts on average without per-case losses. | A | [docs/ORP_REASONING_KERNEL_AGENT_PILOT.md](
|
|
59
|
-
| On a `10`-repeat full-corpus live Codex replication pilot, the kernel’s recoverability advantage stays stable across fresh-session reruns, with zero invention, no run-level losses, and perfect per-field stability on required kernel fields. | A | [docs/ORP_REASONING_KERNEL_AGENT_REPLICATION.md](
|
|
60
|
-
| On a matched full-corpus live continuation pilot, kernel artifacts support the strongest continuation score, never underperform the generic checklist baseline, and keep invention at zero. | A | [docs/ORP_REASONING_KERNEL_CONTINUATION_PILOT.md](
|
|
61
|
-
| On a harder matched full-corpus canonical continuation pilot, kernel artifacts beat free-form on every case, beat checklist on average, and keep the lowest invention rate while revealing checklist as a real competitive baseline on some cases. | A | [docs/ORP_REASONING_KERNEL_CANONICAL_CONTINUATION_PILOT.md](
|
|
44
|
+
| ORP has a real typed kernel artifact surface. | A | [spec/v1/kernel.schema.json](../spec/v1/kernel.schema.json), [cli/orp.py](../cli/orp.py) | The kernel is not just prose. It is an enforceable CLI surface. |
|
|
45
|
+
| `orp init` seeds a valid starter kernel artifact and validates it in the default flow. | A | [tests/test_orp_init.py](../tests/test_orp_init.py), [docs/benchmarks/orp_reasoning_kernel_v0_1_validation.json](./benchmarks/orp_reasoning_kernel_v0_1_validation.json) | New repos get the kernel by default instead of needing manual adoption. |
|
|
46
|
+
| All seven v0.1 artifact classes can scaffold and validate successfully. | A | [tests/test_orp_kernel.py](../tests/test_orp_kernel.py), [docs/benchmarks/orp_reasoning_kernel_v0_1_validation.json](./benchmarks/orp_reasoning_kernel_v0_1_validation.json) | The kernel is broad enough for multiple project artifact types. |
|
|
47
|
+
| Hard mode blocks invalid promotable artifacts. | A | [tests/test_orp_kernel.py](../tests/test_orp_kernel.py), [docs/benchmarks/orp_reasoning_kernel_v0_1_validation.json](./benchmarks/orp_reasoning_kernel_v0_1_validation.json) | ORP can enforce structural promotion standards rather than only advising. |
|
|
48
|
+
| Soft mode records invalidity without blocking work. | A | [tests/test_orp_kernel.py](../tests/test_orp_kernel.py), [docs/benchmarks/orp_reasoning_kernel_v0_1_validation.json](./benchmarks/orp_reasoning_kernel_v0_1_validation.json) | ORP can stay fluid at intake while still surfacing missing structure. |
|
|
49
|
+
| Existing `structure_kernel` gates remain compatible when no explicit kernel config is present. | A | [tests/test_orp_kernel.py](../tests/test_orp_kernel.py), [docs/benchmarks/orp_reasoning_kernel_v0_1_validation.json](./benchmarks/orp_reasoning_kernel_v0_1_validation.json) | The kernel does not silently break earlier ORP configurations. |
|
|
50
|
+
| One-shot local kernel CLI operations are within human-scale latency on the reference machine. | A | [scripts/orp-kernel-benchmark.py](../scripts/orp-kernel-benchmark.py), [docs/benchmarks/orp_reasoning_kernel_v0_1_validation.json](./benchmarks/orp_reasoning_kernel_v0_1_validation.json) | The kernel is operationally lightweight enough to use during normal work. |
|
|
51
|
+
| A small cross-domain reference corpus fits the current class set cleanly. | A | [examples/kernel/corpus](../examples/kernel/corpus), [tests/test_orp_kernel_corpus.py](../tests/test_orp_kernel_corpus.py), [docs/benchmarks/orp_reasoning_kernel_v0_1_validation.json](./benchmarks/orp_reasoning_kernel_v0_1_validation.json) | The kernel now has explicit cross-domain fit evidence, not only rationale. |
|
|
52
|
+
| Each artifact class rejects a candidate when a required field is removed. | A | [tests/test_orp_kernel_corpus.py](../tests/test_orp_kernel_corpus.py), [docs/benchmarks/orp_reasoning_kernel_v0_1_validation.json](./benchmarks/orp_reasoning_kernel_v0_1_validation.json) | Class-specific enforcement is directly proven instead of inferred from a subset of cases. |
|
|
53
|
+
| The CLI validator stays aligned with the published kernel schema. | A | [tests/test_orp_kernel_corpus.py](../tests/test_orp_kernel_corpus.py), [docs/benchmarks/orp_reasoning_kernel_v0_1_validation.json](./benchmarks/orp_reasoning_kernel_v0_1_validation.json) | The kernel no longer relies on an undocumented validator rule set that could drift away from the schema. |
|
|
54
|
+
| Equivalent YAML and JSON artifacts validate to the same semantic result. | A | [tests/test_orp_kernel_corpus.py](../tests/test_orp_kernel_corpus.py), [docs/benchmarks/orp_reasoning_kernel_v0_1_validation.json](./benchmarks/orp_reasoning_kernel_v0_1_validation.json) | The protocol is representation-stable rather than format-sensitive. |
|
|
55
|
+
| The validator rejects adversarial near-miss artifacts. | A | [tests/test_orp_kernel_corpus.py](../tests/test_orp_kernel_corpus.py), [docs/benchmarks/orp_reasoning_kernel_v0_1_validation.json](./benchmarks/orp_reasoning_kernel_v0_1_validation.json) | The kernel is stronger against malformed or gameable inputs than before. |
|
|
56
|
+
| On a matched internal comparison corpus, kernel artifacts outperform both free-form and generic checklist artifacts on structural scoring. | A | [docs/ORP_REASONING_KERNEL_COMPARISON_PILOT.md](./ORP_REASONING_KERNEL_COMPARISON_PILOT.md), [docs/benchmarks/orp_reasoning_kernel_comparison_v0_1.json](./benchmarks/orp_reasoning_kernel_comparison_v0_1.json), [scripts/orp-kernel-comparison.py](../scripts/orp-kernel-comparison.py) | ORP now has direct comparative evidence for structural artifact quality on a matched internal corpus, not only rationale. |
|
|
57
|
+
| On a matched internal pickup proxy, kernel artifacts preserve more explicit handoff-critical information than both free-form and generic checklist artifacts. | A | [docs/ORP_REASONING_KERNEL_PICKUP_PILOT.md](./ORP_REASONING_KERNEL_PICKUP_PILOT.md), [docs/benchmarks/orp_reasoning_kernel_pickup_v0_1.json](./benchmarks/orp_reasoning_kernel_pickup_v0_1.json), [scripts/orp-kernel-pickup.py](../scripts/orp-kernel-pickup.py) | ORP now has a second comparative signal showing that kernel structure turns into more explicit pickup value, not just fuller-looking artifacts. |
|
|
58
|
+
| On a matched live Codex recoverability simulation, kernel artifacts preserve full required-field recoverability, outperform free-form artifacts on all matched cases, and outperform generic checklist artifacts on average without per-case losses. | A | [docs/ORP_REASONING_KERNEL_AGENT_PILOT.md](./ORP_REASONING_KERNEL_AGENT_PILOT.md), [docs/benchmarks/orp_reasoning_kernel_agent_pilot_v0_1.json](./benchmarks/orp_reasoning_kernel_agent_pilot_v0_1.json), [scripts/orp-kernel-agent-pilot.py](../scripts/orp-kernel-agent-pilot.py) | ORP now has direct in-environment agent evidence that the kernel’s structural advantage survives contact with a real fresh downstream Codex session. |
|
|
59
|
+
| On a `10`-repeat full-corpus live Codex replication pilot, the kernel’s recoverability advantage stays stable across fresh-session reruns, with zero invention, no run-level losses, and perfect per-field stability on required kernel fields. | A | [docs/ORP_REASONING_KERNEL_AGENT_REPLICATION.md](./ORP_REASONING_KERNEL_AGENT_REPLICATION.md), [docs/benchmarks/orp_reasoning_kernel_agent_replication_v0_2.json](./benchmarks/orp_reasoning_kernel_agent_replication_v0_2.json), [scripts/orp-kernel-agent-replication.py](../scripts/orp-kernel-agent-replication.py) | ORP now has stronger repeatability evidence that the live agent result is not just a single-run artifact and that the structural advantage survives at field level, not only in aggregate means. |
|
|
60
|
+
| On a matched full-corpus live continuation pilot, kernel artifacts support the strongest continuation score, never underperform the generic checklist baseline, and keep invention at zero. | A | [docs/ORP_REASONING_KERNEL_CONTINUATION_PILOT.md](./ORP_REASONING_KERNEL_CONTINUATION_PILOT.md), [docs/benchmarks/orp_reasoning_kernel_continuation_v0_1.json](./benchmarks/orp_reasoning_kernel_continuation_v0_1.json), [scripts/orp-kernel-continuation-pilot.py](../scripts/orp-kernel-continuation-pilot.py) | ORP now has direct agent-first evidence that kernel artifacts are not only recoverable, but also a safe and effective base for downstream continuation. |
|
|
61
|
+
| On a harder matched full-corpus canonical continuation pilot, kernel artifacts beat free-form on every case, beat checklist on average, and keep the lowest invention rate while revealing checklist as a real competitive baseline on some cases. | A | [docs/ORP_REASONING_KERNEL_CANONICAL_CONTINUATION_PILOT.md](./ORP_REASONING_KERNEL_CANONICAL_CONTINUATION_PILOT.md), [docs/benchmarks/orp_reasoning_kernel_canonical_continuation_v0_1.json](./benchmarks/orp_reasoning_kernel_canonical_continuation_v0_1.json), [scripts/orp-kernel-canonical-continuation.py](../scripts/orp-kernel-canonical-continuation.py) | ORP now has a stricter downstream-agent benchmark where the task is not merely “continue safely,” but “produce the next canonical artifact” without inventing unsupported structure. |
|
|
62
62
|
|
|
63
63
|
## What Is Strong But Not Fully Sealed
|
|
64
64
|
|
|
@@ -28,8 +28,8 @@ The current core kernel remains the canonical source of truth for:
|
|
|
28
28
|
|
|
29
29
|
Those semantics live in:
|
|
30
30
|
|
|
31
|
-
- [spec/v1/kernel.schema.json](
|
|
32
|
-
- [cli/orp.py](
|
|
31
|
+
- [spec/v1/kernel.schema.json](../spec/v1/kernel.schema.json)
|
|
32
|
+
- [cli/orp.py](../cli/orp.py)
|
|
33
33
|
|
|
34
34
|
The kernel should not self-mutate from a single chat, a single agent guess, or
|
|
35
35
|
one repo’s habits.
|
|
@@ -74,7 +74,7 @@ Use it for changes like:
|
|
|
74
74
|
|
|
75
75
|
Proposal shape is governed by:
|
|
76
76
|
|
|
77
|
-
- [spec/v1/kernel-proposal.schema.json](
|
|
77
|
+
- [spec/v1/kernel-proposal.schema.json](../spec/v1/kernel-proposal.schema.json)
|
|
78
78
|
|
|
79
79
|
### `orp kernel migrate`
|
|
80
80
|
|
|
@@ -93,7 +93,7 @@ It should begin as an extension or proposal before becoming universal.
|
|
|
93
93
|
|
|
94
94
|
Extension shape is defined in:
|
|
95
95
|
|
|
96
|
-
- [spec/v1/kernel-extension.schema.json](
|
|
96
|
+
- [spec/v1/kernel-extension.schema.json](../spec/v1/kernel-extension.schema.json)
|
|
97
97
|
|
|
98
98
|
That gives ORP a place to trial domain-specific structure without forcing it
|
|
99
99
|
into every project prematurely.
|
|
@@ -5,12 +5,12 @@ Reasoning Kernel.
|
|
|
5
5
|
|
|
6
6
|
Supporting artifact:
|
|
7
7
|
|
|
8
|
-
- [docs/benchmarks/orp_reasoning_kernel_pickup_v0_1.json](
|
|
8
|
+
- [docs/benchmarks/orp_reasoning_kernel_pickup_v0_1.json](./benchmarks/orp_reasoning_kernel_pickup_v0_1.json)
|
|
9
9
|
|
|
10
10
|
Supporting corpus and harness:
|
|
11
11
|
|
|
12
|
-
- [examples/kernel/comparison/comparison-corpus.json](
|
|
13
|
-
- [scripts/orp-kernel-pickup.py](
|
|
12
|
+
- [examples/kernel/comparison/comparison-corpus.json](../examples/kernel/comparison/comparison-corpus.json)
|
|
13
|
+
- [scripts/orp-kernel-pickup.py](../scripts/orp-kernel-pickup.py)
|
|
14
14
|
|
|
15
15
|
## What This Pilot Measures
|
|
16
16
|
|
|
@@ -96,7 +96,7 @@ methodology.
|
|
|
96
96
|
It is stronger evidence than a rationale-only claim, but it remains an
|
|
97
97
|
internal, deterministic proxy. The next step after this is still a live
|
|
98
98
|
human/agent pickup study as described in
|
|
99
|
-
[docs/ORP_REASONING_KERNEL_EVALUATION_PLAN.md](
|
|
99
|
+
[docs/ORP_REASONING_KERNEL_EVALUATION_PLAN.md](./ORP_REASONING_KERNEL_EVALUATION_PLAN.md).
|
|
100
100
|
|
|
101
101
|
## Bottom Line
|
|
102
102
|
|