open-research-protocol 0.4.7 → 0.4.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -0
- package/cli/orp.py +1158 -43
- package/docs/AGENT_LOOP.md +3 -0
- package/docs/ORP_REASONING_KERNEL_AGENT_PILOT.md +125 -0
- package/docs/ORP_REASONING_KERNEL_AGENT_REPLICATION.md +97 -0
- package/docs/ORP_REASONING_KERNEL_CANONICAL_CONTINUATION_PILOT.md +100 -0
- package/docs/ORP_REASONING_KERNEL_COMPARISON_PILOT.md +116 -0
- package/docs/ORP_REASONING_KERNEL_CONTINUATION_PILOT.md +86 -0
- package/docs/ORP_REASONING_KERNEL_EVALUATION_PLAN.md +261 -0
- package/docs/ORP_REASONING_KERNEL_EVIDENCE_MATRIX.md +131 -0
- package/docs/ORP_REASONING_KERNEL_EVOLUTION.md +123 -0
- package/docs/ORP_REASONING_KERNEL_PICKUP_PILOT.md +107 -0
- package/docs/ORP_REASONING_KERNEL_TECHNICAL_VALIDATION.md +140 -22
- package/docs/ORP_REASONING_KERNEL_V0_1.md +11 -0
- package/docs/ORP_YOUTUBE_INSPECT.md +97 -0
- package/docs/benchmarks/orp_reasoning_kernel_agent_pilot_v0_1.json +796 -0
- package/docs/benchmarks/orp_reasoning_kernel_agent_replication_task_smoke.json +487 -0
- package/docs/benchmarks/orp_reasoning_kernel_agent_replication_v0_1.json +1927 -0
- package/docs/benchmarks/orp_reasoning_kernel_agent_replication_v0_2.json +10217 -0
- package/docs/benchmarks/orp_reasoning_kernel_canonical_continuation_task_smoke.json +174 -0
- package/docs/benchmarks/orp_reasoning_kernel_canonical_continuation_v0_1.json +598 -0
- package/docs/benchmarks/orp_reasoning_kernel_comparison_v0_1.json +688 -0
- package/docs/benchmarks/orp_reasoning_kernel_continuation_task_smoke.json +150 -0
- package/docs/benchmarks/orp_reasoning_kernel_continuation_v0_1.json +448 -0
- package/docs/benchmarks/orp_reasoning_kernel_pickup_v0_1.json +594 -0
- package/docs/benchmarks/orp_reasoning_kernel_v0_1_validation.json +769 -41
- package/examples/README.md +2 -0
- package/examples/kernel/comparison/comparison-corpus.json +337 -0
- package/examples/kernel/comparison/next-task-continuation.json +55 -0
- package/examples/kernel/corpus/operations/habanero-routing.checkpoint.kernel.yml +12 -0
- package/examples/kernel/corpus/operations/runner-routing.policy.kernel.yml +9 -0
- package/examples/kernel/corpus/product/project-home.decision.kernel.yml +11 -0
- package/examples/kernel/corpus/research/kernel-handoff.experiment.kernel.yml +16 -0
- package/examples/kernel/corpus/research/lane-drift.hypothesis.kernel.yml +11 -0
- package/examples/kernel/corpus/software/trace-widget.task.kernel.yml +13 -0
- package/examples/kernel/corpus/writing/kernel-launch.result.kernel.yml +12 -0
- package/llms.txt +3 -0
- package/package.json +4 -1
- package/scripts/orp-kernel-agent-pilot.py +673 -0
- package/scripts/orp-kernel-agent-replication.py +307 -0
- package/scripts/orp-kernel-benchmark.py +471 -2
- package/scripts/orp-kernel-canonical-continuation.py +381 -0
- package/scripts/orp-kernel-ci-check.py +138 -0
- package/scripts/orp-kernel-comparison.py +592 -0
- package/scripts/orp-kernel-continuation-pilot.py +384 -0
- package/scripts/orp-kernel-pickup.py +401 -0
- package/spec/v1/kernel-extension.schema.json +96 -0
- package/spec/v1/kernel-proposal.schema.json +115 -0
- package/spec/v1/kernel.schema.json +2 -1
- package/spec/v1/youtube-source.schema.json +151 -0
|
@@ -0,0 +1,401 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import argparse
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
import platform
|
|
8
|
+
import re
|
|
9
|
+
import subprocess
|
|
10
|
+
import sys
|
|
11
|
+
import time
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Directory one level above the folder that contains this script.
REPO_ROOT = Path(__file__).resolve().parents[1]
# JSON corpus whose "cases" list is scored under every condition.
COMPARISON_CORPUS = REPO_ROOT / "examples" / "kernel" / "comparison" / "comparison-corpus.json"
# Artifact conditions scored for each case, in the order they are reported.
CONDITIONS = ["freeform", "generic_checklist", "kernel"]
|
|
18
|
+
|
|
19
|
+
# For each artifact class, the fields that are scored as pickup targets.
PICKUP_FIELDS: dict[str, list[str]] = {
    "task": ["object", "constraints", "success_criteria"],
    "decision": ["question", "chosen_path", "consequences"],
    "hypothesis": ["claim", "boundary", "test_path"],
    "experiment": ["objective", "method", "outputs"],
    "checkpoint": ["current_state", "risks", "next_handoff_target"],
    "policy": ["scope", "rule", "enforcement_surface"],
    "result": ["claim", "status", "next_follow_up"],
}
|
|
28
|
+
|
|
29
|
+
# Pickup field -> normalized (lower-case, single-spaced) line labels that are
# accepted as that field when parsing free-form markdown.
# Some labels appear under more than one field (e.g. "scope", "next"); a line
# carrying such a label fills every field that lists it.
FREEFORM_LABEL_ALIASES: dict[str, set[str]] = {
    "object": {"object"},
    "constraints": {"constraints", "constraint"},
    "success_criteria": {"success criteria", "success", "done when"},
    "question": {"question"},
    "chosen_path": {"decision", "chosen path", "recommendation"},
    "consequences": {"consequences", "tradeoffs", "trade-offs"},
    "claim": {"claim"},
    "boundary": {"boundary", "scope"},
    "test_path": {"test", "test path"},
    "objective": {"objective"},
    "method": {"method"},
    "outputs": {"outputs", "evidence"},
    "current_state": {"current state"},
    "risks": {"risks", "risk"},
    "next_handoff_target": {"next", "next handoff target", "handoff"},
    "scope": {"scope"},
    "rule": {"rule"},
    "enforcement_surface": {"enforcement", "enforcement surface"},
    "status": {"status"},
    "next_follow_up": {"next follow up", "next follow-up", "next"},
}
|
|
51
|
+
|
|
52
|
+
# Per artifact class: pickup field -> the generic-checklist key that is read
# to answer that field.
CHECKLIST_FIELD_MAP: dict[str, dict[str, str]] = {
    "task": {
        "object": "summary",
        "constraints": "constraints",
        "success_criteria": "checks",
    },
    "decision": {
        "question": "summary",
        "chosen_path": "approach",
        "consequences": "risks",
    },
    "hypothesis": {
        "claim": "summary",
        "boundary": "scope",
        "test_path": "checks",
    },
    "experiment": {
        "objective": "summary",
        "method": "approach",
        "outputs": "checks",
    },
    "checkpoint": {
        "current_state": "notes",
        "risks": "risks",
        "next_handoff_target": "handoff",
    },
    "policy": {
        "scope": "scope",
        "rule": "summary",
        "enforcement_surface": "checks",
    },
    "result": {
        "claim": "summary",
        "status": "checks",
        "next_follow_up": "handoff",
    },
}
|
|
89
|
+
|
|
90
|
+
# Score credited to a pickup field when it is answered from the given
# generic-checklist key (a populated kernel field is credited 1.0).
CHECKLIST_WEIGHTS: dict[str, float] = {
    "summary": 0.65,
    "scope": 0.85,
    "constraints": 0.85,
    "approach": 0.75,
    "checks": 0.75,
    "risks": 0.75,
    "handoff": 0.9,
    "notes": 0.6,
}
|
|
100
|
+
|
|
101
|
+
# Score credited to a pickup field recovered from a labelled free-form line.
FREEFORM_WEIGHT = 0.5
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _run(args: list[str]) -> subprocess.CompletedProcess[str]:
    """Execute *args* as a subprocess with ``REPO_ROOT`` as working directory.

    Output is captured as text.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero status.
    """
    working_dir = str(REPO_ROOT)
    completed = subprocess.run(
        args,
        cwd=working_dir,
        capture_output=True,
        text=True,
        check=True,
    )
    return completed
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _read_json(path: Path) -> Any:
    """Read the UTF-8 text file at *path* and return its parsed JSON content."""
    raw_text = path.read_text(encoding="utf-8")
    return json.loads(raw_text)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _load_cases() -> list[dict[str, Any]]:
    """Return the ``cases`` list stored in the comparison corpus file.

    Raises:
        RuntimeError: If ``cases`` is missing, is not a list, or is empty.
    """
    corpus = _read_json(COMPARISON_CORPUS)
    loaded = corpus.get("cases", [])
    if isinstance(loaded, list) and loaded:
        return loaded
    raise RuntimeError(f"comparison corpus has no cases: {COMPARISON_CORPUS}")
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _normalize_label(value: str) -> str:
    """Trim *value*, lower-case it, and collapse each whitespace run to one space."""
    lowered = value.strip().lower()
    collapsed = re.sub(r"\s+", " ", lowered)
    return collapsed
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _extract_freeform_answers(body: str) -> dict[str, str]:
    """Collect ``Label: value`` lines from free-form markdown.

    Each line may start with markdown decoration (``#``, ``>``, ``*``, ``-``
    or whitespace). The label is normalized and looked up in
    ``FREEFORM_LABEL_ALIASES``; every field whose alias set contains the
    label receives the line's value. When several lines map to the same
    field, the last one wins.

    Returns:
        Mapping of pickup field name to the stripped value text.
    """
    labelled_line = re.compile(r"^[#>*\-\s]*([A-Za-z][A-Za-z \-_/]+):\s*(.+?)\s*$")
    found: dict[str, str] = {}
    for text in body.splitlines():
        if not text.strip():
            continue
        parsed = labelled_line.match(text)
        if parsed is None:
            continue
        raw_label, raw_value = parsed.groups()
        key = _normalize_label(raw_label)
        content = raw_value.strip()
        if not content:
            continue
        # A label can belong to several fields, so update all of them.
        found.update(
            {
                field: content
                for field, aliases in FREEFORM_LABEL_ALIASES.items()
                if key in aliases
            }
        )
    return found
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _value_present(value: Any) -> bool:
    """Report whether *value* counts as a populated answer.

    A string counts when it is non-blank. A list counts when it is non-empty
    and every item is a non-blank string. Anything else does not count.
    """
    if isinstance(value, str):
        return value.strip() != ""
    if not isinstance(value, list) or len(value) == 0:
        return False
    for entry in value:
        # One non-string or blank item disqualifies the whole list.
        if not isinstance(entry, str) or not entry.strip():
            return False
    return True
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _display_value(value: Any) -> str | None:
    """Render *value* as display text, or ``None`` when nothing is left to show.

    A string is returned stripped. A list is rendered by joining its
    non-blank string items (each stripped) with ``"; "``; other items are
    skipped. Any other type yields ``None``.
    """
    if isinstance(value, str):
        text = value.strip()
        return text if text else None
    if isinstance(value, list):
        parts: list[str] = []
        for entry in value:
            if not isinstance(entry, str):
                continue
            cleaned = entry.strip()
            if cleaned:
                parts.append(cleaned)
        if not parts:
            return None
        return "; ".join(parts)
    return None
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _score_condition(case: dict[str, Any], condition: str) -> dict[str, Any]:
    """Score one corpus case under a single artifact condition.

    For every pickup target of the case's artifact class an answer is looked
    up and credited:

    * ``"kernel"``: read from ``case["kernel_artifact"]``; 1.0 when present.
    * ``"generic_checklist"``: read from ``case["generic_checklist"]`` via
      ``CHECKLIST_FIELD_MAP``; the matching ``CHECKLIST_WEIGHTS`` entry when
      present.
    * ``"freeform"``: parsed from ``case["freeform_markdown"]``;
      ``FREEFORM_WEIGHT`` when a value was found.

    Missing answers are credited 0.0.

    Returns:
        Dict with the condition, artifact class, targets, per-field answers
        and scores, the number of answered targets, the mean score
        (``pickup_score``) and ``ambiguity_remaining`` (1 - mean score).

    Raises:
        KeyError: If the artifact class has no entry in ``PICKUP_FIELDS``.
        RuntimeError: If *condition* is not one of the three supported names.
    """
    kind = case["artifact_class"]
    wanted = PICKUP_FIELDS[kind]
    picked: dict[str, str | None] = {}
    credit: dict[str, float] = {}

    if condition == "freeform":
        parsed = _extract_freeform_answers(case["freeform_markdown"])
        for name in wanted:
            text = parsed.get(name)
            picked[name] = text
            credit[name] = FREEFORM_WEIGHT if text else 0.0
    elif condition == "generic_checklist":
        checklist = case["generic_checklist"]
        mapping = CHECKLIST_FIELD_MAP[kind]
        for name in wanted:
            checklist_key = mapping[name]
            raw = checklist.get(checklist_key)
            picked[name] = _display_value(raw)
            if _value_present(raw):
                credit[name] = CHECKLIST_WEIGHTS[checklist_key]
            else:
                credit[name] = 0.0
    elif condition == "kernel":
        artifact = case["kernel_artifact"]
        for name in wanted:
            raw = artifact.get(name)
            picked[name] = _display_value(raw)
            credit[name] = 1.0 if _value_present(raw) else 0.0
    else:
        raise RuntimeError(f"unsupported condition: {condition}")

    pickup = round(sum(credit.values()) / len(wanted), 3)
    answered_count = len([text for text in picked.values() if text])
    rounded_credit = {name: round(points, 3) for name, points in credit.items()}
    return {
        "condition": condition,
        "artifact_class": kind,
        "pickup_targets": wanted,
        "answers": picked,
        "answer_scores": rounded_credit,
        "answered_targets": answered_count,
        "pickup_score": pickup,
        "ambiguity_remaining": round(1.0 - pickup, 3),
    }
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def _score_case(case: dict[str, Any]) -> dict[str, Any]:
    """Score *case* under every entry of ``CONDITIONS``.

    Returns:
        Dict carrying the case's ``id``, ``domain``, ``artifact_class`` and
        ``prompt`` plus a ``conditions`` mapping of condition name to its
        score record.
    """
    scored: dict[str, Any] = {
        key: case[key] for key in ("id", "domain", "artifact_class", "prompt")
    }
    per_condition: dict[str, Any] = {}
    for name in CONDITIONS:
        per_condition[name] = _score_condition(case, name)
    scored["conditions"] = per_condition
    return scored
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _mean(values: list[float]) -> float:
    """Return the arithmetic mean of *values* rounded to 3 decimals (0.0 if empty)."""
    if not values:
        return 0.0
    average = sum(values) / len(values)
    return round(average, 3)
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def _aggregate(cases: list[dict[str, Any]], condition: str) -> dict[str, Any]:
    """Summarize the scored *cases* for one *condition*.

    Returns:
        Dict with the condition name, the number of cases, one summary row
        per case, and the means of the pickup score, the remaining ambiguity
        and the answered-target rate (answered targets / total targets).
    """
    summaries: list[dict[str, Any]] = []
    for scored in cases:
        entry = scored["conditions"][condition]
        total_targets = len(entry["pickup_targets"])
        summaries.append(
            {
                "id": scored["id"],
                "domain": scored["domain"],
                "artifact_class": scored["artifact_class"],
                "pickup_score": entry["pickup_score"],
                "ambiguity_remaining": entry["ambiguity_remaining"],
                "answered_targets": entry["answered_targets"],
                "pickup_targets_total": total_targets,
                "answers": entry["answers"],
            }
        )

    score_column = [item["pickup_score"] for item in summaries]
    ambiguity_column = [item["ambiguity_remaining"] for item in summaries]
    rate_column = [
        item["answered_targets"] / item["pickup_targets_total"] for item in summaries
    ]
    return {
        "condition": condition,
        "cases_total": len(summaries),
        "rows": summaries,
        "mean_pickup_score": _mean(score_column),
        "mean_ambiguity_remaining": _mean(ambiguity_column),
        "mean_answered_target_rate": _mean(rate_column),
    }
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def _pairwise(cases: list[dict[str, Any]], left: str, right: str) -> dict[str, Any]:
    """Compare the pickup scores of condition *left* against *right* case by case.

    The per-case delta is ``left - right`` rounded to 3 decimals. A positive
    delta is a win for *left*, a negative one a loss, anything else a tie.

    Returns:
        Dict with both condition names, the win/tie/loss counts, the mean
        delta, and a ``by_case`` list holding each case's scores, delta and
        outcome.
    """
    tally = {"win": 0, "tie": 0, "loss": 0}
    comparisons: list[dict[str, Any]] = []
    for scored in cases:
        ours = scored["conditions"][left]["pickup_score"]
        theirs = scored["conditions"][right]["pickup_score"]
        gap = round(ours - theirs, 3)
        if gap > 0:
            verdict = "win"
        elif gap < 0:
            verdict = "loss"
        else:
            verdict = "tie"
        tally[verdict] += 1
        comparisons.append(
            {
                "id": scored["id"],
                "domain": scored["domain"],
                "artifact_class": scored["artifact_class"],
                "left_score": ours,
                "right_score": theirs,
                "delta": gap,
                "outcome": verdict,
            }
        )

    return {
        "left": left,
        "right": right,
        "wins": tally["win"],
        "ties": tally["tie"],
        "losses": tally["loss"],
        "mean_pickup_score_delta": _mean([item["delta"] for item in comparisons]),
        "by_case": comparisons,
    }
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def _gather_metadata() -> dict[str, Any]:
    """Collect provenance details for the report.

    Reads the package version from ``package.json`` under ``REPO_ROOT`` and
    shells out to ``git`` (HEAD commit and branch name) and ``node``
    (version string).

    Returns:
        Dict with the UTC generation timestamp, repo commit and branch,
        package version, Python and Node versions, and the platform string.
    """
    manifest = _read_json(REPO_ROOT / "package.json")
    version = manifest["version"]

    def _first_output(command: list[str]) -> str:
        # Stripped stdout of a command run through _run.
        return _run(command).stdout.strip()

    head_commit = _first_output(["git", "rev-parse", "HEAD"])
    head_branch = _first_output(["git", "rev-parse", "--abbrev-ref", "HEAD"])
    node_release = _first_output(["node", "--version"])

    stamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    python_release = sys.version.split()[0]
    metadata: dict[str, Any] = {
        "generated_at_utc": stamp,
        "repo_commit": head_commit,
        "repo_branch": head_branch,
        "package_version": version,
        "python_version": python_release,
        "node_version": node_release,
        "platform": platform.platform(),
    }
    return metadata
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def build_report() -> dict[str, Any]:
    """Score the comparison corpus and assemble the pickup-proxy report.

    Every corpus case is scored under each entry of ``CONDITIONS``, the
    scores are aggregated per condition and compared pairwise, and five
    claims are evaluated against the results.

    Returns:
        A dict with ``schema_version``, ``kind``, ``metadata``, ``corpus``,
        ``conditions``, ``pairwise``, ``claims`` and ``summary`` keys.
        ``summary["all_claims_pass"]`` is true only when every claim has
        status ``"pass"``.

    Errors raised by the helpers called here are not caught.
    """
    cases = [_score_case(case) for case in _load_cases()]
    conditions = {condition: _aggregate(cases, condition) for condition in CONDITIONS}
    pairwise = {
        "kernel_vs_generic_checklist": _pairwise(cases, "kernel", "generic_checklist"),
        "kernel_vs_freeform": _pairwise(cases, "kernel", "freeform"),
        "generic_checklist_vs_freeform": _pairwise(cases, "generic_checklist", "freeform"),
    }

    def _verdict(passed: bool) -> str:
        # Claim statuses are reported as the strings "pass" / "fail".
        return "pass" if passed else "fail"

    def _outscores(left: str, right: str) -> bool:
        # Strictly higher mean score AND not a single per-case loss.
        return (
            conditions[left]["mean_pickup_score"] > conditions[right]["mean_pickup_score"]
            and pairwise[f"{left}_vs_{right}"]["losses"] == 0
        )

    # The first claim is about class coverage, so check the classes actually
    # present instead of the raw case count: seven cases of a single class
    # must not pass. Full coverage implies at least one case per class.
    covered_classes = {case["artifact_class"] for case in cases}
    missing_classes = set(PICKUP_FIELDS) - covered_classes

    claims = [
        {
            "id": "matched_pickup_corpus_exists",
            "claim": "ORP has a matched internal pickup corpus spanning all seven kernel artifact classes.",
            "status": _verdict(not missing_classes),
        },
        {
            "id": "kernel_outscores_generic_checklist_on_pickup_proxy",
            "claim": "On the matched internal pickup proxy, kernel artifacts preserve more explicit pickup-ready information than generic checklist artifacts.",
            "status": _verdict(_outscores("kernel", "generic_checklist")),
        },
        {
            "id": "kernel_outscores_freeform_on_pickup_proxy",
            "claim": "On the matched internal pickup proxy, kernel artifacts preserve more explicit pickup-ready information than free-form artifacts.",
            "status": _verdict(_outscores("kernel", "freeform")),
        },
        {
            "id": "generic_checklist_improves_on_freeform_on_pickup_proxy",
            "claim": "On the matched internal pickup proxy, a generic checklist preserves more explicit pickup-ready information than free-form artifacts.",
            "status": _verdict(_outscores("generic_checklist", "freeform")),
        },
        {
            "id": "kernel_preserves_full_pickup_targets",
            "claim": "On the matched internal pickup proxy, kernel artifacts keep all pickup targets explicitly answerable.",
            "status": _verdict(
                conditions["kernel"]["mean_pickup_score"] == 1.0
                and conditions["kernel"]["mean_answered_target_rate"] == 1.0
            ),
        },
    ]

    return {
        "schema_version": "1.0.0",
        "kind": "orp_reasoning_kernel_pickup_report",
        "metadata": _gather_metadata(),
        "corpus": {
            "source": str(COMPARISON_CORPUS.relative_to(REPO_ROOT)),
            "cases_total": len(cases),
        },
        "conditions": conditions,
        "pairwise": pairwise,
        "claims": claims,
        "summary": {
            "all_claims_pass": all(claim["status"] == "pass" for claim in claims),
            "kernel_mean_pickup_score": conditions["kernel"]["mean_pickup_score"],
            "generic_checklist_mean_pickup_score": conditions["generic_checklist"]["mean_pickup_score"],
            "freeform_mean_pickup_score": conditions["freeform"]["mean_pickup_score"],
        },
    }
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
def main() -> int:
    """Command-line entry point: build the report, emit it, and return an exit code.

    The report is always printed to stdout as indented JSON. With ``--out``
    it is also written to that path; a relative path is resolved against
    ``REPO_ROOT`` and missing parent directories are created.

    Returns:
        0 when every claim in the report passes, otherwise 1.
    """
    cli = argparse.ArgumentParser(
        description="Run an explicit pickup/handoff proxy over free-form, generic checklist, and kernel artifacts."
    )
    cli.add_argument("--out", default="", help="Optional JSON output path")
    options = cli.parse_args()

    report = build_report()
    rendered = json.dumps(report, indent=2) + "\n"

    if options.out:
        destination = Path(options.out)
        if not destination.is_absolute():
            destination = REPO_ROOT / destination
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(rendered, encoding="utf-8")

    print(rendered, end="")
    if report["summary"]["all_claims_pass"]:
        return 0
    return 1
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
# When executed as a script, exit with the status code returned by main().
if __name__ == "__main__":
    raise SystemExit(main())
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://openresearchprotocol.com/spec/v1/kernel-extension.schema.json",
|
|
4
|
+
"title": "ORP Reasoning Kernel Extension",
|
|
5
|
+
"type": "object",
|
|
6
|
+
"additionalProperties": false,
|
|
7
|
+
"required": [
|
|
8
|
+
"schema_version",
|
|
9
|
+
"extension_id",
|
|
10
|
+
"title",
|
|
11
|
+
"status",
|
|
12
|
+
"description",
|
|
13
|
+
"fields"
|
|
14
|
+
],
|
|
15
|
+
"properties": {
|
|
16
|
+
"schema_version": {
|
|
17
|
+
"type": "string",
|
|
18
|
+
"const": "1.0.0"
|
|
19
|
+
},
|
|
20
|
+
"extension_id": {
|
|
21
|
+
"$ref": "#/$defs/non_empty_text"
|
|
22
|
+
},
|
|
23
|
+
"title": {
|
|
24
|
+
"$ref": "#/$defs/non_empty_text"
|
|
25
|
+
},
|
|
26
|
+
"status": {
|
|
27
|
+
"type": "string",
|
|
28
|
+
"enum": [
|
|
29
|
+
"draft",
|
|
30
|
+
"experimental",
|
|
31
|
+
"active",
|
|
32
|
+
"retired"
|
|
33
|
+
]
|
|
34
|
+
},
|
|
35
|
+
"description": {
|
|
36
|
+
"$ref": "#/$defs/non_empty_text"
|
|
37
|
+
},
|
|
38
|
+
"fields": {
|
|
39
|
+
"type": "array",
|
|
40
|
+
"minItems": 1,
|
|
41
|
+
"items": {
|
|
42
|
+
"type": "object",
|
|
43
|
+
"additionalProperties": false,
|
|
44
|
+
"required": [
|
|
45
|
+
"name",
|
|
46
|
+
"kind",
|
|
47
|
+
"artifact_classes",
|
|
48
|
+
"rationale"
|
|
49
|
+
],
|
|
50
|
+
"properties": {
|
|
51
|
+
"name": {
|
|
52
|
+
"$ref": "#/$defs/non_empty_text"
|
|
53
|
+
},
|
|
54
|
+
"kind": {
|
|
55
|
+
"type": "string",
|
|
56
|
+
"enum": [
|
|
57
|
+
"non_empty_text",
|
|
58
|
+
"text_list",
|
|
59
|
+
"text_or_text_list"
|
|
60
|
+
]
|
|
61
|
+
},
|
|
62
|
+
"artifact_classes": {
|
|
63
|
+
"type": "array",
|
|
64
|
+
"minItems": 1,
|
|
65
|
+
"items": {
|
|
66
|
+
"type": "string",
|
|
67
|
+
"enum": [
|
|
68
|
+
"task",
|
|
69
|
+
"decision",
|
|
70
|
+
"hypothesis",
|
|
71
|
+
"experiment",
|
|
72
|
+
"checkpoint",
|
|
73
|
+
"policy",
|
|
74
|
+
"result"
|
|
75
|
+
]
|
|
76
|
+
},
|
|
77
|
+
"uniqueItems": true
|
|
78
|
+
},
|
|
79
|
+
"required": {
|
|
80
|
+
"type": "boolean",
|
|
81
|
+
"default": false
|
|
82
|
+
},
|
|
83
|
+
"rationale": {
|
|
84
|
+
"$ref": "#/$defs/non_empty_text"
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
},
|
|
90
|
+
"$defs": {
|
|
91
|
+
"non_empty_text": {
|
|
92
|
+
"type": "string",
|
|
93
|
+
"minLength": 1
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
}
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://openresearchprotocol.com/spec/v1/kernel-proposal.schema.json",
|
|
4
|
+
"title": "ORP Reasoning Kernel Proposal",
|
|
5
|
+
"type": "object",
|
|
6
|
+
"additionalProperties": false,
|
|
7
|
+
"required": [
|
|
8
|
+
"schema_version",
|
|
9
|
+
"proposal_kind",
|
|
10
|
+
"title",
|
|
11
|
+
"status",
|
|
12
|
+
"summary",
|
|
13
|
+
"target_scope",
|
|
14
|
+
"proposed_change",
|
|
15
|
+
"rationale",
|
|
16
|
+
"evidence_refs",
|
|
17
|
+
"compatibility_notes",
|
|
18
|
+
"migration_plan",
|
|
19
|
+
"evaluation_plan"
|
|
20
|
+
],
|
|
21
|
+
"properties": {
|
|
22
|
+
"schema_version": {
|
|
23
|
+
"type": "string",
|
|
24
|
+
"const": "1.0.0"
|
|
25
|
+
},
|
|
26
|
+
"proposal_kind": {
|
|
27
|
+
"type": "string",
|
|
28
|
+
"enum": [
|
|
29
|
+
"add_field",
|
|
30
|
+
"new_class",
|
|
31
|
+
"requirement_change",
|
|
32
|
+
"deprecate_field"
|
|
33
|
+
]
|
|
34
|
+
},
|
|
35
|
+
"title": {
|
|
36
|
+
"$ref": "#/$defs/non_empty_text"
|
|
37
|
+
},
|
|
38
|
+
"status": {
|
|
39
|
+
"type": "string",
|
|
40
|
+
"enum": [
|
|
41
|
+
"draft",
|
|
42
|
+
"review",
|
|
43
|
+
"accepted",
|
|
44
|
+
"rejected"
|
|
45
|
+
]
|
|
46
|
+
},
|
|
47
|
+
"summary": {
|
|
48
|
+
"$ref": "#/$defs/non_empty_text"
|
|
49
|
+
},
|
|
50
|
+
"target_scope": {
|
|
51
|
+
"type": "object",
|
|
52
|
+
"additionalProperties": false,
|
|
53
|
+
"required": [
|
|
54
|
+
"artifact_classes",
|
|
55
|
+
"fields"
|
|
56
|
+
],
|
|
57
|
+
"properties": {
|
|
58
|
+
"artifact_classes": {
|
|
59
|
+
"type": "array",
|
|
60
|
+
"items": {
|
|
61
|
+
"type": "string",
|
|
62
|
+
"enum": [
|
|
63
|
+
"task",
|
|
64
|
+
"decision",
|
|
65
|
+
"hypothesis",
|
|
66
|
+
"experiment",
|
|
67
|
+
"checkpoint",
|
|
68
|
+
"policy",
|
|
69
|
+
"result"
|
|
70
|
+
]
|
|
71
|
+
},
|
|
72
|
+
"uniqueItems": true
|
|
73
|
+
},
|
|
74
|
+
"fields": {
|
|
75
|
+
"type": "array",
|
|
76
|
+
"items": {
|
|
77
|
+
"$ref": "#/$defs/non_empty_text"
|
|
78
|
+
},
|
|
79
|
+
"uniqueItems": true
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
},
|
|
83
|
+
"proposed_change": {
|
|
84
|
+
"$ref": "#/$defs/text_list"
|
|
85
|
+
},
|
|
86
|
+
"rationale": {
|
|
87
|
+
"$ref": "#/$defs/text_list"
|
|
88
|
+
},
|
|
89
|
+
"evidence_refs": {
|
|
90
|
+
"$ref": "#/$defs/text_list"
|
|
91
|
+
},
|
|
92
|
+
"compatibility_notes": {
|
|
93
|
+
"$ref": "#/$defs/text_list"
|
|
94
|
+
},
|
|
95
|
+
"migration_plan": {
|
|
96
|
+
"$ref": "#/$defs/text_list"
|
|
97
|
+
},
|
|
98
|
+
"evaluation_plan": {
|
|
99
|
+
"$ref": "#/$defs/text_list"
|
|
100
|
+
}
|
|
101
|
+
},
|
|
102
|
+
"$defs": {
|
|
103
|
+
"non_empty_text": {
|
|
104
|
+
"type": "string",
|
|
105
|
+
"minLength": 1
|
|
106
|
+
},
|
|
107
|
+
"text_list": {
|
|
108
|
+
"type": "array",
|
|
109
|
+
"minItems": 1,
|
|
110
|
+
"items": {
|
|
111
|
+
"$ref": "#/$defs/non_empty_text"
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
}
|