open-research-protocol 0.4.5 → 0.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,61 @@
1
+ version: "1"
2
+
3
+ project:
4
+ name: reasoning-kernel-starter
5
+ repo_root: .
6
+ canonical_paths:
7
+ code: src/
8
+ analysis: analysis/
9
+
10
+ lifecycle:
11
+ claim_status_map:
12
+ Draft: draft
13
+ In review: ready
14
+ Verified: reviewed
15
+ Blocked: blocked
16
+ Retracted: retracted
17
+ atom_status_map:
18
+ todo: draft
19
+ in_progress: ready
20
+ blocked: blocked
21
+ done: reviewed
22
+
23
+ gates:
24
+ - id: trace_widget_task_shape
25
+ description: Validate that the trace-widget request has been promoted into a solid task artifact.
26
+ phase: structure_kernel
27
+ command: echo ORP_KERNEL_OK
28
+ pass:
29
+ exit_codes: [0]
30
+ stdout_must_contain:
31
+ - ORP_KERNEL_OK
32
+ kernel:
33
+ mode: hard
34
+ artifacts:
35
+ - path: examples/kernel/trace-widget.task.kernel.yml
36
+ artifact_class: task
37
+ evidence:
38
+ status: process_only
39
+ note: The kernel artifact captures task structure for promotion, not evidence.
40
+ paths:
41
+ - examples/kernel/trace-widget.task.kernel.yml
42
+ on_fail: stop
43
+
44
+ - id: smoke
45
+ description: Basic starter smoke gate.
46
+ phase: verification
47
+ command: echo ORP_SMOKE
48
+ pass:
49
+ exit_codes: [0]
50
+ stdout_must_contain:
51
+ - ORP_SMOKE
52
+ on_fail: stop
53
+
54
+ profiles:
55
+ default:
56
+ description: Minimal kernel-aware starter profile.
57
+ mode: discovery
58
+ packet_kind: problem_scope
59
+ gate_ids:
60
+ - trace_widget_task_shape
61
+ - smoke
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "open-research-protocol",
3
- "version": "0.4.5",
3
+ "version": "0.4.7",
4
4
  "description": "ORP CLI (Open Research Protocol): agent-friendly research workflows, runtime, reports, and pack tooling.",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -0,0 +1,452 @@
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ import argparse
5
+ import json
6
+ from pathlib import Path
7
+ import platform
8
+ import statistics
9
+ import subprocess
10
+ import sys
11
+ import tempfile
12
+ import time
13
+ from typing import Any
14
+
15
+
16
# Repository root: this script lives one directory below it.
REPO_ROOT = Path(__file__).resolve().parents[1]

# Base command used to invoke the ORP CLI under benchmark.
CLI = ["node", "bin/orp.js"]

# Every v0.1 kernel artifact class exercised by the roundtrip benchmark.
ARTIFACT_CLASSES = [
    "task",
    "decision",
    "hypothesis",
    "experiment",
    "checkpoint",
    "policy",
    "result",
]
27
+
28
+
29
def _run(
    args: list[str],
    *,
    cwd: Path = REPO_ROOT,
    check: bool = True,
) -> subprocess.CompletedProcess[str]:
    """Execute *args* in *cwd* with captured text output.

    When *check* is true and the command exits non-zero, raise a
    RuntimeError carrying the command line plus its stdout/stderr so
    failures are diagnosable from the benchmark log alone.
    """
    completed = subprocess.run(
        args,
        cwd=str(cwd),
        capture_output=True,
        text=True,
    )
    failed = completed.returncode != 0
    if check and failed:
        raise RuntimeError(
            f"command failed: {' '.join(args)}\nstdout:\n{completed.stdout}\nstderr:\n{completed.stderr}"
        )
    return completed
46
+
47
+
48
def _run_orp(repo_root: Path, *args: str, check: bool = True) -> subprocess.CompletedProcess[str]:
    """Invoke the ORP CLI against *repo_root*, forwarding *check* to _run."""
    command = [*CLI, "--repo-root", str(repo_root), *args]
    return _run(command, check=check)
50
+
51
+
52
def _timed_orp(repo_root: Path, *args: str, check: bool = True) -> tuple[float, subprocess.CompletedProcess[str]]:
    """Run the ORP CLI and return (wall-clock milliseconds, completed process)."""
    t0 = time.perf_counter()
    result = _run_orp(repo_root, *args, check=check)
    elapsed_ms = (time.perf_counter() - t0) * 1000.0
    return elapsed_ms, result
56
+
57
+
58
+ def _write_json(path: Path, payload: dict[str, Any]) -> None:
59
+ path.parent.mkdir(parents=True, exist_ok=True)
60
+ path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
61
+
62
+
63
+ def _stats(values: list[float]) -> dict[str, float]:
64
+ return {
65
+ "mean_ms": round(statistics.mean(values), 3),
66
+ "median_ms": round(statistics.median(values), 3),
67
+ "min_ms": round(min(values), 3),
68
+ "max_ms": round(max(values), 3),
69
+ }
70
+
71
+
72
def _benchmark_init_starter(iterations: int) -> dict[str, Any]:
    """Benchmark the `orp init` bootstrap path over *iterations* fresh repos.

    Each iteration creates a temporary git repo, then times three CLI calls:
    `orp init`, `orp kernel validate` on the seeded starter artifact, and a
    default-profile `orp gate run`. Raises RuntimeError if any step does not
    report success. Returns observed timing stats, fixed latency targets,
    per-target pass flags, and up to two sample run-record paths.
    """
    timings: dict[str, list[float]] = {"init": [], "validate": [], "gate": []}
    run_records: list[str] = []

    for _ in range(iterations):
        with tempfile.TemporaryDirectory(prefix="orp-kernel-bench-init.") as td:
            root = Path(td)
            _run(["git", "init", str(root)])

            init_ms, init_proc = _timed_orp(root, "init", "--json")
            validate_ms, validate_proc = _timed_orp(
                root, "kernel", "validate", "analysis/orp.kernel.task.yml", "--json"
            )
            gate_ms, gate_proc = _timed_orp(root, "gate", "run", "--profile", "default", "--json")

            init_payload = json.loads(init_proc.stdout)
            validate_payload = json.loads(validate_proc.stdout)
            gate_payload = json.loads(gate_proc.stdout)

            if not init_payload.get("ok"):
                raise RuntimeError("orp init benchmark did not report ok=true")
            if not validate_payload.get("ok"):
                raise RuntimeError("starter kernel validate benchmark did not report ok=true")
            if gate_payload.get("overall") != "PASS":
                raise RuntimeError("starter kernel gate benchmark did not pass")

            timings["init"].append(init_ms)
            timings["validate"].append(validate_ms)
            timings["gate"].append(gate_ms)
            run_records.append(gate_payload["run_record"])

    # Human-scale local-ergonomics latency budgets (mean, milliseconds).
    targets = {
        "init_mean_lt_ms": 350.0,
        "validate_mean_lt_ms": 200.0,
        "gate_mean_lt_ms": 300.0,
    }
    observed = {
        "init": _stats(timings["init"]),
        "validate": _stats(timings["validate"]),
        "gate_run": _stats(timings["gate"]),
    }
    return {
        "iterations": iterations,
        "observed": observed,
        "targets": targets,
        "meets_targets": {
            "init": observed["init"]["mean_ms"] < targets["init_mean_lt_ms"],
            "validate": observed["validate"]["mean_ms"] < targets["validate_mean_lt_ms"],
            "gate_run": observed["gate_run"]["mean_ms"] < targets["gate_mean_lt_ms"],
        },
        # NOTE(review): these paths live inside the (now deleted) temp repos;
        # they are kept as provenance strings only, not readable files.
        "sample_run_records": run_records[:2],
    }
124
+
125
+
126
def _benchmark_artifact_roundtrip() -> dict[str, Any]:
    """Scaffold then validate one artifact of every class, each in a fresh repo.

    Times `orp kernel scaffold` and `orp kernel validate` per artifact class.
    Raises RuntimeError if either step fails to report ok=true. Returns
    per-class rows, aggregate stats, targets, and per-target pass flags.
    """
    rows: list[dict[str, Any]] = []
    scaffold_times: list[float] = []
    validate_times: list[float] = []

    for artifact_class in ARTIFACT_CLASSES:
        with tempfile.TemporaryDirectory(prefix=f"orp-kernel-bench-{artifact_class}.") as td:
            root = Path(td)
            out_path = f"analysis/{artifact_class}.kernel.yml"

            scaffold_ms, scaffold_proc = _timed_orp(
                root,
                "kernel", "scaffold",
                "--artifact-class", artifact_class,
                "--out", out_path,
                "--name", f"{artifact_class} benchmark",
                "--json",
            )
            validate_ms, validate_proc = _timed_orp(root, "kernel", "validate", out_path, "--json")

            scaffold_ok = json.loads(scaffold_proc.stdout).get("ok")
            validate_ok = json.loads(validate_proc.stdout).get("ok")
            if not scaffold_ok or not validate_ok:
                raise RuntimeError(f"roundtrip benchmark failed for artifact_class={artifact_class}")

            scaffold_times.append(scaffold_ms)
            validate_times.append(validate_ms)
            rows.append(
                {
                    "artifact_class": artifact_class,
                    "scaffold_ms": round(scaffold_ms, 3),
                    "validate_ms": round(validate_ms, 3),
                }
            )

    observed = {
        "scaffold": _stats(scaffold_times),
        "validate": _stats(validate_times),
    }
    # Mean-latency budgets for one-shot scaffold/validate operations (ms).
    targets = {
        "scaffold_mean_lt_ms": 200.0,
        "validate_mean_lt_ms": 200.0,
    }
    return {
        "artifact_classes_total": len(rows),
        "rows": rows,
        "observed": observed,
        "targets": targets,
        "meets_targets": {
            "scaffold": observed["scaffold"]["mean_ms"] < targets["scaffold_mean_lt_ms"],
            "validate": observed["validate"]["mean_ms"] < targets["validate_mean_lt_ms"],
        },
    }
180
+
181
+
182
def _benchmark_gate_modes() -> dict[str, Any]:
    """Exercise hard/soft kernel gate modes plus a legacy gate without kernel config.

    Seeds an intentionally incomplete task artifact and a bench config with
    three profiles, then runs each profile once and inspects both the CLI
    JSON payloads and the first result in each persisted run record.
    Expected outcomes: hard mode blocks (exit 1, overall FAIL), soft mode
    passes while recording an invalid kernel validation, and the legacy
    gate carries no kernel_validation key at all.
    """
    config_name = "orp.kernel.bench.json"
    artifact_rel = "analysis/invalid-task.kernel.json"

    def _profile(description: str, gate_id: str) -> dict[str, Any]:
        # One test-mode profile wired to exactly one gate.
        return {
            "description": description,
            "mode": "test",
            "packet_kind": "problem_scope",
            "gate_ids": [gate_id],
        }

    def _gate(gate_id: str, kernel_mode: str | None) -> dict[str, Any]:
        # Shared gate shape; kernel_mode=None yields a legacy gate with no kernel config.
        spec: dict[str, Any] = {
            "id": gate_id,
            "phase": "structure_kernel",
            "command": "true",
            "pass": {"exit_codes": [0]},
        }
        if kernel_mode is not None:
            spec["kernel"] = {
                "mode": kernel_mode,
                "artifacts": [
                    {
                        "path": artifact_rel,
                        "artifact_class": "task",
                    }
                ],
            }
        return spec

    with tempfile.TemporaryDirectory(prefix="orp-kernel-bench-gates.") as td:
        root = Path(td)

        # Deliberately incomplete task artifact (drives the invalid-validation path).
        _write_json(
            root / "analysis" / "invalid-task.kernel.json",
            {
                "schema_version": "1.0.0",
                "artifact_class": "task",
                "object": "terminal trace widget",
                "goal": "surface lane state and drift",
                "boundary": "terminal-first workflow",
            },
        )
        _write_json(
            root / config_name,
            {
                "profiles": {
                    "hard": _profile("hard kernel gate", "kernel_hard"),
                    "soft": _profile("soft kernel gate", "kernel_soft"),
                    "legacy": _profile("legacy structure kernel gate", "kernel_legacy"),
                },
                "gates": [
                    _gate("kernel_hard", "hard"),
                    _gate("kernel_soft", "soft"),
                    _gate("kernel_legacy", None),
                ],
            },
        )

        def _gate_run(profile: str, *, check: bool = True) -> tuple[float, subprocess.CompletedProcess[str]]:
            return _timed_orp(
                root,
                "--config", config_name,
                "gate", "run",
                "--profile", profile,
                "--json",
                check=check,
            )

        # Hard mode is expected to exit non-zero, so don't raise on failure.
        hard_ms, hard_proc = _gate_run("hard", check=False)
        soft_ms, soft_proc = _gate_run("soft")
        legacy_ms, legacy_proc = _gate_run("legacy")

        hard_payload = json.loads(hard_proc.stdout)
        soft_payload = json.loads(soft_proc.stdout)
        legacy_payload = json.loads(legacy_proc.stdout)

        def _first_result(payload: dict[str, Any]) -> dict[str, Any]:
            # Load the run record the CLI persisted and take its first gate result.
            record_path = root / payload["run_record"]
            record = json.loads(record_path.read_text(encoding="utf-8"))
            return record["results"][0]

        hard_result = _first_result(hard_payload)
        soft_result = _first_result(soft_payload)
        legacy_result = _first_result(legacy_payload)

        return {
            "hard_mode": {
                "ms": round(hard_ms, 3),
                "exit_code": hard_proc.returncode,
                "overall": hard_payload["overall"],
                "kernel_valid": hard_result["kernel_validation"]["valid"],
                "missing_fields": hard_result["kernel_validation"]["artifacts"][0]["missing_fields"],
            },
            "soft_mode": {
                "ms": round(soft_ms, 3),
                "exit_code": soft_proc.returncode,
                "overall": soft_payload["overall"],
                "kernel_valid": soft_result["kernel_validation"]["valid"],
            },
            "legacy_compatibility": {
                "ms": round(legacy_ms, 3),
                "exit_code": legacy_proc.returncode,
                "overall": legacy_payload["overall"],
                "has_kernel_validation": "kernel_validation" in legacy_result,
            },
            "meets_expectations": {
                "hard_blocks_invalid_artifact": (
                    hard_proc.returncode == 1
                    and hard_payload["overall"] == "FAIL"
                    and hard_result["kernel_validation"]["valid"] is False
                ),
                "soft_allows_invalid_artifact_with_advisory": (
                    soft_proc.returncode == 0
                    and soft_payload["overall"] == "PASS"
                    and soft_result["kernel_validation"]["valid"] is False
                ),
                "legacy_structure_kernel_remains_compatible": (
                    legacy_proc.returncode == 0
                    and legacy_payload["overall"] == "PASS"
                    and "kernel_validation" not in legacy_result
                ),
            },
        }
331
+
332
+
333
def _gather_metadata() -> dict[str, Any]:
    """Collect report provenance: timestamp, repo state, and toolchain versions.

    Shells out to git and node; raises RuntimeError (via _run) if either
    tool is unavailable or fails.
    """
    package_manifest = (REPO_ROOT / "package.json").read_text(encoding="utf-8")
    return {
        "generated_at_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "repo_commit": _run(["git", "rev-parse", "HEAD"]).stdout.strip(),
        "repo_branch": _run(["git", "rev-parse", "--abbrev-ref", "HEAD"]).stdout.strip(),
        "package_version": json.loads(package_manifest)["version"],
        "python_version": sys.version.split()[0],
        "node_version": _run(["node", "--version"]).stdout.strip(),
        "platform": platform.platform(),
    }
347
+
348
+
349
def build_report(iterations: int) -> dict[str, Any]:
    """Run every kernel benchmark and assemble the validation report payload.

    *iterations* controls only the init-bootstrap benchmark; the roundtrip
    and gate-mode benchmarks are single-shot. Each report claim is graded
    pass/fail from the benchmark outcomes it cites as evidence.
    """
    init_benchmark = _benchmark_init_starter(iterations)
    roundtrip_benchmark = _benchmark_artifact_roundtrip()
    gate_mode_benchmark = _benchmark_gate_modes()

    expectations = gate_mode_benchmark["meets_expectations"]
    perf_ok = all(init_benchmark["meets_targets"].values()) and all(
        roundtrip_benchmark["meets_targets"].values()
    )

    def _status(ok: bool) -> str:
        return "pass" if ok else "fail"

    claims = [
        {
            "id": "starter_kernel_bootstrap",
            "claim": "orp init seeds a valid starter kernel artifact and a passing default structure_kernel gate.",
            # The init benchmark raises on any failure, so reaching this point means pass.
            "status": "pass",
            "evidence": [
                "benchmarks.init_starter_kernel",
                "cli/orp.py",
                "tests/test_orp_init.py",
            ],
        },
        {
            "id": "typed_artifact_roundtrip",
            "claim": "All seven v0.1 artifact classes can be scaffolded and validated through the CLI.",
            "status": _status(roundtrip_benchmark["artifact_classes_total"] == 7),
            "evidence": [
                "benchmarks.artifact_roundtrip",
                "spec/v1/kernel.schema.json",
                "tests/test_orp_kernel.py",
            ],
        },
        {
            "id": "promotion_enforcement_modes",
            "claim": "Hard mode blocks invalid promotable artifacts, while soft mode records advisory issues without blocking.",
            "status": _status(
                expectations["hard_blocks_invalid_artifact"]
                and expectations["soft_allows_invalid_artifact_with_advisory"]
            ),
            "evidence": [
                "benchmarks.gate_modes",
                "tests/test_orp_kernel.py",
            ],
        },
        {
            "id": "legacy_structure_kernel_compatibility",
            "claim": "Existing structure_kernel gates without explicit kernel config remain compatible.",
            "status": _status(expectations["legacy_structure_kernel_remains_compatible"]),
            "evidence": [
                "benchmarks.gate_modes",
                "cli/orp.py",
            ],
        },
        {
            "id": "local_cli_kernel_ergonomics",
            "claim": "One-shot kernel CLI operations remain within human-scale local ergonomics targets on the reference machine.",
            "status": _status(perf_ok),
            "evidence": [
                "benchmarks.init_starter_kernel",
                "benchmarks.artifact_roundtrip",
            ],
        },
    ]

    return {
        "schema_version": "1.0.0",
        "kind": "orp_reasoning_kernel_validation_report",
        "metadata": _gather_metadata(),
        "benchmarks": {
            "init_starter_kernel": init_benchmark,
            "artifact_roundtrip": roundtrip_benchmark,
            "gate_modes": gate_mode_benchmark,
        },
        "claims": claims,
        "summary": {
            "all_claims_pass": all(row["status"] == "pass" for row in claims),
            "artifact_classes_total": roundtrip_benchmark["artifact_classes_total"],
            "all_performance_targets_met": perf_ok,
        },
    }
429
+
430
+
431
def main() -> int:
    """CLI entry point.

    Builds the validation report, optionally writes it to --out (resolved
    against REPO_ROOT when relative), always prints it to stdout, and
    returns 0 only when every claim passed.
    """
    parser = argparse.ArgumentParser(description="Benchmark and validate ORP Reasoning Kernel v0.1")
    parser.add_argument("--out", default="", help="Optional JSON output path")
    parser.add_argument("--iterations", type=int, default=5, help="Iterations for bootstrap benchmark")
    parser.add_argument("--quick", action="store_true", help="Use a single bootstrap iteration for fast checks")
    args = parser.parse_args()

    # --quick overrides --iterations; otherwise clamp to at least one pass.
    iterations = 1 if args.quick else max(1, args.iterations)
    report = build_report(iterations)
    payload = json.dumps(report, indent=2) + "\n"

    if args.out:
        destination = Path(args.out)
        if not destination.is_absolute():
            destination = REPO_ROOT / destination
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(payload, encoding="utf-8")

    print(payload, end="")
    return 0 if report["summary"]["all_claims_pass"] else 1
449
+
450
+
451
if __name__ == "__main__":
    sys.exit(main())