open-research-protocol 0.4.7 → 0.4.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/README.md +15 -0
  2. package/cli/orp.py +1158 -43
  3. package/docs/AGENT_LOOP.md +3 -0
  4. package/docs/ORP_REASONING_KERNEL_AGENT_PILOT.md +125 -0
  5. package/docs/ORP_REASONING_KERNEL_AGENT_REPLICATION.md +97 -0
  6. package/docs/ORP_REASONING_KERNEL_CANONICAL_CONTINUATION_PILOT.md +100 -0
  7. package/docs/ORP_REASONING_KERNEL_COMPARISON_PILOT.md +116 -0
  8. package/docs/ORP_REASONING_KERNEL_CONTINUATION_PILOT.md +86 -0
  9. package/docs/ORP_REASONING_KERNEL_EVALUATION_PLAN.md +261 -0
  10. package/docs/ORP_REASONING_KERNEL_EVIDENCE_MATRIX.md +131 -0
  11. package/docs/ORP_REASONING_KERNEL_EVOLUTION.md +123 -0
  12. package/docs/ORP_REASONING_KERNEL_PICKUP_PILOT.md +107 -0
  13. package/docs/ORP_REASONING_KERNEL_TECHNICAL_VALIDATION.md +140 -22
  14. package/docs/ORP_REASONING_KERNEL_V0_1.md +11 -0
  15. package/docs/ORP_YOUTUBE_INSPECT.md +97 -0
  16. package/docs/benchmarks/orp_reasoning_kernel_agent_pilot_v0_1.json +796 -0
  17. package/docs/benchmarks/orp_reasoning_kernel_agent_replication_task_smoke.json +487 -0
  18. package/docs/benchmarks/orp_reasoning_kernel_agent_replication_v0_1.json +1927 -0
  19. package/docs/benchmarks/orp_reasoning_kernel_agent_replication_v0_2.json +10217 -0
  20. package/docs/benchmarks/orp_reasoning_kernel_canonical_continuation_task_smoke.json +174 -0
  21. package/docs/benchmarks/orp_reasoning_kernel_canonical_continuation_v0_1.json +598 -0
  22. package/docs/benchmarks/orp_reasoning_kernel_comparison_v0_1.json +688 -0
  23. package/docs/benchmarks/orp_reasoning_kernel_continuation_task_smoke.json +150 -0
  24. package/docs/benchmarks/orp_reasoning_kernel_continuation_v0_1.json +448 -0
  25. package/docs/benchmarks/orp_reasoning_kernel_pickup_v0_1.json +594 -0
  26. package/docs/benchmarks/orp_reasoning_kernel_v0_1_validation.json +769 -41
  27. package/examples/README.md +2 -0
  28. package/examples/kernel/comparison/comparison-corpus.json +337 -0
  29. package/examples/kernel/comparison/next-task-continuation.json +55 -0
  30. package/examples/kernel/corpus/operations/habanero-routing.checkpoint.kernel.yml +12 -0
  31. package/examples/kernel/corpus/operations/runner-routing.policy.kernel.yml +9 -0
  32. package/examples/kernel/corpus/product/project-home.decision.kernel.yml +11 -0
  33. package/examples/kernel/corpus/research/kernel-handoff.experiment.kernel.yml +16 -0
  34. package/examples/kernel/corpus/research/lane-drift.hypothesis.kernel.yml +11 -0
  35. package/examples/kernel/corpus/software/trace-widget.task.kernel.yml +13 -0
  36. package/examples/kernel/corpus/writing/kernel-launch.result.kernel.yml +12 -0
  37. package/llms.txt +3 -0
  38. package/package.json +4 -1
  39. package/scripts/orp-kernel-agent-pilot.py +673 -0
  40. package/scripts/orp-kernel-agent-replication.py +307 -0
  41. package/scripts/orp-kernel-benchmark.py +471 -2
  42. package/scripts/orp-kernel-canonical-continuation.py +381 -0
  43. package/scripts/orp-kernel-ci-check.py +138 -0
  44. package/scripts/orp-kernel-comparison.py +592 -0
  45. package/scripts/orp-kernel-continuation-pilot.py +384 -0
  46. package/scripts/orp-kernel-pickup.py +401 -0
  47. package/spec/v1/kernel-extension.schema.json +96 -0
  48. package/spec/v1/kernel-proposal.schema.json +115 -0
  49. package/spec/v1/kernel.schema.json +2 -1
  50. package/spec/v1/youtube-source.schema.json +151 -0
@@ -0,0 +1,174 @@
1
+ {
2
+ "schema_version": "1.0.0",
3
+ "kind": "orp_reasoning_kernel_canonical_continuation_report",
4
+ "metadata": {
5
+ "generated_at_utc": "2026-03-23T08:59:16Z",
6
+ "repo_commit": "c2f7f2a52744a00fb719d37de583da1f4ae615bd",
7
+ "repo_branch": "main",
8
+ "package_version": "0.4.7",
9
+ "python_version": "3.9.6",
10
+ "codex_version": "codex-cli 0.116.0",
11
+ "platform": "macOS-26.3-arm64-arm-64bit",
12
+ "model": "default"
13
+ },
14
+ "corpus": {
15
+ "cases_total": 1,
16
+ "domains": [
17
+ "software"
18
+ ],
19
+ "artifact_classes": [
20
+ "task"
21
+ ]
22
+ },
23
+ "conditions": {
24
+ "freeform": {
25
+ "condition": "freeform",
26
+ "cases_total": 1,
27
+ "rows": [
28
+ {
29
+ "id": "software_trace_widget",
30
+ "domain": "software",
31
+ "artifact_class": "task",
32
+ "total_score": 0.633,
33
+ "alignment_score": 0.4,
34
+ "invention_rate": 0.5,
35
+ "missing_list_match": 1.0,
36
+ "answers": {
37
+ "object": "terminal trace widget for lane monitoring",
38
+ "goal": "let operators tell quickly when a lane is drifting",
39
+ "boundary": "sketch the event inputs and compact panel layout",
40
+ "constraints": "stay terminal-first and low friction",
41
+ "success_criteria": null
42
+ },
43
+ "field_similarity": {
44
+ "object": 1.0,
45
+ "goal": 0.333,
46
+ "boundary": 0.0,
47
+ "constraints": 0.5,
48
+ "success_criteria": 0.0
49
+ }
50
+ }
51
+ ],
52
+ "mean_total_score": 0.633,
53
+ "mean_alignment_score": 0.4,
54
+ "mean_invention_rate": 0.5,
55
+ "mean_missing_list_match": 1.0,
56
+ "mean_confidence": 0.87,
57
+ "mean_elapsed_ms": 12235.633
58
+ },
59
+ "generic_checklist": {
60
+ "condition": "generic_checklist",
61
+ "cases_total": 1,
62
+ "rows": [
63
+ {
64
+ "id": "software_trace_widget",
65
+ "domain": "software",
66
+ "artifact_class": "task",
67
+ "total_score": 0.733,
68
+ "alignment_score": 0.6,
69
+ "invention_rate": 0.4,
70
+ "missing_list_match": 1.0,
71
+ "answers": {
72
+ "object": "Define the event schema and first rendering pass for the terminal trace widget.",
73
+ "goal": "Build the terminal trace widget for lane monitoring.",
74
+ "boundary": "Terminal-first lane visibility in active ORP sessions.",
75
+ "constraints": "Low friction; no GUI dependency.",
76
+ "success_criteria": "An operator can identify a drifting lane quickly."
77
+ },
78
+ "field_similarity": {
79
+ "object": 1.0,
80
+ "goal": 0.167,
81
+ "boundary": 1.0,
82
+ "constraints": 1.0,
83
+ "success_criteria": 0.333
84
+ }
85
+ }
86
+ ],
87
+ "mean_total_score": 0.733,
88
+ "mean_alignment_score": 0.6,
89
+ "mean_invention_rate": 0.4,
90
+ "mean_missing_list_match": 1.0,
91
+ "mean_confidence": 0.93,
92
+ "mean_elapsed_ms": 39052.112
93
+ },
94
+ "kernel": {
95
+ "condition": "kernel",
96
+ "cases_total": 1,
97
+ "rows": [
98
+ {
99
+ "id": "software_trace_widget",
100
+ "domain": "software",
101
+ "artifact_class": "task",
102
+ "total_score": 1.0,
103
+ "alignment_score": 1.0,
104
+ "invention_rate": 0.0,
105
+ "missing_list_match": 1.0,
106
+ "answers": {
107
+ "object": "terminal trace widget",
108
+ "goal": "surface lane drift and state clearly for operators",
109
+ "boundary": "terminal-first lane visibility; active ORP sessions only",
110
+ "constraints": "low friction; no GUI dependency",
111
+ "success_criteria": "an operator can identify a drifting lane within 10 seconds; the widget does not overload the terminal surface"
112
+ },
113
+ "field_similarity": {
114
+ "object": 1.0,
115
+ "goal": 1.0,
116
+ "boundary": 1.0,
117
+ "constraints": 1.0,
118
+ "success_criteria": 0.778
119
+ }
120
+ }
121
+ ],
122
+ "mean_total_score": 1.0,
123
+ "mean_alignment_score": 1.0,
124
+ "mean_invention_rate": 0.0,
125
+ "mean_missing_list_match": 1.0,
126
+ "mean_confidence": 0.99,
127
+ "mean_elapsed_ms": 10485.518
128
+ }
129
+ },
130
+ "pairwise": {
131
+ "kernel_vs_generic_checklist": {
132
+ "left": "kernel",
133
+ "right": "generic_checklist",
134
+ "wins": 1,
135
+ "ties": 0,
136
+ "losses": 0,
137
+ "mean_total_score_delta": 0.267
138
+ },
139
+ "kernel_vs_freeform": {
140
+ "left": "kernel",
141
+ "right": "freeform",
142
+ "wins": 1,
143
+ "ties": 0,
144
+ "losses": 0,
145
+ "mean_total_score_delta": 0.367
146
+ }
147
+ },
148
+ "claims": [
149
+ {
150
+ "id": "kernel_outscores_generic_checklist_on_canonical_task_continuation",
151
+ "claim": "On the matched live canonical-task continuation benchmark, kernel artifacts produce task artifacts that meet or exceed generic checklist quality without a higher invention rate.",
152
+ "status": "pass"
153
+ },
154
+ {
155
+ "id": "kernel_outscores_freeform_on_canonical_task_continuation",
156
+ "claim": "On the matched live canonical-task continuation benchmark, kernel artifacts produce stronger next-task artifacts than free-form artifacts.",
157
+ "status": "pass"
158
+ },
159
+ {
160
+ "id": "kernel_minimizes_invention_on_canonical_task_continuation",
161
+ "claim": "On the matched live canonical-task continuation benchmark, kernel artifacts minimize unsupported task-field invention.",
162
+ "status": "pass"
163
+ }
164
+ ],
165
+ "summary": {
166
+ "all_claims_pass": true,
167
+ "kernel_mean_total_score": 1.0,
168
+ "generic_checklist_mean_total_score": 0.733,
169
+ "freeform_mean_total_score": 0.633,
170
+ "kernel_mean_invention_rate": 0.0,
171
+ "generic_checklist_mean_invention_rate": 0.4,
172
+ "freeform_mean_invention_rate": 0.5
173
+ }
174
+ }