open-research-protocol 0.4.7 → 0.4.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -0
- package/cli/orp.py +1158 -43
- package/docs/AGENT_LOOP.md +3 -0
- package/docs/ORP_REASONING_KERNEL_AGENT_PILOT.md +125 -0
- package/docs/ORP_REASONING_KERNEL_AGENT_REPLICATION.md +97 -0
- package/docs/ORP_REASONING_KERNEL_CANONICAL_CONTINUATION_PILOT.md +100 -0
- package/docs/ORP_REASONING_KERNEL_COMPARISON_PILOT.md +116 -0
- package/docs/ORP_REASONING_KERNEL_CONTINUATION_PILOT.md +86 -0
- package/docs/ORP_REASONING_KERNEL_EVALUATION_PLAN.md +261 -0
- package/docs/ORP_REASONING_KERNEL_EVIDENCE_MATRIX.md +131 -0
- package/docs/ORP_REASONING_KERNEL_EVOLUTION.md +123 -0
- package/docs/ORP_REASONING_KERNEL_PICKUP_PILOT.md +107 -0
- package/docs/ORP_REASONING_KERNEL_TECHNICAL_VALIDATION.md +140 -22
- package/docs/ORP_REASONING_KERNEL_V0_1.md +11 -0
- package/docs/ORP_YOUTUBE_INSPECT.md +97 -0
- package/docs/benchmarks/orp_reasoning_kernel_agent_pilot_v0_1.json +796 -0
- package/docs/benchmarks/orp_reasoning_kernel_agent_replication_task_smoke.json +487 -0
- package/docs/benchmarks/orp_reasoning_kernel_agent_replication_v0_1.json +1927 -0
- package/docs/benchmarks/orp_reasoning_kernel_agent_replication_v0_2.json +10217 -0
- package/docs/benchmarks/orp_reasoning_kernel_canonical_continuation_task_smoke.json +174 -0
- package/docs/benchmarks/orp_reasoning_kernel_canonical_continuation_v0_1.json +598 -0
- package/docs/benchmarks/orp_reasoning_kernel_comparison_v0_1.json +688 -0
- package/docs/benchmarks/orp_reasoning_kernel_continuation_task_smoke.json +150 -0
- package/docs/benchmarks/orp_reasoning_kernel_continuation_v0_1.json +448 -0
- package/docs/benchmarks/orp_reasoning_kernel_pickup_v0_1.json +594 -0
- package/docs/benchmarks/orp_reasoning_kernel_v0_1_validation.json +769 -41
- package/examples/README.md +2 -0
- package/examples/kernel/comparison/comparison-corpus.json +337 -0
- package/examples/kernel/comparison/next-task-continuation.json +55 -0
- package/examples/kernel/corpus/operations/habanero-routing.checkpoint.kernel.yml +12 -0
- package/examples/kernel/corpus/operations/runner-routing.policy.kernel.yml +9 -0
- package/examples/kernel/corpus/product/project-home.decision.kernel.yml +11 -0
- package/examples/kernel/corpus/research/kernel-handoff.experiment.kernel.yml +16 -0
- package/examples/kernel/corpus/research/lane-drift.hypothesis.kernel.yml +11 -0
- package/examples/kernel/corpus/software/trace-widget.task.kernel.yml +13 -0
- package/examples/kernel/corpus/writing/kernel-launch.result.kernel.yml +12 -0
- package/llms.txt +3 -0
- package/package.json +4 -1
- package/scripts/orp-kernel-agent-pilot.py +673 -0
- package/scripts/orp-kernel-agent-replication.py +307 -0
- package/scripts/orp-kernel-benchmark.py +471 -2
- package/scripts/orp-kernel-canonical-continuation.py +381 -0
- package/scripts/orp-kernel-ci-check.py +138 -0
- package/scripts/orp-kernel-comparison.py +592 -0
- package/scripts/orp-kernel-continuation-pilot.py +384 -0
- package/scripts/orp-kernel-pickup.py +401 -0
- package/spec/v1/kernel-extension.schema.json +96 -0
- package/spec/v1/kernel-proposal.schema.json +115 -0
- package/spec/v1/kernel.schema.json +2 -1
- package/spec/v1/youtube-source.schema.json +151 -0
package/examples/README.md
CHANGED
|
@@ -10,6 +10,8 @@ Additional v1 runtime draft examples:
|
|
|
10
10
|
|
|
11
11
|
- `orp.reasoning-kernel.starter.yml` — minimal kernel-aware profile showing a real `structure_kernel` gate.
|
|
12
12
|
- `kernel/trace-widget.task.kernel.yml` — example typed kernel artifact for a promotable task.
|
|
13
|
+
- `kernel/corpus/` — small cross-domain reference corpus used by the kernel validation benchmarks.
|
|
14
|
+
- `kernel/comparison/` — matched prompt corpus used by the kernel comparison pilot harness.
|
|
13
15
|
- `orp.sunflower-coda.atomic.yml` — discovery-first profile for atomic board workflows.
|
|
14
16
|
- `orp.sunflower-coda.live-compare.yml` — side-by-side gate-compare profiles for sunflower Problems 857/20/367.
|
|
15
17
|
- `orp.sunflower-coda.pr-governance.yml` — local-first PR governance profile set (pre-open, draft-readiness, full flow).
|
|
package/examples/kernel/comparison/comparison-corpus.json
ADDED
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schema_version": "1.0.0",
|
|
3
|
+
"kind": "orp_reasoning_kernel_comparison_corpus",
|
|
4
|
+
"generic_checklist_fields": [
|
|
5
|
+
"artifact_type",
|
|
6
|
+
"summary",
|
|
7
|
+
"scope",
|
|
8
|
+
"constraints",
|
|
9
|
+
"approach",
|
|
10
|
+
"checks",
|
|
11
|
+
"risks",
|
|
12
|
+
"evidence",
|
|
13
|
+
"handoff",
|
|
14
|
+
"notes"
|
|
15
|
+
],
|
|
16
|
+
"cases": [
|
|
17
|
+
{
|
|
18
|
+
"id": "software_trace_widget",
|
|
19
|
+
"domain": "software",
|
|
20
|
+
"artifact_class": "task",
|
|
21
|
+
"prompt": "Build the trace widget for terminal sessions so I can watch what lanes are doing and quickly tell if one is drifting.",
|
|
22
|
+
"freeform_markdown": "# Trace widget\nObject: terminal trace widget for lane monitoring.\nGoal: let operators tell quickly when a lane is drifting.\nConstraints: stay terminal-first and low friction.\nNext: sketch the event inputs and compact panel layout.\n",
|
|
23
|
+
"generic_checklist": {
|
|
24
|
+
"artifact_type": "task",
|
|
25
|
+
"summary": "Build the terminal trace widget for lane monitoring.",
|
|
26
|
+
"scope": "Terminal-first lane visibility in active ORP sessions.",
|
|
27
|
+
"constraints": [
|
|
28
|
+
"low friction",
|
|
29
|
+
"no GUI dependency"
|
|
30
|
+
],
|
|
31
|
+
"approach": "Start with the event inputs and a compact summary row.",
|
|
32
|
+
"checks": [
|
|
33
|
+
"An operator can identify a drifting lane quickly."
|
|
34
|
+
],
|
|
35
|
+
"risks": [
|
|
36
|
+
"Too much noise if every event is surfaced."
|
|
37
|
+
],
|
|
38
|
+
"evidence": [
|
|
39
|
+
"sample lane logs"
|
|
40
|
+
],
|
|
41
|
+
"handoff": "Define the event schema and first rendering pass.",
|
|
42
|
+
"notes": "Use the trace widget as a first debugging surface."
|
|
43
|
+
},
|
|
44
|
+
"kernel_artifact": {
|
|
45
|
+
"schema_version": "1.0.0",
|
|
46
|
+
"artifact_class": "task",
|
|
47
|
+
"object": "terminal trace widget",
|
|
48
|
+
"goal": "surface lane drift and state clearly for operators",
|
|
49
|
+
"boundary": [
|
|
50
|
+
"terminal-first lane visibility",
|
|
51
|
+
"active ORP sessions only"
|
|
52
|
+
],
|
|
53
|
+
"constraints": [
|
|
54
|
+
"low friction",
|
|
55
|
+
"no GUI dependency"
|
|
56
|
+
],
|
|
57
|
+
"success_criteria": [
|
|
58
|
+
"an operator can identify a drifting lane within 10 seconds",
|
|
59
|
+
"the widget does not overload the terminal surface"
|
|
60
|
+
]
|
|
61
|
+
}
|
|
62
|
+
},
|
|
63
|
+
{
|
|
64
|
+
"id": "product_project_home",
|
|
65
|
+
"domain": "product",
|
|
66
|
+
"artifact_class": "decision",
|
|
67
|
+
"prompt": "Decide whether the web app home should lead with linked projects or the old idea board.",
|
|
68
|
+
"freeform_markdown": "# Home surface direction\nQuestion: should the web app lead with linked projects or the old idea board?\nDecision: lead with linked projects first.\nWhy: active work should be foregrounded and idea browsing can move into Pensieve.\nTradeoffs: the old idea board becomes secondary navigation.\n",
|
|
69
|
+
"generic_checklist": {
|
|
70
|
+
"artifact_type": "decision",
|
|
71
|
+
"summary": "Choose the linked-project home instead of the old idea-board-first home.",
|
|
72
|
+
"scope": "Default logged-in dashboard experience.",
|
|
73
|
+
"constraints": [
|
|
74
|
+
"keep the home calm",
|
|
75
|
+
"preserve access to the full idea library"
|
|
76
|
+
],
|
|
77
|
+
"approach": "Lead with linked projects and move broader idea browsing into Pensieve.",
|
|
78
|
+
"checks": [
|
|
79
|
+
"Users can reach active work in one click."
|
|
80
|
+
],
|
|
81
|
+
"risks": [
|
|
82
|
+
"Idea discovery becomes less prominent on the home screen."
|
|
83
|
+
],
|
|
84
|
+
"evidence": [
|
|
85
|
+
"Rust app home flow",
|
|
86
|
+
"current web redesign branch"
|
|
87
|
+
],
|
|
88
|
+
"handoff": "Implement the linked-project home shell and lower-right Pensieve access.",
|
|
89
|
+
"notes": "Alternative considered: keep the idea board as the front door."
|
|
90
|
+
},
|
|
91
|
+
"kernel_artifact": {
|
|
92
|
+
"schema_version": "1.0.0",
|
|
93
|
+
"artifact_class": "decision",
|
|
94
|
+
"question": "Should the web app home foreground linked projects or the old idea board?",
|
|
95
|
+
"chosen_path": "Foreground linked projects and move broad idea browsing into Pensieve.",
|
|
96
|
+
"rejected_alternatives": [
|
|
97
|
+
"keep the old idea board as the default home",
|
|
98
|
+
"split the home evenly between ideas and projects"
|
|
99
|
+
],
|
|
100
|
+
"rationale": "Active work should be reachable immediately, while the idea library can stay available as secondary navigation.",
|
|
101
|
+
"consequences": [
|
|
102
|
+
"linked projects become the primary home object",
|
|
103
|
+
"idea browsing becomes one click deeper"
|
|
104
|
+
]
|
|
105
|
+
}
|
|
106
|
+
},
|
|
107
|
+
{
|
|
108
|
+
"id": "research_drift_hypothesis",
|
|
109
|
+
"domain": "research",
|
|
110
|
+
"artifact_class": "hypothesis",
|
|
111
|
+
"prompt": "Hypothesize whether drift summaries will help operators notice stalled lanes faster.",
|
|
112
|
+
"freeform_markdown": "# Drift summary hypothesis\nClaim: short drift summaries will help operators notice stalled lanes faster.\nScope: terminal-first multi-lane work.\nAssumptions: operators glance at summaries while they work.\nTest: compare stalled-lane pickup with and without summaries.\n",
|
|
113
|
+
"generic_checklist": {
|
|
114
|
+
"artifact_type": "hypothesis",
|
|
115
|
+
"summary": "Drift summaries will improve stalled-lane pickup speed.",
|
|
116
|
+
"scope": "Terminal-first multi-lane workflows.",
|
|
117
|
+
"constraints": [
|
|
118
|
+
"operators cannot be flooded with noise"
|
|
119
|
+
],
|
|
120
|
+
"approach": "Add summaries and compare pickup behavior against a no-summary baseline.",
|
|
121
|
+
"checks": [
|
|
122
|
+
"Measure stalled-lane detection time."
|
|
123
|
+
],
|
|
124
|
+
"risks": [
|
|
125
|
+
"Summaries may be ignored if they are too verbose."
|
|
126
|
+
],
|
|
127
|
+
"evidence": [
|
|
128
|
+
"pickup timing logs"
|
|
129
|
+
],
|
|
130
|
+
"handoff": "Design the comparison fixture and timing capture.",
|
|
131
|
+
"notes": "Assumes operators actually consult the summary lane."
|
|
132
|
+
},
|
|
133
|
+
"kernel_artifact": {
|
|
134
|
+
"schema_version": "1.0.0",
|
|
135
|
+
"artifact_class": "hypothesis",
|
|
136
|
+
"claim": "Short drift summaries reduce the time needed to identify stalled lanes.",
|
|
137
|
+
"boundary": [
|
|
138
|
+
"terminal-first multi-lane workflows",
|
|
139
|
+
"operators already monitoring active lanes"
|
|
140
|
+
],
|
|
141
|
+
"assumptions": [
|
|
142
|
+
"operators glance at summaries while they work",
|
|
143
|
+
"summaries do not introduce excessive noise"
|
|
144
|
+
],
|
|
145
|
+
"test_path": "Run matched stalled-lane pickup trials with and without summaries and compare detection time.",
|
|
146
|
+
"falsifiers": [
|
|
147
|
+
"pickup time does not improve materially",
|
|
148
|
+
"operators ignore the summaries"
|
|
149
|
+
]
|
|
150
|
+
}
|
|
151
|
+
},
|
|
152
|
+
{
|
|
153
|
+
"id": "research_handoff_experiment",
|
|
154
|
+
"domain": "research",
|
|
155
|
+
"artifact_class": "experiment",
|
|
156
|
+
"prompt": "Design an experiment to compare kernel tasks versus free-form tasks for handoff pickup.",
|
|
157
|
+
"freeform_markdown": "# Kernel handoff experiment\nObjective: compare free-form tasks, checklist tasks, and kernel tasks during handoff pickup.\nMethod: give matched task artifacts to a second operator and time correct interpretation.\nEvidence: collect scores and clarification counts.\nLimits: internal sample only.\n",
|
|
158
|
+
"generic_checklist": {
|
|
159
|
+
"artifact_type": "experiment",
|
|
160
|
+
"summary": "Compare handoff pickup across free-form, checklist, and kernel task artifacts.",
|
|
161
|
+
"scope": "Internal ORP operator handoffs.",
|
|
162
|
+
"constraints": [
|
|
163
|
+
"small internal sample",
|
|
164
|
+
"matched prompt set"
|
|
165
|
+
],
|
|
166
|
+
"approach": "Give a second operator one artifact at a time and record time to correct interpretation.",
|
|
167
|
+
"checks": [
|
|
168
|
+
"Capture time to correct interpretation.",
|
|
169
|
+
"Capture clarification counts."
|
|
170
|
+
],
|
|
171
|
+
"risks": [
|
|
172
|
+
"Small sample may limit interpretation."
|
|
173
|
+
],
|
|
174
|
+
"evidence": [
|
|
175
|
+
"pickup timings",
|
|
176
|
+
"clarification counts"
|
|
177
|
+
],
|
|
178
|
+
"handoff": "Prepare the prompt set and reviewer score sheet.",
|
|
179
|
+
"notes": "This is a first comparative signal, not a final outcome study."
|
|
180
|
+
},
|
|
181
|
+
"kernel_artifact": {
|
|
182
|
+
"schema_version": "1.0.0",
|
|
183
|
+
"artifact_class": "experiment",
|
|
184
|
+
"objective": "Measure whether kernel task artifacts improve handoff pickup quality over free-form and generic checklist alternatives.",
|
|
185
|
+
"method": "Run matched handoff trials where a second operator receives one artifact at a time and explains the task, constraints, and next action.",
|
|
186
|
+
"inputs": [
|
|
187
|
+
"matched prompt set",
|
|
188
|
+
"second-operator reviewers",
|
|
189
|
+
"three artifact conditions"
|
|
190
|
+
],
|
|
191
|
+
"outputs": [
|
|
192
|
+
"pickup scores",
|
|
193
|
+
"clarification counts",
|
|
194
|
+
"time to correct interpretation"
|
|
195
|
+
],
|
|
196
|
+
"evidence_expectations": [
|
|
197
|
+
"score sheets",
|
|
198
|
+
"timing logs",
|
|
199
|
+
"artifact corpus"
|
|
200
|
+
],
|
|
201
|
+
"interpretation_limits": [
|
|
202
|
+
"internal sample size is small",
|
|
203
|
+
"the pilot measures structural pickup, not full downstream outcomes"
|
|
204
|
+
]
|
|
205
|
+
}
|
|
206
|
+
},
|
|
207
|
+
{
|
|
208
|
+
"id": "operations_habanero_checkpoint",
|
|
209
|
+
"domain": "operations",
|
|
210
|
+
"artifact_class": "checkpoint",
|
|
211
|
+
"prompt": "Record the current state after stabilizing runner routing for Habanero.",
|
|
212
|
+
"freeform_markdown": "# Habanero routing checkpoint\nCompleted: restored linked-project routing for Habanero.\nCurrent state: the repo is bound and the primary session is routable again.\nRisks: other machines may still need a sync.\nNext: rerun runner sync on active machines.\n",
|
|
213
|
+
"generic_checklist": {
|
|
214
|
+
"artifact_type": "checkpoint",
|
|
215
|
+
"summary": "Restored the Habanero linked-project routing path.",
|
|
216
|
+
"scope": "Habanero runner routing and session availability.",
|
|
217
|
+
"constraints": [
|
|
218
|
+
"avoid duplicate world bindings"
|
|
219
|
+
],
|
|
220
|
+
"approach": "Normalize the local link state and resync runner state.",
|
|
221
|
+
"checks": [
|
|
222
|
+
"The linked project is routable again."
|
|
223
|
+
],
|
|
224
|
+
"risks": [
|
|
225
|
+
"Other active machines may still carry stale routing state."
|
|
226
|
+
],
|
|
227
|
+
"evidence": [
|
|
228
|
+
".git/orp/link/project.json",
|
|
229
|
+
"runner sync output"
|
|
230
|
+
],
|
|
231
|
+
"handoff": "Rerun runner sync on active machines and verify live pickup.",
|
|
232
|
+
"notes": "The local repo, primary session, and hosted world are aligned again."
|
|
233
|
+
},
|
|
234
|
+
"kernel_artifact": {
|
|
235
|
+
"schema_version": "1.0.0",
|
|
236
|
+
"artifact_class": "checkpoint",
|
|
237
|
+
"completed_unit": "Restored canonical runner routing for Habanero.",
|
|
238
|
+
"current_state": "The local project link, primary session, and hosted world are synchronized and routable again.",
|
|
239
|
+
"risks": [
|
|
240
|
+
"inactive machines may still hold stale routing state",
|
|
241
|
+
"older queued jobs may need a fresh sync before pickup"
|
|
242
|
+
],
|
|
243
|
+
"next_handoff_target": "Rerun runner sync on active machines and verify one fresh hosted job pickup.",
|
|
244
|
+
"artifact_refs": [
|
|
245
|
+
".git/orp/link/project.json",
|
|
246
|
+
".git/orp/link/sessions",
|
|
247
|
+
"orp/artifacts"
|
|
248
|
+
]
|
|
249
|
+
}
|
|
250
|
+
},
|
|
251
|
+
{
|
|
252
|
+
"id": "operations_runner_policy",
|
|
253
|
+
"domain": "operations",
|
|
254
|
+
"artifact_class": "policy",
|
|
255
|
+
"prompt": "Define the routing policy for hosted runner jobs so they only target routeable linked sessions.",
|
|
256
|
+
"freeform_markdown": "# Runner routing policy\nRule: only claim hosted jobs for linked projects that have a routeable local session.\nScope: hosted runner pickup.\nWhy: avoid claiming work with nowhere real to execute.\nInvariants: a claimed job must resolve to an actual local session.\n",
|
|
257
|
+
"generic_checklist": {
|
|
258
|
+
"artifact_type": "policy",
|
|
259
|
+
"summary": "Only claim hosted jobs for linked projects that have a routeable local session.",
|
|
260
|
+
"scope": "Hosted runner job pickup.",
|
|
261
|
+
"constraints": [
|
|
262
|
+
"do not claim unroutable jobs"
|
|
263
|
+
],
|
|
264
|
+
"approach": "Require synced linked-project state and a live local session before claiming work.",
|
|
265
|
+
"checks": [
|
|
266
|
+
"Runner pickup rejects unroutable jobs."
|
|
267
|
+
],
|
|
268
|
+
"risks": [
|
|
269
|
+
"Stale sync state can hide a routeable session."
|
|
270
|
+
],
|
|
271
|
+
"evidence": [
|
|
272
|
+
"runner sync payload",
|
|
273
|
+
"job poll behavior"
|
|
274
|
+
],
|
|
275
|
+
"handoff": "Audit pickup behavior against stale-session and missing-session cases.",
|
|
276
|
+
"notes": "The rule exists to prevent dead-end job claims."
|
|
277
|
+
},
|
|
278
|
+
"kernel_artifact": {
|
|
279
|
+
"schema_version": "1.0.0",
|
|
280
|
+
"artifact_class": "policy",
|
|
281
|
+
"scope": "Hosted runner job pickup and claim behavior.",
|
|
282
|
+
"rule": "Only claim a hosted job when the linked project has a routeable local session on the current machine.",
|
|
283
|
+
"rationale": "Jobs should only be claimed when the runner can execute them against a real local target.",
|
|
284
|
+
"invariants": [
|
|
285
|
+
"a claimed job must resolve to an actual local session",
|
|
286
|
+
"runner routing must stay machine-scoped"
|
|
287
|
+
],
|
|
288
|
+
"enforcement_surface": "runner sync, poll, and work lifecycle"
|
|
289
|
+
}
|
|
290
|
+
},
|
|
291
|
+
{
|
|
292
|
+
"id": "writing_kernel_launch_result",
|
|
293
|
+
"domain": "writing",
|
|
294
|
+
"artifact_class": "result",
|
|
295
|
+
"prompt": "Summarize the result of shipping the first reasoning kernel release and what still needs follow-up.",
|
|
296
|
+
"freeform_markdown": "# Kernel launch result\nClaim: ORP shipped the first reasoning kernel release.\nEvidence: validation docs, benchmarks, and shipped CLI commands.\nStatus: shipped in the CLI.\nNext: run comparative studies against free-form artifacts and checklist artifacts.\n",
|
|
297
|
+
"generic_checklist": {
|
|
298
|
+
"artifact_type": "result",
|
|
299
|
+
"summary": "ORP shipped the first reasoning kernel release in the CLI.",
|
|
300
|
+
"scope": "Kernel schema, scaffold, validate, and gate support.",
|
|
301
|
+
"constraints": [
|
|
302
|
+
"comparative superiority is still unproven"
|
|
303
|
+
],
|
|
304
|
+
"approach": "Document the shipped surface and the next validation studies.",
|
|
305
|
+
"checks": [
|
|
306
|
+
"Release shipped",
|
|
307
|
+
"validation docs published"
|
|
308
|
+
],
|
|
309
|
+
"risks": [
|
|
310
|
+
"The current evidence is still stronger for implementation validity than outcome superiority."
|
|
311
|
+
],
|
|
312
|
+
"evidence": [
|
|
313
|
+
"docs/ORP_REASONING_KERNEL_V0_1.md",
|
|
314
|
+
"docs/ORP_REASONING_KERNEL_TECHNICAL_VALIDATION.md"
|
|
315
|
+
],
|
|
316
|
+
"handoff": "Run comparative artifact and handoff studies next.",
|
|
317
|
+
"notes": "The release is solid as a protocol surface but not yet a proven superior methodology."
|
|
318
|
+
},
|
|
319
|
+
"kernel_artifact": {
|
|
320
|
+
"schema_version": "1.0.0",
|
|
321
|
+
"artifact_class": "result",
|
|
322
|
+
"claim": "ORP shipped the first reasoning kernel release as a real CLI protocol surface.",
|
|
323
|
+
"evidence_paths": [
|
|
324
|
+
"docs/ORP_REASONING_KERNEL_V0_1.md",
|
|
325
|
+
"docs/ORP_REASONING_KERNEL_TECHNICAL_VALIDATION.md",
|
|
326
|
+
"spec/v1/kernel.schema.json"
|
|
327
|
+
],
|
|
328
|
+
"status": "shipped in the ORP CLI and published to npm",
|
|
329
|
+
"interpretation_limits": [
|
|
330
|
+
"comparative superiority over free-form and checklist alternatives is not yet proven",
|
|
331
|
+
"the current evidence is strongest on internal validity"
|
|
332
|
+
],
|
|
333
|
+
"next_follow_up": "Run the comparative artifact, pickup, and corpus-fit studies."
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
]
|
|
337
|
+
}
|
|
package/examples/kernel/comparison/next-task-continuation.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schema_version": "1.0.0",
|
|
3
|
+
"kind": "orp_reasoning_kernel_next_task_continuation",
|
|
4
|
+
"cases": {
|
|
5
|
+
"software_trace_widget": {
|
|
6
|
+
"object": "terminal trace widget",
|
|
7
|
+
"goal": "surface lane drift and state clearly for operators",
|
|
8
|
+
"boundary": "terminal-first lane visibility in active ORP sessions",
|
|
9
|
+
"constraints": "low friction and no GUI dependency",
|
|
10
|
+
"success_criteria": "operators identify a drifting lane within 10 seconds without overloading the terminal surface"
|
|
11
|
+
},
|
|
12
|
+
"product_project_home": {
|
|
13
|
+
"object": "linked-project home shell",
|
|
14
|
+
"goal": "implement the decision to foreground linked projects on the web app home",
|
|
15
|
+
"boundary": "default logged-in dashboard experience",
|
|
16
|
+
"constraints": "keep the home calm and preserve access to the full idea library through Pensieve",
|
|
17
|
+
"success_criteria": "users reach active linked work in one click and broad idea browsing remains one step deeper"
|
|
18
|
+
},
|
|
19
|
+
"research_drift_hypothesis": {
|
|
20
|
+
"object": "stalled-lane pickup comparison fixture",
|
|
21
|
+
"goal": "test whether short drift summaries reduce the time needed to identify stalled lanes",
|
|
22
|
+
"boundary": "terminal-first multi-lane workflows where operators are already monitoring active lanes",
|
|
23
|
+
"constraints": "matched with-versus-without summary trials and no excessive summary noise",
|
|
24
|
+
"success_criteria": "the fixture captures detection-time differences clearly enough to support or falsify the hypothesis"
|
|
25
|
+
},
|
|
26
|
+
"research_handoff_experiment": {
|
|
27
|
+
"object": "handoff comparison trial kit",
|
|
28
|
+
"goal": "run matched handoff trials across free-form, checklist, and kernel task artifacts",
|
|
29
|
+
"boundary": "internal ORP operator handoffs",
|
|
30
|
+
"constraints": "small internal sample with a matched prompt set and reviewer score sheet",
|
|
31
|
+
"success_criteria": "pickup scores, clarification counts, and time-to-correct-interpretation are all captured"
|
|
32
|
+
},
|
|
33
|
+
"operations_habanero_checkpoint": {
|
|
34
|
+
"object": "runner resync verification pass",
|
|
35
|
+
"goal": "confirm active Habanero machines are resynced and a fresh hosted job pickup succeeds",
|
|
36
|
+
"boundary": "active Habanero runner machines",
|
|
37
|
+
"constraints": "avoid stale routing state and duplicate world bindings",
|
|
38
|
+
"success_criteria": "one fresh hosted job is picked up successfully after rerunning runner sync on active machines"
|
|
39
|
+
},
|
|
40
|
+
"operations_runner_policy": {
|
|
41
|
+
"object": "hosted runner claim gate enforcement",
|
|
42
|
+
"goal": "enforce the rule that only routeable local sessions can be targeted by hosted claims",
|
|
43
|
+
"boundary": "runner sync, poll, and work lifecycle",
|
|
44
|
+
"constraints": "do not claim unroutable jobs and keep routing machine-scoped",
|
|
45
|
+
"success_criteria": "unroutable jobs are rejected before claim and routeable jobs continue through the normal work lifecycle"
|
|
46
|
+
},
|
|
47
|
+
"writing_kernel_launch_result": {
|
|
48
|
+
"object": "kernel comparative validation study",
|
|
49
|
+
"goal": "run the comparative artifact, pickup, and corpus-fit studies needed after the first kernel launch",
|
|
50
|
+
"boundary": "post-launch kernel evaluation work",
|
|
51
|
+
"constraints": "current evidence is strongest on internal validity and comparative superiority is not yet proven",
|
|
52
|
+
"success_criteria": "comparative artifact, pickup, and corpus-fit studies are completed and recorded as canonical evidence"
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
}
|
|
package/examples/kernel/corpus/operations/habanero-routing.checkpoint.kernel.yml
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
schema_version: "1.0.0"
|
|
2
|
+
artifact_class: checkpoint
|
|
3
|
+
completed_unit: restored canonical Habanero world binding and runner routing
|
|
4
|
+
current_state: linked project and primary session are synchronized for hosted runner pickup
|
|
5
|
+
risks:
|
|
6
|
+
- stale hosted sessions may still exist on inactive machines
|
|
7
|
+
- queued jobs created before rebinding may remain stranded
|
|
8
|
+
next_handoff_target: rerun runner sync on active machines and verify the next queued job routes cleanly
|
|
9
|
+
artifact_refs:
|
|
10
|
+
- .git/orp/link/project.json
|
|
11
|
+
- .git/orp/link/sessions
|
|
12
|
+
- orp/HANDOFF.md
|
|
package/examples/kernel/corpus/operations/runner-routing.policy.kernel.yml
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
schema_version: "1.0.0"
|
|
2
|
+
artifact_class: policy
|
|
3
|
+
scope: hosted runner job pickup and execution
|
|
4
|
+
rule: only route runner jobs to linked projects with routeable registered sessions on the owning machine
|
|
5
|
+
rationale: prevent jobs from being claimed by machines or sessions that cannot actually execute them
|
|
6
|
+
invariants:
|
|
7
|
+
- a claimed job must have an unambiguous local execution target
|
|
8
|
+
- stale session targets must fall back safely or fail clearly
|
|
9
|
+
enforcement_surface: runner sync poll and work lifecycle
|
|
package/examples/kernel/corpus/product/project-home.decision.kernel.yml
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
schema_version: "1.0.0"
|
|
2
|
+
artifact_class: decision
|
|
3
|
+
question: what should the web home surface emphasize first for active operators?
|
|
4
|
+
chosen_path: make linked projects the primary home surface and move idea browsing into Pensieve
|
|
5
|
+
rejected_alternatives:
|
|
6
|
+
- keep the ideas board as the default landing page
|
|
7
|
+
- show linked projects and idea browsing with equal visual weight on first load
|
|
8
|
+
rationale: linked projects map directly to active operator work and reduce dashboard noise
|
|
9
|
+
consequences:
|
|
10
|
+
- idea browsing becomes secondary navigation
|
|
11
|
+
- the home screen becomes calmer but less exploratory by default
|
|
package/examples/kernel/corpus/research/kernel-handoff.experiment.kernel.yml
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
schema_version: "1.0.0"
|
|
2
|
+
artifact_class: experiment
|
|
3
|
+
objective: measure whether kernel-structured tasks improve handoff pickup quality
|
|
4
|
+
method: run matched handoff trials across free-form, checklist, and kernel artifacts
|
|
5
|
+
inputs:
|
|
6
|
+
- 20 matched task prompts
|
|
7
|
+
- blind reviewers or operators
|
|
8
|
+
outputs:
|
|
9
|
+
- pickup accuracy scores
|
|
10
|
+
- clarification counts
|
|
11
|
+
evidence_expectations:
|
|
12
|
+
- scored review sheets
|
|
13
|
+
- artifact corpus and rating summary
|
|
14
|
+
interpretation_limits:
|
|
15
|
+
- the initial study may be small and internal
|
|
16
|
+
- results may not generalize across all domains
|
|
package/examples/kernel/corpus/research/lane-drift.hypothesis.kernel.yml
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
schema_version: "1.0.0"
|
|
2
|
+
artifact_class: hypothesis
|
|
3
|
+
claim: exposing lane drift summaries will reduce missed stalled sessions during multi-agent work
|
|
4
|
+
boundary: applies to terminal-first lane orchestration with multiple concurrent sessions
|
|
5
|
+
assumptions:
|
|
6
|
+
- operators check summary views during active work
|
|
7
|
+
- stalled lanes emit enough signal to summarize
|
|
8
|
+
test_path: compare operator pickup of stalled lanes with and without a drift summary surface
|
|
9
|
+
falsifiers:
|
|
10
|
+
- operators do not detect stalled lanes faster
|
|
11
|
+
- the drift summary produces too many false positives
|
|
package/examples/kernel/corpus/software/trace-widget.task.kernel.yml
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
schema_version: "1.0.0"
|
|
2
|
+
artifact_class: task
|
|
3
|
+
object: terminal trace widget for ORP lane sessions
|
|
4
|
+
goal: let an operator detect lane drift and stalled work from a terminal-first view
|
|
5
|
+
boundary:
|
|
6
|
+
- lane status visibility inside terminal workflow
|
|
7
|
+
- drift summaries and recent event traces
|
|
8
|
+
constraints:
|
|
9
|
+
- must remain terminal-native
|
|
10
|
+
- must not alter lane execution semantics
|
|
11
|
+
success_criteria:
|
|
12
|
+
- operator can identify a stalled or drifting lane within ten seconds
|
|
13
|
+
- widget can be used without opening the desktop UI
|
|
package/examples/kernel/corpus/writing/kernel-launch.result.kernel.yml
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
schema_version: "1.0.0"
|
|
2
|
+
artifact_class: result
|
|
3
|
+
claim: ORP now ships a real reasoning kernel with enforceable promotion semantics
|
|
4
|
+
evidence_paths:
|
|
5
|
+
- docs/ORP_REASONING_KERNEL_V0_1.md
|
|
6
|
+
- docs/ORP_REASONING_KERNEL_TECHNICAL_VALIDATION.md
|
|
7
|
+
- docs/benchmarks/orp_reasoning_kernel_v0_1_validation.json
|
|
8
|
+
status: shipped in open-research-protocol 0.4.x
|
|
9
|
+
interpretation_limits:
|
|
10
|
+
- current evidence is implementation-focused rather than outcome-comparative
|
|
11
|
+
- cross-domain superiority is not yet proven
|
|
12
|
+
next_follow_up: run comparative artifact and handoff studies
|
package/llms.txt
CHANGED
|
@@ -13,6 +13,7 @@ ORP (Open Research Protocol) is a docs-first, local-first, agent-friendly protoc
|
|
|
13
13
|
## Fast Machine Discovery
|
|
14
14
|
|
|
15
15
|
- Run `orp about --json` for machine-readable tool metadata, artifact paths, schemas, supported commands, and bundled packs.
|
|
16
|
+
- Run `orp youtube inspect <youtube-url> --json` to normalize a public YouTube video into ORP's source artifact shape, including transcript text when public captions are fetchable.
|
|
16
17
|
- Run `orp erdos sync --json` for machine-readable Erdos catalog sync results.
|
|
17
18
|
- Run `orp pack list --json` for machine-readable bundled pack inventory.
|
|
18
19
|
- Core runtime commands also support `--json`:
|
|
@@ -37,10 +38,12 @@ ORP (Open Research Protocol) is a docs-first, local-first, agent-friendly protoc
|
|
|
37
38
|
- `spec/v1/orp.config.schema.json`
|
|
38
39
|
- `spec/v1/packet.schema.json`
|
|
39
40
|
- `spec/v1/profile-pack.schema.json`
|
|
41
|
+
- `spec/v1/youtube-source.schema.json`
|
|
40
42
|
|
|
41
43
|
## Key Commands
|
|
42
44
|
|
|
43
45
|
- `orp init`
|
|
46
|
+
- `orp youtube inspect <youtube-url> --json`
|
|
44
47
|
- `orp gate run --profile <profile>`
|
|
45
48
|
- `orp packet emit --profile <profile>`
|
|
46
49
|
- `orp report summary`
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "open-research-protocol",
|
|
3
|
-
"version": "0.4.7",
|
|
3
|
+
"version": "0.4.9",
|
|
4
4
|
"description": "ORP CLI (Open Research Protocol): agent-friendly research workflows, runtime, reports, and pack tooling.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"repository": {
|
|
@@ -35,6 +35,9 @@
|
|
|
35
35
|
"engines": {
|
|
36
36
|
"node": ">=18"
|
|
37
37
|
},
|
|
38
|
+
"dependencies": {
|
|
39
|
+
"breakthroughs": "^0.1.0"
|
|
40
|
+
},
|
|
38
41
|
"scripts": {
|
|
39
42
|
"postinstall": "node scripts/npm-postinstall-check.js",
|
|
40
43
|
"prepublishOnly": "node scripts/npm-prepublish-guard.js",
|