sneakoscope 2.0.12 → 2.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. package/README.md +5 -1
  2. package/crates/sks-core/Cargo.lock +1 -1
  3. package/crates/sks-core/Cargo.toml +1 -1
  4. package/crates/sks-core/src/main.rs +1 -1
  5. package/dist/.sks-build-stamp.json +4 -4
  6. package/dist/bin/sks.js +1 -1
  7. package/dist/core/agents/agent-orchestrator.js +11 -4
  8. package/dist/core/agents/agent-output-validator.js +1 -1
  9. package/dist/core/codex-control/codex-fake-sdk-adapter.js +3 -3
  10. package/dist/core/codex-control/codex-sdk-adapter.js +10 -0
  11. package/dist/core/codex-control/codex-task-runner.js +4 -2
  12. package/dist/core/codex-control/gpt-final-review-schema.js +61 -14
  13. package/dist/core/commands/naruto-command.js +1 -0
  14. package/dist/core/commands/research-command.js +112 -19
  15. package/dist/core/fsx.js +1 -1
  16. package/dist/core/naruto/naruto-real-worker-child.js +11 -3
  17. package/dist/core/naruto/naruto-real-worker-runtime.js +4 -0
  18. package/dist/core/pipeline/final-gpt-patch-stage.js +20 -3
  19. package/dist/core/research/claim-evidence-matrix.js +160 -0
  20. package/dist/core/research/experiment-plan.js +53 -0
  21. package/dist/core/research/falsification.js +18 -0
  22. package/dist/core/research/implementation-blueprint-densifier.js +124 -0
  23. package/dist/core/research/implementation-blueprint-markdown.js +31 -0
  24. package/dist/core/research/implementation-blueprint.js +66 -0
  25. package/dist/core/research/replication-pack.js +50 -0
  26. package/dist/core/research/research-claim-builder.js +114 -0
  27. package/dist/core/research/research-cycle-runner.js +129 -0
  28. package/dist/core/research/research-final-reviewer.js +212 -0
  29. package/dist/core/research/research-handoff.js +51 -0
  30. package/dist/core/research/research-prompt-contract.js +24 -0
  31. package/dist/core/research/research-quality-contract.js +61 -0
  32. package/dist/core/research/research-report-quality.js +67 -0
  33. package/dist/core/research/research-source-ledger-merge.js +186 -0
  34. package/dist/core/research/research-source-shards.js +176 -0
  35. package/dist/core/research/research-stage-runner.js +515 -0
  36. package/dist/core/research/research-work-graph.js +166 -0
  37. package/dist/core/research/source-quality-report.js +94 -0
  38. package/dist/core/research.js +356 -44
  39. package/dist/core/version.js +1 -1
  40. package/dist/core/zellij/zellij-slot-column-anchor.js +5 -3
  41. package/dist/core/zellij/zellij-slot-pane-renderer.js +259 -16
  42. package/dist/scripts/codex-sdk-research-pipeline-check.js +44 -5
  43. package/dist/scripts/packlist-performance-check.js +1 -1
  44. package/dist/scripts/release-dag-full-coverage-check.js +14 -1
  45. package/dist/scripts/release-parallel-speed-budget-check.js +7 -2
  46. package/dist/scripts/research-blueprint-densifier-check.js +21 -0
  47. package/dist/scripts/research-claim-builder-check.js +19 -0
  48. package/dist/scripts/research-complete-package-fixture-check.js +23 -0
  49. package/dist/scripts/research-final-reviewer-blackbox.js +22 -0
  50. package/dist/scripts/research-parallel-source-shards-check.js +22 -0
  51. package/dist/scripts/research-quality-gate-check.js +111 -0
  52. package/dist/scripts/research-real-cycle-no-legacy-final-md-check.js +14 -0
  53. package/dist/scripts/research-short-report-rejection-check.js +46 -0
  54. package/dist/scripts/research-source-ledger-merge-check.js +26 -0
  55. package/dist/scripts/research-stage-cycle-runtime-blackbox.js +24 -0
  56. package/dist/scripts/zellij-slot-column-anchor-check.js +26 -5
  57. package/dist/scripts/zellij-slot-pane-renderer-check.js +73 -5
  58. package/package.json +28 -1
  59. package/schemas/codex/agent-result.schema.json +1 -1
  60. package/schemas/research/claim-evidence-matrix.schema.json +37 -0
  61. package/schemas/research/experiment-plan.schema.json +17 -0
  62. package/schemas/research/implementation-blueprint.schema.json +30 -0
  63. package/schemas/research/replication-pack.schema.json +17 -0
  64. package/schemas/research/research-final-review.schema.json +16 -0
  65. package/schemas/research/research-quality-contract.schema.json +37 -0
  66. package/schemas/research/research-source-shard.schema.json +46 -0
  67. package/schemas/research/source-quality-report.schema.json +18 -0
  68. package/dist/build-manifest.json +0 -1168
  69. package/dist/scripts/release-readiness-report.js +0 -1146
  70. package/dist/vendor/openai-codex/rust-v0.131.0/hooks/permission-request.command.input.schema.json +0 -61
  71. package/dist/vendor/openai-codex/rust-v0.131.0/hooks/permission-request.command.output.schema.json +0 -103
  72. package/dist/vendor/openai-codex/rust-v0.131.0/hooks/post-compact.command.input.schema.json +0 -52
  73. package/dist/vendor/openai-codex/rust-v0.131.0/hooks/post-compact.command.output.schema.json +0 -24
  74. package/dist/vendor/openai-codex/rust-v0.131.0/hooks/post-tool-use.command.input.schema.json +0 -67
  75. package/dist/vendor/openai-codex/rust-v0.131.0/hooks/post-tool-use.command.output.schema.json +0 -84
  76. package/dist/vendor/openai-codex/rust-v0.131.0/hooks/pre-compact.command.input.schema.json +0 -52
  77. package/dist/vendor/openai-codex/rust-v0.131.0/hooks/pre-compact.command.output.schema.json +0 -24
  78. package/dist/vendor/openai-codex/rust-v0.131.0/hooks/pre-tool-use.command.input.schema.json +0 -65
  79. package/dist/vendor/openai-codex/rust-v0.131.0/hooks/pre-tool-use.command.output.schema.json +0 -105
  80. package/dist/vendor/openai-codex/rust-v0.131.0/hooks/session-start.command.input.schema.json +0 -59
  81. package/dist/vendor/openai-codex/rust-v0.131.0/hooks/session-start.command.output.schema.json +0 -63
  82. package/dist/vendor/openai-codex/rust-v0.131.0/hooks/snapshot-metadata.json +0 -31
  83. package/dist/vendor/openai-codex/rust-v0.131.0/hooks/stop.command.input.schema.json +0 -63
  84. package/dist/vendor/openai-codex/rust-v0.131.0/hooks/stop.command.output.schema.json +0 -45
  85. package/dist/vendor/openai-codex/rust-v0.131.0/hooks/user-prompt-submit.command.input.schema.json +0 -59
  86. package/dist/vendor/openai-codex/rust-v0.131.0/hooks/user-prompt-submit.command.output.schema.json +0 -81
package/README.md CHANGED
@@ -16,7 +16,7 @@ Set up this agent project with Sneakoscope Codex. Use [[mandarange/Sneakoscope-C
16
16
 
17
17
  ## Current Release
18
18
 
19
- SKS **2.0.12** is the public-ready parallel runtime stabilization release. It closes release DAG coverage around Zellij slot renderer proof semantics, wires Naruto allocation/rebalance into the production scheduler, keeps pre-run worker smoke opt-in, and requires GPT Final approval before local/worktree candidate output can apply.
19
+ SKS **2.0.14** is the public-ready research runtime stabilization release. It upgrades Research from a quality-contract package into a real stage-aware runtime with parallel source shards, source-ledger merge, claim matrix builder, repository-aware implementation blueprint, Codex/GPT final reviewer, and blackbox gates while preserving the 2.0.13 research artifact contract.
20
20
 
21
21
  What changed:
22
22
 
@@ -466,6 +466,8 @@ sks code-structure scan --json
466
466
 
467
467
  `sks research` prepares a named genius-lens agent council, requires every agent to run at `xhigh`, records one literal `Eureka!` idea per agent, runs an evidence-bound debate, and creates `research-source-skill.md` as a route-local source collection skill before synthesis. Research is not a code-change route: real runs may write only their own mission artifacts under `.sneakoscope/missions/<id>/`, and source/package/docs/config mutations block the run with `research-code-mutation-blocker.json`. The required Research persona lenses are Einstein Agent, Feynman Agent, Turing Agent, von Neumann Agent, and Skeptic Agent; they are cognitive roles, not impersonations, and `agent-ledger.json` must include `display_name`, `persona`, `persona_boundary`, `reasoning_effort`, falsifiers, cheap probes, and `challenge_or_response`. Normal Research is not a fixed three-cycle flow: it repeats source gathering, Eureka ideas, debate, falsification, and synthesis pressure until every agent records final agreement, or pauses at the explicit max-cycle safety cap with an unpassed gate. `debate-ledger.json` must include `consensus_iterations`, `unanimous_consensus`, and per-agent agreements; `research-gate.json` cannot pass until unanimous consensus is true for all agents. Normal Research is intentionally allowed to take one or two hours when the problem needs it; `--mock` is only for selftests or dry harness checks, and a real run blocks with `research-blocker.json` instead of silently substituting mock output when the Codex execution path is unavailable. 
The source layer contract separates latest papers, official/government or leading-institution sources, standards/primary docs, current news such as BBC/CNN/GDELT-style sources, public discourse such as X/Reddit, developer/practitioner knowledge such as Stack Overflow/GitHub, traditional background sources, and counterevidence/fact-checking; `source-ledger.json` must record layer coverage, source quality, blockers, citations, and cross-layer triangulation. Context7 is optional for `$Research` and only becomes relevant when the research topic specifically depends on package, API, framework, or SDK documentation. Research runs require `research-report.md`, `research-paper.md`, `genius-opinion-summary.md`, `research-source-skill.md`, `source-ledger.json`, `agent-ledger.json`, `debate-ledger.json`, `novelty-ledger.json`, `falsification-ledger.json`, and `research-gate.json` so they stay source-backed, adversarially checked, falsifiable, paper-ready, and clear about every agent lens opinion. `research status` reports source entries, source-layer coverage, triangulation checks, counterevidence, xhigh agent count, Eureka moments, debate exchanges, consensus iterations, unanimous consensus, paper presence/sections, genius-opinion summary coverage, agent findings, and falsification cases alongside the gate.
468
468
 
469
+ In 2.0.14, Research also writes a quality contract and handoff package: `research-quality-contract.json`, parallel `research/cycle-N/source-shards/*.json`, `source-ledger.json`, `claim-evidence-matrix.json`, `source-quality-report.json`, `implementation-blueprint.json`/`.md`, `team-handoff-goal.md`, `experiment-plan.json`/`.md`, `replication-pack.json`, `research-work-graph.json`, `research-final-review.static.json`, `research-final-review.codex.json`, and `research-final-review.json`. The default gate requires 12 total sources, 5 source layers, 2 counterevidence sources, 8 key claims, 6 triangulated claims, 8 blueprint sections, 4 falsification cases, 5 experiment steps, a 2200-word report, and approved static plus Codex/GPT final review before `research-gate.json` can pass. See `docs/research-pipeline.md`, `docs/research-artifacts.md`, and `docs/research-implementation-handoff.md`.
470
+
469
471
  `sks recallpulse` is the 0.8.0 report-only RecallPulse utility. It writes `recallpulse-decision.json`, `mission-status-ledger.json`, `route-proof-capsule.json`, `evidence-envelope.json`, `recallpulse-governance-report.json`, `recallpulse-task-goal-ledger.json`, and `recallpulse-eval-report.json` for the current mission. RecallPulse does not replace route gates, Honest Mode, DB safety, imagegen evidence, or TriWiki validation; it records cache hits, hydration needs, duplicate suppression, route-governance risks, and final-summary-ready durable status so later releases can promote only measured improvements. Checklist updates are sequential: every `Txxx` row is treated as a child `$Goal` checkpoint, and `sks recallpulse checklist ... --task T001 --apply` refuses out-of-order checks unless explicitly overridden.
470
472
 
471
473
  `sks pipeline plan` shows the active route lane, kept/skipped stages, verification commands, and no-unrequested-fallback invariant. The 0.9.0 Decision Lattice augments this planning surface with report-only A*/proof-debt evidence: frontier paths considered, the selected path, and rejected paths with rejection reasons. `sks proof-field scan` remains the lightweight rubric for small changes; risky or broad signals return to the full Team/Honest path, and no speedup claim is valid without replay or eval evidence.
@@ -531,6 +533,8 @@ $DB inspect this migration for destructive risk
531
533
 
532
534
  Local model workers are off by default, so SKS stays GPT-only unless you explicitly enable them. Use the Codex App prompt commands:
533
535
 
536
+ ![SKS Local LLM mode workflow](docs/sks-local-llm-mode/assets/sks-local-llm-flow.png)
537
+
534
538
  ```text
535
539
  $with-local-llm-on
536
540
  $with-local-llm-off
package/crates/sks-core/Cargo.lock CHANGED
@@ -76,7 +76,7 @@ dependencies = [
76
76
 
77
77
  [[package]]
78
78
  name = "sks-core"
79
- version = "2.0.12"
79
+ version = "2.0.14"
80
80
  dependencies = [
81
81
  "serde_json",
82
82
  ]
package/crates/sks-core/Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "sks-core"
3
- version = "2.0.12"
3
+ version = "2.0.14"
4
4
  edition = "2021"
5
5
 
6
6
  [dependencies]
package/crates/sks-core/src/main.rs CHANGED
@@ -4,7 +4,7 @@ use std::io::{self, Read, Seek, SeekFrom};
4
4
  fn main() {
5
5
  let mut args = std::env::args().skip(1);
6
6
  match args.next().as_deref() {
7
- Some("--version") => println!("sks-rs 2.0.12"),
7
+ Some("--version") => println!("sks-rs 2.0.14"),
8
8
  Some("compact-info") => {
9
9
  let mut input = String::new();
10
10
  let _ = io::stdin().read_to_string(&mut input);
package/dist/.sks-build-stamp.json CHANGED
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "schema": "sks.dist-build-stamp.v1",
3
3
  "package_name": "sneakoscope",
4
- "package_version": "2.0.12",
5
- "source_digest": "6f9f20ed184ebe714b41b9176d8414b7fded251a30591ada3c3bac53e49fcf61",
6
- "source_file_count": 2053,
7
- "built_at_source_time": 1780833723608
4
+ "package_version": "2.0.14",
5
+ "source_digest": "e559602363886e06dc0d079905dacbaddcadccd0617447573f90240aa9b8f2f4",
6
+ "source_file_count": 2104,
7
+ "built_at_source_time": 1780852294757
8
8
  }
package/dist/bin/sks.js CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- const FAST_PACKAGE_VERSION = '2.0.12';
2
+ const FAST_PACKAGE_VERSION = '2.0.14';
3
3
  const args = process.argv.slice(2);
4
4
  try {
5
5
  if (args[0] === '--agent' && args[1] === 'worker') {
package/dist/core/agents/agent-orchestrator.js CHANGED
@@ -209,7 +209,7 @@ export async function runNativeAgentOrchestrator(opts = {}) {
209
209
  microWins: strategyCompiled.gate.micro_wins
210
210
  });
211
211
  if (opts.narutoWorkGraph?.work_items?.length) {
212
- partition = applyNarutoWorkGraphToPartition(partition, opts.narutoWorkGraph, roster, targetActiveSlots);
212
+ partition = applyNarutoWorkGraphToPartition(partition, opts.narutoWorkGraph, roster, targetActiveSlots, prompt);
213
213
  augmentVerificationRollbackDagForNaruto(strategyCompiled.verification_rollback_dag, partition.slices);
214
214
  }
215
215
  await runAgentJanitor({ missionDir: dir, missionId, projectHash: namespace.root_hash });
@@ -620,7 +620,7 @@ function withFinalGptPatchEnvelopes(results, patchEnvelopes = []) {
620
620
  next[0] = { ...next[0], patch_envelopes: patchEnvelopes };
621
621
  return next;
622
622
  }
623
- function applyNarutoWorkGraphToPartition(partition, graph, roster, targetActiveSlots) {
623
+ function applyNarutoWorkGraphToPartition(partition, graph, roster, targetActiveSlots, parentPrompt = '') {
624
624
  const activeRoster = (Array.isArray(roster?.roster) ? roster.roster : []).slice(0, Math.max(1, targetActiveSlots));
625
625
  const activeAgentIds = new Set(activeRoster.map((row) => String(row.id || '')).filter(Boolean));
626
626
  const fallbackOwners = activeRoster.length ? activeRoster : [{ id: 'naruto_clone_001', role: 'verifier' }];
@@ -639,6 +639,7 @@ function applyNarutoWorkGraphToPartition(partition, graph, roster, targetActiveS
639
639
  const targetPaths = normalizePathList(item.target_paths);
640
640
  const verificationNodeId = writePaths.length ? `verify:${sliceId}` : null;
641
641
  const rollbackNodeId = writePaths.length ? `rollback:${sliceId}` : null;
642
+ const parentObjective = normalizeWorkerPromptText(parentPrompt);
642
643
  return {
643
644
  id: sliceId,
644
645
  owner_agent_id: owner,
@@ -669,12 +670,15 @@ function applyNarutoWorkGraphToPartition(partition, graph, roster, targetActiveS
669
670
  source_intelligence_refs: sourceIntelligenceRefs,
670
671
  goal_mode_ref: goalModeRef,
671
672
  strategy_refs: strategyRefs,
673
+ parent_prompt: parentObjective,
672
674
  max_attempts: 1,
673
675
  description: [
676
+ parentObjective ? `Parent Naruto objective:\n${parentObjective}` : null,
674
677
  String(item.title || item.id || 'Naruto work item'),
675
678
  `Naruto owner: ${owner}`,
676
679
  item.allocation_reason ? `Allocation: ${item.allocation_reason}` : null,
677
- writePaths.length ? `Write paths: ${writePaths.join(', ')}` : 'Read-only or no-write work item.'
680
+ writePaths.length ? `Write paths: ${writePaths.join(', ')}` : 'Read-only or no-write work item.',
681
+ writePaths.length ? null : 'Read-only instruction: inspect the requested files/artifacts and do not run package scripts, build commands, tests, or temp-file-creating checks unless the parent objective explicitly requires them.'
678
682
  ].filter(Boolean).join('\n')
679
683
  };
680
684
  });
@@ -1687,10 +1691,13 @@ function buildDirectSdkWorkerPrompt(slice) {
1687
1691
  '',
1688
1692
  write.length
1689
1693
  ? `Write-capable slice. Return JSON matching ${CODEX_AGENT_WORKER_RESULT_SCHEMA_ID}; include patch_envelopes for write_paths=${JSON.stringify(write)}. Each patch envelope must include schema, source "model_authored", agent_id, session_id, slot_id, generation_index, task_slice_id, lease_id, allowed_paths, operations, and rationale. Each operation must include op, path, search, replace, content, and diff; use empty strings for operation fields that do not apply.`
1690
- : `Read-only slice. Return JSON matching ${CODEX_AGENT_WORKER_RESULT_SCHEMA_ID}; do not report pre-existing repository dirtiness as changed_files.`,
1694
+ : `Read-only slice. Return JSON matching ${CODEX_AGENT_WORKER_RESULT_SCHEMA_ID}; inspect relevant files/artifacts, do not mutate files, do not create temporary/build outputs, do not run package scripts/build/test commands unless explicitly required, and do not report pre-existing repository dirtiness as changed_files.`,
1691
1695
  'Required JSON fields: status, summary, findings, changed_files, patch_envelopes, verification, rollback_notes, blockers.'
1692
1696
  ].join('\n');
1693
1697
  }
1698
+ function normalizeWorkerPromptText(value) {
1699
+ return String(value || '').replace(/\s+/g, ' ').trim().slice(0, 4000);
1700
+ }
1694
1701
  function buildDirectNoPatchReason(slice, opts) {
1695
1702
  const writePathCount = sdkWritePaths(slice, opts).length;
1696
1703
  return {
package/dist/core/agents/agent-output-validator.js CHANGED
@@ -33,7 +33,7 @@ export const AGENT_RESULT_RUNTIME_SCHEMA = {
33
33
  persona_id: { type: 'string' },
34
34
  task_slice_id: { type: 'string' },
35
35
  status: { enum: ['done', 'blocked', 'failed'] },
36
- backend: { enum: ['fake', 'process', 'codex-sdk', 'zellij', 'ollama'] },
36
+ backend: { enum: ['fake', 'process', 'codex-sdk', 'python-codex-sdk', 'zellij', 'ollama', 'local-llm'] },
37
37
  summary: { type: 'string' },
38
38
  findings: { type: 'array', items: { type: 'string' } },
39
39
  proposed_changes: { type: 'array', items: { type: 'string' } },
package/dist/core/codex-control/codex-fake-sdk-adapter.js CHANGED
@@ -34,11 +34,11 @@ function fakeStructuredOutput(input) {
34
34
  summary: unsafe
35
35
  ? 'Fake Codex SDK GPT final arbiter rejected an unsafe candidate for hermetic verification.'
36
36
  : 'Fake Codex SDK GPT final arbiter approved the candidate for hermetic verification.',
37
- gpt_review_findings: unsafe ? [{ severity: 'high', message: 'unsafe candidate rejected' }] : [],
37
+ gpt_review_findings: unsafe ? [{ id: 'unsafe-candidate', severity: 'high', summary: 'unsafe candidate rejected' }] : [],
38
38
  accepted_patch_envelopes: unsafe ? [] : [],
39
39
  modified_patch_envelopes: [],
40
- rejected_patch_envelopes: unsafe ? [{ reason: 'unsafe candidate' }] : [],
41
- required_followup_work: unsafe ? [{ blocker: 'unsafe_candidate_patch' }] : [],
40
+ rejected_patch_envelopes: unsafe ? [{ id: 'unsafe-candidate', summary: 'unsafe candidate', patch_envelope_json: '{}' }] : [],
41
+ required_followup_work: unsafe ? [{ id: 'unsafe_candidate_patch', severity: 'high', summary: 'unsafe_candidate_patch' }] : [],
42
42
  verification_plan: ['schema validation', 'local collaboration final gate'],
43
43
  rollback_notes: [],
44
44
  blockers: unsafe ? ['unsafe_candidate_patch'] : [],
package/dist/core/codex-control/codex-sdk-adapter.js CHANGED
@@ -1,5 +1,8 @@
1
+ import path from 'node:path';
2
+ import { appendJsonl } from '../fsx.js';
1
3
  import { buildCodexSdkConfig } from './codex-sdk-config-policy.js';
2
4
  import { buildCodexSdkEnv } from './codex-sdk-env-policy.js';
5
+ import { translateCodexSdkEvent } from './codex-event-translator.js';
3
6
  export async function runRealCodexSdkTask(input, policy) {
4
7
  const mod = await import('@openai/codex-sdk');
5
8
  const Codex = mod.Codex || mod.default?.Codex || mod.default;
@@ -22,9 +25,15 @@ export async function runRealCodexSdkTask(input, policy) {
22
25
  const thread = resumeId ? codex.resumeThread(resumeId, threadOptions) : codex.startThread(threadOptions);
23
26
  const events = [];
24
27
  let finalResponse = '';
28
+ let liveEventsWritten = false;
29
+ const liveEventPath = input.mutationLedgerRoot ? path.join(input.mutationLedgerRoot, 'codex-sdk-events.jsonl') : null;
25
30
  const streamed = await thread.runStreamed(buildSdkInput(input), { outputSchema: input.outputSchema });
26
31
  for await (const event of streamed.events) {
27
32
  events.push(event);
33
+ if (liveEventPath) {
34
+ await appendJsonl(liveEventPath, translateCodexSdkEvent(event));
35
+ liveEventsWritten = true;
36
+ }
28
37
  if (event?.type === 'item.completed' && event?.item?.type === 'agent_message')
29
38
  finalResponse = String(event.item.text || '');
30
39
  }
@@ -37,6 +46,7 @@ export async function runRealCodexSdkTask(input, policy) {
37
46
  finalResponse,
38
47
  structuredOutput,
39
48
  blockers: [],
49
+ liveEventsWritten,
40
50
  raw: { item_count: events.filter((event) => String(event?.type || '').startsWith('item.')).length }
41
51
  };
42
52
  }
package/dist/core/codex-control/codex-task-runner.js CHANGED
@@ -67,8 +67,10 @@ export async function runCodexTask(input) {
67
67
  }
68
68
  const events = Array.isArray(adapterResult?.events) ? adapterResult.events : [];
69
69
  const translatedEvents = translateCodexSdkEvents(events);
70
- for (const event of translatedEvents)
71
- await appendJsonl(path.join(root, 'codex-sdk-events.jsonl'), event);
70
+ if (adapterResult?.liveEventsWritten !== true) {
71
+ for (const event of translatedEvents)
72
+ await appendJsonl(path.join(root, 'codex-sdk-events.jsonl'), event);
73
+ }
72
74
  if (adapterResult?.reliabilityShield)
73
75
  await writeJsonAtomic(path.join(root, 'codex-reliability-shield.json'), adapterResult.reliabilityShield);
74
76
  const structuredOutput = adapterResult?.structuredOutput;
package/dist/core/codex-control/gpt-final-review-schema.js CHANGED
@@ -1,5 +1,25 @@
1
1
  export const GPT_FINAL_ARBITER_RESULT_SCHEMA_ID = 'sks.gpt-final-arbiter-result.v1';
2
2
  export const GPT_FINAL_ARBITER_INPUT_SCHEMA = 'sks.gpt-final-arbiter-input.v1';
3
+ const reviewItemSchema = {
4
+ type: 'object',
5
+ required: ['id', 'severity', 'summary'],
6
+ properties: {
7
+ id: { type: 'string' },
8
+ severity: { type: 'string', enum: ['low', 'medium', 'high'] },
9
+ summary: { type: 'string' }
10
+ },
11
+ additionalProperties: false
12
+ };
13
+ const patchDecisionSchema = {
14
+ type: 'object',
15
+ required: ['id', 'summary', 'patch_envelope_json'],
16
+ properties: {
17
+ id: { type: 'string' },
18
+ summary: { type: 'string' },
19
+ patch_envelope_json: { type: 'string' }
20
+ },
21
+ additionalProperties: false
22
+ };
3
23
  export const gptFinalArbiterResultSchema = {
4
24
  type: 'object',
5
25
  required: [
@@ -17,20 +37,20 @@ export const gptFinalArbiterResultSchema = {
17
37
  'confidence'
18
38
  ],
19
39
  properties: {
20
- schema: { const: GPT_FINAL_ARBITER_RESULT_SCHEMA_ID },
40
+ schema: { type: 'string', enum: [GPT_FINAL_ARBITER_RESULT_SCHEMA_ID] },
21
41
  status: { enum: ['approved', 'modified', 'rejected', 'needs_more_work'] },
22
42
  summary: { type: 'string' },
23
- gpt_review_findings: { type: 'array', items: { type: 'object' } },
24
- accepted_patch_envelopes: { type: 'array', items: { type: 'object' } },
25
- modified_patch_envelopes: { type: 'array', items: { type: 'object' } },
26
- rejected_patch_envelopes: { type: 'array', items: { type: 'object' } },
27
- required_followup_work: { type: 'array', items: { type: 'object' } },
43
+ gpt_review_findings: { type: 'array', items: reviewItemSchema },
44
+ accepted_patch_envelopes: { type: 'array', items: patchDecisionSchema },
45
+ modified_patch_envelopes: { type: 'array', items: patchDecisionSchema },
46
+ rejected_patch_envelopes: { type: 'array', items: patchDecisionSchema },
47
+ required_followup_work: { type: 'array', items: reviewItemSchema },
28
48
  verification_plan: { type: 'array', items: { type: 'string' } },
29
49
  rollback_notes: { type: 'array', items: { type: 'string' } },
30
50
  blockers: { type: 'array', items: { type: 'string' } },
31
51
  confidence: { enum: ['low', 'medium', 'high'] }
32
52
  },
33
- additionalProperties: true
53
+ additionalProperties: false
34
54
  };
35
55
  export function normalizeGptFinalArbiterResult(value) {
36
56
  const status = normalizeStatus(value?.status);
@@ -38,11 +58,11 @@ export function normalizeGptFinalArbiterResult(value) {
38
58
  schema: GPT_FINAL_ARBITER_RESULT_SCHEMA_ID,
39
59
  status,
40
60
  summary: String(value?.summary || defaultSummary(status)),
41
- gpt_review_findings: array(value?.gpt_review_findings),
42
- accepted_patch_envelopes: array(value?.accepted_patch_envelopes),
43
- modified_patch_envelopes: array(value?.modified_patch_envelopes),
44
- rejected_patch_envelopes: array(value?.rejected_patch_envelopes),
45
- required_followup_work: array(value?.required_followup_work),
61
+ gpt_review_findings: reviewItems(value?.gpt_review_findings),
62
+ accepted_patch_envelopes: patchDecisionItems(value?.accepted_patch_envelopes),
63
+ modified_patch_envelopes: patchDecisionItems(value?.modified_patch_envelopes),
64
+ rejected_patch_envelopes: patchDecisionItems(value?.rejected_patch_envelopes),
65
+ required_followup_work: reviewItems(value?.required_followup_work),
46
66
  verification_plan: stringArray(value?.verification_plan),
47
67
  rollback_notes: stringArray(value?.rollback_notes),
48
68
  blockers: stringArray(value?.blockers),
@@ -57,12 +77,39 @@ function normalizeStatus(value) {
57
77
  function normalizeConfidence(value) {
58
78
  return value === 'low' || value === 'medium' || value === 'high' ? value : 'medium';
59
79
  }
60
- function array(value) {
61
- return Array.isArray(value) ? value : [];
80
+ function reviewItems(value) {
81
+ if (!Array.isArray(value))
82
+ return [];
83
+ return value.map((entry, index) => {
84
+ const raw = typeof entry === 'object' && entry !== null ? entry : { summary: entry };
85
+ return {
86
+ id: String(raw.id || raw.blocker || raw.reason || `item-${index + 1}`),
87
+ severity: normalizeSeverity(raw.severity),
88
+ summary: String(raw.summary || raw.message || raw.blocker || raw.reason || entry || '').trim()
89
+ };
90
+ }).filter((entry) => entry.summary);
91
+ }
92
+ function patchDecisionItems(value) {
93
+ if (!Array.isArray(value))
94
+ return [];
95
+ return value.map((entry, index) => {
96
+ const raw = typeof entry === 'object' && entry !== null ? entry : { summary: entry };
97
+ const patch = typeof raw.patch_envelope_json === 'string'
98
+ ? raw.patch_envelope_json
99
+ : JSON.stringify(entry ?? {});
100
+ return {
101
+ id: String(raw.id || raw.schema || raw.reason || `patch-${index + 1}`),
102
+ summary: String(raw.summary || raw.reason || raw.rationale || raw.schema || entry || '').trim() || `Patch decision ${index + 1}`,
103
+ patch_envelope_json: patch
104
+ };
105
+ });
62
106
  }
63
107
  function stringArray(value) {
64
108
  return Array.isArray(value) ? value.map((entry) => String(entry || '').trim()).filter(Boolean) : [];
65
109
  }
110
+ function normalizeSeverity(value) {
111
+ return value === 'low' || value === 'medium' || value === 'high' ? value : 'medium';
112
+ }
66
113
  function defaultSummary(status) {
67
114
  return status === 'approved' || status === 'modified'
68
115
  ? 'GPT final arbiter accepted the candidate result.'
package/dist/core/commands/naruto-command.js CHANGED
@@ -444,6 +444,7 @@ async function runNarutoControlPlaneSmoke(input) {
444
444
  item,
445
445
  placement,
446
446
  backend: 'fake',
447
+ parentPrompt: input.prompt,
447
448
  worktreePolicy: smokeWorktreePolicy,
448
449
  zellijSessionName: `sks-${input.missionId}`,
449
450
  visiblePaneCap: input.zellijVisiblePanes
package/dist/core/commands/research-command.js CHANGED
@@ -14,6 +14,15 @@ import { scanDbSafety } from '../db-safety.js';
14
14
  import { maybeFinalizeRoute } from '../proof/auto-finalize.js';
15
15
  import { runNativeAgentOrchestrator } from '../agents/agent-orchestrator.js';
16
16
  import { flag, positionalArgs, readFlagValue, readMaxCycles, readBoundedIntegerFlag, resolveMissionId, safeReadTextFile } from './command-utils.js';
17
+ import { writeResearchWorkGraph } from '../research/research-work-graph.js';
18
+ import { runResearchCycle } from '../research/research-cycle-runner.js';
19
+ import { readResearchQualityContract } from '../research/research-quality-contract.js';
20
+ import { readClaimEvidenceMatrix } from '../research/claim-evidence-matrix.js';
21
+ import { readSourceQualityReport } from '../research/source-quality-report.js';
22
+ import { readImplementationBlueprint, validateImplementationBlueprint } from '../research/implementation-blueprint.js';
23
+ import { readExperimentPlan, validateExperimentPlan } from '../research/experiment-plan.js';
24
+ import { readReplicationPack, validateReplicationPack } from '../research/replication-pack.js';
25
+ import { readResearchFinalReview } from '../research/research-final-reviewer.js';
17
26
  const RESEARCH_DEFAULT_MAX_CYCLES = 12;
18
27
  const RESEARCH_DEFAULT_CYCLE_TIMEOUT_MINUTES = 120;
19
28
  const RESEARCH_MIN_CYCLE_TIMEOUT_MINUTES = 15;
@@ -128,30 +137,75 @@ async function researchRun(args) {
128
137
  const dryRunPatches = flag(args, '--dry-run-patches') || flag(args, '--dryrun-patches');
129
138
  const maxWriteAgents = readBoundedIntegerFlag(args, '--max-write-agents', Math.min(requestedAgents, 5), 1, 20);
130
139
  const mock = flag(args, '--mock');
140
+ const researchWorkGraph = await writeResearchWorkGraph(dir, plan);
141
+ const graphWorkItemCount = Math.max(1, Number(researchWorkGraph.total_work_items || researchWorkGraph.work_items?.length || 0));
142
+ await runResearchCycle(dir, researchWorkGraph, { cycle: 0, status: mock ? 'mock_native_orchestrator_planned' : 'native_orchestrator_planned' });
131
143
  await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: 'RESEARCH_RUNNING_NO_QUESTIONS', questions_allowed: false, implementation_allowed: false, research_real_run_required: !mock, research_cycle_timeout_minutes: cycleTimeoutMinutes });
132
144
  await appendJsonlBounded(path.join(dir, 'events.jsonl'), { ts: nowIso(), type: 'research.run.started', maxCycles, mock, cycleTimeoutMinutes, real_run_required: !mock });
133
- const nativeAgentRun = await runNativeAgentOrchestrator({ root, missionId: id, route: flag(args, '--autoresearch') ? '$AutoResearch' : '$Research', prompt: mission.prompt || plan.prompt || 'Research run', backend: mock ? 'fake' : 'codex-sdk', mock, agents: requestedAgents, targetActiveSlots, desiredWorkItemCount, minimumWorkItems, maxQueueExpansion, concurrency: Math.min(requestedAgents, 5), readonly: !(applyPatches && writeMode !== 'off'), profile, writeMode: writeMode, applyPatches, dryRunPatches, maxWriteAgents, roster: plan.native_agent_plan, routeCommand: 'sks research run', routeBlackboxKind: 'actual_research_command' });
145
+ const nativeAgentRun = await runNativeAgentOrchestrator({ root, missionId: id, route: flag(args, '--autoresearch') ? '$AutoResearch' : '$Research', prompt: mission.prompt || plan.prompt || 'Research run', backend: mock ? 'fake' : 'codex-sdk', mock, agents: requestedAgents, targetActiveSlots, desiredWorkItemCount: Math.max(desiredWorkItemCount, graphWorkItemCount), minimumWorkItems: Math.max(minimumWorkItems, Math.min(graphWorkItemCount, targetActiveSlots)), maxQueueExpansion, concurrency: Math.min(requestedAgents, 5), readonly: true, profile, writeMode: writeMode, applyPatches: false, dryRunPatches, maxWriteAgents, roster: plan.native_agent_plan, routeCommand: 'sks research run', routeBlackboxKind: 'actual_research_command', narutoWorkGraph: researchWorkGraph });
134
146
  await writeJsonAtomic(path.join(dir, 'research-native-agent-run.json'), nativeAgentRun);
135
147
  await appendJsonlBounded(path.join(dir, 'events.jsonl'), { ts: nowIso(), type: 'research.native_agents.completed', backend: nativeAgentRun.backend, ok: nativeAgentRun.ok, proof: nativeAgentRun.proof?.status });
136
- if (mock) {
137
- let gate = await writeMockResearchResult(dir, plan);
138
- const nativeGate = { ...(gate.gate || gate), native_agent_proof: nativeAgentRun.proof?.ok === true, agent_central_ledger: true };
139
- await writeJsonAtomic(path.join(dir, 'research-gate.json'), nativeGate);
140
- gate = { ...gate, gate: nativeGate, passed: nativeGate.passed };
141
- const proof = await maybeFinalizeRoute(root, { missionId: id, route: '$Research', gateFile: 'research-gate.json', gate: gate.gate || gate, artifacts: ['agents/agent-proof-evidence.json', 'research-native-agent-run.json', 'research-gate.json', 'research-report.md', researchPaperArtifactForPlan(plan), 'source-ledger.json', 'agent-ledger.json', 'debate-ledger.json', 'completion-proof.json'], mock, command: { cmd: `sks research run ${id} --mock`, status: 0 } });
142
- await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: gate.passed ? 'RESEARCH_DONE' : 'RESEARCH_PAUSED', questions_allowed: true, implementation_allowed: false });
143
- if (flag(args, '--json'))
144
- return console.log(JSON.stringify({ schema: flag(args, '--autoresearch') ? 'sks.autoresearch-run.v1' : 'sks.research-run.v1', ok: proof.ok, mission_id: id, gate, proof: proof.validation, native_agent_run: nativeAgentRun, agent_batches: plan.agent_batches, autoresearch_cycle_policy: plan.autoresearch_cycle_policy }, null, 2));
145
- console.log(`Mock research done: ${id}`);
146
- console.log(`Gate: ${gate.passed ? 'passed' : 'blocked'}`);
147
- return;
148
- }
149
148
  if (!nativeAgentRun.ok) {
150
149
  await maybeFinalizeRoute(root, { missionId: id, route: '$Research', gateFile: 'research-gate.json', gate: await readJson(path.join(dir, 'research-gate.json'), null), artifacts: ['agents/agent-proof-evidence.json', 'research-native-agent-run.json', 'completion-proof.json'], statusHint: 'blocked', blockers: nativeAgentRun.proof?.blockers || ['native_agent_backend_blocked'], command: { cmd: `sks research run ${id}`, status: 2 } });
151
150
  await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: 'RESEARCH_BLOCKED_NATIVE_AGENTS', questions_allowed: true, implementation_allowed: false, blocker: 'agents/agent-proof-evidence.json' });
152
151
  process.exitCode = 2;
153
152
  return;
154
153
  }
154
+ const legacyResearchCycle = flag(args, '--legacy-research-cycle') || process.env.SKS_RESEARCH_LEGACY_CYCLE === '1';
155
+ const sourceMutationBaseline = await researchCodeMutationSnapshot(root, id);
156
+ if (!legacyResearchCycle) {
157
+ const cycleResult = await runResearchCycle({
158
+ root,
159
+ dir,
160
+ plan,
161
+ graph: researchWorkGraph,
162
+ cycle: 1,
163
+ backend: mock ? 'mock' : 'codex-sdk',
164
+ timeoutMs: cycleTimeoutMs,
165
+ maxParallelStages: readBoundedIntegerFlag(args, '--research-stage-parallelism', 4, 1, 16),
166
+ mock
167
+ });
168
+ const mutation = await researchCodeMutationDelta(root, sourceMutationBaseline, id);
169
+ if (mutation.blocked) {
170
+ const blocker = {
171
+ schema_version: 1,
172
+ mission_id: id,
173
+ ts: nowIso(),
174
+ phase: 'RESEARCH_BLOCKED_CODE_MUTATION',
175
+ reason: 'Research mode must not modify repository source files. Only route-local mission artifacts are allowed.',
176
+ changed_paths: mutation.changed_paths,
177
+ allowed_prefixes: mutation.allowed_prefixes,
178
+ implementation_allowed: false
179
+ };
180
+ await writeJsonAtomic(path.join(dir, 'research-code-mutation-blocker.json'), blocker);
181
+ await maybeFinalizeRoute(root, { missionId: id, route: '$Research', gateFile: 'research-gate.json', gate: await readJson(path.join(dir, 'research-gate.json'), null), artifacts: ['research-code-mutation-blocker.json', 'completion-proof.json'], statusHint: 'blocked', blockers: ['research_code_mutation_detected'], command: { cmd: `sks research run ${id}`, status: 2 } });
182
+ await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: 'RESEARCH_BLOCKED_CODE_MUTATION', questions_allowed: true, implementation_allowed: false, blocker: 'research-code-mutation-blocker.json' });
183
+ process.exitCode = 2;
184
+ return;
185
+ }
186
+ const gate = await evaluateResearchGate(dir);
187
+ const passed = cycleResult.status === 'passed' && gate.passed === true;
188
+ const proof = await maybeFinalizeRoute(root, {
189
+ missionId: id,
190
+ route: '$Research',
191
+ gateFile: 'research-gate.json',
192
+ gate: gate.gate || gate,
193
+ artifacts: ['agents/agent-proof-evidence.json', 'research-native-agent-run.json', 'research-cycle-runner.json', 'research-gate.json', 'research-report.md', researchPaperArtifactForPlan(plan), 'source-ledger.json', 'claim-evidence-matrix.json', 'implementation-blueprint.json', 'team-handoff-goal.md', 'completion-proof.json'],
194
+ statusHint: passed ? undefined : 'blocked',
195
+ blockers: passed ? [] : [...(cycleResult.blockers || []), ...(gate.reasons || [])],
196
+ mock,
197
+ command: { cmd: `sks research run ${id}${mock ? ' --mock' : ''}`, status: passed ? 0 : 2 }
198
+ });
199
+ await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: passed ? 'RESEARCH_DONE' : 'RESEARCH_BLOCKED_STAGE_CYCLE', questions_allowed: true, implementation_allowed: false });
200
+ await appendJsonlBounded(path.join(dir, 'events.jsonl'), { ts: nowIso(), type: passed ? 'research.done' : 'research.stage_cycle.blocked', cycle: 1, cycle_status: cycleResult.status });
201
+ await enforceRetention(root).catch(() => { });
202
+ if (flag(args, '--json'))
203
+ return console.log(JSON.stringify({ schema: flag(args, '--autoresearch') ? 'sks.autoresearch-run.v1' : 'sks.research-run.v1', ok: proof.ok && passed, mission_id: id, gate, quality_metrics: gate.metrics || null, proof: proof.validation, native_agent_run: nativeAgentRun, research_work_graph: researchWorkGraph, research_cycle: cycleResult, agent_batches: plan.agent_batches, autoresearch_cycle_policy: plan.autoresearch_cycle_policy }, null, 2));
204
+ printResearchCompletion(id, root, dir, plan, gate);
205
+ if (!passed)
206
+ process.exitCode = 2;
207
+ return;
208
+ }
155
209
  const codex = await getCodexInfo();
156
210
  if (!codex.bin) {
157
211
  const blocker = {
@@ -173,11 +227,10 @@ async function researchRun(args) {
173
227
  }
174
228
  let last = '';
175
229
  const researchCodexArgs = ['-c', 'service_tier="fast"', '-c', 'model_reasoning_effort="xhigh"'];
176
- const sourceMutationBaseline = await researchCodeMutationSnapshot(root, id);
177
230
  for (let cycle = 1; cycle <= maxCycles; cycle += 1) {
178
231
  const cycleDir = path.join(dir, 'research', `cycle-${cycle}`);
179
232
  const outputFile = path.join(cycleDir, 'final.md');
180
- await appendJsonlBounded(path.join(dir, 'events.jsonl'), { ts: nowIso(), type: 'research.cycle.start', cycle, timeoutMinutes: cycleTimeoutMinutes, profile, enforced_reasoning_effort: 'xhigh' });
233
+ await appendJsonlBounded(path.join(dir, 'events.jsonl'), { ts: nowIso(), type: 'research.legacy_cycle.start', cycle, timeoutMinutes: cycleTimeoutMinutes, profile, enforced_reasoning_effort: 'xhigh', legacy_final_md_loop: true });
181
234
  const prompt = buildResearchPrompt({ id, mission, plan, cycle, previous: last });
182
235
  const result = await runCodexExec({ root, prompt, outputFile, json: true, profile, extraArgs: researchCodexArgs, logDir: cycleDir, timeoutMs: cycleTimeoutMs });
183
236
  await writeJsonAtomic(path.join(cycleDir, 'process.json'), { code: result.code, stdout_tail: result.stdout, stderr_tail: result.stderr, stdout_bytes: result.stdoutBytes, stderr_bytes: result.stderrBytes, truncated: result.truncated, timed_out: result.timedOut });
@@ -212,8 +265,8 @@ async function researchRun(args) {
212
265
  await appendJsonlBounded(path.join(dir, 'events.jsonl'), { ts: nowIso(), type: 'research.done', cycle });
213
266
  await enforceRetention(root).catch(() => { });
214
267
  if (flag(args, '--json'))
215
- return console.log(JSON.stringify({ schema: flag(args, '--autoresearch') ? 'sks.autoresearch-run.v1' : 'sks.research-run.v1', ok: proof.ok, mission_id: id, gate, proof: proof.validation, agent_batches: plan.agent_batches, autoresearch_cycle_policy: plan.autoresearch_cycle_policy }, null, 2));
216
- console.log(`Research done: ${id}`);
268
+ return console.log(JSON.stringify({ schema: flag(args, '--autoresearch') ? 'sks.autoresearch-run.v1' : 'sks.research-run.v1', ok: proof.ok, mission_id: id, gate, quality_metrics: gate.metrics || null, proof: proof.validation, research_work_graph: researchWorkGraph, agent_batches: plan.agent_batches, autoresearch_cycle_policy: plan.autoresearch_cycle_policy }, null, 2));
269
+ printResearchCompletion(id, root, dir, plan, gate);
217
270
  return;
218
271
  }
219
272
  }
@@ -222,6 +275,20 @@ async function researchRun(args) {
222
275
  await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: 'RESEARCH_PAUSED_MAX_CYCLES', questions_allowed: true, implementation_allowed: false });
223
276
  console.log(`Research paused after max cycles without unanimous agent consensus: ${id}`);
224
277
  }
278
/**
 * Print the human-readable summary of a research run to stdout.
 *
 * Emits one console.log line per artifact (report, paper, implementation
 * blueprint, claim-evidence matrix, experiment plan, replication pack and
 * team handoff), each shown as a path relative to `root`, plus the gate
 * verdict and a one-line quality summary taken from `gate.metrics`.
 *
 * This function only formats paths; it performs no filesystem access, so a
 * listed artifact is not guaranteed to exist.
 *
 * NOTE(review): the first line always reads "Research done: <id>", even when
 * the following "Gate:" line reports "blocked" - confirm that this wording is
 * intended for blocked runs.
 *
 * @param id - Mission identifier interpolated into the first line.
 * @param root - Base directory that artifact paths are made relative to.
 * @param dir - Mission directory that artifact file names are joined onto.
 * @param plan - Research plan; forwarded to researchPaperArtifactForPlan to
 *   obtain the paper artifact name.
 * @param gate - Gate result; may be null/undefined (optional chaining is used
 *   for both `gate.metrics` and `gate.passed`).
 * @returns undefined; output is written via console.log only.
 */
+ function printResearchCompletion(id, root, dir, plan, gate) {
279
// Missing gate or missing metrics degrade to an empty object so the quality
// line below falls back to zeros instead of throwing.
+ const metrics = gate?.metrics || {};
280
// Resolve an artifact file name inside the mission dir to a root-relative path.
+ const rel = (artifact) => path.relative(root, path.join(dir, artifact));
281
+ console.log(`Research done: ${id}`);
282
+ console.log(`Report: ${rel('research-report.md')}`);
283
+ console.log(`Paper: ${rel(researchPaperArtifactForPlan(plan))}`);
284
+ console.log(`Implementation blueprint: ${rel('implementation-blueprint.json')}`);
285
+ console.log(`Claim-evidence matrix: ${rel('claim-evidence-matrix.json')}`);
286
+ console.log(`Experiment plan: ${rel('experiment-plan.json')}`);
287
+ console.log(`Replication pack: ${rel('replication-pack.json')}`);
288
+ console.log(`Gate: ${gate?.passed ? 'passed' : 'blocked'}`);
289
// Source count prefers the total that includes counterevidence, then the plain
// source_entries metric, then 0; the other metrics default to 0 when nullish.
+ console.log(`Quality: ${metrics.source_entries_total_with_counterevidence ?? metrics.source_entries ?? 0} sources / ${metrics.source_layers_covered ?? 0} layers / ${metrics.key_claims ?? 0} key claims / ${metrics.falsification_cases ?? 0} falsification cases`);
290
+ console.log(`Handoff: ${rel('team-handoff-goal.md')}`);
291
+ }
225
292
  async function researchStatus(args) {
226
293
  const root = await sksRoot();
227
294
  const id = await resolveMissionId(root, args[0]);
@@ -246,6 +313,16 @@ async function researchStatus(args) {
246
313
  const agentRows = Array.isArray(agentLedger?.agents) ? agentLedger.agents : [];
247
314
  const sourceLayerRows = Array.isArray(sourceLedger?.source_layers) ? sourceLedger.source_layers : [];
248
315
  const sourceLayersCovered = sourceLayerRows.filter((layer) => layer.status === 'covered' && ((Array.isArray(layer.source_ids) && layer.source_ids.length) || (Array.isArray(layer.counterevidence_ids) && layer.counterevidence_ids.length))).length;
316
+ const qualityContract = await readResearchQualityContract(dir);
317
+ const claimMatrix = await readClaimEvidenceMatrix(dir);
318
+ const sourceQualityReport = await readSourceQualityReport(dir);
319
+ const implementationBlueprint = await readImplementationBlueprint(dir);
320
+ const experimentPlan = await readExperimentPlan(dir);
321
+ const replicationPack = await readReplicationPack(dir);
322
+ const finalReview = await readResearchFinalReview(dir);
323
+ const blueprintValidation = validateImplementationBlueprint(implementationBlueprint, qualityContract);
324
+ const experimentValidation = validateExperimentPlan(experimentPlan, qualityContract);
325
+ const replicationValidation = validateReplicationPack(replicationPack);
249
326
  console.log(JSON.stringify({
250
327
  mission,
251
328
  state,
@@ -275,7 +352,23 @@ async function researchStatus(args) {
275
352
  research_paper_artifact: paperArtifact.name,
276
353
  paper_present: Boolean(paperText.trim()),
277
354
  paper_sections: countResearchPaperSections(paperText),
278
- falsification_cases: falsificationLedger?.cases?.length ?? null
355
+ falsification_cases: falsificationLedger?.cases?.length ?? null,
356
+ research_quality: {
357
+ contract: qualityContract,
358
+ report_word_count: gate?.metrics?.report_word_count ?? null,
359
+ claim_evidence_matrix_present: claimMatrix.present,
360
+ key_claims: claimMatrix.key_claim_ids.length,
361
+ triangulated_claims: claimMatrix.triangulated_claim_count,
362
+ claim_matrix_blockers: claimMatrix.blockers,
363
+ source_quality_report_ok: sourceQualityReport?.ok === true,
364
+ implementation_blueprint_sections: Array.isArray(implementationBlueprint?.sections) ? implementationBlueprint.sections.length : null,
365
+ implementation_blueprint_ok: blueprintValidation.ok,
366
+ experiment_steps: Array.isArray(experimentPlan?.steps) ? experimentPlan.steps.length : null,
367
+ experiment_plan_ok: experimentValidation.ok,
368
+ replication_pack_ok: replicationValidation.ok,
369
+ final_review_approved: finalReview?.approved === true,
370
+ final_review_blockers: finalReview?.blockers || []
371
+ }
279
372
  }, null, 2));
280
373
  }
281
374
  async function researchCodeMutationSnapshot(root, missionId = null) {
package/dist/core/fsx.js CHANGED
@@ -5,7 +5,7 @@ import os from 'node:os';
5
5
  import crypto from 'node:crypto';
6
6
  import { spawn } from 'node:child_process';
7
7
  import { fileURLToPath } from 'node:url';
8
- export const PACKAGE_VERSION = '2.0.12';
8
+ export const PACKAGE_VERSION = '2.0.14';
9
9
  export const DEFAULT_PROCESS_TAIL_BYTES = 256 * 1024;
10
10
  export const DEFAULT_PROCESS_TIMEOUT_MS = 30 * 60 * 1000;
11
11
  export function nowIso() {
@@ -32,7 +32,7 @@ async function main() {
32
32
  generationIndex: 1,
33
33
  sessionId: String(intake.item.id || ''),
34
34
  cwd: String(intake.worktree_path || process.cwd()),
35
- prompt: buildNarutoWorkerPrompt(intake.item),
35
+ prompt: buildNarutoWorkerPrompt(intake.item, intake.parent_prompt),
36
36
  outputSchemaId: CODEX_AGENT_WORKER_RESULT_SCHEMA_ID,
37
37
  outputSchema: codexAgentWorkerResultSchema,
38
38
  sandboxPolicy: intake.item.write_allowed === true ? 'workspace-write' : 'read-only',
@@ -109,10 +109,12 @@ function backendPreference(value) {
109
109
  return ['local-llm', 'codex-sdk'];
110
110
  return ['codex-sdk'];
111
111
  }
112
- function buildNarutoWorkerPrompt(item) {
112
+ function buildNarutoWorkerPrompt(item, parentPrompt) {
113
113
  const writeAllowed = item?.write_allowed === true;
114
+ const parentObjective = normalizeWorkerPromptText(parentPrompt);
114
115
  return [
115
116
  'You are a Naruto route worker. Complete only this assigned work item and return JSON matching the required schema.',
117
+ parentObjective ? `Parent Naruto objective:\n${parentObjective}` : null,
116
118
  `Work item: ${String(item?.id || '')} ${String(item?.title || item?.kind || '')}`,
117
119
  `Role: ${String(item?.required_role || 'worker')}`,
118
120
  `Kind: ${String(item?.kind || 'verification')}`,
@@ -122,8 +124,14 @@ function buildNarutoWorkerPrompt(item) {
122
124
  writeAllowed
123
125
  ? 'If changes are needed, return model-authored patch_envelopes scoped to write paths.'
124
126
  : 'This is read-only work. Do not mutate files and return an empty patch_envelopes array.',
127
+ writeAllowed
128
+ ? null
129
+ : 'For read-only work, inspect requested files/artifacts only; do not run package scripts, build commands, tests, or temp-file-creating checks unless the parent objective explicitly requires them.',
125
130
  'Include verification checks, rollback notes, blockers, findings, and changed_files.'
126
- ].join('\n');
131
+ ].filter(Boolean).join('\n');
132
+ }
133
/**
 * Flatten and bound a prompt string before it is embedded in a worker prompt.
 *
 * Falsy input (undefined, null, '', 0, false) becomes the empty string. Every
 * run of whitespace - including newlines - is collapsed to a single space,
 * the result is trimmed, and then cut to at most 4000 characters as counted
 * by String.prototype.slice (UTF-16 code units).
 *
 * @param value - Raw prompt text; coerced with String().
 * @returns A single-line string of at most 4000 code units; may be ''.
 */
+ function normalizeWorkerPromptText(value) {
134
+ return String(value || '').replace(/\s+/g, ' ').trim().slice(0, 4000);
127
135
  }
128
136
  main().then(() => {
129
137
  process.exit(0);
@@ -29,6 +29,7 @@ export async function spawnActualNarutoWorker(input) {
29
29
  generated_at: nowIso(),
30
30
  mission_id: input.missionId,
31
31
  item: input.item,
32
+ parent_prompt: normalizeWorkerPromptText(input.parentPrompt),
32
33
  placement: input.placement,
33
34
  backend: input.backend,
34
35
  result_path: resultPath,
@@ -85,6 +86,9 @@ export async function collectActualNarutoWorker(handle) {
85
86
  blockers
86
87
  };
87
88
  }
89
/**
 * Flatten and bound the parent prompt before it is stored as `parent_prompt`.
 *
 * Falsy input becomes the empty string. Every run of whitespace (newlines
 * included) is collapsed to one space, the result is trimmed, and then
 * truncated to at most 4000 characters (UTF-16 code units, per
 * String.prototype.slice).
 *
 * @param value - Raw prompt text; coerced with String().
 * @returns A single-line string of at most 4000 code units; may be ''.
 */
+ function normalizeWorkerPromptText(value) {
90
+ return String(value || '').replace(/\s+/g, ' ').trim().slice(0, 4000);
91
+ }
88
92
  function actualWorkerEntrypoint() {
89
93
  return fileURLToPath(new URL('./naruto-real-worker-child.js', import.meta.url));
90
94
  }