sneakoscope 0.7.68 → 0.7.72

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -192,7 +192,7 @@ sks qa-loop prepare "http://localhost:3000"
192
192
  sks qa-loop run latest --max-cycles 2
193
193
  sks goal create "persist this migration workflow"
194
194
  sks research prepare "evaluate this approach"
195
- sks research run latest --max-cycles 3
195
+ sks research run latest --max-cycles 3 --cycle-timeout-minutes 120
196
196
  sks research status latest
197
197
  sks db scan --json
198
198
  sks wiki refresh
@@ -211,7 +211,7 @@ sks skill-dream run --json
211
211
  sks code-structure scan --json
212
212
  ```
213
213
 
214
- `sks research` prepares a genius-lens scout council, requires every scout to run at `xhigh`, records one literal `Eureka!` idea per scout, runs an evidence-bound debate, maximizes available web/source retrieval before synthesis, and requires `source-ledger.json`, `scout-ledger.json`, `debate-ledger.json`, `novelty-ledger.json`, `falsification-ledger.json`, and `research-gate.json` so research runs stay source-backed, adversarially checked, and falsifiable. `research status` reports source entries, counterevidence, xhigh scout count, Eureka moments, debate exchanges, scout findings, and falsification cases alongside the gate.
214
+ `sks research` prepares a genius-lens scout council, requires every scout to run at `xhigh`, records one literal `Eureka!` idea per scout, runs an evidence-bound debate, and now creates `research-source-skill.md` as a route-local source collection skill before synthesis. Normal Research is intentionally allowed to take one or two hours when the problem needs it; `--mock` is only for selftests or dry harness checks, and a real run blocks with `research-blocker.json` instead of silently substituting mock output when the Codex execution path is unavailable. The source layer contract separates latest papers, official/government or leading-institution sources, standards/primary docs, current news such as BBC/CNN/GDELT-style sources, public discourse such as X/Reddit, developer/practitioner knowledge such as Stack Overflow/GitHub, and counterevidence/fact-checking; `source-ledger.json` must record layer coverage, source quality, blockers, citations, and cross-layer triangulation. Context7 is optional for `$Research` and only becomes relevant when the research topic specifically depends on package, API, framework, or SDK documentation. Research runs require `research-report.md`, `research-paper.md`, `genius-opinion-summary.md`, `research-source-skill.md`, `source-ledger.json`, `scout-ledger.json`, `debate-ledger.json`, `novelty-ledger.json`, `falsification-ledger.json`, and `research-gate.json` so they stay source-backed, adversarially checked, falsifiable, paper-ready, and clear about every scout lens opinion. `research status` reports source entries, source-layer coverage, triangulation checks, counterevidence, xhigh scout count, Eureka moments, debate exchanges, paper presence/sections, genius-opinion summary coverage, scout findings, and falsification cases alongside the gate.
215
215
 
216
216
  `sks pipeline plan` shows the active route lane, kept/skipped stages, verification commands, and no-unrequested-fallback invariant. `sks proof-field scan` is the lightweight rubric for small changes; risky or broad signals return to the full Team/Honest path.
217
217
 
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "sneakoscope",
3
3
  "displayName": "ㅅㅋㅅ",
4
- "version": "0.7.68",
4
+ "version": "0.7.72",
5
5
  "description": "Sneakoscope Codex: database-safe Codex CLI/App harness with Team, Goal, AutoResearch, TriWiki, and Honest Mode.",
6
6
  "type": "module",
7
7
  "homepage": "https://github.com/mandarange/Sneakoscope-Codex#readme",
package/src/cli/main.mjs CHANGED
@@ -22,7 +22,7 @@ import { bumpProjectVersion, installVersionGitHook, runVersionPreCommit, version
22
22
  import { rustInfo } from '../core/rust-accelerator.mjs';
23
23
  import { renderCartridge, validateCartridge, driftCartridge, snapshotCartridge } from '../core/gx-renderer.mjs';
24
24
  import { defaultEvaluationScenario, runEvaluationBenchmark } from '../core/evaluation.mjs';
25
- import { buildResearchPrompt, evaluateResearchGate, writeMockResearchResult, writeResearchPlan } from '../core/research.mjs';
25
+ import { evaluateResearchGate, writeMockResearchResult, writeResearchPlan } from '../core/research.mjs';
26
26
  import {
27
27
  PPT_AUDIENCE_STRATEGY_ARTIFACT,
28
28
  PPT_CLEANUP_REPORT_ARTIFACT,
@@ -211,7 +211,7 @@ Usage:
211
211
  sks team attach-tmux [mission-id|latest]
212
212
  sks team cleanup-tmux [mission-id|latest]
213
213
  sks research prepare "topic" [--depth frontier]
214
- sks research run <mission-id|latest> [--mock] [--max-cycles N]
214
+ sks research run <mission-id|latest> [--mock] [--max-cycles N] [--cycle-timeout-minutes N]
215
215
  sks research status <mission-id|latest>
216
216
  sks db policy
217
217
  sks db scan [--migrations] [--json]
@@ -1963,7 +1963,7 @@ function readMaxCycles(args, fallback) {
1963
1963
 
1964
1964
  function positionalArgs(args = []) {
1965
1965
  const out = [];
1966
- const valueFlags = new Set(['--format', '--iterations', '--out', '--baseline', '--candidate', '--install-scope', '--max-cycles', '--depth', '--scope', '--transport', '--query', '--topic', '--tokens', '--timeout-ms', '--sql', '--command', '--project-ref', '--agent', '--phase', '--message', '--role', '--max-anchors', '--lines', '--dir']);
1966
+ const valueFlags = new Set(['--format', '--iterations', '--out', '--baseline', '--candidate', '--install-scope', '--max-cycles', '--cycle-timeout-minutes', '--depth', '--scope', '--transport', '--query', '--topic', '--tokens', '--timeout-ms', '--sql', '--command', '--project-ref', '--agent', '--phase', '--message', '--role', '--max-anchors', '--lines', '--dir']);
1967
1967
  for (let i = 0; i < args.length; i++) {
1968
1968
  const arg = String(args[i]);
1969
1969
  if (valueFlags.has(arg)) {
@@ -2647,19 +2647,21 @@ async function selftest() {
2647
2647
  const hookResearchTeamResult = await runProcess(process.execPath, [hookBin, 'hook', 'user-prompt-submit'], { cwd: hookResearchMarkdownTmp, input: hookResearchTeamPayload, env: { SKS_DISABLE_UPDATE_CHECK: '1' }, timeoutMs: 15000, maxOutputBytes: 256 * 1024 });
2648
2648
  if (hookResearchTeamResult.code !== 0) throw new Error(`selftest: active Team setup before markdown $Research hook exited ${hookResearchTeamResult.code}: ${hookResearchTeamResult.stderr}`);
2649
2649
  const hookResearchTeamState = await readJson(stateFile(hookResearchMarkdownTmp), {});
2650
- const hookResearchMarkdownPayload = JSON.stringify({ cwd: hookResearchMarkdownTmp, prompt: '[$research](/tmp/research/SKILL.md) Codex Computer Use 도구 노출 문제를 QA루프 관점에서 연구' });
2650
+ const hookResearchMarkdownPayload = JSON.stringify({ cwd: hookResearchMarkdownTmp, prompt: '논문 [$research](x) 커밋 푸쉬 연구' });
2651
2651
  const hookResearchMarkdownResult = await runProcess(process.execPath, [hookBin, 'hook', 'user-prompt-submit'], { cwd: hookResearchMarkdownTmp, input: hookResearchMarkdownPayload, env: { SKS_DISABLE_UPDATE_CHECK: '1' }, timeoutMs: 15000, maxOutputBytes: 256 * 1024 });
2652
2652
  if (hookResearchMarkdownResult.code !== 0) throw new Error(`selftest: markdown $Research hook exited ${hookResearchMarkdownResult.code}: ${hookResearchMarkdownResult.stderr}`);
2653
2653
  const hookResearchMarkdownJson = JSON.parse(hookResearchMarkdownResult.stdout);
2654
2654
  const hookResearchMarkdownContext = hookResearchMarkdownJson.hookSpecificOutput?.additionalContext || '';
2655
- if (!hookResearchMarkdownContext.includes('$Research route prepared')) throw new Error('selftest: markdown $Research hook did not prepare Research route');
2656
- if (hookResearchMarkdownContext.includes(`Active Team mission ${hookResearchTeamState.mission_id}`)) throw new Error('selftest: markdown $Research hook retained stale active Team context');
2657
- if (!String(hookResearchMarkdownJson.systemMessage || '').includes('Research route') || String(hookResearchMarkdownJson.systemMessage || '').includes('QA-LOOP route')) throw new Error('selftest: markdown $Research visible hook message was hijacked by QA-LOOP policy text');
2655
+ if (!hookResearchMarkdownContext.includes('$Research route prepared')) throw new Error('selftest: markdown research hook');
2656
+ if (hookResearchMarkdownContext.includes(`Active Team mission ${hookResearchTeamState.mission_id}`)) throw new Error('selftest: stale Team context');
2657
+ if (!String(hookResearchMarkdownJson.systemMessage || '').includes('Research route') || String(hookResearchMarkdownJson.systemMessage || '').includes('QA-LOOP route')) throw new Error('selftest: research hook message');
2658
2658
  const hookResearchMarkdownState = await readJson(stateFile(hookResearchMarkdownTmp), {});
2659
- if (hookResearchMarkdownState.mode !== 'RESEARCH' || hookResearchMarkdownState.route !== 'Research' || hookResearchMarkdownState.mission_id === hookResearchTeamState.mission_id || hookResearchMarkdownState.stop_gate !== 'research-gate.json' || !hookResearchMarkdownState.pipeline_plan_ready) throw new Error('selftest: markdown $Research hook did not replace active Team with prepared Research mission state');
2659
+ if (hookResearchMarkdownState.mode !== 'RESEARCH' || hookResearchMarkdownState.route !== 'Research' || hookResearchMarkdownState.mission_id === hookResearchTeamState.mission_id || hookResearchMarkdownState.stop_gate !== 'research-gate.json' || !hookResearchMarkdownState.pipeline_plan_ready) throw new Error('selftest: research hook state');
2660
2660
  const hookResearchMissionDir = missionDir(hookResearchMarkdownTmp, hookResearchMarkdownState.mission_id);
2661
- if (!(await exists(path.join(hookResearchMissionDir, PIPELINE_PLAN_ARTIFACT)))) throw new Error('selftest: markdown $Research hook did not write pipeline plan');
2662
- for (const artifact of ['source-ledger.json', 'scout-ledger.json', 'debate-ledger.json', 'falsification-ledger.json']) {
2661
+ if (!(await exists(path.join(hookResearchMissionDir, PIPELINE_PLAN_ARTIFACT)))) throw new Error('selftest: research hook plan');
2662
+ const rss = 'research-source-skill.md';
2663
+ const gos = 'genius-opinion-summary.md';
2664
+ for (const artifact of [rss, 'source-ledger.json', 'scout-ledger.json', 'debate-ledger.json', 'falsification-ledger.json']) {
2663
2665
  if (!(await exists(path.join(hookResearchMissionDir, artifact)))) throw new Error(`selftest: hook research ${artifact}`);
2664
2666
  }
2665
2667
  const hookPayload = JSON.stringify({ cwd: hookGoalTmp, prompt: '$Goal 로그인 세션 만료 UX 개선' });
@@ -3782,15 +3784,16 @@ async function selftest() {
3782
3784
  if (wikiPruneDryRun.candidates < 1 || !wikiPruneDryRun.actions.some((action) => action.reason === 'low_wiki_trust')) throw new Error('selftest: wiki prune did not flag low-trust artifact');
3783
3785
  const { dir: researchDir, mission: researchMission } = await createMission(tmp, { mode: 'research', prompt: '새로운 코드 리뷰 방법론 연구' });
3784
3786
  const researchPlan = await writeResearchPlan(researchDir, researchMission.prompt, {});
3785
- if (researchPlan.methodology !== 'genius-scout-council-frontier-discovery-loop' || researchPlan.web_research_policy?.mode !== 'maximum_source_retrieval') throw new Error('selftest: research plan contract');
3786
- for (const artifact of ['source-ledger.json', 'scout-ledger.json', 'debate-ledger.json', 'falsification-ledger.json']) {
3787
- if (!researchPlan.required_artifacts?.includes(artifact) || !(await exists(path.join(researchDir, artifact)))) throw new Error(`selftest: research ${artifact}`);
3788
- }
3787
+ if (researchPlan.methodology !== 'genius-scout-council-frontier-discovery-loop' || researchPlan.web_research_policy?.mode !== 'layered_source_retrieval_and_triangulation') throw new Error('selftest: research plan contract');
3788
+ const rArts = researchPlan.required_artifacts || [];
3789
+ for (const a of [rss, 'source-ledger.json', 'scout-ledger.json', 'debate-ledger.json', 'falsification-ledger.json']) if (!rArts.includes(a) || !(await exists(path.join(researchDir, a)))) throw new Error('selftest: research artifact');
3790
+ if (!rArts.includes('research-paper.md') || !rArts.includes(gos)) throw new Error('selftest: research paper');
3789
3791
  const initialResearchGate = await evaluateResearchGate(researchDir);
3790
- if (initialResearchGate.passed || !initialResearchGate.reasons.includes('web_search_pass_missing') || !initialResearchGate.reasons.includes('eureka_missing') || !initialResearchGate.reasons.includes('debate_exchanges_missing')) throw new Error('selftest: research gate should block');
3792
+ if (initialResearchGate.passed || ['web_search_pass_missing', 'eureka_missing', 'debate_exchanges_missing', 'research_paper_missing'].some((r) => !initialResearchGate.reasons.includes(r))) throw new Error('selftest: research gate');
3791
3793
  const researchGate = await writeMockResearchResult(researchDir, researchPlan);
3792
3794
  if (!researchGate.passed) throw new Error('selftest: mock research gate did not pass');
3793
- if (researchGate.metrics?.independent_scouts < 5 || researchGate.metrics?.xhigh_scouts < 5 || researchGate.metrics?.eureka_moments < 5 || researchGate.metrics?.debate_participants < 5 || researchGate.metrics?.counterevidence_sources < 1 || researchGate.metrics?.citation_coverage !== true || researchGate.metrics?.falsification_cases < 1) throw new Error('selftest: research metrics');
3795
+ const rm = researchGate.metrics || {};
3796
+ if (['independent_scouts', 'xhigh_scouts', 'eureka_moments', 'debate_participants', 'genius_opinion_summaries'].some((m) => rm[m] < 5) || ['counterevidence_sources', 'falsification_cases', 'triangulation_checks'].some((m) => rm[m] < 1) || rm.paper_sections < 8 || rm.citation_coverage !== true || rm.source_layers_covered < 7) throw new Error('selftest: research metrics');
3794
3797
  await writeJsonAtomic(path.join(dir, 'done-gate.json'), { passed: true, unsupported_critical_claims: 0, database_safety_violation: false, database_safety_reviewed: true, visual_drift: 'low', wiki_drift: 'low', tests_required: false });
3795
3798
  const gate = await evaluateDoneGate(tmp, id);
3796
3799
  if (!gate.passed) throw new Error('selftest: done gate');
@@ -1,6 +1,6 @@
1
1
  import path from 'node:path';
2
2
  import fsp from 'node:fs/promises';
3
- import { readJson, writeJsonAtomic, writeTextAtomic, appendJsonlBounded, nowIso, exists, ensureDir, packageRoot, dirSize, formatBytes, PACKAGE_VERSION, sksRoot, readStdin } from '../core/fsx.mjs';
3
+ import { readJson, readText, writeJsonAtomic, writeTextAtomic, appendJsonlBounded, nowIso, exists, ensureDir, packageRoot, dirSize, formatBytes, PACKAGE_VERSION, sksRoot, readStdin } from '../core/fsx.mjs';
4
4
  import { initProject } from '../core/init.mjs';
5
5
  import { getCodexInfo, runCodexExec } from '../core/codex-adapter.mjs';
6
6
  import { createMission, loadMission, findLatestMission, missionDir, setCurrent, stateFile } from '../core/mission.mjs';
@@ -8,14 +8,14 @@ import { buildQuestionSchema, writeQuestions } from '../core/questions.mjs';
8
8
  import { sealContract } from '../core/decision-contract.mjs';
9
9
  import { buildQaLoopQuestionSchema, buildQaLoopPrompt, evaluateQaGate, qaStatus, writeMockQaResult, writeQaLoopArtifacts } from '../core/qa-loop.mjs';
10
10
  import { containsUserQuestion, noQuestionContinuationReason } from '../core/no-question-guard.mjs';
11
- import { buildResearchPrompt, evaluateResearchGate, writeMockResearchResult, writeResearchPlan } from '../core/research.mjs';
11
+ import { RESEARCH_GENIUS_SUMMARY_ARTIFACT, RESEARCH_PAPER_ARTIFACT, RESEARCH_SOURCE_SKILL_ARTIFACT, countGeniusOpinionSummaries, countResearchPaperSections, buildResearchPrompt, evaluateResearchGate, writeMockResearchResult, writeResearchPlan } from '../core/research.mjs';
12
12
  import { storageReport, enforceRetention, pruneWikiArtifacts } from '../core/retention.mjs';
13
13
  import { evaluateDoneGate } from '../core/hproof.mjs';
14
14
  import { renderCartridge, validateCartridge, driftCartridge, snapshotCartridge } from '../core/gx-renderer.mjs';
15
15
  import { DEFAULT_EVAL_THRESHOLDS, compareEvaluationReports, runEvaluationBenchmark } from '../core/evaluation.mjs';
16
16
  import { contextCapsule } from '../core/triwiki-attention.mjs';
17
17
  import { rgbaKey, rgbaToWikiCoord, validateWikiCoordinateIndex } from '../core/wiki-coordinate.mjs';
18
- import { ALLOWED_REASONING_EFFORTS, CODEX_COMPUTER_USE_ONLY_POLICY, DOLLAR_SKILL_NAMES, FROM_CHAT_IMG_CHECKLIST_ARTIFACT, FROM_CHAT_IMG_COVERAGE_ARTIFACT, FROM_CHAT_IMG_QA_LOOP_ARTIFACT, FROM_CHAT_IMG_SOURCE_INVENTORY_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS, FROM_CHAT_IMG_VISUAL_MAP_ARTIFACT, FROM_CHAT_IMG_WORK_ORDER_ARTIFACT, RECOMMENDED_SKILLS, ROUTES, hasFromChatImgSignal, reflectionRequiredForRoute, routePrompt, routeReasoning, routeRequiresSubagents, stackCurrentDocsPolicy, stripVisibleDecisionAnswerBlocks, triwikiContextTracking } from '../core/routes.mjs';
18
+ import { ALLOWED_REASONING_EFFORTS, CODEX_COMPUTER_USE_ONLY_POLICY, DOLLAR_SKILL_NAMES, FROM_CHAT_IMG_CHECKLIST_ARTIFACT, FROM_CHAT_IMG_COVERAGE_ARTIFACT, FROM_CHAT_IMG_QA_LOOP_ARTIFACT, FROM_CHAT_IMG_SOURCE_INVENTORY_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS, FROM_CHAT_IMG_VISUAL_MAP_ARTIFACT, FROM_CHAT_IMG_WORK_ORDER_ARTIFACT, RECOMMENDED_SKILLS, ROUTES, hasFromChatImgSignal, reflectionRequiredForRoute, routeNeedsContext7, routePrompt, routeReasoning, routeRequiresSubagents, stackCurrentDocsPolicy, stripVisibleDecisionAnswerBlocks, triwikiContextTracking } from '../core/routes.mjs';
19
19
  import { TEAM_DECOMPOSITION_ARTIFACT, TEAM_GRAPH_ARTIFACT, TEAM_INBOX_DIR, TEAM_RUNTIME_TASKS_ARTIFACT, teamRuntimePlanMetadata, teamRuntimeRequiredArtifacts, writeTeamRuntimeArtifacts } from '../core/team-dag.mjs';
20
20
  import { appendTeamEvent, formatAgentReasoning, formatRoleCounts, initTeamLive, normalizeTeamSpec, parseTeamSpecArgs, readTeamControl, readTeamDashboard, readTeamLive, readTeamTranscriptTail, renderTeamAgentLane, renderTeamCleanupSummary, renderTeamWatch, requestTeamSessionCleanup, teamCleanupRequested, teamReasoningPolicy } from '../core/team-live.mjs';
21
21
  import { evaluateTeamReviewPolicyGate, MIN_TEAM_REVIEWER_LANES, MIN_TEAM_REVIEW_POLICY_TEXT, teamReviewPolicy } from '../core/team-review-policy.mjs';
@@ -42,6 +42,10 @@ const flag = (args, name) => args.includes(name);
42
42
  const promptOf = (args) => args.filter((x) => !String(x).startsWith('--')).join(' ').trim();
43
43
  const TEAM_SESSION_CLEANUP_ARTIFACT = 'team-session-cleanup.json';
44
44
  const REPOSITORY_URL = 'https://github.com/mandarange/Sneakoscope-Codex.git';
45
+ const RESEARCH_DEFAULT_MAX_CYCLES = 3;
46
+ const RESEARCH_DEFAULT_CYCLE_TIMEOUT_MINUTES = 120;
47
+ const RESEARCH_MIN_CYCLE_TIMEOUT_MINUTES = 15;
48
+ const RESEARCH_MAX_CYCLE_TIMEOUT_MINUTES = 240;
45
49
 
46
50
  async function resolveMissionId(root, arg) { return (!arg || arg === 'latest') ? findLatestMission(root) : arg; }
47
51
 
@@ -429,7 +433,7 @@ async function researchPrepare(args) {
429
433
  if (!prompt) throw new Error('Missing research topic.');
430
434
  const { id, dir } = await createMission(root, { mode: 'research', prompt });
431
435
  const route = ROUTES.find((entry) => entry.id === 'Research') || routePrompt('$Research');
432
- const context7Required = true;
436
+ const context7Required = routeNeedsContext7(route, prompt);
433
437
  const reasoning = routeReasoning(route, prompt);
434
438
  const plan = await writeResearchPlan(dir, prompt, { depth: readFlagValue(args, '--depth', 'frontier') });
435
439
  const pipelinePlan = await writePipelinePlan(dir, { missionId: id, route, task: prompt, required: context7Required, ambiguity: { required: false, status: 'direct_research_cli' } });
@@ -457,7 +461,7 @@ async function researchPrepare(args) {
457
461
  mode: route.mode,
458
462
  phase: 'RESEARCH_PREPARED',
459
463
  questions_allowed: false,
460
- implementation_allowed: true,
464
+ implementation_allowed: false,
461
465
  context7_required: context7Required,
462
466
  context7_verified: false,
463
467
  subagents_required: routeRequiresSubagents(route, prompt),
@@ -479,14 +483,17 @@ async function researchPrepare(args) {
479
483
  console.log(`Methodology: ${plan.methodology}`);
480
484
  console.log(`Plan: ${path.relative(root, path.join(dir, 'research-plan.md'))}`);
481
485
  console.log(`Pipeline: ${path.relative(root, path.join(dir, PIPELINE_PLAN_ARTIFACT))}`);
486
+ console.log(`Paper: ${RESEARCH_PAPER_ARTIFACT}`);
487
+ console.log(`Genius summary: ${RESEARCH_GENIUS_SUMMARY_ARTIFACT}`);
488
+ console.log(`Source skill: ${RESEARCH_SOURCE_SKILL_ARTIFACT}`);
482
489
  console.log('Ledgers: source-ledger.json, scout-ledger.json, debate-ledger.json, novelty-ledger.json, falsification-ledger.json');
483
- console.log(`Run: sks research run ${id} --max-cycles 3`);
490
+ console.log(`Run: sks research run ${id} --max-cycles ${RESEARCH_DEFAULT_MAX_CYCLES} --cycle-timeout-minutes ${RESEARCH_DEFAULT_CYCLE_TIMEOUT_MINUTES}`);
484
491
  }
485
492
 
486
493
  async function researchRun(args) {
487
494
  const root = await sksRoot();
488
495
  const id = await resolveMissionId(root, args[0]);
489
- if (!id) throw new Error('Usage: sks research run <mission-id|latest> [--mock] [--max-cycles N]');
496
+ if (!id) throw new Error('Usage: sks research run <mission-id|latest> [--mock] [--max-cycles N] [--cycle-timeout-minutes N]');
490
497
  const { dir, mission } = await loadMission(root, id);
491
498
  const planPath = path.join(dir, 'research-plan.json');
492
499
  if (!(await exists(planPath))) await writeResearchPlan(dir, mission.prompt || '', {});
@@ -498,32 +505,46 @@ async function researchRun(args) {
498
505
  process.exitCode = 2;
499
506
  return;
500
507
  }
501
- const maxCycles = readMaxCycles(args, 3);
508
+ const maxCycles = readMaxCycles(args, RESEARCH_DEFAULT_MAX_CYCLES);
509
+ const cycleTimeoutMinutes = readResearchCycleTimeoutMinutes(args);
510
+ const cycleTimeoutMs = cycleTimeoutMinutes * 60 * 1000;
502
511
  const mock = flag(args, '--mock');
503
- await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: 'RESEARCH_RUNNING_NO_QUESTIONS', questions_allowed: false });
504
- await appendJsonlBounded(path.join(dir, 'events.jsonl'), { ts: nowIso(), type: 'research.run.started', maxCycles, mock });
512
+ await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: 'RESEARCH_RUNNING_NO_QUESTIONS', questions_allowed: false, implementation_allowed: false, research_real_run_required: !mock, research_cycle_timeout_minutes: cycleTimeoutMinutes });
513
+ await appendJsonlBounded(path.join(dir, 'events.jsonl'), { ts: nowIso(), type: 'research.run.started', maxCycles, mock, cycleTimeoutMinutes, real_run_required: !mock });
505
514
  if (mock) {
506
515
  const gate = await writeMockResearchResult(dir, plan);
507
- await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: gate.passed ? 'RESEARCH_DONE' : 'RESEARCH_PAUSED', questions_allowed: true });
516
+ await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: gate.passed ? 'RESEARCH_DONE' : 'RESEARCH_PAUSED', questions_allowed: true, implementation_allowed: false });
508
517
  console.log(`Mock research done: ${id}`);
509
518
  console.log(`Gate: ${gate.passed ? 'passed' : 'blocked'}`);
510
519
  return;
511
520
  }
512
521
  const codex = await getCodexInfo();
513
522
  if (!codex.bin) {
514
- console.error('Codex CLI not found. Running mock research instead.');
515
- const gate = await writeMockResearchResult(dir, plan);
516
- await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: gate.passed ? 'RESEARCH_DONE' : 'RESEARCH_PAUSED', questions_allowed: true });
517
- console.log(`Mock research done: ${id}`);
523
+ const blocker = {
524
+ schema_version: 1,
525
+ mission_id: id,
526
+ ts: nowIso(),
527
+ phase: 'RESEARCH_BLOCKED_REAL_RUN_REQUIRED',
528
+ reason: 'Codex CLI not found; normal Research cannot fall back to mock output.',
529
+ required_action: 'Install/configure the Codex CLI or set SKS_CODEX_BIN to a valid executable, then rerun sks research run without --mock.',
530
+ mock_policy: '--mock is allowed only for selftests and dry harness checks.',
531
+ implementation_allowed: false
532
+ };
533
+ await writeJsonAtomic(path.join(dir, 'research-blocker.json'), blocker);
534
+ await appendJsonlBounded(path.join(dir, 'events.jsonl'), { ts: blocker.ts, type: 'research.blocked.real_run_required', reason: blocker.reason, blocker: 'research-blocker.json' });
535
+ await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: 'RESEARCH_BLOCKED_REAL_RUN_REQUIRED', questions_allowed: true, implementation_allowed: false, research_real_run_required: true, blocker: 'research-blocker.json' });
536
+ console.error('Research cannot run real sources: Codex CLI not found.');
537
+ console.error('Mock fallback is disabled for normal Research. Use --mock only for selftests, or install/configure Codex CLI/SKS_CODEX_BIN.');
538
+ process.exitCode = 2;
518
539
  return;
519
540
  }
520
541
  let last = '';
521
542
  for (let cycle = 1; cycle <= maxCycles; cycle++) {
522
543
  const cycleDir = path.join(dir, 'research', `cycle-${cycle}`);
523
544
  const outputFile = path.join(cycleDir, 'final.md');
524
- await appendJsonlBounded(path.join(dir, 'events.jsonl'), { ts: nowIso(), type: 'research.cycle.start', cycle });
545
+ await appendJsonlBounded(path.join(dir, 'events.jsonl'), { ts: nowIso(), type: 'research.cycle.start', cycle, timeoutMinutes: cycleTimeoutMinutes });
525
546
  const prompt = buildResearchPrompt({ id, mission, plan, cycle, previous: last });
526
- const result = await runCodexExec({ root, prompt, outputFile, json: true, profile: 'sks-research', logDir: cycleDir, timeoutMs: 45 * 60 * 1000 });
547
+ const result = await runCodexExec({ root, prompt, outputFile, json: true, profile: 'sks-research', logDir: cycleDir, timeoutMs: cycleTimeoutMs });
527
548
  await writeJsonAtomic(path.join(cycleDir, 'process.json'), { code: result.code, stdout_tail: result.stdout, stderr_tail: result.stderr, stdout_bytes: result.stdoutBytes, stderr_bytes: result.stderrBytes, truncated: result.truncated, timed_out: result.timedOut });
528
549
  last = await safeReadText(outputFile, result.stdout || result.stderr || '');
529
550
  if (containsUserQuestion(last)) {
@@ -533,7 +554,7 @@ async function researchRun(args) {
533
554
  }
534
555
  const gate = await evaluateResearchGate(dir);
535
556
  if (gate.passed) {
536
- await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: 'RESEARCH_DONE', questions_allowed: true });
557
+ await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: 'RESEARCH_DONE', questions_allowed: true, implementation_allowed: false });
537
558
  await appendJsonlBounded(path.join(dir, 'events.jsonl'), { ts: nowIso(), type: 'research.done', cycle });
538
559
  await enforceRetention(root).catch(() => {});
539
560
  console.log(`Research done: ${id}`);
@@ -541,7 +562,7 @@ async function researchRun(args) {
541
562
  }
542
563
  await appendJsonlBounded(path.join(dir, 'events.jsonl'), { ts: nowIso(), type: 'research.cycle.continue', cycle, reasons: gate.reasons });
543
564
  }
544
- await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: 'RESEARCH_PAUSED_MAX_CYCLES', questions_allowed: true });
565
+ await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: 'RESEARCH_PAUSED_MAX_CYCLES', questions_allowed: true, implementation_allowed: false });
545
566
  console.log(`Research paused after max cycles: ${id}`);
546
567
  }
547
568
 
@@ -557,18 +578,31 @@ async function researchStatus(args) {
557
578
  const scoutLedger = await readJson(path.join(dir, 'scout-ledger.json'), null);
558
579
  const debateLedger = await readJson(path.join(dir, 'debate-ledger.json'), null);
559
580
  const falsificationLedger = await readJson(path.join(dir, 'falsification-ledger.json'), null);
581
+ const sourceSkillText = await readText(path.join(dir, RESEARCH_SOURCE_SKILL_ARTIFACT), '');
582
+ const geniusSummaryText = await readText(path.join(dir, RESEARCH_GENIUS_SUMMARY_ARTIFACT), '');
583
+ const paperText = await readText(path.join(dir, RESEARCH_PAPER_ARTIFACT), '');
560
584
  const scoutRows = Array.isArray(scoutLedger?.scouts) ? scoutLedger.scouts : [];
585
+ const sourceLayerRows = Array.isArray(sourceLedger?.source_layers) ? sourceLedger.source_layers : [];
586
+ const sourceLayersCovered = sourceLayerRows.filter((layer) => layer.status === 'covered' && ((Array.isArray(layer.source_ids) && layer.source_ids.length) || (Array.isArray(layer.counterevidence_ids) && layer.counterevidence_ids.length))).length;
561
587
  console.log(JSON.stringify({
562
588
  mission,
563
589
  state,
564
590
  gate,
565
591
  novelty_entries: ledger?.entries?.length ?? null,
566
592
  source_entries: sourceLedger?.sources?.length ?? null,
593
+ source_layers_required: sourceLayerRows.length || gate?.metrics?.source_layers_required || gate?.source_layers_required || null,
594
+ source_layers_covered: gate?.metrics?.source_layers_covered ?? gate?.source_layers_covered ?? (sourceLayerRows.length ? sourceLayersCovered : null),
595
+ triangulation_checks: sourceLedger?.triangulation?.cross_layer_checks?.length ?? gate?.metrics?.triangulation_checks ?? gate?.triangulation_checks ?? null,
596
+ genius_opinion_summaries: gate?.metrics?.genius_opinion_summaries ?? gate?.genius_opinion_summaries ?? (geniusSummaryText.trim() ? countGeniusOpinionSummaries(geniusSummaryText) : null),
567
597
  counterevidence_sources: sourceLedger?.counterevidence_sources?.length ?? null,
568
598
  xhigh_scouts: scoutRows.length ? scoutRows.filter((scout) => scout.effort === 'xhigh').length : null,
569
599
  eureka_moments: scoutRows.length ? scoutRows.filter((scout) => scout.eureka?.exclamation === 'Eureka!' && String(scout.eureka?.idea || '').trim()).length : null,
570
600
  scout_findings: scoutRows.length ? scoutRows.reduce((sum, scout) => sum + (Array.isArray(scout.findings) ? scout.findings.length : 0), 0) : null,
571
601
  debate_exchanges: debateLedger?.exchanges?.length ?? null,
602
+ research_source_skill_present: Boolean(sourceSkillText.trim()),
603
+ genius_opinion_summary_present: Boolean(geniusSummaryText.trim()),
604
+ paper_present: Boolean(paperText.trim()),
605
+ paper_sections: countResearchPaperSections(paperText),
572
606
  falsification_cases: falsificationLedger?.cases?.length ?? null
573
607
  }, null, 2));
574
608
  }
@@ -621,11 +655,19 @@ async function safeReadText(file, fallback = '') {
621
655
  try { return await fsp.readFile(file, 'utf8'); } catch { return fallback; }
622
656
  }
623
657
 
624
- function readMaxCycles(args, fallback) {
625
- const i = args.indexOf('--max-cycles');
658
+ function readBoundedIntegerFlag(args, name, fallback, min, max) {
659
+ const i = args.indexOf(name);
626
660
  const raw = i >= 0 && args[i + 1] ? Number(args[i + 1]) : Number(fallback);
627
- if (!Number.isFinite(raw)) return Math.max(1, Number.parseInt(fallback, 10) || 1);
628
- return Math.max(1, Math.min(50, Math.floor(raw)));
661
+ if (!Number.isFinite(raw)) return Math.max(min, Number.parseInt(fallback, 10) || min);
662
+ return Math.max(min, Math.min(max, Math.floor(raw)));
663
+ }
664
+
665
+ function readMaxCycles(args, fallback) {
666
+ return readBoundedIntegerFlag(args, '--max-cycles', fallback, 1, 50);
667
+ }
668
+
669
+ function readResearchCycleTimeoutMinutes(args) {
670
+ return readBoundedIntegerFlag(args, '--cycle-timeout-minutes', RESEARCH_DEFAULT_CYCLE_TIMEOUT_MINUTES, RESEARCH_MIN_CYCLE_TIMEOUT_MINUTES, RESEARCH_MAX_CYCLE_TIMEOUT_MINUTES);
629
671
  }
630
672
 
631
673
  export async function goalCommand(sub, args) {
@@ -1576,7 +1618,7 @@ export async function statsCommand(args) {
1576
1618
 
1577
1619
  function positionalArgs(args = []) {
1578
1620
  const out = [];
1579
- const valueFlags = new Set(['--format', '--iterations', '--out', '--baseline', '--candidate', '--install-scope', '--max-cycles', '--depth', '--scope', '--transport', '--query', '--topic', '--tokens', '--timeout-ms', '--sql', '--command', '--project-ref', '--agent', '--phase', '--message', '--role', '--max-anchors', '--lines', '--intent', '--changed', '--route', '--skills', '--prompt-signature']);
1621
+ const valueFlags = new Set(['--format', '--iterations', '--out', '--baseline', '--candidate', '--install-scope', '--max-cycles', '--cycle-timeout-minutes', '--depth', '--scope', '--transport', '--query', '--topic', '--tokens', '--timeout-ms', '--sql', '--command', '--project-ref', '--agent', '--phase', '--message', '--role', '--max-anchors', '--lines', '--intent', '--changed', '--route', '--skills', '--prompt-signature']);
1580
1622
  for (let i = 0; i < args.length; i++) {
1581
1623
  const arg = String(args[i]);
1582
1624
  if (valueFlags.has(arg)) {
package/src/core/fsx.mjs CHANGED
@@ -5,7 +5,7 @@ import os from 'node:os';
5
5
  import crypto from 'node:crypto';
6
6
  import { spawn } from 'node:child_process';
7
7
 
8
- export const PACKAGE_VERSION = '0.7.68';
8
+ export const PACKAGE_VERSION = '0.7.72';
9
9
  export const DEFAULT_PROCESS_TAIL_BYTES = 256 * 1024;
10
10
  export const DEFAULT_PROCESS_TIMEOUT_MS = 30 * 60 * 1000;
11
11
 
@@ -7,7 +7,7 @@ import { checkHarnessModification, harnessGuardBlockReason } from './harness-gua
7
7
  import { activeRouteContext, evaluateStop, prepareRoute, promptPipelineContext as routePipelineContext, recordContext7Evidence, recordSubagentEvidence, routePrompt } from './pipeline.mjs';
8
8
  import { classifyToolError } from './evaluation.mjs';
9
9
  import { REQUIRED_CODEX_MODEL, isForbiddenCodexModel } from './codex-model-guard.mjs';
10
- import { stripVisibleDecisionAnswerBlocks } from './routes.mjs';
10
+ import { dollarCommand, stripVisibleDecisionAnswerBlocks } from './routes.mjs';
11
11
 
12
12
  const TEAM_DIGEST_MAX_EVENTS = 4;
13
13
  const TEAM_DIGEST_MESSAGE_CHARS = 180;
@@ -77,13 +77,6 @@ function toolFailed(payload = {}) {
77
77
  return false;
78
78
  }
79
79
 
80
- function dollarCommand(prompt) {
81
- const text = String(prompt || '').trim();
82
- const match = text.match(/^\$([A-Za-z][A-Za-z0-9_-]*)(?:\s|:|$)/)
83
- || text.match(/^\[\$([A-Za-z][A-Za-z0-9_-]*)\]\([^)]+\)(?:\s|:|$)/);
84
- return match ? match[1].toUpperCase() : null;
85
- }
86
-
87
80
  function looksLikeUpdateDecline(prompt) {
88
81
  return /^(no|nope|skip|later|not now|don't|dont|아니|아니요|안해|안 함|나중에|건너뛰|스킵)/i.test(String(prompt || '').trim());
89
82
  }
@@ -1006,7 +999,7 @@ function visibleHookMessage(name, text = '') {
1006
999
  if (body.includes('Computer Use fast lane active')) return 'SKS: Computer Use fast lane injected; defer TriWiki/Honest Mode to final closeout.';
1007
1000
  if (body.includes('MANDATORY ambiguity-removal gate') || body.includes('VISIBLE RESPONSE CONTRACT') || body.includes('Required questions still pending')) return 'SKS: stale clarification gate detected; continue from inferred route contract.';
1008
1001
  if (body.includes('$Team route prepared') || body.includes('Team route')) return 'SKS: Team route, live transcript, and subagent plan injected.';
1009
- if (body.includes('$Research route prepared')) return 'SKS: Research route, xhigh Eureka scout council, source ledger, debate ledger, and falsification gate injected.';
1002
+ if (body.includes('$Research route prepared')) return 'SKS: Research route, xhigh Eureka scout council, source/debate ledgers, paper output, and falsification gate injected.';
1010
1003
  if (body.includes('$AutoResearch route prepared')) return 'SKS: AutoResearch experiment loop and evidence gate injected.';
1011
1004
  if (body.includes('$PPT route prepared')) return 'SKS: PPT route and delivery-context gate injected.';
1012
1005
  if (body.includes('$Image-UX-Review route prepared') || body.includes('$UX-Review route prepared')) return 'SKS: Image UX Review route and gpt-image-2 evidence gate injected.';
package/src/core/init.mjs CHANGED
@@ -815,7 +815,7 @@ export async function installSkills(root) {
815
815
  'computer-use-fast': `---\nname: computer-use-fast\ndescription: Alias for the maximum-speed $Computer-Use/$CU Codex Computer Use lane.\n---\n\nUse the same rules as computer-use: skip Team debate, QA-LOOP clarification, upfront TriWiki refresh, Context7, subagents, and reflection unless explicitly requested. Use Codex Computer Use directly; never substitute Playwright, Chrome MCP, Browser Use, Selenium, Puppeteer, or other browser automation for UI/browser evidence. At the end only, refresh/pack TriWiki, validate it, then provide a concise completion summary plus Honest Mode.\n`,
816
816
  'cu': `---\nname: cu\ndescription: Short alias for the maximum-speed $Computer-Use Codex Computer Use lane.\n---\n\nUse the same rules as computer-use. This is a speed lane for focused UI/browser/visual tasks that require Codex Computer Use evidence, with TriWiki refresh/validate and Honest Mode deferred to final closeout.\n`,
817
817
  'goal': `---\nname: goal\ndescription: Fast $Goal/$goal bridge overlay for Codex native persisted /goal workflows.\n---\n\nUse when the user invokes $Goal/$goal or asks to persist a workflow with Codex native /goal continuation. Prepare with sks goal create or the $Goal route, write only the lightweight bridge artifacts, then use native Codex /goal create, pause, resume, and clear controls where available. Goal does not replace Team, QA, DB, or other SKS execution routes; continue implementation through the selected route and use Context7 only when external API/library docs are involved. Do not recreate the old no-question loop.\n`,
818
- 'research': `---\nname: research\ndescription: Dollar-command route for $Research or $research frontier discovery workflows.\n---\n\nUse when the user invokes $Research/$research or asks for research, hypotheses, new mechanisms, falsification, or testable predictions. Prefer sks research prepare and sks research run. Run the genius-lens scout council with Einstein/Feynman/Turing/von Neumann-inspired cognitive roles plus a skeptic lens; do not impersonate the historical people. Every Research scout must run with effort=xhigh, record one literal "Eureka!" idea, and participate in a vigorous evidence-bound debate before synthesis. Maximize safe web/source search and record source-ledger.json, scout-ledger.json, debate-ledger.json, novelty-ledger.json, falsification-ledger.json, and research-gate.json. Keep the loop short: frame outcome, compare a few mechanisms, falsify, keep the smallest useful probe, and avoid adding background process unless it reduces net route weight. Do not use for ordinary code edits.\n`,
818
+ 'research': `---\nname: research\ndescription: Dollar-command route for $Research or $research frontier discovery workflows.\n---\n\nUse when the user invokes $Research/$research or asks for research, hypotheses, new mechanisms, falsification, or testable predictions. Prefer sks research prepare and sks research run. Run the genius-lens scout council with Einstein/Feynman/Turing/von Neumann-inspired cognitive roles plus a skeptic lens; do not impersonate the historical people. Every Research scout must run with effort=xhigh, record one literal "Eureka!" idea, and participate in a vigorous evidence-bound debate before synthesis. Create research-source-skill.md as a route-local Skill Creator artifact, then maximize layered public web/source search across papers, official/government or leading-institution data, standards/primary docs, current news, public discourse, developer/practitioner sources, and counterevidence before synthesis. Record research-source-skill.md, source-ledger.json, scout-ledger.json, debate-ledger.json, novelty-ledger.json, falsification-ledger.json, research-report.md, research-paper.md, genius-opinion-summary.md, and research-gate.json. Context7 is optional and only needed when the research topic depends on external package/API/framework docs; do not use it as the default research evidence layer. Normal Research may take one or two hours when needed; favor real source collection, cross-layer comparison, falsification, and a concise paper manuscript over speed. Do not use --mock except for selftests or dry harness checks; if live source execution is unavailable, record a blocker and keep the gate unpassed. Do not use for ordinary code edits.\n`,
819
819
  'autoresearch': `---\nname: autoresearch\ndescription: Dollar-command route for $AutoResearch or $autoresearch iterative experiment loops.\n---\n\nUse for $AutoResearch, iterative improvement, SEO/GEO, ranking, workflow, benchmark, or experiments. Define program, hypothesis, experiment, metric, keep/discard, falsification, next step, and Honest Mode. Load seo-geo-optimizer for README/npm/GitHub/schema/AI-search work.\n`,
820
820
  'db': `---\nname: db\ndescription: Dollar-command route for $DB or $db database and Supabase safety checks.\n---\n\nUse when the user invokes $DB/$db or the task touches SQL, Supabase, Postgres, migrations, Prisma, Drizzle, Knex, MCP database tools, or production data. Run or follow sks db policy, sks db scan, sks db classify, and sks db check. Destructive database operations remain forbidden.\n`,
821
821
  'mad-sks': `---\nname: mad-sks\ndescription: Explicit high-risk authorization modifier for $MAD-SKS scoped Supabase MCP DB permission widening.\n---\n\nUse only when the user explicitly invokes $MAD-SKS or top-level sks --mad. It can be combined with another route, such as $MAD-SKS $Team or $DB ... $MAD-SKS; in that case the other command remains the primary workflow and MAD-SKS is only the temporary permission grant. The widened permission applies only while the active mission gate is open, must be deactivated when the task ends, and opens live server work, Supabase MCP database writes, column/schema cleanup, direct execute SQL, migration application when required, and normal targeted DB writes. Keep only catastrophic safeguards: whole database/schema/table removal, truncate, all-row delete/update, reset, dangerous project/branch management, credential exfiltration, persistent security weakening, and unrequested fallback implementation remain blocked. Do not carry MAD-SKS permission into later prompts or routes. The permission profile is centralized in src/core/permission-gates.mjs so skill/hook/MCP-style gates share one decision function.\n`,
@@ -921,7 +921,7 @@ async function prepareResearch(root, route, task, required) {
921
921
  await writeResearchPlan(dir, task, {});
922
922
  const pipelinePlan = await writePipelinePlan(dir, { missionId: id, route, task, required, ambiguity: { required: false, status: 'direct_route' } });
923
923
  await setCurrent(root, routeState(id, route, 'RESEARCH_PREPARED', required, { prompt: task, pipeline_plan_ready: validatePipelinePlan(pipelinePlan).ok, pipeline_plan_path: PIPELINE_PLAN_ARTIFACT }));
924
- return routeContext(route, id, task, required, 'Run sks research run latest, maximize web/source search, require every scout effort=xhigh plus one Eureka! idea, fill source-ledger.json, scout-ledger.json, debate-ledger.json, novelty-ledger.json, falsification-ledger.json, research-report.md, and pass research-gate.json.');
924
+ return routeContext(route, id, task, required, 'Run sks research run latest as a real long-running source-gathering pass, never an automatic mock fallback; create research-source-skill.md, maximize layered public source search, require every scout effort=xhigh plus one Eureka! idea, fill source-ledger.json, scout-ledger.json, debate-ledger.json, novelty-ledger.json, falsification-ledger.json, research-report.md, research-paper.md, genius-opinion-summary.md, and pass research-gate.json.');
925
925
  }
926
926
 
927
927
  async function prepareAutoResearch(root, route, task, required) {
@@ -1,7 +1,21 @@
1
1
  import path from 'node:path';
2
- import { appendJsonlBounded, nowIso, readJson, writeJsonAtomic, writeTextAtomic, exists } from './fsx.mjs';
2
+ import { appendJsonlBounded, nowIso, readJson, readText, writeJsonAtomic, writeTextAtomic, exists } from './fsx.mjs';
3
3
  import { OUTCOME_RUBRIC } from './proof-field.mjs';
4
4
 
5
// File names of the research artifacts: the paper manuscript, the route-local
// source collection skill, and the genius-lens opinion summary.
+ export const RESEARCH_PAPER_ARTIFACT = 'research-paper.md';
6
+ export const RESEARCH_SOURCE_SKILL_ARTIFACT = 'research-source-skill.md';
7
+ export const RESEARCH_GENIUS_SUMMARY_ARTIFACT = 'genius-opinion-summary.md';
8
// Paper section groups. Each inner array lists alternative lower-case heading
// terms for one section; countResearchPaperSections counts a group once when any
// heading contains any of its terms. Object.freeze here freezes the outer array only.
+ export const RESEARCH_PAPER_SECTION_GROUPS = Object.freeze([
9
+ ['abstract'],
10
+ ['introduction'],
11
+ ['method', 'methodology'],
12
+ ['results', 'findings'],
13
+ ['discussion'],
14
+ ['limitations', 'falsification'],
15
+ ['conclusion', 'next experiment'],
16
+ ['references', 'sources']
17
+ ]);
18
+
5
19
  export const RESEARCH_SCOUT_COUNCIL = Object.freeze([
6
20
  {
7
21
  id: 'einstein',
@@ -40,6 +54,67 @@ export const RESEARCH_SCOUT_COUNCIL = Object.freeze([
40
54
  }
41
55
  ]);
42
56
 
57
// Source layers used for layered retrieval. Each descriptor carries an `id`, a
// human-readable `label`, a `purpose` sentence, an `evidence_role` tag, example
// providers, and search `query_templates` containing a "<topic>" placeholder.
// Object.freeze here freezes the outer array only; the descriptor objects stay mutable.
+ export const RESEARCH_SOURCE_LAYERS = Object.freeze([
58
+ {
59
+ id: 'academic_literature',
60
+ label: 'Academic literature',
61
+ purpose: 'Find recent papers, preprints, formal reviews, citations, and open scholarly metadata before synthesis.',
62
+ evidence_role: 'formal_evidence',
63
+ examples: ['arXiv', 'Semantic Scholar', 'OpenAlex', 'Crossref', 'PubMed'],
64
+ query_templates: ['"<topic>" arxiv', '"<topic>" site:semanticscholar.org', '"<topic>" OpenAlex Crossref PubMed']
65
+ },
66
+ {
67
+ id: 'official_government_data',
68
+ label: 'Official government and leading-institution knowledge',
69
+ purpose: 'Ground claims in public datasets, policy papers, national statistics, and leading-country institutional sources.',
70
+ evidence_role: 'authoritative_baseline',
71
+ examples: ['World Bank', 'OECD', 'Eurostat', 'data.gov', 'data.gov.uk', 'NIST'],
72
+ query_templates: ['"<topic>" site:worldbank.org OR site:oecd.org', '"<topic>" site:data.gov OR site:data.gov.uk', '"<topic>" site:nist.gov']
73
+ },
74
+ {
75
+ id: 'standards_primary_docs',
76
+ label: 'Standards and primary documents',
77
+ purpose: 'Check primary specifications, standards, RFCs, policy originals, and official project documents before relying on summaries.',
78
+ evidence_role: 'primary_source',
79
+ examples: ['IETF RFCs', 'W3C', 'ISO abstracts', 'official standards bodies', 'project primary docs'],
80
+ query_templates: ['"<topic>" RFC standard specification', '"<topic>" W3C IETF NIST standard', '"<topic>" official specification']
81
+ },
82
+ {
83
+ id: 'news_current_events',
84
+ label: 'Current news and global reporting',
85
+ purpose: 'Capture recent events, public impact, and regional framing from reputable news and global news indices.',
86
+ evidence_role: 'recency_signal',
87
+ examples: ['GDELT', 'BBC', 'CNN', 'Reuters', 'AP', 'regional reputable outlets'],
88
+ query_templates: ['"<topic>" BBC CNN latest', '"<topic>" GDELT news', '"<topic>" Reuters AP analysis']
89
+ },
90
+ {
91
+ id: 'public_discourse',
92
+ label: 'Public discourse',
93
+ purpose: 'Sample public practitioner and community discourse without treating popularity as truth.',
94
+ evidence_role: 'sentiment_and_edge_cases',
95
+ examples: ['X/Twitter recent search', 'Reddit', 'Hacker News', 'public forums'],
96
+ query_templates: ['"<topic>" site:x.com OR site:twitter.com', '"<topic>" site:reddit.com', '"<topic>" "Hacker News"']
97
+ },
98
+ {
99
+ id: 'developer_practitioner',
100
+ label: 'Developer and practitioner knowledge',
101
+ purpose: 'Find implementation pitfalls, developer questions, bug reports, and operational lessons.',
102
+ evidence_role: 'practice_feedback',
103
+ examples: ['Stack Overflow', 'Stack Exchange', 'GitHub issues', 'release notes', 'engineering blogs'],
104
+ query_templates: ['"<topic>" site:stackoverflow.com', '"<topic>" site:stackexchange.com', '"<topic>" site:github.com issues']
105
+ },
106
+ {
107
+ id: 'counterevidence_factcheck',
108
+ label: 'Counterevidence and fact-checking',
109
+ purpose: 'Actively search for failures, critiques, null results, retractions, fact checks, and source conflicts.',
110
+ evidence_role: 'falsification',
111
+ examples: ['Google Fact Check Tools', 'Retraction Watch', 'critical reviews', 'benchmark failures', 'negative results'],
112
+ query_templates: ['"<topic>" critique failure limitation', '"<topic>" fact check retraction', '"<topic>" counterevidence null result']
113
+ }
114
+ ]);
115
+
116
// Frozen array of the `id` values of RESEARCH_SOURCE_LAYERS, in declaration order.
+ export const RESEARCH_SOURCE_LAYER_IDS = Object.freeze(RESEARCH_SOURCE_LAYERS.map((layer) => layer.id));
117
+
43
118
  export function createResearchPlan(prompt, opts = {}) {
44
119
  const depth = opts.depth || 'frontier';
45
120
  return {
@@ -49,6 +124,11 @@ export function createResearchPlan(prompt, opts = {}) {
49
124
  created_at: nowIso(),
50
125
  methodology: 'genius-scout-council-frontier-discovery-loop',
51
126
  objective: 'Find the shortest useful mechanism that can be falsified or applied, grounded in maximum available source retrieval rather than broad summary.',
127
+ execution_policy: {
128
+ normal_run: 'real_long_running_research',
129
+ default_cycle_timeout_minutes: 120,
130
+ mock_policy: '--mock is for selftests and dry harness checks only; normal Research must block rather than silently substitute mock output.'
131
+ },
52
132
  outcome_rubric: OUTCOME_RUBRIC,
53
133
  research_council: {
54
134
  mode: 'persona_inspired_scouts_not_impersonation',
@@ -76,8 +156,8 @@ export function createResearchPlan(prompt, opts = {}) {
76
156
  ]
77
157
  },
78
158
  web_research_policy: {
79
- mode: 'maximum_source_retrieval',
80
- requirement: 'Use the broadest safe web/source search available in the runtime before synthesis.',
159
+ mode: 'layered_source_retrieval_and_triangulation',
160
+ requirement: 'Use every safely available public web/source route before synthesis, separated into source layers so the final claim is not dominated by one corpus or platform.',
81
161
  query_sets: [
82
162
  'first-principles and theory sources',
83
163
  'plain-language explanations and empirical examples',
@@ -85,9 +165,18 @@ export function createResearchPlan(prompt, opts = {}) {
85
165
  'systems, strategy, scaling, or deployment evidence',
86
166
  'counterevidence, failures, critiques, and null results'
87
167
  ],
168
+ source_layers: RESEARCH_SOURCE_LAYERS,
88
169
  source_priority: ['primary_sources', 'official_docs_or_standards', 'peer_reviewed_or_archival_sources', 'reputable_recent_sources', 'credible_counterevidence'],
170
+ skill_creator: {
171
+ artifact: RESEARCH_SOURCE_SKILL_ARTIFACT,
172
+ status: 'route_local_candidate',
173
+ rule: 'Before source gathering, create a route-local source collection skill that names the selected layers, query families, source-quality fields, blockers, and cross-layer triangulation checks. Do not edit generated .agents/skills during a research run.'
174
+ },
89
175
  citation_rules: [
90
176
  'Every factual claim in the report must cite source-ledger ids or local project evidence.',
177
+ 'The final research paper must include references tied to source-ledger ids.',
178
+ 'Every required source layer must have at least one cited source or an explicit blocker; blockers keep the research gate unpassed.',
179
+ 'The source-ledger must include at least one cross-layer triangulation check comparing formal, current, discourse, practitioner, official, and counterevidence sources.',
91
180
  'Every novelty-ledger entry must cite at least one evidence source and at least one falsifier.',
92
181
  'If live web search is unavailable, record the blocker in source-ledger.json and keep research-gate.json unpassed.'
93
182
  ],
@@ -95,7 +184,9 @@ export function createResearchPlan(prompt, opts = {}) {
95
184
  independent_scouts: RESEARCH_SCOUT_COUNCIL.length,
96
185
  web_search_passes: 1,
97
186
  source_entries: 1,
98
- counterevidence_sources: 1
187
+ source_layers: RESEARCH_SOURCE_LAYER_IDS.length,
188
+ counterevidence_sources: 1,
189
+ triangulation_checks: 1
99
190
  }
100
191
  },
101
192
  rules: [
@@ -104,22 +195,31 @@ export function createResearchPlan(prompt, opts = {}) {
104
195
  'Run the genius-lens scout council independently before synthesis.',
105
196
  'Every Research scout must run at reasoning_effort=xhigh, record one literal "Eureka!" idea, and participate in the debate.',
106
197
  'The scout council must debate vigorously but stay evidence-bound; record challenges and responses in debate-ledger.json.',
107
- 'Maximize safe web/source search and record queries, sources, citations, and blockers in source-ledger.json.',
198
+ 'Maximize safe web/source search as layered source retrieval and record queries, source layers, citations, quality notes, triangulation checks, and blockers in source-ledger.json.',
199
+ `Create ${RESEARCH_SOURCE_SKILL_ARTIFACT} as a route-local source collection skill before synthesis; do not edit generated .agents/skills during the research run.`,
108
200
  'Actively seek disconfirming evidence before synthesis.',
201
+ 'Turn the surviving research result into research-paper.md with paper-style sections and references.',
202
+ `End every run with ${RESEARCH_GENIUS_SUMMARY_ARTIFACT}, summarizing each genius-lens scout's final opinion, strongest evidence, disagreement, and changed mind.`,
109
203
  'Keep unsupported source-free claims as hypotheses only.',
110
- 'Prefer the smallest testable mechanism or implementation probe over a new long-running loop.',
204
+ 'Prefer the smallest testable mechanism or implementation probe, but do not stop source gathering early for speed when the research question needs a longer pass.',
111
205
  'Do not ask the user mid-run; resolve scope using the research plan and safety policy.'
112
206
  ],
113
207
  phases: [
114
208
  { id: 'R0_FRAME', goal: 'Frame the target outcome, constraints, and what would make the idea useful.' },
115
- { id: 'R1_SOURCE_SEARCH', goal: 'Run maximum available web/source retrieval with independent query sets for each scout lens.' },
116
- { id: 'R2_EUREKA', goal: 'Have each xhigh genius-lens scout shout Eureka! and record one non-obvious idea with source ids.' },
117
- { id: 'R3_DEBATE', goal: 'Run a vigorous evidence-bound council debate with every scout challenging or responding.' },
118
- { id: 'R4_FALSIFY', goal: 'Attack each mechanism with counterexamples, missing evidence, source conflicts, and failure modes.' },
119
- { id: 'R5_APPLY', goal: 'Keep the smallest surviving mechanism, define a cheap probe, and write all ledgers.' }
209
+ { id: 'R1_SOURCE_SKILL', goal: `Create ${RESEARCH_SOURCE_SKILL_ARTIFACT} with layer-specific search routes, quality fields, and blockers before source gathering.` },
210
+ { id: 'R2_SOURCE_SEARCH', goal: 'Run layered web/source retrieval across papers, official data, standards, news, public discourse, developer knowledge, and counterevidence.' },
211
+ { id: 'R3_EUREKA', goal: 'Have each xhigh genius-lens scout shout Eureka! and record one non-obvious idea with source ids.' },
212
+ { id: 'R4_DEBATE', goal: 'Run a vigorous evidence-bound council debate with every scout challenging or responding.' },
213
+ { id: 'R5_FALSIFY', goal: 'Attack each mechanism with counterexamples, missing evidence, source conflicts, and failure modes.' },
214
+ { id: 'R6_APPLY', goal: 'Keep the smallest surviving mechanism, define a cheap probe, and write all ledgers.' },
215
+ { id: 'R7_PAPER', goal: 'Convert the final research result into a concise paper manuscript with abstract, method, findings, limitations, and references.' },
216
+ { id: 'R8_GENIUS_SUMMARY', goal: `Write ${RESEARCH_GENIUS_SUMMARY_ARTIFACT} so the final answer can report every scout lens opinion and the council consensus.` }
120
217
  ],
121
218
  required_artifacts: [
122
219
  'research-report.md',
220
+ RESEARCH_PAPER_ARTIFACT,
221
+ RESEARCH_GENIUS_SUMMARY_ARTIFACT,
222
+ RESEARCH_SOURCE_SKILL_ARTIFACT,
123
223
  'source-ledger.json',
124
224
  'scout-ledger.json',
125
225
  'debate-ledger.json',
@@ -137,6 +237,10 @@ export function researchPlanMarkdown(plan) {
137
237
  lines.push(`Prompt: ${plan.prompt}`);
138
238
  lines.push(`Depth: ${plan.depth}`);
139
239
  lines.push(`Methodology: ${plan.methodology}`);
240
+ if (plan.execution_policy) {
241
+ lines.push(`Execution: ${plan.execution_policy.normal_run}; default cycle timeout ${plan.execution_policy.default_cycle_timeout_minutes} minutes`);
242
+ lines.push(`Mock policy: ${plan.execution_policy.mock_policy}`);
243
+ }
140
244
  lines.push('');
141
245
  lines.push('## Rules');
142
246
  for (const rule of plan.rules) lines.push(`- ${rule}`);
@@ -152,6 +256,10 @@ export function researchPlanMarkdown(plan) {
152
256
  lines.push(`Mode: ${plan.web_research_policy.mode}`);
153
257
  lines.push(`Requirement: ${plan.web_research_policy.requirement}`);
154
258
  for (const querySet of plan.web_research_policy.query_sets || []) lines.push(`- query set: ${querySet}`);
259
+ if (plan.web_research_policy.skill_creator?.artifact) lines.push(`- source skill artifact: ${plan.web_research_policy.skill_creator.artifact}`);
260
+ for (const layer of plan.web_research_policy.source_layers || []) {
261
+ lines.push(`- layer ${layer.id}: ${layer.purpose}`);
262
+ }
155
263
  lines.push('');
156
264
  }
157
265
  lines.push('## Outcome Rubric');
@@ -166,10 +274,56 @@ export function researchPlanMarkdown(plan) {
166
274
  return `${lines.join('\n')}\n`;
167
275
  }
168
276
 
277
// Builds the markdown text of the route-local research source skill.
// Layers come from plan.web_research_policy.source_layers when that array is
// non-empty, otherwise from RESEARCH_SOURCE_LAYERS. The text has fixed Trigger,
// Source Layers, Output Contract, and Debate Use sections; each layer contributes
// its id/purpose line plus its examples and query templates.
// Returns the lines joined with "\n" and terminated by a trailing newline.
+ export function researchSourceSkillMarkdown(plan) {
278
+ const layers = plan?.web_research_policy?.source_layers?.length ? plan.web_research_policy.source_layers : RESEARCH_SOURCE_LAYERS;
279
+ const lines = [];
280
+ lines.push('# Research Source Layer Skill');
281
+ lines.push('');
282
+ lines.push('Status: route-local candidate skill. Use it inside this research mission before scout synthesis. Do not install or edit generated .agents/skills from this artifact.');
283
+ lines.push('Real-run policy: collect live sources for as long as needed within the mission timeout; mock or fixture evidence is valid only for explicit --mock selftests.');
284
+ lines.push('');
285
+ lines.push('## Trigger');
286
+ lines.push('- Any `$Research` run that must collect broad public evidence before creative synthesis, debate, falsification, or paper writing.');
287
+ lines.push('');
288
+ lines.push('## Source Layers');
289
+ for (const layer of layers) {
290
+ lines.push(`- ${layer.id}: ${layer.purpose}`);
291
+ lines.push(` Examples: ${(layer.examples || []).join(', ')}`);
292
+ lines.push(` Query templates: ${(layer.query_templates || []).join(' | ')}`);
293
+ }
294
+ lines.push('');
295
+ lines.push('## Output Contract');
296
+ lines.push('- Fill source-ledger.json with `source_layers`, `sources[].layer`, `counterevidence_sources[].layer`, `citation_coverage`, `triangulation.cross_layer_checks`, and `blockers`.');
297
+ lines.push('- Each source entry should record title, locator/URL, publisher or author when known, published_at when known, accessed_at, layer, reliability, credibility, stance, supports or undermines, and notes.');
298
+ lines.push('- Public discourse sources such as X/Twitter or Reddit are signals and edge cases, not truth. They must be triangulated with formal, official, practitioner, or counterevidence layers.');
299
+ lines.push('- If a layer cannot be searched with the available runtime or credentials, record the blocker and keep research-gate.json unpassed.');
300
+ lines.push('');
301
+ lines.push('## Debate Use');
302
+ lines.push('- Every scout must cite source-ledger ids in findings and Eureka ideas.');
303
+ lines.push('- The skeptic lens must challenge the strongest claim using counterevidence or source-quality downgrades.');
304
+ lines.push('- Synthesis keeps only claims that survive cross-layer triangulation and falsification.');
305
+ lines.push('');
306
+ return `${lines.join('\n')}\n`;
307
+ }
308
+
309
// Counts how many RESEARCH_PAPER_SECTION_GROUPS are represented in `text`.
// The text is lower-cased and split on "\n"; only lines that start with one to
// three '#' characters followed by whitespace are treated as headings.
// A group counts once when any heading contains any of its terms as a substring.
+ export function countResearchPaperSections(text = '') {
310
+ const headings = String(text || '').toLowerCase().split(/\n/).filter((line) => /^#{1,3}\s+/.test(line));
311
+ return RESEARCH_PAPER_SECTION_GROUPS.filter((group) => headings.some((heading) => group.some((term) => heading.includes(term)))).length;
312
+ }
313
+
314
// Counts the scouts in RESEARCH_SCOUT_COUNCIL that are mentioned in `text`.
// A scout counts when its id, or its non-empty label, occurs anywhere in the
// lower-cased text (case-insensitive substring match).
// NOTE(review): a scout with a missing/empty id would always count, because
// String.prototype.includes('') is true — verify every council entry has an id.
+ export function countGeniusOpinionSummaries(text = '') {
315
+ const lower = String(text || '').toLowerCase();
316
+ return RESEARCH_SCOUT_COUNCIL.filter((scout) => {
317
+ const label = String(scout.label || '').toLowerCase();
318
+ return lower.includes(String(scout.id || '').toLowerCase()) || (label && lower.includes(label));
319
+ }).length;
320
+ }
321
+
169
322
  export async function writeResearchPlan(dir, prompt, opts = {}) {
170
323
  const plan = createResearchPlan(prompt, opts);
171
324
  await writeJsonAtomic(path.join(dir, 'research-plan.json'), plan);
172
325
  await writeTextAtomic(path.join(dir, 'research-plan.md'), researchPlanMarkdown(plan));
326
+ await writeTextAtomic(path.join(dir, RESEARCH_SOURCE_SKILL_ARTIFACT), researchSourceSkillMarkdown(plan));
173
327
  await writeJsonAtomic(path.join(dir, 'novelty-ledger.json'), {
174
328
  schema_version: 1,
175
329
  entries: [],
@@ -189,14 +343,46 @@ export async function writeResearchPlan(dir, prompt, opts = {}) {
189
343
  }
190
344
 
191
345
  export function defaultSourceLedger(plan = null) {
346
+ const sourceLayers = plan?.web_research_policy?.source_layers?.length ? plan.web_research_policy.source_layers : RESEARCH_SOURCE_LAYERS;
192
347
  return {
193
348
  schema_version: 1,
194
- policy: plan?.web_research_policy?.mode || 'maximum_source_retrieval',
349
+ policy: plan?.web_research_policy?.mode || 'layered_source_retrieval_and_triangulation',
195
350
  created_at: nowIso(),
351
+ source_layer_skill: {
352
+ artifact: RESEARCH_SOURCE_SKILL_ARTIFACT,
353
+ status: 'planned'
354
+ },
196
355
  web_search_passes: 0,
356
+ source_layers: sourceLayers.map((layer) => ({
357
+ id: layer.id,
358
+ label: layer.label,
359
+ required: true,
360
+ status: 'pending',
361
+ evidence_role: layer.evidence_role,
362
+ query_templates: layer.query_templates || [],
363
+ source_ids: [],
364
+ counterevidence_ids: [],
365
+ blocker: null,
366
+ notes: ''
367
+ })),
368
+ layer_coverage: {
369
+ required: sourceLayers.map((layer) => layer.id),
370
+ covered: [],
371
+ missing: sourceLayers.map((layer) => layer.id),
372
+ notes: []
373
+ },
197
374
  queries: [],
198
375
  sources: [],
199
376
  counterevidence_sources: [],
377
+ triangulation: {
378
+ cross_layer_checks: [],
379
+ conflicts: [],
380
+ synthesis_notes: []
381
+ },
382
+ quality_model: {
383
+ reporting_basis: 'Record enough source metadata to make search reproducible, including query, layer, locator, publisher or author, publication date when known, accessed_at, reliability, credibility, stance, and cited claim ids.',
384
+ source_quality_fields: ['layer', 'kind', 'title', 'locator', 'publisher_or_author', 'published_at', 'accessed_at', 'reliability', 'credibility', 'stance', 'supports', 'undermines']
385
+ },
200
386
  citation_coverage: {
201
387
  all_key_claims_cited: false,
202
388
  notes: []
@@ -261,18 +447,53 @@ export function defaultFalsificationLedger() {
261
447
  };
262
448
  }
263
449
 
450
+ function sourceLayerIdsForPlan(plan = null) {
451
+ const layers = plan?.web_research_policy?.source_layers?.length ? plan.web_research_policy.source_layers : RESEARCH_SOURCE_LAYERS;
452
+ return layers.map((layer) => layer.id).filter(Boolean);
453
+ }
454
+
455
+ function sourceLayerCoverageStats(sourceLedger = null, requiredLayerIds = RESEARCH_SOURCE_LAYER_IDS) {
456
+ const covered = new Set();
457
+ const sourceRows = [
458
+ ...(Array.isArray(sourceLedger?.sources) ? sourceLedger.sources : []),
459
+ ...(Array.isArray(sourceLedger?.counterevidence_sources) ? sourceLedger.counterevidence_sources : [])
460
+ ];
461
+ for (const source of sourceRows) {
462
+ const layer = source?.layer || source?.layer_id || source?.source_layer;
463
+ if (requiredLayerIds.includes(layer)) covered.add(layer);
464
+ }
465
+ for (const layer of Array.isArray(sourceLedger?.source_layers) ? sourceLedger.source_layers : []) {
466
+ const id = layer?.id || layer?.layer;
467
+ const sourceIds = [
468
+ ...(Array.isArray(layer?.source_ids) ? layer.source_ids : []),
469
+ ...(Array.isArray(layer?.counterevidence_ids) ? layer.counterevidence_ids : [])
470
+ ];
471
+ if (requiredLayerIds.includes(id) && layer?.status === 'covered' && sourceIds.length > 0) covered.add(id);
472
+ }
473
+ const missing = requiredLayerIds.filter((id) => !covered.has(id));
474
+ return { covered: [...covered], missing, required: [...requiredLayerIds] };
475
+ }
476
+
264
477
  export function defaultResearchGate() {
265
478
  return {
266
479
  passed: false,
267
480
  report_present: false,
481
+ paper_present: false,
482
+ paper_sections: 0,
483
+ genius_opinion_summary_present: false,
484
+ genius_opinion_summaries: 0,
485
+ research_source_skill_present: false,
268
486
  source_ledger_present: false,
269
487
  scout_ledger_present: false,
270
488
  debate_ledger_present: false,
271
489
  novelty_ledger_present: false,
272
490
  falsification_ledger_present: false,
273
- web_search_policy: 'maximum_source_retrieval',
491
+ web_search_policy: 'layered_source_retrieval_and_triangulation',
274
492
  web_search_passes: 0,
275
493
  source_entries: 0,
494
+ source_layers_required: RESEARCH_SOURCE_LAYER_IDS.length,
495
+ source_layers_covered: 0,
496
+ triangulation_checks: 0,
276
497
  independent_scouts: 0,
277
498
  xhigh_scouts: 0,
278
499
  eureka_moments: 0,
@@ -295,7 +516,13 @@ export function defaultResearchGate() {
295
516
 
296
517
  export async function evaluateResearchGate(dir) {
297
518
  const gate = await readJson(path.join(dir, 'research-gate.json'), defaultResearchGate());
519
+ const plan = await readJson(path.join(dir, 'research-plan.json'), null);
298
520
  const reportPresent = await exists(path.join(dir, 'research-report.md'));
521
+ const paperPresent = await exists(path.join(dir, RESEARCH_PAPER_ARTIFACT));
522
+ const paperSections = paperPresent ? countResearchPaperSections(await readText(path.join(dir, RESEARCH_PAPER_ARTIFACT), '')) : 0;
523
+ const geniusSummaryPresent = await exists(path.join(dir, RESEARCH_GENIUS_SUMMARY_ARTIFACT));
524
+ const geniusSummaryCount = geniusSummaryPresent ? countGeniusOpinionSummaries(await readText(path.join(dir, RESEARCH_GENIUS_SUMMARY_ARTIFACT), '')) : 0;
525
+ const sourceSkillPresent = await exists(path.join(dir, RESEARCH_SOURCE_SKILL_ARTIFACT));
299
526
  const sourcePresent = await exists(path.join(dir, 'source-ledger.json'));
300
527
  const scoutPresent = await exists(path.join(dir, 'scout-ledger.json'));
301
528
  const debatePresent = await exists(path.join(dir, 'debate-ledger.json'));
@@ -308,6 +535,9 @@ export async function evaluateResearchGate(dir) {
308
535
  const sourceEntries = Array.isArray(sourceLedger?.sources) ? sourceLedger.sources.length : 0;
309
536
  const counterEvidenceEntries = Array.isArray(sourceLedger?.counterevidence_sources) ? sourceLedger.counterevidence_sources.length : 0;
310
537
  const webSearchPasses = Math.max(Number(gate.web_search_passes || 0), Number(sourceLedger?.web_search_passes || 0));
538
+ const requiredSourceLayers = sourceLayerIdsForPlan(plan);
539
+ const sourceLayerStats = sourceLayerCoverageStats(sourceLedger, requiredSourceLayers);
540
+ const triangulationChecks = Array.isArray(sourceLedger?.triangulation?.cross_layer_checks) ? sourceLedger.triangulation.cross_layer_checks.length : 0;
311
541
  const scoutRows = Array.isArray(scoutLedger?.scouts) ? scoutLedger.scouts : [];
312
542
  const independentScouts = scoutRows.filter((scout) => Array.isArray(scout.findings) && scout.findings.length > 0).length;
313
543
  const xhighScouts = scoutRows.filter((scout) => scout.effort === 'xhigh').length;
@@ -324,6 +554,11 @@ export async function evaluateResearchGate(dir) {
324
554
  const citationCoverage = gate.citation_coverage === true || sourceLedger?.citation_coverage?.all_key_claims_cited === true;
325
555
  const reasons = [];
326
556
  if (!reportPresent && gate.report_present !== true) reasons.push('research_report_missing');
557
+ if (!paperPresent) reasons.push('research_paper_missing');
558
+ if (paperSections < RESEARCH_PAPER_SECTION_GROUPS.length) reasons.push('research_paper_sections_missing');
559
+ if (!geniusSummaryPresent && gate.genius_opinion_summary_present !== true) reasons.push('genius_opinion_summary_missing');
560
+ if (Math.max(Number(gate.genius_opinion_summaries || 0), geniusSummaryCount) < RESEARCH_SCOUT_COUNCIL.length) reasons.push('genius_opinion_summary_incomplete');
561
+ if (!sourceSkillPresent && gate.research_source_skill_present !== true) reasons.push('research_source_skill_missing');
327
562
  if (!sourcePresent && gate.source_ledger_present !== true) reasons.push('source_ledger_missing');
328
563
  if (!scoutPresent && gate.scout_ledger_present !== true) reasons.push('scout_ledger_missing');
329
564
  if (!debatePresent && gate.debate_ledger_present !== true) reasons.push('debate_ledger_missing');
@@ -331,6 +566,8 @@ export async function evaluateResearchGate(dir) {
331
566
  if (!falsificationPresent && gate.falsification_ledger_present !== true) reasons.push('falsification_ledger_missing');
332
567
  if (webSearchPasses < 1) reasons.push('web_search_pass_missing');
333
568
  if (Math.max(Number(gate.source_entries || 0), sourceEntries) < 1) reasons.push('source_entry_missing');
569
+ if (Math.max(Number(gate.source_layers_covered || 0), sourceLayerStats.covered.length) < requiredSourceLayers.length) reasons.push('source_layer_coverage_missing');
570
+ if (Math.max(Number(gate.triangulation_checks || 0), triangulationChecks) < 1) reasons.push('cross_layer_triangulation_missing');
334
571
  if (Math.max(Number(gate.independent_scouts || 0), independentScouts) < RESEARCH_SCOUT_COUNCIL.length) reasons.push('independent_scouts_missing');
335
572
  if (Math.max(Number(gate.xhigh_scouts || 0), xhighScouts) < RESEARCH_SCOUT_COUNCIL.length) reasons.push('scout_effort_not_xhigh');
336
573
  if (Math.max(Number(gate.eureka_moments || 0), eurekaMoments) < RESEARCH_SCOUT_COUNCIL.length) reasons.push('eureka_missing');
@@ -352,7 +589,15 @@ export async function evaluateResearchGate(dir) {
352
589
  reasons,
353
590
  metrics: {
354
591
  web_search_passes: webSearchPasses,
592
+ paper_sections: Math.max(Number(gate.paper_sections || 0), paperSections),
593
+ genius_opinion_summary_present: geniusSummaryPresent || gate.genius_opinion_summary_present === true,
594
+ genius_opinion_summaries: Math.max(Number(gate.genius_opinion_summaries || 0), geniusSummaryCount),
595
+ research_source_skill_present: sourceSkillPresent || gate.research_source_skill_present === true,
355
596
  source_entries: Math.max(Number(gate.source_entries || 0), sourceEntries),
597
+ source_layers_required: requiredSourceLayers.length,
598
+ source_layers_covered: Math.max(Number(gate.source_layers_covered || 0), sourceLayerStats.covered.length),
599
+ source_layers_missing: sourceLayerStats.missing,
600
+ triangulation_checks: Math.max(Number(gate.triangulation_checks || 0), triangulationChecks),
356
601
  independent_scouts: Math.max(Number(gate.independent_scouts || 0), independentScouts),
357
602
  xhigh_scouts: Math.max(Number(gate.xhigh_scouts || 0), xhighScouts),
358
603
  eureka_moments: Math.max(Number(gate.eureka_moments || 0), eurekaMoments),
@@ -371,41 +616,89 @@ export async function evaluateResearchGate(dir) {
371
616
  }
372
617
 
373
618
  export async function writeMockResearchResult(dir, plan) {
619
+ const mockLayerSources = RESEARCH_SOURCE_LAYERS.map((layer, index) => ({
620
+ id: `mock-source-${index + 1}`,
621
+ layer: layer.id,
622
+ kind: 'selftest',
623
+ title: `Mock ${layer.label} coverage`,
624
+ locator: 'writeMockResearchResult',
625
+ accessed_at: nowIso(),
626
+ reliability: 'mock',
627
+ credibility: 'mock',
628
+ stance: layer.id === 'counterevidence_factcheck' ? 'undermines' : 'supports',
629
+ supports: layer.id === 'counterevidence_factcheck' ? [] : ['mock-insight-1'],
630
+ undermines: layer.id === 'counterevidence_factcheck' ? ['mock-insight-1'] : [],
631
+ notes: `Selftest fixture for the ${layer.id} source layer; no live web call is made in --mock mode.`
632
+ }));
374
633
  const sourceLedger = {
375
634
  schema_version: 1,
376
- policy: 'maximum_source_retrieval',
635
+ policy: 'layered_source_retrieval_and_triangulation',
377
636
  created_at: nowIso(),
378
637
  mode: 'selftest_mock',
638
+ source_layer_skill: {
639
+ artifact: RESEARCH_SOURCE_SKILL_ARTIFACT,
640
+ status: 'created'
641
+ },
379
642
  web_search_passes: 1,
380
- queries: [
381
- { scout_id: 'einstein', query: 'mock first principles falsifiable novelty research mode', status: 'mocked' },
382
- { scout_id: 'feynman', query: 'mock simple experiment compare discovery prompt summary prompt', status: 'mocked' },
383
- { scout_id: 'turing', query: 'mock formal gate criteria source ledger citation coverage', status: 'mocked' },
384
- { scout_id: 'von_neumann', query: 'mock workflow gate scaling review route evidence', status: 'mocked' },
385
- { scout_id: 'skeptic', query: 'mock counterevidence research mode overclaims without sources', status: 'mocked' }
386
- ],
387
- sources: [
388
- {
389
- id: 'mock-source-1',
390
- kind: 'selftest',
391
- title: 'Mock SKS research source coverage',
392
- locator: 'writeMockResearchResult',
393
- accessed_at: nowIso(),
394
- supports: ['mock-insight-1'],
395
- notes: 'Selftest fixture; no live web call is made in --mock mode.'
396
- }
397
- ],
643
+ source_layers: RESEARCH_SOURCE_LAYERS.map((layer, index) => ({
644
+ id: layer.id,
645
+ label: layer.label,
646
+ required: true,
647
+ status: 'covered',
648
+ evidence_role: layer.evidence_role,
649
+ query_templates: layer.query_templates || [],
650
+ source_ids: [`mock-source-${index + 1}`],
651
+ counterevidence_ids: layer.id === 'counterevidence_factcheck' ? ['mock-counter-1'] : [],
652
+ blocker: null,
653
+ notes: 'Mock mode records layer coverage without live web access.'
654
+ })),
655
+ layer_coverage: {
656
+ required: [...RESEARCH_SOURCE_LAYER_IDS],
657
+ covered: [...RESEARCH_SOURCE_LAYER_IDS],
658
+ missing: [],
659
+ notes: ['mock fixture covers every research source layer']
660
+ },
661
+ queries: RESEARCH_SOURCE_LAYERS.map((layer) => ({
662
+ scout_id: layer.id === 'counterevidence_factcheck' ? 'skeptic' : null,
663
+ layer: layer.id,
664
+ query: `mock ${layer.id} layered research source search for ${plan.prompt}`,
665
+ status: 'mocked'
666
+ })),
667
+ sources: mockLayerSources,
398
668
  counterevidence_sources: [
399
669
  {
400
670
  id: 'mock-counter-1',
671
+ layer: 'counterevidence_factcheck',
401
672
  kind: 'selftest',
402
673
  title: 'Mock overclaim counterexample',
403
674
  locator: 'writeMockResearchResult',
404
675
  accessed_at: nowIso(),
676
+ reliability: 'mock',
677
+ credibility: 'mock',
678
+ stance: 'undermines',
405
679
  undermines: ['mock-insight-1'],
406
680
  notes: 'Shows the gate must fail if a run produces no tests or falsifiers.'
407
681
  }
408
682
  ],
683
+ triangulation: {
684
+ cross_layer_checks: [
685
+ {
686
+ id: 'mock-triangulation-1',
687
+ claim: 'Research Mode should not synthesize from a single corpus.',
688
+ source_ids: ['mock-source-1', 'mock-source-2', 'mock-source-5', 'mock-counter-1'],
689
+ result: 'survives_with_layered_evidence_requirement'
690
+ },
691
+ {
692
+ id: 'mock-triangulation-2',
693
+ claim: 'Public discourse is useful only when checked against formal and official layers.',
694
+ source_ids: ['mock-source-1', 'mock-source-2', 'mock-source-5', 'mock-source-6'],
695
+ result: 'downgrade_popularity_to_signal_not_truth'
696
+ }
697
+ ],
698
+ conflicts: [],
699
+ synthesis_notes: ['mock fixture requires cross-layer checks before synthesis']
700
+ },
701
+ quality_model: defaultSourceLedger(plan).quality_model,
409
702
  citation_coverage: {
410
703
  all_key_claims_cited: true,
411
704
  notes: ['mock report and novelty entry cite mock-source-1 and mock-counter-1']
@@ -493,23 +786,51 @@ export async function writeMockResearchResult(dir, plan) {
493
786
  }
494
787
  ]
495
788
  };
789
+ const geniusSummary = [
790
+ '# Genius Opinion Summary',
791
+ '',
792
+ `Prompt: ${plan.prompt}`,
793
+ '',
794
+ '## Scout Opinions',
795
+ ...RESEARCH_SCOUT_COUNCIL.flatMap((scout) => [
796
+ `### ${scout.label} (${scout.id})`,
797
+ `Final opinion: ${scout.label} wants the run to preserve ${scout.mandate.toLowerCase()} while producing a cited, falsifiable insight.`,
798
+ 'Strongest evidence: mock-source-1 plus the layered source ledger.',
799
+ 'Main disagreement: whether formal structure or cheap empirical probes should dominate the first pass.',
800
+ 'Changed mind: accepted that citation coverage, counterevidence, and triangulation are gates before synthesis.',
801
+ ''
802
+ ]),
803
+ '## Council Consensus',
804
+ 'The council keeps one modest, testable claim: Research Mode is useful when it writes a source-cited paper, records every scout opinion, triangulates across source layers, and exposes the next decisive test.'
805
+ ].join('\n');
806
+ await writeTextAtomic(path.join(dir, RESEARCH_SOURCE_SKILL_ARTIFACT), researchSourceSkillMarkdown(plan));
496
807
  await writeJsonAtomic(path.join(dir, 'source-ledger.json'), sourceLedger);
497
808
  await writeJsonAtomic(path.join(dir, 'scout-ledger.json'), scoutLedger);
498
809
  await writeJsonAtomic(path.join(dir, 'debate-ledger.json'), debateLedger);
499
810
  await writeJsonAtomic(path.join(dir, 'falsification-ledger.json'), falsificationLedger);
500
811
  await writeJsonAtomic(path.join(dir, 'novelty-ledger.json'), ledger);
501
- await writeTextAtomic(path.join(dir, 'research-report.md'), `# SKS Research Report\n\nPrompt: ${plan.prompt}\n\n## Scout Council Synthesis\n\nThe mock council keeps one cited methodological insight: a research mode should force falsifiable novelty rather than summarize known material [mock-source-1].\n\n## Source Coverage\n\nThis is a selftest fixture. It records mock source and counterevidence ledgers but does not perform live web browsing in --mock mode.\n\n## Candidate Insight\n\nA useful research run must produce source-cited, falsifiable novelty with scout findings and a cheap probe.\n\n## Falsification\n\nThe claim is weak if no new testable prediction, counterevidence source, or experiment is produced [mock-counter-1].\n\n## Next Test\n\nCompare this mode against a summary-only run and score candidate insights, falsification passes, citation coverage, and testability.\n`);
812
+ await writeTextAtomic(path.join(dir, RESEARCH_GENIUS_SUMMARY_ARTIFACT), `${geniusSummary}\n`);
813
+ await writeTextAtomic(path.join(dir, 'research-report.md'), `# SKS Research Report\n\nPrompt: ${plan.prompt}\n\n## Scout Council Synthesis\n\nThe mock council keeps one cited methodological insight: a research mode should force layered, falsifiable novelty rather than summarize known material from one corpus [mock-source-1].\n\n## Source Coverage\n\nThis is a selftest fixture. It records mock coverage for academic literature, official data, standards, news, public discourse, developer knowledge, and counterevidence layers, but does not perform live web browsing in --mock mode.\n\n## Candidate Insight\n\nA useful research run must produce source-cited, cross-layer triangulated, falsifiable novelty with scout findings and a cheap probe.\n\n## Falsification\n\nThe claim is weak if no new testable prediction, counterevidence source, cross-layer check, or experiment is produced [mock-counter-1].\n\n## Next Test\n\nCompare this mode against a summary-only run and score candidate insights, falsification passes, citation coverage, source-layer coverage, triangulation checks, and testability.\n`);
814
+ await writeTextAtomic(path.join(dir, RESEARCH_PAPER_ARTIFACT), `# Research Paper: ${plan.prompt}\n\n## Abstract\nA source-cited research run should produce cross-layer, falsifiable novelty rather than only summarize known material.\n\n## Introduction\nThe mock topic is evaluated as a research workflow outcome with layered source coverage [mock-source-1].\n\n## Methodology\nFive xhigh scouts produce Eureka ideas, debate, triangulate source layers, and falsify the strongest claim.\n\n## Findings\nThe surviving finding is that useful research needs cited novelty, source-layer coverage, cross-layer triangulation, and a cheap decisive probe.\n\n## Discussion\nThe debate favors gate-backed evidence over narrative confidence, and treats public discourse as signal rather than truth.\n\n## Limitations and Falsification\nThe claim fails without sources, counterevidence, triangulation checks, or testable predictions [mock-counter-1].\n\n## Conclusion and Next Experiment\nCompare this loop against a summary-only baseline and score testable insights.\n\n## References\n- [mock-source-1] Mock academic literature coverage.\n- [mock-source-2] Mock official government and leading-institution knowledge coverage.\n- [mock-source-3] Mock standards and primary documents coverage.\n- [mock-source-4] Mock current news and global reporting coverage.\n- [mock-source-5] Mock public discourse coverage.\n- [mock-source-6] Mock developer and practitioner knowledge coverage.\n- [mock-source-7] Mock counterevidence and fact-checking coverage.\n- [mock-counter-1] Mock overclaim counterexample.\n`);
502
815
  await writeJsonAtomic(path.join(dir, 'research-gate.json'), {
503
816
  ...defaultResearchGate(),
504
817
  passed: true,
505
818
  report_present: true,
819
+ paper_present: true,
820
+ paper_sections: RESEARCH_PAPER_SECTION_GROUPS.length,
821
+ genius_opinion_summary_present: true,
822
+ genius_opinion_summaries: RESEARCH_SCOUT_COUNCIL.length,
823
+ research_source_skill_present: true,
506
824
  source_ledger_present: true,
507
825
  scout_ledger_present: true,
508
826
  debate_ledger_present: true,
509
827
  novelty_ledger_present: true,
510
828
  falsification_ledger_present: true,
511
829
  web_search_passes: 1,
512
- source_entries: 1,
830
+ source_entries: mockLayerSources.length,
831
+ source_layers_required: RESEARCH_SOURCE_LAYER_IDS.length,
832
+ source_layers_covered: RESEARCH_SOURCE_LAYER_IDS.length,
833
+ triangulation_checks: sourceLedger.triangulation.cross_layer_checks.length,
513
834
  independent_scouts: RESEARCH_SCOUT_COUNCIL.length,
514
835
  xhigh_scouts: RESEARCH_SCOUT_COUNCIL.length,
515
836
  eureka_moments: RESEARCH_SCOUT_COUNCIL.length,
@@ -522,12 +843,12 @@ export async function writeMockResearchResult(dir, plan) {
522
843
  falsification_cases: 1,
523
844
  testable_predictions: 1,
524
845
  citation_coverage: true,
525
- evidence: ['mock research report', 'mock source ledger', 'mock scout ledger', 'mock debate ledger', 'mock novelty ledger', 'mock falsification ledger'],
846
+ evidence: ['mock research report', 'mock research paper', 'mock genius opinion summary', 'mock research source skill', 'mock layered source ledger', 'mock scout ledger', 'mock debate ledger', 'mock novelty ledger', 'mock falsification ledger'],
526
847
  notes: ['mock mode records the new contract but does not call a model or perform live web browsing']
527
848
  });
528
849
  return evaluateResearchGate(dir);
529
850
  }
530
851
 
531
852
  export function buildResearchPrompt({ id, mission, plan, cycle, previous }) {
532
- return `You are running SKS Research Mode.\nMISSION: ${id}\nTOPIC: ${mission.prompt}\nCYCLE: ${cycle}\nMODE: Genius Scout Council + frontier discovery loop. Use maximum reasoning depth available under the current Codex profile.\nNO-QUESTION LOCK: Do not ask the user. Resolve scope from research-plan.json and current project evidence.\nSAFETY: Destructive database operations and unsafe external actions are forbidden. Prefer read-only inspection, local files, and cited public sources.\nPERSONA POLICY: Use Einstein/Feynman/Turing/von Neumann-inspired scout lenses only as cognitive roles. Do not impersonate, roleplay private identity, or speak as the historical people.\nSCOUT EFFORT POLICY: Every Research scout agent must use reasoning_effort=xhigh. Record effort: "xhigh" for every scout in scout-ledger.json. Any lower-effort scout output must keep research-gate.json unpassed.\nEUREKA POLICY: Every scout must literally write "Eureka!" and one non-obvious, source-linked idea before debate.\nDEBATE POLICY: The scouts must debate vigorously but stay evidence-bound. Every scout must challenge or respond at least once, and debate-ledger.json must record the exchanges before synthesis.\nWEB/SOURCE POLICY: Run the broadest safe web/source search available in this runtime before synthesis. Use independent query sets for every scout. Prefer primary sources, official docs or standards, peer-reviewed or archival sources, reputable recent sources, and credible counterevidence. If live web search is unavailable, record the blocker in source-ledger.json and do not pass the gate.\nRESEARCH PLAN:\n${JSON.stringify(plan, null, 2)}\n\nOBJECTIVE: Produce genuinely useful candidate discoveries: non-obvious hypotheses, mechanisms, predictions, or experiments. Do not merely summarize. Mark uncertainty clearly.\n\nREQUIRED PROCESS:\n1. Source search first: create source-ledger.json with queries, source ids, counterevidence sources, citation coverage, and blockers.\n2. 
Independent xhigh scouts: create scout-ledger.json with effort=xhigh, a literal Eureka! idea, findings, source_ids, falsifiers, and cheap_probes for every scout lens.\n3. Debate: create debate-ledger.json with evidence-bound challenge/response exchanges involving every scout before synthesis.\n4. Falsification: create falsification-ledger.json with attacks, missing evidence, source conflicts, and decisive next tests.\n5. Synthesis: write research-report.md and novelty-ledger.json only after cited scout findings, Eureka ideas, debate, and falsification are recorded.\n\nREQUIRED OUTPUT FILES in .sneakoscope/missions/${id}/:\n- research-report.md: concise report with framing, source coverage, scout synthesis, debate synthesis, hypotheses, falsification, predictions, and next experiments. Cite source-ledger ids for factual claims.\n- source-ledger.json: web/source queries, source ids, source priority, counterevidence sources, citation coverage, and blockers.\n- scout-ledger.json: one entry per scout lens with effort, eureka, query_set, findings, source_ids, falsifiers, and cheap_probes.\n- debate-ledger.json: evidence-bound challenge/response exchanges, participants, changed minds, and unresolved conflicts.\n- novelty-ledger.json: entries with claim, novelty, confidence, falsifiability, evidence source ids, falsifiers, next_experiment.\n- falsification-ledger.json: attacks/counterexamples/source conflicts, result, and next_decisive_tests.\n- research-gate.json: set passed only when all ledgers exist, web/source retrieval was attempted, all scouts have effort=xhigh, all scouts have literal Eureka! ideas, every scout participated in debate, at least one counterevidence source exists, citation coverage is complete, at least one insight survived falsification, at least one testable prediction exists, and unsupported breakthrough claims are zero.\n\nPrevious cycle tail:\n${String(previous || '').slice(-2500)}\n`;
853
+ return `You are running SKS Research Mode.\nMISSION: ${id}\nTOPIC: ${mission.prompt}\nCYCLE: ${cycle}\nMODE: Genius Scout Council + frontier discovery loop. Use maximum reasoning depth available under the current Codex profile.\nLONG-RUN REAL-RESEARCH POLICY: Normal Research is allowed to take one or two hours when the question requires it. Do real source gathering and evidence comparison; do not shortcut into mock, fixture, or summary-only output. If live source access is unavailable, write the blocker and keep the gate unpassed.\nNO-QUESTION LOCK: Do not ask the user. Resolve scope from research-plan.json and current project evidence.\nSAFETY: Destructive database operations and unsafe external actions are forbidden. Prefer read-only inspection, local files, and cited public sources.\nPERSONA POLICY: Use Einstein/Feynman/Turing/von Neumann-inspired scout lenses only as cognitive roles. Do not impersonate, roleplay private identity, or speak as the historical people.\nSCOUT EFFORT POLICY: Every Research scout agent must use reasoning_effort=xhigh. Record effort: "xhigh" for every scout in scout-ledger.json. Any lower-effort scout output must keep research-gate.json unpassed.\nEUREKA POLICY: Every scout must literally write "Eureka!" and one non-obvious, source-linked idea before debate.\nDEBATE POLICY: The scouts must debate vigorously but stay evidence-bound. Every scout must challenge or respond at least once, and debate-ledger.json must record the exchanges before synthesis.\nPAPER POLICY: After the report and ledgers, write research-paper.md as a concise manuscript with Abstract, Introduction, Methodology, Findings/Results, Discussion, Limitations/Falsification, Conclusion/Next Experiment, and References.\nSOURCE SKILL POLICY: Create or update ${RESEARCH_SOURCE_SKILL_ARTIFACT} as a route-local source collection skill before synthesis. It must name the selected source layers, query routes, quality fields, blockers, and cross-layer triangulation checks. 
Do not edit generated .agents/skills during the research run.\nWEB/SOURCE POLICY: Run layered source retrieval across every safely available layer before synthesis: latest public papers, official government or leading-institution data, standards or primary docs, current news including BBC/CNN/GDELT-style sources when relevant, public discourse including X/Twitter and Reddit when available, developer/practitioner sources such as Stack Overflow/Stack Exchange/GitHub, and counterevidence or fact-checking sources. Treat public discourse as signal, not truth. If a layer cannot be searched, record the blocker in source-ledger.json and do not pass the gate.\nRESEARCH PLAN:\n${JSON.stringify(plan, null, 2)}\n\nOBJECTIVE: Produce genuinely useful candidate discoveries: non-obvious hypotheses, mechanisms, predictions, or experiments. Do not merely summarize. Mark uncertainty clearly.\n\nREQUIRED PROCESS:\n1. Source skill first: create ${RESEARCH_SOURCE_SKILL_ARTIFACT} with source layers, query templates, quality fields, blockers, and triangulation rules.\n2. Layered source search: create source-ledger.json with source_layers, queries, source ids, source quality notes, counterevidence sources, triangulation.cross_layer_checks, citation coverage, and blockers.\n3. Independent xhigh scouts: create scout-ledger.json with effort=xhigh, a literal Eureka! idea, findings, source_ids, falsifiers, and cheap_probes for every scout lens.\n4. Debate: create debate-ledger.json with evidence-bound challenge/response exchanges involving every scout before synthesis.\n5. Falsification: create falsification-ledger.json with attacks, missing evidence, source conflicts, and decisive next tests.\n6. Synthesis: write research-report.md and novelty-ledger.json only after cited scout findings, Eureka ideas, debate, cross-layer triangulation, and falsification are recorded.\n7. 
Paper: write research-paper.md as a paper-style manuscript with source-ledger references and limitations.\n\nREQUIRED OUTPUT FILES in .sneakoscope/missions/${id}/:\n- research-report.md: concise report with framing, source coverage, scout synthesis, debate synthesis, hypotheses, falsification, predictions, and next experiments. Cite source-ledger ids for factual claims.\n- research-paper.md: paper manuscript with Abstract, Introduction, Methodology, Findings/Results, Discussion, Limitations/Falsification, Conclusion/Next Experiment, and References using source-ledger ids.\n- ${RESEARCH_SOURCE_SKILL_ARTIFACT}: route-local source collection skill; it is evidence for the Skill Creator step and must not mutate generated .agents/skills.\n- source-ledger.json: layered web/source queries, source ids, source priority, source quality notes, counterevidence sources, citation coverage, triangulation checks, and blockers.\n- scout-ledger.json: one entry per scout lens with effort, eureka, query_set, findings, source_ids, falsifiers, and cheap_probes.\n- debate-ledger.json: evidence-bound challenge/response exchanges, participants, changed minds, and unresolved conflicts.\n- novelty-ledger.json: entries with claim, novelty, confidence, falsifiability, evidence source ids, falsifiers, next_experiment.\n- falsification-ledger.json: attacks/counterexamples/source conflicts, result, and next_decisive_tests.\n- research-gate.json: set passed only when all ledgers exist, ${RESEARCH_SOURCE_SKILL_ARTIFACT} exists, research-paper.md exists with required paper sections, layered web/source retrieval covered every required source layer, at least one cross-layer triangulation check exists, all scouts have effort=xhigh, all scouts have literal Eureka! 
ideas, every scout participated in debate, at least one counterevidence source exists, citation coverage is complete, at least one insight survived falsification, at least one testable prediction exists, and unsupported breakthrough claims are zero.\n\nPrevious cycle tail:\n${String(previous || '').slice(-2500)}\n`;
533
854
  }
@@ -390,10 +390,10 @@ export const ROUTES = [
390
390
  command: '$Research',
391
391
  mode: 'RESEARCH',
392
392
  route: 'research mission',
393
- description: 'Frontier discovery with xhigh genius-lens scouts, Eureka ideas, vigorous evidence-bound debate, maximum source retrieval, falsification, and testable predictions.',
394
- requiredSkills: ['research', 'research-discovery', 'pipeline-runner', 'context7-docs', REFLECTION_SKILL_NAME, 'honest-mode'],
395
- lifecycle: ['research_plan', 'source_ledger', 'xhigh_scout_council', 'eureka_moments', 'debate_ledger', 'report', 'novelty_ledger', 'falsification_ledger', 'research_gate', 'post_route_reflection', 'honest_mode'],
396
- context7Policy: 'required',
393
+ description: 'Frontier discovery with xhigh genius-lens scouts, Eureka ideas, vigorous evidence-bound debate, layered public source retrieval, falsification, a paper manuscript, a final genius-opinion summary, and testable predictions.',
394
+ requiredSkills: ['research', 'research-discovery', 'pipeline-runner', REFLECTION_SKILL_NAME, 'honest-mode'],
395
+ lifecycle: ['research_plan', 'source_skill', 'layered_source_ledger', 'xhigh_scout_council', 'eureka_moments', 'debate_ledger', 'report', 'paper', 'genius_opinion_summary', 'novelty_ledger', 'falsification_ledger', 'research_gate', 'post_route_reflection', 'honest_mode'],
396
+ context7Policy: 'if_external_docs',
397
397
  reasoningPolicy: 'xhigh',
398
398
  stopGate: 'research-gate.json',
399
399
  cliEntrypoint: 'sks research prepare|run',
@@ -537,7 +537,7 @@ export const COMMAND_CATALOG = [
537
537
  { name: 'init', usage: 'sks init [--force] [--local-only] [--install-scope global|project]', description: 'Initialize the local SKS control surface.' },
538
538
  { name: 'selftest', usage: 'sks selftest [--mock]', description: 'Run local smoke tests without calling a model.' },
539
539
  { name: 'goal', usage: 'sks goal create|pause|resume|clear|status ...', description: 'Prepare and control the fast SKS bridge overlay for Codex native persisted /goal workflows.' },
540
- { name: 'research', usage: 'sks research prepare|run|status ...', description: 'Run frontier-style research missions with xhigh scout Eureka ideas, debate, source-ledger, novelty, and falsification gates.' },
540
+ { name: 'research', usage: 'sks research prepare|run|status ...', description: 'Run long-form real research missions with xhigh scout Eureka ideas, debate, layered sources, paper, novelty, and falsification gates.' },
541
541
  { name: 'db', usage: 'sks db policy|scan|mcp-config|classify|check ...', description: 'Inspect and enforce database/Supabase safety policy.' },
542
542
  { name: 'eval', usage: 'sks eval run|compare|thresholds ...', description: 'Run deterministic context-quality and performance evidence checks.' },
543
543
  { name: 'harness', usage: 'sks harness fixture|review [--json]', description: 'Run Harness Growth Factory fixtures for forgetting, skills, experiments, tool taxonomy, permissions, MultiAgentV2, and tmux views.' },
@@ -585,9 +585,21 @@ function leadingDollarCommandMatch(prompt) {
585
585
  || text.match(/^\[\$([A-Za-z][A-Za-z0-9_-]*)\]\([^)]+\)(?:\s|:|$)/);
586
586
  }
587
587
 
588
+ function embeddedDollarCommandMatch(prompt) {
589
+ const text = String(prompt || '');
590
+ const matches = [];
591
+ for (const match of text.matchAll(/\[\$([A-Za-z][A-Za-z0-9_-]*)\]\([^)]+\)/g)) matches.push({ index: match.index, command: match[1] });
592
+ for (const match of text.matchAll(/(^|[\s([{<])\$([A-Za-z][A-Za-z0-9_-]*)(?=\s|:|$|[.,!?;)\]}])/g)) matches.push({ index: match.index + match[1].length, command: match[2] });
593
+ return matches
594
+ .sort((a, b) => a.index - b.index)
595
+ .find((match) => routeByDollarCommand(match.command) || String(match.command || '').toUpperCase() === 'MAD-SKS') || null;
596
+ }
597
+
588
598
  export function dollarCommand(prompt) {
589
- const match = leadingDollarCommandMatch(prompt);
590
- return match ? match[1].toUpperCase() : null;
599
+ const leading = leadingDollarCommandMatch(prompt);
600
+ if (leading) return leading[1].toUpperCase();
601
+ const embedded = embeddedDollarCommandMatch(prompt);
602
+ return embedded ? embedded.command.toUpperCase() : null;
591
603
  }
592
604
 
593
605
  export function hasMadSksSignal(prompt = '') {