sneakoscope 0.7.69 → 0.7.72
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/package.json +1 -1
- package/src/cli/main.mjs +17 -14
- package/src/cli/maintenance-commands.mjs +61 -23
- package/src/core/fsx.mjs +1 -1
- package/src/core/hooks-runtime.mjs +1 -8
- package/src/core/init.mjs +1 -1
- package/src/core/pipeline.mjs +1 -1
- package/src/core/research.mjs +327 -37
- package/src/core/routes.mjs +19 -7
package/README.md
CHANGED
|
@@ -192,7 +192,7 @@ sks qa-loop prepare "http://localhost:3000"
|
|
|
192
192
|
sks qa-loop run latest --max-cycles 2
|
|
193
193
|
sks goal create "persist this migration workflow"
|
|
194
194
|
sks research prepare "evaluate this approach"
|
|
195
|
-
sks research run latest --max-cycles 3
|
|
195
|
+
sks research run latest --max-cycles 3 --cycle-timeout-minutes 120
|
|
196
196
|
sks research status latest
|
|
197
197
|
sks db scan --json
|
|
198
198
|
sks wiki refresh
|
|
@@ -211,7 +211,7 @@ sks skill-dream run --json
|
|
|
211
211
|
sks code-structure scan --json
|
|
212
212
|
```
|
|
213
213
|
|
|
214
|
-
`sks research` prepares a genius-lens scout council, requires every scout to run at `xhigh`, records one literal `Eureka!` idea per scout, runs an evidence-bound debate,
|
|
214
|
+
`sks research` prepares a genius-lens scout council, requires every scout to run at `xhigh`, records one literal `Eureka!` idea per scout, runs an evidence-bound debate, and now creates `research-source-skill.md` as a route-local source collection skill before synthesis. Normal Research is intentionally allowed to take one or two hours when the problem needs it; `--mock` is only for selftests or dry harness checks, and a real run blocks with `research-blocker.json` instead of silently substituting mock output when the Codex execution path is unavailable. The source layer contract separates latest papers, official/government or leading-institution sources, standards/primary docs, current news such as BBC/CNN/GDELT-style sources, public discourse such as X/Reddit, developer/practitioner knowledge such as Stack Overflow/GitHub, and counterevidence/fact-checking; `source-ledger.json` must record layer coverage, source quality, blockers, citations, and cross-layer triangulation. Context7 is optional for `$Research` and only becomes relevant when the research topic specifically depends on package, API, framework, or SDK documentation. Research runs require `research-report.md`, `research-paper.md`, `genius-opinion-summary.md`, `research-source-skill.md`, `source-ledger.json`, `scout-ledger.json`, `debate-ledger.json`, `novelty-ledger.json`, `falsification-ledger.json`, and `research-gate.json` so they stay source-backed, adversarially checked, falsifiable, paper-ready, and clear about every scout lens opinion. `research status` reports source entries, source-layer coverage, triangulation checks, counterevidence, xhigh scout count, Eureka moments, debate exchanges, paper presence/sections, genius-opinion summary coverage, scout findings, and falsification cases alongside the gate.
|
|
215
215
|
|
|
216
216
|
`sks pipeline plan` shows the active route lane, kept/skipped stages, verification commands, and no-unrequested-fallback invariant. `sks proof-field scan` is the lightweight rubric for small changes; risky or broad signals return to the full Team/Honest path.
|
|
217
217
|
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "sneakoscope",
|
|
3
3
|
"displayName": "ㅅㅋㅅ",
|
|
4
|
-
"version": "0.7.69",
|
|
4
|
+
"version": "0.7.72",
|
|
5
5
|
"description": "Sneakoscope Codex: database-safe Codex CLI/App harness with Team, Goal, AutoResearch, TriWiki, and Honest Mode.",
|
|
6
6
|
"type": "module",
|
|
7
7
|
"homepage": "https://github.com/mandarange/Sneakoscope-Codex#readme",
|
package/src/cli/main.mjs
CHANGED
|
@@ -22,7 +22,7 @@ import { bumpProjectVersion, installVersionGitHook, runVersionPreCommit, version
|
|
|
22
22
|
import { rustInfo } from '../core/rust-accelerator.mjs';
|
|
23
23
|
import { renderCartridge, validateCartridge, driftCartridge, snapshotCartridge } from '../core/gx-renderer.mjs';
|
|
24
24
|
import { defaultEvaluationScenario, runEvaluationBenchmark } from '../core/evaluation.mjs';
|
|
25
|
-
import {
|
|
25
|
+
import { evaluateResearchGate, writeMockResearchResult, writeResearchPlan } from '../core/research.mjs';
|
|
26
26
|
import {
|
|
27
27
|
PPT_AUDIENCE_STRATEGY_ARTIFACT,
|
|
28
28
|
PPT_CLEANUP_REPORT_ARTIFACT,
|
|
@@ -211,7 +211,7 @@ Usage:
|
|
|
211
211
|
sks team attach-tmux [mission-id|latest]
|
|
212
212
|
sks team cleanup-tmux [mission-id|latest]
|
|
213
213
|
sks research prepare "topic" [--depth frontier]
|
|
214
|
-
sks research run <mission-id|latest> [--mock] [--max-cycles N]
|
|
214
|
+
sks research run <mission-id|latest> [--mock] [--max-cycles N] [--cycle-timeout-minutes N]
|
|
215
215
|
sks research status <mission-id|latest>
|
|
216
216
|
sks db policy
|
|
217
217
|
sks db scan [--migrations] [--json]
|
|
@@ -1963,7 +1963,7 @@ function readMaxCycles(args, fallback) {
|
|
|
1963
1963
|
|
|
1964
1964
|
function positionalArgs(args = []) {
|
|
1965
1965
|
const out = [];
|
|
1966
|
-
const valueFlags = new Set(['--format', '--iterations', '--out', '--baseline', '--candidate', '--install-scope', '--max-cycles', '--depth', '--scope', '--transport', '--query', '--topic', '--tokens', '--timeout-ms', '--sql', '--command', '--project-ref', '--agent', '--phase', '--message', '--role', '--max-anchors', '--lines', '--dir']);
|
|
1966
|
+
const valueFlags = new Set(['--format', '--iterations', '--out', '--baseline', '--candidate', '--install-scope', '--max-cycles', '--cycle-timeout-minutes', '--depth', '--scope', '--transport', '--query', '--topic', '--tokens', '--timeout-ms', '--sql', '--command', '--project-ref', '--agent', '--phase', '--message', '--role', '--max-anchors', '--lines', '--dir']);
|
|
1967
1967
|
for (let i = 0; i < args.length; i++) {
|
|
1968
1968
|
const arg = String(args[i]);
|
|
1969
1969
|
if (valueFlags.has(arg)) {
|
|
@@ -2647,19 +2647,21 @@ async function selftest() {
|
|
|
2647
2647
|
const hookResearchTeamResult = await runProcess(process.execPath, [hookBin, 'hook', 'user-prompt-submit'], { cwd: hookResearchMarkdownTmp, input: hookResearchTeamPayload, env: { SKS_DISABLE_UPDATE_CHECK: '1' }, timeoutMs: 15000, maxOutputBytes: 256 * 1024 });
|
|
2648
2648
|
if (hookResearchTeamResult.code !== 0) throw new Error(`selftest: active Team setup before markdown $Research hook exited ${hookResearchTeamResult.code}: ${hookResearchTeamResult.stderr}`);
|
|
2649
2649
|
const hookResearchTeamState = await readJson(stateFile(hookResearchMarkdownTmp), {});
|
|
2650
|
-
const hookResearchMarkdownPayload = JSON.stringify({ cwd: hookResearchMarkdownTmp, prompt: '[$research](
|
|
2650
|
+
const hookResearchMarkdownPayload = JSON.stringify({ cwd: hookResearchMarkdownTmp, prompt: '논문 [$research](x) 팀 커밋 푸쉬 연구' });
|
|
2651
2651
|
const hookResearchMarkdownResult = await runProcess(process.execPath, [hookBin, 'hook', 'user-prompt-submit'], { cwd: hookResearchMarkdownTmp, input: hookResearchMarkdownPayload, env: { SKS_DISABLE_UPDATE_CHECK: '1' }, timeoutMs: 15000, maxOutputBytes: 256 * 1024 });
|
|
2652
2652
|
if (hookResearchMarkdownResult.code !== 0) throw new Error(`selftest: markdown $Research hook exited ${hookResearchMarkdownResult.code}: ${hookResearchMarkdownResult.stderr}`);
|
|
2653
2653
|
const hookResearchMarkdownJson = JSON.parse(hookResearchMarkdownResult.stdout);
|
|
2654
2654
|
const hookResearchMarkdownContext = hookResearchMarkdownJson.hookSpecificOutput?.additionalContext || '';
|
|
2655
|
-
if (!hookResearchMarkdownContext.includes('$Research route prepared')) throw new Error('selftest: markdown
|
|
2656
|
-
if (hookResearchMarkdownContext.includes(`Active Team mission ${hookResearchTeamState.mission_id}`)) throw new Error('selftest:
|
|
2657
|
-
if (!String(hookResearchMarkdownJson.systemMessage || '').includes('Research route') || String(hookResearchMarkdownJson.systemMessage || '').includes('QA-LOOP route')) throw new Error('selftest:
|
|
2655
|
+
if (!hookResearchMarkdownContext.includes('$Research route prepared')) throw new Error('selftest: markdown research hook');
|
|
2656
|
+
if (hookResearchMarkdownContext.includes(`Active Team mission ${hookResearchTeamState.mission_id}`)) throw new Error('selftest: stale Team context');
|
|
2657
|
+
if (!String(hookResearchMarkdownJson.systemMessage || '').includes('Research route') || String(hookResearchMarkdownJson.systemMessage || '').includes('QA-LOOP route')) throw new Error('selftest: research hook message');
|
|
2658
2658
|
const hookResearchMarkdownState = await readJson(stateFile(hookResearchMarkdownTmp), {});
|
|
2659
|
-
if (hookResearchMarkdownState.mode !== 'RESEARCH' || hookResearchMarkdownState.route !== 'Research' || hookResearchMarkdownState.mission_id === hookResearchTeamState.mission_id || hookResearchMarkdownState.stop_gate !== 'research-gate.json' || !hookResearchMarkdownState.pipeline_plan_ready) throw new Error('selftest:
|
|
2659
|
+
if (hookResearchMarkdownState.mode !== 'RESEARCH' || hookResearchMarkdownState.route !== 'Research' || hookResearchMarkdownState.mission_id === hookResearchTeamState.mission_id || hookResearchMarkdownState.stop_gate !== 'research-gate.json' || !hookResearchMarkdownState.pipeline_plan_ready) throw new Error('selftest: research hook state');
|
|
2660
2660
|
const hookResearchMissionDir = missionDir(hookResearchMarkdownTmp, hookResearchMarkdownState.mission_id);
|
|
2661
|
-
if (!(await exists(path.join(hookResearchMissionDir, PIPELINE_PLAN_ARTIFACT)))) throw new Error('selftest:
|
|
2662
|
-
|
|
2661
|
+
if (!(await exists(path.join(hookResearchMissionDir, PIPELINE_PLAN_ARTIFACT)))) throw new Error('selftest: research hook plan');
|
|
2662
|
+
const rss = 'research-source-skill.md';
|
|
2663
|
+
const gos = 'genius-opinion-summary.md';
|
|
2664
|
+
for (const artifact of [rss, 'source-ledger.json', 'scout-ledger.json', 'debate-ledger.json', 'falsification-ledger.json']) {
|
|
2663
2665
|
if (!(await exists(path.join(hookResearchMissionDir, artifact)))) throw new Error(`selftest: hook research ${artifact}`);
|
|
2664
2666
|
}
|
|
2665
2667
|
const hookPayload = JSON.stringify({ cwd: hookGoalTmp, prompt: '$Goal 로그인 세션 만료 UX 개선' });
|
|
@@ -3782,15 +3784,16 @@ async function selftest() {
|
|
|
3782
3784
|
if (wikiPruneDryRun.candidates < 1 || !wikiPruneDryRun.actions.some((action) => action.reason === 'low_wiki_trust')) throw new Error('selftest: wiki prune did not flag low-trust artifact');
|
|
3783
3785
|
const { dir: researchDir, mission: researchMission } = await createMission(tmp, { mode: 'research', prompt: '새로운 코드 리뷰 방법론 연구' });
|
|
3784
3786
|
const researchPlan = await writeResearchPlan(researchDir, researchMission.prompt, {});
|
|
3785
|
-
if (researchPlan.methodology !== 'genius-scout-council-frontier-discovery-loop' || researchPlan.web_research_policy?.mode !== '
|
|
3787
|
+
if (researchPlan.methodology !== 'genius-scout-council-frontier-discovery-loop' || researchPlan.web_research_policy?.mode !== 'layered_source_retrieval_and_triangulation') throw new Error('selftest: research plan contract');
|
|
3786
3788
|
const rArts = researchPlan.required_artifacts || [];
|
|
3787
|
-
for (const a of ['source-ledger.json', 'scout-ledger.json', 'debate-ledger.json', 'falsification-ledger.json']) if (!rArts.includes(a) || !(await exists(path.join(researchDir, a)))) throw new Error('selftest: research artifact');
|
|
3788
|
-
if (!rArts.includes('research-paper.md')) throw new Error('selftest: research paper');
|
|
3789
|
+
for (const a of [rss, 'source-ledger.json', 'scout-ledger.json', 'debate-ledger.json', 'falsification-ledger.json']) if (!rArts.includes(a) || !(await exists(path.join(researchDir, a)))) throw new Error('selftest: research artifact');
|
|
3790
|
+
if (!rArts.includes('research-paper.md') || !rArts.includes(gos)) throw new Error('selftest: research paper');
|
|
3789
3791
|
const initialResearchGate = await evaluateResearchGate(researchDir);
|
|
3790
3792
|
if (initialResearchGate.passed || ['web_search_pass_missing', 'eureka_missing', 'debate_exchanges_missing', 'research_paper_missing'].some((r) => !initialResearchGate.reasons.includes(r))) throw new Error('selftest: research gate');
|
|
3791
3793
|
const researchGate = await writeMockResearchResult(researchDir, researchPlan);
|
|
3792
3794
|
if (!researchGate.passed) throw new Error('selftest: mock research gate did not pass');
|
|
3793
|
-
|
|
3795
|
+
const rm = researchGate.metrics || {};
|
|
3796
|
+
if (['independent_scouts', 'xhigh_scouts', 'eureka_moments', 'debate_participants', 'genius_opinion_summaries'].some((m) => rm[m] < 5) || ['counterevidence_sources', 'falsification_cases', 'triangulation_checks'].some((m) => rm[m] < 1) || rm.paper_sections < 8 || rm.citation_coverage !== true || rm.source_layers_covered < 7) throw new Error('selftest: research metrics');
|
|
3794
3797
|
await writeJsonAtomic(path.join(dir, 'done-gate.json'), { passed: true, unsupported_critical_claims: 0, database_safety_violation: false, database_safety_reviewed: true, visual_drift: 'low', wiki_drift: 'low', tests_required: false });
|
|
3795
3798
|
const gate = await evaluateDoneGate(tmp, id);
|
|
3796
3799
|
if (!gate.passed) throw new Error('selftest: done gate');
|
|
package/src/cli/maintenance-commands.mjs
CHANGED
|
@@ -8,14 +8,14 @@ import { buildQuestionSchema, writeQuestions } from '../core/questions.mjs';
|
|
|
8
8
|
import { sealContract } from '../core/decision-contract.mjs';
|
|
9
9
|
import { buildQaLoopQuestionSchema, buildQaLoopPrompt, evaluateQaGate, qaStatus, writeMockQaResult, writeQaLoopArtifacts } from '../core/qa-loop.mjs';
|
|
10
10
|
import { containsUserQuestion, noQuestionContinuationReason } from '../core/no-question-guard.mjs';
|
|
11
|
-
import { RESEARCH_PAPER_ARTIFACT, countResearchPaperSections, buildResearchPrompt, evaluateResearchGate, writeMockResearchResult, writeResearchPlan } from '../core/research.mjs';
|
|
11
|
+
import { RESEARCH_GENIUS_SUMMARY_ARTIFACT, RESEARCH_PAPER_ARTIFACT, RESEARCH_SOURCE_SKILL_ARTIFACT, countGeniusOpinionSummaries, countResearchPaperSections, buildResearchPrompt, evaluateResearchGate, writeMockResearchResult, writeResearchPlan } from '../core/research.mjs';
|
|
12
12
|
import { storageReport, enforceRetention, pruneWikiArtifacts } from '../core/retention.mjs';
|
|
13
13
|
import { evaluateDoneGate } from '../core/hproof.mjs';
|
|
14
14
|
import { renderCartridge, validateCartridge, driftCartridge, snapshotCartridge } from '../core/gx-renderer.mjs';
|
|
15
15
|
import { DEFAULT_EVAL_THRESHOLDS, compareEvaluationReports, runEvaluationBenchmark } from '../core/evaluation.mjs';
|
|
16
16
|
import { contextCapsule } from '../core/triwiki-attention.mjs';
|
|
17
17
|
import { rgbaKey, rgbaToWikiCoord, validateWikiCoordinateIndex } from '../core/wiki-coordinate.mjs';
|
|
18
|
-
import { ALLOWED_REASONING_EFFORTS, CODEX_COMPUTER_USE_ONLY_POLICY, DOLLAR_SKILL_NAMES, FROM_CHAT_IMG_CHECKLIST_ARTIFACT, FROM_CHAT_IMG_COVERAGE_ARTIFACT, FROM_CHAT_IMG_QA_LOOP_ARTIFACT, FROM_CHAT_IMG_SOURCE_INVENTORY_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS, FROM_CHAT_IMG_VISUAL_MAP_ARTIFACT, FROM_CHAT_IMG_WORK_ORDER_ARTIFACT, RECOMMENDED_SKILLS, ROUTES, hasFromChatImgSignal, reflectionRequiredForRoute, routePrompt, routeReasoning, routeRequiresSubagents, stackCurrentDocsPolicy, stripVisibleDecisionAnswerBlocks, triwikiContextTracking } from '../core/routes.mjs';
|
|
18
|
+
import { ALLOWED_REASONING_EFFORTS, CODEX_COMPUTER_USE_ONLY_POLICY, DOLLAR_SKILL_NAMES, FROM_CHAT_IMG_CHECKLIST_ARTIFACT, FROM_CHAT_IMG_COVERAGE_ARTIFACT, FROM_CHAT_IMG_QA_LOOP_ARTIFACT, FROM_CHAT_IMG_SOURCE_INVENTORY_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS, FROM_CHAT_IMG_VISUAL_MAP_ARTIFACT, FROM_CHAT_IMG_WORK_ORDER_ARTIFACT, RECOMMENDED_SKILLS, ROUTES, hasFromChatImgSignal, reflectionRequiredForRoute, routeNeedsContext7, routePrompt, routeReasoning, routeRequiresSubagents, stackCurrentDocsPolicy, stripVisibleDecisionAnswerBlocks, triwikiContextTracking } from '../core/routes.mjs';
|
|
19
19
|
import { TEAM_DECOMPOSITION_ARTIFACT, TEAM_GRAPH_ARTIFACT, TEAM_INBOX_DIR, TEAM_RUNTIME_TASKS_ARTIFACT, teamRuntimePlanMetadata, teamRuntimeRequiredArtifacts, writeTeamRuntimeArtifacts } from '../core/team-dag.mjs';
|
|
20
20
|
import { appendTeamEvent, formatAgentReasoning, formatRoleCounts, initTeamLive, normalizeTeamSpec, parseTeamSpecArgs, readTeamControl, readTeamDashboard, readTeamLive, readTeamTranscriptTail, renderTeamAgentLane, renderTeamCleanupSummary, renderTeamWatch, requestTeamSessionCleanup, teamCleanupRequested, teamReasoningPolicy } from '../core/team-live.mjs';
|
|
21
21
|
import { evaluateTeamReviewPolicyGate, MIN_TEAM_REVIEWER_LANES, MIN_TEAM_REVIEW_POLICY_TEXT, teamReviewPolicy } from '../core/team-review-policy.mjs';
|
|
@@ -42,6 +42,10 @@ const flag = (args, name) => args.includes(name);
|
|
|
42
42
|
const promptOf = (args) => args.filter((x) => !String(x).startsWith('--')).join(' ').trim();
|
|
43
43
|
const TEAM_SESSION_CLEANUP_ARTIFACT = 'team-session-cleanup.json';
|
|
44
44
|
const REPOSITORY_URL = 'https://github.com/mandarange/Sneakoscope-Codex.git';
|
|
45
|
+
const RESEARCH_DEFAULT_MAX_CYCLES = 3;
|
|
46
|
+
const RESEARCH_DEFAULT_CYCLE_TIMEOUT_MINUTES = 120;
|
|
47
|
+
const RESEARCH_MIN_CYCLE_TIMEOUT_MINUTES = 15;
|
|
48
|
+
const RESEARCH_MAX_CYCLE_TIMEOUT_MINUTES = 240;
|
|
45
49
|
|
|
46
50
|
async function resolveMissionId(root, arg) { return (!arg || arg === 'latest') ? findLatestMission(root) : arg; }
|
|
47
51
|
|
|
@@ -429,7 +433,7 @@ async function researchPrepare(args) {
|
|
|
429
433
|
if (!prompt) throw new Error('Missing research topic.');
|
|
430
434
|
const { id, dir } = await createMission(root, { mode: 'research', prompt });
|
|
431
435
|
const route = ROUTES.find((entry) => entry.id === 'Research') || routePrompt('$Research');
|
|
432
|
-
const context7Required =
|
|
436
|
+
const context7Required = routeNeedsContext7(route, prompt);
|
|
433
437
|
const reasoning = routeReasoning(route, prompt);
|
|
434
438
|
const plan = await writeResearchPlan(dir, prompt, { depth: readFlagValue(args, '--depth', 'frontier') });
|
|
435
439
|
const pipelinePlan = await writePipelinePlan(dir, { missionId: id, route, task: prompt, required: context7Required, ambiguity: { required: false, status: 'direct_research_cli' } });
|
|
@@ -457,7 +461,7 @@ async function researchPrepare(args) {
|
|
|
457
461
|
mode: route.mode,
|
|
458
462
|
phase: 'RESEARCH_PREPARED',
|
|
459
463
|
questions_allowed: false,
|
|
460
|
-
implementation_allowed:
|
|
464
|
+
implementation_allowed: false,
|
|
461
465
|
context7_required: context7Required,
|
|
462
466
|
context7_verified: false,
|
|
463
467
|
subagents_required: routeRequiresSubagents(route, prompt),
|
|
@@ -480,14 +484,16 @@ async function researchPrepare(args) {
|
|
|
480
484
|
console.log(`Plan: ${path.relative(root, path.join(dir, 'research-plan.md'))}`);
|
|
481
485
|
console.log(`Pipeline: ${path.relative(root, path.join(dir, PIPELINE_PLAN_ARTIFACT))}`);
|
|
482
486
|
console.log(`Paper: ${RESEARCH_PAPER_ARTIFACT}`);
|
|
487
|
+
console.log(`Genius summary: ${RESEARCH_GENIUS_SUMMARY_ARTIFACT}`);
|
|
488
|
+
console.log(`Source skill: ${RESEARCH_SOURCE_SKILL_ARTIFACT}`);
|
|
483
489
|
console.log('Ledgers: source-ledger.json, scout-ledger.json, debate-ledger.json, novelty-ledger.json, falsification-ledger.json');
|
|
484
|
-
console.log(`Run: sks research run ${id} --max-cycles
|
|
490
|
+
console.log(`Run: sks research run ${id} --max-cycles ${RESEARCH_DEFAULT_MAX_CYCLES} --cycle-timeout-minutes ${RESEARCH_DEFAULT_CYCLE_TIMEOUT_MINUTES}`);
|
|
485
491
|
}
|
|
486
492
|
|
|
487
493
|
async function researchRun(args) {
|
|
488
494
|
const root = await sksRoot();
|
|
489
495
|
const id = await resolveMissionId(root, args[0]);
|
|
490
|
-
if (!id) throw new Error('Usage: sks research run <mission-id|latest> [--mock] [--max-cycles N]');
|
|
496
|
+
if (!id) throw new Error('Usage: sks research run <mission-id|latest> [--mock] [--max-cycles N] [--cycle-timeout-minutes N]');
|
|
491
497
|
const { dir, mission } = await loadMission(root, id);
|
|
492
498
|
const planPath = path.join(dir, 'research-plan.json');
|
|
493
499
|
if (!(await exists(planPath))) await writeResearchPlan(dir, mission.prompt || '', {});
|
|
@@ -499,32 +505,46 @@ async function researchRun(args) {
|
|
|
499
505
|
process.exitCode = 2;
|
|
500
506
|
return;
|
|
501
507
|
}
|
|
502
|
-
const maxCycles = readMaxCycles(args,
|
|
508
|
+
const maxCycles = readMaxCycles(args, RESEARCH_DEFAULT_MAX_CYCLES);
|
|
509
|
+
const cycleTimeoutMinutes = readResearchCycleTimeoutMinutes(args);
|
|
510
|
+
const cycleTimeoutMs = cycleTimeoutMinutes * 60 * 1000;
|
|
503
511
|
const mock = flag(args, '--mock');
|
|
504
|
-
await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: 'RESEARCH_RUNNING_NO_QUESTIONS', questions_allowed: false });
|
|
505
|
-
await appendJsonlBounded(path.join(dir, 'events.jsonl'), { ts: nowIso(), type: 'research.run.started', maxCycles, mock });
|
|
512
|
+
await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: 'RESEARCH_RUNNING_NO_QUESTIONS', questions_allowed: false, implementation_allowed: false, research_real_run_required: !mock, research_cycle_timeout_minutes: cycleTimeoutMinutes });
|
|
513
|
+
await appendJsonlBounded(path.join(dir, 'events.jsonl'), { ts: nowIso(), type: 'research.run.started', maxCycles, mock, cycleTimeoutMinutes, real_run_required: !mock });
|
|
506
514
|
if (mock) {
|
|
507
515
|
const gate = await writeMockResearchResult(dir, plan);
|
|
508
|
-
await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: gate.passed ? 'RESEARCH_DONE' : 'RESEARCH_PAUSED', questions_allowed: true });
|
|
516
|
+
await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: gate.passed ? 'RESEARCH_DONE' : 'RESEARCH_PAUSED', questions_allowed: true, implementation_allowed: false });
|
|
509
517
|
console.log(`Mock research done: ${id}`);
|
|
510
518
|
console.log(`Gate: ${gate.passed ? 'passed' : 'blocked'}`);
|
|
511
519
|
return;
|
|
512
520
|
}
|
|
513
521
|
const codex = await getCodexInfo();
|
|
514
522
|
if (!codex.bin) {
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
523
|
+
const blocker = {
|
|
524
|
+
schema_version: 1,
|
|
525
|
+
mission_id: id,
|
|
526
|
+
ts: nowIso(),
|
|
527
|
+
phase: 'RESEARCH_BLOCKED_REAL_RUN_REQUIRED',
|
|
528
|
+
reason: 'Codex CLI not found; normal Research cannot fall back to mock output.',
|
|
529
|
+
required_action: 'Install/configure the Codex CLI or set SKS_CODEX_BIN to a valid executable, then rerun sks research run without --mock.',
|
|
530
|
+
mock_policy: '--mock is allowed only for selftests and dry harness checks.',
|
|
531
|
+
implementation_allowed: false
|
|
532
|
+
};
|
|
533
|
+
await writeJsonAtomic(path.join(dir, 'research-blocker.json'), blocker);
|
|
534
|
+
await appendJsonlBounded(path.join(dir, 'events.jsonl'), { ts: blocker.ts, type: 'research.blocked.real_run_required', reason: blocker.reason, blocker: 'research-blocker.json' });
|
|
535
|
+
await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: 'RESEARCH_BLOCKED_REAL_RUN_REQUIRED', questions_allowed: true, implementation_allowed: false, research_real_run_required: true, blocker: 'research-blocker.json' });
|
|
536
|
+
console.error('Research cannot run real sources: Codex CLI not found.');
|
|
537
|
+
console.error('Mock fallback is disabled for normal Research. Use --mock only for selftests, or install/configure Codex CLI/SKS_CODEX_BIN.');
|
|
538
|
+
process.exitCode = 2;
|
|
519
539
|
return;
|
|
520
540
|
}
|
|
521
541
|
let last = '';
|
|
522
542
|
for (let cycle = 1; cycle <= maxCycles; cycle++) {
|
|
523
543
|
const cycleDir = path.join(dir, 'research', `cycle-${cycle}`);
|
|
524
544
|
const outputFile = path.join(cycleDir, 'final.md');
|
|
525
|
-
await appendJsonlBounded(path.join(dir, 'events.jsonl'), { ts: nowIso(), type: 'research.cycle.start', cycle });
|
|
545
|
+
await appendJsonlBounded(path.join(dir, 'events.jsonl'), { ts: nowIso(), type: 'research.cycle.start', cycle, timeoutMinutes: cycleTimeoutMinutes });
|
|
526
546
|
const prompt = buildResearchPrompt({ id, mission, plan, cycle, previous: last });
|
|
527
|
-
const result = await runCodexExec({ root, prompt, outputFile, json: true, profile: 'sks-research', logDir: cycleDir, timeoutMs:
|
|
547
|
+
const result = await runCodexExec({ root, prompt, outputFile, json: true, profile: 'sks-research', logDir: cycleDir, timeoutMs: cycleTimeoutMs });
|
|
528
548
|
await writeJsonAtomic(path.join(cycleDir, 'process.json'), { code: result.code, stdout_tail: result.stdout, stderr_tail: result.stderr, stdout_bytes: result.stdoutBytes, stderr_bytes: result.stderrBytes, truncated: result.truncated, timed_out: result.timedOut });
|
|
529
549
|
last = await safeReadText(outputFile, result.stdout || result.stderr || '');
|
|
530
550
|
if (containsUserQuestion(last)) {
|
|
@@ -534,7 +554,7 @@ async function researchRun(args) {
|
|
|
534
554
|
}
|
|
535
555
|
const gate = await evaluateResearchGate(dir);
|
|
536
556
|
if (gate.passed) {
|
|
537
|
-
await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: 'RESEARCH_DONE', questions_allowed: true });
|
|
557
|
+
await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: 'RESEARCH_DONE', questions_allowed: true, implementation_allowed: false });
|
|
538
558
|
await appendJsonlBounded(path.join(dir, 'events.jsonl'), { ts: nowIso(), type: 'research.done', cycle });
|
|
539
559
|
await enforceRetention(root).catch(() => {});
|
|
540
560
|
console.log(`Research done: ${id}`);
|
|
@@ -542,7 +562,7 @@ async function researchRun(args) {
|
|
|
542
562
|
}
|
|
543
563
|
await appendJsonlBounded(path.join(dir, 'events.jsonl'), { ts: nowIso(), type: 'research.cycle.continue', cycle, reasons: gate.reasons });
|
|
544
564
|
}
|
|
545
|
-
await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: 'RESEARCH_PAUSED_MAX_CYCLES', questions_allowed: true });
|
|
565
|
+
await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: 'RESEARCH_PAUSED_MAX_CYCLES', questions_allowed: true, implementation_allowed: false });
|
|
546
566
|
console.log(`Research paused after max cycles: ${id}`);
|
|
547
567
|
}
|
|
548
568
|
|
|
@@ -558,19 +578,29 @@ async function researchStatus(args) {
|
|
|
558
578
|
const scoutLedger = await readJson(path.join(dir, 'scout-ledger.json'), null);
|
|
559
579
|
const debateLedger = await readJson(path.join(dir, 'debate-ledger.json'), null);
|
|
560
580
|
const falsificationLedger = await readJson(path.join(dir, 'falsification-ledger.json'), null);
|
|
581
|
+
const sourceSkillText = await readText(path.join(dir, RESEARCH_SOURCE_SKILL_ARTIFACT), '');
|
|
582
|
+
const geniusSummaryText = await readText(path.join(dir, RESEARCH_GENIUS_SUMMARY_ARTIFACT), '');
|
|
561
583
|
const paperText = await readText(path.join(dir, RESEARCH_PAPER_ARTIFACT), '');
|
|
562
584
|
const scoutRows = Array.isArray(scoutLedger?.scouts) ? scoutLedger.scouts : [];
|
|
585
|
+
const sourceLayerRows = Array.isArray(sourceLedger?.source_layers) ? sourceLedger.source_layers : [];
|
|
586
|
+
const sourceLayersCovered = sourceLayerRows.filter((layer) => layer.status === 'covered' && ((Array.isArray(layer.source_ids) && layer.source_ids.length) || (Array.isArray(layer.counterevidence_ids) && layer.counterevidence_ids.length))).length;
|
|
563
587
|
console.log(JSON.stringify({
|
|
564
588
|
mission,
|
|
565
589
|
state,
|
|
566
590
|
gate,
|
|
567
591
|
novelty_entries: ledger?.entries?.length ?? null,
|
|
568
592
|
source_entries: sourceLedger?.sources?.length ?? null,
|
|
593
|
+
source_layers_required: sourceLayerRows.length || gate?.metrics?.source_layers_required || gate?.source_layers_required || null,
|
|
594
|
+
source_layers_covered: gate?.metrics?.source_layers_covered ?? gate?.source_layers_covered ?? (sourceLayerRows.length ? sourceLayersCovered : null),
|
|
595
|
+
triangulation_checks: sourceLedger?.triangulation?.cross_layer_checks?.length ?? gate?.metrics?.triangulation_checks ?? gate?.triangulation_checks ?? null,
|
|
596
|
+
genius_opinion_summaries: gate?.metrics?.genius_opinion_summaries ?? gate?.genius_opinion_summaries ?? (geniusSummaryText.trim() ? countGeniusOpinionSummaries(geniusSummaryText) : null),
|
|
569
597
|
counterevidence_sources: sourceLedger?.counterevidence_sources?.length ?? null,
|
|
570
598
|
xhigh_scouts: scoutRows.length ? scoutRows.filter((scout) => scout.effort === 'xhigh').length : null,
|
|
571
599
|
eureka_moments: scoutRows.length ? scoutRows.filter((scout) => scout.eureka?.exclamation === 'Eureka!' && String(scout.eureka?.idea || '').trim()).length : null,
|
|
572
600
|
scout_findings: scoutRows.length ? scoutRows.reduce((sum, scout) => sum + (Array.isArray(scout.findings) ? scout.findings.length : 0), 0) : null,
|
|
573
601
|
debate_exchanges: debateLedger?.exchanges?.length ?? null,
|
|
602
|
+
research_source_skill_present: Boolean(sourceSkillText.trim()),
|
|
603
|
+
genius_opinion_summary_present: Boolean(geniusSummaryText.trim()),
|
|
574
604
|
paper_present: Boolean(paperText.trim()),
|
|
575
605
|
paper_sections: countResearchPaperSections(paperText),
|
|
576
606
|
falsification_cases: falsificationLedger?.cases?.length ?? null
|
|
@@ -625,11 +655,19 @@ async function safeReadText(file, fallback = '') {
|
|
|
625
655
|
try { return await fsp.readFile(file, 'utf8'); } catch { return fallback; }
|
|
626
656
|
}
|
|
627
657
|
|
|
628
|
-
function
|
|
629
|
-
const i = args.indexOf(
|
|
658
|
+
function readBoundedIntegerFlag(args, name, fallback, min, max) {
|
|
659
|
+
const i = args.indexOf(name);
|
|
630
660
|
const raw = i >= 0 && args[i + 1] ? Number(args[i + 1]) : Number(fallback);
|
|
631
|
-
if (!Number.isFinite(raw)) return Math.max(
|
|
632
|
-
return Math.max(
|
|
661
|
+
if (!Number.isFinite(raw)) return Math.max(min, Number.parseInt(fallback, 10) || min);
|
|
662
|
+
return Math.max(min, Math.min(max, Math.floor(raw)));
|
|
663
|
+
}
|
|
664
|
+
|
|
665
|
+
function readMaxCycles(args, fallback) {
|
|
666
|
+
return readBoundedIntegerFlag(args, '--max-cycles', fallback, 1, 50);
|
|
667
|
+
}
|
|
668
|
+
|
|
669
|
+
function readResearchCycleTimeoutMinutes(args) {
|
|
670
|
+
return readBoundedIntegerFlag(args, '--cycle-timeout-minutes', RESEARCH_DEFAULT_CYCLE_TIMEOUT_MINUTES, RESEARCH_MIN_CYCLE_TIMEOUT_MINUTES, RESEARCH_MAX_CYCLE_TIMEOUT_MINUTES);
|
|
633
671
|
}
|
|
634
672
|
|
|
635
673
|
export async function goalCommand(sub, args) {
|
|
@@ -1580,7 +1618,7 @@ export async function statsCommand(args) {
|
|
|
1580
1618
|
|
|
1581
1619
|
function positionalArgs(args = []) {
|
|
1582
1620
|
const out = [];
|
|
1583
|
-
const valueFlags = new Set(['--format', '--iterations', '--out', '--baseline', '--candidate', '--install-scope', '--max-cycles', '--depth', '--scope', '--transport', '--query', '--topic', '--tokens', '--timeout-ms', '--sql', '--command', '--project-ref', '--agent', '--phase', '--message', '--role', '--max-anchors', '--lines', '--intent', '--changed', '--route', '--skills', '--prompt-signature']);
|
|
1621
|
+
const valueFlags = new Set(['--format', '--iterations', '--out', '--baseline', '--candidate', '--install-scope', '--max-cycles', '--cycle-timeout-minutes', '--depth', '--scope', '--transport', '--query', '--topic', '--tokens', '--timeout-ms', '--sql', '--command', '--project-ref', '--agent', '--phase', '--message', '--role', '--max-anchors', '--lines', '--intent', '--changed', '--route', '--skills', '--prompt-signature']);
|
|
1584
1622
|
for (let i = 0; i < args.length; i++) {
|
|
1585
1623
|
const arg = String(args[i]);
|
|
1586
1624
|
if (valueFlags.has(arg)) {
|
package/src/core/fsx.mjs
CHANGED
|
@@ -5,7 +5,7 @@ import os from 'node:os';
|
|
|
5
5
|
import crypto from 'node:crypto';
|
|
6
6
|
import { spawn } from 'node:child_process';
|
|
7
7
|
|
|
8
|
-
export const PACKAGE_VERSION = '0.7.
|
|
8
|
+
export const PACKAGE_VERSION = '0.7.72';
|
|
9
9
|
export const DEFAULT_PROCESS_TAIL_BYTES = 256 * 1024;
|
|
10
10
|
export const DEFAULT_PROCESS_TIMEOUT_MS = 30 * 60 * 1000;
|
|
11
11
|
|
|
@@ -7,7 +7,7 @@ import { checkHarnessModification, harnessGuardBlockReason } from './harness-gua
|
|
|
7
7
|
import { activeRouteContext, evaluateStop, prepareRoute, promptPipelineContext as routePipelineContext, recordContext7Evidence, recordSubagentEvidence, routePrompt } from './pipeline.mjs';
|
|
8
8
|
import { classifyToolError } from './evaluation.mjs';
|
|
9
9
|
import { REQUIRED_CODEX_MODEL, isForbiddenCodexModel } from './codex-model-guard.mjs';
|
|
10
|
-
import { stripVisibleDecisionAnswerBlocks } from './routes.mjs';
|
|
10
|
+
import { dollarCommand, stripVisibleDecisionAnswerBlocks } from './routes.mjs';
|
|
11
11
|
|
|
12
12
|
const TEAM_DIGEST_MAX_EVENTS = 4;
|
|
13
13
|
const TEAM_DIGEST_MESSAGE_CHARS = 180;
|
|
@@ -77,13 +77,6 @@ function toolFailed(payload = {}) {
|
|
|
77
77
|
return false;
|
|
78
78
|
}
|
|
79
79
|
|
|
80
|
-
function dollarCommand(prompt) {
|
|
81
|
-
const text = String(prompt || '').trim();
|
|
82
|
-
const match = text.match(/^\$([A-Za-z][A-Za-z0-9_-]*)(?:\s|:|$)/)
|
|
83
|
-
|| text.match(/^\[\$([A-Za-z][A-Za-z0-9_-]*)\]\([^)]+\)(?:\s|:|$)/);
|
|
84
|
-
return match ? match[1].toUpperCase() : null;
|
|
85
|
-
}
|
|
86
|
-
|
|
87
80
|
/**
 * Tells whether a prompt begins with one of the known decline phrases
 * (English or Korean), ignoring case and surrounding whitespace.
 *
 * The match is a plain prefix test, so any text that merely starts with one
 * of the listed alternatives also counts.
 *
 * @param {*} prompt - Prompt text; falsy values are treated as an empty string.
 * @returns {boolean} True when the trimmed prompt starts with a decline phrase.
 */
function looksLikeUpdateDecline(prompt) {
  const declinePrefix = /^(no|nope|skip|later|not now|don't|dont|아니|아니요|안해|안 함|나중에|건너뛰|스킵)/i;
  const normalized = String(prompt || '').trim();
  return declinePrefix.test(normalized);
}
|
package/src/core/init.mjs
CHANGED
|
@@ -815,7 +815,7 @@ export async function installSkills(root) {
|
|
|
815
815
|
'computer-use-fast': `---\nname: computer-use-fast\ndescription: Alias for the maximum-speed $Computer-Use/$CU Codex Computer Use lane.\n---\n\nUse the same rules as computer-use: skip Team debate, QA-LOOP clarification, upfront TriWiki refresh, Context7, subagents, and reflection unless explicitly requested. Use Codex Computer Use directly; never substitute Playwright, Chrome MCP, Browser Use, Selenium, Puppeteer, or other browser automation for UI/browser evidence. At the end only, refresh/pack TriWiki, validate it, then provide a concise completion summary plus Honest Mode.\n`,
|
|
816
816
|
'cu': `---\nname: cu\ndescription: Short alias for the maximum-speed $Computer-Use Codex Computer Use lane.\n---\n\nUse the same rules as computer-use. This is a speed lane for focused UI/browser/visual tasks that require Codex Computer Use evidence, with TriWiki refresh/validate and Honest Mode deferred to final closeout.\n`,
|
|
817
817
|
'goal': `---\nname: goal\ndescription: Fast $Goal/$goal bridge overlay for Codex native persisted /goal workflows.\n---\n\nUse when the user invokes $Goal/$goal or asks to persist a workflow with Codex native /goal continuation. Prepare with sks goal create or the $Goal route, write only the lightweight bridge artifacts, then use native Codex /goal create, pause, resume, and clear controls where available. Goal does not replace Team, QA, DB, or other SKS execution routes; continue implementation through the selected route and use Context7 only when external API/library docs are involved. Do not recreate the old no-question loop.\n`,
|
|
818
|
-
'research': `---\nname: research\ndescription: Dollar-command route for $Research or $research frontier discovery workflows.\n---\n\nUse when the user invokes $Research/$research or asks for research, hypotheses, new mechanisms, falsification, or testable predictions. Prefer sks research prepare and sks research run. Run the genius-lens scout council with Einstein/Feynman/Turing/von Neumann-inspired cognitive roles plus a skeptic lens; do not impersonate the historical people. Every Research scout must run with effort=xhigh, record one literal "Eureka!" idea, and participate in a vigorous evidence-bound debate before synthesis.
|
|
818
|
+
'research': `---\nname: research\ndescription: Dollar-command route for $Research or $research frontier discovery workflows.\n---\n\nUse when the user invokes $Research/$research or asks for research, hypotheses, new mechanisms, falsification, or testable predictions. Prefer sks research prepare and sks research run. Run the genius-lens scout council with Einstein/Feynman/Turing/von Neumann-inspired cognitive roles plus a skeptic lens; do not impersonate the historical people. Every Research scout must run with effort=xhigh, record one literal "Eureka!" idea, and participate in a vigorous evidence-bound debate before synthesis. Create research-source-skill.md as a route-local Skill Creator artifact, then maximize layered public web/source search across papers, official/government or leading-institution data, standards/primary docs, current news, public discourse, developer/practitioner sources, and counterevidence before synthesis. Record research-source-skill.md, source-ledger.json, scout-ledger.json, debate-ledger.json, novelty-ledger.json, falsification-ledger.json, research-report.md, research-paper.md, genius-opinion-summary.md, and research-gate.json. Context7 is optional and only needed when the research topic depends on external package/API/framework docs; do not use it as the default research evidence layer. Normal Research may take one or two hours when needed; favor real source collection, cross-layer comparison, falsification, and a concise paper manuscript over speed. Do not use --mock except for selftests or dry harness checks; if live source execution is unavailable, record a blocker and keep the gate unpassed. Do not use for ordinary code edits.\n`,
|
|
819
819
|
'autoresearch': `---\nname: autoresearch\ndescription: Dollar-command route for $AutoResearch or $autoresearch iterative experiment loops.\n---\n\nUse for $AutoResearch, iterative improvement, SEO/GEO, ranking, workflow, benchmark, or experiments. Define program, hypothesis, experiment, metric, keep/discard, falsification, next step, and Honest Mode. Load seo-geo-optimizer for README/npm/GitHub/schema/AI-search work.\n`,
|
|
820
820
|
'db': `---\nname: db\ndescription: Dollar-command route for $DB or $db database and Supabase safety checks.\n---\n\nUse when the user invokes $DB/$db or the task touches SQL, Supabase, Postgres, migrations, Prisma, Drizzle, Knex, MCP database tools, or production data. Run or follow sks db policy, sks db scan, sks db classify, and sks db check. Destructive database operations remain forbidden.\n`,
|
|
821
821
|
'mad-sks': `---\nname: mad-sks\ndescription: Explicit high-risk authorization modifier for $MAD-SKS scoped Supabase MCP DB permission widening.\n---\n\nUse only when the user explicitly invokes $MAD-SKS or top-level sks --mad. It can be combined with another route, such as $MAD-SKS $Team or $DB ... $MAD-SKS; in that case the other command remains the primary workflow and MAD-SKS is only the temporary permission grant. The widened permission applies only while the active mission gate is open, must be deactivated when the task ends, and opens live server work, Supabase MCP database writes, column/schema cleanup, direct execute SQL, migration application when required, and normal targeted DB writes. Keep only catastrophic safeguards: whole database/schema/table removal, truncate, all-row delete/update, reset, dangerous project/branch management, credential exfiltration, persistent security weakening, and unrequested fallback implementation remain blocked. Do not carry MAD-SKS permission into later prompts or routes. The permission profile is centralized in src/core/permission-gates.mjs so skill/hook/MCP-style gates share one decision function.\n`,
|
package/src/core/pipeline.mjs
CHANGED
|
@@ -921,7 +921,7 @@ async function prepareResearch(root, route, task, required) {
|
|
|
921
921
|
await writeResearchPlan(dir, task, {});
|
|
922
922
|
const pipelinePlan = await writePipelinePlan(dir, { missionId: id, route, task, required, ambiguity: { required: false, status: 'direct_route' } });
|
|
923
923
|
await setCurrent(root, routeState(id, route, 'RESEARCH_PREPARED', required, { prompt: task, pipeline_plan_ready: validatePipelinePlan(pipelinePlan).ok, pipeline_plan_path: PIPELINE_PLAN_ARTIFACT }));
|
|
924
|
-
return routeContext(route, id, task, required, 'Run sks research run latest, maximize
|
|
924
|
+
return routeContext(route, id, task, required, 'Run sks research run latest as a real long-running source-gathering pass, never an automatic mock fallback; create research-source-skill.md, maximize layered public source search, require every scout effort=xhigh plus one Eureka! idea, fill source-ledger.json, scout-ledger.json, debate-ledger.json, novelty-ledger.json, falsification-ledger.json, research-report.md, research-paper.md, genius-opinion-summary.md, and pass research-gate.json.');
|
|
925
925
|
}
|
|
926
926
|
|
|
927
927
|
async function prepareAutoResearch(root, route, task, required) {
|
package/src/core/research.mjs
CHANGED
|
@@ -3,6 +3,8 @@ import { appendJsonlBounded, nowIso, readJson, readText, writeJsonAtomic, writeT
|
|
|
3
3
|
import { OUTCOME_RUBRIC } from './proof-field.mjs';
|
|
4
4
|
|
|
5
5
|
export const RESEARCH_PAPER_ARTIFACT = 'research-paper.md';
|
|
6
|
+
export const RESEARCH_SOURCE_SKILL_ARTIFACT = 'research-source-skill.md';
|
|
7
|
+
export const RESEARCH_GENIUS_SUMMARY_ARTIFACT = 'genius-opinion-summary.md';
|
|
6
8
|
export const RESEARCH_PAPER_SECTION_GROUPS = Object.freeze([
|
|
7
9
|
['abstract'],
|
|
8
10
|
['introduction'],
|
|
@@ -52,6 +54,67 @@ export const RESEARCH_SCOUT_COUNCIL = Object.freeze([
|
|
|
52
54
|
}
|
|
53
55
|
]);
|
|
54
56
|
|
|
57
|
+
/**
 * Catalogue of source layers used for layered source retrieval.
 *
 * Each entry carries:
 * - id: layer identifier
 * - label: human-readable name
 * - purpose: what the layer is searched for
 * - evidence_role: role tag for the evidence found in the layer
 * - examples: example sources for the layer
 * - query_templates: search query templates containing a "<topic>" placeholder
 *
 * Only the outer array is frozen (Object.freeze is shallow); the layer objects
 * and their nested arrays are not frozen here.
 */
export const RESEARCH_SOURCE_LAYERS = Object.freeze([
  {
    // Scholarly papers and metadata.
    id: 'academic_literature',
    label: 'Academic literature',
    purpose: 'Find recent papers, preprints, formal reviews, citations, and open scholarly metadata before synthesis.',
    evidence_role: 'formal_evidence',
    examples: ['arXiv', 'Semantic Scholar', 'OpenAlex', 'Crossref', 'PubMed'],
    query_templates: ['"<topic>" arxiv', '"<topic>" site:semanticscholar.org', '"<topic>" OpenAlex Crossref PubMed']
  },
  {
    // Government and institutional datasets and policy material.
    id: 'official_government_data',
    label: 'Official government and leading-institution knowledge',
    purpose: 'Ground claims in public datasets, policy papers, national statistics, and leading-country institutional sources.',
    evidence_role: 'authoritative_baseline',
    examples: ['World Bank', 'OECD', 'Eurostat', 'data.gov', 'data.gov.uk', 'NIST'],
    query_templates: ['"<topic>" site:worldbank.org OR site:oecd.org', '"<topic>" site:data.gov OR site:data.gov.uk', '"<topic>" site:nist.gov']
  },
  {
    // Specifications, standards, and other primary documents.
    id: 'standards_primary_docs',
    label: 'Standards and primary documents',
    purpose: 'Check primary specifications, standards, RFCs, policy originals, and official project documents before relying on summaries.',
    evidence_role: 'primary_source',
    examples: ['IETF RFCs', 'W3C', 'ISO abstracts', 'official standards bodies', 'project primary docs'],
    query_templates: ['"<topic>" RFC standard specification', '"<topic>" W3C IETF NIST standard', '"<topic>" official specification']
  },
  {
    // Recent news reporting.
    id: 'news_current_events',
    label: 'Current news and global reporting',
    purpose: 'Capture recent events, public impact, and regional framing from reputable news and global news indices.',
    evidence_role: 'recency_signal',
    examples: ['GDELT', 'BBC', 'CNN', 'Reuters', 'AP', 'regional reputable outlets'],
    query_templates: ['"<topic>" BBC CNN latest', '"<topic>" GDELT news', '"<topic>" Reuters AP analysis']
  },
  {
    // Community discussion; per its purpose text, popularity is not treated as truth.
    id: 'public_discourse',
    label: 'Public discourse',
    purpose: 'Sample public practitioner and community discourse without treating popularity as truth.',
    evidence_role: 'sentiment_and_edge_cases',
    examples: ['X/Twitter recent search', 'Reddit', 'Hacker News', 'public forums'],
    query_templates: ['"<topic>" site:x.com OR site:twitter.com', '"<topic>" site:reddit.com', '"<topic>" "Hacker News"']
  },
  {
    // Implementation experience from developers and operators.
    id: 'developer_practitioner',
    label: 'Developer and practitioner knowledge',
    purpose: 'Find implementation pitfalls, developer questions, bug reports, and operational lessons.',
    evidence_role: 'practice_feedback',
    examples: ['Stack Overflow', 'Stack Exchange', 'GitHub issues', 'release notes', 'engineering blogs'],
    query_templates: ['"<topic>" site:stackoverflow.com', '"<topic>" site:stackexchange.com', '"<topic>" site:github.com issues']
  },
  {
    // Deliberate search for disconfirming material.
    id: 'counterevidence_factcheck',
    label: 'Counterevidence and fact-checking',
    purpose: 'Actively search for failures, critiques, null results, retractions, fact checks, and source conflicts.',
    evidence_role: 'falsification',
    examples: ['Google Fact Check Tools', 'Retraction Watch', 'critical reviews', 'benchmark failures', 'negative results'],
    query_templates: ['"<topic>" critique failure limitation', '"<topic>" fact check retraction', '"<topic>" counterevidence null result']
  }
]);

/** Frozen list of the layer ids above, in catalogue order. */
export const RESEARCH_SOURCE_LAYER_IDS = Object.freeze(RESEARCH_SOURCE_LAYERS.map((layer) => layer.id));
|
|
117
|
+
|
|
55
118
|
export function createResearchPlan(prompt, opts = {}) {
|
|
56
119
|
const depth = opts.depth || 'frontier';
|
|
57
120
|
return {
|
|
@@ -61,6 +124,11 @@ export function createResearchPlan(prompt, opts = {}) {
|
|
|
61
124
|
created_at: nowIso(),
|
|
62
125
|
methodology: 'genius-scout-council-frontier-discovery-loop',
|
|
63
126
|
objective: 'Find the shortest useful mechanism that can be falsified or applied, grounded in maximum available source retrieval rather than broad summary.',
|
|
127
|
+
execution_policy: {
|
|
128
|
+
normal_run: 'real_long_running_research',
|
|
129
|
+
default_cycle_timeout_minutes: 120,
|
|
130
|
+
mock_policy: '--mock is for selftests and dry harness checks only; normal Research must block rather than silently substitute mock output.'
|
|
131
|
+
},
|
|
64
132
|
outcome_rubric: OUTCOME_RUBRIC,
|
|
65
133
|
research_council: {
|
|
66
134
|
mode: 'persona_inspired_scouts_not_impersonation',
|
|
@@ -88,8 +156,8 @@ export function createResearchPlan(prompt, opts = {}) {
|
|
|
88
156
|
]
|
|
89
157
|
},
|
|
90
158
|
web_research_policy: {
|
|
91
|
-
mode: '
|
|
92
|
-
requirement: 'Use
|
|
159
|
+
mode: 'layered_source_retrieval_and_triangulation',
|
|
160
|
+
requirement: 'Use every safely available public web/source route before synthesis, separated into source layers so the final claim is not dominated by one corpus or platform.',
|
|
93
161
|
query_sets: [
|
|
94
162
|
'first-principles and theory sources',
|
|
95
163
|
'plain-language explanations and empirical examples',
|
|
@@ -97,10 +165,18 @@ export function createResearchPlan(prompt, opts = {}) {
|
|
|
97
165
|
'systems, strategy, scaling, or deployment evidence',
|
|
98
166
|
'counterevidence, failures, critiques, and null results'
|
|
99
167
|
],
|
|
168
|
+
source_layers: RESEARCH_SOURCE_LAYERS,
|
|
100
169
|
source_priority: ['primary_sources', 'official_docs_or_standards', 'peer_reviewed_or_archival_sources', 'reputable_recent_sources', 'credible_counterevidence'],
|
|
170
|
+
skill_creator: {
|
|
171
|
+
artifact: RESEARCH_SOURCE_SKILL_ARTIFACT,
|
|
172
|
+
status: 'route_local_candidate',
|
|
173
|
+
rule: 'Before source gathering, create a route-local source collection skill that names the selected layers, query families, source-quality fields, blockers, and cross-layer triangulation checks. Do not edit generated .agents/skills during a research run.'
|
|
174
|
+
},
|
|
101
175
|
citation_rules: [
|
|
102
176
|
'Every factual claim in the report must cite source-ledger ids or local project evidence.',
|
|
103
177
|
'The final research paper must include references tied to source-ledger ids.',
|
|
178
|
+
'Every required source layer must have at least one cited source or an explicit blocker; blockers keep the research gate unpassed.',
|
|
179
|
+
'The source-ledger must include at least one cross-layer triangulation check comparing formal, current, discourse, practitioner, official, and counterevidence sources.',
|
|
104
180
|
'Every novelty-ledger entry must cite at least one evidence source and at least one falsifier.',
|
|
105
181
|
'If live web search is unavailable, record the blocker in source-ledger.json and keep research-gate.json unpassed.'
|
|
106
182
|
],
|
|
@@ -108,7 +184,9 @@ export function createResearchPlan(prompt, opts = {}) {
|
|
|
108
184
|
independent_scouts: RESEARCH_SCOUT_COUNCIL.length,
|
|
109
185
|
web_search_passes: 1,
|
|
110
186
|
source_entries: 1,
|
|
111
|
-
|
|
187
|
+
source_layers: RESEARCH_SOURCE_LAYER_IDS.length,
|
|
188
|
+
counterevidence_sources: 1,
|
|
189
|
+
triangulation_checks: 1
|
|
112
190
|
}
|
|
113
191
|
},
|
|
114
192
|
rules: [
|
|
@@ -117,25 +195,31 @@ export function createResearchPlan(prompt, opts = {}) {
|
|
|
117
195
|
'Run the genius-lens scout council independently before synthesis.',
|
|
118
196
|
'Every Research scout must run at reasoning_effort=xhigh, record one literal "Eureka!" idea, and participate in the debate.',
|
|
119
197
|
'The scout council must debate vigorously but stay evidence-bound; record challenges and responses in debate-ledger.json.',
|
|
120
|
-
'Maximize safe web/source search and record queries,
|
|
198
|
+
'Maximize safe web/source search as layered source retrieval and record queries, source layers, citations, quality notes, triangulation checks, and blockers in source-ledger.json.',
|
|
199
|
+
`Create ${RESEARCH_SOURCE_SKILL_ARTIFACT} as a route-local source collection skill before synthesis; do not edit generated .agents/skills during the research run.`,
|
|
121
200
|
'Actively seek disconfirming evidence before synthesis.',
|
|
122
201
|
'Turn the surviving research result into research-paper.md with paper-style sections and references.',
|
|
202
|
+
`End every run with ${RESEARCH_GENIUS_SUMMARY_ARTIFACT}, summarizing each genius-lens scout's final opinion, strongest evidence, disagreement, and changed mind.`,
|
|
123
203
|
'Keep unsupported source-free claims as hypotheses only.',
|
|
124
|
-
'Prefer the smallest testable mechanism or implementation probe
|
|
204
|
+
'Prefer the smallest testable mechanism or implementation probe, but do not stop source gathering early for speed when the research question needs a longer pass.',
|
|
125
205
|
'Do not ask the user mid-run; resolve scope using the research plan and safety policy.'
|
|
126
206
|
],
|
|
127
207
|
phases: [
|
|
128
208
|
{ id: 'R0_FRAME', goal: 'Frame the target outcome, constraints, and what would make the idea useful.' },
|
|
129
|
-
{ id: '
|
|
130
|
-
{ id: '
|
|
131
|
-
{ id: '
|
|
132
|
-
{ id: '
|
|
133
|
-
{ id: '
|
|
134
|
-
{ id: '
|
|
209
|
+
{ id: 'R1_SOURCE_SKILL', goal: `Create ${RESEARCH_SOURCE_SKILL_ARTIFACT} with layer-specific search routes, quality fields, and blockers before source gathering.` },
|
|
210
|
+
{ id: 'R2_SOURCE_SEARCH', goal: 'Run layered web/source retrieval across papers, official data, standards, news, public discourse, developer knowledge, and counterevidence.' },
|
|
211
|
+
{ id: 'R3_EUREKA', goal: 'Have each xhigh genius-lens scout shout Eureka! and record one non-obvious idea with source ids.' },
|
|
212
|
+
{ id: 'R4_DEBATE', goal: 'Run a vigorous evidence-bound council debate with every scout challenging or responding.' },
|
|
213
|
+
{ id: 'R5_FALSIFY', goal: 'Attack each mechanism with counterexamples, missing evidence, source conflicts, and failure modes.' },
|
|
214
|
+
{ id: 'R6_APPLY', goal: 'Keep the smallest surviving mechanism, define a cheap probe, and write all ledgers.' },
|
|
215
|
+
{ id: 'R7_PAPER', goal: 'Convert the final research result into a concise paper manuscript with abstract, method, findings, limitations, and references.' },
|
|
216
|
+
{ id: 'R8_GENIUS_SUMMARY', goal: `Write ${RESEARCH_GENIUS_SUMMARY_ARTIFACT} so the final answer can report every scout lens opinion and the council consensus.` }
|
|
135
217
|
],
|
|
136
218
|
required_artifacts: [
|
|
137
219
|
'research-report.md',
|
|
138
220
|
RESEARCH_PAPER_ARTIFACT,
|
|
221
|
+
RESEARCH_GENIUS_SUMMARY_ARTIFACT,
|
|
222
|
+
RESEARCH_SOURCE_SKILL_ARTIFACT,
|
|
139
223
|
'source-ledger.json',
|
|
140
224
|
'scout-ledger.json',
|
|
141
225
|
'debate-ledger.json',
|
|
@@ -153,6 +237,10 @@ export function researchPlanMarkdown(plan) {
|
|
|
153
237
|
lines.push(`Prompt: ${plan.prompt}`);
|
|
154
238
|
lines.push(`Depth: ${plan.depth}`);
|
|
155
239
|
lines.push(`Methodology: ${plan.methodology}`);
|
|
240
|
+
if (plan.execution_policy) {
|
|
241
|
+
lines.push(`Execution: ${plan.execution_policy.normal_run}; default cycle timeout ${plan.execution_policy.default_cycle_timeout_minutes} minutes`);
|
|
242
|
+
lines.push(`Mock policy: ${plan.execution_policy.mock_policy}`);
|
|
243
|
+
}
|
|
156
244
|
lines.push('');
|
|
157
245
|
lines.push('## Rules');
|
|
158
246
|
for (const rule of plan.rules) lines.push(`- ${rule}`);
|
|
@@ -168,6 +256,10 @@ export function researchPlanMarkdown(plan) {
|
|
|
168
256
|
lines.push(`Mode: ${plan.web_research_policy.mode}`);
|
|
169
257
|
lines.push(`Requirement: ${plan.web_research_policy.requirement}`);
|
|
170
258
|
for (const querySet of plan.web_research_policy.query_sets || []) lines.push(`- query set: ${querySet}`);
|
|
259
|
+
if (plan.web_research_policy.skill_creator?.artifact) lines.push(`- source skill artifact: ${plan.web_research_policy.skill_creator.artifact}`);
|
|
260
|
+
for (const layer of plan.web_research_policy.source_layers || []) {
|
|
261
|
+
lines.push(`- layer ${layer.id}: ${layer.purpose}`);
|
|
262
|
+
}
|
|
171
263
|
lines.push('');
|
|
172
264
|
}
|
|
173
265
|
lines.push('## Outcome Rubric');
|
|
@@ -182,15 +274,56 @@ export function researchPlanMarkdown(plan) {
|
|
|
182
274
|
return `${lines.join('\n')}\n`;
|
|
183
275
|
}
|
|
184
276
|
|
|
277
|
+
/**
 * Renders the route-local "Research Source Layer Skill" markdown document.
 *
 * The layer list comes from `plan.web_research_policy.source_layers` when that
 * list is non-empty; otherwise RESEARCH_SOURCE_LAYERS is used. Every layer
 * contributes three lines: its id and purpose, its examples, and its query
 * templates.
 *
 * @param {object} [plan] - Research plan; may be null or undefined.
 * @returns {string} Markdown text ending with a newline.
 */
export function researchSourceSkillMarkdown(plan) {
  const plannedLayers = plan?.web_research_policy?.source_layers;
  const layers = plannedLayers?.length ? plannedLayers : RESEARCH_SOURCE_LAYERS;

  // Per-layer bullet plus its two detail lines.
  const layerLines = [];
  for (const layer of layers) {
    const exampleList = (layer.examples || []).join(', ');
    const templateList = (layer.query_templates || []).join(' | ');
    layerLines.push(`- ${layer.id}: ${layer.purpose}`);
    layerLines.push(` Examples: ${exampleList}`);
    layerLines.push(` Query templates: ${templateList}`);
  }

  const document = [
    '# Research Source Layer Skill',
    '',
    'Status: route-local candidate skill. Use it inside this research mission before scout synthesis. Do not install or edit generated .agents/skills from this artifact.',
    'Real-run policy: collect live sources for as long as needed within the mission timeout; mock or fixture evidence is valid only for explicit --mock selftests.',
    '',
    '## Trigger',
    '- Any `$Research` run that must collect broad public evidence before creative synthesis, debate, falsification, or paper writing.',
    '',
    '## Source Layers',
    ...layerLines,
    '',
    '## Output Contract',
    '- Fill source-ledger.json with `source_layers`, `sources[].layer`, `counterevidence_sources[].layer`, `citation_coverage`, `triangulation.cross_layer_checks`, and `blockers`.',
    '- Each source entry should record title, locator/URL, publisher or author when known, published_at when known, accessed_at, layer, reliability, credibility, stance, supports or undermines, and notes.',
    '- Public discourse sources such as X/Twitter or Reddit are signals and edge cases, not truth. They must be triangulated with formal, official, practitioner, or counterevidence layers.',
    '- If a layer cannot be searched with the available runtime or credentials, record the blocker and keep research-gate.json unpassed.',
    '',
    '## Debate Use',
    '- Every scout must cite source-ledger ids in findings and Eureka ideas.',
    '- The skeptic lens must challenge the strongest claim using counterevidence or source-quality downgrades.',
    '- Synthesis keeps only claims that survive cross-layer triangulation and falsification.',
    ''
  ];
  return `${document.join('\n')}\n`;
}
|
|
308
|
+
|
|
185
309
|
/**
 * Counts how many section groups from RESEARCH_PAPER_SECTION_GROUPS are
 * represented by a markdown heading in the given text.
 *
 * Only lines that start with one to three `#` characters followed by
 * whitespace are treated as headings. A group counts once if any heading
 * contains any of its terms; matching is done on lower-cased text.
 *
 * @param {string} [text=''] - Paper markdown; falsy values are treated as empty.
 * @returns {number} Number of section groups with at least one matching heading.
 */
export function countResearchPaperSections(text = '') {
  const headingPattern = /^#{1,3}\s+/;
  const headings = [];
  for (const line of String(text || '').toLowerCase().split(/\n/)) {
    if (headingPattern.test(line)) headings.push(line);
  }

  let matchedGroups = 0;
  for (const group of RESEARCH_PAPER_SECTION_GROUPS) {
    const groupFound = headings.some((heading) => group.some((term) => heading.includes(term)));
    if (groupFound) matchedGroups += 1;
  }
  return matchedGroups;
}
|
|
189
313
|
|
|
314
|
+
/**
 * Counts how many scouts from RESEARCH_SCOUT_COUNCIL are mentioned in the
 * given text, by id or by label, ignoring case.
 *
 * Note: a scout whose id is empty or missing is compared using an empty
 * string, which every text contains, so such a scout is always counted.
 *
 * @param {string} [text=''] - Summary markdown; falsy values are treated as empty.
 * @returns {number} Number of scouts whose id or label occurs in the text.
 */
export function countGeniusOpinionSummaries(text = '') {
  const haystack = String(text || '').toLowerCase();
  let mentioned = 0;
  for (const scout of RESEARCH_SCOUT_COUNCIL) {
    const labelNeedle = String(scout.label || '').toLowerCase();
    const idNeedle = String(scout.id || '').toLowerCase();
    const foundById = haystack.includes(idNeedle);
    // An empty label never counts as a mention.
    const foundByLabel = labelNeedle !== '' && haystack.includes(labelNeedle);
    if (foundById || foundByLabel) mentioned += 1;
  }
  return mentioned;
}
|
|
321
|
+
|
|
190
322
|
export async function writeResearchPlan(dir, prompt, opts = {}) {
|
|
191
323
|
const plan = createResearchPlan(prompt, opts);
|
|
192
324
|
await writeJsonAtomic(path.join(dir, 'research-plan.json'), plan);
|
|
193
325
|
await writeTextAtomic(path.join(dir, 'research-plan.md'), researchPlanMarkdown(plan));
|
|
326
|
+
await writeTextAtomic(path.join(dir, RESEARCH_SOURCE_SKILL_ARTIFACT), researchSourceSkillMarkdown(plan));
|
|
194
327
|
await writeJsonAtomic(path.join(dir, 'novelty-ledger.json'), {
|
|
195
328
|
schema_version: 1,
|
|
196
329
|
entries: [],
|
|
@@ -210,14 +343,46 @@ export async function writeResearchPlan(dir, prompt, opts = {}) {
|
|
|
210
343
|
}
|
|
211
344
|
|
|
212
345
|
export function defaultSourceLedger(plan = null) {
|
|
346
|
+
const sourceLayers = plan?.web_research_policy?.source_layers?.length ? plan.web_research_policy.source_layers : RESEARCH_SOURCE_LAYERS;
|
|
213
347
|
return {
|
|
214
348
|
schema_version: 1,
|
|
215
|
-
policy: plan?.web_research_policy?.mode || '
|
|
349
|
+
policy: plan?.web_research_policy?.mode || 'layered_source_retrieval_and_triangulation',
|
|
216
350
|
created_at: nowIso(),
|
|
351
|
+
source_layer_skill: {
|
|
352
|
+
artifact: RESEARCH_SOURCE_SKILL_ARTIFACT,
|
|
353
|
+
status: 'planned'
|
|
354
|
+
},
|
|
217
355
|
web_search_passes: 0,
|
|
356
|
+
source_layers: sourceLayers.map((layer) => ({
|
|
357
|
+
id: layer.id,
|
|
358
|
+
label: layer.label,
|
|
359
|
+
required: true,
|
|
360
|
+
status: 'pending',
|
|
361
|
+
evidence_role: layer.evidence_role,
|
|
362
|
+
query_templates: layer.query_templates || [],
|
|
363
|
+
source_ids: [],
|
|
364
|
+
counterevidence_ids: [],
|
|
365
|
+
blocker: null,
|
|
366
|
+
notes: ''
|
|
367
|
+
})),
|
|
368
|
+
layer_coverage: {
|
|
369
|
+
required: sourceLayers.map((layer) => layer.id),
|
|
370
|
+
covered: [],
|
|
371
|
+
missing: sourceLayers.map((layer) => layer.id),
|
|
372
|
+
notes: []
|
|
373
|
+
},
|
|
218
374
|
queries: [],
|
|
219
375
|
sources: [],
|
|
220
376
|
counterevidence_sources: [],
|
|
377
|
+
triangulation: {
|
|
378
|
+
cross_layer_checks: [],
|
|
379
|
+
conflicts: [],
|
|
380
|
+
synthesis_notes: []
|
|
381
|
+
},
|
|
382
|
+
quality_model: {
|
|
383
|
+
reporting_basis: 'Record enough source metadata to make search reproducible, including query, layer, locator, publisher or author, publication date when known, accessed_at, reliability, credibility, stance, and cited claim ids.',
|
|
384
|
+
source_quality_fields: ['layer', 'kind', 'title', 'locator', 'publisher_or_author', 'published_at', 'accessed_at', 'reliability', 'credibility', 'stance', 'supports', 'undermines']
|
|
385
|
+
},
|
|
221
386
|
citation_coverage: {
|
|
222
387
|
all_key_claims_cited: false,
|
|
223
388
|
notes: []
|
|
@@ -282,20 +447,53 @@ export function defaultFalsificationLedger() {
|
|
|
282
447
|
};
|
|
283
448
|
}
|
|
284
449
|
|
|
450
|
+
/**
 * Lists the source-layer ids that apply to a research plan.
 *
 * Uses `plan.web_research_policy.source_layers` when that list is non-empty
 * and RESEARCH_SOURCE_LAYERS otherwise. Layers with a falsy id are dropped.
 *
 * @param {object|null} [plan=null] - Research plan, if one was loaded.
 * @returns {Array} Layer ids in list order.
 */
function sourceLayerIdsForPlan(plan = null) {
  const plannedLayers = plan?.web_research_policy?.source_layers;
  const layers = plannedLayers?.length ? plannedLayers : RESEARCH_SOURCE_LAYERS;
  const ids = layers.map((layer) => layer.id);
  return ids.filter((id) => Boolean(id));
}
|
|
454
|
+
|
|
455
|
+
/**
 * Works out which required source layers a source ledger covers.
 *
 * A required layer counts as covered when either:
 * - an entry in `sources` or `counterevidence_sources` names it through
 *   `layer`, `layer_id`, or `source_layer`; or
 * - a `source_layers` entry for it has status 'covered' and lists at least one
 *   id in `source_ids` or `counterevidence_ids`.
 *
 * @param {object|null} [sourceLedger=null] - Parsed source ledger; missing or
 *   non-array fields are treated as empty lists.
 * @param {Array} [requiredLayerIds=RESEARCH_SOURCE_LAYER_IDS] - Layer ids that must be covered.
 * @returns {{covered: Array, missing: Array, required: Array}} Covered ids in
 *   discovery order, required ids that are not covered, and a copy of the
 *   required ids.
 */
function sourceLayerCoverageStats(sourceLedger = null, requiredLayerIds = RESEARCH_SOURCE_LAYER_IDS) {
  const listOf = (value) => (Array.isArray(value) ? value : []);
  const isRequired = (layerId) => requiredLayerIds.includes(layerId);
  const covered = new Set();

  // Pass 1: individual source rows tagged with a layer.
  const taggedRows = [...listOf(sourceLedger?.sources), ...listOf(sourceLedger?.counterevidence_sources)];
  for (const row of taggedRows) {
    const taggedLayer = row?.layer || row?.layer_id || row?.source_layer;
    if (isRequired(taggedLayer)) covered.add(taggedLayer);
  }

  // Pass 2: per-layer summaries that declare themselves covered and list ids.
  for (const entry of listOf(sourceLedger?.source_layers)) {
    const entryId = entry?.id || entry?.layer;
    const linkedIds = [...listOf(entry?.source_ids), ...listOf(entry?.counterevidence_ids)];
    const declaredCovered = entry?.status === 'covered';
    if (isRequired(entryId) && declaredCovered && linkedIds.length > 0) covered.add(entryId);
  }

  const missing = requiredLayerIds.filter((layerId) => !covered.has(layerId));
  return { covered: Array.from(covered), missing, required: Array.from(requiredLayerIds) };
}
|
|
476
|
+
|
|
285
477
|
export function defaultResearchGate() {
|
|
286
478
|
return {
|
|
287
479
|
passed: false,
|
|
288
480
|
report_present: false,
|
|
289
481
|
paper_present: false,
|
|
290
482
|
paper_sections: 0,
|
|
483
|
+
genius_opinion_summary_present: false,
|
|
484
|
+
genius_opinion_summaries: 0,
|
|
485
|
+
research_source_skill_present: false,
|
|
291
486
|
source_ledger_present: false,
|
|
292
487
|
scout_ledger_present: false,
|
|
293
488
|
debate_ledger_present: false,
|
|
294
489
|
novelty_ledger_present: false,
|
|
295
490
|
falsification_ledger_present: false,
|
|
296
|
-
web_search_policy: '
|
|
491
|
+
web_search_policy: 'layered_source_retrieval_and_triangulation',
|
|
297
492
|
web_search_passes: 0,
|
|
298
493
|
source_entries: 0,
|
|
494
|
+
source_layers_required: RESEARCH_SOURCE_LAYER_IDS.length,
|
|
495
|
+
source_layers_covered: 0,
|
|
496
|
+
triangulation_checks: 0,
|
|
299
497
|
independent_scouts: 0,
|
|
300
498
|
xhigh_scouts: 0,
|
|
301
499
|
eureka_moments: 0,
|
|
@@ -318,9 +516,13 @@ export function defaultResearchGate() {
|
|
|
318
516
|
|
|
319
517
|
export async function evaluateResearchGate(dir) {
|
|
320
518
|
const gate = await readJson(path.join(dir, 'research-gate.json'), defaultResearchGate());
|
|
519
|
+
const plan = await readJson(path.join(dir, 'research-plan.json'), null);
|
|
321
520
|
const reportPresent = await exists(path.join(dir, 'research-report.md'));
|
|
322
521
|
const paperPresent = await exists(path.join(dir, RESEARCH_PAPER_ARTIFACT));
|
|
323
522
|
const paperSections = paperPresent ? countResearchPaperSections(await readText(path.join(dir, RESEARCH_PAPER_ARTIFACT), '')) : 0;
|
|
523
|
+
const geniusSummaryPresent = await exists(path.join(dir, RESEARCH_GENIUS_SUMMARY_ARTIFACT));
|
|
524
|
+
const geniusSummaryCount = geniusSummaryPresent ? countGeniusOpinionSummaries(await readText(path.join(dir, RESEARCH_GENIUS_SUMMARY_ARTIFACT), '')) : 0;
|
|
525
|
+
const sourceSkillPresent = await exists(path.join(dir, RESEARCH_SOURCE_SKILL_ARTIFACT));
|
|
324
526
|
const sourcePresent = await exists(path.join(dir, 'source-ledger.json'));
|
|
325
527
|
const scoutPresent = await exists(path.join(dir, 'scout-ledger.json'));
|
|
326
528
|
const debatePresent = await exists(path.join(dir, 'debate-ledger.json'));
|
|
@@ -333,6 +535,9 @@ export async function evaluateResearchGate(dir) {
|
|
|
333
535
|
const sourceEntries = Array.isArray(sourceLedger?.sources) ? sourceLedger.sources.length : 0;
|
|
334
536
|
const counterEvidenceEntries = Array.isArray(sourceLedger?.counterevidence_sources) ? sourceLedger.counterevidence_sources.length : 0;
|
|
335
537
|
const webSearchPasses = Math.max(Number(gate.web_search_passes || 0), Number(sourceLedger?.web_search_passes || 0));
|
|
538
|
+
const requiredSourceLayers = sourceLayerIdsForPlan(plan);
|
|
539
|
+
const sourceLayerStats = sourceLayerCoverageStats(sourceLedger, requiredSourceLayers);
|
|
540
|
+
const triangulationChecks = Array.isArray(sourceLedger?.triangulation?.cross_layer_checks) ? sourceLedger.triangulation.cross_layer_checks.length : 0;
|
|
336
541
|
const scoutRows = Array.isArray(scoutLedger?.scouts) ? scoutLedger.scouts : [];
|
|
337
542
|
const independentScouts = scoutRows.filter((scout) => Array.isArray(scout.findings) && scout.findings.length > 0).length;
|
|
338
543
|
const xhighScouts = scoutRows.filter((scout) => scout.effort === 'xhigh').length;
|
|
@@ -351,6 +556,9 @@ export async function evaluateResearchGate(dir) {
|
|
|
351
556
|
if (!reportPresent && gate.report_present !== true) reasons.push('research_report_missing');
|
|
352
557
|
if (!paperPresent) reasons.push('research_paper_missing');
|
|
353
558
|
if (paperSections < RESEARCH_PAPER_SECTION_GROUPS.length) reasons.push('research_paper_sections_missing');
|
|
559
|
+
if (!geniusSummaryPresent && gate.genius_opinion_summary_present !== true) reasons.push('genius_opinion_summary_missing');
|
|
560
|
+
if (Math.max(Number(gate.genius_opinion_summaries || 0), geniusSummaryCount) < RESEARCH_SCOUT_COUNCIL.length) reasons.push('genius_opinion_summary_incomplete');
|
|
561
|
+
if (!sourceSkillPresent && gate.research_source_skill_present !== true) reasons.push('research_source_skill_missing');
|
|
354
562
|
if (!sourcePresent && gate.source_ledger_present !== true) reasons.push('source_ledger_missing');
|
|
355
563
|
if (!scoutPresent && gate.scout_ledger_present !== true) reasons.push('scout_ledger_missing');
|
|
356
564
|
if (!debatePresent && gate.debate_ledger_present !== true) reasons.push('debate_ledger_missing');
|
|
@@ -358,6 +566,8 @@ export async function evaluateResearchGate(dir) {
|
|
|
358
566
|
if (!falsificationPresent && gate.falsification_ledger_present !== true) reasons.push('falsification_ledger_missing');
|
|
359
567
|
if (webSearchPasses < 1) reasons.push('web_search_pass_missing');
|
|
360
568
|
if (Math.max(Number(gate.source_entries || 0), sourceEntries) < 1) reasons.push('source_entry_missing');
|
|
569
|
+
if (Math.max(Number(gate.source_layers_covered || 0), sourceLayerStats.covered.length) < requiredSourceLayers.length) reasons.push('source_layer_coverage_missing');
|
|
570
|
+
if (Math.max(Number(gate.triangulation_checks || 0), triangulationChecks) < 1) reasons.push('cross_layer_triangulation_missing');
|
|
361
571
|
if (Math.max(Number(gate.independent_scouts || 0), independentScouts) < RESEARCH_SCOUT_COUNCIL.length) reasons.push('independent_scouts_missing');
|
|
362
572
|
if (Math.max(Number(gate.xhigh_scouts || 0), xhighScouts) < RESEARCH_SCOUT_COUNCIL.length) reasons.push('scout_effort_not_xhigh');
|
|
363
573
|
if (Math.max(Number(gate.eureka_moments || 0), eurekaMoments) < RESEARCH_SCOUT_COUNCIL.length) reasons.push('eureka_missing');
|
|
@@ -380,7 +590,14 @@ export async function evaluateResearchGate(dir) {
|
|
|
380
590
|
metrics: {
|
|
381
591
|
web_search_passes: webSearchPasses,
|
|
382
592
|
paper_sections: Math.max(Number(gate.paper_sections || 0), paperSections),
|
|
593
|
+
genius_opinion_summary_present: geniusSummaryPresent || gate.genius_opinion_summary_present === true,
|
|
594
|
+
genius_opinion_summaries: Math.max(Number(gate.genius_opinion_summaries || 0), geniusSummaryCount),
|
|
595
|
+
research_source_skill_present: sourceSkillPresent || gate.research_source_skill_present === true,
|
|
383
596
|
source_entries: Math.max(Number(gate.source_entries || 0), sourceEntries),
|
|
597
|
+
source_layers_required: requiredSourceLayers.length,
|
|
598
|
+
source_layers_covered: Math.max(Number(gate.source_layers_covered || 0), sourceLayerStats.covered.length),
|
|
599
|
+
source_layers_missing: sourceLayerStats.missing,
|
|
600
|
+
triangulation_checks: Math.max(Number(gate.triangulation_checks || 0), triangulationChecks),
|
|
384
601
|
independent_scouts: Math.max(Number(gate.independent_scouts || 0), independentScouts),
|
|
385
602
|
xhigh_scouts: Math.max(Number(gate.xhigh_scouts || 0), xhighScouts),
|
|
386
603
|
eureka_moments: Math.max(Number(gate.eureka_moments || 0), eurekaMoments),
|
|
@@ -399,41 +616,89 @@ export async function evaluateResearchGate(dir) {
|
|
|
399
616
|
}
|
|
400
617
|
|
|
401
618
|
export async function writeMockResearchResult(dir, plan) {
|
|
619
|
+
const mockLayerSources = RESEARCH_SOURCE_LAYERS.map((layer, index) => ({
|
|
620
|
+
id: `mock-source-${index + 1}`,
|
|
621
|
+
layer: layer.id,
|
|
622
|
+
kind: 'selftest',
|
|
623
|
+
title: `Mock ${layer.label} coverage`,
|
|
624
|
+
locator: 'writeMockResearchResult',
|
|
625
|
+
accessed_at: nowIso(),
|
|
626
|
+
reliability: 'mock',
|
|
627
|
+
credibility: 'mock',
|
|
628
|
+
stance: layer.id === 'counterevidence_factcheck' ? 'undermines' : 'supports',
|
|
629
|
+
supports: layer.id === 'counterevidence_factcheck' ? [] : ['mock-insight-1'],
|
|
630
|
+
undermines: layer.id === 'counterevidence_factcheck' ? ['mock-insight-1'] : [],
|
|
631
|
+
notes: `Selftest fixture for the ${layer.id} source layer; no live web call is made in --mock mode.`
|
|
632
|
+
}));
|
|
402
633
|
const sourceLedger = {
|
|
403
634
|
schema_version: 1,
|
|
404
|
-
policy: '
|
|
635
|
+
policy: 'layered_source_retrieval_and_triangulation',
|
|
405
636
|
created_at: nowIso(),
|
|
406
637
|
mode: 'selftest_mock',
|
|
638
|
+
source_layer_skill: {
|
|
639
|
+
artifact: RESEARCH_SOURCE_SKILL_ARTIFACT,
|
|
640
|
+
status: 'created'
|
|
641
|
+
},
|
|
407
642
|
web_search_passes: 1,
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
643
|
+
source_layers: RESEARCH_SOURCE_LAYERS.map((layer, index) => ({
|
|
644
|
+
id: layer.id,
|
|
645
|
+
label: layer.label,
|
|
646
|
+
required: true,
|
|
647
|
+
status: 'covered',
|
|
648
|
+
evidence_role: layer.evidence_role,
|
|
649
|
+
query_templates: layer.query_templates || [],
|
|
650
|
+
source_ids: [`mock-source-${index + 1}`],
|
|
651
|
+
counterevidence_ids: layer.id === 'counterevidence_factcheck' ? ['mock-counter-1'] : [],
|
|
652
|
+
blocker: null,
|
|
653
|
+
notes: 'Mock mode records layer coverage without live web access.'
|
|
654
|
+
})),
|
|
655
|
+
layer_coverage: {
|
|
656
|
+
required: [...RESEARCH_SOURCE_LAYER_IDS],
|
|
657
|
+
covered: [...RESEARCH_SOURCE_LAYER_IDS],
|
|
658
|
+
missing: [],
|
|
659
|
+
notes: ['mock fixture covers every research source layer']
|
|
660
|
+
},
|
|
661
|
+
queries: RESEARCH_SOURCE_LAYERS.map((layer) => ({
|
|
662
|
+
scout_id: layer.id === 'counterevidence_factcheck' ? 'skeptic' : null,
|
|
663
|
+
layer: layer.id,
|
|
664
|
+
query: `mock ${layer.id} layered research source search for ${plan.prompt}`,
|
|
665
|
+
status: 'mocked'
|
|
666
|
+
})),
|
|
667
|
+
sources: mockLayerSources,
|
|
426
668
|
counterevidence_sources: [
|
|
427
669
|
{
|
|
428
670
|
id: 'mock-counter-1',
|
|
671
|
+
layer: 'counterevidence_factcheck',
|
|
429
672
|
kind: 'selftest',
|
|
430
673
|
title: 'Mock overclaim counterexample',
|
|
431
674
|
locator: 'writeMockResearchResult',
|
|
432
675
|
accessed_at: nowIso(),
|
|
676
|
+
reliability: 'mock',
|
|
677
|
+
credibility: 'mock',
|
|
678
|
+
stance: 'undermines',
|
|
433
679
|
undermines: ['mock-insight-1'],
|
|
434
680
|
notes: 'Shows the gate must fail if a run produces no tests or falsifiers.'
|
|
435
681
|
}
|
|
436
682
|
],
|
|
683
|
+
triangulation: {
|
|
684
|
+
cross_layer_checks: [
|
|
685
|
+
{
|
|
686
|
+
id: 'mock-triangulation-1',
|
|
687
|
+
claim: 'Research Mode should not synthesize from a single corpus.',
|
|
688
|
+
source_ids: ['mock-source-1', 'mock-source-2', 'mock-source-5', 'mock-counter-1'],
|
|
689
|
+
result: 'survives_with_layered_evidence_requirement'
|
|
690
|
+
},
|
|
691
|
+
{
|
|
692
|
+
id: 'mock-triangulation-2',
|
|
693
|
+
claim: 'Public discourse is useful only when checked against formal and official layers.',
|
|
694
|
+
source_ids: ['mock-source-1', 'mock-source-2', 'mock-source-5', 'mock-source-6'],
|
|
695
|
+
result: 'downgrade_popularity_to_signal_not_truth'
|
|
696
|
+
}
|
|
697
|
+
],
|
|
698
|
+
conflicts: [],
|
|
699
|
+
synthesis_notes: ['mock fixture requires cross-layer checks before synthesis']
|
|
700
|
+
},
|
|
701
|
+
quality_model: defaultSourceLedger(plan).quality_model,
|
|
437
702
|
citation_coverage: {
|
|
438
703
|
all_key_claims_cited: true,
|
|
439
704
|
notes: ['mock report and novelty entry cite mock-source-1 and mock-counter-1']
|
|
@@ -521,26 +786,51 @@ export async function writeMockResearchResult(dir, plan) {
|
|
|
521
786
|
}
|
|
522
787
|
]
|
|
523
788
|
};
|
|
789
|
+
const geniusSummary = [
|
|
790
|
+
'# Genius Opinion Summary',
|
|
791
|
+
'',
|
|
792
|
+
`Prompt: ${plan.prompt}`,
|
|
793
|
+
'',
|
|
794
|
+
'## Scout Opinions',
|
|
795
|
+
...RESEARCH_SCOUT_COUNCIL.flatMap((scout) => [
|
|
796
|
+
`### ${scout.label} (${scout.id})`,
|
|
797
|
+
`Final opinion: ${scout.label} wants the run to preserve ${scout.mandate.toLowerCase()} while producing a cited, falsifiable insight.`,
|
|
798
|
+
'Strongest evidence: mock-source-1 plus the layered source ledger.',
|
|
799
|
+
'Main disagreement: whether formal structure or cheap empirical probes should dominate the first pass.',
|
|
800
|
+
'Changed mind: accepted that citation coverage, counterevidence, and triangulation are gates before synthesis.',
|
|
801
|
+
''
|
|
802
|
+
]),
|
|
803
|
+
'## Council Consensus',
|
|
804
|
+
'The council keeps one modest, testable claim: Research Mode is useful when it writes a source-cited paper, records every scout opinion, triangulates across source layers, and exposes the next decisive test.'
|
|
805
|
+
].join('\n');
|
|
806
|
+
await writeTextAtomic(path.join(dir, RESEARCH_SOURCE_SKILL_ARTIFACT), researchSourceSkillMarkdown(plan));
|
|
524
807
|
await writeJsonAtomic(path.join(dir, 'source-ledger.json'), sourceLedger);
|
|
525
808
|
await writeJsonAtomic(path.join(dir, 'scout-ledger.json'), scoutLedger);
|
|
526
809
|
await writeJsonAtomic(path.join(dir, 'debate-ledger.json'), debateLedger);
|
|
527
810
|
await writeJsonAtomic(path.join(dir, 'falsification-ledger.json'), falsificationLedger);
|
|
528
811
|
await writeJsonAtomic(path.join(dir, 'novelty-ledger.json'), ledger);
|
|
529
|
-
await writeTextAtomic(path.join(dir,
|
|
530
|
-
await writeTextAtomic(path.join(dir,
|
|
812
|
+
await writeTextAtomic(path.join(dir, RESEARCH_GENIUS_SUMMARY_ARTIFACT), `${geniusSummary}\n`);
|
|
813
|
+
await writeTextAtomic(path.join(dir, 'research-report.md'), `# SKS Research Report\n\nPrompt: ${plan.prompt}\n\n## Scout Council Synthesis\n\nThe mock council keeps one cited methodological insight: a research mode should force layered, falsifiable novelty rather than summarize known material from one corpus [mock-source-1].\n\n## Source Coverage\n\nThis is a selftest fixture. It records mock coverage for academic literature, official data, standards, news, public discourse, developer knowledge, and counterevidence layers, but does not perform live web browsing in --mock mode.\n\n## Candidate Insight\n\nA useful research run must produce source-cited, cross-layer triangulated, falsifiable novelty with scout findings and a cheap probe.\n\n## Falsification\n\nThe claim is weak if no new testable prediction, counterevidence source, cross-layer check, or experiment is produced [mock-counter-1].\n\n## Next Test\n\nCompare this mode against a summary-only run and score candidate insights, falsification passes, citation coverage, source-layer coverage, triangulation checks, and testability.\n`);
|
|
814
|
+
await writeTextAtomic(path.join(dir, RESEARCH_PAPER_ARTIFACT), `# Research Paper: ${plan.prompt}\n\n## Abstract\nA source-cited research run should produce cross-layer, falsifiable novelty rather than only summarize known material.\n\n## Introduction\nThe mock topic is evaluated as a research workflow outcome with layered source coverage [mock-source-1].\n\n## Methodology\nFive xhigh scouts produce Eureka ideas, debate, triangulate source layers, and falsify the strongest claim.\n\n## Findings\nThe surviving finding is that useful research needs cited novelty, source-layer coverage, cross-layer triangulation, and a cheap decisive probe.\n\n## Discussion\nThe debate favors gate-backed evidence over narrative confidence, and treats public discourse as signal rather than truth.\n\n## Limitations and Falsification\nThe claim fails without sources, counterevidence, triangulation checks, or testable predictions [mock-counter-1].\n\n## Conclusion and Next Experiment\nCompare this loop against a summary-only baseline and score testable insights.\n\n## References\n- [mock-source-1] Mock academic literature coverage.\n- [mock-source-2] Mock official government and leading-institution knowledge coverage.\n- [mock-source-3] Mock standards and primary documents coverage.\n- [mock-source-4] Mock current news and global reporting coverage.\n- [mock-source-5] Mock public discourse coverage.\n- [mock-source-6] Mock developer and practitioner knowledge coverage.\n- [mock-source-7] Mock counterevidence and fact-checking coverage.\n- [mock-counter-1] Mock overclaim counterexample.\n`);
|
|
531
815
|
await writeJsonAtomic(path.join(dir, 'research-gate.json'), {
|
|
532
816
|
...defaultResearchGate(),
|
|
533
817
|
passed: true,
|
|
534
818
|
report_present: true,
|
|
535
819
|
paper_present: true,
|
|
536
820
|
paper_sections: RESEARCH_PAPER_SECTION_GROUPS.length,
|
|
821
|
+
genius_opinion_summary_present: true,
|
|
822
|
+
genius_opinion_summaries: RESEARCH_SCOUT_COUNCIL.length,
|
|
823
|
+
research_source_skill_present: true,
|
|
537
824
|
source_ledger_present: true,
|
|
538
825
|
scout_ledger_present: true,
|
|
539
826
|
debate_ledger_present: true,
|
|
540
827
|
novelty_ledger_present: true,
|
|
541
828
|
falsification_ledger_present: true,
|
|
542
829
|
web_search_passes: 1,
|
|
543
|
-
source_entries:
|
|
830
|
+
source_entries: mockLayerSources.length,
|
|
831
|
+
source_layers_required: RESEARCH_SOURCE_LAYER_IDS.length,
|
|
832
|
+
source_layers_covered: RESEARCH_SOURCE_LAYER_IDS.length,
|
|
833
|
+
triangulation_checks: sourceLedger.triangulation.cross_layer_checks.length,
|
|
544
834
|
independent_scouts: RESEARCH_SCOUT_COUNCIL.length,
|
|
545
835
|
xhigh_scouts: RESEARCH_SCOUT_COUNCIL.length,
|
|
546
836
|
eureka_moments: RESEARCH_SCOUT_COUNCIL.length,
|
|
@@ -553,12 +843,12 @@ export async function writeMockResearchResult(dir, plan) {
|
|
|
553
843
|
falsification_cases: 1,
|
|
554
844
|
testable_predictions: 1,
|
|
555
845
|
citation_coverage: true,
|
|
556
|
-
evidence: ['mock research report', 'mock research paper', 'mock source ledger', 'mock scout ledger', 'mock debate ledger', 'mock novelty ledger', 'mock falsification ledger'],
|
|
846
|
+
evidence: ['mock research report', 'mock research paper', 'mock genius opinion summary', 'mock research source skill', 'mock layered source ledger', 'mock scout ledger', 'mock debate ledger', 'mock novelty ledger', 'mock falsification ledger'],
|
|
557
847
|
notes: ['mock mode records the new contract but does not call a model or perform live web browsing']
|
|
558
848
|
});
|
|
559
849
|
return evaluateResearchGate(dir);
|
|
560
850
|
}
|
|
561
851
|
|
|
562
852
|
/**
 * Builds the instruction prompt text for a single Research Mode cycle.
 *
 * The function returns one template string. The values interpolated into it are:
 * the mission id, `mission.prompt`, the cycle value, the
 * RESEARCH_SOURCE_SKILL_ARTIFACT name, the plan serialized with
 * `JSON.stringify(plan, null, 2)`, and the last 2500 characters of `previous`
 * (coerced with `String(previous || '')`, so a falsy value contributes an
 * empty tail).
 *
 * @param {object} args
 * @param {*} args.id - Interpolated after "MISSION:" and into the `.sneakoscope/missions/${id}/` output path.
 * @param {*} args.mission - Only `mission.prompt` is read; it is interpolated after "TOPIC:".
 * @param {*} args.plan - Serialized as indented JSON under "RESEARCH PLAN:".
 * @param {*} args.cycle - Interpolated after "CYCLE:".
 * @param {*} args.previous - Earlier output; only its trailing 2500 characters are included.
 * @returns {string} The assembled prompt text.
 */
export function buildResearchPrompt({ id, mission, plan, cycle, previous }) {
|
|
563
|
-
return `You are running SKS Research Mode.\nMISSION: ${id}\nTOPIC: ${mission.prompt}\nCYCLE: ${cycle}\nMODE: Genius Scout Council + frontier discovery loop. Use maximum reasoning depth available under the current Codex profile.\nNO-QUESTION LOCK: Do not ask the user. Resolve scope from research-plan.json and current project evidence.\nSAFETY: Destructive database operations and unsafe external actions are forbidden. Prefer read-only inspection, local files, and cited public sources.\nPERSONA POLICY: Use Einstein/Feynman/Turing/von Neumann-inspired scout lenses only as cognitive roles. Do not impersonate, roleplay private identity, or speak as the historical people.\nSCOUT EFFORT POLICY: Every Research scout agent must use reasoning_effort=xhigh. Record effort: "xhigh" for every scout in scout-ledger.json. Any lower-effort scout output must keep research-gate.json unpassed.\nEUREKA POLICY: Every scout must literally write "Eureka!" and one non-obvious, source-linked idea before debate.\nDEBATE POLICY: The scouts must debate vigorously but stay evidence-bound. Every scout must challenge or respond at least once, and debate-ledger.json must record the exchanges before synthesis.\nPAPER POLICY: After the report and ledgers, write research-paper.md as a concise manuscript with Abstract, Introduction, Methodology, Findings/Results, Discussion, Limitations/Falsification, Conclusion/Next Experiment, and References.\
|
|
853
|
+
return `You are running SKS Research Mode.\nMISSION: ${id}\nTOPIC: ${mission.prompt}\nCYCLE: ${cycle}\nMODE: Genius Scout Council + frontier discovery loop. Use maximum reasoning depth available under the current Codex profile.\nLONG-RUN REAL-RESEARCH POLICY: Normal Research is allowed to take one or two hours when the question requires it. Do real source gathering and evidence comparison; do not shortcut into mock, fixture, or summary-only output. If live source access is unavailable, write the blocker and keep the gate unpassed.\nNO-QUESTION LOCK: Do not ask the user. Resolve scope from research-plan.json and current project evidence.\nSAFETY: Destructive database operations and unsafe external actions are forbidden. Prefer read-only inspection, local files, and cited public sources.\nPERSONA POLICY: Use Einstein/Feynman/Turing/von Neumann-inspired scout lenses only as cognitive roles. Do not impersonate, roleplay private identity, or speak as the historical people.\nSCOUT EFFORT POLICY: Every Research scout agent must use reasoning_effort=xhigh. Record effort: "xhigh" for every scout in scout-ledger.json. Any lower-effort scout output must keep research-gate.json unpassed.\nEUREKA POLICY: Every scout must literally write "Eureka!" and one non-obvious, source-linked idea before debate.\nDEBATE POLICY: The scouts must debate vigorously but stay evidence-bound. Every scout must challenge or respond at least once, and debate-ledger.json must record the exchanges before synthesis.\nPAPER POLICY: After the report and ledgers, write research-paper.md as a concise manuscript with Abstract, Introduction, Methodology, Findings/Results, Discussion, Limitations/Falsification, Conclusion/Next Experiment, and References.\nSOURCE SKILL POLICY: Create or update ${RESEARCH_SOURCE_SKILL_ARTIFACT} as a route-local source collection skill before synthesis. It must name the selected source layers, query routes, quality fields, blockers, and cross-layer triangulation checks. 
Do not edit generated .agents/skills during the research run.\nWEB/SOURCE POLICY: Run layered source retrieval across every safely available layer before synthesis: latest public papers, official government or leading-institution data, standards or primary docs, current news including BBC/CNN/GDELT-style sources when relevant, public discourse including X/Twitter and Reddit when available, developer/practitioner sources such as Stack Overflow/Stack Exchange/GitHub, and counterevidence or fact-checking sources. Treat public discourse as signal, not truth. If a layer cannot be searched, record the blocker in source-ledger.json and do not pass the gate.\nRESEARCH PLAN:\n${JSON.stringify(plan, null, 2)}\n\nOBJECTIVE: Produce genuinely useful candidate discoveries: non-obvious hypotheses, mechanisms, predictions, or experiments. Do not merely summarize. Mark uncertainty clearly.\n\nREQUIRED PROCESS:\n1. Source skill first: create ${RESEARCH_SOURCE_SKILL_ARTIFACT} with source layers, query templates, quality fields, blockers, and triangulation rules.\n2. Layered source search: create source-ledger.json with source_layers, queries, source ids, source quality notes, counterevidence sources, triangulation.cross_layer_checks, citation coverage, and blockers.\n3. Independent xhigh scouts: create scout-ledger.json with effort=xhigh, a literal Eureka! idea, findings, source_ids, falsifiers, and cheap_probes for every scout lens.\n4. Debate: create debate-ledger.json with evidence-bound challenge/response exchanges involving every scout before synthesis.\n5. Falsification: create falsification-ledger.json with attacks, missing evidence, source conflicts, and decisive next tests.\n6. Synthesis: write research-report.md and novelty-ledger.json only after cited scout findings, Eureka ideas, debate, cross-layer triangulation, and falsification are recorded.\n7. 
Paper: write research-paper.md as a paper-style manuscript with source-ledger references and limitations.\n\nREQUIRED OUTPUT FILES in .sneakoscope/missions/${id}/:\n- research-report.md: concise report with framing, source coverage, scout synthesis, debate synthesis, hypotheses, falsification, predictions, and next experiments. Cite source-ledger ids for factual claims.\n- research-paper.md: paper manuscript with Abstract, Introduction, Methodology, Findings/Results, Discussion, Limitations/Falsification, Conclusion/Next Experiment, and References using source-ledger ids.\n- ${RESEARCH_SOURCE_SKILL_ARTIFACT}: route-local source collection skill; it is evidence for the Skill Creator step and must not mutate generated .agents/skills.\n- source-ledger.json: layered web/source queries, source ids, source priority, source quality notes, counterevidence sources, citation coverage, triangulation checks, and blockers.\n- scout-ledger.json: one entry per scout lens with effort, eureka, query_set, findings, source_ids, falsifiers, and cheap_probes.\n- debate-ledger.json: evidence-bound challenge/response exchanges, participants, changed minds, and unresolved conflicts.\n- novelty-ledger.json: entries with claim, novelty, confidence, falsifiability, evidence source ids, falsifiers, next_experiment.\n- falsification-ledger.json: attacks/counterexamples/source conflicts, result, and next_decisive_tests.\n- research-gate.json: set passed only when all ledgers exist, ${RESEARCH_SOURCE_SKILL_ARTIFACT} exists, research-paper.md exists with required paper sections, layered web/source retrieval covered every required source layer, at least one cross-layer triangulation check exists, all scouts have effort=xhigh, all scouts have literal Eureka! 
ideas, every scout participated in debate, at least one counterevidence source exists, citation coverage is complete, at least one insight survived falsification, at least one testable prediction exists, and unsupported breakthrough claims are zero.\n\nPrevious cycle tail:\n${String(previous || '').slice(-2500)}\n`;
|
|
564
854
|
}
|
package/src/core/routes.mjs
CHANGED
|
@@ -390,10 +390,10 @@ export const ROUTES = [
|
|
|
390
390
|
command: '$Research',
|
|
391
391
|
mode: 'RESEARCH',
|
|
392
392
|
route: 'research mission',
|
|
393
|
-
description: 'Frontier discovery with xhigh genius-lens scouts, Eureka ideas, vigorous evidence-bound debate,
|
|
394
|
-
requiredSkills: ['research', 'research-discovery', 'pipeline-runner',
|
|
395
|
-
lifecycle: ['research_plan', '
|
|
396
|
-
context7Policy: '
|
|
393
|
+
description: 'Frontier discovery with xhigh genius-lens scouts, Eureka ideas, vigorous evidence-bound debate, layered public source retrieval, falsification, a paper manuscript, a final genius-opinion summary, and testable predictions.',
|
|
394
|
+
requiredSkills: ['research', 'research-discovery', 'pipeline-runner', REFLECTION_SKILL_NAME, 'honest-mode'],
|
|
395
|
+
lifecycle: ['research_plan', 'source_skill', 'layered_source_ledger', 'xhigh_scout_council', 'eureka_moments', 'debate_ledger', 'report', 'paper', 'genius_opinion_summary', 'novelty_ledger', 'falsification_ledger', 'research_gate', 'post_route_reflection', 'honest_mode'],
|
|
396
|
+
context7Policy: 'if_external_docs',
|
|
397
397
|
reasoningPolicy: 'xhigh',
|
|
398
398
|
stopGate: 'research-gate.json',
|
|
399
399
|
cliEntrypoint: 'sks research prepare|run',
|
|
@@ -537,7 +537,7 @@ export const COMMAND_CATALOG = [
|
|
|
537
537
|
{ name: 'init', usage: 'sks init [--force] [--local-only] [--install-scope global|project]', description: 'Initialize the local SKS control surface.' },
|
|
538
538
|
{ name: 'selftest', usage: 'sks selftest [--mock]', description: 'Run local smoke tests without calling a model.' },
|
|
539
539
|
{ name: 'goal', usage: 'sks goal create|pause|resume|clear|status ...', description: 'Prepare and control the fast SKS bridge overlay for Codex native persisted /goal workflows.' },
|
|
540
|
-
{ name: 'research', usage: 'sks research prepare|run|status ...', description: 'Run
|
|
540
|
+
{ name: 'research', usage: 'sks research prepare|run|status ...', description: 'Run long-form real research missions with xhigh scout Eureka ideas, debate, layered sources, paper, novelty, and falsification gates.' },
|
|
541
541
|
{ name: 'db', usage: 'sks db policy|scan|mcp-config|classify|check ...', description: 'Inspect and enforce database/Supabase safety policy.' },
|
|
542
542
|
{ name: 'eval', usage: 'sks eval run|compare|thresholds ...', description: 'Run deterministic context-quality and performance evidence checks.' },
|
|
543
543
|
{ name: 'harness', usage: 'sks harness fixture|review [--json]', description: 'Run Harness Growth Factory fixtures for forgetting, skills, experiments, tool taxonomy, permissions, MultiAgentV2, and tmux views.' },
|
|
@@ -585,9 +585,21 @@ function leadingDollarCommandMatch(prompt) {
|
|
|
585
585
|
|| text.match(/^\[\$([A-Za-z][A-Za-z0-9_-]*)\]\([^)]+\)(?:\s|:|$)/);
|
|
586
586
|
}
|
|
587
587
|
|
|
588
|
+
/**
 * Finds the first accepted `$Command` token anywhere inside a prompt.
 *
 * Two token shapes are collected: markdown-link tokens such as
 * `[$Name](target)`, and bare `$Name` tokens that sit at the start of the text
 * or directly after whitespace or an opening bracket and are followed by
 * whitespace, a colon, end of text, or closing punctuation. Candidates are
 * ordered by position, and the first one that `routeByDollarCommand` accepts,
 * or whose name equals `MAD-SKS` ignoring case, is returned.
 *
 * @param {*} prompt - Prompt text; falsy values are treated as an empty string.
 * @returns {{index: number, command: string} | null} The earliest accepted token, or null when none qualifies.
 */
function embeddedDollarCommandMatch(prompt) {
  const source = String(prompt || '');
  const linkTokens = Array.from(
    source.matchAll(/\[\$([A-Za-z][A-Za-z0-9_-]*)\]\([^)]+\)/g),
    (hit) => ({ index: hit.index, command: hit[1] })
  );
  // Group 1 is the captured prefix character (or empty at start of text);
  // adding its length makes `index` point at the `$` rather than the prefix.
  const bareTokens = Array.from(
    source.matchAll(/(^|[\s([{<])\$([A-Za-z][A-Za-z0-9_-]*)(?=\s|:|$|[.,!?;)\]}])/g),
    (hit) => ({ index: hit.index + hit[1].length, command: hit[2] })
  );
  const ordered = [...linkTokens, ...bareTokens].sort((left, right) => left.index - right.index);
  for (const candidate of ordered) {
    if (routeByDollarCommand(candidate.command)) return candidate;
    if (String(candidate.command || '').toUpperCase() === 'MAD-SKS') return candidate;
  }
  return null;
}
|
|
597
|
+
|
|
588
598
|
/**
 * Resolves the `$Command` name referenced by a prompt, upper-cased.
 *
 * A match from `leadingDollarCommandMatch` takes precedence and its first
 * capture group is used as the name. Otherwise the result of
 * `embeddedDollarCommandMatch` is consulted and its `command` field is used.
 *
 * @param {*} prompt - Prompt text, passed unchanged to both matchers.
 * @returns {string | null} The upper-cased command name, or null when neither matcher finds one.
 */
export function dollarCommand(prompt) {
  const head = leadingDollarCommandMatch(prompt);
  if (head) {
    return head[1].toUpperCase();
  }
  const inline = embeddedDollarCommandMatch(prompt);
  if (!inline) {
    return null;
  }
  return inline.command.toUpperCase();
}
|
|
592
604
|
|
|
593
605
|
export function hasMadSksSignal(prompt = '') {
|