sneakoscope 0.7.68 → 0.7.72
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/package.json +1 -1
- package/src/cli/main.mjs +19 -16
- package/src/cli/maintenance-commands.mjs +66 -24
- package/src/core/fsx.mjs +1 -1
- package/src/core/hooks-runtime.mjs +2 -9
- package/src/core/init.mjs +1 -1
- package/src/core/pipeline.mjs +1 -1
- package/src/core/research.mjs +357 -36
- package/src/core/routes.mjs +19 -7
package/README.md
CHANGED
|
@@ -192,7 +192,7 @@ sks qa-loop prepare "http://localhost:3000"
|
|
|
192
192
|
sks qa-loop run latest --max-cycles 2
|
|
193
193
|
sks goal create "persist this migration workflow"
|
|
194
194
|
sks research prepare "evaluate this approach"
|
|
195
|
-
sks research run latest --max-cycles 3
|
|
195
|
+
sks research run latest --max-cycles 3 --cycle-timeout-minutes 120
|
|
196
196
|
sks research status latest
|
|
197
197
|
sks db scan --json
|
|
198
198
|
sks wiki refresh
|
|
@@ -211,7 +211,7 @@ sks skill-dream run --json
|
|
|
211
211
|
sks code-structure scan --json
|
|
212
212
|
```
|
|
213
213
|
|
|
214
|
-
`sks research` prepares a genius-lens scout council, requires every scout to run at `xhigh`, records one literal `Eureka!` idea per scout, runs an evidence-bound debate,
|
|
214
|
+
`sks research` prepares a genius-lens scout council, requires every scout to run at `xhigh`, records one literal `Eureka!` idea per scout, runs an evidence-bound debate, and now creates `research-source-skill.md` as a route-local source collection skill before synthesis. Normal Research is intentionally allowed to take one or two hours when the problem needs it; `--mock` is only for selftests or dry harness checks, and a real run blocks with `research-blocker.json` instead of silently substituting mock output when the Codex execution path is unavailable. The source layer contract separates latest papers, official/government or leading-institution sources, standards/primary docs, current news such as BBC/CNN/GDELT-style sources, public discourse such as X/Reddit, developer/practitioner knowledge such as Stack Overflow/GitHub, and counterevidence/fact-checking; `source-ledger.json` must record layer coverage, source quality, blockers, citations, and cross-layer triangulation. Context7 is optional for `$Research` and only becomes relevant when the research topic specifically depends on package, API, framework, or SDK documentation. Research runs require `research-report.md`, `research-paper.md`, `genius-opinion-summary.md`, `research-source-skill.md`, `source-ledger.json`, `scout-ledger.json`, `debate-ledger.json`, `novelty-ledger.json`, `falsification-ledger.json`, and `research-gate.json` so they stay source-backed, adversarially checked, falsifiable, paper-ready, and clear about every scout lens opinion. `research status` reports source entries, source-layer coverage, triangulation checks, counterevidence, xhigh scout count, Eureka moments, debate exchanges, paper presence/sections, genius-opinion summary coverage, scout findings, and falsification cases alongside the gate.
|
|
215
215
|
|
|
216
216
|
`sks pipeline plan` shows the active route lane, kept/skipped stages, verification commands, and no-unrequested-fallback invariant. `sks proof-field scan` is the lightweight rubric for small changes; risky or broad signals return to the full Team/Honest path.
|
|
217
217
|
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "sneakoscope",
|
|
3
3
|
"displayName": "ㅅㅋㅅ",
|
|
4
|
-
"version": "0.7.68",
|
|
4
|
+
"version": "0.7.72",
|
|
5
5
|
"description": "Sneakoscope Codex: database-safe Codex CLI/App harness with Team, Goal, AutoResearch, TriWiki, and Honest Mode.",
|
|
6
6
|
"type": "module",
|
|
7
7
|
"homepage": "https://github.com/mandarange/Sneakoscope-Codex#readme",
|
package/src/cli/main.mjs
CHANGED
|
@@ -22,7 +22,7 @@ import { bumpProjectVersion, installVersionGitHook, runVersionPreCommit, version
|
|
|
22
22
|
import { rustInfo } from '../core/rust-accelerator.mjs';
|
|
23
23
|
import { renderCartridge, validateCartridge, driftCartridge, snapshotCartridge } from '../core/gx-renderer.mjs';
|
|
24
24
|
import { defaultEvaluationScenario, runEvaluationBenchmark } from '../core/evaluation.mjs';
|
|
25
|
-
import {
|
|
25
|
+
import { evaluateResearchGate, writeMockResearchResult, writeResearchPlan } from '../core/research.mjs';
|
|
26
26
|
import {
|
|
27
27
|
PPT_AUDIENCE_STRATEGY_ARTIFACT,
|
|
28
28
|
PPT_CLEANUP_REPORT_ARTIFACT,
|
|
@@ -211,7 +211,7 @@ Usage:
|
|
|
211
211
|
sks team attach-tmux [mission-id|latest]
|
|
212
212
|
sks team cleanup-tmux [mission-id|latest]
|
|
213
213
|
sks research prepare "topic" [--depth frontier]
|
|
214
|
-
sks research run <mission-id|latest> [--mock] [--max-cycles N]
|
|
214
|
+
sks research run <mission-id|latest> [--mock] [--max-cycles N] [--cycle-timeout-minutes N]
|
|
215
215
|
sks research status <mission-id|latest>
|
|
216
216
|
sks db policy
|
|
217
217
|
sks db scan [--migrations] [--json]
|
|
@@ -1963,7 +1963,7 @@ function readMaxCycles(args, fallback) {
|
|
|
1963
1963
|
|
|
1964
1964
|
function positionalArgs(args = []) {
|
|
1965
1965
|
const out = [];
|
|
1966
|
-
const valueFlags = new Set(['--format', '--iterations', '--out', '--baseline', '--candidate', '--install-scope', '--max-cycles', '--depth', '--scope', '--transport', '--query', '--topic', '--tokens', '--timeout-ms', '--sql', '--command', '--project-ref', '--agent', '--phase', '--message', '--role', '--max-anchors', '--lines', '--dir']);
|
|
1966
|
+
const valueFlags = new Set(['--format', '--iterations', '--out', '--baseline', '--candidate', '--install-scope', '--max-cycles', '--cycle-timeout-minutes', '--depth', '--scope', '--transport', '--query', '--topic', '--tokens', '--timeout-ms', '--sql', '--command', '--project-ref', '--agent', '--phase', '--message', '--role', '--max-anchors', '--lines', '--dir']);
|
|
1967
1967
|
for (let i = 0; i < args.length; i++) {
|
|
1968
1968
|
const arg = String(args[i]);
|
|
1969
1969
|
if (valueFlags.has(arg)) {
|
|
@@ -2647,19 +2647,21 @@ async function selftest() {
|
|
|
2647
2647
|
const hookResearchTeamResult = await runProcess(process.execPath, [hookBin, 'hook', 'user-prompt-submit'], { cwd: hookResearchMarkdownTmp, input: hookResearchTeamPayload, env: { SKS_DISABLE_UPDATE_CHECK: '1' }, timeoutMs: 15000, maxOutputBytes: 256 * 1024 });
|
|
2648
2648
|
if (hookResearchTeamResult.code !== 0) throw new Error(`selftest: active Team setup before markdown $Research hook exited ${hookResearchTeamResult.code}: ${hookResearchTeamResult.stderr}`);
|
|
2649
2649
|
const hookResearchTeamState = await readJson(stateFile(hookResearchMarkdownTmp), {});
|
|
2650
|
-
const hookResearchMarkdownPayload = JSON.stringify({ cwd: hookResearchMarkdownTmp, prompt: '[$research](
|
|
2650
|
+
const hookResearchMarkdownPayload = JSON.stringify({ cwd: hookResearchMarkdownTmp, prompt: '논문 [$research](x) 팀 커밋 푸쉬 연구' });
|
|
2651
2651
|
const hookResearchMarkdownResult = await runProcess(process.execPath, [hookBin, 'hook', 'user-prompt-submit'], { cwd: hookResearchMarkdownTmp, input: hookResearchMarkdownPayload, env: { SKS_DISABLE_UPDATE_CHECK: '1' }, timeoutMs: 15000, maxOutputBytes: 256 * 1024 });
|
|
2652
2652
|
if (hookResearchMarkdownResult.code !== 0) throw new Error(`selftest: markdown $Research hook exited ${hookResearchMarkdownResult.code}: ${hookResearchMarkdownResult.stderr}`);
|
|
2653
2653
|
const hookResearchMarkdownJson = JSON.parse(hookResearchMarkdownResult.stdout);
|
|
2654
2654
|
const hookResearchMarkdownContext = hookResearchMarkdownJson.hookSpecificOutput?.additionalContext || '';
|
|
2655
|
-
if (!hookResearchMarkdownContext.includes('$Research route prepared')) throw new Error('selftest: markdown
|
|
2656
|
-
if (hookResearchMarkdownContext.includes(`Active Team mission ${hookResearchTeamState.mission_id}`)) throw new Error('selftest:
|
|
2657
|
-
if (!String(hookResearchMarkdownJson.systemMessage || '').includes('Research route') || String(hookResearchMarkdownJson.systemMessage || '').includes('QA-LOOP route')) throw new Error('selftest:
|
|
2655
|
+
if (!hookResearchMarkdownContext.includes('$Research route prepared')) throw new Error('selftest: markdown research hook');
|
|
2656
|
+
if (hookResearchMarkdownContext.includes(`Active Team mission ${hookResearchTeamState.mission_id}`)) throw new Error('selftest: stale Team context');
|
|
2657
|
+
if (!String(hookResearchMarkdownJson.systemMessage || '').includes('Research route') || String(hookResearchMarkdownJson.systemMessage || '').includes('QA-LOOP route')) throw new Error('selftest: research hook message');
|
|
2658
2658
|
const hookResearchMarkdownState = await readJson(stateFile(hookResearchMarkdownTmp), {});
|
|
2659
|
-
if (hookResearchMarkdownState.mode !== 'RESEARCH' || hookResearchMarkdownState.route !== 'Research' || hookResearchMarkdownState.mission_id === hookResearchTeamState.mission_id || hookResearchMarkdownState.stop_gate !== 'research-gate.json' || !hookResearchMarkdownState.pipeline_plan_ready) throw new Error('selftest:
|
|
2659
|
+
if (hookResearchMarkdownState.mode !== 'RESEARCH' || hookResearchMarkdownState.route !== 'Research' || hookResearchMarkdownState.mission_id === hookResearchTeamState.mission_id || hookResearchMarkdownState.stop_gate !== 'research-gate.json' || !hookResearchMarkdownState.pipeline_plan_ready) throw new Error('selftest: research hook state');
|
|
2660
2660
|
const hookResearchMissionDir = missionDir(hookResearchMarkdownTmp, hookResearchMarkdownState.mission_id);
|
|
2661
|
-
if (!(await exists(path.join(hookResearchMissionDir, PIPELINE_PLAN_ARTIFACT)))) throw new Error('selftest:
|
|
2662
|
-
|
|
2661
|
+
if (!(await exists(path.join(hookResearchMissionDir, PIPELINE_PLAN_ARTIFACT)))) throw new Error('selftest: research hook plan');
|
|
2662
|
+
const rss = 'research-source-skill.md';
|
|
2663
|
+
const gos = 'genius-opinion-summary.md';
|
|
2664
|
+
for (const artifact of [rss, 'source-ledger.json', 'scout-ledger.json', 'debate-ledger.json', 'falsification-ledger.json']) {
|
|
2663
2665
|
if (!(await exists(path.join(hookResearchMissionDir, artifact)))) throw new Error(`selftest: hook research ${artifact}`);
|
|
2664
2666
|
}
|
|
2665
2667
|
const hookPayload = JSON.stringify({ cwd: hookGoalTmp, prompt: '$Goal 로그인 세션 만료 UX 개선' });
|
|
@@ -3782,15 +3784,16 @@ async function selftest() {
|
|
|
3782
3784
|
if (wikiPruneDryRun.candidates < 1 || !wikiPruneDryRun.actions.some((action) => action.reason === 'low_wiki_trust')) throw new Error('selftest: wiki prune did not flag low-trust artifact');
|
|
3783
3785
|
const { dir: researchDir, mission: researchMission } = await createMission(tmp, { mode: 'research', prompt: '새로운 코드 리뷰 방법론 연구' });
|
|
3784
3786
|
const researchPlan = await writeResearchPlan(researchDir, researchMission.prompt, {});
|
|
3785
|
-
if (researchPlan.methodology !== 'genius-scout-council-frontier-discovery-loop' || researchPlan.web_research_policy?.mode !== '
|
|
3786
|
-
|
|
3787
|
-
|
|
3788
|
-
|
|
3787
|
+
if (researchPlan.methodology !== 'genius-scout-council-frontier-discovery-loop' || researchPlan.web_research_policy?.mode !== 'layered_source_retrieval_and_triangulation') throw new Error('selftest: research plan contract');
|
|
3788
|
+
const rArts = researchPlan.required_artifacts || [];
|
|
3789
|
+
for (const a of [rss, 'source-ledger.json', 'scout-ledger.json', 'debate-ledger.json', 'falsification-ledger.json']) if (!rArts.includes(a) || !(await exists(path.join(researchDir, a)))) throw new Error('selftest: research artifact');
|
|
3790
|
+
if (!rArts.includes('research-paper.md') || !rArts.includes(gos)) throw new Error('selftest: research paper');
|
|
3789
3791
|
const initialResearchGate = await evaluateResearchGate(researchDir);
|
|
3790
|
-
if (initialResearchGate.passed ||
|
|
3792
|
+
if (initialResearchGate.passed || ['web_search_pass_missing', 'eureka_missing', 'debate_exchanges_missing', 'research_paper_missing'].some((r) => !initialResearchGate.reasons.includes(r))) throw new Error('selftest: research gate');
|
|
3791
3793
|
const researchGate = await writeMockResearchResult(researchDir, researchPlan);
|
|
3792
3794
|
if (!researchGate.passed) throw new Error('selftest: mock research gate did not pass');
|
|
3793
|
-
|
|
3795
|
+
const rm = researchGate.metrics || {};
|
|
3796
|
+
if (['independent_scouts', 'xhigh_scouts', 'eureka_moments', 'debate_participants', 'genius_opinion_summaries'].some((m) => rm[m] < 5) || ['counterevidence_sources', 'falsification_cases', 'triangulation_checks'].some((m) => rm[m] < 1) || rm.paper_sections < 8 || rm.citation_coverage !== true || rm.source_layers_covered < 7) throw new Error('selftest: research metrics');
|
|
3794
3797
|
await writeJsonAtomic(path.join(dir, 'done-gate.json'), { passed: true, unsupported_critical_claims: 0, database_safety_violation: false, database_safety_reviewed: true, visual_drift: 'low', wiki_drift: 'low', tests_required: false });
|
|
3795
3798
|
const gate = await evaluateDoneGate(tmp, id);
|
|
3796
3799
|
if (!gate.passed) throw new Error('selftest: done gate');
|
package/src/cli/maintenance-commands.mjs
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import path from 'node:path';
|
|
2
2
|
import fsp from 'node:fs/promises';
|
|
3
|
-
import { readJson, writeJsonAtomic, writeTextAtomic, appendJsonlBounded, nowIso, exists, ensureDir, packageRoot, dirSize, formatBytes, PACKAGE_VERSION, sksRoot, readStdin } from '../core/fsx.mjs';
|
|
3
|
+
import { readJson, readText, writeJsonAtomic, writeTextAtomic, appendJsonlBounded, nowIso, exists, ensureDir, packageRoot, dirSize, formatBytes, PACKAGE_VERSION, sksRoot, readStdin } from '../core/fsx.mjs';
|
|
4
4
|
import { initProject } from '../core/init.mjs';
|
|
5
5
|
import { getCodexInfo, runCodexExec } from '../core/codex-adapter.mjs';
|
|
6
6
|
import { createMission, loadMission, findLatestMission, missionDir, setCurrent, stateFile } from '../core/mission.mjs';
|
|
@@ -8,14 +8,14 @@ import { buildQuestionSchema, writeQuestions } from '../core/questions.mjs';
|
|
|
8
8
|
import { sealContract } from '../core/decision-contract.mjs';
|
|
9
9
|
import { buildQaLoopQuestionSchema, buildQaLoopPrompt, evaluateQaGate, qaStatus, writeMockQaResult, writeQaLoopArtifacts } from '../core/qa-loop.mjs';
|
|
10
10
|
import { containsUserQuestion, noQuestionContinuationReason } from '../core/no-question-guard.mjs';
|
|
11
|
-
import { buildResearchPrompt, evaluateResearchGate, writeMockResearchResult, writeResearchPlan } from '../core/research.mjs';
|
|
11
|
+
import { RESEARCH_GENIUS_SUMMARY_ARTIFACT, RESEARCH_PAPER_ARTIFACT, RESEARCH_SOURCE_SKILL_ARTIFACT, countGeniusOpinionSummaries, countResearchPaperSections, buildResearchPrompt, evaluateResearchGate, writeMockResearchResult, writeResearchPlan } from '../core/research.mjs';
|
|
12
12
|
import { storageReport, enforceRetention, pruneWikiArtifacts } from '../core/retention.mjs';
|
|
13
13
|
import { evaluateDoneGate } from '../core/hproof.mjs';
|
|
14
14
|
import { renderCartridge, validateCartridge, driftCartridge, snapshotCartridge } from '../core/gx-renderer.mjs';
|
|
15
15
|
import { DEFAULT_EVAL_THRESHOLDS, compareEvaluationReports, runEvaluationBenchmark } from '../core/evaluation.mjs';
|
|
16
16
|
import { contextCapsule } from '../core/triwiki-attention.mjs';
|
|
17
17
|
import { rgbaKey, rgbaToWikiCoord, validateWikiCoordinateIndex } from '../core/wiki-coordinate.mjs';
|
|
18
|
-
import { ALLOWED_REASONING_EFFORTS, CODEX_COMPUTER_USE_ONLY_POLICY, DOLLAR_SKILL_NAMES, FROM_CHAT_IMG_CHECKLIST_ARTIFACT, FROM_CHAT_IMG_COVERAGE_ARTIFACT, FROM_CHAT_IMG_QA_LOOP_ARTIFACT, FROM_CHAT_IMG_SOURCE_INVENTORY_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS, FROM_CHAT_IMG_VISUAL_MAP_ARTIFACT, FROM_CHAT_IMG_WORK_ORDER_ARTIFACT, RECOMMENDED_SKILLS, ROUTES, hasFromChatImgSignal, reflectionRequiredForRoute, routePrompt, routeReasoning, routeRequiresSubagents, stackCurrentDocsPolicy, stripVisibleDecisionAnswerBlocks, triwikiContextTracking } from '../core/routes.mjs';
|
|
18
|
+
import { ALLOWED_REASONING_EFFORTS, CODEX_COMPUTER_USE_ONLY_POLICY, DOLLAR_SKILL_NAMES, FROM_CHAT_IMG_CHECKLIST_ARTIFACT, FROM_CHAT_IMG_COVERAGE_ARTIFACT, FROM_CHAT_IMG_QA_LOOP_ARTIFACT, FROM_CHAT_IMG_SOURCE_INVENTORY_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS, FROM_CHAT_IMG_VISUAL_MAP_ARTIFACT, FROM_CHAT_IMG_WORK_ORDER_ARTIFACT, RECOMMENDED_SKILLS, ROUTES, hasFromChatImgSignal, reflectionRequiredForRoute, routeNeedsContext7, routePrompt, routeReasoning, routeRequiresSubagents, stackCurrentDocsPolicy, stripVisibleDecisionAnswerBlocks, triwikiContextTracking } from '../core/routes.mjs';
|
|
19
19
|
import { TEAM_DECOMPOSITION_ARTIFACT, TEAM_GRAPH_ARTIFACT, TEAM_INBOX_DIR, TEAM_RUNTIME_TASKS_ARTIFACT, teamRuntimePlanMetadata, teamRuntimeRequiredArtifacts, writeTeamRuntimeArtifacts } from '../core/team-dag.mjs';
|
|
20
20
|
import { appendTeamEvent, formatAgentReasoning, formatRoleCounts, initTeamLive, normalizeTeamSpec, parseTeamSpecArgs, readTeamControl, readTeamDashboard, readTeamLive, readTeamTranscriptTail, renderTeamAgentLane, renderTeamCleanupSummary, renderTeamWatch, requestTeamSessionCleanup, teamCleanupRequested, teamReasoningPolicy } from '../core/team-live.mjs';
|
|
21
21
|
import { evaluateTeamReviewPolicyGate, MIN_TEAM_REVIEWER_LANES, MIN_TEAM_REVIEW_POLICY_TEXT, teamReviewPolicy } from '../core/team-review-policy.mjs';
|
|
@@ -42,6 +42,10 @@ const flag = (args, name) => args.includes(name);
|
|
|
42
42
|
const promptOf = (args) => args.filter((x) => !String(x).startsWith('--')).join(' ').trim();
|
|
43
43
|
const TEAM_SESSION_CLEANUP_ARTIFACT = 'team-session-cleanup.json';
|
|
44
44
|
const REPOSITORY_URL = 'https://github.com/mandarange/Sneakoscope-Codex.git';
|
|
45
|
+
const RESEARCH_DEFAULT_MAX_CYCLES = 3;
|
|
46
|
+
const RESEARCH_DEFAULT_CYCLE_TIMEOUT_MINUTES = 120;
|
|
47
|
+
const RESEARCH_MIN_CYCLE_TIMEOUT_MINUTES = 15;
|
|
48
|
+
const RESEARCH_MAX_CYCLE_TIMEOUT_MINUTES = 240;
|
|
45
49
|
|
|
46
50
|
async function resolveMissionId(root, arg) { return (!arg || arg === 'latest') ? findLatestMission(root) : arg; }
|
|
47
51
|
|
|
@@ -429,7 +433,7 @@ async function researchPrepare(args) {
|
|
|
429
433
|
if (!prompt) throw new Error('Missing research topic.');
|
|
430
434
|
const { id, dir } = await createMission(root, { mode: 'research', prompt });
|
|
431
435
|
const route = ROUTES.find((entry) => entry.id === 'Research') || routePrompt('$Research');
|
|
432
|
-
const context7Required =
|
|
436
|
+
const context7Required = routeNeedsContext7(route, prompt);
|
|
433
437
|
const reasoning = routeReasoning(route, prompt);
|
|
434
438
|
const plan = await writeResearchPlan(dir, prompt, { depth: readFlagValue(args, '--depth', 'frontier') });
|
|
435
439
|
const pipelinePlan = await writePipelinePlan(dir, { missionId: id, route, task: prompt, required: context7Required, ambiguity: { required: false, status: 'direct_research_cli' } });
|
|
@@ -457,7 +461,7 @@ async function researchPrepare(args) {
|
|
|
457
461
|
mode: route.mode,
|
|
458
462
|
phase: 'RESEARCH_PREPARED',
|
|
459
463
|
questions_allowed: false,
|
|
460
|
-
implementation_allowed:
|
|
464
|
+
implementation_allowed: false,
|
|
461
465
|
context7_required: context7Required,
|
|
462
466
|
context7_verified: false,
|
|
463
467
|
subagents_required: routeRequiresSubagents(route, prompt),
|
|
@@ -479,14 +483,17 @@ async function researchPrepare(args) {
|
|
|
479
483
|
console.log(`Methodology: ${plan.methodology}`);
|
|
480
484
|
console.log(`Plan: ${path.relative(root, path.join(dir, 'research-plan.md'))}`);
|
|
481
485
|
console.log(`Pipeline: ${path.relative(root, path.join(dir, PIPELINE_PLAN_ARTIFACT))}`);
|
|
486
|
+
console.log(`Paper: ${RESEARCH_PAPER_ARTIFACT}`);
|
|
487
|
+
console.log(`Genius summary: ${RESEARCH_GENIUS_SUMMARY_ARTIFACT}`);
|
|
488
|
+
console.log(`Source skill: ${RESEARCH_SOURCE_SKILL_ARTIFACT}`);
|
|
482
489
|
console.log('Ledgers: source-ledger.json, scout-ledger.json, debate-ledger.json, novelty-ledger.json, falsification-ledger.json');
|
|
483
|
-
console.log(`Run: sks research run ${id} --max-cycles
|
|
490
|
+
console.log(`Run: sks research run ${id} --max-cycles ${RESEARCH_DEFAULT_MAX_CYCLES} --cycle-timeout-minutes ${RESEARCH_DEFAULT_CYCLE_TIMEOUT_MINUTES}`);
|
|
484
491
|
}
|
|
485
492
|
|
|
486
493
|
async function researchRun(args) {
|
|
487
494
|
const root = await sksRoot();
|
|
488
495
|
const id = await resolveMissionId(root, args[0]);
|
|
489
|
-
if (!id) throw new Error('Usage: sks research run <mission-id|latest> [--mock] [--max-cycles N]');
|
|
496
|
+
if (!id) throw new Error('Usage: sks research run <mission-id|latest> [--mock] [--max-cycles N] [--cycle-timeout-minutes N]');
|
|
490
497
|
const { dir, mission } = await loadMission(root, id);
|
|
491
498
|
const planPath = path.join(dir, 'research-plan.json');
|
|
492
499
|
if (!(await exists(planPath))) await writeResearchPlan(dir, mission.prompt || '', {});
|
|
@@ -498,32 +505,46 @@ async function researchRun(args) {
|
|
|
498
505
|
process.exitCode = 2;
|
|
499
506
|
return;
|
|
500
507
|
}
|
|
501
|
-
const maxCycles = readMaxCycles(args,
|
|
508
|
+
const maxCycles = readMaxCycles(args, RESEARCH_DEFAULT_MAX_CYCLES);
|
|
509
|
+
const cycleTimeoutMinutes = readResearchCycleTimeoutMinutes(args);
|
|
510
|
+
const cycleTimeoutMs = cycleTimeoutMinutes * 60 * 1000;
|
|
502
511
|
const mock = flag(args, '--mock');
|
|
503
|
-
await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: 'RESEARCH_RUNNING_NO_QUESTIONS', questions_allowed: false });
|
|
504
|
-
await appendJsonlBounded(path.join(dir, 'events.jsonl'), { ts: nowIso(), type: 'research.run.started', maxCycles, mock });
|
|
512
|
+
await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: 'RESEARCH_RUNNING_NO_QUESTIONS', questions_allowed: false, implementation_allowed: false, research_real_run_required: !mock, research_cycle_timeout_minutes: cycleTimeoutMinutes });
|
|
513
|
+
await appendJsonlBounded(path.join(dir, 'events.jsonl'), { ts: nowIso(), type: 'research.run.started', maxCycles, mock, cycleTimeoutMinutes, real_run_required: !mock });
|
|
505
514
|
if (mock) {
|
|
506
515
|
const gate = await writeMockResearchResult(dir, plan);
|
|
507
|
-
await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: gate.passed ? 'RESEARCH_DONE' : 'RESEARCH_PAUSED', questions_allowed: true });
|
|
516
|
+
await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: gate.passed ? 'RESEARCH_DONE' : 'RESEARCH_PAUSED', questions_allowed: true, implementation_allowed: false });
|
|
508
517
|
console.log(`Mock research done: ${id}`);
|
|
509
518
|
console.log(`Gate: ${gate.passed ? 'passed' : 'blocked'}`);
|
|
510
519
|
return;
|
|
511
520
|
}
|
|
512
521
|
const codex = await getCodexInfo();
|
|
513
522
|
if (!codex.bin) {
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
523
|
+
const blocker = {
|
|
524
|
+
schema_version: 1,
|
|
525
|
+
mission_id: id,
|
|
526
|
+
ts: nowIso(),
|
|
527
|
+
phase: 'RESEARCH_BLOCKED_REAL_RUN_REQUIRED',
|
|
528
|
+
reason: 'Codex CLI not found; normal Research cannot fall back to mock output.',
|
|
529
|
+
required_action: 'Install/configure the Codex CLI or set SKS_CODEX_BIN to a valid executable, then rerun sks research run without --mock.',
|
|
530
|
+
mock_policy: '--mock is allowed only for selftests and dry harness checks.',
|
|
531
|
+
implementation_allowed: false
|
|
532
|
+
};
|
|
533
|
+
await writeJsonAtomic(path.join(dir, 'research-blocker.json'), blocker);
|
|
534
|
+
await appendJsonlBounded(path.join(dir, 'events.jsonl'), { ts: blocker.ts, type: 'research.blocked.real_run_required', reason: blocker.reason, blocker: 'research-blocker.json' });
|
|
535
|
+
await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: 'RESEARCH_BLOCKED_REAL_RUN_REQUIRED', questions_allowed: true, implementation_allowed: false, research_real_run_required: true, blocker: 'research-blocker.json' });
|
|
536
|
+
console.error('Research cannot run real sources: Codex CLI not found.');
|
|
537
|
+
console.error('Mock fallback is disabled for normal Research. Use --mock only for selftests, or install/configure Codex CLI/SKS_CODEX_BIN.');
|
|
538
|
+
process.exitCode = 2;
|
|
518
539
|
return;
|
|
519
540
|
}
|
|
520
541
|
let last = '';
|
|
521
542
|
for (let cycle = 1; cycle <= maxCycles; cycle++) {
|
|
522
543
|
const cycleDir = path.join(dir, 'research', `cycle-${cycle}`);
|
|
523
544
|
const outputFile = path.join(cycleDir, 'final.md');
|
|
524
|
-
await appendJsonlBounded(path.join(dir, 'events.jsonl'), { ts: nowIso(), type: 'research.cycle.start', cycle });
|
|
545
|
+
await appendJsonlBounded(path.join(dir, 'events.jsonl'), { ts: nowIso(), type: 'research.cycle.start', cycle, timeoutMinutes: cycleTimeoutMinutes });
|
|
525
546
|
const prompt = buildResearchPrompt({ id, mission, plan, cycle, previous: last });
|
|
526
|
-
const result = await runCodexExec({ root, prompt, outputFile, json: true, profile: 'sks-research', logDir: cycleDir, timeoutMs:
|
|
547
|
+
const result = await runCodexExec({ root, prompt, outputFile, json: true, profile: 'sks-research', logDir: cycleDir, timeoutMs: cycleTimeoutMs });
|
|
527
548
|
await writeJsonAtomic(path.join(cycleDir, 'process.json'), { code: result.code, stdout_tail: result.stdout, stderr_tail: result.stderr, stdout_bytes: result.stdoutBytes, stderr_bytes: result.stderrBytes, truncated: result.truncated, timed_out: result.timedOut });
|
|
528
549
|
last = await safeReadText(outputFile, result.stdout || result.stderr || '');
|
|
529
550
|
if (containsUserQuestion(last)) {
|
|
@@ -533,7 +554,7 @@ async function researchRun(args) {
|
|
|
533
554
|
}
|
|
534
555
|
const gate = await evaluateResearchGate(dir);
|
|
535
556
|
if (gate.passed) {
|
|
536
|
-
await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: 'RESEARCH_DONE', questions_allowed: true });
|
|
557
|
+
await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: 'RESEARCH_DONE', questions_allowed: true, implementation_allowed: false });
|
|
537
558
|
await appendJsonlBounded(path.join(dir, 'events.jsonl'), { ts: nowIso(), type: 'research.done', cycle });
|
|
538
559
|
await enforceRetention(root).catch(() => {});
|
|
539
560
|
console.log(`Research done: ${id}`);
|
|
@@ -541,7 +562,7 @@ async function researchRun(args) {
|
|
|
541
562
|
}
|
|
542
563
|
await appendJsonlBounded(path.join(dir, 'events.jsonl'), { ts: nowIso(), type: 'research.cycle.continue', cycle, reasons: gate.reasons });
|
|
543
564
|
}
|
|
544
|
-
await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: 'RESEARCH_PAUSED_MAX_CYCLES', questions_allowed: true });
|
|
565
|
+
await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: 'RESEARCH_PAUSED_MAX_CYCLES', questions_allowed: true, implementation_allowed: false });
|
|
545
566
|
console.log(`Research paused after max cycles: ${id}`);
|
|
546
567
|
}
|
|
547
568
|
|
|
@@ -557,18 +578,31 @@ async function researchStatus(args) {
|
|
|
557
578
|
const scoutLedger = await readJson(path.join(dir, 'scout-ledger.json'), null);
|
|
558
579
|
const debateLedger = await readJson(path.join(dir, 'debate-ledger.json'), null);
|
|
559
580
|
const falsificationLedger = await readJson(path.join(dir, 'falsification-ledger.json'), null);
|
|
581
|
+
const sourceSkillText = await readText(path.join(dir, RESEARCH_SOURCE_SKILL_ARTIFACT), '');
|
|
582
|
+
const geniusSummaryText = await readText(path.join(dir, RESEARCH_GENIUS_SUMMARY_ARTIFACT), '');
|
|
583
|
+
const paperText = await readText(path.join(dir, RESEARCH_PAPER_ARTIFACT), '');
|
|
560
584
|
const scoutRows = Array.isArray(scoutLedger?.scouts) ? scoutLedger.scouts : [];
|
|
585
|
+
const sourceLayerRows = Array.isArray(sourceLedger?.source_layers) ? sourceLedger.source_layers : [];
|
|
586
|
+
const sourceLayersCovered = sourceLayerRows.filter((layer) => layer.status === 'covered' && ((Array.isArray(layer.source_ids) && layer.source_ids.length) || (Array.isArray(layer.counterevidence_ids) && layer.counterevidence_ids.length))).length;
|
|
561
587
|
console.log(JSON.stringify({
|
|
562
588
|
mission,
|
|
563
589
|
state,
|
|
564
590
|
gate,
|
|
565
591
|
novelty_entries: ledger?.entries?.length ?? null,
|
|
566
592
|
source_entries: sourceLedger?.sources?.length ?? null,
|
|
593
|
+
source_layers_required: sourceLayerRows.length || gate?.metrics?.source_layers_required || gate?.source_layers_required || null,
|
|
594
|
+
source_layers_covered: gate?.metrics?.source_layers_covered ?? gate?.source_layers_covered ?? (sourceLayerRows.length ? sourceLayersCovered : null),
|
|
595
|
+
triangulation_checks: sourceLedger?.triangulation?.cross_layer_checks?.length ?? gate?.metrics?.triangulation_checks ?? gate?.triangulation_checks ?? null,
|
|
596
|
+
genius_opinion_summaries: gate?.metrics?.genius_opinion_summaries ?? gate?.genius_opinion_summaries ?? (geniusSummaryText.trim() ? countGeniusOpinionSummaries(geniusSummaryText) : null),
|
|
567
597
|
counterevidence_sources: sourceLedger?.counterevidence_sources?.length ?? null,
|
|
568
598
|
xhigh_scouts: scoutRows.length ? scoutRows.filter((scout) => scout.effort === 'xhigh').length : null,
|
|
569
599
|
eureka_moments: scoutRows.length ? scoutRows.filter((scout) => scout.eureka?.exclamation === 'Eureka!' && String(scout.eureka?.idea || '').trim()).length : null,
|
|
570
600
|
scout_findings: scoutRows.length ? scoutRows.reduce((sum, scout) => sum + (Array.isArray(scout.findings) ? scout.findings.length : 0), 0) : null,
|
|
571
601
|
debate_exchanges: debateLedger?.exchanges?.length ?? null,
|
|
602
|
+
research_source_skill_present: Boolean(sourceSkillText.trim()),
|
|
603
|
+
genius_opinion_summary_present: Boolean(geniusSummaryText.trim()),
|
|
604
|
+
paper_present: Boolean(paperText.trim()),
|
|
605
|
+
paper_sections: countResearchPaperSections(paperText),
|
|
572
606
|
falsification_cases: falsificationLedger?.cases?.length ?? null
|
|
573
607
|
}, null, 2));
|
|
574
608
|
}
|
|
@@ -621,11 +655,19 @@ async function safeReadText(file, fallback = '') {
|
|
|
621
655
|
try { return await fsp.readFile(file, 'utf8'); } catch { return fallback; }
|
|
622
656
|
}
|
|
623
657
|
|
|
624
|
-
function
|
|
625
|
-
const i = args.indexOf(
|
|
658
|
+
function readBoundedIntegerFlag(args, name, fallback, min, max) {
|
|
659
|
+
const i = args.indexOf(name);
|
|
626
660
|
const raw = i >= 0 && args[i + 1] ? Number(args[i + 1]) : Number(fallback);
|
|
627
|
-
if (!Number.isFinite(raw)) return Math.max(
|
|
628
|
-
return Math.max(
|
|
661
|
+
if (!Number.isFinite(raw)) return Math.max(min, Number.parseInt(fallback, 10) || min);
|
|
662
|
+
return Math.max(min, Math.min(max, Math.floor(raw)));
|
|
663
|
+
}
|
|
664
|
+
|
|
665
|
+
function readMaxCycles(args, fallback) {
|
|
666
|
+
return readBoundedIntegerFlag(args, '--max-cycles', fallback, 1, 50);
|
|
667
|
+
}
|
|
668
|
+
|
|
669
|
+
function readResearchCycleTimeoutMinutes(args) {
|
|
670
|
+
return readBoundedIntegerFlag(args, '--cycle-timeout-minutes', RESEARCH_DEFAULT_CYCLE_TIMEOUT_MINUTES, RESEARCH_MIN_CYCLE_TIMEOUT_MINUTES, RESEARCH_MAX_CYCLE_TIMEOUT_MINUTES);
|
|
629
671
|
}
|
|
630
672
|
|
|
631
673
|
export async function goalCommand(sub, args) {
|
|
@@ -1576,7 +1618,7 @@ export async function statsCommand(args) {
|
|
|
1576
1618
|
|
|
1577
1619
|
function positionalArgs(args = []) {
|
|
1578
1620
|
const out = [];
|
|
1579
|
-
const valueFlags = new Set(['--format', '--iterations', '--out', '--baseline', '--candidate', '--install-scope', '--max-cycles', '--depth', '--scope', '--transport', '--query', '--topic', '--tokens', '--timeout-ms', '--sql', '--command', '--project-ref', '--agent', '--phase', '--message', '--role', '--max-anchors', '--lines', '--intent', '--changed', '--route', '--skills', '--prompt-signature']);
|
|
1621
|
+
const valueFlags = new Set(['--format', '--iterations', '--out', '--baseline', '--candidate', '--install-scope', '--max-cycles', '--cycle-timeout-minutes', '--depth', '--scope', '--transport', '--query', '--topic', '--tokens', '--timeout-ms', '--sql', '--command', '--project-ref', '--agent', '--phase', '--message', '--role', '--max-anchors', '--lines', '--intent', '--changed', '--route', '--skills', '--prompt-signature']);
|
|
1580
1622
|
for (let i = 0; i < args.length; i++) {
|
|
1581
1623
|
const arg = String(args[i]);
|
|
1582
1624
|
if (valueFlags.has(arg)) {
|
package/src/core/fsx.mjs
CHANGED
|
@@ -5,7 +5,7 @@ import os from 'node:os';
|
|
|
5
5
|
import crypto from 'node:crypto';
|
|
6
6
|
import { spawn } from 'node:child_process';
|
|
7
7
|
|
|
8
|
-
export const PACKAGE_VERSION = '0.7.
|
|
8
|
+
export const PACKAGE_VERSION = '0.7.72';
|
|
9
9
|
export const DEFAULT_PROCESS_TAIL_BYTES = 256 * 1024;
|
|
10
10
|
export const DEFAULT_PROCESS_TIMEOUT_MS = 30 * 60 * 1000;
|
|
11
11
|
|
|
@@ -7,7 +7,7 @@ import { checkHarnessModification, harnessGuardBlockReason } from './harness-gua
|
|
|
7
7
|
import { activeRouteContext, evaluateStop, prepareRoute, promptPipelineContext as routePipelineContext, recordContext7Evidence, recordSubagentEvidence, routePrompt } from './pipeline.mjs';
|
|
8
8
|
import { classifyToolError } from './evaluation.mjs';
|
|
9
9
|
import { REQUIRED_CODEX_MODEL, isForbiddenCodexModel } from './codex-model-guard.mjs';
|
|
10
|
-
import { stripVisibleDecisionAnswerBlocks } from './routes.mjs';
|
|
10
|
+
import { dollarCommand, stripVisibleDecisionAnswerBlocks } from './routes.mjs';
|
|
11
11
|
|
|
12
12
|
const TEAM_DIGEST_MAX_EVENTS = 4;
|
|
13
13
|
const TEAM_DIGEST_MESSAGE_CHARS = 180;
|
|
@@ -77,13 +77,6 @@ function toolFailed(payload = {}) {
|
|
|
77
77
|
return false;
|
|
78
78
|
}
|
|
79
79
|
|
|
80
|
-
function dollarCommand(prompt) {
|
|
81
|
-
const text = String(prompt || '').trim();
|
|
82
|
-
const match = text.match(/^\$([A-Za-z][A-Za-z0-9_-]*)(?:\s|:|$)/)
|
|
83
|
-
|| text.match(/^\[\$([A-Za-z][A-Za-z0-9_-]*)\]\([^)]+\)(?:\s|:|$)/);
|
|
84
|
-
return match ? match[1].toUpperCase() : null;
|
|
85
|
-
}
|
|
86
|
-
|
|
87
80
|
/**
 * Heuristically decide whether a prompt starts with a refusal of an offered update.
 *
 * English keywords must end at a word boundary so that prompts which merely
 * begin with the same letters ("now ...", "normalize ...", "note ...") are not
 * misread as a decline. Korean keywords stay prefix-matched because they are
 * stems that take suffixes (e.g. a keyword followed directly by an ending).
 *
 * @param {unknown} prompt - User prompt; null/undefined/empty are treated as no decline.
 * @returns {boolean} True when the trimmed prompt begins with a decline keyword.
 */
function looksLikeUpdateDecline(prompt) {
  const text = String(prompt || '').trim();
  // `\b` rejects "no" inside "now"/"nothing" while still accepting "no", "no," and "no thanks".
  if (/^(?:no|nope|skip|later|not now|don't|dont)\b/i.test(text)) return true;
  // Hangul has no ASCII word boundary; keep the original prefix semantics.
  return /^(?:아니|아니요|안해|안 함|나중에|건너뛰|스킵)/.test(text);
}
|
|
@@ -1006,7 +999,7 @@ function visibleHookMessage(name, text = '') {
|
|
|
1006
999
|
if (body.includes('Computer Use fast lane active')) return 'SKS: Computer Use fast lane injected; defer TriWiki/Honest Mode to final closeout.';
|
|
1007
1000
|
if (body.includes('MANDATORY ambiguity-removal gate') || body.includes('VISIBLE RESPONSE CONTRACT') || body.includes('Required questions still pending')) return 'SKS: stale clarification gate detected; continue from inferred route contract.';
|
|
1008
1001
|
if (body.includes('$Team route prepared') || body.includes('Team route')) return 'SKS: Team route, live transcript, and subagent plan injected.';
|
|
1009
|
-
if (body.includes('$Research route prepared')) return 'SKS: Research route, xhigh Eureka scout council, source
|
|
1002
|
+
if (body.includes('$Research route prepared')) return 'SKS: Research route, xhigh Eureka scout council, source/debate ledgers, paper output, and falsification gate injected.';
|
|
1010
1003
|
if (body.includes('$AutoResearch route prepared')) return 'SKS: AutoResearch experiment loop and evidence gate injected.';
|
|
1011
1004
|
if (body.includes('$PPT route prepared')) return 'SKS: PPT route and delivery-context gate injected.';
|
|
1012
1005
|
if (body.includes('$Image-UX-Review route prepared') || body.includes('$UX-Review route prepared')) return 'SKS: Image UX Review route and gpt-image-2 evidence gate injected.';
|
package/src/core/init.mjs
CHANGED
|
@@ -815,7 +815,7 @@ export async function installSkills(root) {
|
|
|
815
815
|
'computer-use-fast': `---\nname: computer-use-fast\ndescription: Alias for the maximum-speed $Computer-Use/$CU Codex Computer Use lane.\n---\n\nUse the same rules as computer-use: skip Team debate, QA-LOOP clarification, upfront TriWiki refresh, Context7, subagents, and reflection unless explicitly requested. Use Codex Computer Use directly; never substitute Playwright, Chrome MCP, Browser Use, Selenium, Puppeteer, or other browser automation for UI/browser evidence. At the end only, refresh/pack TriWiki, validate it, then provide a concise completion summary plus Honest Mode.\n`,
|
|
816
816
|
'cu': `---\nname: cu\ndescription: Short alias for the maximum-speed $Computer-Use Codex Computer Use lane.\n---\n\nUse the same rules as computer-use. This is a speed lane for focused UI/browser/visual tasks that require Codex Computer Use evidence, with TriWiki refresh/validate and Honest Mode deferred to final closeout.\n`,
|
|
817
817
|
'goal': `---\nname: goal\ndescription: Fast $Goal/$goal bridge overlay for Codex native persisted /goal workflows.\n---\n\nUse when the user invokes $Goal/$goal or asks to persist a workflow with Codex native /goal continuation. Prepare with sks goal create or the $Goal route, write only the lightweight bridge artifacts, then use native Codex /goal create, pause, resume, and clear controls where available. Goal does not replace Team, QA, DB, or other SKS execution routes; continue implementation through the selected route and use Context7 only when external API/library docs are involved. Do not recreate the old no-question loop.\n`,
|
|
818
|
-
'research': `---\nname: research\ndescription: Dollar-command route for $Research or $research frontier discovery workflows.\n---\n\nUse when the user invokes $Research/$research or asks for research, hypotheses, new mechanisms, falsification, or testable predictions. Prefer sks research prepare and sks research run. Run the genius-lens scout council with Einstein/Feynman/Turing/von Neumann-inspired cognitive roles plus a skeptic lens; do not impersonate the historical people. Every Research scout must run with effort=xhigh, record one literal "Eureka!" idea, and participate in a vigorous evidence-bound debate before synthesis.
|
|
818
|
+
'research': `---\nname: research\ndescription: Dollar-command route for $Research or $research frontier discovery workflows.\n---\n\nUse when the user invokes $Research/$research or asks for research, hypotheses, new mechanisms, falsification, or testable predictions. Prefer sks research prepare and sks research run. Run the genius-lens scout council with Einstein/Feynman/Turing/von Neumann-inspired cognitive roles plus a skeptic lens; do not impersonate the historical people. Every Research scout must run with effort=xhigh, record one literal "Eureka!" idea, and participate in a vigorous evidence-bound debate before synthesis. Create research-source-skill.md as a route-local Skill Creator artifact, then maximize layered public web/source search across papers, official/government or leading-institution data, standards/primary docs, current news, public discourse, developer/practitioner sources, and counterevidence before synthesis. Record research-source-skill.md, source-ledger.json, scout-ledger.json, debate-ledger.json, novelty-ledger.json, falsification-ledger.json, research-report.md, research-paper.md, genius-opinion-summary.md, and research-gate.json. Context7 is optional and only needed when the research topic depends on external package/API/framework docs; do not use it as the default research evidence layer. Normal Research may take one or two hours when needed; favor real source collection, cross-layer comparison, falsification, and a concise paper manuscript over speed. Do not use --mock except for selftests or dry harness checks; if live source execution is unavailable, record a blocker and keep the gate unpassed. Do not use for ordinary code edits.\n`,
|
|
819
819
|
'autoresearch': `---\nname: autoresearch\ndescription: Dollar-command route for $AutoResearch or $autoresearch iterative experiment loops.\n---\n\nUse for $AutoResearch, iterative improvement, SEO/GEO, ranking, workflow, benchmark, or experiments. Define program, hypothesis, experiment, metric, keep/discard, falsification, next step, and Honest Mode. Load seo-geo-optimizer for README/npm/GitHub/schema/AI-search work.\n`,
|
|
820
820
|
'db': `---\nname: db\ndescription: Dollar-command route for $DB or $db database and Supabase safety checks.\n---\n\nUse when the user invokes $DB/$db or the task touches SQL, Supabase, Postgres, migrations, Prisma, Drizzle, Knex, MCP database tools, or production data. Run or follow sks db policy, sks db scan, sks db classify, and sks db check. Destructive database operations remain forbidden.\n`,
|
|
821
821
|
'mad-sks': `---\nname: mad-sks\ndescription: Explicit high-risk authorization modifier for $MAD-SKS scoped Supabase MCP DB permission widening.\n---\n\nUse only when the user explicitly invokes $MAD-SKS or top-level sks --mad. It can be combined with another route, such as $MAD-SKS $Team or $DB ... $MAD-SKS; in that case the other command remains the primary workflow and MAD-SKS is only the temporary permission grant. The widened permission applies only while the active mission gate is open, must be deactivated when the task ends, and opens live server work, Supabase MCP database writes, column/schema cleanup, direct execute SQL, migration application when required, and normal targeted DB writes. Keep only catastrophic safeguards: whole database/schema/table removal, truncate, all-row delete/update, reset, dangerous project/branch management, credential exfiltration, persistent security weakening, and unrequested fallback implementation remain blocked. Do not carry MAD-SKS permission into later prompts or routes. The permission profile is centralized in src/core/permission-gates.mjs so skill/hook/MCP-style gates share one decision function.\n`,
|
package/src/core/pipeline.mjs
CHANGED
|
@@ -921,7 +921,7 @@ async function prepareResearch(root, route, task, required) {
|
|
|
921
921
|
await writeResearchPlan(dir, task, {});
|
|
922
922
|
const pipelinePlan = await writePipelinePlan(dir, { missionId: id, route, task, required, ambiguity: { required: false, status: 'direct_route' } });
|
|
923
923
|
await setCurrent(root, routeState(id, route, 'RESEARCH_PREPARED', required, { prompt: task, pipeline_plan_ready: validatePipelinePlan(pipelinePlan).ok, pipeline_plan_path: PIPELINE_PLAN_ARTIFACT }));
|
|
924
|
-
return routeContext(route, id, task, required, 'Run sks research run latest, maximize
|
|
924
|
+
return routeContext(route, id, task, required, 'Run sks research run latest as a real long-running source-gathering pass, never an automatic mock fallback; create research-source-skill.md, maximize layered public source search, require every scout effort=xhigh plus one Eureka! idea, fill source-ledger.json, scout-ledger.json, debate-ledger.json, novelty-ledger.json, falsification-ledger.json, research-report.md, research-paper.md, genius-opinion-summary.md, and pass research-gate.json.');
|
|
925
925
|
}
|
|
926
926
|
|
|
927
927
|
async function prepareAutoResearch(root, route, task, required) {
|
package/src/core/research.mjs
CHANGED
|
@@ -1,7 +1,21 @@
|
|
|
1
1
|
import path from 'node:path';
|
|
2
|
-
import { appendJsonlBounded, nowIso, readJson, writeJsonAtomic, writeTextAtomic, exists } from './fsx.mjs';
|
|
2
|
+
import { appendJsonlBounded, nowIso, readJson, readText, writeJsonAtomic, writeTextAtomic, exists } from './fsx.mjs';
|
|
3
3
|
import { OUTCOME_RUBRIC } from './proof-field.mjs';
|
|
4
4
|
|
|
5
|
+
/** File name of the paper-style manuscript artifact of a research mission. */
export const RESEARCH_PAPER_ARTIFACT = 'research-paper.md';

/** File name of the route-local source collection skill artifact. */
export const RESEARCH_SOURCE_SKILL_ARTIFACT = 'research-source-skill.md';

/** File name of the artifact summarizing each scout lens's final opinion. */
export const RESEARCH_GENIUS_SUMMARY_ARTIFACT = 'genius-opinion-summary.md';

/**
 * Paper section groups. Each inner array lists alternative lowercase heading
 * terms for a single section; a group is satisfied when any one of its terms
 * appears in a heading.
 */
export const RESEARCH_PAPER_SECTION_GROUPS = Object.freeze([
  ['abstract'],
  ['introduction'],
  ['method', 'methodology'],
  ['results', 'findings'],
  ['discussion'],
  ['limitations', 'falsification'],
  ['conclusion', 'next experiment'],
  ['references', 'sources']
]);
|
|
18
|
+
|
|
5
19
|
export const RESEARCH_SCOUT_COUNCIL = Object.freeze([
|
|
6
20
|
{
|
|
7
21
|
id: 'einstein',
|
|
@@ -40,6 +54,67 @@ export const RESEARCH_SCOUT_COUNCIL = Object.freeze([
|
|
|
40
54
|
}
|
|
41
55
|
]);
|
|
42
56
|
|
|
57
|
+
/**
 * Source layers a research run is expected to search. Each entry carries:
 *   id              - stable layer identifier
 *   label           - human-readable name
 *   purpose         - what the layer contributes
 *   evidence_role   - the role this layer's evidence plays
 *   examples        - sample sources for the layer
 *   query_templates - search templates containing a "<topic>" placeholder
 * Only the outer array is frozen; the layer objects themselves are not.
 */
export const RESEARCH_SOURCE_LAYERS = Object.freeze([
  {
    id: 'academic_literature',
    label: 'Academic literature',
    purpose: 'Find recent papers, preprints, formal reviews, citations, and open scholarly metadata before synthesis.',
    evidence_role: 'formal_evidence',
    examples: ['arXiv', 'Semantic Scholar', 'OpenAlex', 'Crossref', 'PubMed'],
    query_templates: [
      '"<topic>" arxiv',
      '"<topic>" site:semanticscholar.org',
      '"<topic>" OpenAlex Crossref PubMed'
    ]
  },
  {
    id: 'official_government_data',
    label: 'Official government and leading-institution knowledge',
    purpose: 'Ground claims in public datasets, policy papers, national statistics, and leading-country institutional sources.',
    evidence_role: 'authoritative_baseline',
    examples: ['World Bank', 'OECD', 'Eurostat', 'data.gov', 'data.gov.uk', 'NIST'],
    query_templates: [
      '"<topic>" site:worldbank.org OR site:oecd.org',
      '"<topic>" site:data.gov OR site:data.gov.uk',
      '"<topic>" site:nist.gov'
    ]
  },
  {
    id: 'standards_primary_docs',
    label: 'Standards and primary documents',
    purpose: 'Check primary specifications, standards, RFCs, policy originals, and official project documents before relying on summaries.',
    evidence_role: 'primary_source',
    examples: ['IETF RFCs', 'W3C', 'ISO abstracts', 'official standards bodies', 'project primary docs'],
    query_templates: [
      '"<topic>" RFC standard specification',
      '"<topic>" W3C IETF NIST standard',
      '"<topic>" official specification'
    ]
  },
  {
    id: 'news_current_events',
    label: 'Current news and global reporting',
    purpose: 'Capture recent events, public impact, and regional framing from reputable news and global news indices.',
    evidence_role: 'recency_signal',
    examples: ['GDELT', 'BBC', 'CNN', 'Reuters', 'AP', 'regional reputable outlets'],
    query_templates: [
      '"<topic>" BBC CNN latest',
      '"<topic>" GDELT news',
      '"<topic>" Reuters AP analysis'
    ]
  },
  {
    id: 'public_discourse',
    label: 'Public discourse',
    purpose: 'Sample public practitioner and community discourse without treating popularity as truth.',
    evidence_role: 'sentiment_and_edge_cases',
    examples: ['X/Twitter recent search', 'Reddit', 'Hacker News', 'public forums'],
    query_templates: [
      '"<topic>" site:x.com OR site:twitter.com',
      '"<topic>" site:reddit.com',
      '"<topic>" "Hacker News"'
    ]
  },
  {
    id: 'developer_practitioner',
    label: 'Developer and practitioner knowledge',
    purpose: 'Find implementation pitfalls, developer questions, bug reports, and operational lessons.',
    evidence_role: 'practice_feedback',
    examples: ['Stack Overflow', 'Stack Exchange', 'GitHub issues', 'release notes', 'engineering blogs'],
    query_templates: [
      '"<topic>" site:stackoverflow.com',
      '"<topic>" site:stackexchange.com',
      '"<topic>" site:github.com issues'
    ]
  },
  {
    id: 'counterevidence_factcheck',
    label: 'Counterevidence and fact-checking',
    purpose: 'Actively search for failures, critiques, null results, retractions, fact checks, and source conflicts.',
    evidence_role: 'falsification',
    examples: ['Google Fact Check Tools', 'Retraction Watch', 'critical reviews', 'benchmark failures', 'negative results'],
    query_templates: [
      '"<topic>" critique failure limitation',
      '"<topic>" fact check retraction',
      '"<topic>" counterevidence null result'
    ]
  }
]);

/** Frozen list of the layer ids above, in declaration order. */
export const RESEARCH_SOURCE_LAYER_IDS = Object.freeze(
  Array.from(RESEARCH_SOURCE_LAYERS, (layer) => layer.id)
);
|
|
117
|
+
|
|
43
118
|
export function createResearchPlan(prompt, opts = {}) {
|
|
44
119
|
const depth = opts.depth || 'frontier';
|
|
45
120
|
return {
|
|
@@ -49,6 +124,11 @@ export function createResearchPlan(prompt, opts = {}) {
|
|
|
49
124
|
created_at: nowIso(),
|
|
50
125
|
methodology: 'genius-scout-council-frontier-discovery-loop',
|
|
51
126
|
objective: 'Find the shortest useful mechanism that can be falsified or applied, grounded in maximum available source retrieval rather than broad summary.',
|
|
127
|
+
execution_policy: {
|
|
128
|
+
normal_run: 'real_long_running_research',
|
|
129
|
+
default_cycle_timeout_minutes: 120,
|
|
130
|
+
mock_policy: '--mock is for selftests and dry harness checks only; normal Research must block rather than silently substitute mock output.'
|
|
131
|
+
},
|
|
52
132
|
outcome_rubric: OUTCOME_RUBRIC,
|
|
53
133
|
research_council: {
|
|
54
134
|
mode: 'persona_inspired_scouts_not_impersonation',
|
|
@@ -76,8 +156,8 @@ export function createResearchPlan(prompt, opts = {}) {
|
|
|
76
156
|
]
|
|
77
157
|
},
|
|
78
158
|
web_research_policy: {
|
|
79
|
-
mode: '
|
|
80
|
-
requirement: 'Use
|
|
159
|
+
mode: 'layered_source_retrieval_and_triangulation',
|
|
160
|
+
requirement: 'Use every safely available public web/source route before synthesis, separated into source layers so the final claim is not dominated by one corpus or platform.',
|
|
81
161
|
query_sets: [
|
|
82
162
|
'first-principles and theory sources',
|
|
83
163
|
'plain-language explanations and empirical examples',
|
|
@@ -85,9 +165,18 @@ export function createResearchPlan(prompt, opts = {}) {
|
|
|
85
165
|
'systems, strategy, scaling, or deployment evidence',
|
|
86
166
|
'counterevidence, failures, critiques, and null results'
|
|
87
167
|
],
|
|
168
|
+
source_layers: RESEARCH_SOURCE_LAYERS,
|
|
88
169
|
source_priority: ['primary_sources', 'official_docs_or_standards', 'peer_reviewed_or_archival_sources', 'reputable_recent_sources', 'credible_counterevidence'],
|
|
170
|
+
skill_creator: {
|
|
171
|
+
artifact: RESEARCH_SOURCE_SKILL_ARTIFACT,
|
|
172
|
+
status: 'route_local_candidate',
|
|
173
|
+
rule: 'Before source gathering, create a route-local source collection skill that names the selected layers, query families, source-quality fields, blockers, and cross-layer triangulation checks. Do not edit generated .agents/skills during a research run.'
|
|
174
|
+
},
|
|
89
175
|
citation_rules: [
|
|
90
176
|
'Every factual claim in the report must cite source-ledger ids or local project evidence.',
|
|
177
|
+
'The final research paper must include references tied to source-ledger ids.',
|
|
178
|
+
'Every required source layer must have at least one cited source or an explicit blocker; blockers keep the research gate unpassed.',
|
|
179
|
+
'The source-ledger must include at least one cross-layer triangulation check comparing formal, current, discourse, practitioner, official, and counterevidence sources.',
|
|
91
180
|
'Every novelty-ledger entry must cite at least one evidence source and at least one falsifier.',
|
|
92
181
|
'If live web search is unavailable, record the blocker in source-ledger.json and keep research-gate.json unpassed.'
|
|
93
182
|
],
|
|
@@ -95,7 +184,9 @@ export function createResearchPlan(prompt, opts = {}) {
|
|
|
95
184
|
independent_scouts: RESEARCH_SCOUT_COUNCIL.length,
|
|
96
185
|
web_search_passes: 1,
|
|
97
186
|
source_entries: 1,
|
|
98
|
-
|
|
187
|
+
source_layers: RESEARCH_SOURCE_LAYER_IDS.length,
|
|
188
|
+
counterevidence_sources: 1,
|
|
189
|
+
triangulation_checks: 1
|
|
99
190
|
}
|
|
100
191
|
},
|
|
101
192
|
rules: [
|
|
@@ -104,22 +195,31 @@ export function createResearchPlan(prompt, opts = {}) {
|
|
|
104
195
|
'Run the genius-lens scout council independently before synthesis.',
|
|
105
196
|
'Every Research scout must run at reasoning_effort=xhigh, record one literal "Eureka!" idea, and participate in the debate.',
|
|
106
197
|
'The scout council must debate vigorously but stay evidence-bound; record challenges and responses in debate-ledger.json.',
|
|
107
|
-
'Maximize safe web/source search and record queries,
|
|
198
|
+
'Maximize safe web/source search as layered source retrieval and record queries, source layers, citations, quality notes, triangulation checks, and blockers in source-ledger.json.',
|
|
199
|
+
`Create ${RESEARCH_SOURCE_SKILL_ARTIFACT} as a route-local source collection skill before synthesis; do not edit generated .agents/skills during the research run.`,
|
|
108
200
|
'Actively seek disconfirming evidence before synthesis.',
|
|
201
|
+
'Turn the surviving research result into research-paper.md with paper-style sections and references.',
|
|
202
|
+
`End every run with ${RESEARCH_GENIUS_SUMMARY_ARTIFACT}, summarizing each genius-lens scout's final opinion, strongest evidence, disagreement, and changed mind.`,
|
|
109
203
|
'Keep unsupported source-free claims as hypotheses only.',
|
|
110
|
-
'Prefer the smallest testable mechanism or implementation probe
|
|
204
|
+
'Prefer the smallest testable mechanism or implementation probe, but do not stop source gathering early for speed when the research question needs a longer pass.',
|
|
111
205
|
'Do not ask the user mid-run; resolve scope using the research plan and safety policy.'
|
|
112
206
|
],
|
|
113
207
|
phases: [
|
|
114
208
|
{ id: 'R0_FRAME', goal: 'Frame the target outcome, constraints, and what would make the idea useful.' },
|
|
115
|
-
{ id: '
|
|
116
|
-
{ id: '
|
|
117
|
-
{ id: '
|
|
118
|
-
{ id: '
|
|
119
|
-
{ id: '
|
|
209
|
+
{ id: 'R1_SOURCE_SKILL', goal: `Create ${RESEARCH_SOURCE_SKILL_ARTIFACT} with layer-specific search routes, quality fields, and blockers before source gathering.` },
|
|
210
|
+
{ id: 'R2_SOURCE_SEARCH', goal: 'Run layered web/source retrieval across papers, official data, standards, news, public discourse, developer knowledge, and counterevidence.' },
|
|
211
|
+
{ id: 'R3_EUREKA', goal: 'Have each xhigh genius-lens scout shout Eureka! and record one non-obvious idea with source ids.' },
|
|
212
|
+
{ id: 'R4_DEBATE', goal: 'Run a vigorous evidence-bound council debate with every scout challenging or responding.' },
|
|
213
|
+
{ id: 'R5_FALSIFY', goal: 'Attack each mechanism with counterexamples, missing evidence, source conflicts, and failure modes.' },
|
|
214
|
+
{ id: 'R6_APPLY', goal: 'Keep the smallest surviving mechanism, define a cheap probe, and write all ledgers.' },
|
|
215
|
+
{ id: 'R7_PAPER', goal: 'Convert the final research result into a concise paper manuscript with abstract, method, findings, limitations, and references.' },
|
|
216
|
+
{ id: 'R8_GENIUS_SUMMARY', goal: `Write ${RESEARCH_GENIUS_SUMMARY_ARTIFACT} so the final answer can report every scout lens opinion and the council consensus.` }
|
|
120
217
|
],
|
|
121
218
|
required_artifacts: [
|
|
122
219
|
'research-report.md',
|
|
220
|
+
RESEARCH_PAPER_ARTIFACT,
|
|
221
|
+
RESEARCH_GENIUS_SUMMARY_ARTIFACT,
|
|
222
|
+
RESEARCH_SOURCE_SKILL_ARTIFACT,
|
|
123
223
|
'source-ledger.json',
|
|
124
224
|
'scout-ledger.json',
|
|
125
225
|
'debate-ledger.json',
|
|
@@ -137,6 +237,10 @@ export function researchPlanMarkdown(plan) {
|
|
|
137
237
|
lines.push(`Prompt: ${plan.prompt}`);
|
|
138
238
|
lines.push(`Depth: ${plan.depth}`);
|
|
139
239
|
lines.push(`Methodology: ${plan.methodology}`);
|
|
240
|
+
if (plan.execution_policy) {
|
|
241
|
+
lines.push(`Execution: ${plan.execution_policy.normal_run}; default cycle timeout ${plan.execution_policy.default_cycle_timeout_minutes} minutes`);
|
|
242
|
+
lines.push(`Mock policy: ${plan.execution_policy.mock_policy}`);
|
|
243
|
+
}
|
|
140
244
|
lines.push('');
|
|
141
245
|
lines.push('## Rules');
|
|
142
246
|
for (const rule of plan.rules) lines.push(`- ${rule}`);
|
|
@@ -152,6 +256,10 @@ export function researchPlanMarkdown(plan) {
|
|
|
152
256
|
lines.push(`Mode: ${plan.web_research_policy.mode}`);
|
|
153
257
|
lines.push(`Requirement: ${plan.web_research_policy.requirement}`);
|
|
154
258
|
for (const querySet of plan.web_research_policy.query_sets || []) lines.push(`- query set: ${querySet}`);
|
|
259
|
+
if (plan.web_research_policy.skill_creator?.artifact) lines.push(`- source skill artifact: ${plan.web_research_policy.skill_creator.artifact}`);
|
|
260
|
+
for (const layer of plan.web_research_policy.source_layers || []) {
|
|
261
|
+
lines.push(`- layer ${layer.id}: ${layer.purpose}`);
|
|
262
|
+
}
|
|
155
263
|
lines.push('');
|
|
156
264
|
}
|
|
157
265
|
lines.push('## Outcome Rubric');
|
|
@@ -166,10 +274,56 @@ export function researchPlanMarkdown(plan) {
|
|
|
166
274
|
return `${lines.join('\n')}\n`;
|
|
167
275
|
}
|
|
168
276
|
|
|
277
|
+
/**
 * Render the route-local "Research Source Layer Skill" markdown document.
 *
 * Layers come from `plan.web_research_policy.source_layers` when that list is
 * non-empty; otherwise the module-level RESEARCH_SOURCE_LAYERS is used.
 *
 * @param {object} [plan] - Research plan; may be null or undefined.
 * @returns {string} Markdown text terminated by a newline.
 */
export function researchSourceSkillMarkdown(plan) {
  const plannedLayers = plan?.web_research_policy?.source_layers;
  const layers = plannedLayers?.length ? plannedLayers : RESEARCH_SOURCE_LAYERS;

  // Three lines per layer: headline, examples, query templates.
  const layerLines = [...layers].flatMap((layer) => [
    `- ${layer.id}: ${layer.purpose}`,
    ` Examples: ${(layer.examples || []).join(', ')}`,
    ` Query templates: ${(layer.query_templates || []).join(' | ')}`
  ]);

  const documentLines = [
    '# Research Source Layer Skill',
    '',
    'Status: route-local candidate skill. Use it inside this research mission before scout synthesis. Do not install or edit generated .agents/skills from this artifact.',
    'Real-run policy: collect live sources for as long as needed within the mission timeout; mock or fixture evidence is valid only for explicit --mock selftests.',
    '',
    '## Trigger',
    '- Any `$Research` run that must collect broad public evidence before creative synthesis, debate, falsification, or paper writing.',
    '',
    '## Source Layers',
    ...layerLines,
    '',
    '## Output Contract',
    '- Fill source-ledger.json with `source_layers`, `sources[].layer`, `counterevidence_sources[].layer`, `citation_coverage`, `triangulation.cross_layer_checks`, and `blockers`.',
    '- Each source entry should record title, locator/URL, publisher or author when known, published_at when known, accessed_at, layer, reliability, credibility, stance, supports or undermines, and notes.',
    '- Public discourse sources such as X/Twitter or Reddit are signals and edge cases, not truth. They must be triangulated with formal, official, practitioner, or counterevidence layers.',
    '- If a layer cannot be searched with the available runtime or credentials, record the blocker and keep research-gate.json unpassed.',
    '',
    '## Debate Use',
    '- Every scout must cite source-ledger ids in findings and Eureka ideas.',
    '- The skeptic lens must challenge the strongest claim using counterevidence or source-quality downgrades.',
    '- Synthesis keeps only claims that survive cross-layer triangulation and falsification.',
    ''
  ];

  return `${documentLines.join('\n')}\n`;
}
|
|
308
|
+
|
|
309
|
+
/**
 * Count how many paper section groups are represented in a markdown document.
 *
 * Only lines that start with one to three `#` characters followed by whitespace
 * are considered headings. Matching is a case-insensitive substring test of
 * each group's terms against the heading line; every group counts at most once.
 *
 * @param {string} [text=''] - Markdown text of the paper.
 * @returns {number} Number of groups in RESEARCH_PAPER_SECTION_GROUPS that matched.
 */
export function countResearchPaperSections(text = '') {
  const headingLines = [];
  for (const line of String(text || '').toLowerCase().split(/\n/)) {
    if (/^#{1,3}\s+/.test(line)) headingLines.push(line);
  }

  let matchedGroups = 0;
  for (const group of RESEARCH_PAPER_SECTION_GROUPS) {
    const groupPresent = headingLines.some((heading) => group.some((term) => heading.includes(term)));
    if (groupPresent) matchedGroups += 1;
  }
  return matchedGroups;
}
|
|
313
|
+
|
|
314
|
+
/**
 * Count how many council scouts are mentioned in a summary document.
 *
 * A scout counts when its id, or its non-empty label, occurs anywhere in the
 * text (case-insensitive substring match).
 *
 * @param {string} [text=''] - Summary text to scan.
 * @returns {number} Number of RESEARCH_SCOUT_COUNCIL entries mentioned.
 */
export function countGeniusOpinionSummaries(text = '') {
  const haystack = String(text || '').toLowerCase();
  let mentionedScouts = 0;
  for (const scout of RESEARCH_SCOUT_COUNCIL) {
    const labelNeedle = String(scout.label || '').toLowerCase();
    const idNeedle = String(scout.id || '').toLowerCase();
    const byId = haystack.includes(idNeedle);
    // The label is consulted only when non-empty and the id did not match.
    if (byId || (labelNeedle && haystack.includes(labelNeedle))) mentionedScouts += 1;
  }
  return mentionedScouts;
}
|
|
321
|
+
|
|
169
322
|
export async function writeResearchPlan(dir, prompt, opts = {}) {
|
|
170
323
|
const plan = createResearchPlan(prompt, opts);
|
|
171
324
|
await writeJsonAtomic(path.join(dir, 'research-plan.json'), plan);
|
|
172
325
|
await writeTextAtomic(path.join(dir, 'research-plan.md'), researchPlanMarkdown(plan));
|
|
326
|
+
await writeTextAtomic(path.join(dir, RESEARCH_SOURCE_SKILL_ARTIFACT), researchSourceSkillMarkdown(plan));
|
|
173
327
|
await writeJsonAtomic(path.join(dir, 'novelty-ledger.json'), {
|
|
174
328
|
schema_version: 1,
|
|
175
329
|
entries: [],
|
|
@@ -189,14 +343,46 @@ export async function writeResearchPlan(dir, prompt, opts = {}) {
|
|
|
189
343
|
}
|
|
190
344
|
|
|
191
345
|
export function defaultSourceLedger(plan = null) {
|
|
346
|
+
const sourceLayers = plan?.web_research_policy?.source_layers?.length ? plan.web_research_policy.source_layers : RESEARCH_SOURCE_LAYERS;
|
|
192
347
|
return {
|
|
193
348
|
schema_version: 1,
|
|
194
|
-
policy: plan?.web_research_policy?.mode || '
|
|
349
|
+
policy: plan?.web_research_policy?.mode || 'layered_source_retrieval_and_triangulation',
|
|
195
350
|
created_at: nowIso(),
|
|
351
|
+
source_layer_skill: {
|
|
352
|
+
artifact: RESEARCH_SOURCE_SKILL_ARTIFACT,
|
|
353
|
+
status: 'planned'
|
|
354
|
+
},
|
|
196
355
|
web_search_passes: 0,
|
|
356
|
+
source_layers: sourceLayers.map((layer) => ({
|
|
357
|
+
id: layer.id,
|
|
358
|
+
label: layer.label,
|
|
359
|
+
required: true,
|
|
360
|
+
status: 'pending',
|
|
361
|
+
evidence_role: layer.evidence_role,
|
|
362
|
+
query_templates: layer.query_templates || [],
|
|
363
|
+
source_ids: [],
|
|
364
|
+
counterevidence_ids: [],
|
|
365
|
+
blocker: null,
|
|
366
|
+
notes: ''
|
|
367
|
+
})),
|
|
368
|
+
layer_coverage: {
|
|
369
|
+
required: sourceLayers.map((layer) => layer.id),
|
|
370
|
+
covered: [],
|
|
371
|
+
missing: sourceLayers.map((layer) => layer.id),
|
|
372
|
+
notes: []
|
|
373
|
+
},
|
|
197
374
|
queries: [],
|
|
198
375
|
sources: [],
|
|
199
376
|
counterevidence_sources: [],
|
|
377
|
+
triangulation: {
|
|
378
|
+
cross_layer_checks: [],
|
|
379
|
+
conflicts: [],
|
|
380
|
+
synthesis_notes: []
|
|
381
|
+
},
|
|
382
|
+
quality_model: {
|
|
383
|
+
reporting_basis: 'Record enough source metadata to make search reproducible, including query, layer, locator, publisher or author, publication date when known, accessed_at, reliability, credibility, stance, and cited claim ids.',
|
|
384
|
+
source_quality_fields: ['layer', 'kind', 'title', 'locator', 'publisher_or_author', 'published_at', 'accessed_at', 'reliability', 'credibility', 'stance', 'supports', 'undermines']
|
|
385
|
+
},
|
|
200
386
|
citation_coverage: {
|
|
201
387
|
all_key_claims_cited: false,
|
|
202
388
|
notes: []
|
|
@@ -261,18 +447,53 @@ export function defaultFalsificationLedger() {
|
|
|
261
447
|
};
|
|
262
448
|
}
|
|
263
449
|
|
|
450
|
+
/**
 * List the source-layer ids a plan requires.
 *
 * Uses `plan.web_research_policy.source_layers` when that list is non-empty,
 * otherwise the module-level RESEARCH_SOURCE_LAYERS. Entries whose `id` is
 * falsy are dropped.
 *
 * @param {object|null} [plan=null] - Research plan.
 * @returns {string[]} Layer ids in list order.
 */
function sourceLayerIdsForPlan(plan = null) {
  const plannedLayers = plan?.web_research_policy?.source_layers;
  const layers = plannedLayers?.length ? plannedLayers : RESEARCH_SOURCE_LAYERS;
  return layers.reduce((ids, layer) => {
    if (layer.id) ids.push(layer.id);
    return ids;
  }, []);
}
|
|
454
|
+
|
|
455
|
+
function sourceLayerCoverageStats(sourceLedger = null, requiredLayerIds = RESEARCH_SOURCE_LAYER_IDS) {
|
|
456
|
+
const covered = new Set();
|
|
457
|
+
const sourceRows = [
|
|
458
|
+
...(Array.isArray(sourceLedger?.sources) ? sourceLedger.sources : []),
|
|
459
|
+
...(Array.isArray(sourceLedger?.counterevidence_sources) ? sourceLedger.counterevidence_sources : [])
|
|
460
|
+
];
|
|
461
|
+
for (const source of sourceRows) {
|
|
462
|
+
const layer = source?.layer || source?.layer_id || source?.source_layer;
|
|
463
|
+
if (requiredLayerIds.includes(layer)) covered.add(layer);
|
|
464
|
+
}
|
|
465
|
+
for (const layer of Array.isArray(sourceLedger?.source_layers) ? sourceLedger.source_layers : []) {
|
|
466
|
+
const id = layer?.id || layer?.layer;
|
|
467
|
+
const sourceIds = [
|
|
468
|
+
...(Array.isArray(layer?.source_ids) ? layer.source_ids : []),
|
|
469
|
+
...(Array.isArray(layer?.counterevidence_ids) ? layer.counterevidence_ids : [])
|
|
470
|
+
];
|
|
471
|
+
if (requiredLayerIds.includes(id) && layer?.status === 'covered' && sourceIds.length > 0) covered.add(id);
|
|
472
|
+
}
|
|
473
|
+
const missing = requiredLayerIds.filter((id) => !covered.has(id));
|
|
474
|
+
return { covered: [...covered], missing, required: [...requiredLayerIds] };
|
|
475
|
+
}
|
|
476
|
+
|
|
264
477
|
export function defaultResearchGate() {
|
|
265
478
|
return {
|
|
266
479
|
passed: false,
|
|
267
480
|
report_present: false,
|
|
481
|
+
paper_present: false,
|
|
482
|
+
paper_sections: 0,
|
|
483
|
+
genius_opinion_summary_present: false,
|
|
484
|
+
genius_opinion_summaries: 0,
|
|
485
|
+
research_source_skill_present: false,
|
|
268
486
|
source_ledger_present: false,
|
|
269
487
|
scout_ledger_present: false,
|
|
270
488
|
debate_ledger_present: false,
|
|
271
489
|
novelty_ledger_present: false,
|
|
272
490
|
falsification_ledger_present: false,
|
|
273
|
-
web_search_policy: '
|
|
491
|
+
web_search_policy: 'layered_source_retrieval_and_triangulation',
|
|
274
492
|
web_search_passes: 0,
|
|
275
493
|
source_entries: 0,
|
|
494
|
+
source_layers_required: RESEARCH_SOURCE_LAYER_IDS.length,
|
|
495
|
+
source_layers_covered: 0,
|
|
496
|
+
triangulation_checks: 0,
|
|
276
497
|
independent_scouts: 0,
|
|
277
498
|
xhigh_scouts: 0,
|
|
278
499
|
eureka_moments: 0,
|
|
@@ -295,7 +516,13 @@ export function defaultResearchGate() {
|
|
|
295
516
|
|
|
296
517
|
export async function evaluateResearchGate(dir) {
|
|
297
518
|
const gate = await readJson(path.join(dir, 'research-gate.json'), defaultResearchGate());
|
|
519
|
+
const plan = await readJson(path.join(dir, 'research-plan.json'), null);
|
|
298
520
|
const reportPresent = await exists(path.join(dir, 'research-report.md'));
|
|
521
|
+
const paperPresent = await exists(path.join(dir, RESEARCH_PAPER_ARTIFACT));
|
|
522
|
+
const paperSections = paperPresent ? countResearchPaperSections(await readText(path.join(dir, RESEARCH_PAPER_ARTIFACT), '')) : 0;
|
|
523
|
+
const geniusSummaryPresent = await exists(path.join(dir, RESEARCH_GENIUS_SUMMARY_ARTIFACT));
|
|
524
|
+
const geniusSummaryCount = geniusSummaryPresent ? countGeniusOpinionSummaries(await readText(path.join(dir, RESEARCH_GENIUS_SUMMARY_ARTIFACT), '')) : 0;
|
|
525
|
+
const sourceSkillPresent = await exists(path.join(dir, RESEARCH_SOURCE_SKILL_ARTIFACT));
|
|
299
526
|
const sourcePresent = await exists(path.join(dir, 'source-ledger.json'));
|
|
300
527
|
const scoutPresent = await exists(path.join(dir, 'scout-ledger.json'));
|
|
301
528
|
const debatePresent = await exists(path.join(dir, 'debate-ledger.json'));
|
|
@@ -308,6 +535,9 @@ export async function evaluateResearchGate(dir) {
|
|
|
308
535
|
const sourceEntries = Array.isArray(sourceLedger?.sources) ? sourceLedger.sources.length : 0;
|
|
309
536
|
const counterEvidenceEntries = Array.isArray(sourceLedger?.counterevidence_sources) ? sourceLedger.counterevidence_sources.length : 0;
|
|
310
537
|
const webSearchPasses = Math.max(Number(gate.web_search_passes || 0), Number(sourceLedger?.web_search_passes || 0));
|
|
538
|
+
const requiredSourceLayers = sourceLayerIdsForPlan(plan);
|
|
539
|
+
const sourceLayerStats = sourceLayerCoverageStats(sourceLedger, requiredSourceLayers);
|
|
540
|
+
const triangulationChecks = Array.isArray(sourceLedger?.triangulation?.cross_layer_checks) ? sourceLedger.triangulation.cross_layer_checks.length : 0;
|
|
311
541
|
const scoutRows = Array.isArray(scoutLedger?.scouts) ? scoutLedger.scouts : [];
|
|
312
542
|
const independentScouts = scoutRows.filter((scout) => Array.isArray(scout.findings) && scout.findings.length > 0).length;
|
|
313
543
|
const xhighScouts = scoutRows.filter((scout) => scout.effort === 'xhigh').length;
|
|
@@ -324,6 +554,11 @@ export async function evaluateResearchGate(dir) {
|
|
|
324
554
|
const citationCoverage = gate.citation_coverage === true || sourceLedger?.citation_coverage?.all_key_claims_cited === true;
|
|
325
555
|
const reasons = [];
|
|
326
556
|
if (!reportPresent && gate.report_present !== true) reasons.push('research_report_missing');
|
|
557
|
+
if (!paperPresent) reasons.push('research_paper_missing');
|
|
558
|
+
if (paperSections < RESEARCH_PAPER_SECTION_GROUPS.length) reasons.push('research_paper_sections_missing');
|
|
559
|
+
if (!geniusSummaryPresent && gate.genius_opinion_summary_present !== true) reasons.push('genius_opinion_summary_missing');
|
|
560
|
+
if (Math.max(Number(gate.genius_opinion_summaries || 0), geniusSummaryCount) < RESEARCH_SCOUT_COUNCIL.length) reasons.push('genius_opinion_summary_incomplete');
|
|
561
|
+
if (!sourceSkillPresent && gate.research_source_skill_present !== true) reasons.push('research_source_skill_missing');
|
|
327
562
|
if (!sourcePresent && gate.source_ledger_present !== true) reasons.push('source_ledger_missing');
|
|
328
563
|
if (!scoutPresent && gate.scout_ledger_present !== true) reasons.push('scout_ledger_missing');
|
|
329
564
|
if (!debatePresent && gate.debate_ledger_present !== true) reasons.push('debate_ledger_missing');
|
|
@@ -331,6 +566,8 @@ export async function evaluateResearchGate(dir) {
|
|
|
331
566
|
if (!falsificationPresent && gate.falsification_ledger_present !== true) reasons.push('falsification_ledger_missing');
|
|
332
567
|
if (webSearchPasses < 1) reasons.push('web_search_pass_missing');
|
|
333
568
|
if (Math.max(Number(gate.source_entries || 0), sourceEntries) < 1) reasons.push('source_entry_missing');
|
|
569
|
+
if (Math.max(Number(gate.source_layers_covered || 0), sourceLayerStats.covered.length) < requiredSourceLayers.length) reasons.push('source_layer_coverage_missing');
|
|
570
|
+
if (Math.max(Number(gate.triangulation_checks || 0), triangulationChecks) < 1) reasons.push('cross_layer_triangulation_missing');
|
|
334
571
|
if (Math.max(Number(gate.independent_scouts || 0), independentScouts) < RESEARCH_SCOUT_COUNCIL.length) reasons.push('independent_scouts_missing');
|
|
335
572
|
if (Math.max(Number(gate.xhigh_scouts || 0), xhighScouts) < RESEARCH_SCOUT_COUNCIL.length) reasons.push('scout_effort_not_xhigh');
|
|
336
573
|
if (Math.max(Number(gate.eureka_moments || 0), eurekaMoments) < RESEARCH_SCOUT_COUNCIL.length) reasons.push('eureka_missing');
|
|
@@ -352,7 +589,15 @@ export async function evaluateResearchGate(dir) {
|
|
|
352
589
|
reasons,
|
|
353
590
|
metrics: {
|
|
354
591
|
web_search_passes: webSearchPasses,
|
|
592
|
+
paper_sections: Math.max(Number(gate.paper_sections || 0), paperSections),
|
|
593
|
+
genius_opinion_summary_present: geniusSummaryPresent || gate.genius_opinion_summary_present === true,
|
|
594
|
+
genius_opinion_summaries: Math.max(Number(gate.genius_opinion_summaries || 0), geniusSummaryCount),
|
|
595
|
+
research_source_skill_present: sourceSkillPresent || gate.research_source_skill_present === true,
|
|
355
596
|
source_entries: Math.max(Number(gate.source_entries || 0), sourceEntries),
|
|
597
|
+
source_layers_required: requiredSourceLayers.length,
|
|
598
|
+
source_layers_covered: Math.max(Number(gate.source_layers_covered || 0), sourceLayerStats.covered.length),
|
|
599
|
+
source_layers_missing: sourceLayerStats.missing,
|
|
600
|
+
triangulation_checks: Math.max(Number(gate.triangulation_checks || 0), triangulationChecks),
|
|
356
601
|
independent_scouts: Math.max(Number(gate.independent_scouts || 0), independentScouts),
|
|
357
602
|
xhigh_scouts: Math.max(Number(gate.xhigh_scouts || 0), xhighScouts),
|
|
358
603
|
eureka_moments: Math.max(Number(gate.eureka_moments || 0), eurekaMoments),
|
|
@@ -371,41 +616,89 @@ export async function evaluateResearchGate(dir) {
|
|
|
371
616
|
}
|
|
372
617
|
|
|
373
618
|
export async function writeMockResearchResult(dir, plan) {
|
|
619
|
+
const mockLayerSources = RESEARCH_SOURCE_LAYERS.map((layer, index) => ({
|
|
620
|
+
id: `mock-source-${index + 1}`,
|
|
621
|
+
layer: layer.id,
|
|
622
|
+
kind: 'selftest',
|
|
623
|
+
title: `Mock ${layer.label} coverage`,
|
|
624
|
+
locator: 'writeMockResearchResult',
|
|
625
|
+
accessed_at: nowIso(),
|
|
626
|
+
reliability: 'mock',
|
|
627
|
+
credibility: 'mock',
|
|
628
|
+
stance: layer.id === 'counterevidence_factcheck' ? 'undermines' : 'supports',
|
|
629
|
+
supports: layer.id === 'counterevidence_factcheck' ? [] : ['mock-insight-1'],
|
|
630
|
+
undermines: layer.id === 'counterevidence_factcheck' ? ['mock-insight-1'] : [],
|
|
631
|
+
notes: `Selftest fixture for the ${layer.id} source layer; no live web call is made in --mock mode.`
|
|
632
|
+
}));
|
|
374
633
|
const sourceLedger = {
|
|
375
634
|
schema_version: 1,
|
|
376
|
-
policy: '
|
|
635
|
+
policy: 'layered_source_retrieval_and_triangulation',
|
|
377
636
|
created_at: nowIso(),
|
|
378
637
|
mode: 'selftest_mock',
|
|
638
|
+
source_layer_skill: {
|
|
639
|
+
artifact: RESEARCH_SOURCE_SKILL_ARTIFACT,
|
|
640
|
+
status: 'created'
|
|
641
|
+
},
|
|
379
642
|
web_search_passes: 1,
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
643
|
+
source_layers: RESEARCH_SOURCE_LAYERS.map((layer, index) => ({
|
|
644
|
+
id: layer.id,
|
|
645
|
+
label: layer.label,
|
|
646
|
+
required: true,
|
|
647
|
+
status: 'covered',
|
|
648
|
+
evidence_role: layer.evidence_role,
|
|
649
|
+
query_templates: layer.query_templates || [],
|
|
650
|
+
source_ids: [`mock-source-${index + 1}`],
|
|
651
|
+
counterevidence_ids: layer.id === 'counterevidence_factcheck' ? ['mock-counter-1'] : [],
|
|
652
|
+
blocker: null,
|
|
653
|
+
notes: 'Mock mode records layer coverage without live web access.'
|
|
654
|
+
})),
|
|
655
|
+
layer_coverage: {
|
|
656
|
+
required: [...RESEARCH_SOURCE_LAYER_IDS],
|
|
657
|
+
covered: [...RESEARCH_SOURCE_LAYER_IDS],
|
|
658
|
+
missing: [],
|
|
659
|
+
notes: ['mock fixture covers every research source layer']
|
|
660
|
+
},
|
|
661
|
+
queries: RESEARCH_SOURCE_LAYERS.map((layer) => ({
|
|
662
|
+
scout_id: layer.id === 'counterevidence_factcheck' ? 'skeptic' : null,
|
|
663
|
+
layer: layer.id,
|
|
664
|
+
query: `mock ${layer.id} layered research source search for ${plan.prompt}`,
|
|
665
|
+
status: 'mocked'
|
|
666
|
+
})),
|
|
667
|
+
sources: mockLayerSources,
|
|
398
668
|
counterevidence_sources: [
|
|
399
669
|
{
|
|
400
670
|
id: 'mock-counter-1',
|
|
671
|
+
layer: 'counterevidence_factcheck',
|
|
401
672
|
kind: 'selftest',
|
|
402
673
|
title: 'Mock overclaim counterexample',
|
|
403
674
|
locator: 'writeMockResearchResult',
|
|
404
675
|
accessed_at: nowIso(),
|
|
676
|
+
reliability: 'mock',
|
|
677
|
+
credibility: 'mock',
|
|
678
|
+
stance: 'undermines',
|
|
405
679
|
undermines: ['mock-insight-1'],
|
|
406
680
|
notes: 'Shows the gate must fail if a run produces no tests or falsifiers.'
|
|
407
681
|
}
|
|
408
682
|
],
|
|
683
|
+
triangulation: {
|
|
684
|
+
cross_layer_checks: [
|
|
685
|
+
{
|
|
686
|
+
id: 'mock-triangulation-1',
|
|
687
|
+
claim: 'Research Mode should not synthesize from a single corpus.',
|
|
688
|
+
source_ids: ['mock-source-1', 'mock-source-2', 'mock-source-5', 'mock-counter-1'],
|
|
689
|
+
result: 'survives_with_layered_evidence_requirement'
|
|
690
|
+
},
|
|
691
|
+
{
|
|
692
|
+
id: 'mock-triangulation-2',
|
|
693
|
+
claim: 'Public discourse is useful only when checked against formal and official layers.',
|
|
694
|
+
source_ids: ['mock-source-1', 'mock-source-2', 'mock-source-5', 'mock-source-6'],
|
|
695
|
+
result: 'downgrade_popularity_to_signal_not_truth'
|
|
696
|
+
}
|
|
697
|
+
],
|
|
698
|
+
conflicts: [],
|
|
699
|
+
synthesis_notes: ['mock fixture requires cross-layer checks before synthesis']
|
|
700
|
+
},
|
|
701
|
+
quality_model: defaultSourceLedger(plan).quality_model,
|
|
409
702
|
citation_coverage: {
|
|
410
703
|
all_key_claims_cited: true,
|
|
411
704
|
notes: ['mock report and novelty entry cite mock-source-1 and mock-counter-1']
|
|
@@ -493,23 +786,51 @@ export async function writeMockResearchResult(dir, plan) {
|
|
|
493
786
|
}
|
|
494
787
|
]
|
|
495
788
|
};
|
|
789
|
+
const geniusSummary = [
|
|
790
|
+
'# Genius Opinion Summary',
|
|
791
|
+
'',
|
|
792
|
+
`Prompt: ${plan.prompt}`,
|
|
793
|
+
'',
|
|
794
|
+
'## Scout Opinions',
|
|
795
|
+
...RESEARCH_SCOUT_COUNCIL.flatMap((scout) => [
|
|
796
|
+
`### ${scout.label} (${scout.id})`,
|
|
797
|
+
`Final opinion: ${scout.label} wants the run to preserve ${scout.mandate.toLowerCase()} while producing a cited, falsifiable insight.`,
|
|
798
|
+
'Strongest evidence: mock-source-1 plus the layered source ledger.',
|
|
799
|
+
'Main disagreement: whether formal structure or cheap empirical probes should dominate the first pass.',
|
|
800
|
+
'Changed mind: accepted that citation coverage, counterevidence, and triangulation are gates before synthesis.',
|
|
801
|
+
''
|
|
802
|
+
]),
|
|
803
|
+
'## Council Consensus',
|
|
804
|
+
'The council keeps one modest, testable claim: Research Mode is useful when it writes a source-cited paper, records every scout opinion, triangulates across source layers, and exposes the next decisive test.'
|
|
805
|
+
].join('\n');
|
|
806
|
+
await writeTextAtomic(path.join(dir, RESEARCH_SOURCE_SKILL_ARTIFACT), researchSourceSkillMarkdown(plan));
|
|
496
807
|
await writeJsonAtomic(path.join(dir, 'source-ledger.json'), sourceLedger);
|
|
497
808
|
await writeJsonAtomic(path.join(dir, 'scout-ledger.json'), scoutLedger);
|
|
498
809
|
await writeJsonAtomic(path.join(dir, 'debate-ledger.json'), debateLedger);
|
|
499
810
|
await writeJsonAtomic(path.join(dir, 'falsification-ledger.json'), falsificationLedger);
|
|
500
811
|
await writeJsonAtomic(path.join(dir, 'novelty-ledger.json'), ledger);
|
|
501
|
-
await writeTextAtomic(path.join(dir,
|
|
812
|
+
await writeTextAtomic(path.join(dir, RESEARCH_GENIUS_SUMMARY_ARTIFACT), `${geniusSummary}\n`);
|
|
813
|
+
await writeTextAtomic(path.join(dir, 'research-report.md'), `# SKS Research Report\n\nPrompt: ${plan.prompt}\n\n## Scout Council Synthesis\n\nThe mock council keeps one cited methodological insight: a research mode should force layered, falsifiable novelty rather than summarize known material from one corpus [mock-source-1].\n\n## Source Coverage\n\nThis is a selftest fixture. It records mock coverage for academic literature, official data, standards, news, public discourse, developer knowledge, and counterevidence layers, but does not perform live web browsing in --mock mode.\n\n## Candidate Insight\n\nA useful research run must produce source-cited, cross-layer triangulated, falsifiable novelty with scout findings and a cheap probe.\n\n## Falsification\n\nThe claim is weak if no new testable prediction, counterevidence source, cross-layer check, or experiment is produced [mock-counter-1].\n\n## Next Test\n\nCompare this mode against a summary-only run and score candidate insights, falsification passes, citation coverage, source-layer coverage, triangulation checks, and testability.\n`);
|
|
814
|
+
await writeTextAtomic(path.join(dir, RESEARCH_PAPER_ARTIFACT), `# Research Paper: ${plan.prompt}\n\n## Abstract\nA source-cited research run should produce cross-layer, falsifiable novelty rather than only summarize known material.\n\n## Introduction\nThe mock topic is evaluated as a research workflow outcome with layered source coverage [mock-source-1].\n\n## Methodology\nFive xhigh scouts produce Eureka ideas, debate, triangulate source layers, and falsify the strongest claim.\n\n## Findings\nThe surviving finding is that useful research needs cited novelty, source-layer coverage, cross-layer triangulation, and a cheap decisive probe.\n\n## Discussion\nThe debate favors gate-backed evidence over narrative confidence, and treats public discourse as signal rather than truth.\n\n## Limitations and Falsification\nThe claim fails without sources, counterevidence, triangulation checks, or testable predictions [mock-counter-1].\n\n## Conclusion and Next Experiment\nCompare this loop against a summary-only baseline and score testable insights.\n\n## References\n- [mock-source-1] Mock academic literature coverage.\n- [mock-source-2] Mock official government and leading-institution knowledge coverage.\n- [mock-source-3] Mock standards and primary documents coverage.\n- [mock-source-4] Mock current news and global reporting coverage.\n- [mock-source-5] Mock public discourse coverage.\n- [mock-source-6] Mock developer and practitioner knowledge coverage.\n- [mock-source-7] Mock counterevidence and fact-checking coverage.\n- [mock-counter-1] Mock overclaim counterexample.\n`);
|
|
502
815
|
await writeJsonAtomic(path.join(dir, 'research-gate.json'), {
|
|
503
816
|
...defaultResearchGate(),
|
|
504
817
|
passed: true,
|
|
505
818
|
report_present: true,
|
|
819
|
+
paper_present: true,
|
|
820
|
+
paper_sections: RESEARCH_PAPER_SECTION_GROUPS.length,
|
|
821
|
+
genius_opinion_summary_present: true,
|
|
822
|
+
genius_opinion_summaries: RESEARCH_SCOUT_COUNCIL.length,
|
|
823
|
+
research_source_skill_present: true,
|
|
506
824
|
source_ledger_present: true,
|
|
507
825
|
scout_ledger_present: true,
|
|
508
826
|
debate_ledger_present: true,
|
|
509
827
|
novelty_ledger_present: true,
|
|
510
828
|
falsification_ledger_present: true,
|
|
511
829
|
web_search_passes: 1,
|
|
512
|
-
source_entries:
|
|
830
|
+
source_entries: mockLayerSources.length,
|
|
831
|
+
source_layers_required: RESEARCH_SOURCE_LAYER_IDS.length,
|
|
832
|
+
source_layers_covered: RESEARCH_SOURCE_LAYER_IDS.length,
|
|
833
|
+
triangulation_checks: sourceLedger.triangulation.cross_layer_checks.length,
|
|
513
834
|
independent_scouts: RESEARCH_SCOUT_COUNCIL.length,
|
|
514
835
|
xhigh_scouts: RESEARCH_SCOUT_COUNCIL.length,
|
|
515
836
|
eureka_moments: RESEARCH_SCOUT_COUNCIL.length,
|
|
@@ -522,12 +843,12 @@ export async function writeMockResearchResult(dir, plan) {
|
|
|
522
843
|
falsification_cases: 1,
|
|
523
844
|
testable_predictions: 1,
|
|
524
845
|
citation_coverage: true,
|
|
525
|
-
evidence: ['mock research report', 'mock source ledger', 'mock scout ledger', 'mock debate ledger', 'mock novelty ledger', 'mock falsification ledger'],
|
|
846
|
+
evidence: ['mock research report', 'mock research paper', 'mock genius opinion summary', 'mock research source skill', 'mock layered source ledger', 'mock scout ledger', 'mock debate ledger', 'mock novelty ledger', 'mock falsification ledger'],
|
|
526
847
|
notes: ['mock mode records the new contract but does not call a model or perform live web browsing']
|
|
527
848
|
});
|
|
528
849
|
return evaluateResearchGate(dir);
|
|
529
850
|
}
|
|
530
851
|
|
|
531
852
|
export function buildResearchPrompt({ id, mission, plan, cycle, previous }) {
|
|
532
|
-
return `You are running SKS Research Mode.\nMISSION: ${id}\nTOPIC: ${mission.prompt}\nCYCLE: ${cycle}\nMODE: Genius Scout Council + frontier discovery loop. Use maximum reasoning depth available under the current Codex profile.\nNO-QUESTION LOCK: Do not ask the user. Resolve scope from research-plan.json and current project evidence.\nSAFETY: Destructive database operations and unsafe external actions are forbidden. Prefer read-only inspection, local files, and cited public sources.\nPERSONA POLICY: Use Einstein/Feynman/Turing/von Neumann-inspired scout lenses only as cognitive roles. Do not impersonate, roleplay private identity, or speak as the historical people.\nSCOUT EFFORT POLICY: Every Research scout agent must use reasoning_effort=xhigh. Record effort: "xhigh" for every scout in scout-ledger.json. Any lower-effort scout output must keep research-gate.json unpassed.\nEUREKA POLICY: Every scout must literally write "Eureka!" and one non-obvious, source-linked idea before debate.\nDEBATE POLICY: The scouts must debate vigorously but stay evidence-bound. Every scout must challenge or respond at least once, and debate-ledger.json must record the exchanges before synthesis.\
|
|
853
|
+
return `You are running SKS Research Mode.\nMISSION: ${id}\nTOPIC: ${mission.prompt}\nCYCLE: ${cycle}\nMODE: Genius Scout Council + frontier discovery loop. Use maximum reasoning depth available under the current Codex profile.\nLONG-RUN REAL-RESEARCH POLICY: Normal Research is allowed to take one or two hours when the question requires it. Do real source gathering and evidence comparison; do not shortcut into mock, fixture, or summary-only output. If live source access is unavailable, write the blocker and keep the gate unpassed.\nNO-QUESTION LOCK: Do not ask the user. Resolve scope from research-plan.json and current project evidence.\nSAFETY: Destructive database operations and unsafe external actions are forbidden. Prefer read-only inspection, local files, and cited public sources.\nPERSONA POLICY: Use Einstein/Feynman/Turing/von Neumann-inspired scout lenses only as cognitive roles. Do not impersonate, roleplay private identity, or speak as the historical people.\nSCOUT EFFORT POLICY: Every Research scout agent must use reasoning_effort=xhigh. Record effort: "xhigh" for every scout in scout-ledger.json. Any lower-effort scout output must keep research-gate.json unpassed.\nEUREKA POLICY: Every scout must literally write "Eureka!" and one non-obvious, source-linked idea before debate.\nDEBATE POLICY: The scouts must debate vigorously but stay evidence-bound. Every scout must challenge or respond at least once, and debate-ledger.json must record the exchanges before synthesis.\nPAPER POLICY: After the report and ledgers, write research-paper.md as a concise manuscript with Abstract, Introduction, Methodology, Findings/Results, Discussion, Limitations/Falsification, Conclusion/Next Experiment, and References.\nSOURCE SKILL POLICY: Create or update ${RESEARCH_SOURCE_SKILL_ARTIFACT} as a route-local source collection skill before synthesis. It must name the selected source layers, query routes, quality fields, blockers, and cross-layer triangulation checks. 
Do not edit generated .agents/skills during the research run.\nWEB/SOURCE POLICY: Run layered source retrieval across every safely available layer before synthesis: latest public papers, official government or leading-institution data, standards or primary docs, current news including BBC/CNN/GDELT-style sources when relevant, public discourse including X/Twitter and Reddit when available, developer/practitioner sources such as Stack Overflow/Stack Exchange/GitHub, and counterevidence or fact-checking sources. Treat public discourse as signal, not truth. If a layer cannot be searched, record the blocker in source-ledger.json and do not pass the gate.\nRESEARCH PLAN:\n${JSON.stringify(plan, null, 2)}\n\nOBJECTIVE: Produce genuinely useful candidate discoveries: non-obvious hypotheses, mechanisms, predictions, or experiments. Do not merely summarize. Mark uncertainty clearly.\n\nREQUIRED PROCESS:\n1. Source skill first: create ${RESEARCH_SOURCE_SKILL_ARTIFACT} with source layers, query templates, quality fields, blockers, and triangulation rules.\n2. Layered source search: create source-ledger.json with source_layers, queries, source ids, source quality notes, counterevidence sources, triangulation.cross_layer_checks, citation coverage, and blockers.\n3. Independent xhigh scouts: create scout-ledger.json with effort=xhigh, a literal Eureka! idea, findings, source_ids, falsifiers, and cheap_probes for every scout lens.\n4. Debate: create debate-ledger.json with evidence-bound challenge/response exchanges involving every scout before synthesis.\n5. Falsification: create falsification-ledger.json with attacks, missing evidence, source conflicts, and decisive next tests.\n6. Synthesis: write research-report.md and novelty-ledger.json only after cited scout findings, Eureka ideas, debate, cross-layer triangulation, and falsification are recorded.\n7. 
Paper: write research-paper.md as a paper-style manuscript with source-ledger references and limitations.\n\nREQUIRED OUTPUT FILES in .sneakoscope/missions/${id}/:\n- research-report.md: concise report with framing, source coverage, scout synthesis, debate synthesis, hypotheses, falsification, predictions, and next experiments. Cite source-ledger ids for factual claims.\n- research-paper.md: paper manuscript with Abstract, Introduction, Methodology, Findings/Results, Discussion, Limitations/Falsification, Conclusion/Next Experiment, and References using source-ledger ids.\n- ${RESEARCH_SOURCE_SKILL_ARTIFACT}: route-local source collection skill; it is evidence for the Skill Creator step and must not mutate generated .agents/skills.\n- source-ledger.json: layered web/source queries, source ids, source priority, source quality notes, counterevidence sources, citation coverage, triangulation checks, and blockers.\n- scout-ledger.json: one entry per scout lens with effort, eureka, query_set, findings, source_ids, falsifiers, and cheap_probes.\n- debate-ledger.json: evidence-bound challenge/response exchanges, participants, changed minds, and unresolved conflicts.\n- novelty-ledger.json: entries with claim, novelty, confidence, falsifiability, evidence source ids, falsifiers, next_experiment.\n- falsification-ledger.json: attacks/counterexamples/source conflicts, result, and next_decisive_tests.\n- research-gate.json: set passed only when all ledgers exist, ${RESEARCH_SOURCE_SKILL_ARTIFACT} exists, research-paper.md exists with required paper sections, layered web/source retrieval covered every required source layer, at least one cross-layer triangulation check exists, all scouts have effort=xhigh, all scouts have literal Eureka! 
ideas, every scout participated in debate, at least one counterevidence source exists, citation coverage is complete, at least one insight survived falsification, at least one testable prediction exists, and unsupported breakthrough claims are zero.\n\nPrevious cycle tail:\n${String(previous || '').slice(-2500)}\n`;
|
|
533
854
|
}
|
package/src/core/routes.mjs
CHANGED
|
@@ -390,10 +390,10 @@ export const ROUTES = [
|
|
|
390
390
|
command: '$Research',
|
|
391
391
|
mode: 'RESEARCH',
|
|
392
392
|
route: 'research mission',
|
|
393
|
-
description: 'Frontier discovery with xhigh genius-lens scouts, Eureka ideas, vigorous evidence-bound debate,
|
|
394
|
-
requiredSkills: ['research', 'research-discovery', 'pipeline-runner',
|
|
395
|
-
lifecycle: ['research_plan', '
|
|
396
|
-
context7Policy: '
|
|
393
|
+
description: 'Frontier discovery with xhigh genius-lens scouts, Eureka ideas, vigorous evidence-bound debate, layered public source retrieval, falsification, a paper manuscript, a final genius-opinion summary, and testable predictions.',
|
|
394
|
+
requiredSkills: ['research', 'research-discovery', 'pipeline-runner', REFLECTION_SKILL_NAME, 'honest-mode'],
|
|
395
|
+
lifecycle: ['research_plan', 'source_skill', 'layered_source_ledger', 'xhigh_scout_council', 'eureka_moments', 'debate_ledger', 'report', 'paper', 'genius_opinion_summary', 'novelty_ledger', 'falsification_ledger', 'research_gate', 'post_route_reflection', 'honest_mode'],
|
|
396
|
+
context7Policy: 'if_external_docs',
|
|
397
397
|
reasoningPolicy: 'xhigh',
|
|
398
398
|
stopGate: 'research-gate.json',
|
|
399
399
|
cliEntrypoint: 'sks research prepare|run',
|
|
@@ -537,7 +537,7 @@ export const COMMAND_CATALOG = [
|
|
|
537
537
|
{ name: 'init', usage: 'sks init [--force] [--local-only] [--install-scope global|project]', description: 'Initialize the local SKS control surface.' },
|
|
538
538
|
{ name: 'selftest', usage: 'sks selftest [--mock]', description: 'Run local smoke tests without calling a model.' },
|
|
539
539
|
{ name: 'goal', usage: 'sks goal create|pause|resume|clear|status ...', description: 'Prepare and control the fast SKS bridge overlay for Codex native persisted /goal workflows.' },
|
|
540
|
-
{ name: 'research', usage: 'sks research prepare|run|status ...', description: 'Run
|
|
540
|
+
{ name: 'research', usage: 'sks research prepare|run|status ...', description: 'Run long-form real research missions with xhigh scout Eureka ideas, debate, layered sources, paper, novelty, and falsification gates.' },
|
|
541
541
|
{ name: 'db', usage: 'sks db policy|scan|mcp-config|classify|check ...', description: 'Inspect and enforce database/Supabase safety policy.' },
|
|
542
542
|
{ name: 'eval', usage: 'sks eval run|compare|thresholds ...', description: 'Run deterministic context-quality and performance evidence checks.' },
|
|
543
543
|
{ name: 'harness', usage: 'sks harness fixture|review [--json]', description: 'Run Harness Growth Factory fixtures for forgetting, skills, experiments, tool taxonomy, permissions, MultiAgentV2, and tmux views.' },
|
|
@@ -585,9 +585,21 @@ function leadingDollarCommandMatch(prompt) {
|
|
|
585
585
|
|| text.match(/^\[\$([A-Za-z][A-Za-z0-9_-]*)\]\([^)]+\)(?:\s|:|$)/);
|
|
586
586
|
}
|
|
587
587
|
|
|
588
|
+
function embeddedDollarCommandMatch(prompt) {
|
|
589
|
+
const text = String(prompt || '');
|
|
590
|
+
const matches = [];
|
|
591
|
+
for (const match of text.matchAll(/\[\$([A-Za-z][A-Za-z0-9_-]*)\]\([^)]+\)/g)) matches.push({ index: match.index, command: match[1] });
|
|
592
|
+
for (const match of text.matchAll(/(^|[\s([{<])\$([A-Za-z][A-Za-z0-9_-]*)(?=\s|:|$|[.,!?;)\]}])/g)) matches.push({ index: match.index + match[1].length, command: match[2] });
|
|
593
|
+
return matches
|
|
594
|
+
.sort((a, b) => a.index - b.index)
|
|
595
|
+
.find((match) => routeByDollarCommand(match.command) || String(match.command || '').toUpperCase() === 'MAD-SKS') || null;
|
|
596
|
+
}
|
|
597
|
+
|
|
588
598
|
export function dollarCommand(prompt) {
|
|
589
|
-
const
|
|
590
|
-
|
|
599
|
+
const leading = leadingDollarCommandMatch(prompt);
|
|
600
|
+
if (leading) return leading[1].toUpperCase();
|
|
601
|
+
const embedded = embeddedDollarCommandMatch(prompt);
|
|
602
|
+
return embedded ? embedded.command.toUpperCase() : null;
|
|
591
603
|
}
|
|
592
604
|
|
|
593
605
|
export function hasMadSksSignal(prompt = '') {
|