sneakoscope 0.7.68 → 0.7.69
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/package.json +1 -1
- package/src/cli/main.mjs +5 -5
- package/src/cli/maintenance-commands.mjs +6 -2
- package/src/core/fsx.mjs +1 -1
- package/src/core/hooks-runtime.mjs +1 -1
- package/src/core/init.mjs +1 -1
- package/src/core/pipeline.mjs +1 -1
- package/src/core/research.mjs +35 -4
- package/src/core/routes.mjs +3 -3
package/README.md
CHANGED
|
@@ -211,7 +211,7 @@ sks skill-dream run --json
|
|
|
211
211
|
sks code-structure scan --json
|
|
212
212
|
```
|
|
213
213
|
|
|
214
|
-
`sks research` prepares a genius-lens scout council, requires every scout to run at `xhigh`, records one literal `Eureka!` idea per scout, runs an evidence-bound debate, maximizes available web/source retrieval before synthesis, and requires `source-ledger.json`, `scout-ledger.json`, `debate-ledger.json`, `novelty-ledger.json`, `falsification-ledger.json`, and `research-gate.json` so research runs stay source-backed, adversarially checked, and
|
|
214
|
+
`sks research` prepares a genius-lens scout council, requires every scout to run at `xhigh`, records one literal `Eureka!` idea per scout, runs an evidence-bound debate, maximizes available web/source retrieval before synthesis, and requires `research-report.md`, `research-paper.md`, `source-ledger.json`, `scout-ledger.json`, `debate-ledger.json`, `novelty-ledger.json`, `falsification-ledger.json`, and `research-gate.json` so research runs stay source-backed, adversarially checked, falsifiable, and paper-ready. `research status` reports source entries, counterevidence, xhigh scout count, Eureka moments, debate exchanges, paper presence/sections, scout findings, and falsification cases alongside the gate.
|
|
215
215
|
|
|
216
216
|
`sks pipeline plan` shows the active route lane, kept/skipped stages, verification commands, and no-unrequested-fallback invariant. `sks proof-field scan` is the lightweight rubric for small changes; risky or broad signals return to the full Team/Honest path.
|
|
217
217
|
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "sneakoscope",
|
|
3
3
|
"displayName": "ㅅㅋㅅ",
|
|
4
|
-
"version": "0.7.68",
|
|
4
|
+
"version": "0.7.69",
|
|
5
5
|
"description": "Sneakoscope Codex: database-safe Codex CLI/App harness with Team, Goal, AutoResearch, TriWiki, and Honest Mode.",
|
|
6
6
|
"type": "module",
|
|
7
7
|
"homepage": "https://github.com/mandarange/Sneakoscope-Codex#readme",
|
package/src/cli/main.mjs
CHANGED
|
@@ -3783,14 +3783,14 @@ async function selftest() {
|
|
|
3783
3783
|
const { dir: researchDir, mission: researchMission } = await createMission(tmp, { mode: 'research', prompt: '새로운 코드 리뷰 방법론 연구' });
|
|
3784
3784
|
const researchPlan = await writeResearchPlan(researchDir, researchMission.prompt, {});
|
|
3785
3785
|
if (researchPlan.methodology !== 'genius-scout-council-frontier-discovery-loop' || researchPlan.web_research_policy?.mode !== 'maximum_source_retrieval') throw new Error('selftest: research plan contract');
|
|
3786
|
-
|
|
3787
|
-
|
|
3788
|
-
|
|
3786
|
+
const rArts = researchPlan.required_artifacts || [];
|
|
3787
|
+
for (const a of ['source-ledger.json', 'scout-ledger.json', 'debate-ledger.json', 'falsification-ledger.json']) if (!rArts.includes(a) || !(await exists(path.join(researchDir, a)))) throw new Error('selftest: research artifact');
|
|
3788
|
+
if (!rArts.includes('research-paper.md')) throw new Error('selftest: research paper');
|
|
3789
3789
|
const initialResearchGate = await evaluateResearchGate(researchDir);
|
|
3790
|
-
if (initialResearchGate.passed ||
|
|
3790
|
+
if (initialResearchGate.passed || ['web_search_pass_missing', 'eureka_missing', 'debate_exchanges_missing', 'research_paper_missing'].some((r) => !initialResearchGate.reasons.includes(r))) throw new Error('selftest: research gate');
|
|
3791
3791
|
const researchGate = await writeMockResearchResult(researchDir, researchPlan);
|
|
3792
3792
|
if (!researchGate.passed) throw new Error('selftest: mock research gate did not pass');
|
|
3793
|
-
if (
|
|
3793
|
+
if (['independent_scouts', 'xhigh_scouts', 'eureka_moments', 'debate_participants'].some((m) => researchGate.metrics?.[m] < 5) || researchGate.metrics?.counterevidence_sources < 1 || researchGate.metrics?.paper_sections < 8 || researchGate.metrics?.citation_coverage !== true || researchGate.metrics?.falsification_cases < 1) throw new Error('selftest: research metrics');
|
|
3794
3794
|
await writeJsonAtomic(path.join(dir, 'done-gate.json'), { passed: true, unsupported_critical_claims: 0, database_safety_violation: false, database_safety_reviewed: true, visual_drift: 'low', wiki_drift: 'low', tests_required: false });
|
|
3795
3795
|
const gate = await evaluateDoneGate(tmp, id);
|
|
3796
3796
|
if (!gate.passed) throw new Error('selftest: done gate');
|
|
package/src/cli/maintenance-commands.mjs
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import path from 'node:path';
|
|
2
2
|
import fsp from 'node:fs/promises';
|
|
3
|
-
import { readJson, writeJsonAtomic, writeTextAtomic, appendJsonlBounded, nowIso, exists, ensureDir, packageRoot, dirSize, formatBytes, PACKAGE_VERSION, sksRoot, readStdin } from '../core/fsx.mjs';
|
|
3
|
+
import { readJson, readText, writeJsonAtomic, writeTextAtomic, appendJsonlBounded, nowIso, exists, ensureDir, packageRoot, dirSize, formatBytes, PACKAGE_VERSION, sksRoot, readStdin } from '../core/fsx.mjs';
|
|
4
4
|
import { initProject } from '../core/init.mjs';
|
|
5
5
|
import { getCodexInfo, runCodexExec } from '../core/codex-adapter.mjs';
|
|
6
6
|
import { createMission, loadMission, findLatestMission, missionDir, setCurrent, stateFile } from '../core/mission.mjs';
|
|
@@ -8,7 +8,7 @@ import { buildQuestionSchema, writeQuestions } from '../core/questions.mjs';
|
|
|
8
8
|
import { sealContract } from '../core/decision-contract.mjs';
|
|
9
9
|
import { buildQaLoopQuestionSchema, buildQaLoopPrompt, evaluateQaGate, qaStatus, writeMockQaResult, writeQaLoopArtifacts } from '../core/qa-loop.mjs';
|
|
10
10
|
import { containsUserQuestion, noQuestionContinuationReason } from '../core/no-question-guard.mjs';
|
|
11
|
-
import { buildResearchPrompt, evaluateResearchGate, writeMockResearchResult, writeResearchPlan } from '../core/research.mjs';
|
|
11
|
+
import { RESEARCH_PAPER_ARTIFACT, countResearchPaperSections, buildResearchPrompt, evaluateResearchGate, writeMockResearchResult, writeResearchPlan } from '../core/research.mjs';
|
|
12
12
|
import { storageReport, enforceRetention, pruneWikiArtifacts } from '../core/retention.mjs';
|
|
13
13
|
import { evaluateDoneGate } from '../core/hproof.mjs';
|
|
14
14
|
import { renderCartridge, validateCartridge, driftCartridge, snapshotCartridge } from '../core/gx-renderer.mjs';
|
|
@@ -479,6 +479,7 @@ async function researchPrepare(args) {
|
|
|
479
479
|
console.log(`Methodology: ${plan.methodology}`);
|
|
480
480
|
console.log(`Plan: ${path.relative(root, path.join(dir, 'research-plan.md'))}`);
|
|
481
481
|
console.log(`Pipeline: ${path.relative(root, path.join(dir, PIPELINE_PLAN_ARTIFACT))}`);
|
|
482
|
+
console.log(`Paper: ${RESEARCH_PAPER_ARTIFACT}`);
|
|
482
483
|
console.log('Ledgers: source-ledger.json, scout-ledger.json, debate-ledger.json, novelty-ledger.json, falsification-ledger.json');
|
|
483
484
|
console.log(`Run: sks research run ${id} --max-cycles 3`);
|
|
484
485
|
}
|
|
@@ -557,6 +558,7 @@ async function researchStatus(args) {
|
|
|
557
558
|
const scoutLedger = await readJson(path.join(dir, 'scout-ledger.json'), null);
|
|
558
559
|
const debateLedger = await readJson(path.join(dir, 'debate-ledger.json'), null);
|
|
559
560
|
const falsificationLedger = await readJson(path.join(dir, 'falsification-ledger.json'), null);
|
|
561
|
+
const paperText = await readText(path.join(dir, RESEARCH_PAPER_ARTIFACT), '');
|
|
560
562
|
const scoutRows = Array.isArray(scoutLedger?.scouts) ? scoutLedger.scouts : [];
|
|
561
563
|
console.log(JSON.stringify({
|
|
562
564
|
mission,
|
|
@@ -569,6 +571,8 @@ async function researchStatus(args) {
|
|
|
569
571
|
eureka_moments: scoutRows.length ? scoutRows.filter((scout) => scout.eureka?.exclamation === 'Eureka!' && String(scout.eureka?.idea || '').trim()).length : null,
|
|
570
572
|
scout_findings: scoutRows.length ? scoutRows.reduce((sum, scout) => sum + (Array.isArray(scout.findings) ? scout.findings.length : 0), 0) : null,
|
|
571
573
|
debate_exchanges: debateLedger?.exchanges?.length ?? null,
|
|
574
|
+
paper_present: Boolean(paperText.trim()),
|
|
575
|
+
paper_sections: countResearchPaperSections(paperText),
|
|
572
576
|
falsification_cases: falsificationLedger?.cases?.length ?? null
|
|
573
577
|
}, null, 2));
|
|
574
578
|
}
|
package/src/core/fsx.mjs
CHANGED
|
@@ -5,7 +5,7 @@ import os from 'node:os';
|
|
|
5
5
|
import crypto from 'node:crypto';
|
|
6
6
|
import { spawn } from 'node:child_process';
|
|
7
7
|
|
|
8
|
-
export const PACKAGE_VERSION = '0.7.68';
|
|
8
|
+
export const PACKAGE_VERSION = '0.7.69';
|
|
9
9
|
export const DEFAULT_PROCESS_TAIL_BYTES = 256 * 1024;
|
|
10
10
|
export const DEFAULT_PROCESS_TIMEOUT_MS = 30 * 60 * 1000;
|
|
11
11
|
|
|
package/src/core/hooks-runtime.mjs
CHANGED
|
@@ -1006,7 +1006,7 @@ function visibleHookMessage(name, text = '') {
|
|
|
1006
1006
|
if (body.includes('Computer Use fast lane active')) return 'SKS: Computer Use fast lane injected; defer TriWiki/Honest Mode to final closeout.';
|
|
1007
1007
|
if (body.includes('MANDATORY ambiguity-removal gate') || body.includes('VISIBLE RESPONSE CONTRACT') || body.includes('Required questions still pending')) return 'SKS: stale clarification gate detected; continue from inferred route contract.';
|
|
1008
1008
|
if (body.includes('$Team route prepared') || body.includes('Team route')) return 'SKS: Team route, live transcript, and subagent plan injected.';
|
|
1009
|
-
if (body.includes('$Research route prepared')) return 'SKS: Research route, xhigh Eureka scout council, source
|
|
1009
|
+
if (body.includes('$Research route prepared')) return 'SKS: Research route, xhigh Eureka scout council, source/debate ledgers, paper output, and falsification gate injected.';
|
|
1010
1010
|
if (body.includes('$AutoResearch route prepared')) return 'SKS: AutoResearch experiment loop and evidence gate injected.';
|
|
1011
1011
|
if (body.includes('$PPT route prepared')) return 'SKS: PPT route and delivery-context gate injected.';
|
|
1012
1012
|
if (body.includes('$Image-UX-Review route prepared') || body.includes('$UX-Review route prepared')) return 'SKS: Image UX Review route and gpt-image-2 evidence gate injected.';
|
package/src/core/init.mjs
CHANGED
|
@@ -815,7 +815,7 @@ export async function installSkills(root) {
|
|
|
815
815
|
'computer-use-fast': `---\nname: computer-use-fast\ndescription: Alias for the maximum-speed $Computer-Use/$CU Codex Computer Use lane.\n---\n\nUse the same rules as computer-use: skip Team debate, QA-LOOP clarification, upfront TriWiki refresh, Context7, subagents, and reflection unless explicitly requested. Use Codex Computer Use directly; never substitute Playwright, Chrome MCP, Browser Use, Selenium, Puppeteer, or other browser automation for UI/browser evidence. At the end only, refresh/pack TriWiki, validate it, then provide a concise completion summary plus Honest Mode.\n`,
|
|
816
816
|
'cu': `---\nname: cu\ndescription: Short alias for the maximum-speed $Computer-Use Codex Computer Use lane.\n---\n\nUse the same rules as computer-use. This is a speed lane for focused UI/browser/visual tasks that require Codex Computer Use evidence, with TriWiki refresh/validate and Honest Mode deferred to final closeout.\n`,
|
|
817
817
|
'goal': `---\nname: goal\ndescription: Fast $Goal/$goal bridge overlay for Codex native persisted /goal workflows.\n---\n\nUse when the user invokes $Goal/$goal or asks to persist a workflow with Codex native /goal continuation. Prepare with sks goal create or the $Goal route, write only the lightweight bridge artifacts, then use native Codex /goal create, pause, resume, and clear controls where available. Goal does not replace Team, QA, DB, or other SKS execution routes; continue implementation through the selected route and use Context7 only when external API/library docs are involved. Do not recreate the old no-question loop.\n`,
|
|
818
|
-
'research': `---\nname: research\ndescription: Dollar-command route for $Research or $research frontier discovery workflows.\n---\n\nUse when the user invokes $Research/$research or asks for research, hypotheses, new mechanisms, falsification, or testable predictions. Prefer sks research prepare and sks research run. Run the genius-lens scout council with Einstein/Feynman/Turing/von Neumann-inspired cognitive roles plus a skeptic lens; do not impersonate the historical people. Every Research scout must run with effort=xhigh, record one literal "Eureka!" idea, and participate in a vigorous evidence-bound debate before synthesis. Maximize safe web/source search and record source-ledger.json, scout-ledger.json, debate-ledger.json, novelty-ledger.json, falsification-ledger.json, and research-gate.json. Keep the loop short: frame outcome, compare a few mechanisms, falsify, keep the smallest useful probe, and avoid adding background process unless it reduces net route weight. Do not use for ordinary code edits.\n`,
|
|
818
|
+
'research': `---\nname: research\ndescription: Dollar-command route for $Research or $research frontier discovery workflows.\n---\n\nUse when the user invokes $Research/$research or asks for research, hypotheses, new mechanisms, falsification, or testable predictions. Prefer sks research prepare and sks research run. Run the genius-lens scout council with Einstein/Feynman/Turing/von Neumann-inspired cognitive roles plus a skeptic lens; do not impersonate the historical people. Every Research scout must run with effort=xhigh, record one literal "Eureka!" idea, and participate in a vigorous evidence-bound debate before synthesis. Maximize safe web/source search and record source-ledger.json, scout-ledger.json, debate-ledger.json, novelty-ledger.json, falsification-ledger.json, research-report.md, research-paper.md, and research-gate.json. Keep the loop short: frame outcome, compare a few mechanisms, falsify, keep the smallest useful probe, turn the result into a concise paper manuscript, and avoid adding background process unless it reduces net route weight. Do not use for ordinary code edits.\n`,
|
|
819
819
|
'autoresearch': `---\nname: autoresearch\ndescription: Dollar-command route for $AutoResearch or $autoresearch iterative experiment loops.\n---\n\nUse for $AutoResearch, iterative improvement, SEO/GEO, ranking, workflow, benchmark, or experiments. Define program, hypothesis, experiment, metric, keep/discard, falsification, next step, and Honest Mode. Load seo-geo-optimizer for README/npm/GitHub/schema/AI-search work.\n`,
|
|
820
820
|
'db': `---\nname: db\ndescription: Dollar-command route for $DB or $db database and Supabase safety checks.\n---\n\nUse when the user invokes $DB/$db or the task touches SQL, Supabase, Postgres, migrations, Prisma, Drizzle, Knex, MCP database tools, or production data. Run or follow sks db policy, sks db scan, sks db classify, and sks db check. Destructive database operations remain forbidden.\n`,
|
|
821
821
|
'mad-sks': `---\nname: mad-sks\ndescription: Explicit high-risk authorization modifier for $MAD-SKS scoped Supabase MCP DB permission widening.\n---\n\nUse only when the user explicitly invokes $MAD-SKS or top-level sks --mad. It can be combined with another route, such as $MAD-SKS $Team or $DB ... $MAD-SKS; in that case the other command remains the primary workflow and MAD-SKS is only the temporary permission grant. The widened permission applies only while the active mission gate is open, must be deactivated when the task ends, and opens live server work, Supabase MCP database writes, column/schema cleanup, direct execute SQL, migration application when required, and normal targeted DB writes. Keep only catastrophic safeguards: whole database/schema/table removal, truncate, all-row delete/update, reset, dangerous project/branch management, credential exfiltration, persistent security weakening, and unrequested fallback implementation remain blocked. Do not carry MAD-SKS permission into later prompts or routes. The permission profile is centralized in src/core/permission-gates.mjs so skill/hook/MCP-style gates share one decision function.\n`,
|
package/src/core/pipeline.mjs
CHANGED
|
@@ -921,7 +921,7 @@ async function prepareResearch(root, route, task, required) {
|
|
|
921
921
|
await writeResearchPlan(dir, task, {});
|
|
922
922
|
const pipelinePlan = await writePipelinePlan(dir, { missionId: id, route, task, required, ambiguity: { required: false, status: 'direct_route' } });
|
|
923
923
|
await setCurrent(root, routeState(id, route, 'RESEARCH_PREPARED', required, { prompt: task, pipeline_plan_ready: validatePipelinePlan(pipelinePlan).ok, pipeline_plan_path: PIPELINE_PLAN_ARTIFACT }));
|
|
924
|
-
return routeContext(route, id, task, required, 'Run sks research run latest, maximize web/source search, require every scout effort=xhigh plus one Eureka! idea, fill source-ledger.json, scout-ledger.json, debate-ledger.json, novelty-ledger.json, falsification-ledger.json, research-report.md, and pass research-gate.json.');
|
|
924
|
+
return routeContext(route, id, task, required, 'Run sks research run latest, maximize web/source search, require every scout effort=xhigh plus one Eureka! idea, fill source-ledger.json, scout-ledger.json, debate-ledger.json, novelty-ledger.json, falsification-ledger.json, research-report.md, research-paper.md, and pass research-gate.json.');
|
|
925
925
|
}
|
|
926
926
|
|
|
927
927
|
async function prepareAutoResearch(root, route, task, required) {
|
package/src/core/research.mjs
CHANGED
|
@@ -1,7 +1,19 @@
|
|
|
1
1
|
import path from 'node:path';
|
|
2
|
-
import { appendJsonlBounded, nowIso, readJson, writeJsonAtomic, writeTextAtomic, exists } from './fsx.mjs';
|
|
2
|
+
import { appendJsonlBounded, nowIso, readJson, readText, writeJsonAtomic, writeTextAtomic, exists } from './fsx.mjs';
|
|
3
3
|
import { OUTCOME_RUBRIC } from './proof-field.mjs';
|
|
4
4
|
|
|
5
|
+
export const RESEARCH_PAPER_ARTIFACT = 'research-paper.md';
|
|
6
|
+
export const RESEARCH_PAPER_SECTION_GROUPS = Object.freeze([
|
|
7
|
+
['abstract'],
|
|
8
|
+
['introduction'],
|
|
9
|
+
['method', 'methodology'],
|
|
10
|
+
['results', 'findings'],
|
|
11
|
+
['discussion'],
|
|
12
|
+
['limitations', 'falsification'],
|
|
13
|
+
['conclusion', 'next experiment'],
|
|
14
|
+
['references', 'sources']
|
|
15
|
+
]);
|
|
16
|
+
|
|
5
17
|
export const RESEARCH_SCOUT_COUNCIL = Object.freeze([
|
|
6
18
|
{
|
|
7
19
|
id: 'einstein',
|
|
@@ -88,6 +100,7 @@ export function createResearchPlan(prompt, opts = {}) {
|
|
|
88
100
|
source_priority: ['primary_sources', 'official_docs_or_standards', 'peer_reviewed_or_archival_sources', 'reputable_recent_sources', 'credible_counterevidence'],
|
|
89
101
|
citation_rules: [
|
|
90
102
|
'Every factual claim in the report must cite source-ledger ids or local project evidence.',
|
|
103
|
+
'The final research paper must include references tied to source-ledger ids.',
|
|
91
104
|
'Every novelty-ledger entry must cite at least one evidence source and at least one falsifier.',
|
|
92
105
|
'If live web search is unavailable, record the blocker in source-ledger.json and keep research-gate.json unpassed.'
|
|
93
106
|
],
|
|
@@ -106,6 +119,7 @@ export function createResearchPlan(prompt, opts = {}) {
|
|
|
106
119
|
'The scout council must debate vigorously but stay evidence-bound; record challenges and responses in debate-ledger.json.',
|
|
107
120
|
'Maximize safe web/source search and record queries, sources, citations, and blockers in source-ledger.json.',
|
|
108
121
|
'Actively seek disconfirming evidence before synthesis.',
|
|
122
|
+
'Turn the surviving research result into research-paper.md with paper-style sections and references.',
|
|
109
123
|
'Keep unsupported source-free claims as hypotheses only.',
|
|
110
124
|
'Prefer the smallest testable mechanism or implementation probe over a new long-running loop.',
|
|
111
125
|
'Do not ask the user mid-run; resolve scope using the research plan and safety policy.'
|
|
@@ -116,10 +130,12 @@ export function createResearchPlan(prompt, opts = {}) {
|
|
|
116
130
|
{ id: 'R2_EUREKA', goal: 'Have each xhigh genius-lens scout shout Eureka! and record one non-obvious idea with source ids.' },
|
|
117
131
|
{ id: 'R3_DEBATE', goal: 'Run a vigorous evidence-bound council debate with every scout challenging or responding.' },
|
|
118
132
|
{ id: 'R4_FALSIFY', goal: 'Attack each mechanism with counterexamples, missing evidence, source conflicts, and failure modes.' },
|
|
119
|
-
{ id: 'R5_APPLY', goal: 'Keep the smallest surviving mechanism, define a cheap probe, and write all ledgers.' }
|
|
133
|
+
{ id: 'R5_APPLY', goal: 'Keep the smallest surviving mechanism, define a cheap probe, and write all ledgers.' },
|
|
134
|
+
{ id: 'R6_PAPER', goal: 'Convert the final research result into a concise paper manuscript with abstract, method, findings, limitations, and references.' }
|
|
120
135
|
],
|
|
121
136
|
required_artifacts: [
|
|
122
137
|
'research-report.md',
|
|
138
|
+
RESEARCH_PAPER_ARTIFACT,
|
|
123
139
|
'source-ledger.json',
|
|
124
140
|
'scout-ledger.json',
|
|
125
141
|
'debate-ledger.json',
|
|
@@ -166,6 +182,11 @@ export function researchPlanMarkdown(plan) {
|
|
|
166
182
|
return `${lines.join('\n')}\n`;
|
|
167
183
|
}
|
|
168
184
|
|
|
185
|
+
export function countResearchPaperSections(text = '') {
|
|
186
|
+
const headings = String(text || '').toLowerCase().split(/\n/).filter((line) => /^#{1,3}\s+/.test(line));
|
|
187
|
+
return RESEARCH_PAPER_SECTION_GROUPS.filter((group) => headings.some((heading) => group.some((term) => heading.includes(term)))).length;
|
|
188
|
+
}
|
|
189
|
+
|
|
169
190
|
export async function writeResearchPlan(dir, prompt, opts = {}) {
|
|
170
191
|
const plan = createResearchPlan(prompt, opts);
|
|
171
192
|
await writeJsonAtomic(path.join(dir, 'research-plan.json'), plan);
|
|
@@ -265,6 +286,8 @@ export function defaultResearchGate() {
|
|
|
265
286
|
return {
|
|
266
287
|
passed: false,
|
|
267
288
|
report_present: false,
|
|
289
|
+
paper_present: false,
|
|
290
|
+
paper_sections: 0,
|
|
268
291
|
source_ledger_present: false,
|
|
269
292
|
scout_ledger_present: false,
|
|
270
293
|
debate_ledger_present: false,
|
|
@@ -296,6 +319,8 @@ export function defaultResearchGate() {
|
|
|
296
319
|
export async function evaluateResearchGate(dir) {
|
|
297
320
|
const gate = await readJson(path.join(dir, 'research-gate.json'), defaultResearchGate());
|
|
298
321
|
const reportPresent = await exists(path.join(dir, 'research-report.md'));
|
|
322
|
+
const paperPresent = await exists(path.join(dir, RESEARCH_PAPER_ARTIFACT));
|
|
323
|
+
const paperSections = paperPresent ? countResearchPaperSections(await readText(path.join(dir, RESEARCH_PAPER_ARTIFACT), '')) : 0;
|
|
299
324
|
const sourcePresent = await exists(path.join(dir, 'source-ledger.json'));
|
|
300
325
|
const scoutPresent = await exists(path.join(dir, 'scout-ledger.json'));
|
|
301
326
|
const debatePresent = await exists(path.join(dir, 'debate-ledger.json'));
|
|
@@ -324,6 +349,8 @@ export async function evaluateResearchGate(dir) {
|
|
|
324
349
|
const citationCoverage = gate.citation_coverage === true || sourceLedger?.citation_coverage?.all_key_claims_cited === true;
|
|
325
350
|
const reasons = [];
|
|
326
351
|
if (!reportPresent && gate.report_present !== true) reasons.push('research_report_missing');
|
|
352
|
+
if (!paperPresent) reasons.push('research_paper_missing');
|
|
353
|
+
if (paperSections < RESEARCH_PAPER_SECTION_GROUPS.length) reasons.push('research_paper_sections_missing');
|
|
327
354
|
if (!sourcePresent && gate.source_ledger_present !== true) reasons.push('source_ledger_missing');
|
|
328
355
|
if (!scoutPresent && gate.scout_ledger_present !== true) reasons.push('scout_ledger_missing');
|
|
329
356
|
if (!debatePresent && gate.debate_ledger_present !== true) reasons.push('debate_ledger_missing');
|
|
@@ -352,6 +379,7 @@ export async function evaluateResearchGate(dir) {
|
|
|
352
379
|
reasons,
|
|
353
380
|
metrics: {
|
|
354
381
|
web_search_passes: webSearchPasses,
|
|
382
|
+
paper_sections: Math.max(Number(gate.paper_sections || 0), paperSections),
|
|
355
383
|
source_entries: Math.max(Number(gate.source_entries || 0), sourceEntries),
|
|
356
384
|
independent_scouts: Math.max(Number(gate.independent_scouts || 0), independentScouts),
|
|
357
385
|
xhigh_scouts: Math.max(Number(gate.xhigh_scouts || 0), xhighScouts),
|
|
@@ -499,10 +527,13 @@ export async function writeMockResearchResult(dir, plan) {
|
|
|
499
527
|
await writeJsonAtomic(path.join(dir, 'falsification-ledger.json'), falsificationLedger);
|
|
500
528
|
await writeJsonAtomic(path.join(dir, 'novelty-ledger.json'), ledger);
|
|
501
529
|
await writeTextAtomic(path.join(dir, 'research-report.md'), `# SKS Research Report\n\nPrompt: ${plan.prompt}\n\n## Scout Council Synthesis\n\nThe mock council keeps one cited methodological insight: a research mode should force falsifiable novelty rather than summarize known material [mock-source-1].\n\n## Source Coverage\n\nThis is a selftest fixture. It records mock source and counterevidence ledgers but does not perform live web browsing in --mock mode.\n\n## Candidate Insight\n\nA useful research run must produce source-cited, falsifiable novelty with scout findings and a cheap probe.\n\n## Falsification\n\nThe claim is weak if no new testable prediction, counterevidence source, or experiment is produced [mock-counter-1].\n\n## Next Test\n\nCompare this mode against a summary-only run and score candidate insights, falsification passes, citation coverage, and testability.\n`);
|
|
530
|
+
await writeTextAtomic(path.join(dir, RESEARCH_PAPER_ARTIFACT), `# Research Paper: ${plan.prompt}\n\n## Abstract\nA source-cited research run should produce falsifiable novelty rather than only summarize known material.\n\n## Introduction\nThe mock topic is evaluated as a research workflow outcome [mock-source-1].\n\n## Methodology\nFive xhigh scouts produce Eureka ideas, debate, and falsify the strongest claim.\n\n## Findings\nThe surviving finding is that useful research needs cited novelty plus a cheap decisive probe.\n\n## Discussion\nThe debate favors gate-backed evidence over narrative confidence.\n\n## Limitations and Falsification\nThe claim fails without sources, counterevidence, or testable predictions [mock-counter-1].\n\n## Conclusion and Next Experiment\nCompare this loop against a summary-only baseline and score testable insights.\n\n## References\n- [mock-source-1] Mock SKS research source coverage.\n- [mock-counter-1] Mock overclaim counterexample.\n`);
|
|
502
531
|
await writeJsonAtomic(path.join(dir, 'research-gate.json'), {
|
|
503
532
|
...defaultResearchGate(),
|
|
504
533
|
passed: true,
|
|
505
534
|
report_present: true,
|
|
535
|
+
paper_present: true,
|
|
536
|
+
paper_sections: RESEARCH_PAPER_SECTION_GROUPS.length,
|
|
506
537
|
source_ledger_present: true,
|
|
507
538
|
scout_ledger_present: true,
|
|
508
539
|
debate_ledger_present: true,
|
|
@@ -522,12 +553,12 @@ export async function writeMockResearchResult(dir, plan) {
|
|
|
522
553
|
falsification_cases: 1,
|
|
523
554
|
testable_predictions: 1,
|
|
524
555
|
citation_coverage: true,
|
|
525
|
-
evidence: ['mock research report', 'mock source ledger', 'mock scout ledger', 'mock debate ledger', 'mock novelty ledger', 'mock falsification ledger'],
|
|
556
|
+
evidence: ['mock research report', 'mock research paper', 'mock source ledger', 'mock scout ledger', 'mock debate ledger', 'mock novelty ledger', 'mock falsification ledger'],
|
|
526
557
|
notes: ['mock mode records the new contract but does not call a model or perform live web browsing']
|
|
527
558
|
});
|
|
528
559
|
return evaluateResearchGate(dir);
|
|
529
560
|
}
|
|
530
561
|
|
|
531
562
|
export function buildResearchPrompt({ id, mission, plan, cycle, previous }) {
|
|
532
|
-
return `You are running SKS Research Mode.\nMISSION: ${id}\nTOPIC: ${mission.prompt}\nCYCLE: ${cycle}\nMODE: Genius Scout Council + frontier discovery loop. Use maximum reasoning depth available under the current Codex profile.\nNO-QUESTION LOCK: Do not ask the user. Resolve scope from research-plan.json and current project evidence.\nSAFETY: Destructive database operations and unsafe external actions are forbidden. Prefer read-only inspection, local files, and cited public sources.\nPERSONA POLICY: Use Einstein/Feynman/Turing/von Neumann-inspired scout lenses only as cognitive roles. Do not impersonate, roleplay private identity, or speak as the historical people.\nSCOUT EFFORT POLICY: Every Research scout agent must use reasoning_effort=xhigh. Record effort: "xhigh" for every scout in scout-ledger.json. Any lower-effort scout output must keep research-gate.json unpassed.\nEUREKA POLICY: Every scout must literally write "Eureka!" and one non-obvious, source-linked idea before debate.\nDEBATE POLICY: The scouts must debate vigorously but stay evidence-bound. Every scout must challenge or respond at least once, and debate-ledger.json must record the exchanges before synthesis.\nWEB/SOURCE POLICY: Run the broadest safe web/source search available in this runtime before synthesis. Use independent query sets for every scout. Prefer primary sources, official docs or standards, peer-reviewed or archival sources, reputable recent sources, and credible counterevidence. If live web search is unavailable, record the blocker in source-ledger.json and do not pass the gate.\nRESEARCH PLAN:\n${JSON.stringify(plan, null, 2)}\n\nOBJECTIVE: Produce genuinely useful candidate discoveries: non-obvious hypotheses, mechanisms, predictions, or experiments. Do not merely summarize. Mark uncertainty clearly.\n\nREQUIRED PROCESS:\n1. Source search first: create source-ledger.json with queries, source ids, counterevidence sources, citation coverage, and blockers.\n2. 
Independent xhigh scouts: create scout-ledger.json with effort=xhigh, a literal Eureka! idea, findings, source_ids, falsifiers, and cheap_probes for every scout lens.\n3. Debate: create debate-ledger.json with evidence-bound challenge/response exchanges involving every scout before synthesis.\n4. Falsification: create falsification-ledger.json with attacks, missing evidence, source conflicts, and decisive next tests.\n5. Synthesis: write research-report.md and novelty-ledger.json only after cited scout findings, Eureka ideas, debate, and falsification are recorded.\n\nREQUIRED OUTPUT FILES in .sneakoscope/missions/${id}/:\n- research-report.md: concise report with framing, source coverage, scout synthesis, debate synthesis, hypotheses, falsification, predictions, and next experiments. Cite source-ledger ids for factual claims.\n- source-ledger.json: web/source queries, source ids, source priority, counterevidence sources, citation coverage, and blockers.\n- scout-ledger.json: one entry per scout lens with effort, eureka, query_set, findings, source_ids, falsifiers, and cheap_probes.\n- debate-ledger.json: evidence-bound challenge/response exchanges, participants, changed minds, and unresolved conflicts.\n- novelty-ledger.json: entries with claim, novelty, confidence, falsifiability, evidence source ids, falsifiers, next_experiment.\n- falsification-ledger.json: attacks/counterexamples/source conflicts, result, and next_decisive_tests.\n- research-gate.json: set passed only when all ledgers exist, web/source retrieval was attempted, all scouts have effort=xhigh, all scouts have literal Eureka! ideas, every scout participated in debate, at least one counterevidence source exists, citation coverage is complete, at least one insight survived falsification, at least one testable prediction exists, and unsupported breakthrough claims are zero.\n\nPrevious cycle tail:\n${String(previous || '').slice(-2500)}\n`;
|
|
563
|
+
return `You are running SKS Research Mode.\nMISSION: ${id}\nTOPIC: ${mission.prompt}\nCYCLE: ${cycle}\nMODE: Genius Scout Council + frontier discovery loop. Use maximum reasoning depth available under the current Codex profile.\nNO-QUESTION LOCK: Do not ask the user. Resolve scope from research-plan.json and current project evidence.\nSAFETY: Destructive database operations and unsafe external actions are forbidden. Prefer read-only inspection, local files, and cited public sources.\nPERSONA POLICY: Use Einstein/Feynman/Turing/von Neumann-inspired scout lenses only as cognitive roles. Do not impersonate, roleplay private identity, or speak as the historical people.\nSCOUT EFFORT POLICY: Every Research scout agent must use reasoning_effort=xhigh. Record effort: "xhigh" for every scout in scout-ledger.json. Any lower-effort scout output must keep research-gate.json unpassed.\nEUREKA POLICY: Every scout must literally write "Eureka!" and one non-obvious, source-linked idea before debate.\nDEBATE POLICY: The scouts must debate vigorously but stay evidence-bound. Every scout must challenge or respond at least once, and debate-ledger.json must record the exchanges before synthesis.\nPAPER POLICY: After the report and ledgers, write research-paper.md as a concise manuscript with Abstract, Introduction, Methodology, Findings/Results, Discussion, Limitations/Falsification, Conclusion/Next Experiment, and References.\nWEB/SOURCE POLICY: Run the broadest safe web/source search available in this runtime before synthesis. Use independent query sets for every scout. Prefer primary sources, official docs or standards, peer-reviewed or archival sources, reputable recent sources, and credible counterevidence. 
If live web search is unavailable, record the blocker in source-ledger.json and do not pass the gate.\nRESEARCH PLAN:\n${JSON.stringify(plan, null, 2)}\n\nOBJECTIVE: Produce genuinely useful candidate discoveries: non-obvious hypotheses, mechanisms, predictions, or experiments. Do not merely summarize. Mark uncertainty clearly.\n\nREQUIRED PROCESS:\n1. Source search first: create source-ledger.json with queries, source ids, counterevidence sources, citation coverage, and blockers.\n2. Independent xhigh scouts: create scout-ledger.json with effort=xhigh, a literal Eureka! idea, findings, source_ids, falsifiers, and cheap_probes for every scout lens.\n3. Debate: create debate-ledger.json with evidence-bound challenge/response exchanges involving every scout before synthesis.\n4. Falsification: create falsification-ledger.json with attacks, missing evidence, source conflicts, and decisive next tests.\n5. Synthesis: write research-report.md and novelty-ledger.json only after cited scout findings, Eureka ideas, debate, and falsification are recorded.\n6. Paper: write research-paper.md as a paper-style manuscript with source-ledger references and limitations.\n\nREQUIRED OUTPUT FILES in .sneakoscope/missions/${id}/:\n- research-report.md: concise report with framing, source coverage, scout synthesis, debate synthesis, hypotheses, falsification, predictions, and next experiments. 
Cite source-ledger ids for factual claims.\n- research-paper.md: paper manuscript with Abstract, Introduction, Methodology, Findings/Results, Discussion, Limitations/Falsification, Conclusion/Next Experiment, and References using source-ledger ids.\n- source-ledger.json: web/source queries, source ids, source priority, counterevidence sources, citation coverage, and blockers.\n- scout-ledger.json: one entry per scout lens with effort, eureka, query_set, findings, source_ids, falsifiers, and cheap_probes.\n- debate-ledger.json: evidence-bound challenge/response exchanges, participants, changed minds, and unresolved conflicts.\n- novelty-ledger.json: entries with claim, novelty, confidence, falsifiability, evidence source ids, falsifiers, next_experiment.\n- falsification-ledger.json: attacks/counterexamples/source conflicts, result, and next_decisive_tests.\n- research-gate.json: set passed only when all ledgers exist, research-paper.md exists with required paper sections, web/source retrieval was attempted, all scouts have effort=xhigh, all scouts have literal Eureka! ideas, every scout participated in debate, at least one counterevidence source exists, citation coverage is complete, at least one insight survived falsification, at least one testable prediction exists, and unsupported breakthrough claims are zero.\n\nPrevious cycle tail:\n${String(previous || '').slice(-2500)}\n`;
|
|
533
564
|
}
|
package/src/core/routes.mjs
CHANGED
|
@@ -390,9 +390,9 @@ export const ROUTES = [
|
|
|
390
390
|
command: '$Research',
|
|
391
391
|
mode: 'RESEARCH',
|
|
392
392
|
route: 'research mission',
|
|
393
|
-
description: 'Frontier discovery with xhigh genius-lens scouts, Eureka ideas, vigorous evidence-bound debate, maximum source retrieval, falsification, and testable predictions.',
|
|
393
|
+
description: 'Frontier discovery with xhigh genius-lens scouts, Eureka ideas, vigorous evidence-bound debate, maximum source retrieval, falsification, a paper manuscript, and testable predictions.',
|
|
394
394
|
requiredSkills: ['research', 'research-discovery', 'pipeline-runner', 'context7-docs', REFLECTION_SKILL_NAME, 'honest-mode'],
|
|
395
|
-
lifecycle: ['research_plan', 'source_ledger', 'xhigh_scout_council', 'eureka_moments', 'debate_ledger', 'report', 'novelty_ledger', 'falsification_ledger', 'research_gate', 'post_route_reflection', 'honest_mode'],
|
|
395
|
+
lifecycle: ['research_plan', 'source_ledger', 'xhigh_scout_council', 'eureka_moments', 'debate_ledger', 'report', 'paper', 'novelty_ledger', 'falsification_ledger', 'research_gate', 'post_route_reflection', 'honest_mode'],
|
|
396
396
|
context7Policy: 'required',
|
|
397
397
|
reasoningPolicy: 'xhigh',
|
|
398
398
|
stopGate: 'research-gate.json',
|
|
@@ -537,7 +537,7 @@ export const COMMAND_CATALOG = [
|
|
|
537
537
|
{ name: 'init', usage: 'sks init [--force] [--local-only] [--install-scope global|project]', description: 'Initialize the local SKS control surface.' },
|
|
538
538
|
{ name: 'selftest', usage: 'sks selftest [--mock]', description: 'Run local smoke tests without calling a model.' },
|
|
539
539
|
{ name: 'goal', usage: 'sks goal create|pause|resume|clear|status ...', description: 'Prepare and control the fast SKS bridge overlay for Codex native persisted /goal workflows.' },
|
|
540
|
-
{ name: 'research', usage: 'sks research prepare|run|status ...', description: 'Run frontier-style research missions with xhigh scout Eureka ideas, debate, source-ledger, novelty, and falsification gates.' },
|
|
540
|
+
{ name: 'research', usage: 'sks research prepare|run|status ...', description: 'Run frontier-style research missions with xhigh scout Eureka ideas, debate, source-ledger, paper, novelty, and falsification gates.' },
|
|
541
541
|
{ name: 'db', usage: 'sks db policy|scan|mcp-config|classify|check ...', description: 'Inspect and enforce database/Supabase safety policy.' },
|
|
542
542
|
{ name: 'eval', usage: 'sks eval run|compare|thresholds ...', description: 'Run deterministic context-quality and performance evidence checks.' },
|
|
543
543
|
{ name: 'harness', usage: 'sks harness fixture|review [--json]', description: 'Run Harness Growth Factory fixtures for forgetting, skills, experiments, tool taxonomy, permissions, MultiAgentV2, and tmux views.' },
|