sneakoscope 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +115 -14
- package/docs/PERFORMANCE.md +30 -1
- package/docs/assets/sneakoscope-codex-logo.svg +51 -0
- package/package.json +15 -6
- package/src/cli/main.mjs +368 -15
- package/src/core/db-safety.mjs +7 -1
- package/src/core/decision-contract.mjs +1 -1
- package/src/core/evaluation.mjs +238 -0
- package/src/core/fsx.mjs +1 -1
- package/src/core/gx-renderer.mjs +352 -0
- package/src/core/hooks-runtime.mjs +32 -29
- package/src/core/hproof.mjs +6 -0
- package/src/core/init.mjs +89 -19
- package/src/core/research.mjs +143 -0
package/src/cli/main.mjs
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import path from 'node:path';
|
|
2
2
|
import fsp from 'node:fs/promises';
|
|
3
|
-
import { projectRoot, readJson, writeJsonAtomic,
|
|
4
|
-
import { initProject } from '../core/init.mjs';
|
|
3
|
+
import { projectRoot, readJson, writeJsonAtomic, appendJsonlBounded, nowIso, exists, ensureDir, tmpdir, packageRoot, dirSize, formatBytes, which } from '../core/fsx.mjs';
|
|
4
|
+
import { initProject, normalizeInstallScope, sksCommandPrefix } from '../core/init.mjs';
|
|
5
5
|
import { getCodexInfo, runCodexExec } from '../core/codex-adapter.mjs';
|
|
6
6
|
import { createMission, loadMission, findLatestMission, setCurrent, stateFile } from '../core/mission.mjs';
|
|
7
7
|
import { buildQuestionSchema, writeQuestions } from '../core/questions.mjs';
|
|
@@ -12,10 +12,20 @@ import { emitHook } from '../core/hooks-runtime.mjs';
|
|
|
12
12
|
import { storageReport, enforceRetention } from '../core/retention.mjs';
|
|
13
13
|
import { classifySql, classifyCommand, loadDbSafetyPolicy, safeSupabaseMcpConfig, checkSqlFile, checkDbOperation, scanDbSafety } from '../core/db-safety.mjs';
|
|
14
14
|
import { rustInfo } from '../core/rust-accelerator.mjs';
|
|
15
|
+
import { renderCartridge, validateCartridge, driftCartridge, snapshotCartridge } from '../core/gx-renderer.mjs';
|
|
16
|
+
import { DEFAULT_EVAL_THRESHOLDS, compareEvaluationReports, runEvaluationBenchmark } from '../core/evaluation.mjs';
|
|
17
|
+
import { buildResearchPrompt, evaluateResearchGate, writeMockResearchResult, writeResearchPlan } from '../core/research.mjs';
|
|
15
18
|
|
|
16
19
|
const flag = (args, name) => args.includes(name);
|
|
17
20
|
const promptOf = (args) => args.filter((x) => !String(x).startsWith('--')).join(' ').trim();
|
|
18
21
|
|
|
22
|
+
/**
 * Work out which install scope the CLI arguments ask for.
 *
 * The shorthand flags are checked first (`--project`, then `--global`). Otherwise the
 * token following `--install-scope` is used, falling back to `fallback` when the flag
 * or its value is absent; that choice is passed through `normalizeInstallScope`.
 *
 * @param {string[]} [args=[]] Raw CLI arguments.
 * @param {string} [fallback='global'] Scope used when no scope is given explicitly.
 * @returns {*} `'project'`, `'global'`, or whatever `normalizeInstallScope` returns.
 */
function installScopeFromArgs(args = [], fallback = 'global') {
  for (const shorthand of ['project', 'global']) {
    if (flag(args, `--${shorthand}`)) return shorthand;
  }
  const flagIndex = args.indexOf('--install-scope');
  const explicit = flagIndex >= 0 ? args[flagIndex + 1] : undefined;
  return normalizeInstallScope(explicit ? explicit : fallback);
}
|
|
28
|
+
|
|
19
29
|
export async function main(args) {
|
|
20
30
|
const [cmd, sub, ...rest] = args;
|
|
21
31
|
const tail = sub === undefined ? [] : [sub, ...rest];
|
|
@@ -24,6 +34,7 @@ export async function main(args) {
|
|
|
24
34
|
if (cmd === 'init') return init(tail);
|
|
25
35
|
if (cmd === 'selftest') return selftest(tail);
|
|
26
36
|
if (cmd === 'ralph') return ralph(sub, rest);
|
|
37
|
+
if (cmd === 'research') return research(sub, rest);
|
|
27
38
|
if (cmd === 'hook') return emitHook(sub);
|
|
28
39
|
if (cmd === 'profile') return profile(sub, rest);
|
|
29
40
|
if (cmd === 'hproof') return hproof(sub, rest);
|
|
@@ -31,6 +42,7 @@ export async function main(args) {
|
|
|
31
42
|
if (cmd === 'gx') return gx(sub, rest);
|
|
32
43
|
if (cmd === 'team') return team(tail);
|
|
33
44
|
if (cmd === 'db') return db(sub, rest);
|
|
45
|
+
if (cmd === 'eval') return evalCommand(sub, rest);
|
|
34
46
|
if (cmd === 'gc') return gc(tail);
|
|
35
47
|
if (cmd === 'stats') return stats(tail);
|
|
36
48
|
console.error(`Unknown command: ${cmd}`);
|
|
@@ -38,33 +50,68 @@ export async function main(args) {
|
|
|
38
50
|
}
|
|
39
51
|
|
|
40
52
|
function help() {
|
|
41
|
-
console.log(`Sneakoscope Codex
|
|
53
|
+
console.log(`Sneakoscope Codex
|
|
54
|
+
|
|
55
|
+
Usage:
|
|
56
|
+
sks doctor [--fix] [--json] [--install-scope global|project]
|
|
57
|
+
sks init [--install-scope global|project]
|
|
58
|
+
sks selftest [--mock]
|
|
59
|
+
sks ralph prepare "task"
|
|
60
|
+
sks ralph answer <mission-id|latest> <answers.json>
|
|
61
|
+
sks ralph run <mission-id|latest> [--mock] [--max-cycles N]
|
|
62
|
+
sks ralph status <mission-id|latest>
|
|
63
|
+
sks research prepare "topic" [--depth frontier]
|
|
64
|
+
sks research run <mission-id|latest> [--mock] [--max-cycles N]
|
|
65
|
+
sks research status <mission-id|latest>
|
|
66
|
+
sks db policy
|
|
67
|
+
sks db scan [--migrations] [--json]
|
|
68
|
+
sks db mcp-config --project-ref <ref>
|
|
69
|
+
sks db check --sql "DROP TABLE users"
|
|
70
|
+
sks db check --command "supabase db reset"
|
|
71
|
+
sks eval run [--json] [--out report.json]
|
|
72
|
+
sks eval compare --baseline old.json --candidate new.json [--json]
|
|
73
|
+
sks gx init [name]
|
|
74
|
+
sks gx render [name] [--format svg|html|all]
|
|
75
|
+
sks gx validate [name]
|
|
76
|
+
sks gx drift [name]
|
|
77
|
+
sks gx snapshot [name]
|
|
78
|
+
sks gc [--dry-run] [--json]
|
|
79
|
+
sks stats [--json]
|
|
80
|
+
`);
|
|
42
81
|
}
|
|
43
82
|
|
|
44
83
|
async function doctor(args) {
|
|
45
84
|
const root = await projectRoot();
|
|
46
|
-
|
|
85
|
+
const requestedScope = args.includes('--install-scope') || flag(args, '--project') || flag(args, '--global')
|
|
86
|
+
? installScopeFromArgs(args)
|
|
87
|
+
: null;
|
|
88
|
+
if (flag(args, '--fix')) await initProject(root, { installScope: requestedScope || 'global' });
|
|
47
89
|
const codex = await getCodexInfo();
|
|
48
90
|
const rust = await rustInfo();
|
|
49
91
|
const nodeOk = Number(process.versions.node.split('.')[0]) >= 20;
|
|
50
92
|
const storage = await storageReport(root);
|
|
51
93
|
const pkgBytes = await dirSize(packageRoot()).catch(() => 0);
|
|
94
|
+
const manifest = await readJson(path.join(root, '.sneakoscope', 'manifest.json'), null);
|
|
95
|
+
const installScope = requestedScope || normalizeInstallScope(manifest?.installation?.scope || 'global');
|
|
96
|
+
const install = await installStatus(root, installScope);
|
|
52
97
|
const dbPolicyExists = await exists(path.join(root, '.sneakoscope', 'db-safety.json'));
|
|
53
98
|
const dbScan = await scanDbSafety(root).catch((err) => ({ ok: false, findings: [{ id: 'db_safety_scan_failed', severity: 'high', reason: err.message }] }));
|
|
54
99
|
const result = {
|
|
55
100
|
node: { ok: nodeOk, version: process.version }, root, codex, rust,
|
|
101
|
+
install,
|
|
56
102
|
sneakoscope: { ok: await exists(path.join(root, '.sneakoscope')) },
|
|
57
103
|
db_guard: { ok: dbPolicyExists && dbScan.ok, policy: dbPolicyExists ? await loadDbSafetyPolicy(root) : null, scan: dbScan },
|
|
58
104
|
hooks: { ok: await exists(path.join(root, '.codex', 'hooks.json')) },
|
|
59
105
|
skills: { ok: await exists(path.join(root, '.agents', 'skills')) },
|
|
60
106
|
package: { bytes: pkgBytes, human: formatBytes(pkgBytes) }, storage
|
|
61
107
|
};
|
|
62
|
-
result.ready = nodeOk && Boolean(codex.bin) && result.sneakoscope.ok && result.db_guard.ok;
|
|
108
|
+
result.ready = nodeOk && Boolean(codex.bin) && install.ok && result.sneakoscope.ok && result.db_guard.ok;
|
|
63
109
|
if (flag(args, '--json')) return console.log(JSON.stringify(result, null, 2));
|
|
64
110
|
console.log('Sneakoscope Codex Doctor\n');
|
|
65
111
|
console.log(`Node: ${nodeOk ? 'ok' : 'fail'} ${process.version}`);
|
|
66
112
|
console.log(`Project: ${root}`);
|
|
67
113
|
console.log(`Codex: ${codex.bin ? 'ok' : 'missing'} ${codex.version || ''}`);
|
|
114
|
+
console.log(`Install: ${install.ok ? 'ok' : 'missing'} ${install.scope} (${install.command_prefix})`);
|
|
68
115
|
console.log(`Rust acc.: ${rust.available ? rust.version : 'optional-missing'}`);
|
|
69
116
|
console.log(`State: ${result.sneakoscope.ok ? 'ok' : 'missing .sneakoscope'}`);
|
|
70
117
|
console.log(`DB Guard: ${result.db_guard.ok ? 'ok' : 'blocked'} ${dbScan.findings?.length || 0} finding(s)`);
|
|
@@ -74,16 +121,35 @@ async function doctor(args) {
|
|
|
74
121
|
console.log(`Storage: ${storage.total_human || '0 B'}`);
|
|
75
122
|
console.log(`Ready: ${result.ready ? 'yes' : 'no'}`);
|
|
76
123
|
if (!codex.bin) console.log('\nCodex CLI missing. Install separately: npm i -g @openai/codex, or set SKS_CODEX_BIN.');
|
|
124
|
+
if (!install.ok && install.scope === 'global') console.log('SKS global command missing. Install: npm i -g sneakoscope');
|
|
125
|
+
if (!install.ok && install.scope === 'project') console.log('SKS project package missing. Install in this project: npm i -D sneakoscope');
|
|
77
126
|
if (!result.ready && !flag(args, '--fix')) console.log('Run: sks doctor --fix');
|
|
78
127
|
}
|
|
79
128
|
|
|
80
129
|
async function init(args) {
|
|
81
130
|
const root = await projectRoot();
|
|
82
|
-
const
|
|
131
|
+
const installScope = installScopeFromArgs(args);
|
|
132
|
+
const res = await initProject(root, { force: flag(args, '--force'), installScope });
|
|
83
133
|
console.log(`Initialized Sneakoscope Codex in ${root}`);
|
|
134
|
+
console.log(`Install scope: ${installScope} (${sksCommandPrefix(installScope)})`);
|
|
84
135
|
for (const x of res.created) console.log(`- ${x}`);
|
|
85
136
|
}
|
|
86
137
|
|
|
138
|
+
/**
 * Build the install section of the doctor report for one scope.
 *
 * @param {string} root Project root directory.
 * @param {string} scope Install scope being checked (`'project'` or anything else).
 * @returns {Promise<object>} Object with `scope`, `default_scope`, `command_prefix`,
 *   `global_bin`, `project_bin` and `ok`. For the `'project'` scope `ok` reflects whether
 *   the project-local `sks.mjs` exists; for any other scope it reflects whether
 *   `which('sks')` produced a truthy result.
 */
async function installStatus(root, scope) {
  const prefix = sksCommandPrefix(scope);
  // A rejected lookup is treated as "not installed" rather than as an error.
  const onPath = await which('sks').catch(() => null);
  const localBin = path.join(root, 'node_modules', 'sneakoscope', 'bin', 'sks.mjs');
  const localBinPresent = await exists(localBin);

  let ok;
  if (scope === 'project') {
    ok = localBinPresent;
  } else {
    ok = Boolean(onPath);
  }

  return {
    scope,
    default_scope: 'global',
    command_prefix: prefix,
    global_bin: onPath,
    project_bin: localBin,
    ok
  };
}
|
|
152
|
+
|
|
87
153
|
async function ralph(sub, args) {
|
|
88
154
|
if (sub === 'prepare') return ralphPrepare(args);
|
|
89
155
|
if (sub === 'answer') return ralphAnswer(args);
|
|
@@ -93,6 +159,101 @@ async function ralph(sub, args) {
|
|
|
93
159
|
process.exitCode = 1;
|
|
94
160
|
}
|
|
95
161
|
|
|
162
|
+
/**
 * Dispatch `sks research <sub>` to the matching handler.
 *
 * Unknown subcommands print a usage line to stderr and set the process exit code to 1.
 *
 * @param {string} sub Subcommand name (`prepare`, `run` or `status`).
 * @param {string[]} args Remaining CLI arguments, forwarded untouched.
 * @returns {Promise<*>} Whatever the selected handler returns; `undefined` for an unknown subcommand.
 */
async function research(sub, args) {
  switch (sub) {
    case 'prepare':
      return researchPrepare(args);
    case 'run':
      return researchRun(args);
    case 'status':
      return researchStatus(args);
    default:
      console.error('Usage: sks research <prepare|run|status>');
      process.exitCode = 1;
      return undefined;
  }
}
|
|
169
|
+
|
|
170
|
+
/**
 * Create a research mission from the positional arguments and write its plan.
 *
 * Initializes the project first when `.sneakoscope` does not exist yet, then creates a
 * mission in `research` mode, writes the research plan (depth from `--depth`, default
 * `frontier`), records the prepared state and prints a short summary.
 *
 * @param {string[]} args CLI arguments after `research prepare`.
 * @returns {Promise<void>}
 * @throws {Error} When no research topic is given.
 */
async function researchPrepare(args) {
  const root = await projectRoot();
  const stateDirPresent = await exists(path.join(root, '.sneakoscope'));
  if (!stateDirPresent) await initProject(root, {});

  const topic = positionalArgs(args).join(' ').trim();
  if (topic === '') throw new Error('Missing research topic.');

  const { id, dir } = await createMission(root, { mode: 'research', prompt: topic });
  const depth = readFlagValue(args, '--depth', 'frontier');
  const plan = await writeResearchPlan(dir, topic, { depth });
  await setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase: 'RESEARCH_PREPARED', questions_allowed: false });

  const summary = [
    `Research mission created: ${id}`,
    `Methodology: ${plan.methodology}`,
    `Plan: ${path.relative(root, path.join(dir, 'research-plan.md'))}`,
    `Run: sks research run ${id} --max-cycles 3`
  ];
  for (const line of summary) console.log(line);
}
|
|
183
|
+
|
|
184
|
+
/**
 * Run a research mission for up to `--max-cycles` cycles (default 3).
 *
 * Flow, as implemented here:
 *  1. Resolve and load the mission; write a research plan if `research-plan.json` is missing.
 *  2. Stop with exit code 2 when the DB safety scan is not ok.
 *  3. With `--mock`, or when no Codex binary is found, write a mock result and finish.
 *  4. Otherwise run one Codex exec per cycle, store the process summary, and evaluate the
 *     research gate. A cycle whose output contains a user question is not gated; the
 *     continuation reason is appended to the text carried into the next cycle instead.
 *  5. If no cycle passes the gate, record the paused-after-max-cycles phase.
 *
 * @param {string[]} args CLI arguments after `research run`; `args[0]` is the mission id or `latest`.
 * @returns {Promise<void>}
 * @throws {Error} When no mission id can be resolved.
 */
async function researchRun(args) {
  const root = await projectRoot();
  const id = await resolveMissionId(root, args[0]);
  if (!id) throw new Error('Usage: sks research run <mission-id|latest> [--mock] [--max-cycles N]');
  const { dir, mission } = await loadMission(root, id);

  const planPath = path.join(dir, 'research-plan.json');
  const planPresent = await exists(planPath);
  if (!planPresent) await writeResearchPlan(dir, mission.prompt || '', {});
  const plan = await readJson(planPath);

  const dbScan = await scanDbSafety(root);
  if (!dbScan.ok) {
    console.error('Research cannot run: DB Guardian found unsafe Supabase/MCP/database configuration.');
    console.error(JSON.stringify(dbScan.findings, null, 2));
    process.exitCode = 2;
    return;
  }

  // Local helpers so every event and state write uses the same file and payload shape.
  const eventsFile = path.join(dir, 'events.jsonl');
  const recordEvent = (type, detail = {}) => appendJsonlBounded(eventsFile, { ts: nowIso(), type, ...detail });
  const enterPhase = (phase, questionsAllowed) =>
    setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase, questions_allowed: questionsAllowed });

  const maxCycles = readMaxCycles(args, 3);
  const mock = flag(args, '--mock');
  await enterPhase('RESEARCH_RUNNING_NO_QUESTIONS', false);
  await recordEvent('research.run.started', { maxCycles, mock });

  if (mock) {
    const gate = await writeMockResearchResult(dir, plan);
    await enterPhase(gate.passed ? 'RESEARCH_DONE' : 'RESEARCH_PAUSED', true);
    console.log(`Mock research done: ${id}`);
    console.log(`Gate: ${gate.passed ? 'passed' : 'blocked'}`);
    return;
  }

  const codex = await getCodexInfo();
  if (!codex.bin) {
    console.error('Codex CLI not found. Running mock research instead.');
    const gate = await writeMockResearchResult(dir, plan);
    await enterPhase(gate.passed ? 'RESEARCH_DONE' : 'RESEARCH_PAUSED', true);
    console.log(`Mock research done: ${id}`);
    return;
  }

  let previous = '';
  for (let cycle = 1; cycle <= maxCycles; cycle += 1) {
    const cycleDir = path.join(dir, 'research', `cycle-${cycle}`);
    const outputFile = path.join(cycleDir, 'final.md');
    await recordEvent('research.cycle.start', { cycle });

    const prompt = buildResearchPrompt({ id, mission, plan, cycle, previous });
    const result = await runCodexExec({
      root,
      prompt,
      outputFile,
      json: true,
      profile: 'sks-research',
      logDir: cycleDir,
      timeoutMs: 45 * 60 * 1000
    });
    await writeJsonAtomic(path.join(cycleDir, 'process.json'), {
      code: result.code,
      stdout_tail: result.stdout,
      stderr_tail: result.stderr,
      stdout_bytes: result.stdoutBytes,
      stderr_bytes: result.stderrBytes,
      truncated: result.truncated,
      timed_out: result.timedOut
    });

    previous = await safeReadText(outputFile, result.stdout || result.stderr || '');
    if (containsUserQuestion(previous)) {
      // Questions are not allowed while running: log it and carry the reason into the next cycle.
      await recordEvent('research.guard.question_blocked', { cycle });
      previous = `${previous}\n\n${noQuestionContinuationReason()}`;
      continue;
    }

    const gate = await evaluateResearchGate(dir);
    if (gate.passed) {
      await enterPhase('RESEARCH_DONE', true);
      await recordEvent('research.done', { cycle });
      // Retention cleanup is best-effort; a failure here must not fail the finished run.
      await enforceRetention(root).catch(() => {});
      console.log(`Research done: ${id}`);
      return;
    }
    await recordEvent('research.cycle.continue', { cycle, reasons: gate.reasons });
  }

  await enterPhase('RESEARCH_PAUSED_MAX_CYCLES', true);
  console.log(`Research paused after max cycles: ${id}`);
}
|
|
245
|
+
|
|
246
|
+
/**
 * Print the status of a research mission as pretty-printed JSON.
 *
 * The output holds the mission, the current state file contents, the gate (the evaluated
 * gate file when readable, otherwise the raw gate file, otherwise `null`) and the number
 * of novelty ledger entries (`null` when the ledger or its `entries` list is missing).
 *
 * @param {string[]} args CLI arguments after `research status`; `args[0]` is the mission id or `latest`.
 * @returns {Promise<void>}
 * @throws {Error} When no mission id can be resolved.
 */
async function researchStatus(args) {
  const root = await projectRoot();
  const id = await resolveMissionId(root, args[0]);
  if (!id) throw new Error('Usage: sks research status <mission-id|latest>');

  const { dir, mission } = await loadMission(root, id);
  const state = await readJson(stateFile(root), {});

  // The raw gate is read first so it can serve as the fallback for the evaluated gate.
  const rawGate = await readJson(path.join(dir, 'research-gate.json'), null);
  const gate = await readJson(path.join(dir, 'research-gate.evaluated.json'), rawGate);
  const ledger = await readJson(path.join(dir, 'novelty-ledger.json'), null);

  const status = { mission, state, gate, novelty_entries: ledger?.entries?.length ?? null };
  console.log(JSON.stringify(status, null, 2));
}
|
|
256
|
+
|
|
96
257
|
async function ralphPrepare(args) {
|
|
97
258
|
const root = await projectRoot();
|
|
98
259
|
if (!(await exists(path.join(root, '.sneakoscope')))) await initProject(root, {});
|
|
@@ -189,7 +350,7 @@ async function ralphRun(args) {
|
|
|
189
350
|
}
|
|
190
351
|
|
|
191
352
|
function buildRalphPrompt({ id, mission, contract, cycle, previous }) {
|
|
192
|
-
return `You are running Sneakoscope Codex Ralph mode.\nMISSION: ${id}\nTASK: ${mission.prompt}\nCYCLE: ${cycle}\nNO-QUESTION LOCK: Do not ask the user. Resolve using decision-contract.json.\nDATABASE SAFETY: Destructive database operations are forbidden. Do not run DROP, TRUNCATE, db reset, db push, branch reset/merge/delete, project deletion, RLS disable, or live execute_sql writes. Use read-only/project-scoped Supabase MCP only unless the sealed contract explicitly allows migration files for local or preview branch.\nDECISION CONTRACT:\n${JSON.stringify(contract, null, 2)}\nPERFORMANCE POLICY: keep outputs concise; raw logs stay in files; summarize evidence only.\nLOOP: plan, read before write, implement within contract, run/justify tests, update .sneakoscope/missions/${id}/done-gate.json.\nPrevious cycle tail:\n${String(previous || '').slice(-2500)}\n`;
|
|
353
|
+
return `You are running Sneakoscope Codex Ralph mode.\nMISSION: ${id}\nTASK: ${mission.prompt}\nCYCLE: ${cycle}\nNO-QUESTION LOCK: Do not ask the user. Resolve using decision-contract.json.\nDATABASE SAFETY: Destructive database operations are forbidden. Do not run DROP, TRUNCATE, db reset, db push, branch reset/merge/delete, project deletion, RLS disable, or live execute_sql writes. Use read-only/project-scoped Supabase MCP only unless the sealed contract explicitly allows migration files for local or preview branch.\nDECISION CONTRACT:\n${JSON.stringify(contract, null, 2)}\nPERFORMANCE POLICY: keep outputs concise; raw logs stay in files; summarize evidence only. If the task claims performance, token, or accuracy improvement, run sks eval run or sks eval compare and record the report path in done-gate.json evidence.\nDESIGN POLICY: if the task creates HTML/UI/prototype/deck-like visual artifacts, use the installed design-artifact-expert skill, inspect design context first, verify rendered output, and record design verification in done-gate.json.\nLOOP: plan, read before write, implement within contract, run/justify tests, update .sneakoscope/missions/${id}/done-gate.json.\nPrevious cycle tail:\n${String(previous || '').slice(-2500)}\n`;
|
|
193
354
|
}
|
|
194
355
|
|
|
195
356
|
async function safeReadText(file, fallback = '') {
|
|
@@ -223,6 +384,14 @@ async function selftest() {
|
|
|
223
384
|
const tmp = tmpdir();
|
|
224
385
|
process.chdir(tmp);
|
|
225
386
|
await initProject(tmp, {});
|
|
387
|
+
const defaultHooks = await readJson(path.join(tmp, '.codex', 'hooks.json'));
|
|
388
|
+
if (defaultHooks.hooks.PreToolUse[0].hooks[0].command !== 'sks hook pre-tool') throw new Error('selftest failed: global install hook command changed');
|
|
389
|
+
const projectScopeTmp = tmpdir();
|
|
390
|
+
await initProject(projectScopeTmp, { installScope: 'project' });
|
|
391
|
+
const projectHooks = await readJson(path.join(projectScopeTmp, '.codex', 'hooks.json'));
|
|
392
|
+
if (projectHooks.hooks.PreToolUse[0].hooks[0].command !== 'node ./node_modules/sneakoscope/bin/sks.mjs hook pre-tool') throw new Error('selftest failed: project install hook command missing');
|
|
393
|
+
const researchSkillExists = await exists(path.join(tmp, '.agents', 'skills', 'research-discovery', 'SKILL.md'));
|
|
394
|
+
if (!researchSkillExists) throw new Error('selftest failed: research skill not installed');
|
|
226
395
|
const { id, dir, mission } = await createMission(tmp, { mode: 'ralph', prompt: '로그인 세션 만료 UX 개선 supabase db' });
|
|
227
396
|
const schema = buildQuestionSchema(mission.prompt);
|
|
228
397
|
await writeQuestions(dir, schema);
|
|
@@ -238,9 +407,28 @@ async function selftest() {
|
|
|
238
407
|
if (classifyCommand('supabase db reset').level !== 'destructive') throw new Error('selftest failed: supabase db reset not detected');
|
|
239
408
|
const dbDecision = await checkDbOperation(tmp, { mission_id: id }, { tool_name: 'mcp__supabase__execute_sql', sql: 'drop table users;' }, { duringRalph: true });
|
|
240
409
|
if (dbDecision.action !== 'block') throw new Error('selftest failed: destructive MCP SQL allowed');
|
|
410
|
+
const nonDbDecision = await checkDbOperation(tmp, {}, { command: 'npm test' }, { duringRalph: true });
|
|
411
|
+
if (nonDbDecision.action !== 'allow') throw new Error('selftest failed: non-DB command blocked by DB guard');
|
|
412
|
+
const evalReport = runEvaluationBenchmark({ iterations: 5 });
|
|
413
|
+
if (!evalReport.comparison.meaningful_improvement) throw new Error('selftest failed: evaluation benchmark did not show meaningful improvement');
|
|
414
|
+
const { dir: researchDir, mission: researchMission } = await createMission(tmp, { mode: 'research', prompt: '새로운 코드 리뷰 방법론 연구' });
|
|
415
|
+
const researchPlan = await writeResearchPlan(researchDir, researchMission.prompt, {});
|
|
416
|
+
const researchGate = await writeMockResearchResult(researchDir, researchPlan);
|
|
417
|
+
if (!researchGate.passed) throw new Error('selftest failed: mock research gate did not pass');
|
|
241
418
|
await writeJsonAtomic(path.join(dir, 'done-gate.json'), { passed: true, unsupported_critical_claims: 0, database_safety_violation: false, database_safety_reviewed: true, visual_drift: 'low', wiki_drift: 'low', tests_required: false });
|
|
242
419
|
const gate = await evaluateDoneGate(tmp, id);
|
|
243
420
|
if (!gate.passed) throw new Error('selftest failed: done gate');
|
|
421
|
+
const gxDir = path.join(tmp, '.sneakoscope', 'gx', 'cartridges', 'selftest');
|
|
422
|
+
await writeJsonAtomic(path.join(gxDir, 'vgraph.json'), defaultVGraph('selftest'));
|
|
423
|
+
await writeJsonAtomic(path.join(gxDir, 'beta.json'), defaultBeta('selftest'));
|
|
424
|
+
const render = await renderCartridge(gxDir, { format: 'all' });
|
|
425
|
+
if (!render.outputs.includes('render.svg')) throw new Error('selftest failed: gx svg not rendered');
|
|
426
|
+
const validation = await validateCartridge(gxDir);
|
|
427
|
+
if (!validation.ok) throw new Error('selftest failed: gx validation rejected');
|
|
428
|
+
const drift = await driftCartridge(gxDir);
|
|
429
|
+
if (drift.status !== 'low') throw new Error('selftest failed: gx drift is high');
|
|
430
|
+
const snapshot = await snapshotCartridge(gxDir);
|
|
431
|
+
if (!snapshot.files.svg || !snapshot.files.html) throw new Error('selftest failed: gx snapshot incomplete');
|
|
244
432
|
const gc = await enforceRetention(tmp, { dryRun: true });
|
|
245
433
|
if (!gc.report.exists) throw new Error('selftest failed: storage report');
|
|
246
434
|
console.log('Sneakoscope Codex selftest passed.');
|
|
@@ -262,6 +450,75 @@ async function hproof(sub, args) {
|
|
|
262
450
|
console.log(JSON.stringify(await evaluateDoneGate(root, id), null, 2));
|
|
263
451
|
}
|
|
264
452
|
|
|
453
|
+
/**
 * Handle `sks eval <sub>`.
 *
 * - no subcommand / `help` / `--help`: print usage.
 * - `thresholds`: print `DEFAULT_EVAL_THRESHOLDS` as JSON.
 * - `run`: run the evaluation benchmark (`--iterations N`, default 200), save the report
 *   and print it as JSON (`--json`) or as a text summary.
 * - `compare`: compare two saved reports given via `--baseline`/`--candidate` or as the
 *   first two positional arguments, save the comparison and print it.
 * - anything else: print usage to stderr and set the process exit code to 1.
 *
 * Arguments are validated before the project root is resolved, so bad input fails fast
 * without touching the project.
 *
 * @param {string} sub Subcommand name.
 * @param {string[]} [args=[]] Remaining CLI arguments.
 * @returns {Promise<void>}
 * @throws {Error} When `--iterations` is not a positive integer, or when `compare` is
 *   missing a baseline or candidate path.
 */
async function evalCommand(sub, args = []) {
  if (!sub || sub === 'help' || sub === '--help') {
    console.log('Usage: sks eval run [--json] [--out report.json] [--iterations N] | sks eval compare --baseline old.json --candidate new.json [--json]');
    return;
  }
  if (sub === 'thresholds') {
    console.log(JSON.stringify(DEFAULT_EVAL_THRESHOLDS, null, 2));
    return;
  }
  if (sub === 'run') {
    const iterations = Number(readFlagValue(args, '--iterations', 200));
    // Number() yields NaN for non-numeric input; reject it instead of passing it to the benchmark.
    if (!Number.isInteger(iterations) || iterations < 1) {
      throw new Error('Invalid --iterations value: expected a positive integer.');
    }
    const root = await projectRoot();
    const report = runEvaluationBenchmark({ iterations });
    const saved = await saveEvalReport(root, args, report, 'eval');
    if (flag(args, '--json')) {
      console.log(JSON.stringify({ ...report, report_path: saved }, null, 2));
      return;
    }
    printEvalRun(report, saved);
    return;
  }
  if (sub === 'compare') {
    const positional = positionalArgs(args);
    const baselinePath = readFlagValue(args, '--baseline', positional[0]);
    const candidatePath = readFlagValue(args, '--candidate', positional[1]);
    if (!baselinePath || !candidatePath) throw new Error('Usage: sks eval compare --baseline old.json --candidate new.json [--json]');
    const root = await projectRoot();
    const baseline = await readJson(path.resolve(baselinePath));
    const candidate = await readJson(path.resolve(candidatePath));
    const report = compareEvaluationReports(baseline, candidate);
    const saved = await saveEvalReport(root, args, report, 'eval-compare');
    if (flag(args, '--json')) {
      console.log(JSON.stringify({ ...report, report_path: saved }, null, 2));
      return;
    }
    printEvalCompare(report, saved);
    return;
  }
  console.error('Usage: sks eval run|compare|thresholds');
  process.exitCode = 1;
}
|
|
482
|
+
|
|
483
|
+
/**
 * Persist an evaluation report unless `--no-save` is given.
 *
 * The target is the resolved `--out` path when provided; otherwise a timestamped file
 * named `<prefix>-<timestamp>.json` under `<root>/.sneakoscope/reports`, where the
 * timestamp is `nowIso()` with `:` and `.` replaced by `-`.
 *
 * @param {string} root Project root directory.
 * @param {string[]} args CLI arguments (`--no-save`, `--out <file>`).
 * @param {object} report Report to write.
 * @param {string} prefix File-name prefix for auto-generated report names.
 * @returns {Promise<string|null>} Path of the written file, or `null` when saving is disabled.
 */
async function saveEvalReport(root, args, report, prefix) {
  if (flag(args, '--no-save')) return null;

  const requested = readFlagValue(args, '--out', null);
  let target;
  if (requested) {
    target = path.resolve(requested);
  } else {
    const stamp = nowIso().replace(/[:.]/g, '-');
    target = path.join(root, '.sneakoscope', 'reports', `${prefix}-${stamp}.json`);
  }

  await ensureDir(path.dirname(target));
  await writeJsonAtomic(target, report);
  return target;
}
|
|
493
|
+
|
|
494
|
+
/**
 * Format a fraction as a percentage with one decimal place.
 *
 * @param {number} x Fraction, e.g. `0.25` for 25%.
 * @returns {string} Percentage text such as `'25.0%'`.
 */
function pct(x) {
  const percent = 100 * x;
  const rounded = percent.toFixed(1);
  return `${rounded}%`;
}
|
|
497
|
+
|
|
498
|
+
/**
 * Print a text summary of an evaluation run.
 *
 * @param {object} report Report with `scenario`, `baseline`, `candidate` and `comparison` sections.
 * @param {string|null} saved Path the report was saved to; the `Report:` line is skipped when falsy.
 * @returns {void}
 */
function printEvalRun(report, saved) {
  const summary = report.comparison;
  const { baseline: before, candidate: after } = report;
  // Non-negative deltas get an explicit '+' so the direction is always visible.
  const signed = (delta) => `${delta >= 0 ? '+' : ''}${delta}`;

  console.log('Sneakoscope Eval');
  console.log(`Scenario: ${report.scenario.id}`);
  console.log(`Tokens: ${before.estimated_tokens} -> ${after.estimated_tokens} (${pct(summary.token_savings_pct)} saved)`);
  console.log(`Accuracy: ${before.quality.accuracy_proxy} -> ${after.quality.accuracy_proxy} (${signed(summary.accuracy_delta)})`);
  console.log(`Recall: ${after.quality.required_recall}`);
  console.log(`Precision: ${before.quality.relevance_precision} -> ${after.quality.relevance_precision}`);
  console.log(`Build ms: ${before.context_build_ms_per_run} -> ${after.context_build_ms_per_run}`);
  console.log(`Meaningful improvement: ${summary.meaningful_improvement ? 'yes' : 'no'}`);
  if (saved) {
    console.log(`Report: ${saved}`);
  }
}
|
|
510
|
+
|
|
511
|
+
/**
 * Print a text summary of a comparison between two evaluation reports.
 *
 * @param {object} report Comparison with labels, `baseline`, `candidate` and `comparison` sections.
 * @param {string|null} saved Path the comparison was saved to; the `Report:` line is skipped when falsy.
 * @returns {void}
 */
function printEvalCompare(report, saved) {
  const summary = report.comparison;
  const { baseline: before, candidate: after } = report;
  // Non-negative deltas get an explicit '+' so the direction is always visible.
  const signed = (delta) => `${delta >= 0 ? '+' : ''}${delta}`;

  console.log('Sneakoscope Eval Compare');
  console.log(`Baseline: ${report.baseline_label}`);
  console.log(`Candidate: ${report.candidate_label}`);
  console.log(`Tokens: ${before.estimated_tokens} -> ${after.estimated_tokens} (${pct(summary.token_savings_pct)} saved)`);
  console.log(`Accuracy: ${before.quality.accuracy_proxy} -> ${after.quality.accuracy_proxy} (${signed(summary.accuracy_delta)})`);
  console.log(`Meaningful improvement: ${summary.meaningful_improvement ? 'yes' : 'no'}`);
  if (saved) {
    console.log(`Report: ${saved}`);
  }
}
|
|
521
|
+
|
|
265
522
|
async function memory(sub, args) { return gc(args || []); }
|
|
266
523
|
|
|
267
524
|
async function gc(args) {
|
|
@@ -286,19 +543,115 @@ async function stats(args) {
|
|
|
286
543
|
for (const [name, sec] of Object.entries(report.sections || {})) console.log(`- ${name}: ${sec.human}`);
|
|
287
544
|
}
|
|
288
545
|
|
|
546
|
+
/**
 * Extract positional arguments from a CLI argument list.
 *
 * Every token is stringified. Flags known to take a value are dropped together with the
 * token that follows them; any other token starting with `--` is dropped on its own.
 *
 * @param {Array<*>} [args=[]] Raw CLI arguments.
 * @returns {string[]} Remaining positional tokens, in order.
 */
function positionalArgs(args = []) {
  const takesValue = new Set(['--format', '--iterations', '--out', '--baseline', '--candidate', '--install-scope', '--max-cycles', '--depth']);
  const positionals = [];
  let index = 0;
  while (index < args.length) {
    const token = String(args[index]);
    if (takesValue.has(token)) {
      // Skip the flag and the value that follows it.
      index += 2;
      continue;
    }
    if (!token.startsWith('--')) positionals.push(token);
    index += 1;
  }
  return positionals;
}
|
|
559
|
+
|
|
560
|
+
/**
 * Read the value that follows a flag in a CLI argument list.
 *
 * Only the first occurrence of `name` is considered. The fallback is returned when the
 * flag is absent, when nothing (or an empty value) follows it, or when the next token is
 * itself another `--flag`. Without that last check, `--out --json` would yield `'--json'`
 * as the output path.
 *
 * @param {Array<*>} args Raw CLI arguments.
 * @param {string} name Flag to look up, e.g. `'--out'`.
 * @param {*} fallback Value returned when the flag has no usable value.
 * @returns {*} The token following the flag, or `fallback`.
 */
function readFlagValue(args, name, fallback) {
  const index = args.indexOf(name);
  if (index < 0) return fallback;
  const value = args[index + 1];
  if (!value) return fallback;
  // The next token is another option, so this flag was given without a value.
  if (typeof value === 'string' && value.startsWith('--')) return fallback;
  return value;
}
|
|
564
|
+
|
|
565
|
+
/**
 * Derive a safe cartridge directory name from the first positional argument.
 *
 * Path separators and any character outside `A-Z a-z 0-9 _ . -` are replaced with `-`,
 * and leading/trailing dashes are trimmed. The fallback is returned when nothing usable
 * remains, including names made only of dots: `.` and `..` would otherwise survive
 * sanitizing and resolve to a directory outside the cartridge folder once joined into a path.
 *
 * @param {string[]} args CLI arguments; the first positional one is the requested name.
 * @param {string} [fallback='architecture-atlas'] Name used when none is given or usable.
 * @returns {string} Sanitized cartridge name.
 */
function cartridgeName(args, fallback = 'architecture-atlas') {
  const raw = positionalArgs(args)[0] || fallback;
  const cleaned = String(raw)
    .trim()
    .replace(/[\\/]+/g, '-')
    .replace(/[^A-Za-z0-9_.-]+/g, '-')
    .replace(/^-+|-+$/g, '');
  // Dot-only names ('.', '..', ...) are directory references, not cartridge names.
  if (cleaned === '' || /^\.+$/.test(cleaned)) return fallback;
  return cleaned;
}
|
|
569
|
+
|
|
570
|
+
/**
 * Build the directory path of a named GX cartridge inside the project.
 *
 * @param {string} root Project root directory.
 * @param {string} name Cartridge name.
 * @returns {string} `<root>/.sneakoscope/gx/cartridges/<name>`, joined for the host platform.
 */
function cartridgeDir(root, name) {
  const segments = ['.sneakoscope', 'gx', 'cartridges', name];
  return path.join(root, ...segments);
}
|
|
573
|
+
|
|
574
|
+
/**
 * Build the starter vgraph document for a new cartridge.
 *
 * @param {string} name Cartridge name, used as the graph `id`.
 * @returns {object} Graph with three nodes marked `safe`, two edges, two invariants,
 *   two test commands and an empty `risks` list.
 */
function defaultVGraph(name) {
  // Small builders keep the node and edge tables readable; key order matches the stored JSON.
  const safeNode = (id, label, kind, layer) => ({ id, label, kind, layer, status: 'safe' });
  const link = (from, to, label) => ({ from, to, label });

  const nodes = [
    safeNode('source', 'vgraph source', 'source', 'input'),
    safeNode('contract', 'decision contract', 'guard', 'policy'),
    safeNode('proof', 'H-Proof gate', 'guard', 'verification')
  ];
  const edges = [
    link('source', 'contract', 'constrains'),
    link('contract', 'proof', 'verifies')
  ];
  const invariants = [
    'vgraph.json remains the source of truth',
    'rendered SVG hash must match source hash'
  ];
  const tests = ['sks gx validate', 'sks gx drift'];

  return {
    id: name,
    title: 'Sneakoscope Context Map',
    version: 1,
    nodes,
    edges,
    invariants,
    tests,
    risks: []
  };
}
|
|
599
|
+
|
|
600
|
+
/**
 * Build the starter beta document for a new cartridge.
 *
 * @param {string} name Cartridge name, used as the document `id`.
 * @returns {object} Object with `id`, `version`, `read_order` and `renderer`.
 */
function defaultBeta(name) {
  const readOrder = ['title', 'layers', 'nodes', 'edges', 'invariants', 'tests'];
  const beta = { id: name, version: 1 };
  beta.read_order = readOrder;
  beta.renderer = 'sneakoscope-codex-deterministic-svg';
  return beta;
}
|
|
608
|
+
|
|
289
609
|
async function gx(sub, args) {
|
|
290
610
|
const root = await projectRoot();
|
|
611
|
+
const name = cartridgeName(args);
|
|
612
|
+
const dir = cartridgeDir(root, name);
|
|
291
613
|
if (sub === 'init') {
|
|
292
|
-
const
|
|
293
|
-
const
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
614
|
+
const vgraphPath = path.join(dir, 'vgraph.json');
|
|
615
|
+
const betaPath = path.join(dir, 'beta.json');
|
|
616
|
+
const created = [];
|
|
617
|
+
if (!(await exists(vgraphPath)) || flag(args, '--force')) {
|
|
618
|
+
await writeJsonAtomic(vgraphPath, defaultVGraph(name));
|
|
619
|
+
created.push('vgraph.json');
|
|
620
|
+
}
|
|
621
|
+
if (!(await exists(betaPath)) || flag(args, '--force')) {
|
|
622
|
+
await writeJsonAtomic(betaPath, defaultBeta(name));
|
|
623
|
+
created.push('beta.json');
|
|
624
|
+
}
|
|
625
|
+
const render = await renderCartridge(dir, { format: 'all' });
|
|
626
|
+
const validation = await validateCartridge(dir);
|
|
627
|
+
const drift = await driftCartridge(dir);
|
|
628
|
+
console.log(JSON.stringify({ cartridge: path.relative(root, dir), created, render, validation: validation.ok, drift: drift.status }, null, 2));
|
|
629
|
+
return;
|
|
630
|
+
}
|
|
631
|
+
if (sub === 'render') {
|
|
632
|
+
const format = readFlagValue(args, '--format', 'all');
|
|
633
|
+
console.log(JSON.stringify(await renderCartridge(dir, { format }), null, 2));
|
|
634
|
+
return;
|
|
635
|
+
}
|
|
636
|
+
if (sub === 'validate') {
|
|
637
|
+
const validation = await validateCartridge(dir);
|
|
638
|
+
console.log(JSON.stringify(validation, null, 2));
|
|
639
|
+
process.exitCode = validation.ok ? 0 : 2;
|
|
640
|
+
return;
|
|
641
|
+
}
|
|
642
|
+
if (sub === 'drift') {
|
|
643
|
+
const drift = await driftCartridge(dir);
|
|
644
|
+
console.log(JSON.stringify(drift, null, 2));
|
|
645
|
+
process.exitCode = drift.status === 'low' ? 0 : 2;
|
|
298
646
|
return;
|
|
299
647
|
}
|
|
300
|
-
if (
|
|
301
|
-
|
|
648
|
+
if (sub === 'snapshot') {
|
|
649
|
+
await renderCartridge(dir, { format: 'all' });
|
|
650
|
+
console.log(JSON.stringify(await snapshotCartridge(dir), null, 2));
|
|
651
|
+
return;
|
|
652
|
+
}
|
|
653
|
+
console.error('Usage: sks gx init|render|validate|drift|snapshot');
|
|
654
|
+
process.exitCode = 1;
|
|
302
655
|
}
|
|
303
656
|
|
|
304
657
|
async function team(args) {
|
package/src/core/db-safety.mjs
CHANGED
|
@@ -181,10 +181,16 @@ function recursivelyCollectStrings(obj, out = [], depth = 0) {
|
|
|
181
181
|
return out;
|
|
182
182
|
}
|
|
183
183
|
|
|
184
|
+
/**
 * Heuristic check for whether a string reads like a SQL statement.
 *
 * After `stripSqlComments` and trimming, the text matches when it begins with one of the
 * listed SQL keywords, or when such a keyword directly follows a semicolon (optionally
 * separated by whitespace). Matching is case-insensitive.
 *
 * @param {string} [text=''] Candidate text.
 * @returns {boolean} `true` when the text looks like SQL.
 */
function looksLikeSqlText(text = '') {
  const keywords = [
    'select', 'with', 'show', 'explain', 'describe', 'insert', 'update',
    'delete', 'drop', 'truncate', 'alter', 'create', 'grant', 'revoke'
  ].join('|');
  // One pattern covers both cases: keyword at the very start, or right after a ';'.
  const statementStart = new RegExp(`(?:^|;\\s*)(?:${keywords})\\b`, 'i');
  const candidate = stripSqlComments(text).trim();
  return statementStart.test(candidate);
}
|
|
189
|
+
|
|
184
190
|
export function classifyToolPayload(payload = {}) {
|
|
185
191
|
const strings = recursivelyCollectStrings(payload).slice(0, 200);
|
|
186
192
|
const toolName = [payload.tool_name, payload.name, payload.tool?.name, payload.server, payload.mcp_tool, payload.tool, payload.type].filter(Boolean).join(' ').toLowerCase();
|
|
187
|
-
const combined = strings.join('\n');
|
|
193
|
+
const combined = strings.filter(looksLikeSqlText).join('\n');
|
|
188
194
|
const sqlClass = classifySql(combined);
|
|
189
195
|
const commandClass = classifyCommand(strings.find((s) => /\b(supabase|psql|prisma|drizzle|knex|sequelize)\b/i.test(s)) || '');
|
|
190
196
|
const toolReasons = [];
|
|
@@ -41,7 +41,7 @@ export function buildDecisionContract({ mission, schema, answers }) {
|
|
|
41
41
|
if_e2e_unavailable: 'run_unit_or_integration_and_record_e2e_not_executed',
|
|
42
42
|
if_dependency_needed: 'avoid_new_dependency_unless_allowed_by_contract',
|
|
43
43
|
if_existing_behavior_conflict: 'preserve_existing_public_behavior',
|
|
44
|
-
if_visual_cartridge_conflict: '
|
|
44
|
+
if_visual_cartridge_conflict: 'vgraph_json_wins_over_rendered_gx_artifact',
|
|
45
45
|
if_wiki_conflict: 'current_code_wins_over_wiki',
|
|
46
46
|
if_low_confidence_claim: 'read_source_do_not_ask_user',
|
|
47
47
|
if_unresolvable_optional_scope: 'defer_optional_subtask_and_complete_core_acceptance_criteria',
|