sneakoscope 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/cli/main.mjs CHANGED
@@ -1,7 +1,7 @@
1
1
  import path from 'node:path';
2
2
  import fsp from 'node:fs/promises';
3
- import { projectRoot, readJson, writeJsonAtomic, writeTextAtomic, appendJsonlBounded, nowIso, exists, tmpdir, packageRoot, dirSize, formatBytes } from '../core/fsx.mjs';
4
- import { initProject } from '../core/init.mjs';
3
+ import { projectRoot, readJson, writeJsonAtomic, appendJsonlBounded, nowIso, exists, ensureDir, tmpdir, packageRoot, dirSize, formatBytes, which } from '../core/fsx.mjs';
4
+ import { initProject, normalizeInstallScope, sksCommandPrefix } from '../core/init.mjs';
5
5
  import { getCodexInfo, runCodexExec } from '../core/codex-adapter.mjs';
6
6
  import { createMission, loadMission, findLatestMission, setCurrent, stateFile } from '../core/mission.mjs';
7
7
  import { buildQuestionSchema, writeQuestions } from '../core/questions.mjs';
@@ -12,10 +12,20 @@ import { emitHook } from '../core/hooks-runtime.mjs';
12
12
  import { storageReport, enforceRetention } from '../core/retention.mjs';
13
13
  import { classifySql, classifyCommand, loadDbSafetyPolicy, safeSupabaseMcpConfig, checkSqlFile, checkDbOperation, scanDbSafety } from '../core/db-safety.mjs';
14
14
  import { rustInfo } from '../core/rust-accelerator.mjs';
15
+ import { renderCartridge, validateCartridge, driftCartridge, snapshotCartridge } from '../core/gx-renderer.mjs';
16
+ import { DEFAULT_EVAL_THRESHOLDS, compareEvaluationReports, runEvaluationBenchmark } from '../core/evaluation.mjs';
17
+ import { buildResearchPrompt, evaluateResearchGate, writeMockResearchResult, writeResearchPlan } from '../core/research.mjs';
15
18
 
16
19
  const flag = (args, name) => args.includes(name);
17
20
  const promptOf = (args) => args.filter((x) => !String(x).startsWith('--')).join(' ').trim();
18
21
 
22
/**
 * Work out which install scope the command line asks for.
 *
 * `--project` is checked first, then `--global`; otherwise the token after
 * `--install-scope` (or `fallback` when that token is absent or empty) is
 * passed through `normalizeInstallScope`.
 *
 * @param {string[]} [args] CLI arguments.
 * @param {string} [fallback] Scope used when no flag supplies one.
 * @returns {string} `'project'`, `'global'`, or whatever `normalizeInstallScope` returns.
 */
function installScopeFromArgs(args = [], fallback = 'global') {
  if (flag(args, '--project')) return 'project';
  if (flag(args, '--global')) return 'global';
  const at = args.indexOf('--install-scope');
  const explicit = at >= 0 ? args[at + 1] : undefined;
  return normalizeInstallScope(explicit || fallback);
}
28
+
19
29
  export async function main(args) {
20
30
  const [cmd, sub, ...rest] = args;
21
31
  const tail = sub === undefined ? [] : [sub, ...rest];
@@ -24,6 +34,7 @@ export async function main(args) {
24
34
  if (cmd === 'init') return init(tail);
25
35
  if (cmd === 'selftest') return selftest(tail);
26
36
  if (cmd === 'ralph') return ralph(sub, rest);
37
+ if (cmd === 'research') return research(sub, rest);
27
38
  if (cmd === 'hook') return emitHook(sub);
28
39
  if (cmd === 'profile') return profile(sub, rest);
29
40
  if (cmd === 'hproof') return hproof(sub, rest);
@@ -31,6 +42,7 @@ export async function main(args) {
31
42
  if (cmd === 'gx') return gx(sub, rest);
32
43
  if (cmd === 'team') return team(tail);
33
44
  if (cmd === 'db') return db(sub, rest);
45
+ if (cmd === 'eval') return evalCommand(sub, rest);
34
46
  if (cmd === 'gc') return gc(tail);
35
47
  if (cmd === 'stats') return stats(tail);
36
48
  console.error(`Unknown command: ${cmd}`);
@@ -38,33 +50,68 @@ export async function main(args) {
38
50
  }
39
51
 
40
52
  function help() {
41
- console.log(`Sneakoscope Codex\n\nUsage:\n sks doctor [--fix] [--json]\n sks init\n sks selftest [--mock]\n sks ralph prepare "task"\n sks ralph answer <mission-id|latest> <answers.json>\n sks ralph run <mission-id|latest> [--mock] [--max-cycles N]\n sks ralph status <mission-id|latest>\n sks db policy\n sks db scan [--migrations] [--json]\n sks db mcp-config --project-ref <ref>\n sks db check --sql "DROP TABLE users"\n sks db check --command "supabase db reset"\n sks gc [--dry-run] [--json]\n sks stats [--json]\n`);
53
+ console.log(`Sneakoscope Codex
54
+
55
+ Usage:
56
+ sks doctor [--fix] [--json] [--install-scope global|project]
57
+ sks init [--install-scope global|project]
58
+ sks selftest [--mock]
59
+ sks ralph prepare "task"
60
+ sks ralph answer <mission-id|latest> <answers.json>
61
+ sks ralph run <mission-id|latest> [--mock] [--max-cycles N]
62
+ sks ralph status <mission-id|latest>
63
+ sks research prepare "topic" [--depth frontier]
64
+ sks research run <mission-id|latest> [--mock] [--max-cycles N]
65
+ sks research status <mission-id|latest>
66
+ sks db policy
67
+ sks db scan [--migrations] [--json]
68
+ sks db mcp-config --project-ref <ref>
69
+ sks db check --sql "DROP TABLE users"
70
+ sks db check --command "supabase db reset"
71
+ sks eval run [--json] [--out report.json]
72
+ sks eval compare --baseline old.json --candidate new.json [--json]
73
+ sks gx init [name]
74
+ sks gx render [name] [--format svg|html|all]
75
+ sks gx validate [name]
76
+ sks gx drift [name]
77
+ sks gx snapshot [name]
78
+ sks gc [--dry-run] [--json]
79
+ sks stats [--json]
80
+ `);
42
81
  }
43
82
 
44
83
  async function doctor(args) {
45
84
  const root = await projectRoot();
46
- if (flag(args, '--fix')) await initProject(root, {});
85
+ const requestedScope = args.includes('--install-scope') || flag(args, '--project') || flag(args, '--global')
86
+ ? installScopeFromArgs(args)
87
+ : null;
88
+ if (flag(args, '--fix')) await initProject(root, { installScope: requestedScope || 'global' });
47
89
  const codex = await getCodexInfo();
48
90
  const rust = await rustInfo();
49
91
  const nodeOk = Number(process.versions.node.split('.')[0]) >= 20;
50
92
  const storage = await storageReport(root);
51
93
  const pkgBytes = await dirSize(packageRoot()).catch(() => 0);
94
+ const manifest = await readJson(path.join(root, '.sneakoscope', 'manifest.json'), null);
95
+ const installScope = requestedScope || normalizeInstallScope(manifest?.installation?.scope || 'global');
96
+ const install = await installStatus(root, installScope);
52
97
  const dbPolicyExists = await exists(path.join(root, '.sneakoscope', 'db-safety.json'));
53
98
  const dbScan = await scanDbSafety(root).catch((err) => ({ ok: false, findings: [{ id: 'db_safety_scan_failed', severity: 'high', reason: err.message }] }));
54
99
  const result = {
55
100
  node: { ok: nodeOk, version: process.version }, root, codex, rust,
101
+ install,
56
102
  sneakoscope: { ok: await exists(path.join(root, '.sneakoscope')) },
57
103
  db_guard: { ok: dbPolicyExists && dbScan.ok, policy: dbPolicyExists ? await loadDbSafetyPolicy(root) : null, scan: dbScan },
58
104
  hooks: { ok: await exists(path.join(root, '.codex', 'hooks.json')) },
59
105
  skills: { ok: await exists(path.join(root, '.agents', 'skills')) },
60
106
  package: { bytes: pkgBytes, human: formatBytes(pkgBytes) }, storage
61
107
  };
62
- result.ready = nodeOk && Boolean(codex.bin) && result.sneakoscope.ok && result.db_guard.ok;
108
+ result.ready = nodeOk && Boolean(codex.bin) && install.ok && result.sneakoscope.ok && result.db_guard.ok;
63
109
  if (flag(args, '--json')) return console.log(JSON.stringify(result, null, 2));
64
110
  console.log('Sneakoscope Codex Doctor\n');
65
111
  console.log(`Node: ${nodeOk ? 'ok' : 'fail'} ${process.version}`);
66
112
  console.log(`Project: ${root}`);
67
113
  console.log(`Codex: ${codex.bin ? 'ok' : 'missing'} ${codex.version || ''}`);
114
+ console.log(`Install: ${install.ok ? 'ok' : 'missing'} ${install.scope} (${install.command_prefix})`);
68
115
  console.log(`Rust acc.: ${rust.available ? rust.version : 'optional-missing'}`);
69
116
  console.log(`State: ${result.sneakoscope.ok ? 'ok' : 'missing .sneakoscope'}`);
70
117
  console.log(`DB Guard: ${result.db_guard.ok ? 'ok' : 'blocked'} ${dbScan.findings?.length || 0} finding(s)`);
@@ -74,16 +121,35 @@ async function doctor(args) {
74
121
  console.log(`Storage: ${storage.total_human || '0 B'}`);
75
122
  console.log(`Ready: ${result.ready ? 'yes' : 'no'}`);
76
123
  if (!codex.bin) console.log('\nCodex CLI missing. Install separately: npm i -g @openai/codex, or set SKS_CODEX_BIN.');
124
+ if (!install.ok && install.scope === 'global') console.log('SKS global command missing. Install: npm i -g sneakoscope');
125
+ if (!install.ok && install.scope === 'project') console.log('SKS project package missing. Install in this project: npm i -D sneakoscope');
77
126
  if (!result.ready && !flag(args, '--fix')) console.log('Run: sks doctor --fix');
78
127
  }
79
128
 
80
129
  async function init(args) {
81
130
  const root = await projectRoot();
82
- const res = await initProject(root, { force: flag(args, '--force') });
131
+ const installScope = installScopeFromArgs(args);
132
+ const res = await initProject(root, { force: flag(args, '--force'), installScope });
83
133
  console.log(`Initialized Sneakoscope Codex in ${root}`);
134
+ console.log(`Install scope: ${installScope} (${sksCommandPrefix(installScope)})`);
84
135
  for (const x of res.created) console.log(`- ${x}`);
85
136
  }
86
137
 
138
/**
 * Describe how the `sks` command is installed for the requested scope.
 *
 * @param {string} root Project root directory.
 * @param {string} scope Install scope being checked.
 * @returns {Promise<object>} Status record. `ok` is the existence of the
 *   project-local bin file when `scope` is `'project'`; for any other scope it
 *   is whether `which('sks')` produced a truthy result.
 */
async function installStatus(root, scope) {
  const prefix = sksCommandPrefix(scope);
  // A failed lookup is reported as "not found" rather than as an error.
  const onPath = await which('sks').catch(() => null);
  const localBin = path.join(root, 'node_modules', 'sneakoscope', 'bin', 'sks.mjs');
  const hasLocalBin = await exists(localBin);
  const ok = scope === 'project' ? hasLocalBin : Boolean(onPath);
  return {
    scope,
    default_scope: 'global',
    command_prefix: prefix,
    global_bin: onPath,
    project_bin: localBin,
    ok
  };
}
152
+
87
153
  async function ralph(sub, args) {
88
154
  if (sub === 'prepare') return ralphPrepare(args);
89
155
  if (sub === 'answer') return ralphAnswer(args);
@@ -93,6 +159,101 @@ async function ralph(sub, args) {
93
159
  process.exitCode = 1;
94
160
  }
95
161
 
162
/**
 * Dispatch `sks research <prepare|run|status>` to its handler.
 * Any other subcommand prints the usage line to stderr and sets exit code 1.
 */
async function research(sub, args) {
  switch (sub) {
    case 'prepare':
      return researchPrepare(args);
    case 'run':
      return researchRun(args);
    case 'status':
      return researchStatus(args);
    default:
      console.error('Usage: sks research <prepare|run|status>');
      process.exitCode = 1;
  }
}
169
+
170
/**
 * `sks research prepare "topic" [--depth <value>]`: create a research mission,
 * write its plan, mark it as the current mission and print the next step.
 *
 * The project is initialised first when `.sneakoscope` does not exist yet.
 *
 * @param {string[]} args CLI arguments; positional tokens form the topic.
 * @throws {Error} When no topic text is supplied.
 */
async function researchPrepare(args) {
  const root = await projectRoot();
  const stateDir = path.join(root, '.sneakoscope');
  if (!(await exists(stateDir))) await initProject(root, {});

  const topic = positionalArgs(args).join(' ').trim();
  if (topic.length === 0) throw new Error('Missing research topic.');

  const { id, dir } = await createMission(root, { mode: 'research', prompt: topic });
  const depth = readFlagValue(args, '--depth', 'frontier');
  const plan = await writeResearchPlan(dir, topic, { depth });
  await setCurrent(root, {
    mission_id: id,
    mode: 'RESEARCH',
    phase: 'RESEARCH_PREPARED',
    questions_allowed: false
  });

  const planFile = path.relative(root, path.join(dir, 'research-plan.md'));
  console.log(`Research mission created: ${id}`);
  console.log(`Methodology: ${plan.methodology}`);
  console.log(`Plan: ${planFile}`);
  console.log(`Run: sks research run ${id} --max-cycles 3`);
}
183
+
184
/**
 * `sks research run <mission-id|latest> [--mock] [--max-cycles N]`.
 *
 * Flow, as implemented below:
 *  1. Resolve and load the mission; write a research plan if none exists.
 *  2. Refuse to run (exit code 2) when the DB safety scan is not ok.
 *  3. With `--mock`, or when no Codex binary is found, write a mock result and stop.
 *  4. Otherwise run up to `maxCycles` Codex cycles, feeding each cycle the
 *     previous output, until the research gate passes.
 *
 * @param {string[]} args CLI arguments; `args[0]` is the mission selector.
 * @throws {Error} When the mission id cannot be resolved.
 */
async function researchRun(args) {
  const root = await projectRoot();
  const id = await resolveMissionId(root, args[0]);
  if (!id) throw new Error('Usage: sks research run <mission-id|latest> [--mock] [--max-cycles N]');

  const { dir, mission } = await loadMission(root, id);
  const planPath = path.join(dir, 'research-plan.json');
  const planMissing = !(await exists(planPath));
  if (planMissing) await writeResearchPlan(dir, mission.prompt || '', {});
  const plan = await readJson(planPath);

  const dbScan = await scanDbSafety(root);
  if (!dbScan.ok) {
    console.error('Research cannot run: DB Guardian found unsafe Supabase/MCP/database configuration.');
    console.error(JSON.stringify(dbScan.findings, null, 2));
    process.exitCode = 2;
    return;
  }

  const maxCycles = readMaxCycles(args, 3);
  const mock = flag(args, '--mock');
  const eventsFile = path.join(dir, 'events.jsonl');
  // Small local helpers so every event and state change is written the same way.
  const logEvent = (type, extra = {}) => appendJsonlBounded(eventsFile, { ts: nowIso(), type, ...extra });
  const setPhase = (phase, questions_allowed) =>
    setCurrent(root, { mission_id: id, mode: 'RESEARCH', phase, questions_allowed });

  await setPhase('RESEARCH_RUNNING_NO_QUESTIONS', false);
  await logEvent('research.run.started', { maxCycles, mock });

  if (mock) {
    const gate = await writeMockResearchResult(dir, plan);
    await setPhase(gate.passed ? 'RESEARCH_DONE' : 'RESEARCH_PAUSED', true);
    console.log(`Mock research done: ${id}`);
    console.log(`Gate: ${gate.passed ? 'passed' : 'blocked'}`);
    return;
  }

  const codex = await getCodexInfo();
  if (!codex.bin) {
    // Without a Codex binary the run degrades to the mock result.
    console.error('Codex CLI not found. Running mock research instead.');
    const gate = await writeMockResearchResult(dir, plan);
    await setPhase(gate.passed ? 'RESEARCH_DONE' : 'RESEARCH_PAUSED', true);
    console.log(`Mock research done: ${id}`);
    return;
  }

  const cycleTimeoutMs = 45 * 60 * 1000;
  let last = '';
  for (let cycle = 1; cycle <= maxCycles; cycle++) {
    const cycleDir = path.join(dir, 'research', `cycle-${cycle}`);
    const outputFile = path.join(cycleDir, 'final.md');
    await logEvent('research.cycle.start', { cycle });

    const prompt = buildResearchPrompt({ id, mission, plan, cycle, previous: last });
    const result = await runCodexExec({
      root,
      prompt,
      outputFile,
      json: true,
      profile: 'sks-research',
      logDir: cycleDir,
      timeoutMs: cycleTimeoutMs
    });
    await writeJsonAtomic(path.join(cycleDir, 'process.json'), {
      code: result.code,
      stdout_tail: result.stdout,
      stderr_tail: result.stderr,
      stdout_bytes: result.stdoutBytes,
      stderr_bytes: result.stderrBytes,
      truncated: result.truncated,
      timed_out: result.timedOut
    });
    last = await safeReadText(outputFile, result.stdout || result.stderr || '');

    if (containsUserQuestion(last)) {
      // The output asked the user something: record it and push the next cycle to continue unaided.
      await logEvent('research.guard.question_blocked', { cycle });
      last = `${last}\n\n${noQuestionContinuationReason()}`;
      continue;
    }

    const gate = await evaluateResearchGate(dir);
    if (!gate.passed) {
      await logEvent('research.cycle.continue', { cycle, reasons: gate.reasons });
      continue;
    }

    await setPhase('RESEARCH_DONE', true);
    await logEvent('research.done', { cycle });
    // Retention cleanup is best-effort; a failure must not fail a finished run.
    await enforceRetention(root).catch(() => {});
    console.log(`Research done: ${id}`);
    return;
  }

  await setPhase('RESEARCH_PAUSED_MAX_CYCLES', true);
  console.log(`Research paused after max cycles: ${id}`);
}
245
+
246
/**
 * `sks research status <mission-id|latest>`: print the mission, the current
 * state file, the research gate and the novelty-ledger entry count as JSON.
 *
 * The gate is read from `research-gate.evaluated.json`, with the contents of
 * `research-gate.json` (or `null`) supplied as the fallback value.
 *
 * @param {string[]} args CLI arguments; `args[0]` is the mission selector.
 * @throws {Error} When the mission id cannot be resolved.
 */
async function researchStatus(args) {
  const root = await projectRoot();
  const id = await resolveMissionId(root, args[0]);
  if (!id) throw new Error('Usage: sks research status <mission-id|latest>');

  const { dir, mission } = await loadMission(root, id);
  const state = await readJson(stateFile(root), {});
  const evaluatedGatePath = path.join(dir, 'research-gate.evaluated.json');
  const rawGate = await readJson(path.join(dir, 'research-gate.json'), null);
  const gate = await readJson(evaluatedGatePath, rawGate);
  const ledger = await readJson(path.join(dir, 'novelty-ledger.json'), null);
  const novelty_entries = ledger?.entries?.length ?? null;

  console.log(JSON.stringify({ mission, state, gate, novelty_entries }, null, 2));
}
256
+
96
257
  async function ralphPrepare(args) {
97
258
  const root = await projectRoot();
98
259
  if (!(await exists(path.join(root, '.sneakoscope')))) await initProject(root, {});
@@ -189,7 +350,7 @@ async function ralphRun(args) {
189
350
  }
190
351
 
191
352
  function buildRalphPrompt({ id, mission, contract, cycle, previous }) {
192
- return `You are running Sneakoscope Codex Ralph mode.\nMISSION: ${id}\nTASK: ${mission.prompt}\nCYCLE: ${cycle}\nNO-QUESTION LOCK: Do not ask the user. Resolve using decision-contract.json.\nDATABASE SAFETY: Destructive database operations are forbidden. Do not run DROP, TRUNCATE, db reset, db push, branch reset/merge/delete, project deletion, RLS disable, or live execute_sql writes. Use read-only/project-scoped Supabase MCP only unless the sealed contract explicitly allows migration files for local or preview branch.\nDECISION CONTRACT:\n${JSON.stringify(contract, null, 2)}\nPERFORMANCE POLICY: keep outputs concise; raw logs stay in files; summarize evidence only.\nLOOP: plan, read before write, implement within contract, run/justify tests, update .sneakoscope/missions/${id}/done-gate.json.\nPrevious cycle tail:\n${String(previous || '').slice(-2500)}\n`;
353
+ return `You are running Sneakoscope Codex Ralph mode.\nMISSION: ${id}\nTASK: ${mission.prompt}\nCYCLE: ${cycle}\nNO-QUESTION LOCK: Do not ask the user. Resolve using decision-contract.json.\nDATABASE SAFETY: Destructive database operations are forbidden. Do not run DROP, TRUNCATE, db reset, db push, branch reset/merge/delete, project deletion, RLS disable, or live execute_sql writes. Use read-only/project-scoped Supabase MCP only unless the sealed contract explicitly allows migration files for local or preview branch.\nDECISION CONTRACT:\n${JSON.stringify(contract, null, 2)}\nPERFORMANCE POLICY: keep outputs concise; raw logs stay in files; summarize evidence only. If the task claims performance, token, or accuracy improvement, run sks eval run or sks eval compare and record the report path in done-gate.json evidence.\nDESIGN POLICY: if the task creates HTML/UI/prototype/deck-like visual artifacts, use the installed design-artifact-expert skill, inspect design context first, verify rendered output, and record design verification in done-gate.json.\nLOOP: plan, read before write, implement within contract, run/justify tests, update .sneakoscope/missions/${id}/done-gate.json.\nPrevious cycle tail:\n${String(previous || '').slice(-2500)}\n`;
193
354
  }
194
355
 
195
356
  async function safeReadText(file, fallback = '') {
@@ -223,6 +384,14 @@ async function selftest() {
223
384
  const tmp = tmpdir();
224
385
  process.chdir(tmp);
225
386
  await initProject(tmp, {});
387
+ const defaultHooks = await readJson(path.join(tmp, '.codex', 'hooks.json'));
388
+ if (defaultHooks.hooks.PreToolUse[0].hooks[0].command !== 'sks hook pre-tool') throw new Error('selftest failed: global install hook command changed');
389
+ const projectScopeTmp = tmpdir();
390
+ await initProject(projectScopeTmp, { installScope: 'project' });
391
+ const projectHooks = await readJson(path.join(projectScopeTmp, '.codex', 'hooks.json'));
392
+ if (projectHooks.hooks.PreToolUse[0].hooks[0].command !== 'node ./node_modules/sneakoscope/bin/sks.mjs hook pre-tool') throw new Error('selftest failed: project install hook command missing');
393
+ const researchSkillExists = await exists(path.join(tmp, '.agents', 'skills', 'research-discovery', 'SKILL.md'));
394
+ if (!researchSkillExists) throw new Error('selftest failed: research skill not installed');
226
395
  const { id, dir, mission } = await createMission(tmp, { mode: 'ralph', prompt: '로그인 세션 만료 UX 개선 supabase db' });
227
396
  const schema = buildQuestionSchema(mission.prompt);
228
397
  await writeQuestions(dir, schema);
@@ -238,9 +407,28 @@ async function selftest() {
238
407
  if (classifyCommand('supabase db reset').level !== 'destructive') throw new Error('selftest failed: supabase db reset not detected');
239
408
  const dbDecision = await checkDbOperation(tmp, { mission_id: id }, { tool_name: 'mcp__supabase__execute_sql', sql: 'drop table users;' }, { duringRalph: true });
240
409
  if (dbDecision.action !== 'block') throw new Error('selftest failed: destructive MCP SQL allowed');
410
+ const nonDbDecision = await checkDbOperation(tmp, {}, { command: 'npm test' }, { duringRalph: true });
411
+ if (nonDbDecision.action !== 'allow') throw new Error('selftest failed: non-DB command blocked by DB guard');
412
+ const evalReport = runEvaluationBenchmark({ iterations: 5 });
413
+ if (!evalReport.comparison.meaningful_improvement) throw new Error('selftest failed: evaluation benchmark did not show meaningful improvement');
414
+ const { dir: researchDir, mission: researchMission } = await createMission(tmp, { mode: 'research', prompt: '새로운 코드 리뷰 방법론 연구' });
415
+ const researchPlan = await writeResearchPlan(researchDir, researchMission.prompt, {});
416
+ const researchGate = await writeMockResearchResult(researchDir, researchPlan);
417
+ if (!researchGate.passed) throw new Error('selftest failed: mock research gate did not pass');
241
418
  await writeJsonAtomic(path.join(dir, 'done-gate.json'), { passed: true, unsupported_critical_claims: 0, database_safety_violation: false, database_safety_reviewed: true, visual_drift: 'low', wiki_drift: 'low', tests_required: false });
242
419
  const gate = await evaluateDoneGate(tmp, id);
243
420
  if (!gate.passed) throw new Error('selftest failed: done gate');
421
+ const gxDir = path.join(tmp, '.sneakoscope', 'gx', 'cartridges', 'selftest');
422
+ await writeJsonAtomic(path.join(gxDir, 'vgraph.json'), defaultVGraph('selftest'));
423
+ await writeJsonAtomic(path.join(gxDir, 'beta.json'), defaultBeta('selftest'));
424
+ const render = await renderCartridge(gxDir, { format: 'all' });
425
+ if (!render.outputs.includes('render.svg')) throw new Error('selftest failed: gx svg not rendered');
426
+ const validation = await validateCartridge(gxDir);
427
+ if (!validation.ok) throw new Error('selftest failed: gx validation rejected');
428
+ const drift = await driftCartridge(gxDir);
429
+ if (drift.status !== 'low') throw new Error('selftest failed: gx drift is high');
430
+ const snapshot = await snapshotCartridge(gxDir);
431
+ if (!snapshot.files.svg || !snapshot.files.html) throw new Error('selftest failed: gx snapshot incomplete');
244
432
  const gc = await enforceRetention(tmp, { dryRun: true });
245
433
  if (!gc.report.exists) throw new Error('selftest failed: storage report');
246
434
  console.log('Sneakoscope Codex selftest passed.');
@@ -262,6 +450,75 @@ async function hproof(sub, args) {
262
450
  console.log(JSON.stringify(await evaluateDoneGate(root, id), null, 2));
263
451
  }
264
452
 
453
/**
 * `sks eval <run|compare|thresholds>`.
 *
 * - no subcommand, `help`, `--help`: print the usage line.
 * - `thresholds`: print `DEFAULT_EVAL_THRESHOLDS` as JSON.
 * - `run`: execute the evaluation benchmark (`--iterations N`, default 200),
 *   save the report and print it (JSON with `--json`).
 * - `compare`: compare two saved reports given by `--baseline`/`--candidate`
 *   (or the first two positional arguments), save and print the comparison.
 * - anything else: print usage to stderr and set exit code 1.
 *
 * @param {string|undefined} sub Subcommand.
 * @param {string[]} args Remaining CLI arguments.
 * @throws {Error} When `--iterations` is not a positive integer, or when
 *   `compare` is missing a baseline or candidate path.
 */
async function evalCommand(sub, args) {
  if (!sub || sub === 'help' || sub === '--help') {
    console.log('Usage: sks eval run [--json] [--out report.json] [--iterations N] | sks eval compare --baseline old.json --candidate new.json [--json]');
    return;
  }
  if (sub === 'thresholds') return console.log(JSON.stringify(DEFAULT_EVAL_THRESHOLDS, null, 2));
  const root = await projectRoot();
  if (sub === 'run') {
    const rawIterations = readFlagValue(args, '--iterations', 200);
    const iterations = Number(rawIterations);
    // Reject NaN, fractions, zero and negatives up front instead of handing them to the benchmark.
    if (!Number.isInteger(iterations) || iterations < 1) {
      throw new Error(`Invalid --iterations value "${rawIterations}": expected a positive integer.`);
    }
    const report = runEvaluationBenchmark({ iterations });
    const saved = await saveEvalReport(root, args, report, 'eval');
    if (flag(args, '--json')) return console.log(JSON.stringify({ ...report, report_path: saved }, null, 2));
    printEvalRun(report, saved);
    return;
  }
  if (sub === 'compare') {
    const positional = positionalArgs(args);
    const baselinePath = readFlagValue(args, '--baseline', positional[0]);
    const candidatePath = readFlagValue(args, '--candidate', positional[1]);
    if (!baselinePath || !candidatePath) throw new Error('Usage: sks eval compare --baseline old.json --candidate new.json [--json]');
    const report = compareEvaluationReports(await readJson(path.resolve(baselinePath)), await readJson(path.resolve(candidatePath)));
    const saved = await saveEvalReport(root, args, report, 'eval-compare');
    if (flag(args, '--json')) return console.log(JSON.stringify({ ...report, report_path: saved }, null, 2));
    printEvalCompare(report, saved);
    return;
  }
  console.error('Usage: sks eval run|compare|thresholds');
  process.exitCode = 1;
}
482
+
483
/**
 * Persist an evaluation report as JSON and return the path written.
 *
 * @param {string} root Project root.
 * @param {string[]} args CLI arguments (`--no-save`, `--out <file>`).
 * @param {object} report Report to write.
 * @param {string} prefix File-name prefix for the default, timestamped path.
 * @returns {Promise<string|null>} The report path, or `null` with `--no-save`.
 */
async function saveEvalReport(root, args, report, prefix) {
  if (flag(args, '--no-save')) return null;

  const explicitOut = readFlagValue(args, '--out', null);
  let target;
  if (explicitOut) {
    target = path.resolve(explicitOut);
  } else {
    // ':' and '.' in the ISO timestamp are replaced with '-' for the file name.
    const stamp = nowIso().replace(/[:.]/g, '-');
    target = path.join(root, '.sneakoscope', 'reports', `${prefix}-${stamp}.json`);
  }

  await ensureDir(path.dirname(target));
  await writeJsonAtomic(target, report);
  return target;
}
493
+
494
/** Format a fraction as a percentage with one decimal place, e.g. 0.5 -> "50.0%". */
function pct(x) {
  const percent = 100 * x;
  return percent.toFixed(1) + '%';
}
497
+
498
/**
 * Print a human-readable summary of an `eval run` report.
 *
 * @param {object} report Benchmark report; reads `scenario`, `baseline`, `candidate`, `comparison`.
 * @param {string|null} saved Path the report was saved to; the "Report:" line is skipped when falsy.
 */
function printEvalRun(report, saved) {
  const comparison = report.comparison;
  console.log('Sneakoscope Eval');
  console.log(`Scenario: ${report.scenario.id}`);

  const { baseline, candidate } = report;
  console.log(`Tokens: ${baseline.estimated_tokens} -> ${candidate.estimated_tokens} (${pct(comparison.token_savings_pct)} saved)`);

  // Non-negative deltas get an explicit '+' so the direction is visible.
  const sign = comparison.accuracy_delta >= 0 ? '+' : '';
  console.log(`Accuracy: ${baseline.quality.accuracy_proxy} -> ${candidate.quality.accuracy_proxy} (${sign}${comparison.accuracy_delta})`);
  console.log(`Recall: ${candidate.quality.required_recall}`);
  console.log(`Precision: ${baseline.quality.relevance_precision} -> ${candidate.quality.relevance_precision}`);
  console.log(`Build ms: ${baseline.context_build_ms_per_run} -> ${candidate.context_build_ms_per_run}`);

  const verdict = comparison.meaningful_improvement ? 'yes' : 'no';
  console.log(`Meaningful improvement: ${verdict}`);
  if (saved) console.log(`Report: ${saved}`);
}
510
+
511
/**
 * Print a human-readable summary of an `eval compare` report.
 *
 * @param {object} report Comparison report; reads the labels, `baseline`, `candidate`, `comparison`.
 * @param {string|null} saved Path the report was saved to; the "Report:" line is skipped when falsy.
 */
function printEvalCompare(report, saved) {
  const comparison = report.comparison;
  console.log('Sneakoscope Eval Compare');
  console.log(`Baseline: ${report.baseline_label}`);
  console.log(`Candidate: ${report.candidate_label}`);

  const { baseline, candidate } = report;
  console.log(`Tokens: ${baseline.estimated_tokens} -> ${candidate.estimated_tokens} (${pct(comparison.token_savings_pct)} saved)`);

  // Non-negative deltas get an explicit '+' so the direction is visible.
  const sign = comparison.accuracy_delta >= 0 ? '+' : '';
  console.log(`Accuracy: ${baseline.quality.accuracy_proxy} -> ${candidate.quality.accuracy_proxy} (${sign}${comparison.accuracy_delta})`);

  const verdict = comparison.meaningful_improvement ? 'yes' : 'no';
  console.log(`Meaningful improvement: ${verdict}`);
  if (saved) console.log(`Report: ${saved}`);
}
521
+
265
522
  async function memory(sub, args) { return gc(args || []); }
266
523
 
267
524
  async function gc(args) {
@@ -286,19 +543,115 @@ async function stats(args) {
286
543
  for (const [name, sec] of Object.entries(report.sections || {})) console.log(`- ${name}: ${sec.human}`);
287
544
  }
288
545
 
546
/**
 * Extract positional arguments from a CLI argument list.
 *
 * Tokens starting with `--` are dropped. For the flags listed below, the
 * token immediately after the flag is treated as its value and dropped too.
 *
 * @param {Array} [args] Raw arguments; each token is stringified.
 * @returns {string[]} Positional tokens in their original order.
 */
function positionalArgs(args = []) {
  const takesValue = new Set(['--format', '--iterations', '--out', '--baseline', '--candidate', '--install-scope', '--max-cycles', '--depth']);
  const positional = [];
  let skipNext = false;
  for (const raw of args) {
    if (skipNext) {
      // This token is the value of the preceding value-taking flag.
      skipNext = false;
      continue;
    }
    const token = String(raw);
    if (takesValue.has(token)) {
      skipNext = true;
      continue;
    }
    if (!token.startsWith('--')) positional.push(token);
  }
  return positional;
}
559
+
560
/**
 * Read the value that follows a `--flag` in an argument list.
 *
 * @param {Array} args CLI arguments.
 * @param {string} name Flag to look for, e.g. `'--out'`.
 * @param {*} fallback Returned when the flag is absent, has no value after it,
 *   or is followed directly by another `--flag`.
 * @returns {*} The token after `name`, or `fallback`.
 */
function readFlagValue(args, name, fallback) {
  const at = args.indexOf(name);
  if (at < 0) return fallback;
  const value = args[at + 1];
  // A following `--something` is the next flag, not this flag's value
  // (e.g. `--out --json` must not write a file named "--json").
  if (!value || String(value).startsWith('--')) return fallback;
  return value;
}
564
+
565
/**
 * Derive a file-system-safe cartridge name from the first positional argument.
 *
 * Path separators and any run of characters outside `[A-Za-z0-9_.-]` become
 * `-`, and leading/trailing dashes are removed. The fallback is returned when
 * nothing usable remains, or when the result consists only of dots: `.` and
 * `..` would otherwise resolve to the cartridges directory itself or to its
 * parent.
 *
 * @param {string[]} args CLI arguments.
 * @param {string} [fallback] Name used when no usable name is supplied.
 * @returns {string} Sanitised cartridge name.
 */
function cartridgeName(args, fallback = 'architecture-atlas') {
  const raw = positionalArgs(args)[0] || fallback;
  const cleaned = String(raw)
    .trim()
    .replace(/[\\/]+/g, '-')
    .replace(/[^A-Za-z0-9_.-]+/g, '-')
    .replace(/^-+|-+$/g, '');
  if (cleaned === '' || /^\.+$/.test(cleaned)) return fallback;
  return cleaned;
}
569
+
570
/** Directory of the named GX cartridge: `<root>/.sneakoscope/gx/cartridges/<name>`. */
function cartridgeDir(root, name) {
  const segments = ['.sneakoscope', 'gx', 'cartridges', name];
  return path.join(root, ...segments);
}
573
+
574
/**
 * Build the starter `vgraph.json` content for a new cartridge: three "safe"
 * nodes (source -> contract -> proof), two edges, and default invariants/tests.
 *
 * @param {string} name Cartridge name, stored as the graph `id`.
 * @returns {object} A fresh graph object on every call.
 */
function defaultVGraph(name) {
  const safeNode = (id, label, kind, layer) => ({ id, label, kind, layer, status: 'safe' });
  const link = (from, to, label) => ({ from, to, label });

  const nodes = [
    safeNode('source', 'vgraph source', 'source', 'input'),
    safeNode('contract', 'decision contract', 'guard', 'policy'),
    safeNode('proof', 'H-Proof gate', 'guard', 'verification')
  ];
  const edges = [
    link('source', 'contract', 'constrains'),
    link('contract', 'proof', 'verifies')
  ];
  const invariants = [
    'vgraph.json remains the source of truth',
    'rendered SVG hash must match source hash'
  ];
  const tests = ['sks gx validate', 'sks gx drift'];

  return {
    id: name,
    title: 'Sneakoscope Context Map',
    version: 1,
    nodes,
    edges,
    invariants,
    tests,
    risks: []
  };
}
599
+
600
/**
 * Build the starter `beta.json` content for a new cartridge.
 *
 * @param {string} name Cartridge name, stored as `id`.
 * @returns {object} Object with `id`, `version`, `read_order` and `renderer`.
 */
function defaultBeta(name) {
  const read_order = ['title', 'layers', 'nodes', 'edges', 'invariants', 'tests'];
  const renderer = 'sneakoscope-codex-deterministic-svg';
  return { id: name, version: 1, read_order, renderer };
}
608
+
289
609
  async function gx(sub, args) {
290
610
  const root = await projectRoot();
611
+ const name = cartridgeName(args);
612
+ const dir = cartridgeDir(root, name);
291
613
  if (sub === 'init') {
292
- const name = args[0] || 'architecture-atlas';
293
- const dir = path.join(root, '.sneakoscope', 'gx', 'cartridges', name);
294
- await writeJsonAtomic(path.join(dir, 'vgraph.json'), { id: name, version: 1, nodes: [], edges: [], invariants: [], tests: [] });
295
- await writeJsonAtomic(path.join(dir, 'beta.json'), { id: name, version: 1, read_order: ['grid', 'layers', 'nodes', 'edges', 'tests'] });
296
- await writeTextAtomic(path.join(dir, 'image-prompt.md'), 'Create a clean technical architecture sheet from vgraph.json. Use GPT Image 2 only.');
297
- console.log(`GX cartridge initialized: ${path.relative(root, dir)}`);
614
+ const vgraphPath = path.join(dir, 'vgraph.json');
615
+ const betaPath = path.join(dir, 'beta.json');
616
+ const created = [];
617
+ if (!(await exists(vgraphPath)) || flag(args, '--force')) {
618
+ await writeJsonAtomic(vgraphPath, defaultVGraph(name));
619
+ created.push('vgraph.json');
620
+ }
621
+ if (!(await exists(betaPath)) || flag(args, '--force')) {
622
+ await writeJsonAtomic(betaPath, defaultBeta(name));
623
+ created.push('beta.json');
624
+ }
625
+ const render = await renderCartridge(dir, { format: 'all' });
626
+ const validation = await validateCartridge(dir);
627
+ const drift = await driftCartridge(dir);
628
+ console.log(JSON.stringify({ cartridge: path.relative(root, dir), created, render, validation: validation.ok, drift: drift.status }, null, 2));
629
+ return;
630
+ }
631
+ if (sub === 'render') {
632
+ const format = readFlagValue(args, '--format', 'all');
633
+ console.log(JSON.stringify(await renderCartridge(dir, { format }), null, 2));
634
+ return;
635
+ }
636
+ if (sub === 'validate') {
637
+ const validation = await validateCartridge(dir);
638
+ console.log(JSON.stringify(validation, null, 2));
639
+ process.exitCode = validation.ok ? 0 : 2;
640
+ return;
641
+ }
642
+ if (sub === 'drift') {
643
+ const drift = await driftCartridge(dir);
644
+ console.log(JSON.stringify(drift, null, 2));
645
+ process.exitCode = drift.status === 'low' ? 0 : 2;
298
646
  return;
299
647
  }
300
- if (['render', 'validate', 'drift'].includes(sub)) return console.log(`GX ${sub}: metadata only; image generation is performed by Codex $imagegen in live mode.`);
301
- console.error('Usage: sks gx init|render|validate|drift');
648
+ if (sub === 'snapshot') {
649
+ await renderCartridge(dir, { format: 'all' });
650
+ console.log(JSON.stringify(await snapshotCartridge(dir), null, 2));
651
+ return;
652
+ }
653
+ console.error('Usage: sks gx init|render|validate|drift|snapshot');
654
+ process.exitCode = 1;
302
655
  }
303
656
 
304
657
  async function team(args) {
@@ -181,10 +181,16 @@ function recursivelyCollectStrings(obj, out = [], depth = 0) {
181
181
  return out;
182
182
  }
183
183
 
184
/**
 * Heuristic: does this string look like SQL?
 *
 * After `stripSqlComments` and trimming, the text counts as SQL when it starts
 * with one of the listed statement keywords, or when such a keyword follows a
 * `;` anywhere in the text. Matching is case-insensitive.
 *
 * @param {string} [text] Candidate string.
 * @returns {boolean} True when either pattern matches.
 */
function looksLikeSqlText(text = '') {
  const stripped = stripSqlComments(text).trim();
  if (/^(select|with|show|explain|describe|insert|update|delete|drop|truncate|alter|create|grant|revoke)\b/i.test(stripped)) {
    return true;
  }
  return /;\s*(select|with|show|explain|describe|insert|update|delete|drop|truncate|alter|create|grant|revoke)\b/i.test(stripped);
}
189
+
184
190
  export function classifyToolPayload(payload = {}) {
185
191
  const strings = recursivelyCollectStrings(payload).slice(0, 200);
186
192
  const toolName = [payload.tool_name, payload.name, payload.tool?.name, payload.server, payload.mcp_tool, payload.tool, payload.type].filter(Boolean).join(' ').toLowerCase();
187
- const combined = strings.join('\n');
193
+ const combined = strings.filter(looksLikeSqlText).join('\n');
188
194
  const sqlClass = classifySql(combined);
189
195
  const commandClass = classifyCommand(strings.find((s) => /\b(supabase|psql|prisma|drizzle|knex|sequelize)\b/i.test(s)) || '');
190
196
  const toolReasons = [];
@@ -41,7 +41,7 @@ export function buildDecisionContract({ mission, schema, answers }) {
41
41
  if_e2e_unavailable: 'run_unit_or_integration_and_record_e2e_not_executed',
42
42
  if_dependency_needed: 'avoid_new_dependency_unless_allowed_by_contract',
43
43
  if_existing_behavior_conflict: 'preserve_existing_public_behavior',
44
- if_visual_cartridge_conflict: 'vgraph_json_wins_over_sheet_png',
44
+ if_visual_cartridge_conflict: 'vgraph_json_wins_over_rendered_gx_artifact',
45
45
  if_wiki_conflict: 'current_code_wins_over_wiki',
46
46
  if_low_confidence_claim: 'read_source_do_not_ask_user',
47
47
  if_unresolvable_optional_scope: 'defer_optional_subtask_and_complete_core_acceptance_criteria',