sneakoscope 0.6.79 → 0.6.81
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -3
- package/package.json +1 -1
- package/src/cli/main.mjs +114 -11
- package/src/cli/maintenance-commands.mjs +3 -3
- package/src/core/codex-app.mjs +3 -3
- package/src/core/fsx.mjs +1 -1
- package/src/core/hooks-runtime.mjs +84 -5
- package/src/core/init.mjs +3 -3
- package/src/core/pipeline.mjs +4 -2
- package/src/core/qa-loop.mjs +13 -6
- package/src/core/questions.mjs +3 -3
- package/src/core/routes.mjs +14 -1
package/README.md
CHANGED
|
@@ -59,7 +59,7 @@ sks selftest --mock
|
|
|
59
59
|
- Node.js `>=20.11`
|
|
60
60
|
- npm
|
|
61
61
|
- Codex CLI for terminal workflows
|
|
62
|
-
- Codex App for app-facing workflows
|
|
62
|
+
- Codex App for app-facing workflows, with Codex Computer Use required for UI/browser evidence
|
|
63
63
|
- cmux for the CLI-first runtime
|
|
64
64
|
- Context7 MCP for current-docs-gated routes
|
|
65
65
|
|
|
@@ -328,7 +328,7 @@ sks qa-loop run latest --max-cycles 2
|
|
|
328
328
|
sks qa-loop status latest
|
|
329
329
|
```
|
|
330
330
|
|
|
331
|
-
Use `$QA-LOOP` in Codex App when UI-level E2E needs verification. UI verification must use Codex Computer Use evidence only; Chrome MCP, Browser Use, Playwright, and other browser automation do not satisfy UI-level E2E verification.
|
|
331
|
+
Use `$QA-LOOP` in Codex App when UI-level E2E needs verification. UI verification must use Codex Computer Use evidence only; Chrome MCP, Browser Use, Playwright, Selenium, Puppeteer, and other browser automation do not satisfy UI-level E2E verification.
|
|
332
332
|
|
|
333
333
|
### Refresh Context Before Risky Work
|
|
334
334
|
|
|
@@ -380,7 +380,7 @@ sks codex-app check
|
|
|
380
380
|
codex mcp list
|
|
381
381
|
```
|
|
382
382
|
|
|
383
|
-
Codex App workflows need the app installed and
|
|
383
|
+
Codex App workflows need the app installed. QA and visual-evidence workflows require first-party Codex Computer Use; Browser Use may support non-UI browser context, but it is not valid UI/browser verification evidence.
|
|
384
384
|
|
|
385
385
|
### Setup is blocked by another harness
|
|
386
386
|
|
|
@@ -415,6 +415,8 @@ npm run sizecheck
|
|
|
415
415
|
npm run release:check
|
|
416
416
|
```
|
|
417
417
|
|
|
418
|
+
Package pipeline UI/browser verification and visual inspection evidence must come from Codex Computer Use only. Do not use Playwright, Chrome MCP, Browser Use, Selenium, Puppeteer, or other browser automation as substitutes for that evidence.
|
|
419
|
+
|
|
418
420
|
Dry-run publish:
|
|
419
421
|
|
|
420
422
|
```sh
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "sneakoscope",
|
|
3
3
|
"displayName": "ㅅㅋㅅ",
|
|
4
|
-
"version": "0.6.79",
|
|
4
|
+
"version": "0.6.81",
|
|
5
5
|
"description": "Sneakoscope Codex: database-safe Codex CLI/App harness with Team, Goal, AutoResearch, TriWiki, and Honest Mode.",
|
|
6
6
|
"type": "module",
|
|
7
7
|
"homepage": "https://github.com/mandarange/Sneakoscope-Codex#readme",
|
package/src/cli/main.mjs
CHANGED
|
@@ -25,7 +25,7 @@ import { defaultEvaluationScenario, runEvaluationBenchmark } from '../core/evalu
|
|
|
25
25
|
import { buildResearchPrompt, evaluateResearchGate, writeMockResearchResult, writeResearchPlan } from '../core/research.mjs';
|
|
26
26
|
import { contextCapsule } from '../core/triwiki-attention.mjs';
|
|
27
27
|
import { rgbaKey, rgbaToWikiCoord, validateWikiCoordinateIndex } from '../core/wiki-coordinate.mjs';
|
|
28
|
-
import { ALLOWED_REASONING_EFFORTS, COMMAND_CATALOG, DOLLAR_COMMAND_ALIASES, DOLLAR_COMMANDS, DOLLAR_SKILL_NAMES, FROM_CHAT_IMG_CHECKLIST_ARTIFACT, FROM_CHAT_IMG_COVERAGE_ARTIFACT, FROM_CHAT_IMG_QA_LOOP_ARTIFACT, FROM_CHAT_IMG_SOURCE_INVENTORY_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS, FROM_CHAT_IMG_VISUAL_MAP_ARTIFACT, FROM_CHAT_IMG_WORK_ORDER_ARTIFACT, RECOMMENDED_SKILLS, ROUTES, USAGE_TOPICS, context7ConfigToml, hasContext7ConfigText, hasFromChatImgSignal, looksLikeAnswerOnlyRequest, noUnrequestedFallbackCodePolicyText, reflectionRequiredForRoute, reasoningInstruction, routePrompt, routeReasoning, routeRequiresSubagents, stackCurrentDocsPolicy, triwikiContextTracking } from '../core/routes.mjs';
|
|
28
|
+
import { ALLOWED_REASONING_EFFORTS, CODEX_COMPUTER_USE_EVIDENCE_SOURCE, CODEX_COMPUTER_USE_ONLY_POLICY, COMMAND_CATALOG, DOLLAR_COMMAND_ALIASES, DOLLAR_COMMANDS, DOLLAR_SKILL_NAMES, FROM_CHAT_IMG_CHECKLIST_ARTIFACT, FROM_CHAT_IMG_COVERAGE_ARTIFACT, FROM_CHAT_IMG_QA_LOOP_ARTIFACT, FROM_CHAT_IMG_SOURCE_INVENTORY_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS, FROM_CHAT_IMG_VISUAL_MAP_ARTIFACT, FROM_CHAT_IMG_WORK_ORDER_ARTIFACT, RECOMMENDED_SKILLS, ROUTES, USAGE_TOPICS, context7ConfigToml, hasContext7ConfigText, hasFromChatImgSignal, looksLikeAnswerOnlyRequest, noUnrequestedFallbackCodePolicyText, reflectionRequiredForRoute, reasoningInstruction, routePrompt, routeReasoning, routeRequiresSubagents, stackCurrentDocsPolicy, triwikiContextTracking } from '../core/routes.mjs';
|
|
29
29
|
import { context7Evidence, evaluateStop, recordContext7Evidence, recordSubagentEvidence } from '../core/pipeline.mjs';
|
|
30
30
|
import { TEAM_DECOMPOSITION_ARTIFACT, TEAM_GRAPH_ARTIFACT, TEAM_INBOX_DIR, TEAM_RUNTIME_TASKS_ARTIFACT, validateTeamRuntimeArtifacts, writeTeamRuntimeArtifacts } from '../core/team-dag.mjs';
|
|
31
31
|
import { appendTeamEvent, initTeamLive, parseTeamSpecText, readTeamDashboard, readTeamLive, readTeamTranscriptTail, renderTeamAgentLane } from '../core/team-live.mjs';
|
|
@@ -440,10 +440,11 @@ async function wizard(args = []) {
|
|
|
440
440
|
const rl = readline.createInterface({ input, output });
|
|
441
441
|
try {
|
|
442
442
|
console.log('ㅅㅋㅅ Setup UI\n');
|
|
443
|
-
|
|
443
|
+
const currentPackage = await effectivePackageVersion();
|
|
444
|
+
console.log(`Current package: ${currentPackage}`);
|
|
444
445
|
const latest = await npmPackageVersion('sneakoscope');
|
|
445
446
|
if (latest.version) {
|
|
446
|
-
const needsUpdate = compareVersions(latest.version,
|
|
447
|
+
const needsUpdate = compareVersions(latest.version, currentPackage) > 0;
|
|
447
448
|
console.log(`Latest on npm: ${latest.version}${needsUpdate ? ' (update available)' : ''}`);
|
|
448
449
|
if (needsUpdate) {
|
|
449
450
|
const update = await askChoice(rl, 'Update SKS before setup?', ['yes', 'no'], 'yes');
|
|
@@ -496,11 +497,13 @@ async function askChoice(rl, question, choices, fallback) {
|
|
|
496
497
|
|
|
497
498
|
async function updateCheck(args = []) {
|
|
498
499
|
const latest = await npmPackageVersion('sneakoscope');
|
|
500
|
+
const currentPackage = await effectivePackageVersion();
|
|
499
501
|
const result = {
|
|
500
502
|
package: 'sneakoscope',
|
|
501
|
-
current:
|
|
503
|
+
current: currentPackage,
|
|
504
|
+
runtime_current: PACKAGE_VERSION,
|
|
502
505
|
latest: latest.version,
|
|
503
|
-
update_available: latest.version ? compareVersions(latest.version,
|
|
506
|
+
update_available: latest.version ? compareVersions(latest.version, currentPackage) > 0 : false,
|
|
504
507
|
error: latest.error || null
|
|
505
508
|
};
|
|
506
509
|
if (flag(args, '--json')) return console.log(JSON.stringify(result, null, 2));
|
|
@@ -1124,14 +1127,15 @@ async function madHighCommand(args = []) {
|
|
|
1124
1127
|
async function maybePromptSksUpdateForMad(args = []) {
|
|
1125
1128
|
if (flag(args, '--json') || flag(args, '--skip-update-check') || process.env.SKS_SKIP_UPDATE_CHECK === '1') return { status: 'skipped' };
|
|
1126
1129
|
const latest = await npmPackageVersion('sneakoscope');
|
|
1127
|
-
|
|
1130
|
+
const currentPackage = await effectivePackageVersion();
|
|
1131
|
+
if (!latest.version || compareVersions(latest.version, currentPackage) <= 0) return { status: 'current', latest: latest.version || null, error: latest.error || null };
|
|
1128
1132
|
const command = 'npm i -g sneakoscope@latest';
|
|
1129
1133
|
if (flag(args, '--yes') || flag(args, '-y')) return installSksLatest(command, latest.version);
|
|
1130
1134
|
if (!canAskYesNo()) {
|
|
1131
|
-
console.log(`SKS update available: ${
|
|
1135
|
+
console.log(`SKS update available: ${currentPackage} -> ${latest.version}. Run: ${command}`);
|
|
1132
1136
|
return { status: 'available', latest: latest.version, command };
|
|
1133
1137
|
}
|
|
1134
|
-
const answer = (await askPostinstallQuestion(`SKS ${
|
|
1138
|
+
const answer = (await askPostinstallQuestion(`SKS ${currentPackage} -> ${latest.version} update before MAD launch? [Y/n] `)).trim();
|
|
1135
1139
|
const yes = answer === '' || /^(y|yes|예|네|응)$/i.test(answer);
|
|
1136
1140
|
if (!yes) return { status: 'skipped_by_user', latest: latest.version, command };
|
|
1137
1141
|
return installSksLatest(command, latest.version);
|
|
@@ -1628,7 +1632,7 @@ async function setup(args) {
|
|
|
1628
1632
|
if (!cliTools.cmux.ok) console.log(`\ncmux ${cmuxStatusKind(cliTools.cmux)}. ${cliTools.cmux.bin ? 'Run: sks cmux check' : `Install: ${cliTools.cmux.install_hint}`}`);
|
|
1629
1633
|
if (!install.ok && install.scope === 'global') console.log('\nGlobal command missing. Run: npm i -g sneakoscope');
|
|
1630
1634
|
if (!install.ok && install.scope === 'project') console.log('\nProject package missing. Run: npm i -D sneakoscope');
|
|
1631
|
-
if (!appRuntime.ok) console.log('\nCodex App and first-party
|
|
1635
|
+
if (!appRuntime.ok) console.log('\nCodex App and first-party Codex Computer Use are required for SKS QA/visual evidence; Browser Use is not a UI verification substitute. Run: sks codex-app check');
|
|
1632
1636
|
}
|
|
1633
1637
|
|
|
1634
1638
|
function formatCodexCliToolStatus(status = {}) {
|
|
@@ -1899,6 +1903,15 @@ async function npmPackageVersion(name) {
|
|
|
1899
1903
|
return { version: result.stdout.trim().split(/\s+/).pop() };
|
|
1900
1904
|
}
|
|
1901
1905
|
|
|
1906
|
+
async function effectivePackageVersion() {
|
|
1907
|
+
const pkg = await readJson(path.join(packageRoot(), 'package.json'), {}).catch(() => ({}));
|
|
1908
|
+
return highestVersion([PACKAGE_VERSION, pkg.version]);
|
|
1909
|
+
}
|
|
1910
|
+
|
|
1911
|
+
function highestVersion(versions = []) {
|
|
1912
|
+
return versions.filter(Boolean).reduce((best, candidate) => compareVersions(candidate, best) > 0 ? candidate : best, '0.0.0');
|
|
1913
|
+
}
|
|
1914
|
+
|
|
1902
1915
|
function compareVersions(a, b) {
|
|
1903
1916
|
const pa = String(a || '').split(/[.-]/).map((x) => Number.parseInt(x, 10) || 0);
|
|
1904
1917
|
const pb = String(b || '').split(/[.-]/).map((x) => Number.parseInt(x, 10) || 0);
|
|
@@ -2192,7 +2205,8 @@ async function selftest() {
|
|
|
2192
2205
|
if (!promptPipelineText.includes('design.md') || !promptPipelineText.includes('imagegen')) throw new Error('selftest failed: prompt pipeline missing design/image asset routing');
|
|
2193
2206
|
if (!promptPipelineText.includes('From-Chat-IMG') || !promptPipelineText.includes('Do not assume ordinary image prompts are chat captures')) throw new Error('selftest failed: prompt pipeline missing explicit From-Chat-IMG gating');
|
|
2194
2207
|
const fromChatImgSkillText = await safeReadText(path.join(tmp, '.agents', 'skills', 'from-chat-img', 'SKILL.md'));
|
|
2195
|
-
if (!fromChatImgSkillText.includes('normal Team pipeline') || !fromChatImgSkillText.includes('Computer Use
|
|
2208
|
+
if (!fromChatImgSkillText.includes('normal Team pipeline') || !fromChatImgSkillText.includes('Codex Computer Use visual inspection') || !fromChatImgSkillText.includes(CODEX_COMPUTER_USE_ONLY_POLICY) || !fromChatImgSkillText.includes(FROM_CHAT_IMG_CHECKLIST_ARTIFACT) || !fromChatImgSkillText.includes(FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT) || !fromChatImgSkillText.includes(FROM_CHAT_IMG_QA_LOOP_ARTIFACT)) throw new Error('selftest failed: from-chat-img skill missing Team/Computer Use-only inspection checklist guidance');
|
|
2209
|
+
if (fromChatImgSkillText.includes('Computer Use/browser visual inspection')) throw new Error('selftest failed: from-chat-img skill still allows browser visual inspection wording');
|
|
2196
2210
|
const fromChatImgSkillMeta = await safeReadText(path.join(tmp, '.agents', 'skills', 'from-chat-img', 'agents', 'openai.yaml'));
|
|
2197
2211
|
if (!fromChatImgSkillMeta.includes('model_reasoning_effort: xhigh')) throw new Error('selftest failed: from-chat-img skill metadata is not xhigh');
|
|
2198
2212
|
for (const supportSkill of ['reasoning-router', 'pipeline-runner', 'context7-docs', 'seo-geo-optimizer', 'reflection', 'design-system-builder', 'design-ui-editor', 'imagegen']) {
|
|
@@ -2263,6 +2277,81 @@ async function selftest() {
|
|
|
2263
2277
|
const hookState = await readJson(stateFile(hookGoalTmp), {});
|
|
2264
2278
|
if (hookState.phase !== 'GOAL_READY' || hookState.mode !== 'GOAL') throw new Error('selftest failed: $Goal hook did not set ready state');
|
|
2265
2279
|
if (!(await exists(path.join(missionDir(hookGoalTmp, hookState.mission_id), GOAL_WORKFLOW_ARTIFACT)))) throw new Error('selftest failed: $Goal hook did not write goal workflow artifact');
|
|
2280
|
+
const hookUpdateCurrentTmp = tmpdir();
|
|
2281
|
+
await initProject(hookUpdateCurrentTmp, {});
|
|
2282
|
+
const hookUpdateCurrentPayload = JSON.stringify({ cwd: hookUpdateCurrentTmp, prompt: '상태 확인해줘' });
|
|
2283
|
+
const hookUpdateCurrentResult = await runProcess(process.execPath, [hookBin, 'hook', 'user-prompt-submit'], {
|
|
2284
|
+
cwd: hookUpdateCurrentTmp,
|
|
2285
|
+
input: hookUpdateCurrentPayload,
|
|
2286
|
+
env: { SKS_NPM_VIEW_SNEAKOSCOPE_VERSION: '9.9.9', SKS_INSTALLED_SKS_VERSION: '9.9.9' },
|
|
2287
|
+
timeoutMs: 15000,
|
|
2288
|
+
maxOutputBytes: 256 * 1024
|
|
2289
|
+
});
|
|
2290
|
+
if (hookUpdateCurrentResult.code !== 0) throw new Error(`selftest failed: current update hook exited ${hookUpdateCurrentResult.code}: ${hookUpdateCurrentResult.stderr}`);
|
|
2291
|
+
const hookUpdateCurrentJson = JSON.parse(hookUpdateCurrentResult.stdout);
|
|
2292
|
+
const hookUpdateCurrentContext = hookUpdateCurrentJson.hookSpecificOutput?.additionalContext || '';
|
|
2293
|
+
if (String(hookUpdateCurrentContext).includes('Update SKS now') || String(hookUpdateCurrentContext).includes('Skip update for this conversation')) throw new Error('selftest failed: hook prompted for update even though installed SKS is current');
|
|
2294
|
+
const hookUpdateCurrentState = await readJson(path.join(hookUpdateCurrentTmp, '.sneakoscope', 'state', 'update-check.json'), {});
|
|
2295
|
+
if (hookUpdateCurrentState.pending_offer) throw new Error('selftest failed: current installed SKS left a pending update offer');
|
|
2296
|
+
if (hookUpdateCurrentState.current !== '9.9.9' || hookUpdateCurrentState.runtime_current !== PACKAGE_VERSION || hookUpdateCurrentState.installed_current !== '9.9.9') throw new Error('selftest failed: hook did not record effective installed SKS version');
|
|
2297
|
+
const hookUpdatePendingTmp = tmpdir();
|
|
2298
|
+
await initProject(hookUpdatePendingTmp, {});
|
|
2299
|
+
await writeJsonAtomic(path.join(hookUpdatePendingTmp, '.sneakoscope', 'state', 'update-check.json'), {
|
|
2300
|
+
current: PACKAGE_VERSION,
|
|
2301
|
+
latest: '9.9.9',
|
|
2302
|
+
pending_offer: { conversation_id: hookUpdatePendingTmp, latest: '9.9.9', offered_at: nowIso() }
|
|
2303
|
+
});
|
|
2304
|
+
const hookUpdatePendingPayload = JSON.stringify({ cwd: hookUpdatePendingTmp, prompt: 'Update SKS now' });
|
|
2305
|
+
const hookUpdatePendingResult = await runProcess(process.execPath, [hookBin, 'hook', 'user-prompt-submit'], {
|
|
2306
|
+
cwd: hookUpdatePendingTmp,
|
|
2307
|
+
input: hookUpdatePendingPayload,
|
|
2308
|
+
env: { SKS_NPM_VIEW_SNEAKOSCOPE_VERSION: '9.9.9', SKS_INSTALLED_SKS_VERSION: '9.9.9' },
|
|
2309
|
+
timeoutMs: 15000,
|
|
2310
|
+
maxOutputBytes: 256 * 1024
|
|
2311
|
+
});
|
|
2312
|
+
if (hookUpdatePendingResult.code !== 0) throw new Error(`selftest failed: stale pending update hook exited ${hookUpdatePendingResult.code}: ${hookUpdatePendingResult.stderr}`);
|
|
2313
|
+
const hookUpdatePendingJson = JSON.parse(hookUpdatePendingResult.stdout);
|
|
2314
|
+
const hookUpdatePendingContext = hookUpdatePendingJson.hookSpecificOutput?.additionalContext || '';
|
|
2315
|
+
if (String(hookUpdatePendingContext).includes('user accepted update') || String(hookUpdatePendingContext).includes('Before doing other work')) throw new Error('selftest failed: current installed SKS accepted a stale pending update offer');
|
|
2316
|
+
const hookUpdatePendingState = await readJson(path.join(hookUpdatePendingTmp, '.sneakoscope', 'state', 'update-check.json'), {});
|
|
2317
|
+
if (hookUpdatePendingState.pending_offer) throw new Error('selftest failed: stale pending update offer was not cleared after installed SKS became current');
|
|
2318
|
+
const hookUpdateSkippedTmp = tmpdir();
|
|
2319
|
+
await initProject(hookUpdateSkippedTmp, {});
|
|
2320
|
+
await writeJsonAtomic(path.join(hookUpdateSkippedTmp, '.sneakoscope', 'state', 'update-check.json'), {
|
|
2321
|
+
current: PACKAGE_VERSION,
|
|
2322
|
+
latest: '9.9.9',
|
|
2323
|
+
skipped: { conversation_id: hookUpdateSkippedTmp, latest: '9.9.9', skipped_at: nowIso() }
|
|
2324
|
+
});
|
|
2325
|
+
const hookUpdateSkippedPayload = JSON.stringify({ cwd: hookUpdateSkippedTmp, prompt: '상태 확인해줘' });
|
|
2326
|
+
const hookUpdateSkippedResult = await runProcess(process.execPath, [hookBin, 'hook', 'user-prompt-submit'], {
|
|
2327
|
+
cwd: hookUpdateSkippedTmp,
|
|
2328
|
+
input: hookUpdateSkippedPayload,
|
|
2329
|
+
env: { SKS_NPM_VIEW_SNEAKOSCOPE_VERSION: '9.9.9', SKS_INSTALLED_SKS_VERSION: '9.9.9' },
|
|
2330
|
+
timeoutMs: 15000,
|
|
2331
|
+
maxOutputBytes: 256 * 1024
|
|
2332
|
+
});
|
|
2333
|
+
if (hookUpdateSkippedResult.code !== 0) throw new Error(`selftest failed: stale skipped update hook exited ${hookUpdateSkippedResult.code}: ${hookUpdateSkippedResult.stderr}`);
|
|
2334
|
+
const hookUpdateSkippedJson = JSON.parse(hookUpdateSkippedResult.stdout);
|
|
2335
|
+
const hookUpdateSkippedContext = hookUpdateSkippedJson.hookSpecificOutput?.additionalContext || '';
|
|
2336
|
+
if (String(hookUpdateSkippedContext).includes('was skipped for this conversation')) throw new Error('selftest failed: current installed SKS kept stale skipped update context');
|
|
2337
|
+
const hookUpdateSkippedState = await readJson(path.join(hookUpdateSkippedTmp, '.sneakoscope', 'state', 'update-check.json'), {});
|
|
2338
|
+
if (hookUpdateSkippedState.skipped) throw new Error('selftest failed: stale skipped update state was not cleared after installed SKS became current');
|
|
2339
|
+
const hookUpdateOldTmp = tmpdir();
|
|
2340
|
+
await initProject(hookUpdateOldTmp, {});
|
|
2341
|
+
const hookUpdateOldPayload = JSON.stringify({ cwd: hookUpdateOldTmp, prompt: '상태 확인해줘' });
|
|
2342
|
+
const hookUpdateOldResult = await runProcess(process.execPath, [hookBin, 'hook', 'user-prompt-submit'], {
|
|
2343
|
+
cwd: hookUpdateOldTmp,
|
|
2344
|
+
input: hookUpdateOldPayload,
|
|
2345
|
+
env: { SKS_NPM_VIEW_SNEAKOSCOPE_VERSION: '9.9.9', SKS_INSTALLED_SKS_VERSION: '0.0.0' },
|
|
2346
|
+
timeoutMs: 15000,
|
|
2347
|
+
maxOutputBytes: 256 * 1024
|
|
2348
|
+
});
|
|
2349
|
+
if (hookUpdateOldResult.code !== 0) throw new Error(`selftest failed: stale update hook exited ${hookUpdateOldResult.code}: ${hookUpdateOldResult.stderr}`);
|
|
2350
|
+
const hookUpdateOldJson = JSON.parse(hookUpdateOldResult.stdout);
|
|
2351
|
+
const hookUpdateOldContext = hookUpdateOldJson.hookSpecificOutput?.additionalContext || '';
|
|
2352
|
+
if (!String(hookUpdateOldContext).includes('Update SKS now') || !String(hookUpdateOldContext).includes('Skip update for this conversation')) throw new Error('selftest failed: hook did not prompt when installed SKS is stale');
|
|
2353
|
+
const hookUpdateOldState = await readJson(path.join(hookUpdateOldTmp, '.sneakoscope', 'state', 'update-check.json'), {});
|
|
2354
|
+
if (hookUpdateOldState.pending_offer?.latest !== '9.9.9') throw new Error('selftest failed: stale installed SKS did not persist pending update offer');
|
|
2266
2355
|
const hookKoreanSksTmp = tmpdir();
|
|
2267
2356
|
await initProject(hookKoreanSksTmp, {});
|
|
2268
2357
|
const hookKoreanSksPayload = JSON.stringify({ cwd: hookKoreanSksTmp, prompt: koreanReadmeInstallPrompt });
|
|
@@ -2385,6 +2474,9 @@ async function selftest() {
|
|
|
2385
2474
|
const hookQaSchema = await readJson(path.join(missionDir(hookQaTmp, hookQaState.mission_id), 'required-answers.schema.json'));
|
|
2386
2475
|
const hookQaAnswers = {};
|
|
2387
2476
|
for (const s of hookQaSchema.slots) hookQaAnswers[s.id] = s.options ? (s.type === 'array' ? [s.options[0]] : s.options[0]) : (s.type.includes('array') ? ['selftest'] : 'selftest');
|
|
2477
|
+
hookQaAnswers.QA_SCOPE = 'all_available';
|
|
2478
|
+
hookQaAnswers.TARGET_BASE_URL = 'none';
|
|
2479
|
+
hookQaAnswers.API_BASE_URL = 'same_as_target';
|
|
2388
2480
|
const hookQaAnswersPath = path.join(hookQaTmp, 'qa-answers.json');
|
|
2389
2481
|
await writeJsonAtomic(hookQaAnswersPath, hookQaAnswers);
|
|
2390
2482
|
const qaAnswerResult = await runProcess(process.execPath, [hookBin, 'pipeline', 'answer', 'latest', hookQaAnswersPath], { cwd: hookQaTmp, env: { SKS_DISABLE_UPDATE_CHECK: '1' }, timeoutMs: 15000, maxOutputBytes: 64 * 1024 });
|
|
@@ -2408,9 +2500,16 @@ async function selftest() {
|
|
|
2408
2500
|
await writeTextAtomic(path.join(unresolvedQaTmp, unresolvedQaGateFile), '# unresolved\n');
|
|
2409
2501
|
const unresolvedQaGate = await evaluateQaGate(unresolvedQaTmp);
|
|
2410
2502
|
if (unresolvedQaGate.passed || !unresolvedQaGate.reasons.includes('unresolved_fixable_findings_remaining')) throw new Error('selftest failed: unresolved fixable QA finding was accepted');
|
|
2503
|
+
const forbiddenQaTmp = tmpdir();
|
|
2504
|
+
const forbiddenQaGate = defaultQaGate({ sealed_hash: 'selftest', answers: { QA_SCOPE: 'ui_e2e_only', TARGET_BASE_URL: 'http://localhost:3000', API_BASE_URL: 'same_as_target', TARGET_ENVIRONMENT: 'local_dev_server', DESTRUCTIVE_DEPLOYED_TESTS_ALLOWED: 'never' } });
|
|
2505
|
+
await writeJsonAtomic(path.join(forbiddenQaTmp, 'qa-gate.json'), { ...forbiddenQaGate, passed: true, qa_report_written: true, qa_ledger_complete: true, checklist_completed: true, safety_reviewed: true, credentials_not_persisted: true, ui_computer_use_evidence: true, ui_evidence_source: 'playwright', post_fix_verification_complete: true, honest_mode_complete: true, evidence: ['Playwright screenshot evidence'] });
|
|
2506
|
+
await writeJsonAtomic(path.join(forbiddenQaTmp, 'qa-ledger.json'), { checklist: [] });
|
|
2507
|
+
await writeTextAtomic(path.join(forbiddenQaTmp, forbiddenQaGate.qa_report_file), '# forbidden\n');
|
|
2508
|
+
const forbiddenQaGateResult = await evaluateQaGate(forbiddenQaTmp);
|
|
2509
|
+
if (forbiddenQaGateResult.passed || !forbiddenQaGateResult.reasons.includes('ui_evidence_source_not_codex_computer_use') || !forbiddenQaGateResult.reasons.includes('forbidden_browser_automation_evidence')) throw new Error('selftest failed: forbidden browser automation QA evidence was accepted');
|
|
2411
2510
|
const promptQa = buildQaLoopPrompt({ id: 'selftest', mission: { prompt: 'QA and fix' }, contract: { answers: { QA_CORRECTIVE_POLICY: 'apply_safe_fixes_and_reverify' } }, cycle: 1, previous: '', reportFile: qaReportFile });
|
|
2412
2511
|
if (!promptQa.includes('dogfood as human proxy') || !promptQa.includes('fix safe code/test/docs now') || !promptQa.includes('post_fix_verification_complete')) throw new Error('selftest failed: QA-LOOP dogfood prompt');
|
|
2413
|
-
if (!promptQa.includes(
|
|
2512
|
+
if (!promptQa.includes(CODEX_COMPUTER_USE_ONLY_POLICY) || !promptQa.includes('Chrome MCP') || !promptQa.includes('Playwright') || !promptQa.includes('Browser Use')) throw new Error('selftest failed: QA-LOOP prompt did not enforce Computer Use-only UI evidence');
|
|
2414
2513
|
if (promptQa.includes('Browser/Computer Use evidence')) throw new Error('selftest failed: QA-LOOP prompt still allows Browser/Computer UI evidence');
|
|
2415
2514
|
const pkgQa = defaultQaGate({ sealed_hash: 'selftest', answers: { QA_SCOPE: 'all_available', TARGET_BASE_URL: 'none', API_BASE_URL: 'same_as_target', TARGET_ENVIRONMENT: 'local_dev_server', DESTRUCTIVE_DEPLOYED_TESTS_ALLOWED: 'never' } });
|
|
2416
2515
|
if (pkgQa.ui_e2e_required || pkgQa.api_e2e_required || !pkgQa.ui_computer_use_evidence) throw new Error('selftest failed: package QA target gate');
|
|
@@ -2549,6 +2648,7 @@ async function selftest() {
|
|
|
2549
2648
|
unresolved_findings: 0,
|
|
2550
2649
|
unresolved_fixable_findings: 0,
|
|
2551
2650
|
post_fix_verification_complete: true,
|
|
2651
|
+
computer_use_evidence_source: CODEX_COMPUTER_USE_EVIDENCE_SOURCE,
|
|
2552
2652
|
evidence: ['selftest scoped QA-LOOP covered work-1']
|
|
2553
2653
|
};
|
|
2554
2654
|
const incompleteTeamGateTmp = tmpdir();
|
|
@@ -2630,6 +2730,9 @@ async function selftest() {
|
|
|
2630
2730
|
await writeJsonAtomic(path.join(fromChatCoverageDir, FROM_CHAT_IMG_QA_LOOP_ARTIFACT), { ...passedFromChatImgQaLoop, work_order_item_ids_covered: [] });
|
|
2631
2731
|
const uncoveredFromChatQaLoopStop = await evaluateStop(fromChatCoverageTmp, fromChatCoverageState, { last_assistant_message: 'SKS Honest Mode verification evidence gap' }, { noQuestion: false });
|
|
2632
2732
|
if (uncoveredFromChatQaLoopStop?.decision !== 'block' || !String(uncoveredFromChatQaLoopStop.reason || '').includes(`${FROM_CHAT_IMG_QA_LOOP_ARTIFACT}:work_order_item_ids_covered`)) throw new Error('selftest failed: From-Chat-IMG scoped QA-LOOP work item coverage did not block Team gate');
|
|
2733
|
+
await writeJsonAtomic(path.join(fromChatCoverageDir, FROM_CHAT_IMG_QA_LOOP_ARTIFACT), { ...passedFromChatImgQaLoop, computer_use_evidence_source: 'playwright', evidence: ['Playwright visual verification'] });
|
|
2734
|
+
const forbiddenFromChatQaLoopStop = await evaluateStop(fromChatCoverageTmp, fromChatCoverageState, { last_assistant_message: 'SKS Honest Mode verification evidence gap' }, { noQuestion: false });
|
|
2735
|
+
if (forbiddenFromChatQaLoopStop?.decision !== 'block' || !String(forbiddenFromChatQaLoopStop.reason || '').includes(`${FROM_CHAT_IMG_QA_LOOP_ARTIFACT}:computer_use_evidence_source`) || !String(forbiddenFromChatQaLoopStop.reason || '').includes(`${FROM_CHAT_IMG_QA_LOOP_ARTIFACT}:forbidden_browser_automation_evidence`)) throw new Error('selftest failed: From-Chat-IMG scoped QA-LOOP accepted forbidden browser automation evidence');
|
|
2633
2736
|
await writeJsonAtomic(path.join(fromChatCoverageDir, FROM_CHAT_IMG_QA_LOOP_ARTIFACT), passedFromChatImgQaLoop);
|
|
2634
2737
|
await writeJsonAtomic(path.join(fromChatCoverageDir, 'team-gate.json'), { ...passedTeamGate, from_chat_img_required: true, from_chat_img_request_coverage: true });
|
|
2635
2738
|
const coveredFromChatStop = await evaluateStop(fromChatCoverageTmp, fromChatCoverageState, { last_assistant_message: 'SKS Honest Mode verification evidence gap' }, { noQuestion: false });
|
|
package/src/cli/maintenance-commands.mjs
CHANGED
|
@@ -15,7 +15,7 @@ import { renderCartridge, validateCartridge, driftCartridge, snapshotCartridge }
|
|
|
15
15
|
import { DEFAULT_EVAL_THRESHOLDS, compareEvaluationReports, runEvaluationBenchmark } from '../core/evaluation.mjs';
|
|
16
16
|
import { contextCapsule } from '../core/triwiki-attention.mjs';
|
|
17
17
|
import { rgbaKey, rgbaToWikiCoord, validateWikiCoordinateIndex } from '../core/wiki-coordinate.mjs';
|
|
18
|
-
import { ALLOWED_REASONING_EFFORTS, FROM_CHAT_IMG_CHECKLIST_ARTIFACT, FROM_CHAT_IMG_COVERAGE_ARTIFACT, FROM_CHAT_IMG_QA_LOOP_ARTIFACT, FROM_CHAT_IMG_SOURCE_INVENTORY_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS, FROM_CHAT_IMG_VISUAL_MAP_ARTIFACT, FROM_CHAT_IMG_WORK_ORDER_ARTIFACT, ROUTES, hasFromChatImgSignal, routePrompt, stackCurrentDocsPolicy, triwikiContextTracking } from '../core/routes.mjs';
|
|
18
|
+
import { ALLOWED_REASONING_EFFORTS, CODEX_COMPUTER_USE_ONLY_POLICY, FROM_CHAT_IMG_CHECKLIST_ARTIFACT, FROM_CHAT_IMG_COVERAGE_ARTIFACT, FROM_CHAT_IMG_QA_LOOP_ARTIFACT, FROM_CHAT_IMG_SOURCE_INVENTORY_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS, FROM_CHAT_IMG_VISUAL_MAP_ARTIFACT, FROM_CHAT_IMG_WORK_ORDER_ARTIFACT, ROUTES, hasFromChatImgSignal, routePrompt, stackCurrentDocsPolicy, triwikiContextTracking } from '../core/routes.mjs';
|
|
19
19
|
import { TEAM_DECOMPOSITION_ARTIFACT, TEAM_GRAPH_ARTIFACT, TEAM_INBOX_DIR, TEAM_RUNTIME_TASKS_ARTIFACT, teamRuntimePlanMetadata, teamRuntimeRequiredArtifacts, writeTeamRuntimeArtifacts } from '../core/team-dag.mjs';
|
|
20
20
|
import { appendTeamEvent, formatRoleCounts, initTeamLive, normalizeTeamSpec, parseTeamSpecArgs, readTeamDashboard, readTeamLive, readTeamTranscriptTail, renderTeamAgentLane, renderTeamWatch } from '../core/team-live.mjs';
|
|
21
21
|
import { ARTIFACT_FILES, writeValidationReport } from '../core/artifact-schemas.mjs';
|
|
@@ -67,7 +67,7 @@ Prompt route:
|
|
|
67
67
|
$QA-LOOP dogfood UI/API, fix safe issues, reverify
|
|
68
68
|
|
|
69
69
|
UI evidence:
|
|
70
|
-
Codex Computer Use only for UI-level E2E; do not use Chrome MCP, Browser Use, Playwright, or other browser automation as UI verification evidence.
|
|
70
|
+
Codex Computer Use only for UI-level E2E and visual evidence; do not use Chrome MCP, Browser Use, Playwright, Selenium, Puppeteer, or other browser automation as UI verification evidence.
|
|
71
71
|
`);
|
|
72
72
|
}
|
|
73
73
|
|
|
@@ -1395,7 +1395,7 @@ export function buildTeamPlan(id, prompt, opts = {}) {
|
|
|
1395
1395
|
invariants: [
|
|
1396
1396
|
'The parent thread remains the orchestrator and owns final integration.',
|
|
1397
1397
|
'Team roster confirmation is mandatory before implementation: default SKS counts are materialized when the user did not specify counts, explicit counts are honored, and team-gate.json must include team_roster_confirmed=true with team-roster.json present.',
|
|
1398
|
-
`When and only when From-Chat-IMG/$From-Chat-IMG is explicit, treat client requests as chat-history screenshots plus separate attachments: extract visible text in reading order, use Computer Use
|
|
1398
|
+
`When and only when From-Chat-IMG/$From-Chat-IMG is explicit, treat client requests as chat-history screenshots plus separate attachments: extract visible text in reading order, use Codex Computer Use visual inspection to match screenshot image regions to attachments with confidence notes, and turn that evidence into a complete modification work order before editing. ${CODEX_COMPUTER_USE_ONLY_POLICY}`,
|
|
1399
1399
|
`For From-Chat-IMG, forensic intake is stop-gated: ${FROM_CHAT_IMG_WORK_ORDER_ARTIFACT}, ${FROM_CHAT_IMG_SOURCE_INVENTORY_ARTIFACT}, and ${FROM_CHAT_IMG_VISUAL_MAP_ARTIFACT} must exist and pass schema validation before implementation is treated as complete.`,
|
|
1400
1400
|
`For From-Chat-IMG, request coverage is stop-gated: ${FROM_CHAT_IMG_COVERAGE_ARTIFACT} must show all_chat_requirements_listed=true, all_requirements_mapped_to_work_order=true, all_screenshot_regions_accounted=true, all_attachments_accounted=true, image_analysis_complete=true, verbatim_customer_requests_preserved=true, checklist_updated=true, temp_triwiki_recorded=true, scoped_qa_loop_completed=true, and unresolved_items=[] before Team completion.`,
|
|
1401
1401
|
`For From-Chat-IMG, ${FROM_CHAT_IMG_CHECKLIST_ARTIFACT} must contain Customer Requests, Image Analysis, Work Items, QA Loop, and Verification sections, with every checkbox checked as each item is completed.`,
|
package/src/core/codex-app.mjs
CHANGED
|
@@ -118,7 +118,7 @@ export async function codexAppIntegrationStatus(opts = {}) {
|
|
|
118
118
|
export function codexAppGuidance({ appInstalled, codex, mcpList, computerUseReady, browserUseReady }) {
|
|
119
119
|
const lines = [];
|
|
120
120
|
if (!appInstalled) {
|
|
121
|
-
lines.push('Install and open Codex App for first-party MCP/plugin tools. SKS cmux launch can still run with Codex CLI alone, but
|
|
121
|
+
lines.push('Install and open Codex App for first-party MCP/plugin tools. SKS cmux launch can still run with Codex CLI alone, but Codex Computer Use evidence will be unavailable until Codex App is ready.');
|
|
122
122
|
lines.push(`Docs: ${CODEX_APP_DOCS_URL}`);
|
|
123
123
|
}
|
|
124
124
|
if (!codex?.bin) lines.push('Install Codex CLI too: npm i -g @openai/codex, or set SKS_CODEX_BIN.');
|
|
@@ -128,10 +128,10 @@ export function codexAppGuidance({ appInstalled, codex, mcpList, computerUseRead
|
|
|
128
128
|
}
|
|
129
129
|
if (appInstalled && (!computerUseReady || !browserUseReady)) {
|
|
130
130
|
lines.push('Open Codex App settings, enable recommended MCP/plugin tools, then restart Codex CLI sessions.');
|
|
131
|
-
lines.push('Required for SKS QA-LOOP UI evidence: Codex Computer Use only. Browser Use can support non-UI browser context, but it does not satisfy UI-level E2E verification.');
|
|
131
|
+
lines.push('Required for SKS QA-LOOP UI/browser evidence: Codex Computer Use only. Browser Use can support non-UI browser context, but it does not satisfy UI-level E2E verification.');
|
|
132
132
|
lines.push('Verify with: codex mcp list');
|
|
133
133
|
}
|
|
134
|
-
if (!lines.length) lines.push('Codex App, Codex CLI, Computer Use, and Browser Use checks look ready. UI-level E2E verification still
|
|
134
|
+
if (!lines.length) lines.push('Codex App, Codex CLI, Computer Use, and Browser Use checks look ready. UI-level E2E and visual verification still require Codex Computer Use evidence.');
|
|
135
135
|
return lines;
|
|
136
136
|
}
|
|
137
137
|
|
package/src/core/fsx.mjs
CHANGED
|
@@ -5,7 +5,7 @@ import os from 'node:os';
|
|
|
5
5
|
import crypto from 'node:crypto';
|
|
6
6
|
import { spawn } from 'node:child_process';
|
|
7
7
|
|
|
8
|
-
export const PACKAGE_VERSION = '0.6.
|
|
8
|
+
export const PACKAGE_VERSION = '0.6.81';
|
|
9
9
|
export const DEFAULT_PROCESS_TAIL_BYTES = 256 * 1024;
|
|
10
10
|
export const DEFAULT_PROCESS_TIMEOUT_MS = 30 * 60 * 1000;
|
|
11
11
|
|
|
package/src/core/hooks-runtime.mjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import path from 'node:path';
|
|
2
|
-
import { projectRoot, readJson, readText, writeJsonAtomic, appendJsonl, readStdin, nowIso, runProcess, which, PACKAGE_VERSION, sha256 } from './fsx.mjs';
|
|
2
|
+
import { projectRoot, readJson, readText, writeJsonAtomic, appendJsonl, readStdin, nowIso, runProcess, which, PACKAGE_VERSION, sha256, packageRoot } from './fsx.mjs';
|
|
3
3
|
import { looksInteractiveCommand, interactiveCommandReason } from './no-question-guard.mjs';
|
|
4
4
|
import { missionDir, setCurrent, stateFile } from './mission.mjs';
|
|
5
5
|
import { checkDbOperation, dbBlockReason, handleMadSksUserConfirmation } from './db-safety.mjs';
|
|
@@ -334,6 +334,50 @@ async function updateCheckContext(root, payload, prompt) {
|
|
|
334
334
|
const updateState = await readJson(statePath, {});
|
|
335
335
|
const conv = conversationId(payload);
|
|
336
336
|
const pending = updateState.pending_offer;
|
|
337
|
+
let effective = null;
|
|
338
|
+
async function effectiveVersion() {
|
|
339
|
+
if (!effective) {
|
|
340
|
+
const installed = await detectInstalledSksVersion();
|
|
341
|
+
effective = {
|
|
342
|
+
installed,
|
|
343
|
+
current: highestVersion([PACKAGE_VERSION, installed.version])
|
|
344
|
+
};
|
|
345
|
+
}
|
|
346
|
+
return effective;
|
|
347
|
+
}
|
|
348
|
+
if (pending?.latest) {
|
|
349
|
+
const currentCheck = await effectiveVersion();
|
|
350
|
+
if (compareVersions(pending.latest, currentCheck.current) <= 0) {
|
|
351
|
+
await writeJsonAtomic(statePath, {
|
|
352
|
+
...updateState,
|
|
353
|
+
current: currentCheck.current,
|
|
354
|
+
runtime_current: PACKAGE_VERSION,
|
|
355
|
+
installed_current: currentCheck.installed.version || null,
|
|
356
|
+
latest: pending.latest,
|
|
357
|
+
checked_at: nowIso(),
|
|
358
|
+
pending_offer: null,
|
|
359
|
+
check_error: null
|
|
360
|
+
});
|
|
361
|
+
return '';
|
|
362
|
+
}
|
|
363
|
+
}
|
|
364
|
+
if (updateState.skipped?.latest) {
|
|
365
|
+
const currentCheck = await effectiveVersion();
|
|
366
|
+
if (compareVersions(updateState.skipped.latest, currentCheck.current) <= 0) {
|
|
367
|
+
await writeJsonAtomic(statePath, {
|
|
368
|
+
...updateState,
|
|
369
|
+
current: currentCheck.current,
|
|
370
|
+
runtime_current: PACKAGE_VERSION,
|
|
371
|
+
installed_current: currentCheck.installed.version || null,
|
|
372
|
+
latest: updateState.skipped.latest,
|
|
373
|
+
checked_at: nowIso(),
|
|
374
|
+
pending_offer: null,
|
|
375
|
+
skipped: null,
|
|
376
|
+
check_error: null
|
|
377
|
+
});
|
|
378
|
+
return '';
|
|
379
|
+
}
|
|
380
|
+
}
|
|
337
381
|
if (pending?.conversation_id === conv && pending?.latest && looksLikeUpdateDecline(prompt)) {
|
|
338
382
|
await writeJsonAtomic(statePath, {
|
|
339
383
|
...updateState,
|
|
@@ -354,26 +398,34 @@ async function updateCheckContext(root, payload, prompt) {
|
|
|
354
398
|
return `SKS update check: update ${updateState.skipped.latest} was skipped for this conversation only. Do not ask again in this conversation; check again next conversation.`;
|
|
355
399
|
}
|
|
356
400
|
const check = await checkLatestVersion();
|
|
401
|
+
const { installed, current } = await effectiveVersion();
|
|
402
|
+
const isCurrent = check.latest && compareVersions(check.latest, current) <= 0;
|
|
357
403
|
await writeJsonAtomic(statePath, {
|
|
358
404
|
...updateState,
|
|
359
|
-
current
|
|
405
|
+
current,
|
|
406
|
+
runtime_current: PACKAGE_VERSION,
|
|
407
|
+
installed_current: installed.version || null,
|
|
360
408
|
latest: check.latest || null,
|
|
361
409
|
checked_at: nowIso(),
|
|
410
|
+
pending_offer: isCurrent ? null : updateState.pending_offer || null,
|
|
362
411
|
check_error: check.error || null
|
|
363
412
|
});
|
|
364
|
-
if (!check.latest || check.error ||
|
|
413
|
+
if (!check.latest || check.error || isCurrent) return '';
|
|
365
414
|
await writeJsonAtomic(statePath, {
|
|
366
415
|
...updateState,
|
|
367
|
-
current
|
|
416
|
+
current,
|
|
417
|
+
runtime_current: PACKAGE_VERSION,
|
|
418
|
+
installed_current: installed.version || null,
|
|
368
419
|
latest: check.latest,
|
|
369
420
|
checked_at: nowIso(),
|
|
370
421
|
pending_offer: { conversation_id: conv, latest: check.latest, offered_at: nowIso() },
|
|
371
422
|
skipped: updateState.skipped?.conversation_id === conv ? null : updateState.skipped || null
|
|
372
423
|
});
|
|
373
|
-
return `SKS update check: installed ${
|
|
424
|
+
return `SKS update check: installed ${current}, latest ${check.latest}. Before any other work, ask the user to choose: "Update SKS now" or "Skip update for this conversation". If they choose update, run npm i -g sneakoscope for global installs, or npm i -D sneakoscope && npx sks setup --install-scope project for project installs, then run sks setup and sks doctor --fix. If they skip, do not ask again in this conversation, but check again next conversation.`;
|
|
374
425
|
}
|
|
375
426
|
|
|
376
427
|
async function checkLatestVersion() {
|
|
428
|
+
if (process.env.SKS_NPM_VIEW_SNEAKOSCOPE_VERSION) return { latest: process.env.SKS_NPM_VIEW_SNEAKOSCOPE_VERSION };
|
|
377
429
|
const npm = await which('npm').catch(() => null);
|
|
378
430
|
if (!npm) return { error: 'npm not found' };
|
|
379
431
|
const result = await runProcess(npm, ['view', 'sneakoscope', 'version'], { timeoutMs: 3500, maxOutputBytes: 4096 });
|
|
@@ -381,6 +433,33 @@ async function checkLatestVersion() {
|
|
|
381
433
|
return { latest: result.stdout.trim().split(/\s+/).pop() };
|
|
382
434
|
}
|
|
383
435
|
|
|
436
|
+
/**
 * Determine which sneakoscope version is installed on this machine.
 *
 * Resolution order:
 *   1. `SKS_INSTALLED_SKS_VERSION`, when it contains a parseable version.
 *   2. The `version` field of `package.json` under `packageRoot()`.
 *   3. The output of `sks --version`, when an `sks` binary is found.
 * When both (2) and (3) yield a version, the higher one (per
 * `compareVersions`) is returned; on a tie the package.json entry wins.
 *
 * @returns {Promise<{version: string|null, source: string|null, error?: string}>}
 *   `source` is `'env'`, `'package.json'`, the resolved `sks` path, or `null`.
 *   `error` is only present when `sks` was found but no version could be
 *   determined from any source.
 */
async function detectInstalledSksVersion() {
  const envVersion = parseVersionText(process.env.SKS_INSTALLED_SKS_VERSION || '');
  if (envVersion) return { version: envVersion, source: 'env' };

  const found = [];

  const manifestPath = path.join(packageRoot(), 'package.json');
  const manifest = await readJson(manifestPath, {}).catch(() => ({}));
  const manifestVersion = parseVersionText(manifest.version);
  if (manifestVersion) found.push({ version: manifestVersion, source: 'package.json' });

  const sks = await which('sks').catch(() => null);
  if (!sks) return found[0] || { version: null, source: null };

  // NOTE(review): whether `env` is merged with process.env or replaces it is
  // decided inside runProcess, which is not visible here — confirm.
  const probeOptions = {
    timeoutMs: 2000,
    maxOutputBytes: 4096,
    env: { SKS_DISABLE_UPDATE_CHECK: '1' }
  };
  // A spawn failure is folded into an ordinary failed result so the
  // package.json candidate (if any) can still be used.
  const result = await runProcess(sks, ['--version'], probeOptions)
    .catch((err) => ({ code: 1, stdout: '', stderr: err.message }));

  const cliVersion = result.code === 0 ? parseVersionText(result.stdout) : null;
  if (cliVersion) found.push({ version: cliVersion, source: sks });

  if (found.length === 0) {
    const detail = `${result.stderr || result.stdout || 'sks --version failed'}`.trim();
    return { version: null, source: sks, error: detail };
  }

  // Keep the first candidate unless a later one is strictly newer.
  let best = found[0];
  for (const candidate of found.slice(1)) {
    if (compareVersions(candidate.version, best.version) > 0) best = candidate;
  }
  return best;
}
|
|
453
|
+
|
|
454
|
+
/**
 * Extract the first semver-looking token (`MAJOR.MINOR.PATCH` with an
 * optional `-prerelease` / `+build` suffix) from free-form text.
 *
 * A bare version is preferred. Only when none is found does the function
 * fall back to a `v`/`V`-prefixed token such as `v0.6.81`, returned without
 * the prefix. The strict pattern alone cannot match that form, because there
 * is no word boundary between `v` and the first digit, so a `v`-prefixed
 * `SKS_INSTALLED_SKS_VERSION` or CLI banner would otherwise be ignored.
 *
 * @param {unknown} text - Text to scan; falsy values are treated as ''.
 * @returns {string|null} The version without any `v` prefix, or null.
 */
function parseVersionText(text) {
  const haystack = String(text || '');

  const strict = haystack.match(/\b\d+\.\d+\.\d+(?:[-+][0-9A-Za-z.-]+)?\b/);
  if (strict) return strict[0];

  // Fallback only: every input that already parsed keeps its exact result.
  const prefixed = haystack.match(/\bv(\d+\.\d+\.\d+(?:[-+][0-9A-Za-z.-]+)?)\b/i);
  return prefixed ? prefixed[1] : null;
}
|
|
458
|
+
|
|
459
|
+
/**
 * Pick the greatest version string from a list, ignoring falsy entries.
 *
 * @param {Array<string|null|undefined>} [versions=[]] - Candidate versions.
 * @returns {string} The greatest candidate per `compareVersions`, or
 *   `'0.0.0'` when no candidate compares greater than that floor.
 */
function highestVersion(versions = []) {
  const usable = versions.filter(Boolean);
  let winner = '0.0.0';
  for (const entry of usable) {
    // Strictly-greater comparison: on a tie the earlier entry is kept.
    if (compareVersions(entry, winner) > 0) {
      winner = entry;
    }
  }
  return winner;
}
|
|
462
|
+
|
|
384
463
|
function compareVersions(a, b) {
|
|
385
464
|
const pa = String(a || '').split(/[.-]/).map((x) => Number.parseInt(x, 10) || 0);
|
|
386
465
|
const pb = String(b || '').split(/[.-]/).map((x) => Number.parseInt(x, 10) || 0);
|
package/src/core/init.mjs
CHANGED
|
@@ -6,7 +6,7 @@ import { DEFAULT_DB_SAFETY_POLICY } from './db-safety.mjs';
|
|
|
6
6
|
import { isHarnessSourceProject, writeHarnessGuardPolicy } from './harness-guard.mjs';
|
|
7
7
|
import { repairSksGeneratedArtifacts } from './harness-conflicts.mjs';
|
|
8
8
|
import { installVersionGitHook } from './version-manager.mjs';
|
|
9
|
-
import { DOLLAR_COMMANDS, DOLLAR_COMMAND_ALIASES, DOLLAR_SKILL_NAMES, FROM_CHAT_IMG_CHECKLIST_ARTIFACT, FROM_CHAT_IMG_COVERAGE_ARTIFACT, FROM_CHAT_IMG_QA_LOOP_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS, RECOMMENDED_MCP_SERVERS, RECOMMENDED_SKILLS, chatCaptureIntakeText, context7ConfigToml, stackCurrentDocsPolicyText, triwikiContextTracking, triwikiContextTrackingText, triwikiStagePolicyText } from './routes.mjs';
|
|
9
|
+
import { CODEX_COMPUTER_USE_ONLY_POLICY, DOLLAR_COMMANDS, DOLLAR_COMMAND_ALIASES, DOLLAR_SKILL_NAMES, FROM_CHAT_IMG_CHECKLIST_ARTIFACT, FROM_CHAT_IMG_COVERAGE_ARTIFACT, FROM_CHAT_IMG_QA_LOOP_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS, RECOMMENDED_MCP_SERVERS, RECOMMENDED_SKILLS, chatCaptureIntakeText, context7ConfigToml, stackCurrentDocsPolicyText, triwikiContextTracking, triwikiContextTrackingText, triwikiStagePolicyText } from './routes.mjs';
|
|
10
10
|
|
|
11
11
|
const REFLECTION_MEMORY_PATH = '.sneakoscope/memory/q2_facts/post-route-reflection.md';
|
|
12
12
|
function reflectionInstructionText(commandPrefix = 'sks') {
|
|
@@ -499,8 +499,8 @@ export async function installSkills(root) {
|
|
|
499
499
|
'sks': `---\nname: sks\ndescription: General Sneakoscope Codex command route for $SKS or $sks usage, setup, status, and workflow help.\n---\n\nUse local SKS commands: bootstrap, deps, commands, quickstart, codex-app, context7, guard, conflicts, reasoning, wiki, pipeline. Promote code-changing work to Team unless Answer/DFix/Help/Wiki/safety route fits. Surface route/guard/scope, use TriWiki, do not edit installed harness files outside this engine repo, and require human-approved conflict cleanup.\n`,
|
|
500
500
|
'wiki': `---\nname: wiki\ndescription: Dollar-command route for $Wiki TriWiki refresh, pack, validate, and prune commands.\n---\n\nUse for $Wiki or Korean wiki-refresh requests. Refresh/update/갱신: run sks wiki refresh, then validate .sneakoscope/wiki/context-pack.json. Pack: run sks wiki pack, then validate. Prune/clean/정리: use sks wiki refresh --prune, or sks wiki prune --dry-run for inspection. Report claims, anchors, trust, attention.use_first/hydrate_first, validation, and blockers. Do not start ambiguity-gated implementation, subagents, or unrelated work.\n`,
|
|
501
501
|
'team': `---\nname: team\ndescription: SKS Team orchestration for $Team/code work; $From-Chat-IMG is the explicit chat-image alias.\n---\n\nUse for $Team/code work. Ambiguity gate first. Write team-roster.json; team-gate.json needs team_roster_confirmed=true. executor:N means N scouts, N debate voices, then fresh N executors. After consensus, compile team-graph.json, team-runtime-tasks.json, team-decomposition-report.json, and team-inbox/ so worker handoff uses concrete runtime task ids with role/path/domain/lane hints. Refresh/validate TriWiki before debate, implementation, review, and final; consume attention.use_first and hydrate attention.hydrate_first before risky decisions. Log events, close sessions, pass team-session-cleanup.json, then reflection and Honest Mode. Parent integrates/verifies.\n\n${chatCaptureIntakeText()}\n`,
|
|
502
|
-
'from-chat-img': `---\nname: from-chat-img\ndescription: Explicit $From-Chat-IMG Team alias for chat screenshot plus attachment analysis.\n---\n\nUse only for From-Chat-IMG/$From-Chat-IMG. It enters the normal Team pipeline. Treat uploads as chat screenshot plus originals. Use Computer Use
|
|
503
|
-
'qa-loop': `---\nname: qa-loop\ndescription: $QA-LOOP dogfoods UI/API as human proxy with safety gates, Codex Computer Use-only UI evidence, safe fixes, rechecks, and a QA report.\n---\n\nUse only $QA-LOOP. Ask scope, target, mutation, login. Credentials are runtime-only; never save secrets. UI-level E2E needs Codex Computer Use evidence or must be marked unverified; Chrome MCP, Browser Use, Playwright, and other browser automation do not satisfy UI verification. Deployed targets are read-only; destructive removal is forbidden. After answer/run, dogfood real flows, apply safe contract-allowed code/test/docs fixes, recheck, and do not pass qa-gate.json with unresolved findings or without post_fix_verification_complete. Finish qa-ledger, date/version report, gate, completion summary, and Honest Mode.\n`,
|
|
502
|
+
'from-chat-img': `---\nname: from-chat-img\ndescription: Explicit $From-Chat-IMG Team alias for chat screenshot plus attachment analysis.\n---\n\nUse only for From-Chat-IMG/$From-Chat-IMG. It enters the normal Team pipeline. Treat uploads as chat screenshot plus originals. Use Codex Computer Use visual inspection when available, list requirements first, match regions to attachments with confidence, write ${FROM_CHAT_IMG_COVERAGE_ARTIFACT}, ${FROM_CHAT_IMG_CHECKLIST_ARTIFACT}, ${FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT}, and ${FROM_CHAT_IMG_QA_LOOP_ARTIFACT}, then continue Team gates, review, reflection, and Honest Mode. ${CODEX_COMPUTER_USE_ONLY_POLICY} The ledger must account for every visible customer request, screenshot image region, and separate attachment; ${FROM_CHAT_IMG_CHECKLIST_ARTIFACT} must have a checked item for each request, image-region/attachment match, work item, scoped QA-LOOP, and verification step; ${FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT} stores temporary TriWiki-backed session context with expires_after_sessions=${FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS}. ${FROM_CHAT_IMG_QA_LOOP_ARTIFACT} must prove QA-LOOP ran over the exact customer-request work-order range after implementation, with every work item covered, post-fix verification complete, and zero unresolved findings. team-gate.json cannot pass From-Chat-IMG completion until unresolved_items is empty, every checklist box is checked, and scoped_qa_loop_completed=true.\n`,
|
|
503
|
+
'qa-loop': `---\nname: qa-loop\ndescription: $QA-LOOP dogfoods UI/API as human proxy with safety gates, Codex Computer Use-only UI evidence, safe fixes, rechecks, and a QA report.\n---\n\nUse only $QA-LOOP. Ask scope, target, mutation, login. Credentials are runtime-only; never save secrets. UI-level E2E needs Codex Computer Use evidence or must be marked unverified; Chrome MCP, Browser Use, Playwright, Selenium, Puppeteer, and other browser automation do not satisfy UI/browser verification. Deployed targets are read-only; destructive removal is forbidden. After answer/run, dogfood real flows, apply safe contract-allowed code/test/docs fixes, recheck, and do not pass qa-gate.json with unresolved findings or without post_fix_verification_complete. Finish qa-ledger, date/version report, gate, completion summary, and Honest Mode.\n`,
|
|
504
504
|
'goal': `---\nname: goal\ndescription: Dollar-command route for $Goal or $goal Codex native persisted /goal workflows.\n---\n\nUse when the user invokes $Goal/$goal or asks to persist a workflow with Codex native /goal continuation. Prepare with sks goal create or the $Goal route, then use native Codex /goal create, pause, resume, and clear controls where available. Do not recreate the old no-question loop.\n`,
|
|
505
505
|
'research': `---\nname: research\ndescription: Dollar-command route for $Research or $research frontier discovery workflows.\n---\n\nUse when the user invokes $Research/$research or asks for research, hypotheses, new mechanisms, falsification, or testable predictions. Prefer sks research prepare and sks research run. Do not use for ordinary code edits.\n`,
|
|
506
506
|
'autoresearch': `---\nname: autoresearch\ndescription: Dollar-command route for $AutoResearch or $autoresearch iterative experiment loops.\n---\n\nUse for $AutoResearch, iterative improvement, SEO/GEO, ranking, workflow, benchmark, or experiments. Define program, hypothesis, experiment, metric, keep/discard, falsification, next step, and Honest Mode. Load seo-geo-optimizer for README/npm/GitHub/schema/AI-search work.\n`,
|
package/src/core/pipeline.mjs
CHANGED
|
@@ -12,7 +12,7 @@ import { writeMemorySweepReport } from './memory-governor.mjs';
|
|
|
12
12
|
import { writeMistakeMemoryReport } from './mistake-memory.mjs';
|
|
13
13
|
import { writeSkillForgeReport } from './skill-forge.mjs';
|
|
14
14
|
import { writeResearchPlan } from './research.mjs';
|
|
15
|
-
import { FROM_CHAT_IMG_CHECKLIST_ARTIFACT, FROM_CHAT_IMG_COVERAGE_ARTIFACT, FROM_CHAT_IMG_QA_LOOP_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS, chatCaptureIntakeText, context7RequirementText, dollarCommand, hasFromChatImgSignal, hasMadSksSignal, noUnrequestedFallbackCodePolicyText, reflectionRequiredForRoute, reasoningInstruction, routeNeedsContext7, routePrompt, routeReasoning, routeRequiresSubagents, stripDollarCommand, stripMadSksSignal, subagentExecutionPolicyText, stackCurrentDocsPolicyText, triwikiContextTracking, triwikiContextTrackingText, triwikiStagePolicyText } from './routes.mjs';
|
|
15
|
+
import { CODEX_COMPUTER_USE_EVIDENCE_SOURCE, CODEX_COMPUTER_USE_ONLY_POLICY, FROM_CHAT_IMG_CHECKLIST_ARTIFACT, FROM_CHAT_IMG_COVERAGE_ARTIFACT, FROM_CHAT_IMG_QA_LOOP_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS, chatCaptureIntakeText, context7RequirementText, dollarCommand, evidenceMentionsForbiddenBrowserAutomation, hasFromChatImgSignal, hasMadSksSignal, noUnrequestedFallbackCodePolicyText, reflectionRequiredForRoute, reasoningInstruction, routeNeedsContext7, routePrompt, routeReasoning, routeRequiresSubagents, stripDollarCommand, stripMadSksSignal, subagentExecutionPolicyText, stackCurrentDocsPolicyText, triwikiContextTracking, triwikiContextTrackingText, triwikiStagePolicyText } from './routes.mjs';
|
|
16
16
|
import { TEAM_DECOMPOSITION_ARTIFACT, TEAM_GRAPH_ARTIFACT, TEAM_INBOX_DIR, TEAM_RUNTIME_TASKS_ARTIFACT, teamRuntimePlanMetadata, teamRuntimeRequiredArtifacts, validateTeamRuntimeArtifacts, writeTeamRuntimeArtifacts } from './team-dag.mjs';
|
|
17
17
|
import { formatRoleCounts, initTeamLive, parseTeamSpecText } from './team-live.mjs';
|
|
18
18
|
|
|
@@ -299,7 +299,7 @@ async function prepareTeam(root, route, task, required) {
|
|
|
299
299
|
team_runtime: teamRuntimePlanMetadata(),
|
|
300
300
|
phases: [
|
|
301
301
|
{ id: 'team_roster_confirmation', goal: `Before any implementation, materialize the Team roster from default SKS counts or explicit user counts, write team-roster.json, and surface role counts ${formatRoleCounts(roleCounts)}. Implementation cannot be considered complete unless team-gate.json has team_roster_confirmed=true.`, agents: ['parent_orchestrator'], output: 'team-roster.json' },
|
|
302
|
-
{ id: 'parallel_analysis_scouting', goal: `Before scouting, read TriWiki context. ${fromChatImgRequired ? `From-Chat-IMG active: use Computer Use
|
|
302
|
+
{ id: 'parallel_analysis_scouting', goal: `Before scouting, read TriWiki context. ${fromChatImgRequired ? `From-Chat-IMG active: use Codex Computer Use visual inspection, list every visible customer request, match every screenshot image region to attachments, write ${FROM_CHAT_IMG_COVERAGE_ARTIFACT}, ${FROM_CHAT_IMG_CHECKLIST_ARTIFACT}, and ${FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT}, then require scoped QA-LOOP evidence in ${FROM_CHAT_IMG_QA_LOOP_ARTIFACT} after the customer-request work is done. ${CODEX_COMPUTER_USE_ONLY_POLICY}` : `From-Chat-IMG inactive: do not assume ordinary images are chat captures. ${CODEX_COMPUTER_USE_ONLY_POLICY}`} Spawn exactly ${roster.bundle_size} read-only analysis_scout_N agents in parallel, using the full available session budget without exceeding ${agentSessions}. Split repo/docs/tests/API/user-flow/risk investigation into independent slices, hydrate relevant low-trust claims from source, and record source-backed findings.`, agents: roster.analysis_team.map((agent) => agent.id), max_parallel_subagents: agentSessions, write_policy: 'read-only' },
|
|
303
303
|
{ id: 'triwiki_refresh', goal: `Parent orchestrator updates Team analysis artifacts, then runs ${triwikiContextTracking().refresh_command} or ${triwikiContextTracking().pack_command}, prunes with ${triwikiContextTracking().prune_command} when stale/oversized wiki state would pollute handoffs, and runs ${triwikiContextTracking().validate_command} so the next stage uses current TriWiki context.`, agents: ['parent_orchestrator'], output: '.sneakoscope/wiki/context-pack.json' },
|
|
304
304
|
{ id: 'planning_debate', goal: `Before debate, read the refreshed TriWiki pack. Debate team of exactly ${roster.bundle_size} participants maps user inconvenience, options, constraints, affected files, DB/test risk, and tradeoffs while hydrating low-trust claims from source.`, agents: roster.debate_team.map((agent) => agent.id) },
|
|
305
305
|
{ id: 'consensus', goal: `Seal one objective with acceptance criteria and disjoint implementation slices, then refresh/validate TriWiki so implementation receives current consensus context.` },
|
|
@@ -853,6 +853,8 @@ async function missingFromChatImgCoverageArtifacts(root, state = {}) {
|
|
|
853
853
|
if (Number(qaLoop.unresolved_findings) !== 0) missing.push(`${FROM_CHAT_IMG_QA_LOOP_ARTIFACT}:unresolved_findings`);
|
|
854
854
|
if (Number(qaLoop.unresolved_fixable_findings) !== 0) missing.push(`${FROM_CHAT_IMG_QA_LOOP_ARTIFACT}:unresolved_fixable_findings`);
|
|
855
855
|
if (!Array.isArray(qaLoop.evidence) || qaLoop.evidence.length === 0) missing.push(`${FROM_CHAT_IMG_QA_LOOP_ARTIFACT}:evidence`);
|
|
856
|
+
if (qaLoop.computer_use_evidence_source !== CODEX_COMPUTER_USE_EVIDENCE_SOURCE) missing.push(`${FROM_CHAT_IMG_QA_LOOP_ARTIFACT}:computer_use_evidence_source`);
|
|
857
|
+
if (evidenceMentionsForbiddenBrowserAutomation({ evidence: qaLoop.evidence, notes: qaLoop.notes, tool: qaLoop.tool, evidence_source: qaLoop.computer_use_evidence_source })) missing.push(`${FROM_CHAT_IMG_QA_LOOP_ARTIFACT}:forbidden_browser_automation_evidence`);
|
|
856
858
|
const coveredWorkItems = new Set(Array.isArray(qaLoop.work_order_item_ids_covered) ? qaLoop.work_order_item_ids_covered.map(String) : []);
|
|
857
859
|
for (const item of Array.isArray(ledger.work_order_items) ? ledger.work_order_items : []) {
|
|
858
860
|
const workId = String(item?.id || '');
|
package/src/core/qa-loop.mjs
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import path from 'node:path';
|
|
2
2
|
import { exists, nowIso, readJson, readText, writeJsonAtomic, writeTextAtomic, PACKAGE_VERSION } from './fsx.mjs';
|
|
3
|
+
import { CODEX_COMPUTER_USE_EVIDENCE_SOURCE, CODEX_COMPUTER_USE_ONLY_POLICY, evidenceMentionsForbiddenBrowserAutomation } from './routes.mjs';
|
|
3
4
|
|
|
4
5
|
export const QA_LOOP_ROUTE = 'QALoop';
|
|
5
6
|
const QA_REPORT_SUFFIX = 'qa-report.md';
|
|
@@ -29,7 +30,7 @@ export function buildQaLoopQuestionSchema(prompt) {
|
|
|
29
30
|
return {
|
|
30
31
|
schema_version: 1,
|
|
31
32
|
route: QA_LOOP_ROUTE,
|
|
32
|
-
description:
|
|
33
|
+
description: `QA-LOOP questions must be answered before execution. Login secrets and browser auth state are runtime-only and must not be saved to mission files or TriWiki. ${CODEX_COMPUTER_USE_ONLY_POLICY}`,
|
|
33
34
|
prompt,
|
|
34
35
|
slots: [
|
|
35
36
|
{ id: 'GOAL_PRECISE', question: 'Define the QA objective in one sentence.', required: true, type: 'string' },
|
|
@@ -45,7 +46,7 @@ export function buildQaLoopQuestionSchema(prompt) {
|
|
|
45
46
|
{ id: 'TEMP_TEST_CREDENTIALS_READY', question: 'If login is required, are test-only credentials ready to provide ephemerally during the run?', required: true, type: 'enum', options: ['not_required', 'yes_temp_only', 'no_block_authenticated_tests'] },
|
|
46
47
|
{ id: 'TEST_CREDENTIALS_RUNTIME_SOURCE', question: 'If login is required, how will test-only credentials be provided without saving the values?', required: true, type: 'enum', options: ['not_required', 'ephemeral_chat_only', 'environment_variables', 'secret_manager'] },
|
|
47
48
|
{ id: 'CREDENTIAL_STORAGE_ACK', question: 'Acknowledge credential handling policy.', required: true, type: 'enum', options: ['never_store_credentials_in_artifacts_or_wiki'] },
|
|
48
|
-
{ id: 'UI_COMPUTER_USE_ACK', question: 'Acknowledge UI E2E evidence policy: Codex Computer Use only; no Chrome MCP, Browser Use, Playwright, or other browser automation.', required: true, type: 'enum', options: [UI_COMPUTER_USE_ONLY_ACK] },
|
|
49
|
+
{ id: 'UI_COMPUTER_USE_ACK', question: 'Acknowledge UI E2E evidence policy: Codex Computer Use only; no Chrome MCP, Browser Use, Playwright, Selenium, Puppeteer, or other browser automation.', required: true, type: 'enum', options: [UI_COMPUTER_USE_ONLY_ACK] },
|
|
49
50
|
{ id: 'TEAM_MODE_ALLOWED', question: 'May QA-LOOP use Team/subagents where useful?', required: true, type: 'enum', options: ['yes_parallel_where_safe', 'no_parent_only'] },
|
|
50
51
|
{ id: 'MAX_QA_CYCLES', question: 'How many no-question QA cycles are allowed before pausing?', required: true, type: 'string' },
|
|
51
52
|
{ id: 'ACCEPTANCE_CRITERIA', question: 'List the QA completion criteria.', required: true, type: 'array_or_string' },
|
|
@@ -122,6 +123,7 @@ export function defaultQaGate(contract = {}, opts = {}) {
|
|
|
122
123
|
credentials_not_persisted: false,
|
|
123
124
|
ui_e2e_required: uiRequired,
|
|
124
125
|
ui_computer_use_evidence: !uiRequired,
|
|
126
|
+
ui_evidence_source: uiRequired ? null : 'not_required',
|
|
125
127
|
api_e2e_required: apiRequired,
|
|
126
128
|
unsafe_external_side_effects: false,
|
|
127
129
|
corrective_loop_enabled: corrective,
|
|
@@ -169,6 +171,10 @@ export async function evaluateQaGate(dir) {
|
|
|
169
171
|
if (positiveCount(gate.unresolved_fixable_findings)) reasons.push('unresolved_fixable_findings_remaining');
|
|
170
172
|
}
|
|
171
173
|
if (gate.unsafe_external_side_effects === true) reasons.push('unsafe_external_side_effects');
|
|
174
|
+
if (gate.ui_e2e_required === true) {
|
|
175
|
+
if (gate.ui_evidence_source !== CODEX_COMPUTER_USE_EVIDENCE_SOURCE) reasons.push('ui_evidence_source_not_codex_computer_use');
|
|
176
|
+
if (evidenceMentionsForbiddenBrowserAutomation({ evidence: gate.evidence, notes: gate.notes, ui_evidence_source: gate.ui_evidence_source })) reasons.push('forbidden_browser_automation_evidence');
|
|
177
|
+
}
|
|
172
178
|
if (!reportFile) reasons.push('qa_report_file_missing');
|
|
173
179
|
else if (!isQaReportFilename(reportFile)) reasons.push('qa_report_filename_prefix_invalid');
|
|
174
180
|
else if (!(await exists(path.join(dir, reportFile)))) reasons.push('qa_report_missing');
|
|
@@ -183,8 +189,9 @@ export async function writeMockQaResult(dir, mission, contract) {
|
|
|
183
189
|
const previousGate = await readJson(path.join(dir, 'qa-gate.json'), {});
|
|
184
190
|
const previousReportFile = qaReportFileFromGate(previousGate);
|
|
185
191
|
const reportFile = isQaReportFilename(previousReportFile) ? previousReportFile : qaReportFilename();
|
|
192
|
+
const uiRequired = qaUiRequired(contract.answers || {});
|
|
186
193
|
await writeTextAtomic(path.join(dir, reportFile), `# QA-LOOP Report\n\nMission: ${mission.id}\nMode: mock verification\n\nMock QA-LOOP completed. No live UI/API actions were executed.\n\n## Honest Mode\n\nThis is a mock smoke run for command verification, not production QA evidence.\n`);
|
|
187
|
-
await writeJsonAtomic(path.join(dir, 'qa-gate.json'), { ...defaultQaGate(contract, { reportFile }), passed:
|
|
194
|
+
await writeJsonAtomic(path.join(dir, 'qa-gate.json'), { ...defaultQaGate(contract, { reportFile }), passed: !uiRequired, qa_report_written: true, qa_ledger_complete: true, checklist_completed: true, safety_reviewed: true, credentials_not_persisted: true, ui_computer_use_evidence: !uiRequired, ui_evidence_source: uiRequired ? null : 'not_required', unresolved_findings: 0, unresolved_fixable_findings: 0, unsafe_or_deferred_findings: 0, post_fix_verification_complete: true, honest_mode_complete: true, evidence: ['mock QA-LOOP smoke completed'], notes: ['No live UI/API verification was claimed.'] });
|
|
188
195
|
return evaluateQaGate(dir);
|
|
189
196
|
}
|
|
190
197
|
|
|
@@ -196,7 +203,7 @@ TASK: ${mission.prompt}
|
|
|
196
203
|
CYCLE: ${cycle}
|
|
197
204
|
NO QUESTIONS: use decision-contract.json.
|
|
198
205
|
MODE: dogfood as human proxy; use real flows, fix safe code/test/docs now, then recheck.
|
|
199
|
-
UI:
|
|
206
|
+
UI: ${CODEX_COMPUTER_USE_ONLY_POLICY} Secrets runtime-only.
|
|
200
207
|
SAFETY: deployed read-only smoke; no destructive, billing, message, webhook, admin, bulk-write, global-config, or live-data edits unless contract allows.
|
|
201
208
|
GATE: passed=false while unresolved_findings or unresolved_fixable_findings > 0, or post_fix_verification_complete is not true.
|
|
202
209
|
ARTIFACTS: update qa-ledger.json, ${report}, qa-gate.json, and qa-loop/cycle-${cycle}/.
|
|
@@ -225,7 +232,7 @@ function qaChecklist(a) {
|
|
|
225
232
|
['preflight.roles', 'Map roles, permissions, protected areas.']
|
|
226
233
|
];
|
|
227
234
|
if (qaUiRequired(a)) cases.push(
|
|
228
|
-
['ui.computer_use_only',
|
|
235
|
+
['ui.computer_use_only', CODEX_COMPUTER_USE_ONLY_POLICY],
|
|
229
236
|
['ui.navigation', 'Check primary navigation, deep links, back/forward, refresh, and protected routes.'],
|
|
230
237
|
['ui.auth', 'Check login, logout, session expiry, unauthorized access, and role-specific visibility.'],
|
|
231
238
|
['ui.forms', 'Check required fields, validation, disabled states, success, and failure.'],
|
|
@@ -253,7 +260,7 @@ function qaChecklist(a) {
|
|
|
253
260
|
|
|
254
261
|
/**
 * Build the initial Markdown skeleton for a QA-LOOP report.
 *
 * The header fields come from the mission id and the sealed contract answers
 * (falling back to the literal `unset`), the Safety section embeds the shared
 * Codex Computer Use policy text, and every checklist entry is rendered as an
 * unchecked task line. Findings, rechecks, and Honest Mode are left as `TBD`.
 *
 * @param {{ id: string }} mission - Mission whose id is printed in the header.
 * @param {{ answers?: object }} contract - Decision contract; `answers` may be absent.
 * @param {Array<{ id: string, title: string }>} checklist - Checklist items to render.
 * @returns {string} Markdown report template ending with a trailing newline.
 */
function qaReportTemplate(mission, contract, checklist) {
  const answers = contract.answers || {};
  const reportLines = [
    '# QA-LOOP Report',
    '',
    `Mission: ${mission.id}`,
    `Target: ${answers.TARGET_BASE_URL || 'unset'}`,
    `Scope: ${answers.QA_SCOPE || 'unset'}`,
    `Environment: ${answers.TARGET_ENVIRONMENT || 'unset'}`,
    '',
    '## Safety',
    '',
    '- Deployed destructive tests: never',
    '- Credentials: temp-only, never saved',
    `- UI evidence: ${CODEX_COMPUTER_USE_ONLY_POLICY}`,
    '',
    '## Checklist',
    '',
    // Kept as ONE element so an empty checklist still yields the same blank-line layout.
    checklist.map((item) => `- [ ] ${item.id}: ${item.title}`).join('\n'),
    '',
    '## Findings',
    '',
    'TBD',
    '',
    '## Corrections And Rechecks',
    '',
    'TBD',
    '',
    '## Honest Mode',
    '',
    'TBD',
    // Final empty element produces the trailing newline.
    ''
  ];
  return reportLines.join('\n');
}
|
|
258
265
|
|
|
259
266
|
function positiveCount(value) {
|
package/src/core/questions.mjs
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import path from 'node:path';
|
|
2
2
|
import { writeJsonAtomic, writeTextAtomic } from './fsx.mjs';
|
|
3
3
|
import { buildQaLoopQuestionSchema } from './qa-loop.mjs';
|
|
4
|
-
import { FROM_CHAT_IMG_CHECKLIST_ARTIFACT, FROM_CHAT_IMG_COVERAGE_ARTIFACT, FROM_CHAT_IMG_QA_LOOP_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT, hasFromChatImgSignal } from './routes.mjs';
|
|
4
|
+
import { CODEX_COMPUTER_USE_ONLY_POLICY, FROM_CHAT_IMG_CHECKLIST_ARTIFACT, FROM_CHAT_IMG_COVERAGE_ARTIFACT, FROM_CHAT_IMG_QA_LOOP_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT, hasFromChatImgSignal } from './routes.mjs';
|
|
5
5
|
|
|
6
6
|
export function buildQuestionSchemaForRoute(route, prompt) {
|
|
7
7
|
if (String(route?.id || '') === 'QALoop') return buildQaLoopQuestionSchema(prompt);
|
|
@@ -96,7 +96,7 @@ export function inferAnswersForPrompt(prompt, explicitAnswers = {}) {
|
|
|
96
96
|
};
|
|
97
97
|
const criteria = {
|
|
98
98
|
version: [version ? `version refs are ${version}` : 'version refs advance consistently', 'publish:dry gate passes', 'npm publish is not run'],
|
|
99
|
-
chat_capture: ['From-Chat-IMG activates chat-image intake only here', 'all visible chat requirements are listed before implementation', `${FROM_CHAT_IMG_COVERAGE_ARTIFACT} maps every customer request, screenshot region, and attachment to work-order item(s)`, `${FROM_CHAT_IMG_CHECKLIST_ARTIFACT} is updated as each request, image match, work item, scoped QA-LOOP, and verification step is completed`, `${FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT} records temporary TriWiki-backed session context with retention metadata`, `${FROM_CHAT_IMG_QA_LOOP_ARTIFACT} proves QA-LOOP ran over the exact customer-request work-order range after implementation`, 'unresolved_items is empty before Team completion', 'scoped_qa_loop_completed is true with zero unresolved QA findings', 'Computer Use
|
|
99
|
+
chat_capture: ['From-Chat-IMG activates chat-image intake only here', 'all visible chat requirements are listed before implementation', `${FROM_CHAT_IMG_COVERAGE_ARTIFACT} maps every customer request, screenshot region, and attachment to work-order item(s)`, `${FROM_CHAT_IMG_CHECKLIST_ARTIFACT} is updated as each request, image match, work item, scoped QA-LOOP, and verification step is completed`, `${FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT} records temporary TriWiki-backed session context with retention metadata`, `${FROM_CHAT_IMG_QA_LOOP_ARTIFACT} proves QA-LOOP ran over the exact customer-request work-order range after implementation`, 'unresolved_items is empty before Team completion', 'scoped_qa_loop_completed is true with zero unresolved QA findings', 'Codex Computer Use visual inspection strengthens matches when available; no Playwright or browser automation substitute is allowed', CODEX_COMPUTER_USE_ONLY_POLICY, 'client requests follow normal SKS gates and verification'],
|
|
100
100
|
priority: ['strong feedback raises required_weight', 'request topics are counted in wiki packs', 'future inference uses priority signals'],
|
|
101
101
|
questions: ['predictable answers are inferred', 'partial answers can seal contracts', 'only unresolved changing slots remain visible'],
|
|
102
102
|
install: ['bootstrap/deps initialize readiness', 'missing runtime deps show repair actions', 'readiness output is concrete']
|
|
@@ -217,7 +217,7 @@ export function questionsMarkdown(schema) {
|
|
|
217
217
|
if (isQaLoop) {
|
|
218
218
|
lines.push('QA-LOOP는 이 질문들에 모두 답변하고 Decision Contract가 봉인된 뒤에만 실행됩니다.');
|
|
219
219
|
lines.push('로그인이 필요하면 테스트 전용 계정 정보만 임시 런타임 입력으로 제공해야 하며, answers.json/리포트/로그/wiki에는 절대 저장하지 않습니다.');
|
|
220
|
-
lines.push('UI 수준 E2E
|
|
220
|
+
lines.push('UI 수준 E2E와 시각 검증은 Codex Computer Use 증거가 없으면 검증 완료로 주장할 수 없습니다. Chrome MCP, Browser Use, Playwright, Selenium, Puppeteer, 기타 브라우저 자동화는 UI/브라우저 검증 증거로 인정하지 않습니다.');
|
|
221
221
|
lines.push('개발 서버가 아닌 배포/스테이징 도메인에서는 삭제성 테스트를 절대 실행하지 않습니다.');
|
|
222
222
|
} else {
|
|
223
223
|
lines.push('이 질문들에 모두 답변하고 Decision Contract가 봉인된 뒤에만 실행됩니다.');
|
package/src/core/routes.mjs
CHANGED
|
@@ -8,6 +8,19 @@ export const FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT = 'from-chat-img-temp-triwiki.j
|
|
|
8
8
|
export const FROM_CHAT_IMG_QA_LOOP_ARTIFACT = 'from-chat-img-qa-loop.json';
|
|
9
9
|
export const FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS = 5;
|
|
10
10
|
export const USAGE_TOPICS = 'install|setup|bootstrap|root|deps|cmux|auto-review|team|qa-loop|goal|research|db|codex-app|dfix|design|imagegen|dollar|context7|pipeline|reasoning|guard|conflicts|versioning|eval|harness|hproof|gx|wiki|code-structure';
|
|
11
|
+
export const CODEX_COMPUTER_USE_EVIDENCE_SOURCE = 'codex_computer_use';
|
|
12
|
+
export const CODEX_COMPUTER_USE_ONLY_POLICY = 'Pipeline UI/browser verification and visual inspection must use Codex Computer Use only. Do not use Playwright, Chrome MCP, Browser Use, Selenium, Puppeteer, or any other browser automation substitute; if Codex Computer Use is unavailable, mark the UI/browser evidence unverified instead of substituting another tool.';
|
|
13
|
+
/**
 * Matches the name of a browser automation tool that must not be used as
 * UI/browser evidence (Playwright, Chrome MCP, Browser Use, Selenium, Puppeteer).
 *
 * Multi-word names accept whitespace, `_`, `-`, or nothing between the words so
 * identifier-style spellings (`browser_use`, `browser-use`, `chromeMcp`) are
 * caught as well as prose. Alphanumeric lookarounds are used instead of `\b`
 * because `\b` treats `_` as a word character and would miss values such as
 * `playwright_mcp`. Capture group 1 holds the matched tool name. The pattern is
 * deliberately not global, so `.test()` carries no `lastIndex` state.
 */
export const FORBIDDEN_BROWSER_AUTOMATION_RE = /(?<![a-z0-9])(playwright|chrome[\s_-]*mcp|browser[\s_-]*use|selenium|puppeteer)(?![a-z0-9])/i;

/**
 * Report whether any string inside an evidence value names a forbidden browser
 * automation tool.
 *
 * Strings are tested directly; arrays and plain objects are walked recursively.
 * Only property VALUES are inspected, not property names. Other primitives
 * (numbers, booleans, etc.) and null/undefined never match.
 *
 * @param {unknown} value - Evidence string, array, object, or any other value.
 * @param {Set<object>} [seen] - Objects already visited; guards against cycles.
 * @returns {boolean} True when a forbidden tool name is found anywhere in `value`.
 */
export function evidenceMentionsForbiddenBrowserAutomation(value, seen = new Set()) {
  if (value == null) return false;
  if (typeof value === 'string') return FORBIDDEN_BROWSER_AUTOMATION_RE.test(value);
  if (typeof value !== 'object') return false;
  // A revisited object has already been (or is being) scanned; stop to avoid infinite recursion.
  if (seen.has(value)) return false;
  seen.add(value);
  const children = Array.isArray(value) ? value : Object.values(value);
  return children.some((item) => evidenceMentionsForbiddenBrowserAutomation(item, seen));
}
|
|
11
24
|
|
|
12
25
|
export const RECOMMENDED_MCP_SERVERS = [
|
|
13
26
|
{
|
|
@@ -110,7 +123,7 @@ export function triwikiStagePolicyText(commandPrefix = 'sks') {
|
|
|
110
123
|
}
|
|
111
124
|
|
|
112
125
|
/**
 * Compose the single-paragraph From-Chat-IMG intake instruction.
 *
 * The paragraph names every From-Chat-IMG artifact file, embeds the shared
 * Codex Computer Use policy sentence, and states the temporary TriWiki
 * retention window. Sentences are joined by exactly one space.
 *
 * @returns {string} The intake instruction text.
 */
export function chatCaptureIntakeText() {
  const artifactList = `${FROM_CHAT_IMG_WORK_ORDER_ARTIFACT}, ${FROM_CHAT_IMG_SOURCE_INVENTORY_ARTIFACT}, ${FROM_CHAT_IMG_VISUAL_MAP_ARTIFACT}, ${FROM_CHAT_IMG_COVERAGE_ARTIFACT}, ${FROM_CHAT_IMG_CHECKLIST_ARTIFACT}, ${FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT}, and ${FROM_CHAT_IMG_QA_LOOP_ARTIFACT}`;
  const sentences = [
    'From-Chat-IMG intake: explicit signal only.',
    'Select forensic visual effort.',
    `Treat uploads as chat screenshot plus originals, use Codex Computer Use visual inspection when available, list requirements first in source order, match regions to attachments with confidence, and write ${artifactList}.`,
    `${CODEX_COMPUTER_USE_ONLY_POLICY}`,
    'Preserve each visible customer request as source-bound text, account for every screenshot image region and separate attachment, map each item to work-order actions, perform the customer-request work, then run a scoped QA-LOOP over that exact work-order range before Team completion.',
    'Update checklist checkboxes as work proceeds until all boxes are checked, unresolved_items is empty, scoped_qa_loop_completed=true, QA unresolved findings are zero, and schema validation passes.',
    `${FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT} is temporary TriWiki-backed session context with expires_after_sessions=${FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS}, so it can be forgotten by retention after enough later sessions.`,
    'Do not assume ordinary image prompts are chat captures.'
  ];
  return sentences.join(' ');
}
|
|
115
128
|
|
|
116
129
|
export function noUnrequestedFallbackCodePolicyText() {
|