sneakoscope 0.6.80 → 0.6.81
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -3
- package/package.json +1 -1
- package/src/cli/main.mjs +19 -4
- package/src/cli/maintenance-commands.mjs +3 -3
- package/src/core/codex-app.mjs +3 -3
- package/src/core/fsx.mjs +1 -1
- package/src/core/init.mjs +3 -3
- package/src/core/pipeline.mjs +4 -2
- package/src/core/qa-loop.mjs +13 -6
- package/src/core/questions.mjs +3 -3
- package/src/core/routes.mjs +14 -1
package/README.md
CHANGED
|
@@ -59,7 +59,7 @@ sks selftest --mock
|
|
|
59
59
|
- Node.js `>=20.11`
|
|
60
60
|
- npm
|
|
61
61
|
- Codex CLI for terminal workflows
|
|
62
|
-
- Codex App for app-facing workflows
|
|
62
|
+
- Codex App for app-facing workflows, with Codex Computer Use required for UI/browser evidence
|
|
63
63
|
- cmux for the CLI-first runtime
|
|
64
64
|
- Context7 MCP for current-docs-gated routes
|
|
65
65
|
|
|
@@ -328,7 +328,7 @@ sks qa-loop run latest --max-cycles 2
|
|
|
328
328
|
sks qa-loop status latest
|
|
329
329
|
```
|
|
330
330
|
|
|
331
|
-
Use `$QA-LOOP` in Codex App when UI-level E2E needs verification. UI verification must use Codex Computer Use evidence only; Chrome MCP, Browser Use, Playwright, and other browser automation do not satisfy UI-level E2E verification.
|
|
331
|
+
Use `$QA-LOOP` in Codex App when UI-level E2E needs verification. UI verification must use Codex Computer Use evidence only; Chrome MCP, Browser Use, Playwright, Selenium, Puppeteer, and other browser automation do not satisfy UI-level E2E verification.
|
|
332
332
|
|
|
333
333
|
### Refresh Context Before Risky Work
|
|
334
334
|
|
|
@@ -380,7 +380,7 @@ sks codex-app check
|
|
|
380
380
|
codex mcp list
|
|
381
381
|
```
|
|
382
382
|
|
|
383
|
-
Codex App workflows need the app installed and
|
|
383
|
+
Codex App workflows need the app installed. QA and visual-evidence workflows require first-party Codex Computer Use; Browser Use may support non-UI browser context, but it is not valid UI/browser verification evidence.
|
|
384
384
|
|
|
385
385
|
### Setup is blocked by another harness
|
|
386
386
|
|
|
@@ -415,6 +415,8 @@ npm run sizecheck
|
|
|
415
415
|
npm run release:check
|
|
416
416
|
```
|
|
417
417
|
|
|
418
|
+
Package pipeline UI/browser verification and visual inspection evidence must come from Codex Computer Use only. Do not use Playwright, Chrome MCP, Browser Use, Selenium, Puppeteer, or other browser automation as substitutes for that evidence.
|
|
419
|
+
|
|
418
420
|
Dry-run publish:
|
|
419
421
|
|
|
420
422
|
```sh
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "sneakoscope",
|
|
3
3
|
"displayName": "ㅅㅋㅅ",
|
|
4
|
-
"version": "0.6.
|
|
4
|
+
"version": "0.6.81",
|
|
5
5
|
"description": "Sneakoscope Codex: database-safe Codex CLI/App harness with Team, Goal, AutoResearch, TriWiki, and Honest Mode.",
|
|
6
6
|
"type": "module",
|
|
7
7
|
"homepage": "https://github.com/mandarange/Sneakoscope-Codex#readme",
|
package/src/cli/main.mjs
CHANGED
|
@@ -25,7 +25,7 @@ import { defaultEvaluationScenario, runEvaluationBenchmark } from '../core/evalu
|
|
|
25
25
|
import { buildResearchPrompt, evaluateResearchGate, writeMockResearchResult, writeResearchPlan } from '../core/research.mjs';
|
|
26
26
|
import { contextCapsule } from '../core/triwiki-attention.mjs';
|
|
27
27
|
import { rgbaKey, rgbaToWikiCoord, validateWikiCoordinateIndex } from '../core/wiki-coordinate.mjs';
|
|
28
|
-
import { ALLOWED_REASONING_EFFORTS, COMMAND_CATALOG, DOLLAR_COMMAND_ALIASES, DOLLAR_COMMANDS, DOLLAR_SKILL_NAMES, FROM_CHAT_IMG_CHECKLIST_ARTIFACT, FROM_CHAT_IMG_COVERAGE_ARTIFACT, FROM_CHAT_IMG_QA_LOOP_ARTIFACT, FROM_CHAT_IMG_SOURCE_INVENTORY_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS, FROM_CHAT_IMG_VISUAL_MAP_ARTIFACT, FROM_CHAT_IMG_WORK_ORDER_ARTIFACT, RECOMMENDED_SKILLS, ROUTES, USAGE_TOPICS, context7ConfigToml, hasContext7ConfigText, hasFromChatImgSignal, looksLikeAnswerOnlyRequest, noUnrequestedFallbackCodePolicyText, reflectionRequiredForRoute, reasoningInstruction, routePrompt, routeReasoning, routeRequiresSubagents, stackCurrentDocsPolicy, triwikiContextTracking } from '../core/routes.mjs';
|
|
28
|
+
import { ALLOWED_REASONING_EFFORTS, CODEX_COMPUTER_USE_EVIDENCE_SOURCE, CODEX_COMPUTER_USE_ONLY_POLICY, COMMAND_CATALOG, DOLLAR_COMMAND_ALIASES, DOLLAR_COMMANDS, DOLLAR_SKILL_NAMES, FROM_CHAT_IMG_CHECKLIST_ARTIFACT, FROM_CHAT_IMG_COVERAGE_ARTIFACT, FROM_CHAT_IMG_QA_LOOP_ARTIFACT, FROM_CHAT_IMG_SOURCE_INVENTORY_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS, FROM_CHAT_IMG_VISUAL_MAP_ARTIFACT, FROM_CHAT_IMG_WORK_ORDER_ARTIFACT, RECOMMENDED_SKILLS, ROUTES, USAGE_TOPICS, context7ConfigToml, hasContext7ConfigText, hasFromChatImgSignal, looksLikeAnswerOnlyRequest, noUnrequestedFallbackCodePolicyText, reflectionRequiredForRoute, reasoningInstruction, routePrompt, routeReasoning, routeRequiresSubagents, stackCurrentDocsPolicy, triwikiContextTracking } from '../core/routes.mjs';
|
|
29
29
|
import { context7Evidence, evaluateStop, recordContext7Evidence, recordSubagentEvidence } from '../core/pipeline.mjs';
|
|
30
30
|
import { TEAM_DECOMPOSITION_ARTIFACT, TEAM_GRAPH_ARTIFACT, TEAM_INBOX_DIR, TEAM_RUNTIME_TASKS_ARTIFACT, validateTeamRuntimeArtifacts, writeTeamRuntimeArtifacts } from '../core/team-dag.mjs';
|
|
31
31
|
import { appendTeamEvent, initTeamLive, parseTeamSpecText, readTeamDashboard, readTeamLive, readTeamTranscriptTail, renderTeamAgentLane } from '../core/team-live.mjs';
|
|
@@ -1632,7 +1632,7 @@ async function setup(args) {
|
|
|
1632
1632
|
if (!cliTools.cmux.ok) console.log(`\ncmux ${cmuxStatusKind(cliTools.cmux)}. ${cliTools.cmux.bin ? 'Run: sks cmux check' : `Install: ${cliTools.cmux.install_hint}`}`);
|
|
1633
1633
|
if (!install.ok && install.scope === 'global') console.log('\nGlobal command missing. Run: npm i -g sneakoscope');
|
|
1634
1634
|
if (!install.ok && install.scope === 'project') console.log('\nProject package missing. Run: npm i -D sneakoscope');
|
|
1635
|
-
if (!appRuntime.ok) console.log('\nCodex App and first-party
|
|
1635
|
+
if (!appRuntime.ok) console.log('\nCodex App and first-party Codex Computer Use are required for SKS QA/visual evidence; Browser Use is not a UI verification substitute. Run: sks codex-app check');
|
|
1636
1636
|
}
|
|
1637
1637
|
|
|
1638
1638
|
function formatCodexCliToolStatus(status = {}) {
|
|
@@ -2205,7 +2205,8 @@ async function selftest() {
|
|
|
2205
2205
|
if (!promptPipelineText.includes('design.md') || !promptPipelineText.includes('imagegen')) throw new Error('selftest failed: prompt pipeline missing design/image asset routing');
|
|
2206
2206
|
if (!promptPipelineText.includes('From-Chat-IMG') || !promptPipelineText.includes('Do not assume ordinary image prompts are chat captures')) throw new Error('selftest failed: prompt pipeline missing explicit From-Chat-IMG gating');
|
|
2207
2207
|
const fromChatImgSkillText = await safeReadText(path.join(tmp, '.agents', 'skills', 'from-chat-img', 'SKILL.md'));
|
|
2208
|
-
if (!fromChatImgSkillText.includes('normal Team pipeline') || !fromChatImgSkillText.includes('Computer Use
|
|
2208
|
+
if (!fromChatImgSkillText.includes('normal Team pipeline') || !fromChatImgSkillText.includes('Codex Computer Use visual inspection') || !fromChatImgSkillText.includes(CODEX_COMPUTER_USE_ONLY_POLICY) || !fromChatImgSkillText.includes(FROM_CHAT_IMG_CHECKLIST_ARTIFACT) || !fromChatImgSkillText.includes(FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT) || !fromChatImgSkillText.includes(FROM_CHAT_IMG_QA_LOOP_ARTIFACT)) throw new Error('selftest failed: from-chat-img skill missing Team/Computer Use-only inspection checklist guidance');
|
|
2209
|
+
if (fromChatImgSkillText.includes('Computer Use/browser visual inspection')) throw new Error('selftest failed: from-chat-img skill still allows browser visual inspection wording');
|
|
2209
2210
|
const fromChatImgSkillMeta = await safeReadText(path.join(tmp, '.agents', 'skills', 'from-chat-img', 'agents', 'openai.yaml'));
|
|
2210
2211
|
if (!fromChatImgSkillMeta.includes('model_reasoning_effort: xhigh')) throw new Error('selftest failed: from-chat-img skill metadata is not xhigh');
|
|
2211
2212
|
for (const supportSkill of ['reasoning-router', 'pipeline-runner', 'context7-docs', 'seo-geo-optimizer', 'reflection', 'design-system-builder', 'design-ui-editor', 'imagegen']) {
|
|
@@ -2473,6 +2474,9 @@ async function selftest() {
|
|
|
2473
2474
|
const hookQaSchema = await readJson(path.join(missionDir(hookQaTmp, hookQaState.mission_id), 'required-answers.schema.json'));
|
|
2474
2475
|
const hookQaAnswers = {};
|
|
2475
2476
|
for (const s of hookQaSchema.slots) hookQaAnswers[s.id] = s.options ? (s.type === 'array' ? [s.options[0]] : s.options[0]) : (s.type.includes('array') ? ['selftest'] : 'selftest');
|
|
2477
|
+
hookQaAnswers.QA_SCOPE = 'all_available';
|
|
2478
|
+
hookQaAnswers.TARGET_BASE_URL = 'none';
|
|
2479
|
+
hookQaAnswers.API_BASE_URL = 'same_as_target';
|
|
2476
2480
|
const hookQaAnswersPath = path.join(hookQaTmp, 'qa-answers.json');
|
|
2477
2481
|
await writeJsonAtomic(hookQaAnswersPath, hookQaAnswers);
|
|
2478
2482
|
const qaAnswerResult = await runProcess(process.execPath, [hookBin, 'pipeline', 'answer', 'latest', hookQaAnswersPath], { cwd: hookQaTmp, env: { SKS_DISABLE_UPDATE_CHECK: '1' }, timeoutMs: 15000, maxOutputBytes: 64 * 1024 });
|
|
@@ -2496,9 +2500,16 @@ async function selftest() {
|
|
|
2496
2500
|
await writeTextAtomic(path.join(unresolvedQaTmp, unresolvedQaGateFile), '# unresolved\n');
|
|
2497
2501
|
const unresolvedQaGate = await evaluateQaGate(unresolvedQaTmp);
|
|
2498
2502
|
if (unresolvedQaGate.passed || !unresolvedQaGate.reasons.includes('unresolved_fixable_findings_remaining')) throw new Error('selftest failed: unresolved fixable QA finding was accepted');
|
|
2503
|
+
const forbiddenQaTmp = tmpdir();
|
|
2504
|
+
const forbiddenQaGate = defaultQaGate({ sealed_hash: 'selftest', answers: { QA_SCOPE: 'ui_e2e_only', TARGET_BASE_URL: 'http://localhost:3000', API_BASE_URL: 'same_as_target', TARGET_ENVIRONMENT: 'local_dev_server', DESTRUCTIVE_DEPLOYED_TESTS_ALLOWED: 'never' } });
|
|
2505
|
+
await writeJsonAtomic(path.join(forbiddenQaTmp, 'qa-gate.json'), { ...forbiddenQaGate, passed: true, qa_report_written: true, qa_ledger_complete: true, checklist_completed: true, safety_reviewed: true, credentials_not_persisted: true, ui_computer_use_evidence: true, ui_evidence_source: 'playwright', post_fix_verification_complete: true, honest_mode_complete: true, evidence: ['Playwright screenshot evidence'] });
|
|
2506
|
+
await writeJsonAtomic(path.join(forbiddenQaTmp, 'qa-ledger.json'), { checklist: [] });
|
|
2507
|
+
await writeTextAtomic(path.join(forbiddenQaTmp, forbiddenQaGate.qa_report_file), '# forbidden\n');
|
|
2508
|
+
const forbiddenQaGateResult = await evaluateQaGate(forbiddenQaTmp);
|
|
2509
|
+
if (forbiddenQaGateResult.passed || !forbiddenQaGateResult.reasons.includes('ui_evidence_source_not_codex_computer_use') || !forbiddenQaGateResult.reasons.includes('forbidden_browser_automation_evidence')) throw new Error('selftest failed: forbidden browser automation QA evidence was accepted');
|
|
2499
2510
|
const promptQa = buildQaLoopPrompt({ id: 'selftest', mission: { prompt: 'QA and fix' }, contract: { answers: { QA_CORRECTIVE_POLICY: 'apply_safe_fixes_and_reverify' } }, cycle: 1, previous: '', reportFile: qaReportFile });
|
|
2500
2511
|
if (!promptQa.includes('dogfood as human proxy') || !promptQa.includes('fix safe code/test/docs now') || !promptQa.includes('post_fix_verification_complete')) throw new Error('selftest failed: QA-LOOP dogfood prompt');
|
|
2501
|
-
if (!promptQa.includes(
|
|
2512
|
+
if (!promptQa.includes(CODEX_COMPUTER_USE_ONLY_POLICY) || !promptQa.includes('Chrome MCP') || !promptQa.includes('Playwright') || !promptQa.includes('Browser Use')) throw new Error('selftest failed: QA-LOOP prompt did not enforce Computer Use-only UI evidence');
|
|
2502
2513
|
if (promptQa.includes('Browser/Computer Use evidence')) throw new Error('selftest failed: QA-LOOP prompt still allows Browser/Computer UI evidence');
|
|
2503
2514
|
const pkgQa = defaultQaGate({ sealed_hash: 'selftest', answers: { QA_SCOPE: 'all_available', TARGET_BASE_URL: 'none', API_BASE_URL: 'same_as_target', TARGET_ENVIRONMENT: 'local_dev_server', DESTRUCTIVE_DEPLOYED_TESTS_ALLOWED: 'never' } });
|
|
2504
2515
|
if (pkgQa.ui_e2e_required || pkgQa.api_e2e_required || !pkgQa.ui_computer_use_evidence) throw new Error('selftest failed: package QA target gate');
|
|
@@ -2637,6 +2648,7 @@ async function selftest() {
|
|
|
2637
2648
|
unresolved_findings: 0,
|
|
2638
2649
|
unresolved_fixable_findings: 0,
|
|
2639
2650
|
post_fix_verification_complete: true,
|
|
2651
|
+
computer_use_evidence_source: CODEX_COMPUTER_USE_EVIDENCE_SOURCE,
|
|
2640
2652
|
evidence: ['selftest scoped QA-LOOP covered work-1']
|
|
2641
2653
|
};
|
|
2642
2654
|
const incompleteTeamGateTmp = tmpdir();
|
|
@@ -2718,6 +2730,9 @@ async function selftest() {
|
|
|
2718
2730
|
await writeJsonAtomic(path.join(fromChatCoverageDir, FROM_CHAT_IMG_QA_LOOP_ARTIFACT), { ...passedFromChatImgQaLoop, work_order_item_ids_covered: [] });
|
|
2719
2731
|
const uncoveredFromChatQaLoopStop = await evaluateStop(fromChatCoverageTmp, fromChatCoverageState, { last_assistant_message: 'SKS Honest Mode verification evidence gap' }, { noQuestion: false });
|
|
2720
2732
|
if (uncoveredFromChatQaLoopStop?.decision !== 'block' || !String(uncoveredFromChatQaLoopStop.reason || '').includes(`${FROM_CHAT_IMG_QA_LOOP_ARTIFACT}:work_order_item_ids_covered`)) throw new Error('selftest failed: From-Chat-IMG scoped QA-LOOP work item coverage did not block Team gate');
|
|
2733
|
+
await writeJsonAtomic(path.join(fromChatCoverageDir, FROM_CHAT_IMG_QA_LOOP_ARTIFACT), { ...passedFromChatImgQaLoop, computer_use_evidence_source: 'playwright', evidence: ['Playwright visual verification'] });
|
|
2734
|
+
const forbiddenFromChatQaLoopStop = await evaluateStop(fromChatCoverageTmp, fromChatCoverageState, { last_assistant_message: 'SKS Honest Mode verification evidence gap' }, { noQuestion: false });
|
|
2735
|
+
if (forbiddenFromChatQaLoopStop?.decision !== 'block' || !String(forbiddenFromChatQaLoopStop.reason || '').includes(`${FROM_CHAT_IMG_QA_LOOP_ARTIFACT}:computer_use_evidence_source`) || !String(forbiddenFromChatQaLoopStop.reason || '').includes(`${FROM_CHAT_IMG_QA_LOOP_ARTIFACT}:forbidden_browser_automation_evidence`)) throw new Error('selftest failed: From-Chat-IMG scoped QA-LOOP accepted forbidden browser automation evidence');
|
|
2721
2736
|
await writeJsonAtomic(path.join(fromChatCoverageDir, FROM_CHAT_IMG_QA_LOOP_ARTIFACT), passedFromChatImgQaLoop);
|
|
2722
2737
|
await writeJsonAtomic(path.join(fromChatCoverageDir, 'team-gate.json'), { ...passedTeamGate, from_chat_img_required: true, from_chat_img_request_coverage: true });
|
|
2723
2738
|
const coveredFromChatStop = await evaluateStop(fromChatCoverageTmp, fromChatCoverageState, { last_assistant_message: 'SKS Honest Mode verification evidence gap' }, { noQuestion: false });
|
|
@@ -15,7 +15,7 @@ import { renderCartridge, validateCartridge, driftCartridge, snapshotCartridge }
|
|
|
15
15
|
import { DEFAULT_EVAL_THRESHOLDS, compareEvaluationReports, runEvaluationBenchmark } from '../core/evaluation.mjs';
|
|
16
16
|
import { contextCapsule } from '../core/triwiki-attention.mjs';
|
|
17
17
|
import { rgbaKey, rgbaToWikiCoord, validateWikiCoordinateIndex } from '../core/wiki-coordinate.mjs';
|
|
18
|
-
import { ALLOWED_REASONING_EFFORTS, FROM_CHAT_IMG_CHECKLIST_ARTIFACT, FROM_CHAT_IMG_COVERAGE_ARTIFACT, FROM_CHAT_IMG_QA_LOOP_ARTIFACT, FROM_CHAT_IMG_SOURCE_INVENTORY_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS, FROM_CHAT_IMG_VISUAL_MAP_ARTIFACT, FROM_CHAT_IMG_WORK_ORDER_ARTIFACT, ROUTES, hasFromChatImgSignal, routePrompt, stackCurrentDocsPolicy, triwikiContextTracking } from '../core/routes.mjs';
|
|
18
|
+
import { ALLOWED_REASONING_EFFORTS, CODEX_COMPUTER_USE_ONLY_POLICY, FROM_CHAT_IMG_CHECKLIST_ARTIFACT, FROM_CHAT_IMG_COVERAGE_ARTIFACT, FROM_CHAT_IMG_QA_LOOP_ARTIFACT, FROM_CHAT_IMG_SOURCE_INVENTORY_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS, FROM_CHAT_IMG_VISUAL_MAP_ARTIFACT, FROM_CHAT_IMG_WORK_ORDER_ARTIFACT, ROUTES, hasFromChatImgSignal, routePrompt, stackCurrentDocsPolicy, triwikiContextTracking } from '../core/routes.mjs';
|
|
19
19
|
import { TEAM_DECOMPOSITION_ARTIFACT, TEAM_GRAPH_ARTIFACT, TEAM_INBOX_DIR, TEAM_RUNTIME_TASKS_ARTIFACT, teamRuntimePlanMetadata, teamRuntimeRequiredArtifacts, writeTeamRuntimeArtifacts } from '../core/team-dag.mjs';
|
|
20
20
|
import { appendTeamEvent, formatRoleCounts, initTeamLive, normalizeTeamSpec, parseTeamSpecArgs, readTeamDashboard, readTeamLive, readTeamTranscriptTail, renderTeamAgentLane, renderTeamWatch } from '../core/team-live.mjs';
|
|
21
21
|
import { ARTIFACT_FILES, writeValidationReport } from '../core/artifact-schemas.mjs';
|
|
@@ -67,7 +67,7 @@ Prompt route:
|
|
|
67
67
|
$QA-LOOP dogfood UI/API, fix safe issues, reverify
|
|
68
68
|
|
|
69
69
|
UI evidence:
|
|
70
|
-
Codex Computer Use only for UI-level E2E; do not use Chrome MCP, Browser Use, Playwright, or other browser automation as UI verification evidence.
|
|
70
|
+
Codex Computer Use only for UI-level E2E and visual evidence; do not use Chrome MCP, Browser Use, Playwright, Selenium, Puppeteer, or other browser automation as UI verification evidence.
|
|
71
71
|
`);
|
|
72
72
|
}
|
|
73
73
|
|
|
@@ -1395,7 +1395,7 @@ export function buildTeamPlan(id, prompt, opts = {}) {
|
|
|
1395
1395
|
invariants: [
|
|
1396
1396
|
'The parent thread remains the orchestrator and owns final integration.',
|
|
1397
1397
|
'Team roster confirmation is mandatory before implementation: default SKS counts are materialized when the user did not specify counts, explicit counts are honored, and team-gate.json must include team_roster_confirmed=true with team-roster.json present.',
|
|
1398
|
-
`When and only when From-Chat-IMG/$From-Chat-IMG is explicit, treat client requests as chat-history screenshots plus separate attachments: extract visible text in reading order, use Computer Use
|
|
1398
|
+
`When and only when From-Chat-IMG/$From-Chat-IMG is explicit, treat client requests as chat-history screenshots plus separate attachments: extract visible text in reading order, use Codex Computer Use visual inspection to match screenshot image regions to attachments with confidence notes, and turn that evidence into a complete modification work order before editing. ${CODEX_COMPUTER_USE_ONLY_POLICY}`,
|
|
1399
1399
|
`For From-Chat-IMG, forensic intake is stop-gated: ${FROM_CHAT_IMG_WORK_ORDER_ARTIFACT}, ${FROM_CHAT_IMG_SOURCE_INVENTORY_ARTIFACT}, and ${FROM_CHAT_IMG_VISUAL_MAP_ARTIFACT} must exist and pass schema validation before implementation is treated as complete.`,
|
|
1400
1400
|
`For From-Chat-IMG, request coverage is stop-gated: ${FROM_CHAT_IMG_COVERAGE_ARTIFACT} must show all_chat_requirements_listed=true, all_requirements_mapped_to_work_order=true, all_screenshot_regions_accounted=true, all_attachments_accounted=true, image_analysis_complete=true, verbatim_customer_requests_preserved=true, checklist_updated=true, temp_triwiki_recorded=true, scoped_qa_loop_completed=true, and unresolved_items=[] before Team completion.`,
|
|
1401
1401
|
`For From-Chat-IMG, ${FROM_CHAT_IMG_CHECKLIST_ARTIFACT} must contain Customer Requests, Image Analysis, Work Items, QA Loop, and Verification sections, with every checkbox checked as each item is completed.`,
|
package/src/core/codex-app.mjs
CHANGED
|
@@ -118,7 +118,7 @@ export async function codexAppIntegrationStatus(opts = {}) {
|
|
|
118
118
|
export function codexAppGuidance({ appInstalled, codex, mcpList, computerUseReady, browserUseReady }) {
|
|
119
119
|
const lines = [];
|
|
120
120
|
if (!appInstalled) {
|
|
121
|
-
lines.push('Install and open Codex App for first-party MCP/plugin tools. SKS cmux launch can still run with Codex CLI alone, but
|
|
121
|
+
lines.push('Install and open Codex App for first-party MCP/plugin tools. SKS cmux launch can still run with Codex CLI alone, but Codex Computer Use evidence will be unavailable until Codex App is ready.');
|
|
122
122
|
lines.push(`Docs: ${CODEX_APP_DOCS_URL}`);
|
|
123
123
|
}
|
|
124
124
|
if (!codex?.bin) lines.push('Install Codex CLI too: npm i -g @openai/codex, or set SKS_CODEX_BIN.');
|
|
@@ -128,10 +128,10 @@ export function codexAppGuidance({ appInstalled, codex, mcpList, computerUseRead
|
|
|
128
128
|
}
|
|
129
129
|
if (appInstalled && (!computerUseReady || !browserUseReady)) {
|
|
130
130
|
lines.push('Open Codex App settings, enable recommended MCP/plugin tools, then restart Codex CLI sessions.');
|
|
131
|
-
lines.push('Required for SKS QA-LOOP UI evidence: Codex Computer Use only. Browser Use can support non-UI browser context, but it does not satisfy UI-level E2E verification.');
|
|
131
|
+
lines.push('Required for SKS QA-LOOP UI/browser evidence: Codex Computer Use only. Browser Use can support non-UI browser context, but it does not satisfy UI-level E2E verification.');
|
|
132
132
|
lines.push('Verify with: codex mcp list');
|
|
133
133
|
}
|
|
134
|
-
if (!lines.length) lines.push('Codex App, Codex CLI, Computer Use, and Browser Use checks look ready. UI-level E2E verification still
|
|
134
|
+
if (!lines.length) lines.push('Codex App, Codex CLI, Computer Use, and Browser Use checks look ready. UI-level E2E and visual verification still require Codex Computer Use evidence.');
|
|
135
135
|
return lines;
|
|
136
136
|
}
|
|
137
137
|
|
package/src/core/fsx.mjs
CHANGED
|
@@ -5,7 +5,7 @@ import os from 'node:os';
|
|
|
5
5
|
import crypto from 'node:crypto';
|
|
6
6
|
import { spawn } from 'node:child_process';
|
|
7
7
|
|
|
8
|
-
export const PACKAGE_VERSION = '0.6.
|
|
8
|
+
export const PACKAGE_VERSION = '0.6.81';
|
|
9
9
|
export const DEFAULT_PROCESS_TAIL_BYTES = 256 * 1024;
|
|
10
10
|
export const DEFAULT_PROCESS_TIMEOUT_MS = 30 * 60 * 1000;
|
|
11
11
|
|
package/src/core/init.mjs
CHANGED
|
@@ -6,7 +6,7 @@ import { DEFAULT_DB_SAFETY_POLICY } from './db-safety.mjs';
|
|
|
6
6
|
import { isHarnessSourceProject, writeHarnessGuardPolicy } from './harness-guard.mjs';
|
|
7
7
|
import { repairSksGeneratedArtifacts } from './harness-conflicts.mjs';
|
|
8
8
|
import { installVersionGitHook } from './version-manager.mjs';
|
|
9
|
-
import { DOLLAR_COMMANDS, DOLLAR_COMMAND_ALIASES, DOLLAR_SKILL_NAMES, FROM_CHAT_IMG_CHECKLIST_ARTIFACT, FROM_CHAT_IMG_COVERAGE_ARTIFACT, FROM_CHAT_IMG_QA_LOOP_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS, RECOMMENDED_MCP_SERVERS, RECOMMENDED_SKILLS, chatCaptureIntakeText, context7ConfigToml, stackCurrentDocsPolicyText, triwikiContextTracking, triwikiContextTrackingText, triwikiStagePolicyText } from './routes.mjs';
|
|
9
|
+
import { CODEX_COMPUTER_USE_ONLY_POLICY, DOLLAR_COMMANDS, DOLLAR_COMMAND_ALIASES, DOLLAR_SKILL_NAMES, FROM_CHAT_IMG_CHECKLIST_ARTIFACT, FROM_CHAT_IMG_COVERAGE_ARTIFACT, FROM_CHAT_IMG_QA_LOOP_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS, RECOMMENDED_MCP_SERVERS, RECOMMENDED_SKILLS, chatCaptureIntakeText, context7ConfigToml, stackCurrentDocsPolicyText, triwikiContextTracking, triwikiContextTrackingText, triwikiStagePolicyText } from './routes.mjs';
|
|
10
10
|
|
|
11
11
|
const REFLECTION_MEMORY_PATH = '.sneakoscope/memory/q2_facts/post-route-reflection.md';
|
|
12
12
|
function reflectionInstructionText(commandPrefix = 'sks') {
|
|
@@ -499,8 +499,8 @@ export async function installSkills(root) {
|
|
|
499
499
|
'sks': `---\nname: sks\ndescription: General Sneakoscope Codex command route for $SKS or $sks usage, setup, status, and workflow help.\n---\n\nUse local SKS commands: bootstrap, deps, commands, quickstart, codex-app, context7, guard, conflicts, reasoning, wiki, pipeline. Promote code-changing work to Team unless Answer/DFix/Help/Wiki/safety route fits. Surface route/guard/scope, use TriWiki, do not edit installed harness files outside this engine repo, and require human-approved conflict cleanup.\n`,
|
|
500
500
|
'wiki': `---\nname: wiki\ndescription: Dollar-command route for $Wiki TriWiki refresh, pack, validate, and prune commands.\n---\n\nUse for $Wiki or Korean wiki-refresh requests. Refresh/update/갱신: run sks wiki refresh, then validate .sneakoscope/wiki/context-pack.json. Pack: run sks wiki pack, then validate. Prune/clean/정리: use sks wiki refresh --prune, or sks wiki prune --dry-run for inspection. Report claims, anchors, trust, attention.use_first/hydrate_first, validation, and blockers. Do not start ambiguity-gated implementation, subagents, or unrelated work.\n`,
|
|
501
501
|
'team': `---\nname: team\ndescription: SKS Team orchestration for $Team/code work; $From-Chat-IMG is the explicit chat-image alias.\n---\n\nUse for $Team/code work. Ambiguity gate first. Write team-roster.json; team-gate.json needs team_roster_confirmed=true. executor:N means N scouts, N debate voices, then fresh N executors. After consensus, compile team-graph.json, team-runtime-tasks.json, team-decomposition-report.json, and team-inbox/ so worker handoff uses concrete runtime task ids with role/path/domain/lane hints. Refresh/validate TriWiki before debate, implementation, review, and final; consume attention.use_first and hydrate attention.hydrate_first before risky decisions. Log events, close sessions, pass team-session-cleanup.json, then reflection and Honest Mode. Parent integrates/verifies.\n\n${chatCaptureIntakeText()}\n`,
|
|
502
|
-
'from-chat-img': `---\nname: from-chat-img\ndescription: Explicit $From-Chat-IMG Team alias for chat screenshot plus attachment analysis.\n---\n\nUse only for From-Chat-IMG/$From-Chat-IMG. It enters the normal Team pipeline. Treat uploads as chat screenshot plus originals. Use Computer Use
|
|
503
|
-
'qa-loop': `---\nname: qa-loop\ndescription: $QA-LOOP dogfoods UI/API as human proxy with safety gates, Codex Computer Use-only UI evidence, safe fixes, rechecks, and a QA report.\n---\n\nUse only $QA-LOOP. Ask scope, target, mutation, login. Credentials are runtime-only; never save secrets. UI-level E2E needs Codex Computer Use evidence or must be marked unverified; Chrome MCP, Browser Use, Playwright, and other browser automation do not satisfy UI verification. Deployed targets are read-only; destructive removal is forbidden. After answer/run, dogfood real flows, apply safe contract-allowed code/test/docs fixes, recheck, and do not pass qa-gate.json with unresolved findings or without post_fix_verification_complete. Finish qa-ledger, date/version report, gate, completion summary, and Honest Mode.\n`,
|
|
502
|
+
'from-chat-img': `---\nname: from-chat-img\ndescription: Explicit $From-Chat-IMG Team alias for chat screenshot plus attachment analysis.\n---\n\nUse only for From-Chat-IMG/$From-Chat-IMG. It enters the normal Team pipeline. Treat uploads as chat screenshot plus originals. Use Codex Computer Use visual inspection when available, list requirements first, match regions to attachments with confidence, write ${FROM_CHAT_IMG_COVERAGE_ARTIFACT}, ${FROM_CHAT_IMG_CHECKLIST_ARTIFACT}, ${FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT}, and ${FROM_CHAT_IMG_QA_LOOP_ARTIFACT}, then continue Team gates, review, reflection, and Honest Mode. ${CODEX_COMPUTER_USE_ONLY_POLICY} The ledger must account for every visible customer request, screenshot image region, and separate attachment; ${FROM_CHAT_IMG_CHECKLIST_ARTIFACT} must have a checked item for each request, image-region/attachment match, work item, scoped QA-LOOP, and verification step; ${FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT} stores temporary TriWiki-backed session context with expires_after_sessions=${FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS}. ${FROM_CHAT_IMG_QA_LOOP_ARTIFACT} must prove QA-LOOP ran over the exact customer-request work-order range after implementation, with every work item covered, post-fix verification complete, and zero unresolved findings. team-gate.json cannot pass From-Chat-IMG completion until unresolved_items is empty, every checklist box is checked, and scoped_qa_loop_completed=true.\n`,
|
|
503
|
+
'qa-loop': `---\nname: qa-loop\ndescription: $QA-LOOP dogfoods UI/API as human proxy with safety gates, Codex Computer Use-only UI evidence, safe fixes, rechecks, and a QA report.\n---\n\nUse only $QA-LOOP. Ask scope, target, mutation, login. Credentials are runtime-only; never save secrets. UI-level E2E needs Codex Computer Use evidence or must be marked unverified; Chrome MCP, Browser Use, Playwright, Selenium, Puppeteer, and other browser automation do not satisfy UI/browser verification. Deployed targets are read-only; destructive removal is forbidden. After answer/run, dogfood real flows, apply safe contract-allowed code/test/docs fixes, recheck, and do not pass qa-gate.json with unresolved findings or without post_fix_verification_complete. Finish qa-ledger, date/version report, gate, completion summary, and Honest Mode.\n`,
|
|
504
504
|
'goal': `---\nname: goal\ndescription: Dollar-command route for $Goal or $goal Codex native persisted /goal workflows.\n---\n\nUse when the user invokes $Goal/$goal or asks to persist a workflow with Codex native /goal continuation. Prepare with sks goal create or the $Goal route, then use native Codex /goal create, pause, resume, and clear controls where available. Do not recreate the old no-question loop.\n`,
|
|
505
505
|
'research': `---\nname: research\ndescription: Dollar-command route for $Research or $research frontier discovery workflows.\n---\n\nUse when the user invokes $Research/$research or asks for research, hypotheses, new mechanisms, falsification, or testable predictions. Prefer sks research prepare and sks research run. Do not use for ordinary code edits.\n`,
|
|
506
506
|
'autoresearch': `---\nname: autoresearch\ndescription: Dollar-command route for $AutoResearch or $autoresearch iterative experiment loops.\n---\n\nUse for $AutoResearch, iterative improvement, SEO/GEO, ranking, workflow, benchmark, or experiments. Define program, hypothesis, experiment, metric, keep/discard, falsification, next step, and Honest Mode. Load seo-geo-optimizer for README/npm/GitHub/schema/AI-search work.\n`,
|
package/src/core/pipeline.mjs
CHANGED
|
@@ -12,7 +12,7 @@ import { writeMemorySweepReport } from './memory-governor.mjs';
|
|
|
12
12
|
import { writeMistakeMemoryReport } from './mistake-memory.mjs';
|
|
13
13
|
import { writeSkillForgeReport } from './skill-forge.mjs';
|
|
14
14
|
import { writeResearchPlan } from './research.mjs';
|
|
15
|
-
import { FROM_CHAT_IMG_CHECKLIST_ARTIFACT, FROM_CHAT_IMG_COVERAGE_ARTIFACT, FROM_CHAT_IMG_QA_LOOP_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS, chatCaptureIntakeText, context7RequirementText, dollarCommand, hasFromChatImgSignal, hasMadSksSignal, noUnrequestedFallbackCodePolicyText, reflectionRequiredForRoute, reasoningInstruction, routeNeedsContext7, routePrompt, routeReasoning, routeRequiresSubagents, stripDollarCommand, stripMadSksSignal, subagentExecutionPolicyText, stackCurrentDocsPolicyText, triwikiContextTracking, triwikiContextTrackingText, triwikiStagePolicyText } from './routes.mjs';
|
|
15
|
+
import { CODEX_COMPUTER_USE_EVIDENCE_SOURCE, CODEX_COMPUTER_USE_ONLY_POLICY, FROM_CHAT_IMG_CHECKLIST_ARTIFACT, FROM_CHAT_IMG_COVERAGE_ARTIFACT, FROM_CHAT_IMG_QA_LOOP_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS, chatCaptureIntakeText, context7RequirementText, dollarCommand, evidenceMentionsForbiddenBrowserAutomation, hasFromChatImgSignal, hasMadSksSignal, noUnrequestedFallbackCodePolicyText, reflectionRequiredForRoute, reasoningInstruction, routeNeedsContext7, routePrompt, routeReasoning, routeRequiresSubagents, stripDollarCommand, stripMadSksSignal, subagentExecutionPolicyText, stackCurrentDocsPolicyText, triwikiContextTracking, triwikiContextTrackingText, triwikiStagePolicyText } from './routes.mjs';
|
|
16
16
|
import { TEAM_DECOMPOSITION_ARTIFACT, TEAM_GRAPH_ARTIFACT, TEAM_INBOX_DIR, TEAM_RUNTIME_TASKS_ARTIFACT, teamRuntimePlanMetadata, teamRuntimeRequiredArtifacts, validateTeamRuntimeArtifacts, writeTeamRuntimeArtifacts } from './team-dag.mjs';
|
|
17
17
|
import { formatRoleCounts, initTeamLive, parseTeamSpecText } from './team-live.mjs';
|
|
18
18
|
|
|
@@ -299,7 +299,7 @@ async function prepareTeam(root, route, task, required) {
|
|
|
299
299
|
team_runtime: teamRuntimePlanMetadata(),
|
|
300
300
|
phases: [
|
|
301
301
|
{ id: 'team_roster_confirmation', goal: `Before any implementation, materialize the Team roster from default SKS counts or explicit user counts, write team-roster.json, and surface role counts ${formatRoleCounts(roleCounts)}. Implementation cannot be considered complete unless team-gate.json has team_roster_confirmed=true.`, agents: ['parent_orchestrator'], output: 'team-roster.json' },
|
|
302
|
-
{ id: 'parallel_analysis_scouting', goal: `Before scouting, read TriWiki context. ${fromChatImgRequired ? `From-Chat-IMG active: use Computer Use
|
|
302
|
+
{ id: 'parallel_analysis_scouting', goal: `Before scouting, read TriWiki context. ${fromChatImgRequired ? `From-Chat-IMG active: use Codex Computer Use visual inspection, list every visible customer request, match every screenshot image region to attachments, write ${FROM_CHAT_IMG_COVERAGE_ARTIFACT}, ${FROM_CHAT_IMG_CHECKLIST_ARTIFACT}, and ${FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT}, then require scoped QA-LOOP evidence in ${FROM_CHAT_IMG_QA_LOOP_ARTIFACT} after the customer-request work is done. ${CODEX_COMPUTER_USE_ONLY_POLICY}` : `From-Chat-IMG inactive: do not assume ordinary images are chat captures. ${CODEX_COMPUTER_USE_ONLY_POLICY}`} Spawn exactly ${roster.bundle_size} read-only analysis_scout_N agents in parallel, using the full available session budget without exceeding ${agentSessions}. Split repo/docs/tests/API/user-flow/risk investigation into independent slices, hydrate relevant low-trust claims from source, and record source-backed findings.`, agents: roster.analysis_team.map((agent) => agent.id), max_parallel_subagents: agentSessions, write_policy: 'read-only' },
|
|
303
303
|
{ id: 'triwiki_refresh', goal: `Parent orchestrator updates Team analysis artifacts, then runs ${triwikiContextTracking().refresh_command} or ${triwikiContextTracking().pack_command}, prunes with ${triwikiContextTracking().prune_command} when stale/oversized wiki state would pollute handoffs, and runs ${triwikiContextTracking().validate_command} so the next stage uses current TriWiki context.`, agents: ['parent_orchestrator'], output: '.sneakoscope/wiki/context-pack.json' },
|
|
304
304
|
{ id: 'planning_debate', goal: `Before debate, read the refreshed TriWiki pack. Debate team of exactly ${roster.bundle_size} participants maps user inconvenience, options, constraints, affected files, DB/test risk, and tradeoffs while hydrating low-trust claims from source.`, agents: roster.debate_team.map((agent) => agent.id) },
|
|
305
305
|
{ id: 'consensus', goal: `Seal one objective with acceptance criteria and disjoint implementation slices, then refresh/validate TriWiki so implementation receives current consensus context.` },
|
|
@@ -853,6 +853,8 @@ async function missingFromChatImgCoverageArtifacts(root, state = {}) {
|
|
|
853
853
|
if (Number(qaLoop.unresolved_findings) !== 0) missing.push(`${FROM_CHAT_IMG_QA_LOOP_ARTIFACT}:unresolved_findings`);
|
|
854
854
|
if (Number(qaLoop.unresolved_fixable_findings) !== 0) missing.push(`${FROM_CHAT_IMG_QA_LOOP_ARTIFACT}:unresolved_fixable_findings`);
|
|
855
855
|
if (!Array.isArray(qaLoop.evidence) || qaLoop.evidence.length === 0) missing.push(`${FROM_CHAT_IMG_QA_LOOP_ARTIFACT}:evidence`);
|
|
856
|
+
if (qaLoop.computer_use_evidence_source !== CODEX_COMPUTER_USE_EVIDENCE_SOURCE) missing.push(`${FROM_CHAT_IMG_QA_LOOP_ARTIFACT}:computer_use_evidence_source`);
|
|
857
|
+
if (evidenceMentionsForbiddenBrowserAutomation({ evidence: qaLoop.evidence, notes: qaLoop.notes, tool: qaLoop.tool, evidence_source: qaLoop.computer_use_evidence_source })) missing.push(`${FROM_CHAT_IMG_QA_LOOP_ARTIFACT}:forbidden_browser_automation_evidence`);
|
|
856
858
|
const coveredWorkItems = new Set(Array.isArray(qaLoop.work_order_item_ids_covered) ? qaLoop.work_order_item_ids_covered.map(String) : []);
|
|
857
859
|
for (const item of Array.isArray(ledger.work_order_items) ? ledger.work_order_items : []) {
|
|
858
860
|
const workId = String(item?.id || '');
|
package/src/core/qa-loop.mjs
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import path from 'node:path';
|
|
2
2
|
import { exists, nowIso, readJson, readText, writeJsonAtomic, writeTextAtomic, PACKAGE_VERSION } from './fsx.mjs';
|
|
3
|
+
import { CODEX_COMPUTER_USE_EVIDENCE_SOURCE, CODEX_COMPUTER_USE_ONLY_POLICY, evidenceMentionsForbiddenBrowserAutomation } from './routes.mjs';
|
|
3
4
|
|
|
4
5
|
export const QA_LOOP_ROUTE = 'QALoop';
|
|
5
6
|
const QA_REPORT_SUFFIX = 'qa-report.md';
|
|
@@ -29,7 +30,7 @@ export function buildQaLoopQuestionSchema(prompt) {
|
|
|
29
30
|
return {
|
|
30
31
|
schema_version: 1,
|
|
31
32
|
route: QA_LOOP_ROUTE,
|
|
32
|
-
description:
|
|
33
|
+
description: `QA-LOOP questions must be answered before execution. Login secrets and browser auth state are runtime-only and must not be saved to mission files or TriWiki. ${CODEX_COMPUTER_USE_ONLY_POLICY}`,
|
|
33
34
|
prompt,
|
|
34
35
|
slots: [
|
|
35
36
|
{ id: 'GOAL_PRECISE', question: 'Define the QA objective in one sentence.', required: true, type: 'string' },
|
|
@@ -45,7 +46,7 @@ export function buildQaLoopQuestionSchema(prompt) {
|
|
|
45
46
|
{ id: 'TEMP_TEST_CREDENTIALS_READY', question: 'If login is required, are test-only credentials ready to provide ephemerally during the run?', required: true, type: 'enum', options: ['not_required', 'yes_temp_only', 'no_block_authenticated_tests'] },
|
|
46
47
|
{ id: 'TEST_CREDENTIALS_RUNTIME_SOURCE', question: 'If login is required, how will test-only credentials be provided without saving the values?', required: true, type: 'enum', options: ['not_required', 'ephemeral_chat_only', 'environment_variables', 'secret_manager'] },
|
|
47
48
|
{ id: 'CREDENTIAL_STORAGE_ACK', question: 'Acknowledge credential handling policy.', required: true, type: 'enum', options: ['never_store_credentials_in_artifacts_or_wiki'] },
|
|
48
|
-
{ id: 'UI_COMPUTER_USE_ACK', question: 'Acknowledge UI E2E evidence policy: Codex Computer Use only; no Chrome MCP, Browser Use, Playwright, or other browser automation.', required: true, type: 'enum', options: [UI_COMPUTER_USE_ONLY_ACK] },
|
|
49
|
+
{ id: 'UI_COMPUTER_USE_ACK', question: 'Acknowledge UI E2E evidence policy: Codex Computer Use only; no Chrome MCP, Browser Use, Playwright, Selenium, Puppeteer, or other browser automation.', required: true, type: 'enum', options: [UI_COMPUTER_USE_ONLY_ACK] },
|
|
49
50
|
{ id: 'TEAM_MODE_ALLOWED', question: 'May QA-LOOP use Team/subagents where useful?', required: true, type: 'enum', options: ['yes_parallel_where_safe', 'no_parent_only'] },
|
|
50
51
|
{ id: 'MAX_QA_CYCLES', question: 'How many no-question QA cycles are allowed before pausing?', required: true, type: 'string' },
|
|
51
52
|
{ id: 'ACCEPTANCE_CRITERIA', question: 'List the QA completion criteria.', required: true, type: 'array_or_string' },
|
|
@@ -122,6 +123,7 @@ export function defaultQaGate(contract = {}, opts = {}) {
|
|
|
122
123
|
credentials_not_persisted: false,
|
|
123
124
|
ui_e2e_required: uiRequired,
|
|
124
125
|
ui_computer_use_evidence: !uiRequired,
|
|
126
|
+
ui_evidence_source: uiRequired ? null : 'not_required',
|
|
125
127
|
api_e2e_required: apiRequired,
|
|
126
128
|
unsafe_external_side_effects: false,
|
|
127
129
|
corrective_loop_enabled: corrective,
|
|
@@ -169,6 +171,10 @@ export async function evaluateQaGate(dir) {
|
|
|
169
171
|
if (positiveCount(gate.unresolved_fixable_findings)) reasons.push('unresolved_fixable_findings_remaining');
|
|
170
172
|
}
|
|
171
173
|
if (gate.unsafe_external_side_effects === true) reasons.push('unsafe_external_side_effects');
|
|
174
|
+
if (gate.ui_e2e_required === true) {
|
|
175
|
+
if (gate.ui_evidence_source !== CODEX_COMPUTER_USE_EVIDENCE_SOURCE) reasons.push('ui_evidence_source_not_codex_computer_use');
|
|
176
|
+
if (evidenceMentionsForbiddenBrowserAutomation({ evidence: gate.evidence, notes: gate.notes, ui_evidence_source: gate.ui_evidence_source })) reasons.push('forbidden_browser_automation_evidence');
|
|
177
|
+
}
|
|
172
178
|
if (!reportFile) reasons.push('qa_report_file_missing');
|
|
173
179
|
else if (!isQaReportFilename(reportFile)) reasons.push('qa_report_filename_prefix_invalid');
|
|
174
180
|
else if (!(await exists(path.join(dir, reportFile)))) reasons.push('qa_report_missing');
|
|
@@ -183,8 +189,9 @@ export async function writeMockQaResult(dir, mission, contract) {
|
|
|
183
189
|
const previousGate = await readJson(path.join(dir, 'qa-gate.json'), {});
|
|
184
190
|
const previousReportFile = qaReportFileFromGate(previousGate);
|
|
185
191
|
const reportFile = isQaReportFilename(previousReportFile) ? previousReportFile : qaReportFilename();
|
|
192
|
+
const uiRequired = qaUiRequired(contract.answers || {});
|
|
186
193
|
await writeTextAtomic(path.join(dir, reportFile), `# QA-LOOP Report\n\nMission: ${mission.id}\nMode: mock verification\n\nMock QA-LOOP completed. No live UI/API actions were executed.\n\n## Honest Mode\n\nThis is a mock smoke run for command verification, not production QA evidence.\n`);
|
|
187
|
-
await writeJsonAtomic(path.join(dir, 'qa-gate.json'), { ...defaultQaGate(contract, { reportFile }), passed:
|
|
194
|
+
await writeJsonAtomic(path.join(dir, 'qa-gate.json'), { ...defaultQaGate(contract, { reportFile }), passed: !uiRequired, qa_report_written: true, qa_ledger_complete: true, checklist_completed: true, safety_reviewed: true, credentials_not_persisted: true, ui_computer_use_evidence: !uiRequired, ui_evidence_source: uiRequired ? null : 'not_required', unresolved_findings: 0, unresolved_fixable_findings: 0, unsafe_or_deferred_findings: 0, post_fix_verification_complete: true, honest_mode_complete: true, evidence: ['mock QA-LOOP smoke completed'], notes: ['No live UI/API verification was claimed.'] });
|
|
188
195
|
return evaluateQaGate(dir);
|
|
189
196
|
}
|
|
190
197
|
|
|
@@ -196,7 +203,7 @@ TASK: ${mission.prompt}
|
|
|
196
203
|
CYCLE: ${cycle}
|
|
197
204
|
NO QUESTIONS: use decision-contract.json.
|
|
198
205
|
MODE: dogfood as human proxy; use real flows, fix safe code/test/docs now, then recheck.
|
|
199
|
-
UI:
|
|
206
|
+
UI: ${CODEX_COMPUTER_USE_ONLY_POLICY} Secrets runtime-only.
|
|
200
207
|
SAFETY: deployed read-only smoke; no destructive, billing, message, webhook, admin, bulk-write, global-config, or live-data edits unless contract allows.
|
|
201
208
|
GATE: passed=false while unresolved_findings or unresolved_fixable_findings > 0, or post_fix_verification_complete is not true.
|
|
202
209
|
ARTIFACTS: update qa-ledger.json, ${report}, qa-gate.json, and qa-loop/cycle-${cycle}/.
|
|
@@ -225,7 +232,7 @@ function qaChecklist(a) {
|
|
|
225
232
|
['preflight.roles', 'Map roles, permissions, protected areas.']
|
|
226
233
|
];
|
|
227
234
|
if (qaUiRequired(a)) cases.push(
|
|
228
|
-
['ui.computer_use_only',
|
|
235
|
+
['ui.computer_use_only', CODEX_COMPUTER_USE_ONLY_POLICY],
|
|
229
236
|
['ui.navigation', 'Check primary navigation, deep links, back/forward, refresh, and protected routes.'],
|
|
230
237
|
['ui.auth', 'Check login, logout, session expiry, unauthorized access, and role-specific visibility.'],
|
|
231
238
|
['ui.forms', 'Check required fields, validation, disabled states, success, and failure.'],
|
|
@@ -253,7 +260,7 @@ function qaChecklist(a) {
|
|
|
253
260
|
|
|
254
261
|
function qaReportTemplate(mission, contract, checklist) {
|
|
255
262
|
const a = contract.answers || {};
|
|
256
|
-
return `# QA-LOOP Report\n\nMission: ${mission.id}\nTarget: ${a.TARGET_BASE_URL || 'unset'}\nScope: ${a.QA_SCOPE || 'unset'}\nEnvironment: ${a.TARGET_ENVIRONMENT || 'unset'}\n\n## Safety\n\n- Deployed destructive tests: never\n- Credentials: temp-only, never saved\n- UI evidence:
|
|
263
|
+
return `# QA-LOOP Report\n\nMission: ${mission.id}\nTarget: ${a.TARGET_BASE_URL || 'unset'}\nScope: ${a.QA_SCOPE || 'unset'}\nEnvironment: ${a.TARGET_ENVIRONMENT || 'unset'}\n\n## Safety\n\n- Deployed destructive tests: never\n- Credentials: temp-only, never saved\n- UI evidence: ${CODEX_COMPUTER_USE_ONLY_POLICY}\n\n## Checklist\n\n${checklist.map((item) => `- [ ] ${item.id}: ${item.title}`).join('\n')}\n\n## Findings\n\nTBD\n\n## Corrections And Rechecks\n\nTBD\n\n## Honest Mode\n\nTBD\n`;
|
|
257
264
|
}
|
|
258
265
|
|
|
259
266
|
function positiveCount(value) {
|
package/src/core/questions.mjs
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import path from 'node:path';
|
|
2
2
|
import { writeJsonAtomic, writeTextAtomic } from './fsx.mjs';
|
|
3
3
|
import { buildQaLoopQuestionSchema } from './qa-loop.mjs';
|
|
4
|
-
import { FROM_CHAT_IMG_CHECKLIST_ARTIFACT, FROM_CHAT_IMG_COVERAGE_ARTIFACT, FROM_CHAT_IMG_QA_LOOP_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT, hasFromChatImgSignal } from './routes.mjs';
|
|
4
|
+
import { CODEX_COMPUTER_USE_ONLY_POLICY, FROM_CHAT_IMG_CHECKLIST_ARTIFACT, FROM_CHAT_IMG_COVERAGE_ARTIFACT, FROM_CHAT_IMG_QA_LOOP_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT, hasFromChatImgSignal } from './routes.mjs';
|
|
5
5
|
|
|
6
6
|
export function buildQuestionSchemaForRoute(route, prompt) {
|
|
7
7
|
if (String(route?.id || '') === 'QALoop') return buildQaLoopQuestionSchema(prompt);
|
|
@@ -96,7 +96,7 @@ export function inferAnswersForPrompt(prompt, explicitAnswers = {}) {
|
|
|
96
96
|
};
|
|
97
97
|
const criteria = {
|
|
98
98
|
version: [version ? `version refs are ${version}` : 'version refs advance consistently', 'publish:dry gate passes', 'npm publish is not run'],
|
|
99
|
-
chat_capture: ['From-Chat-IMG activates chat-image intake only here', 'all visible chat requirements are listed before implementation', `${FROM_CHAT_IMG_COVERAGE_ARTIFACT} maps every customer request, screenshot region, and attachment to work-order item(s)`, `${FROM_CHAT_IMG_CHECKLIST_ARTIFACT} is updated as each request, image match, work item, scoped QA-LOOP, and verification step is completed`, `${FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT} records temporary TriWiki-backed session context with retention metadata`, `${FROM_CHAT_IMG_QA_LOOP_ARTIFACT} proves QA-LOOP ran over the exact customer-request work-order range after implementation`, 'unresolved_items is empty before Team completion', 'scoped_qa_loop_completed is true with zero unresolved QA findings', 'Computer Use
|
|
99
|
+
chat_capture: ['From-Chat-IMG activates chat-image intake only here', 'all visible chat requirements are listed before implementation', `${FROM_CHAT_IMG_COVERAGE_ARTIFACT} maps every customer request, screenshot region, and attachment to work-order item(s)`, `${FROM_CHAT_IMG_CHECKLIST_ARTIFACT} is updated as each request, image match, work item, scoped QA-LOOP, and verification step is completed`, `${FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT} records temporary TriWiki-backed session context with retention metadata`, `${FROM_CHAT_IMG_QA_LOOP_ARTIFACT} proves QA-LOOP ran over the exact customer-request work-order range after implementation`, 'unresolved_items is empty before Team completion', 'scoped_qa_loop_completed is true with zero unresolved QA findings', 'Codex Computer Use visual inspection strengthens matches when available; no Playwright or browser automation substitute is allowed', CODEX_COMPUTER_USE_ONLY_POLICY, 'client requests follow normal SKS gates and verification'],
|
|
100
100
|
priority: ['strong feedback raises required_weight', 'request topics are counted in wiki packs', 'future inference uses priority signals'],
|
|
101
101
|
questions: ['predictable answers are inferred', 'partial answers can seal contracts', 'only unresolved changing slots remain visible'],
|
|
102
102
|
install: ['bootstrap/deps initialize readiness', 'missing runtime deps show repair actions', 'readiness output is concrete']
|
|
@@ -217,7 +217,7 @@ export function questionsMarkdown(schema) {
|
|
|
217
217
|
if (isQaLoop) {
|
|
218
218
|
lines.push('QA-LOOP는 이 질문들에 모두 답변하고 Decision Contract가 봉인된 뒤에만 실행됩니다.');
|
|
219
219
|
lines.push('로그인이 필요하면 테스트 전용 계정 정보만 임시 런타임 입력으로 제공해야 하며, answers.json/리포트/로그/wiki에는 절대 저장하지 않습니다.');
|
|
220
|
-
lines.push('UI 수준 E2E
|
|
220
|
+
lines.push('UI 수준 E2E와 시각 검증은 Codex Computer Use 증거가 없으면 검증 완료로 주장할 수 없습니다. Chrome MCP, Browser Use, Playwright, Selenium, Puppeteer, 기타 브라우저 자동화는 UI/브라우저 검증 증거로 인정하지 않습니다.');
|
|
221
221
|
lines.push('개발 서버가 아닌 배포/스테이징 도메인에서는 삭제성 테스트를 절대 실행하지 않습니다.');
|
|
222
222
|
} else {
|
|
223
223
|
lines.push('이 질문들에 모두 답변하고 Decision Contract가 봉인된 뒤에만 실행됩니다.');
|
package/src/core/routes.mjs
CHANGED
|
@@ -8,6 +8,19 @@ export const FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT = 'from-chat-img-temp-triwiki.j
|
|
|
8
8
|
export const FROM_CHAT_IMG_QA_LOOP_ARTIFACT = 'from-chat-img-qa-loop.json';
|
|
9
9
|
export const FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS = 5;
|
|
10
10
|
export const USAGE_TOPICS = 'install|setup|bootstrap|root|deps|cmux|auto-review|team|qa-loop|goal|research|db|codex-app|dfix|design|imagegen|dollar|context7|pipeline|reasoning|guard|conflicts|versioning|eval|harness|hproof|gx|wiki|code-structure';
|
|
11
|
+
export const CODEX_COMPUTER_USE_EVIDENCE_SOURCE = 'codex_computer_use';
|
|
12
|
+
export const CODEX_COMPUTER_USE_ONLY_POLICY = 'Pipeline UI/browser verification and visual inspection must use Codex Computer Use only. Do not use Playwright, Chrome MCP, Browser Use, Selenium, Puppeteer, or any other browser automation substitute; if Codex Computer Use is unavailable, mark the UI/browser evidence unverified instead of substituting another tool.';
|
|
13
|
+
export const FORBIDDEN_BROWSER_AUTOMATION_RE = /\b(playwright|chrome\s+mcp|browser\s+use|selenium|puppeteer)\b/i;
|
|
14
|
+
|
|
15
|
+
export function evidenceMentionsForbiddenBrowserAutomation(value, seen = new Set()) {
|
|
16
|
+
if (value == null) return false;
|
|
17
|
+
if (typeof value === 'string') return FORBIDDEN_BROWSER_AUTOMATION_RE.test(value);
|
|
18
|
+
if (typeof value !== 'object') return false;
|
|
19
|
+
if (seen.has(value)) return false;
|
|
20
|
+
seen.add(value);
|
|
21
|
+
if (Array.isArray(value)) return value.some((item) => evidenceMentionsForbiddenBrowserAutomation(item, seen));
|
|
22
|
+
return Object.values(value).some((item) => evidenceMentionsForbiddenBrowserAutomation(item, seen));
|
|
23
|
+
}
|
|
11
24
|
|
|
12
25
|
export const RECOMMENDED_MCP_SERVERS = [
|
|
13
26
|
{
|
|
@@ -110,7 +123,7 @@ export function triwikiStagePolicyText(commandPrefix = 'sks') {
|
|
|
110
123
|
}
|
|
111
124
|
|
|
112
125
|
export function chatCaptureIntakeText() {
|
|
113
|
-
return `From-Chat-IMG intake: explicit signal only. Select forensic visual effort. Treat uploads as chat screenshot plus originals, use Computer Use
|
|
126
|
+
return `From-Chat-IMG intake: explicit signal only. Select forensic visual effort. Treat uploads as chat screenshot plus originals, use Codex Computer Use visual inspection when available, list requirements first in source order, match regions to attachments with confidence, and write ${FROM_CHAT_IMG_WORK_ORDER_ARTIFACT}, ${FROM_CHAT_IMG_SOURCE_INVENTORY_ARTIFACT}, ${FROM_CHAT_IMG_VISUAL_MAP_ARTIFACT}, ${FROM_CHAT_IMG_COVERAGE_ARTIFACT}, ${FROM_CHAT_IMG_CHECKLIST_ARTIFACT}, ${FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT}, and ${FROM_CHAT_IMG_QA_LOOP_ARTIFACT}. ${CODEX_COMPUTER_USE_ONLY_POLICY} Preserve each visible customer request as source-bound text, account for every screenshot image region and separate attachment, map each item to work-order actions, perform the customer-request work, then run a scoped QA-LOOP over that exact work-order range before Team completion. Update checklist checkboxes as work proceeds until all boxes are checked, unresolved_items is empty, scoped_qa_loop_completed=true, QA unresolved findings are zero, and schema validation passes. ${FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT} is temporary TriWiki-backed session context with expires_after_sessions=${FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS}, so it can be forgotten by retention after enough later sessions. Do not assume ordinary image prompts are chat captures.`;
|
|
114
127
|
}
|
|
115
128
|
|
|
116
129
|
export function noUnrequestedFallbackCodePolicyText() {
|