sneakoscope 3.1.7 → 3.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/README.md +9 -2
  2. package/crates/sks-core/Cargo.lock +1 -1
  3. package/crates/sks-core/Cargo.toml +1 -1
  4. package/crates/sks-core/src/main.rs +1 -1
  5. package/dist/.sks-build-stamp.json +4 -4
  6. package/dist/bin/sks.js +1 -1
  7. package/dist/cli/args.js +17 -0
  8. package/dist/cli/command-registry.js +16 -13
  9. package/dist/cli/router.js +8 -5
  10. package/dist/commands/doctor.js +97 -2
  11. package/dist/core/codex-app/codex-skill-sync.js +80 -154
  12. package/dist/core/codex-native/core-skill-integrity.js +89 -0
  13. package/dist/core/codex-native/core-skill-manifest.js +156 -0
  14. package/dist/core/codex-native/native-capability-postcheck.js +35 -0
  15. package/dist/core/codex-native/native-capability-repair-matrix.js +210 -0
  16. package/dist/core/codex-native/native-capability-repair.js +47 -0
  17. package/dist/core/codex-native/native-media-computer-repair.js +5 -0
  18. package/dist/core/codex-native/project-skill-dedupe.js +109 -0
  19. package/dist/core/codex-native/skill-name-canonicalizer.js +21 -0
  20. package/dist/core/codex-native/skill-registry-ledger.js +85 -0
  21. package/dist/core/commands/basic-cli.js +19 -10
  22. package/dist/core/commands/mad-sks-command.js +36 -13
  23. package/dist/core/commands/naruto-command.js +4 -1
  24. package/dist/core/commands/pipeline-command.js +3 -4
  25. package/dist/core/commands/qa-loop-command.js +36 -1
  26. package/dist/core/commands/research-command.js +61 -1
  27. package/dist/core/commands/team-command.js +63 -3
  28. package/dist/core/config/config-migration-journal.js +27 -0
  29. package/dist/core/config/managed-config-merge.js +105 -0
  30. package/dist/core/config/secret-preservation.js +169 -0
  31. package/dist/core/config/supabase-secret-preservation.js +29 -0
  32. package/dist/core/decision-contract.js +28 -4
  33. package/dist/core/doctor/command-alias-cleanup.js +64 -0
  34. package/dist/core/doctor/doctor-native-capability-repair.js +48 -0
  35. package/dist/core/feature-fixtures.js +2 -0
  36. package/dist/core/feature-registry.js +2 -2
  37. package/dist/core/fsx.js +1 -1
  38. package/dist/core/init.js +5 -1
  39. package/dist/core/naruto/naruto-work-graph.js +4 -1
  40. package/dist/core/pipeline-internals/runtime-core.js +50 -4
  41. package/dist/core/pipeline-internals/runtime-gates.js +10 -1
  42. package/dist/core/proof/route-proof-gate.js +1 -1
  43. package/dist/core/qa-loop.js +227 -11
  44. package/dist/core/questions.js +239 -2
  45. package/dist/core/routes.js +3 -4
  46. package/dist/core/version.js +1 -1
  47. package/dist/scripts/agent-native-release-gate.js +13 -4
  48. package/dist/scripts/sizecheck.js +8 -2
  49. package/dist/scripts/sks-3-1-8-check-lib.js +30 -0
  50. package/package.json +27 -1
@@ -3,7 +3,7 @@ import path from 'node:path';
3
3
  import { appendJsonl, exists, nowIso, readJson, readText, writeJsonAtomic, writeTextAtomic } from '../fsx.js';
4
4
  import { containsUserQuestion, noQuestionContinuationReason } from '../no-question-guard.js';
5
5
  import { createMission, missionDir, setCurrent } from '../mission.js';
6
- import { buildQuestionSchemaForRoute, writeQuestions } from '../questions.js';
6
+ import { buildQuestionSchemaForRoute, buildRequestIntake, REQUEST_INTAKE_ARTIFACT, writeQuestions } from '../questions.js';
7
7
  import { sealContract } from '../decision-contract.js';
8
8
  import { scanDbSafety } from '../db-safety.js';
9
9
  import { GOAL_WORKFLOW_ARTIFACT, writeGoalWorkflow } from '../goal-workflow.js';
@@ -81,6 +81,8 @@ function reflectionInstructionText(commandPrefix = 'sks') {
81
81
  export function buildPipelinePlan(input = {}) {
82
82
  const route = input.route || routePrompt(input.task || '$SKS');
83
83
  const task = String(input.task || '').trim();
84
+ const requestIntake = input.requestIntake || null;
85
+ const executionPrompt = String(requestIntake?.transformed_prompt || task || '').trim();
84
86
  const ambiguity = normalizeAmbiguity(input.ambiguity, route);
85
87
  const proof = normalizeProofField(input.proofField);
86
88
  const lane = selectPipelineLane(route, task, proof);
@@ -106,6 +108,18 @@ export function buildPipelinePlan(input = {}) {
106
108
  reflection_required: reflectionRequiredForRoute(route)
107
109
  },
108
110
  task,
111
+ request_intake: requestIntake ? {
112
+ artifact: REQUEST_INTAKE_ARTIFACT,
113
+ prompt_hash: requestIntake.prompt_hash || null,
114
+ interpreted_goal: requestIntake.interpreted_intent?.goal || null,
115
+ requirement_count: Array.isArray(requestIntake.requirements) ? requestIntake.requirements.length : 0,
116
+ transformed_prompt_available: Boolean(requestIntake.transformed_prompt),
117
+ wiki_context_used: requestIntake.wiki_context_used?.source || null
118
+ } : {
119
+ artifact: REQUEST_INTAKE_ARTIFACT,
120
+ status: 'not_attached'
121
+ },
122
+ execution_prompt: executionPrompt,
109
123
  ambiguity_gate: ambiguity,
110
124
  runtime_lane: lane,
111
125
  stages,
@@ -130,10 +144,37 @@ export function buildPipelinePlan(input = {}) {
130
144
  };
131
145
  }
132
146
  export async function writePipelinePlan(dir, input = {}) {
133
- const plan = buildPipelinePlan(input);
147
+ const requestIntake = input.requestIntake || await writeRequestIntakeArtifact(dir, input);
148
+ const plan = buildPipelinePlan({ ...input, requestIntake });
134
149
  await writeJsonAtomic(path.join(dir, PIPELINE_PLAN_ARTIFACT), plan);
135
150
  return plan;
136
151
  }
152
+ export async function writeRequestIntakeArtifact(dir, input = {}) {
153
+ const file = path.join(dir, REQUEST_INTAKE_ARTIFACT);
154
+ if (!input.requestIntake && !input.forceRequestIntakeRewrite) {
155
+ const existing = await readJson(file, null);
156
+ if (existing)
157
+ return existing;
158
+ }
159
+ const root = input.root || rootFromMissionDir(dir);
160
+ const wikiContext = input.wikiContext !== undefined
161
+ ? input.wikiContext
162
+ : await readJson(path.join(root, '.sneakoscope', 'wiki', 'context-pack.json'), null);
163
+ const intake = input.requestIntake || buildRequestIntake(input.task || '', {}, {
164
+ wikiContext,
165
+ route: input.route || null
166
+ });
167
+ await writeJsonAtomic(file, intake);
168
+ return intake;
169
+ }
170
+ function rootFromMissionDir(dir) {
171
+ const resolved = path.resolve(dir);
172
+ const parts = resolved.split(path.sep);
173
+ const idx = parts.lastIndexOf('.sneakoscope');
174
+ if (idx > 0)
175
+ return parts.slice(0, idx).join(path.sep) || path.sep;
176
+ return path.resolve(resolved, '..', '..', '..');
177
+ }
137
178
  export function validatePipelinePlan(plan = {}) {
138
179
  const issues = [];
139
180
  if (plan.schema_version !== PIPELINE_PLAN_SCHEMA_VERSION)
@@ -336,12 +377,13 @@ function planVerification(route, proof) {
336
377
  function planNextActions(route, task, ambiguity, lane, agentPolicy = normalizeAgentPolicy(route, task, {})) {
337
378
  if (ambiguity.required && !ambiguity.passed) {
338
379
  return [
380
+ `read ${REQUEST_INTAKE_ARTIFACT} and preserve its source-order requirements`,
339
381
  'auto-seal execution contract from inferred answers',
340
382
  ...(looksLikeProblemSolvingRequest(task) ? ['run Solution Scout web search for similar fixes before editing'] : []),
341
383
  'continue with decision-contract.json'
342
384
  ];
343
385
  }
344
- const actions = ['read pipeline-plan.json before work', 'execute kept stages only', 'run listed verification'];
386
+ const actions = [`read ${REQUEST_INTAKE_ARTIFACT} and use its transformed_prompt`, 'read pipeline-plan.json before work', 'execute kept stages only', 'run listed verification'];
345
387
  if (agentPolicy.required)
346
388
  actions.splice(1, 0, 'run sks agents run latest --json before implementation');
347
389
  if (!lane.fast_lane_allowed && routeRequiresSubagents(route, task)) {
@@ -375,6 +417,7 @@ export function promptPipelineContext(prompt, route = null) {
375
417
  'Hook visibility limit: hooks can inject context/status or block/continue a turn, but they cannot create arbitrary live chat bubbles; use team events, mission files, or normal assistant updates for live transcript details.',
376
418
  'Ambient Goal continuation: even without an explicit $Goal keyword, use Codex native /goal persistence when it helps keep long work resumable and complete; do not let it replace or skip the selected SKS route gates.',
377
419
  'Route contract: execution routes infer contract answers from the prompt, TriWiki/current-code defaults, and conservative SKS policy. DFix and Answer bypass stateful execution because they do not start implementation.',
420
+ `Wiki-informed request intake: when a mission exists, read ${REQUEST_INTAKE_ARTIFACT} before execution; preserve every source-order requirement, apply TriWiki attention/use_first and hydrate_first context, and execute request_intake.transformed_prompt through the selected route instead of relying on the vague original wording alone.`,
378
421
  'Plan-first interaction: when ambiguity questions are truly required, show the user only the missing human decision(s), then seal the decision contract internally and execute/verify.',
379
422
  'Question-shaped directive policy: before using Answer, decide whether a question is a real information request or an implicit instruction/complaint about broken behavior. Rhetorical bug reports, mandatory-policy statements, and "why is this not happening?" execution complaints must route to Naruto, not Answer.',
380
423
  'Best-practice prompt shape: extract Goal, Context, Constraints, and Done-when before implementation; keep questions compact and only ask for answers that can change scope, safety, user-facing behavior, or acceptance criteria.',
@@ -658,7 +701,8 @@ async function activePipelinePlanNote(root, state = {}) {
658
701
  const kept = plan.stage_summary?.kept ?? plan.kept_stages?.length ?? 0;
659
702
  const skipped = plan.stage_summary?.skipped ?? plan.skipped_stages?.length ?? 0;
660
703
  const next = Array.isArray(plan.next_actions) && plan.next_actions.length ? ` Next planned action: ${plan.next_actions[0]}.` : '';
661
- return ` Pipeline plan: .sneakoscope/missions/${state.mission_id}/${PIPELINE_PLAN_ARTIFACT} (${lane}; kept=${kept}, skipped=${skipped}).${next}`;
704
+ const intake = plan.request_intake?.artifact ? ` Request intake: .sneakoscope/missions/${state.mission_id}/${plan.request_intake.artifact}; execution prompt=${plan.request_intake.transformed_prompt_available ? 'available' : 'missing'}.` : '';
705
+ return ` Pipeline plan: .sneakoscope/missions/${state.mission_id}/${PIPELINE_PLAN_ARTIFACT} (${lane}; kept=${kept}, skipped=${skipped}).${intake}${next}`;
662
706
  }
663
707
  async function prepareGoal(root, route, task, required) {
664
708
  const { id, dir, mission } = await createMission(root, { mode: 'goal', prompt: task });
@@ -1150,6 +1194,8 @@ function routeContext(route, id, task, required, next) {
1150
1194
  ${route.command} route prepared.
1151
1195
  Mission: ${id}
1152
1196
  Task: ${visibleTask}
1197
+ Request intake: .sneakoscope/missions/${id}/${REQUEST_INTAKE_ARTIFACT}
1198
+ Execution prompt: request-intake.transformed_prompt
1153
1199
  Pipeline plan: .sneakoscope/missions/${id}/${PIPELINE_PLAN_ARTIFACT}
1154
1200
  Required skills: ${route.requiredSkills.join(', ')}
1155
1201
  Stop gate: ${route.stopGate}
@@ -4,6 +4,7 @@ import { appendJsonl, exists, nowIso, readJson, readText, writeJsonAtomic } from
4
4
  import { containsUserQuestion, noQuestionContinuationReason } from '../no-question-guard.js';
5
5
  import { missionDir } from '../mission.js';
6
6
  import { evaluateResearchGate } from '../research.js';
7
+ import { evaluateQaGate } from '../qa-loop.js';
7
8
  import { PPT_REQUIRED_GATE_FIELDS } from '../ppt.js';
8
9
  import { validateFinalHonestModeReport } from '../artifact-schemas.js';
9
10
  import { IMAGE_UX_REVIEW_GATE_ARTIFACT, IMAGE_UX_REVIEW_POLICY_ARTIFACT, IMAGE_UX_REVIEW_SCREEN_INVENTORY_ARTIFACT, IMAGE_UX_REVIEW_GENERATED_REVIEW_LEDGER_ARTIFACT, IMAGE_UX_REVIEW_ISSUE_LEDGER_ARTIFACT, IMAGE_UX_REVIEW_ITERATION_REPORT_ARTIFACT, IMAGE_UX_REVIEW_REQUIRED_GATE_FIELDS, IMAGE_UX_REVIEW_REFERENCE_GATE_FIELDS, IMAGE_UX_REVIEW_HONEST_MODE_ARTIFACT, imageUxReviewGateAllowsReferenceCloseout } from '../image-ux-review.js';
@@ -392,7 +393,9 @@ function missingRequiredGateFields(file, state, gate = {}) {
392
393
  if (file === 'qa-gate.json' || mode === 'QALOOP') {
393
394
  const required = ['clarification_contract_sealed', 'qa_report_written', 'qa_ledger_complete', 'checklist_completed', 'safety_reviewed', 'deployed_destructive_tests_blocked', 'credentials_not_persisted', 'honest_mode_complete'];
394
395
  if (gate.ui_e2e_required === true)
395
- required.push('chrome_extension_preflight_passed', 'ui_chrome_extension_evidence');
396
+ required.push('chrome_extension_preflight_passed', 'ui_chrome_extension_evidence', 'ui_chrome_extension_screenshot_captured');
397
+ if (gate.gpt_image_2_annotated_review_required === true)
398
+ required.push('gpt_image_2_annotated_review_generated');
396
399
  return required.filter((key) => gate[key] !== true);
397
400
  }
398
401
  if (file === 'ppt-gate.json' || mode === 'PPT') {
@@ -421,6 +424,12 @@ async function missingRequiredGateArtifacts(root, file, state, gate = {}) {
421
424
  return [];
422
425
  return (evaluated.reasons || ['research_gate_blocked']).map((reason) => `research-gate:${reason}`);
423
426
  }
427
+ if (file === 'qa-gate.json' || mode === 'QALOOP') {
428
+ const evaluated = await evaluateQaGate(missionDir(root, state.mission_id));
429
+ if (evaluated.passed === true)
430
+ return [];
431
+ return (evaluated.reasons || ['qa_gate_blocked']).map((reason) => `qa-gate:${reason}`);
432
+ }
424
433
  if (file === IMAGE_UX_REVIEW_GATE_ARTIFACT || mode === 'IMAGE_UX_REVIEW')
425
434
  return missingImageUxReviewArtifacts(root, state, gate);
426
435
  if (file === 'naruto-gate.json' || mode === 'NARUTO')
@@ -41,7 +41,7 @@ export async function validateRouteCompletionProof(root, { missionId = null, rou
41
41
  if (agentCount < 5)
42
42
  issues.push('agent_count_below_5');
43
43
  if (agentCount > maxAgentCount)
44
- issues.push(`agent_count_above_${maxAgentCount}`);
44
+ issues.push(normalizedRoute === '$Naruto' ? 'agent_count_above_100' : 'agent_count_above_20');
45
45
  if (agents.all_sessions_closed !== true)
46
46
  issues.push('agent_sessions_not_closed');
47
47
  if (agents.no_overlap_ok !== true)
@@ -1,12 +1,16 @@
1
1
  import path from 'node:path';
2
2
  import { exists, nowIso, readJson, readText, writeJsonAtomic, writeTextAtomic, PACKAGE_VERSION } from './fsx.js';
3
- import { CODEX_WEB_VERIFICATION_EVIDENCE_SOURCE, CODEX_WEB_VERIFICATION_POLICY, evidenceMentionsForbiddenBrowserAutomation, evidenceMentionsForbiddenWebComputerUseEvidence } from './routes.js';
3
+ import { CODEX_APP_IMAGE_GENERATION_DOC_URL, CODEX_IMAGEGEN_REQUIRED_POLICY, CODEX_WEB_VERIFICATION_EVIDENCE_SOURCE, CODEX_WEB_VERIFICATION_POLICY, evidenceMentionsForbiddenBrowserAutomation, evidenceMentionsForbiddenWebComputerUseEvidence } from './routes.js';
4
4
  import { appendAgentLedgerEvent, initializeAgentCentralLedger } from './agents/agent-central-ledger.js';
5
5
  import { resolveCodexAppExecutionProfile } from './codex-app/codex-app-execution-profile.js';
6
6
  import { resolveCodexNativeInvocationPlan } from './codex-native/codex-native-invocation-router.js';
7
+ import { imageDimensions, sha256File } from './wiki-image/image-hash.js';
7
8
  export const QA_LOOP_ROUTE = 'QALoop';
9
+ export const QA_LOOP_VISUAL_EVIDENCE_ARTIFACT = 'qa-loop/visual-evidence.json';
8
10
  const QA_REPORT_SUFFIX = 'qa-report.md';
9
11
  const UI_CHROME_EXTENSION_FIRST_ACK = 'use_codex_chrome_extension_first_no_computer_use_for_web_ui_or_mark_unverified';
12
+ const GPT_IMAGE_2_ANNOTATED_REVIEW_REQUIRED_ACK = 'yes_gpt_image_2_annotated_review';
13
+ const IMAGE_FILE_RE = /\.(png|jpe?g|webp|gif)$/i;
10
14
  export const QA_NATIVE_AGENT_PERSONAS = Object.freeze([
11
15
  {
12
16
  id: 'qa_verifier_ui',
@@ -108,6 +112,9 @@ function promptText(prompt = '') {
108
112
  function lowerPrompt(prompt = '') {
109
113
  return promptText(prompt).toLowerCase();
110
114
  }
115
+ function qaPromptWantsGptImage2AnnotatedReview(prompt = '') {
116
+ return /(gpt-image-2|gpt\s*image\s*2|imagegen|\$imagegen|annotated\s+review|annotated\s+image|callout|generated\s+review\s+image|이미지\s*리뷰|생성\s*이미지|주석\s*이미지|콜아웃)/i.test(promptText(prompt));
117
+ }
111
118
  function firstUrl(prompt = '') {
112
119
  return promptText(prompt).match(/https?:\/\/[^\s)\]}>,]+/i)?.[0] || '';
113
120
  }
@@ -152,6 +159,16 @@ export function inferQaLoopAnswers(prompt = '') {
152
159
  const local = environment === 'local_dev_server';
153
160
  const login = loginPolicyFromPrompt(text);
154
161
  const scope = qaScopeFromPrompt(text);
162
+ const wantsGptImage2Review = isUiScope(scope) && qaPromptWantsGptImage2AnnotatedReview(text);
163
+ const acceptance = [
164
+ '앱 첫 화면 또는 지정된 대상이 정상 로드된다.',
165
+ '주요 내비게이션과 핵심 화면 진입에서 콘솔/화면상 치명 오류가 없다.',
166
+ '검증하지 못한 UI/API 범위는 통과로 주장하지 않고 QA 리포트에 남긴다.'
167
+ ];
168
+ if (isUiScope(scope))
169
+ acceptance.push('UI E2E 통과 증거는 실제 Codex Chrome Extension screenshot artifact path와 sha256을 기록해야 한다.');
170
+ if (wantsGptImage2Review)
171
+ acceptance.push('gpt-image-2 annotated review image가 필요한 경우 실제 Codex App $imagegen/gpt-image-2 출력 파일 path, sha256, model, provider를 기록해야 한다.');
155
172
  return {
156
173
  GOAL_PRECISE: text ? `현재 요청 범위에서 QA-LOOP를 안전하게 실행한다: ${text}` : '현재 로컬 개발 환경에서 핵심 사용자 흐름을 안전하게 QA한다.',
157
174
  QA_SCOPE: scope,
@@ -165,13 +182,10 @@ export function inferQaLoopAnswers(prompt = '') {
165
182
  ...login,
166
183
  CREDENTIAL_STORAGE_ACK: 'never_store_credentials_in_artifacts_or_wiki',
167
184
  UI_CHROME_EXTENSION_ACK: UI_CHROME_EXTENSION_FIRST_ACK,
185
+ QA_VISUAL_REVIEW_IMAGEGEN_REQUIRED: wantsGptImage2Review ? GPT_IMAGE_2_ANNOTATED_REVIEW_REQUIRED_ACK : 'not_required',
168
186
  TEAM_MODE_ALLOWED: 'no_parent_only',
169
187
  MAX_QA_CYCLES: '1',
170
- ACCEPTANCE_CRITERIA: [
171
- '앱 첫 화면 또는 지정된 대상이 정상 로드된다.',
172
- '주요 내비게이션과 핵심 화면 진입에서 콘솔/화면상 치명 오류가 없다.',
173
- '검증하지 못한 UI/API 범위는 통과로 주장하지 않고 QA 리포트에 남긴다.'
174
- ],
188
+ ACCEPTANCE_CRITERIA: acceptance,
175
189
  NON_GOALS: [
176
190
  '결제, 실제 이메일/SMS 발송, 관리자 권한 변경, 데이터 삭제, 프로덕션 데이터 변경은 테스트하지 않는다.'
177
191
  ],
@@ -290,10 +304,22 @@ export function qaUiRequired(a = {}) {
290
304
  export function qaApiRequired(a = {}) {
291
305
  return a.QA_SCOPE === 'all_available' ? hasApiTarget(a) : isApiScope(a.QA_SCOPE);
292
306
  }
307
+ export function qaGptImage2AnnotatedReviewRequired(contractOrAnswers = {}, prompt = '') {
308
+ const answers = contractOrAnswers?.answers || contractOrAnswers || {};
309
+ if (!qaUiRequired(answers))
310
+ return false;
311
+ const explicit = String(answers.QA_VISUAL_REVIEW_IMAGEGEN_REQUIRED || answers.GPT_IMAGE_2_ANNOTATED_REVIEW_REQUIRED || '').trim();
312
+ if (/^(yes|true|required|yes_gpt_image_2_annotated_review)$/i.test(explicit))
313
+ return true;
314
+ if (/^(no|false|not_required|none)$/i.test(explicit))
315
+ return false;
316
+ return qaPromptWantsGptImage2AnnotatedReview(`${prompt || ''}\n${answers.GOAL_PRECISE || ''}\n${JSON.stringify(answers.ACCEPTANCE_CRITERIA || [])}`);
317
+ }
293
318
  export function defaultQaGate(contract = {}, opts = {}) {
294
319
  const a = contract.answers || {};
295
320
  const uiRequired = qaUiRequired(a);
296
321
  const apiRequired = qaApiRequired(a);
322
+ const gptImage2ReviewRequired = qaGptImage2AnnotatedReviewRequired(contract, contract.prompt);
297
323
  const reportFile = opts.reportFile || qaReportFilename();
298
324
  const corrective = a.QA_CORRECTIVE_POLICY !== 'report_only_no_code_changes';
299
325
  return {
@@ -311,6 +337,17 @@ export function defaultQaGate(contract = {}, opts = {}) {
311
337
  ui_chrome_extension_evidence: !uiRequired,
312
338
  ui_computer_use_evidence: false,
313
339
  ui_evidence_source: uiRequired ? null : 'not_required',
340
+ ui_chrome_extension_screenshot_required: uiRequired,
341
+ ui_chrome_extension_screenshot_captured: !uiRequired,
342
+ ui_chrome_extension_screenshot_artifact: null,
343
+ ui_chrome_extension_screenshot_sha256: null,
344
+ gpt_image_2_annotated_review_required: gptImage2ReviewRequired,
345
+ gpt_image_2_annotated_review_generated: !gptImage2ReviewRequired,
346
+ gpt_image_2_annotated_review_artifact: null,
347
+ gpt_image_2_annotated_review_sha256: null,
348
+ gpt_image_2_annotated_review_model: gptImage2ReviewRequired ? null : 'not_required',
349
+ gpt_image_2_annotated_review_provider: gptImage2ReviewRequired ? null : 'not_required',
350
+ qa_visual_evidence_artifact: QA_LOOP_VISUAL_EVIDENCE_ARTIFACT,
314
351
  desktop_app_handoff_required: false,
315
352
  desktop_app_handoff_status: 'not_requested',
316
353
  desktop_app_handoff_artifact: null,
@@ -360,13 +397,48 @@ export async function writeQaLoopArtifacts(dir, mission, contract) {
360
397
  codex_app_execution_profile: executionProfile ? compactExecutionProfile(executionProfile) : null,
361
398
  codex_native_invocation: codexNativeInvocation,
362
399
  target: { scope: a.QA_SCOPE, environment: a.TARGET_ENVIRONMENT, base_url: a.TARGET_BASE_URL, api_base_url: a.API_BASE_URL },
363
- safety: { mutation_policy: a.QA_MUTATION_POLICY, deployed_destructive_tests_allowed: 'never', credentials: 'temp_only_never_saved', ui_evidence: 'codex_chrome_extension_first_required_for_web_ui_e2e' },
400
+ safety: { mutation_policy: a.QA_MUTATION_POLICY, deployed_destructive_tests_allowed: 'never', credentials: 'temp_only_never_saved', ui_evidence: 'codex_chrome_extension_first_required_for_web_ui_e2e', visual_review: 'gpt_image_2_annotated_review_required_when_contract_requests_it' },
364
401
  checklist
365
402
  });
403
+ await writeJsonAtomic(path.join(dir, QA_LOOP_VISUAL_EVIDENCE_ARTIFACT), buildQaLoopVisualEvidenceArtifact(mission, contract));
366
404
  await writeJsonAtomic(path.join(dir, 'qa-gate.json'), defaultQaGate(contract, { reportFile, executionProfile, codexNativeInvocation }));
367
405
  await writeTextAtomic(path.join(dir, reportFile), qaReportTemplate(mission, contract, checklist));
368
406
  return { checklist_count: checklist.length, report_file: reportFile };
369
407
  }
408
+ export async function ensureQaLoopVisualEvidenceContract(dir, mission = {}, contract = {}) {
409
+ const visualPath = path.join(dir, QA_LOOP_VISUAL_EVIDENCE_ARTIFACT);
410
+ if (!(await exists(visualPath))) {
411
+ await writeJsonAtomic(visualPath, buildQaLoopVisualEvidenceArtifact(mission, contract));
412
+ }
413
+ const gatePath = path.join(dir, 'qa-gate.json');
414
+ const gate = await readJson(gatePath, null);
415
+ if (!gate)
416
+ return;
417
+ const defaults = defaultQaGate(contract, { reportFile: qaReportFileFromGate(gate) || qaReportFilename() });
418
+ const keys = [
419
+ 'ui_chrome_extension_screenshot_required',
420
+ 'ui_chrome_extension_screenshot_captured',
421
+ 'ui_chrome_extension_screenshot_artifact',
422
+ 'ui_chrome_extension_screenshot_sha256',
423
+ 'gpt_image_2_annotated_review_required',
424
+ 'gpt_image_2_annotated_review_generated',
425
+ 'gpt_image_2_annotated_review_artifact',
426
+ 'gpt_image_2_annotated_review_sha256',
427
+ 'gpt_image_2_annotated_review_model',
428
+ 'gpt_image_2_annotated_review_provider',
429
+ 'qa_visual_evidence_artifact'
430
+ ];
431
+ const next = { ...gate };
432
+ let changed = false;
433
+ for (const key of keys) {
434
+ if (next[key] === undefined) {
435
+ next[key] = defaults[key];
436
+ changed = true;
437
+ }
438
+ }
439
+ if (changed)
440
+ await writeJsonAtomic(gatePath, next);
441
+ }
370
442
  export async function evaluateQaGate(dir) {
371
443
  const gate = await readJson(path.join(dir, 'qa-gate.json'), {});
372
444
  const reportFile = qaReportFileFromGate(gate);
@@ -400,6 +472,10 @@ export async function evaluateQaGate(dir) {
400
472
  reasons.push('forbidden_browser_automation_evidence');
401
473
  if (evidenceMentionsForbiddenWebComputerUseEvidence({ evidence: gate.evidence, ui_evidence_source: gate.ui_evidence_source }))
402
474
  reasons.push('computer_use_web_evidence_forbidden');
475
+ reasons.push(...await missingQaLoopVisualEvidence(dir, gate));
476
+ }
477
+ else if (gate.gpt_image_2_annotated_review_required === true) {
478
+ reasons.push(...await missingQaLoopVisualEvidence(dir, gate));
403
479
  }
404
480
  if (gate.desktop_app_handoff_required === true) {
405
481
  if (!['pending', 'launched_pending_confirmation', 'completed'].includes(String(gate.desktop_app_handoff_status || '')))
@@ -424,8 +500,9 @@ export async function evaluateQaGate(dir) {
424
500
  reasons.push('qa_report_missing');
425
501
  if (!(await exists(path.join(dir, 'qa-ledger.json'))))
426
502
  reasons.push('qa_ledger_missing');
427
- const passed = gate.passed === true && reasons.length === 0;
428
- const result = { checked_at: nowIso(), passed, reasons, gate };
503
+ const uniqueReasons = [...new Set(reasons)];
504
+ const passed = gate.passed === true && uniqueReasons.length === 0;
505
+ const result = { checked_at: nowIso(), passed, reasons: uniqueReasons, gate };
429
506
  await writeJsonAtomic(path.join(dir, 'qa-gate.evaluated.json'), result);
430
507
  return result;
431
508
  }
@@ -514,12 +591,19 @@ ARTIFACTS: update qa-ledger.json, ${report}, qa-gate.json, and qa-loop/cycle-${c
514
591
  CONTRACT:
515
592
  ${JSON.stringify(contract, null, 2)}
516
593
  ${imageContractText}${appHandoffText}${executionProfileText}
594
+ VISUAL EVIDENCE CONTRACT:
595
+ - For web UI QA, do not set chrome_extension_preflight_passed/ui_chrome_extension_evidence to true unless the Codex Chrome Extension path is ready and ${QA_LOOP_VISUAL_EVIDENCE_ARTIFACT} records a real saved Chrome Extension screenshot artifact with path, sha256, and dimensions.
596
+ - If decision-contract.json answers set QA_VISUAL_REVIEW_IMAGEGEN_REQUIRED=${GPT_IMAGE_2_ANNOTATED_REVIEW_REQUIRED_ACK}, use Codex App $imagegen/gpt-image-2 (${CODEX_APP_IMAGE_GENERATION_DOC_URL}) to produce a real generated annotated review image from the Chrome Extension screenshot. Record its path, sha256, model=gpt-image-2, provider=Codex App $imagegen, and source_screenshot_artifact in ${QA_LOOP_VISUAL_EVIDENCE_ARTIFACT} and qa-gate.json.
597
+ - Do not substitute prose-only critique, Playwright/Selenium/Puppeteer/Browser Use screenshots, Computer Use browser screenshots, placeholder images, fake fixtures, or direct API fallback as full web UI visual evidence.
517
598
  Previous tail:
518
599
  ${String(previous || '').slice(-2500)}
519
600
  `;
520
601
  }
521
602
  export async function qaStatus(dir) {
522
- const gate = await readJson(path.join(dir, 'qa-gate.evaluated.json'), await readJson(path.join(dir, 'qa-gate.json'), null));
603
+ const mission = await readJson(path.join(dir, 'mission.json'), {});
604
+ const contract = await readJson(path.join(dir, 'decision-contract.json'), { prompt: mission.prompt, answers: {}, sealed_hash: null });
605
+ await ensureQaLoopVisualEvidenceContract(dir, mission, contract).catch(() => undefined);
606
+ const gate = await evaluateQaGate(dir).catch(async () => await readJson(path.join(dir, 'qa-gate.evaluated.json'), await readJson(path.join(dir, 'qa-gate.json'), null)));
523
607
  const ledger = await readJson(path.join(dir, 'qa-ledger.json'), null);
524
608
  const appHandoff = await readJson(path.join(dir, 'qa-loop', 'app-handoff.json'), null);
525
609
  const appConfirmation = await readJson(path.join(dir, 'qa-loop', 'app-handoff-confirmation.json'), null);
@@ -545,6 +629,138 @@ function qaChecklist(a) {
545
629
  cases.push(['report.evidence', 'Record pass/fail/blocked/skipped with evidence.'], ['report.corrective_loop', 'Record fixes, rechecks, unresolved findings, deferred blockers.'], ['report.honest', 'Run Honest Mode.']);
546
630
  return cases.map(([id, title]) => ({ id, title, status: 'pending', evidence: [] }));
547
631
  }
632
/**
 * Build the initial visual-evidence ledger for a QA-LOOP mission.
 *
 * Every artifact slot starts empty (`null` path/hash/dimensions); each section
 * is marked `pending` when it is required and `not_required` otherwise.
 * Only a required Chrome Extension screenshot seeds an entry in `blockers`;
 * a required annotated review does not add one here.
 *
 * @param {object} [mission] Mission record; `id` and `prompt` are read.
 * @param {object} [contract] Decision contract; `answers`, `prompt`,
 *   `mission_id` and `sealed_hash` are read.
 * @returns {object} Ledger object with schema `sks.qa-loop-visual-evidence.v1`.
 */
export function buildQaLoopVisualEvidenceArtifact(mission = {}, contract = {}) {
  const contractAnswers = contract.answers || {};
  const screenshotNeeded = qaUiRequired(contractAnswers);
  const reviewNeeded = qaGptImage2AnnotatedReviewRequired(contract, contract.prompt || mission.prompt);
  // Review fields collapse to the 'not_required' sentinel when no review is requested.
  const ifReviewNeeded = (value) => (reviewNeeded ? value : 'not_required');

  const chromeExtensionScreenshot = {
    required: screenshotNeeded,
    status: screenshotNeeded ? 'pending' : 'not_required',
    evidence_source: CODEX_WEB_VERIFICATION_EVIDENCE_SOURCE,
    artifact_path: null,
    sha256: null,
    width: null,
    height: null,
    privacy: 'local-only'
  };

  const annotatedReview = {
    required: reviewNeeded,
    status: ifReviewNeeded('pending'),
    model: ifReviewNeeded('gpt-image-2'),
    provider: ifReviewNeeded('Codex App $imagegen'),
    source_screenshot_artifact: null,
    artifact_path: null,
    sha256: null,
    width: null,
    height: null,
    required_output: ifReviewNeeded('generated_annotated_review_image_with_numbered_callouts_severity_labels_and_visual_marks'),
    docs_url: CODEX_APP_IMAGE_GENERATION_DOC_URL,
    privacy: 'local-only'
  };

  const blockers = [];
  if (screenshotNeeded) {
    blockers.push('chrome_extension_screenshot_missing');
  }

  return {
    schema: 'sks.qa-loop-visual-evidence.v1',
    generated_at: nowIso(),
    mission_id: mission.id || contract.mission_id || null,
    contract_hash: contract.sealed_hash || null,
    required: screenshotNeeded || reviewNeeded,
    chrome_extension_screenshot: chromeExtensionScreenshot,
    gpt_image_2_annotated_review: annotatedReview,
    blockers,
    notes: [
      'QA-LOOP web visual evidence must be backed by real saved local image files.',
      CODEX_WEB_VERIFICATION_POLICY,
      CODEX_IMAGEGEN_REQUIRED_POLICY
    ]
  };
}
674
/**
 * Collect the reasons why the visual evidence for a QA-LOOP mission is
 * incomplete, combining fields from the gate object with the saved visual
 * evidence ledger (gate fields take precedence over ledger fields).
 *
 * @param {string} dir Mission directory that holds the ledger file.
 * @param {object} [gate] QA gate object.
 * @returns {Promise<string[]>} De-duplicated reason codes; empty when nothing is missing.
 */
async function missingQaLoopVisualEvidence(dir, gate = {}) {
  const ledger = await readJson(path.join(dir, QA_LOOP_VISUAL_EVIDENCE_ARTIFACT), null);
  const missing = [];

  // --- Chrome Extension screenshot: only checked when the gate requires UI e2e. ---
  if (gate.ui_e2e_required === true) {
    const shot = ledger?.chrome_extension_screenshot || {};
    const gateUiShot = gate.ui_chrome_extension_screenshot;
    const gateShot = gate.chrome_extension_screenshot;

    const captured = gate.ui_chrome_extension_screenshot_captured === true
      || positiveVisualStatus(shot.status, ['captured', 'attached', 'verified']);
    if (!captured) {
      missing.push('ui_chrome_extension_screenshot_missing');
    }

    const shotPath = firstNonEmpty(
      gate.ui_chrome_extension_screenshot_artifact,
      gate.chrome_extension_screenshot_artifact,
      gateUiShot?.path,
      gateShot?.path,
      shot.artifact_path,
      shot.path
    );
    const shotSha = firstNonEmpty(
      gate.ui_chrome_extension_screenshot_sha256,
      gate.chrome_extension_screenshot_sha256,
      gateUiShot?.sha256,
      gateShot?.sha256,
      shot.sha256
    );
    const shotWidth = firstNonEmpty(gate.ui_chrome_extension_screenshot_width, gateUiShot?.width, gateShot?.width, shot.width);
    const shotHeight = firstNonEmpty(gate.ui_chrome_extension_screenshot_height, gateUiShot?.height, gateShot?.height, shot.height);

    if (shotPath) {
      const fileReasons = await imageEvidenceFileReasons(dir, shotPath, shotSha, 'ui_chrome_extension_screenshot', { width: shotWidth, height: shotHeight });
      missing.push(...fileReasons);
    } else {
      missing.push('ui_chrome_extension_screenshot_artifact_missing');
    }

    const shotSource = firstNonEmpty(gate.ui_chrome_extension_screenshot_source, shot.evidence_source, gate.ui_evidence_source);
    if (shotSource !== CODEX_WEB_VERIFICATION_EVIDENCE_SOURCE) {
      missing.push('ui_chrome_extension_screenshot_source_not_codex_chrome_extension');
    }
  }

  // --- Annotated review image: required by either the gate or the ledger. ---
  const review = ledger?.gpt_image_2_annotated_review || {};
  const reviewNeeded = gate.gpt_image_2_annotated_review_required === true || review.required === true;
  if (reviewNeeded) {
    const gateReview = gate.gpt_image_2_annotated_review;
    const gateReviewImage = gate.gpt_image_2_annotated_review_image;

    const generated = gate.gpt_image_2_annotated_review_generated === true
      || positiveVisualStatus(review.status, ['generated', 'attached', 'verified']);
    if (!generated) {
      missing.push('gpt_image_2_annotated_review_image_missing');
    }

    const reviewPath = firstNonEmpty(
      gate.gpt_image_2_annotated_review_artifact,
      gate.imagegen_annotated_review_artifact,
      gateReview?.path,
      gateReviewImage?.path,
      review.artifact_path,
      review.path
    );
    const reviewSha = firstNonEmpty(gate.gpt_image_2_annotated_review_sha256, gateReview?.sha256, gateReviewImage?.sha256, review.sha256);
    const reviewWidth = firstNonEmpty(gate.gpt_image_2_annotated_review_width, gateReview?.width, gateReviewImage?.width, review.width);
    const reviewHeight = firstNonEmpty(gate.gpt_image_2_annotated_review_height, gateReview?.height, gateReviewImage?.height, review.height);

    if (reviewPath) {
      const fileReasons = await imageEvidenceFileReasons(dir, reviewPath, reviewSha, 'gpt_image_2_annotated_review', { width: reviewWidth, height: reviewHeight });
      missing.push(...fileReasons);
    } else {
      missing.push('gpt_image_2_annotated_review_artifact_missing');
    }

    const reviewModel = firstNonEmpty(gate.gpt_image_2_annotated_review_model, gateReview?.model, gateReviewImage?.model, review.model, review.provider?.model);
    if (reviewModel !== 'gpt-image-2') {
      missing.push('gpt_image_2_annotated_review_model_missing');
    }

    const reviewProvider = firstNonEmpty(gate.gpt_image_2_annotated_review_provider, gateReview?.provider, gateReviewImage?.provider, review.provider, review.provider_surface);
    const providerLooksLikeCodexApp = Boolean(reviewProvider) && /codex\s+app|\$imagegen|codex_app_imagegen/i.test(String(reviewProvider));
    if (!providerLooksLikeCodexApp) {
      missing.push('gpt_image_2_annotated_review_provider_not_codex_app_imagegen');
    }
    // A provider can match the allow pattern and still name a forbidden surface.
    if (/mock|fake|fixture|placeholder|text[-_ ]?only|direct\s+api|openai_images_api|responses_image_generation/i.test(String(reviewProvider))) {
      missing.push('gpt_image_2_annotated_review_provider_forbidden');
    }

    const sourceShot = firstNonEmpty(
      gate.gpt_image_2_source_screenshot_artifact,
      gateReview?.source_screenshot_artifact,
      gateReviewImage?.source_screenshot_artifact,
      review.source_screenshot_artifact,
      gate.ui_chrome_extension_screenshot_artifact
    );
    if (!sourceShot) {
      missing.push('gpt_image_2_source_screenshot_artifact_missing');
    }
  }

  return Array.from(new Set(missing));
}
725
/**
 * Tell whether a status value is one of the accepted statuses after it has
 * been stringified, trimmed and lower-cased (nullish/falsy becomes '').
 *
 * @param {*} status Raw status value.
 * @param {string[]} accepted Accepted status strings, compared as given.
 * @returns {boolean} True when the normalized status is in `accepted`.
 */
function positiveVisualStatus(status, accepted) {
  const normalized = String(status || '')
    .trim()
    .toLowerCase();
  return accepted.includes(normalized);
}
728
/**
 * Return the first usable value among the arguments.
 *
 * Strings are usable when they contain non-whitespace text and are returned
 * trimmed; any other value is usable when it is truthy and is returned as-is.
 *
 * @param {...*} values Candidates, in priority order.
 * @returns {*} The first usable candidate, or null when there is none.
 */
function firstNonEmpty(...values) {
  for (const candidate of values) {
    if (typeof candidate === 'string') {
      const trimmed = candidate.trim();
      if (trimmed) {
        return trimmed;
      }
      continue;
    }
    if (candidate) {
      return candidate;
    }
  }
  return null;
}
737
/**
 * Validate one image evidence artifact on disk and report what is wrong with it.
 *
 * Checks, in order: the path resolves to a local path, the path matches the
 * image-file pattern, the file exists, a declared SHA-256 is present and
 * matches the file, and usable positive dimensions are available (measured
 * from the file when possible, otherwise the declared ones).
 *
 * @param {string} dir Mission directory used to resolve relative paths.
 * @param {*} artifactPath Declared artifact path.
 * @param {*} declaredSha Declared SHA-256 digest of the artifact.
 * @param {string} prefix Prefix for every reason code that is produced.
 * @param {{width?: *, height?: *}} [declaredDims] Declared dimensions, used
 *   only for a dimension the file itself does not yield.
 * @returns {Promise<string[]>} Reason codes; empty when the artifact is acceptable.
 */
async function imageEvidenceFileReasons(dir, artifactPath, declaredSha, prefix, declaredDims = {}) {
  const resolved = resolveEvidencePath(dir, artifactPath);
  if (!resolved) {
    return [`${prefix}_artifact_path_invalid`];
  }

  const reasons = [];
  if (!IMAGE_FILE_RE.test(resolved)) {
    reasons.push(`${prefix}_artifact_not_image_file`);
  }
  if (!(await exists(resolved))) {
    return [...reasons, `${prefix}_artifact_file_missing`];
  }

  const actualSha = await sha256File(resolved).catch(() => null);
  if (!declaredSha) {
    reasons.push(`${prefix}_sha256_missing`);
  } else if (!actualSha) {
    // The path exists but could not be hashed (for example a directory or an
    // unreadable file). A declared digest that cannot be checked must not
    // count as verified, so fail closed instead of silently passing.
    reasons.push(`${prefix}_sha256_unverifiable`);
  } else if (String(declaredSha).trim().toLowerCase() !== String(actualSha).trim().toLowerCase()) {
    // Hex digests are case-insensitive; compare them normalized.
    reasons.push(`${prefix}_sha256_mismatch`);
  }

  // Dimensions read from the file win; declared values only fill in gaps.
  const measured = await imageDimensions(resolved).catch(() => null);
  const width = Number(measured?.width ?? declaredDims?.width);
  const height = Number(measured?.height ?? declaredDims?.height);
  if (!Number.isFinite(width) || !Number.isFinite(height) || width <= 0 || height <= 0) {
    reasons.push(`${prefix}_dimensions_missing`);
  }
  return reasons;
}
758
/**
 * Turn a declared evidence location into a local filesystem path.
 *
 * Accepts plain paths and `file://` URLs. For `file://` URLs the scheme is
 * removed, a `localhost` authority is dropped, and percent-escapes are
 * decoded, so `file:///tmp/my%20shot.png` resolves to `/tmp/my shot.png`.
 * Plain paths are never percent-decoded.
 *
 * @param {string} dir Base directory for relative paths.
 * @param {*} artifactPath Declared path or `file://` URL.
 * @returns {string|null} Absolute path, or null for empty input and
 *   http(s) URLs (remote locations are not local evidence).
 */
function resolveEvidencePath(dir, artifactPath) {
  const raw = String(artifactPath || '').trim();
  const isFileUrl = /^file:\/\//i.test(raw);
  let value = isFileUrl ? raw.slice('file://'.length) : raw;
  if (!value || /^https?:\/\//i.test(value)) {
    return null;
  }
  if (isFileUrl) {
    // `file://localhost/abs/path` names the same file as `file:///abs/path`.
    value = value.replace(/^localhost(?=\/)/i, '');
    try {
      value = decodeURIComponent(value);
    } catch {
      // Malformed percent-escape: treat the text as a literal path.
    }
    // On Windows, `file:///C:/dir/x.png` leaves `/C:/dir/x.png`; drop the slash.
    if (path.sep === '\\') {
      value = value.replace(/^\/(?=[A-Za-z]:[\\/])/, '');
    }
  }
  return path.isAbsolute(value) ? value : path.resolve(dir, value);
}
548
764
  function missionRootFromDir(dir) {
549
765
  const normalized = path.resolve(String(dir || ''));
550
766
  const marker = `${path.sep}.sneakoscope${path.sep}missions${path.sep}`;
@@ -564,7 +780,7 @@ function compactExecutionProfile(profile) {
564
780
  }
565
781
/**
 * Render the initial Markdown QA-LOOP report for a mission.
 *
 * @param {object} mission Mission record; `id` is printed.
 * @param {object} contract Decision contract; `answers` supplies the target,
 *   scope and environment, each shown as 'unset' when empty.
 * @param {Array<{id: string, title: string}>} checklist Items rendered as
 *   unchecked Markdown task entries.
 * @returns {string} Report text ending with a newline.
 */
function qaReportTemplate(mission, contract, checklist) {
  const answers = contract.answers || {};
  // Kept as a single entry so an empty checklist still yields one empty line.
  const checklistText = checklist.map((item) => `- [ ] ${item.id}: ${item.title}`).join('\n');
  const reportLines = [
    '# QA-LOOP Report',
    '',
    `Mission: ${mission.id}`,
    `Target: ${answers.TARGET_BASE_URL || 'unset'}`,
    `Scope: ${answers.QA_SCOPE || 'unset'}`,
    `Environment: ${answers.TARGET_ENVIRONMENT || 'unset'}`,
    '',
    '## Safety',
    '',
    '- Deployed destructive tests: never',
    '- Credentials: temp-only, never saved',
    `- UI evidence: ${CODEX_WEB_VERIFICATION_POLICY}`,
    `- Visual evidence ledger: ${QA_LOOP_VISUAL_EVIDENCE_ARTIFACT}`,
    '',
    '## Checklist',
    '',
    checklistText,
    '',
    '## Findings',
    '',
    'TBD',
    '',
    '## Corrections And Rechecks',
    '',
    'TBD',
    '',
    '## Honest Mode',
    '',
    'TBD',
    ''
  ];
  return reportLines.join('\n');
}
569
785
  function positiveCount(value) {
570
786
  const n = Number(value || 0);