sneakoscope 4.1.1 → 4.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. package/README.md +13 -10
  2. package/crates/sks-core/Cargo.lock +1 -1
  3. package/crates/sks-core/Cargo.toml +1 -1
  4. package/crates/sks-core/src/main.rs +1 -1
  5. package/dist/bin/sks.js +1 -1
  6. package/dist/cli/command-registry.js +1 -1
  7. package/dist/core/auto-review.js +1 -1
  8. package/dist/core/codex-control/codex-app-server-v2-client.js +86 -2
  9. package/dist/core/codex-control/codex-reliability-shield.js +26 -5
  10. package/dist/core/codex-control/codex-task-runner.js +7 -1
  11. package/dist/core/codex-control/model-call-concurrency.js +1 -1
  12. package/dist/core/commands/mad-db-command.js +146 -51
  13. package/dist/core/commands/mad-sks-command.js +15 -31
  14. package/dist/core/commands/qa-loop-command.js +23 -7
  15. package/dist/core/db-safety.js +35 -37
  16. package/dist/core/doctor/supabase-mcp-repair.js +2 -2
  17. package/dist/core/feature-registry.js +1 -1
  18. package/dist/core/fsx.js +1 -1
  19. package/dist/core/hooks-runtime.js +1 -1
  20. package/dist/core/init.js +5 -4
  21. package/dist/core/mad-db/mad-db-capability.js +203 -74
  22. package/dist/core/mad-db/mad-db-coordinator.js +287 -0
  23. package/dist/core/mad-db/mad-db-executor.js +156 -0
  24. package/dist/core/mad-db/mad-db-ledger.js +1 -1
  25. package/dist/core/mad-db/mad-db-lock.js +40 -0
  26. package/dist/core/mad-db/mad-db-operation-store.js +140 -0
  27. package/dist/core/mad-db/mad-db-policy-resolver.js +42 -22
  28. package/dist/core/mad-db/mad-db-policy.js +195 -0
  29. package/dist/core/mad-db/mad-db-postconditions.js +30 -0
  30. package/dist/core/mad-db/mad-db-recovery.js +27 -0
  31. package/dist/core/mad-db/mad-db-result-lifecycle.js +31 -102
  32. package/dist/core/mad-db/mad-db-runtime-profile.js +121 -0
  33. package/dist/core/mad-db/mad-db-target.js +64 -0
  34. package/dist/core/managed-assets/managed-assets-manifest.js +1 -1
  35. package/dist/core/pipeline-internals/runtime-core.js +40 -0
  36. package/dist/core/providers/glm/bench/glm-benchmark-types.js +1 -1
  37. package/dist/core/qa-loop/qa-app-server-driver.js +134 -0
  38. package/dist/core/qa-loop/qa-contract-v2.js +231 -0
  39. package/dist/core/qa-loop/qa-gate-v2.js +132 -0
  40. package/dist/core/qa-loop/qa-runtime-artifacts.js +53 -0
  41. package/dist/core/qa-loop/qa-surface-router.js +114 -0
  42. package/dist/core/qa-loop/qa-types.js +18 -0
  43. package/dist/core/qa-loop.js +83 -26
  44. package/dist/core/release/gate-manifest.js +1 -0
  45. package/dist/core/release/release-gate-dag.js +6 -5
  46. package/dist/core/release/sla-scheduler.js +1 -1
  47. package/dist/core/routes.js +42 -12
  48. package/dist/core/triwiki/triwiki-affected-graph.js +3 -2
  49. package/dist/core/version.js +1 -1
  50. package/dist/core/zellij/zellij-slot-column-anchor.js +5 -1
  51. package/dist/scripts/check-dist-runtime.js +3 -2
  52. package/dist/scripts/codex-0142-manifest-check.js +2 -1
  53. package/dist/scripts/codex-control-all-pipelines-check.js +1 -0
  54. package/dist/scripts/codex-control-model-capacity-fallback-check.js +53 -0
  55. package/dist/scripts/config-managed-merge-callsite-coverage-check.js +7 -1
  56. package/dist/scripts/loop-directive-check-lib.js +78 -1
  57. package/dist/scripts/mad-db-capability-check.js +13 -2
  58. package/dist/scripts/mad-db-command-check.js +7 -5
  59. package/dist/scripts/mad-db-hook-idempotency-check.js +21 -0
  60. package/dist/scripts/mad-db-ledger-check.js +2 -1
  61. package/dist/scripts/mad-db-lifecycle-hook-decision-check.js +5 -4
  62. package/dist/scripts/mad-db-mad-command-check.js +29 -16
  63. package/dist/scripts/mad-db-mcp-result-lifecycle-check.js +11 -10
  64. package/dist/scripts/mad-db-one-cycle-bounded-check.js +15 -18
  65. package/dist/scripts/mad-db-one-cycle-consumption-check.js +3 -3
  66. package/dist/scripts/mad-db-operation-lifecycle-blackbox.js +9 -9
  67. package/dist/scripts/mad-db-operation-lifecycle-ledger-check.js +6 -6
  68. package/dist/scripts/mad-db-parallel-lifecycle-check.js +24 -0
  69. package/dist/scripts/mad-db-policy-v2-check.js +20 -0
  70. package/dist/scripts/mad-db-priority-resolver-check.js +5 -5
  71. package/dist/scripts/mad-db-real-supabase-e2e.js +166 -0
  72. package/dist/scripts/mad-db-route-identity-check.js +28 -0
  73. package/dist/scripts/mad-db-runtime-profile-lifecycle-check.js +24 -0
  74. package/dist/scripts/mad-db-safety-conflict-matrix-check.js +3 -3
  75. package/dist/scripts/mad-db-skill-policy-snapshot-check.js +15 -0
  76. package/dist/scripts/qa-loop-app-server-driver-check.js +74 -0
  77. package/dist/scripts/qa-loop-surface-router-check.js +49 -0
  78. package/dist/scripts/release-check-dynamic-execute.js +1 -1
  79. package/dist/scripts/release-dag-full-coverage-check.js +6 -0
  80. package/dist/scripts/release-triwiki-first-runner-blackbox.js +5 -1
  81. package/dist/scripts/runtime-ts-rust-boundary-check.js +1 -1
  82. package/dist/scripts/triwiki-affected-graph-check.js +2 -2
  83. package/package.json +18 -5
  84. package/schemas/mad-db/mad-db-capability.schema.json +92 -19
@@ -1,14 +1,18 @@
1
1
  import path from 'node:path';
2
2
  import { exists, nowIso, readJson, readText, writeJsonAtomic, writeTextAtomic, PACKAGE_VERSION } from './fsx.js';
3
- import { CODEX_APP_IMAGE_GENERATION_DOC_URL, CODEX_IMAGEGEN_REQUIRED_POLICY, CODEX_WEB_VERIFICATION_EVIDENCE_SOURCE, CODEX_WEB_VERIFICATION_POLICY, evidenceMentionsForbiddenBrowserAutomation, evidenceMentionsForbiddenWebComputerUseEvidence } from './routes.js';
3
+ import { CODEX_APP_IMAGE_GENERATION_DOC_URL, CODEX_CHROME_EXTENSION_EVIDENCE_SOURCE, CODEX_COMPUTER_USE_EVIDENCE_SOURCE, CODEX_IMAGEGEN_REQUIRED_POLICY, CODEX_IN_APP_BROWSER_EVIDENCE_SOURCE, CODEX_WEB_VERIFICATION_EVIDENCE_SOURCE, CODEX_WEB_VERIFICATION_POLICY, evidenceMentionsForbiddenBrowserAutomation, evidenceMentionsForbiddenWebComputerUseEvidence } from './routes.js';
4
4
  import { appendAgentLedgerEvent, initializeAgentCentralLedger } from './agents/agent-central-ledger.js';
5
5
  import { resolveCodexAppExecutionProfile } from './codex-app/codex-app-execution-profile.js';
6
6
  import { resolveCodexNativeInvocationPlan } from './codex-native/codex-native-invocation-router.js';
7
7
  import { imageDimensions, sha256File } from './wiki-image/image-hash.js';
8
+ import { initializeQaRuntimeArtifacts } from './qa-loop/qa-runtime-artifacts.js';
9
+ import { evaluateQaGateV2 } from './qa-loop/qa-gate-v2.js';
10
+ import { DEFAULT_QA_MAX_CYCLES, QA_GATE_V2_ARTIFACT, QA_SURFACE_SELECTION_ARTIFACT } from './qa-loop/qa-types.js';
8
11
  export const QA_LOOP_ROUTE = 'QALoop';
9
12
  export const QA_LOOP_VISUAL_EVIDENCE_ARTIFACT = 'qa-loop/visual-evidence.json';
10
13
  const QA_REPORT_SUFFIX = 'qa-report.md';
11
- const UI_CHROME_EXTENSION_FIRST_ACK = 'use_codex_chrome_extension_first_no_computer_use_for_web_ui_or_mark_unverified';
14
+ const UI_SURFACE_ROUTER_ACK = 'use_codex_surface_router_browser_chrome_computer_no_synthetic_evidence';
15
+ const LEGACY_UI_CHROME_EXTENSION_FIRST_ACK = 'use_codex_chrome_extension_first_no_computer_use_for_web_ui_or_mark_unverified';
12
16
  const GPT_IMAGE_2_ANNOTATED_REVIEW_REQUIRED_ACK = 'yes_gpt_image_2_annotated_review';
13
17
  const IMAGE_FILE_RE = /\.(png|jpe?g|webp|gif)$/i;
14
18
  export const QA_NATIVE_AGENT_PERSONAS = Object.freeze([
@@ -166,7 +170,7 @@ export function inferQaLoopAnswers(prompt = '') {
166
170
  '검증하지 못한 UI/API 범위는 통과로 주장하지 않고 QA 리포트에 남긴다.'
167
171
  ];
168
172
  if (isUiScope(scope))
169
- acceptance.push('UI E2E 통과 증거는 실제 Codex Chrome Extension screenshot artifact pathsha256을 기록해야 한다.');
173
+ acceptance.push('UI E2E 통과 증거는 surface router가 고른 @Browser/@Chrome/@Computer 실제 action·observation ledger필요한 screenshot/hash를 기록해야 한다.');
170
174
  if (wantsGptImage2Review)
171
175
  acceptance.push('gpt-image-2 annotated review image가 필요한 경우 실제 Codex App $imagegen/gpt-image-2 출력 파일 path, sha256, model, provider를 기록해야 한다.');
172
176
  return {
@@ -181,17 +185,17 @@ export function inferQaLoopAnswers(prompt = '') {
181
185
  EXTERNAL_SIDE_EFFECT_POLICY: 'block_all_external_side_effects',
182
186
  ...login,
183
187
  CREDENTIAL_STORAGE_ACK: 'never_store_credentials_in_artifacts_or_wiki',
184
- UI_CHROME_EXTENSION_ACK: UI_CHROME_EXTENSION_FIRST_ACK,
188
+ UI_CHROME_EXTENSION_ACK: UI_SURFACE_ROUTER_ACK,
185
189
  QA_VISUAL_REVIEW_IMAGEGEN_REQUIRED: wantsGptImage2Review ? GPT_IMAGE_2_ANNOTATED_REVIEW_REQUIRED_ACK : 'not_required',
186
190
  TEAM_MODE_ALLOWED: 'no_parent_only',
187
- MAX_QA_CYCLES: '1',
191
+ MAX_QA_CYCLES: String(DEFAULT_QA_MAX_CYCLES),
188
192
  ACCEPTANCE_CRITERIA: acceptance,
189
193
  NON_GOALS: [
190
194
  '결제, 실제 이메일/SMS 발송, 관리자 권한 변경, 데이터 삭제, 프로덕션 데이터 변경은 테스트하지 않는다.'
191
195
  ],
192
196
  RISK_BOUNDARY: [
193
197
  '실제 사용자 데이터, 인증 권한, 결제, 메시지 발송, 웹훅, 외부 서비스 상태를 생성/수정/삭제하지 않는다.',
194
- 'Codex Chrome Extension readiness/evidence가 없으면 web/browser UI 검증 완료로 주장하지 않는다.',
198
+ '선택된 Codex App visual surface의 실제 action·observation evidence가 없으면 UI 검증 완료로 주장하지 않는다.',
195
199
  '로그인이 필요하지만 임시 테스트 자격증명이 없으면 인증 구간은 차단/미검증으로 기록한다.'
196
200
  ],
197
201
  MID_RUN_UNKNOWN_POLICY: ['preserve_existing_behavior', 'defer_optional_scope', 'block_only_if_no_safe_path']
@@ -245,7 +249,7 @@ export function qaLoopQuestionSlots() {
245
249
  { id: 'TEMP_TEST_CREDENTIALS_READY', question: 'If login is required, are test-only credentials ready to provide ephemerally during the run?', required: true, type: 'enum', options: ['not_required', 'yes_temp_only', 'no_block_authenticated_tests'] },
246
250
  { id: 'TEST_CREDENTIALS_RUNTIME_SOURCE', question: 'If login is required, how will test-only credentials be provided without saving the values?', required: true, type: 'enum', options: ['not_required', 'ephemeral_chat_only', 'environment_variables', 'secret_manager'] },
247
251
  { id: 'CREDENTIAL_STORAGE_ACK', question: 'Acknowledge credential handling policy.', required: true, type: 'enum', options: ['never_store_credentials_in_artifacts_or_wiki'] },
248
- { id: 'UI_CHROME_EXTENSION_ACK', question: 'Acknowledge UI E2E evidence policy: Codex Chrome Extension first for web/browser/webapp verification; no Computer Use or unofficial browser automation substitute.', required: true, type: 'enum', options: [UI_CHROME_EXTENSION_FIRST_ACK] },
252
+ { id: 'UI_CHROME_EXTENSION_ACK', question: 'Acknowledge UI E2E evidence policy: QA-LOOP routes local/public web to @Browser, signed-in web to @Chrome, native/cross-app GUI to @Computer, and never treats synthetic artifacts as real proof.', required: true, type: 'enum', options: [UI_SURFACE_ROUTER_ACK, LEGACY_UI_CHROME_EXTENSION_FIRST_ACK] },
249
253
  { id: 'TEAM_MODE_ALLOWED', question: 'May QA-LOOP use Team/subagents where useful?', required: true, type: 'enum', options: ['yes_parallel_where_safe', 'no_parent_only'] },
250
254
  { id: 'MAX_QA_CYCLES', question: 'How many no-question QA cycles are allowed before pausing?', required: true, type: 'string' },
251
255
  { id: 'ACCEPTANCE_CRITERIA', question: 'List the QA completion criteria.', required: true, type: 'array_or_string' },
@@ -269,8 +273,8 @@ export function validateQaLoopAnswers(schema, answers = {}) {
269
273
  errors.push({ slot: 'QA_MUTATION_POLICY', error: 'production_deployed_qa_is_read_only_smoke_only' });
270
274
  if (answers.DESTRUCTIVE_DEPLOYED_TESTS_ALLOWED !== 'never')
271
275
  errors.push({ slot: 'DESTRUCTIVE_DEPLOYED_TESTS_ALLOWED', error: 'destructive_deployed_tests_never_allowed' });
272
- if (isUiScope(answers.QA_SCOPE) && answers.UI_CHROME_EXTENSION_ACK !== UI_CHROME_EXTENSION_FIRST_ACK)
273
- errors.push({ slot: 'UI_CHROME_EXTENSION_ACK', error: 'ui_e2e_requires_codex_chrome_extension_first_ack' });
276
+ if (isUiScope(answers.QA_SCOPE) && ![UI_SURFACE_ROUTER_ACK, LEGACY_UI_CHROME_EXTENSION_FIRST_ACK].includes(answers.UI_CHROME_EXTENSION_ACK))
277
+ errors.push({ slot: 'UI_CHROME_EXTENSION_ACK', error: 'ui_e2e_requires_codex_surface_router_ack' });
274
278
  if (answers.LOGIN_REQUIRED === 'yes' && !['yes_temp_only', 'no_block_authenticated_tests'].includes(answers.TEMP_TEST_CREDENTIALS_READY))
275
279
  errors.push({ slot: 'TEMP_TEST_CREDENTIALS_READY', error: 'authenticated_tests_require_ephemeral_test_credentials_or_must_be_blocked' });
276
280
  if (answers.LOGIN_REQUIRED === 'yes' && answers.TEMP_TEST_CREDENTIALS_READY === 'yes_temp_only' && answers.TEST_CREDENTIALS_RUNTIME_SOURCE === 'not_required')
@@ -322,6 +326,7 @@ export function defaultQaGate(contract = {}, opts = {}) {
322
326
  const gptImage2ReviewRequired = qaGptImage2AnnotatedReviewRequired(contract, contract.prompt);
323
327
  const reportFile = opts.reportFile || qaReportFilename();
324
328
  const corrective = a.QA_CORRECTIVE_POLICY !== 'report_only_no_code_changes';
329
+ const selectedSurface = opts.qaRuntime?.surface?.selected_surface || null;
325
330
  return {
326
331
  passed: false,
327
332
  clarification_contract_sealed: Boolean(contract.sealed_hash),
@@ -333,6 +338,15 @@ export function defaultQaGate(contract = {}, opts = {}) {
333
338
  deployed_destructive_tests_blocked: a.TARGET_ENVIRONMENT === 'local_dev_server' || a.DESTRUCTIVE_DEPLOYED_TESTS_ALLOWED === 'never',
334
339
  credentials_not_persisted: false,
335
340
  ui_e2e_required: uiRequired,
341
+ qa_contract_v2_required: opts.qaRuntime ? true : false,
342
+ qa_surface_selection_artifact: opts.qaRuntime ? QA_SURFACE_SELECTION_ARTIFACT : null,
343
+ qa_gate_v2_artifact: opts.qaRuntime ? QA_GATE_V2_ARTIFACT : null,
344
+ qa_surface_selected: selectedSurface,
345
+ ui_selected_surface: selectedSurface,
346
+ ui_live_surface_preflight_passed: !uiRequired,
347
+ ui_real_action_count: 0,
348
+ ui_observation_count: 0,
349
+ same_flow_replay_complete: false,
336
350
  chrome_extension_preflight_passed: !uiRequired,
337
351
  ui_chrome_extension_evidence: !uiRequired,
338
352
  ui_computer_use_evidence: false,
@@ -385,6 +399,13 @@ export async function writeQaLoopArtifacts(dir, mission, contract) {
385
399
  const root = missionRootFromDir(dir);
386
400
  const executionProfile = root ? await resolveCodexAppExecutionProfile({ root }).catch(() => null) : null;
387
401
  const codexNativeInvocation = root ? await resolveQaCodexNativeInvocation(root, mission.id).catch(() => null) : null;
402
+ const qaRuntime = await initializeQaRuntimeArtifacts(dir, {
403
+ ...contract,
404
+ prompt: mission.prompt || contract.prompt,
405
+ mission_id: mission.id || contract.mission_id
406
+ }, {
407
+ missionId: mission.id || contract.mission_id || null
408
+ }).catch(() => null);
388
409
  if (executionProfile)
389
410
  await writeJsonAtomic(path.join(dir, 'qa-loop', 'execution-profile.json'), executionProfile).catch(() => undefined);
390
411
  if (codexNativeInvocation)
@@ -397,11 +418,20 @@ export async function writeQaLoopArtifacts(dir, mission, contract) {
397
418
  codex_app_execution_profile: executionProfile ? compactExecutionProfile(executionProfile) : null,
398
419
  codex_native_invocation: codexNativeInvocation,
399
420
  target: { scope: a.QA_SCOPE, environment: a.TARGET_ENVIRONMENT, base_url: a.TARGET_BASE_URL, api_base_url: a.API_BASE_URL },
400
- safety: { mutation_policy: a.QA_MUTATION_POLICY, deployed_destructive_tests_allowed: 'never', credentials: 'temp_only_never_saved', ui_evidence: 'codex_chrome_extension_first_required_for_web_ui_e2e', visual_review: 'gpt_image_2_annotated_review_required_when_contract_requests_it' },
421
+ qa_runtime_v2: qaRuntime ? {
422
+ contract_artifact: 'qa-loop/qa-contract-v2.json',
423
+ surface_selection_artifact: QA_SURFACE_SELECTION_ARTIFACT,
424
+ selected_surface: qaRuntime.surface.selected_surface,
425
+ journey_graph_artifact: 'qa-loop/qa-journey-graph.json',
426
+ gate_artifact: QA_GATE_V2_ARTIFACT
427
+ } : null,
428
+ safety: { mutation_policy: a.QA_MUTATION_POLICY, deployed_destructive_tests_allowed: 'never', credentials: 'temp_only_never_saved', ui_evidence: 'codex_surface_router_live_action_required_for_ui_e2e', visual_review: 'gpt_image_2_annotated_review_required_when_contract_requests_it' },
401
429
  checklist
402
430
  });
403
431
  await writeJsonAtomic(path.join(dir, QA_LOOP_VISUAL_EVIDENCE_ARTIFACT), buildQaLoopVisualEvidenceArtifact(mission, contract));
404
- await writeJsonAtomic(path.join(dir, 'qa-gate.json'), defaultQaGate(contract, { reportFile, executionProfile, codexNativeInvocation }));
432
+ await writeJsonAtomic(path.join(dir, 'qa-gate.json'), defaultQaGate(contract, { reportFile, executionProfile, codexNativeInvocation, qaRuntime }));
433
+ if (qaRuntime)
434
+ await evaluateQaGateV2(dir).catch(() => undefined);
405
435
  await writeTextAtomic(path.join(dir, reportFile), qaReportTemplate(mission, contract, checklist));
406
436
  return { checklist_count: checklist.length, report_file: reportFile };
407
437
  }
@@ -441,8 +471,18 @@ export async function ensureQaLoopVisualEvidenceContract(dir, mission = {}, cont
441
471
  }
442
472
  export async function evaluateQaGate(dir) {
443
473
  const gate = await readJson(path.join(dir, 'qa-gate.json'), {});
474
+ const surfaceSelection = await readJson(path.join(dir, QA_SURFACE_SELECTION_ARTIFACT), null);
475
+ const selectedSurface = gate.ui_selected_surface || gate.qa_surface_selected || surfaceSelection?.selected_surface || null;
476
+ const expectedEvidenceSource = evidenceSourceForSurface(selectedSurface);
477
+ const gateV2 = gate.qa_contract_v2_required === true ? await evaluateQaGateV2(dir).catch((err) => ({
478
+ passed: false,
479
+ blockers: [`qa_gate_v2_evaluation_failed:${err?.message || String(err)}`],
480
+ unverified: []
481
+ })) : null;
444
482
  const reportFile = qaReportFileFromGate(gate);
445
483
  const reasons = [];
484
+ if (gateV2 && gateV2.passed !== true)
485
+ reasons.push(...(gateV2.blockers || []));
446
486
  for (const key of ['clarification_contract_sealed', 'qa_report_written', 'qa_ledger_complete', 'checklist_completed', 'safety_reviewed', 'deployed_destructive_tests_blocked', 'credentials_not_persisted', 'honest_mode_complete']) {
447
487
  if (gate[key] !== true)
448
488
  reasons.push(`${key}_missing`);
@@ -460,17 +500,22 @@ export async function evaluateQaGate(dir) {
460
500
  if (gate.unsafe_external_side_effects === true)
461
501
  reasons.push('unsafe_external_side_effects');
462
502
  if (gate.ui_e2e_required === true) {
463
- if (gate.chrome_extension_preflight_passed !== true)
464
- reasons.push('chrome_extension_preflight_missing');
465
- if (gate.ui_chrome_extension_evidence !== true)
466
- reasons.push('ui_chrome_extension_evidence_missing');
467
- if (gate.ui_computer_use_evidence === true)
503
+ if (!selectedSurface || selectedSurface === 'codex_chrome_extension') {
504
+ if (gate.chrome_extension_preflight_passed !== true)
505
+ reasons.push('chrome_extension_preflight_missing');
506
+ if (gate.ui_chrome_extension_evidence !== true)
507
+ reasons.push('ui_chrome_extension_evidence_missing');
508
+ }
509
+ else if (gate.ui_live_surface_preflight_passed !== true) {
510
+ reasons.push('ui_live_surface_preflight_missing');
511
+ }
512
+ if (gate.ui_computer_use_evidence === true && selectedSurface !== 'codex_computer_use')
468
513
  reasons.push('ui_computer_use_evidence_forbidden_for_web');
469
- if (gate.ui_evidence_source !== CODEX_WEB_VERIFICATION_EVIDENCE_SOURCE)
470
- reasons.push('ui_evidence_source_not_codex_chrome_extension');
514
+ if (expectedEvidenceSource && gate.ui_evidence_source !== expectedEvidenceSource)
515
+ reasons.push(`ui_evidence_source_not_${expectedEvidenceSource}`);
471
516
  if (evidenceMentionsForbiddenBrowserAutomation({ evidence: gate.evidence, notes: gate.notes, ui_evidence_source: gate.ui_evidence_source }))
472
517
  reasons.push('forbidden_browser_automation_evidence');
473
- if (evidenceMentionsForbiddenWebComputerUseEvidence({ evidence: gate.evidence, ui_evidence_source: gate.ui_evidence_source }))
518
+ if (selectedSurface !== 'codex_computer_use' && evidenceMentionsForbiddenWebComputerUseEvidence({ evidence: gate.evidence, ui_evidence_source: gate.ui_evidence_source }))
474
519
  reasons.push('computer_use_web_evidence_forbidden');
475
520
  reasons.push(...await missingQaLoopVisualEvidence(dir, gate));
476
521
  }
@@ -502,7 +547,7 @@ export async function evaluateQaGate(dir) {
502
547
  reasons.push('qa_ledger_missing');
503
548
  const uniqueReasons = [...new Set(reasons)];
504
549
  const passed = gate.passed === true && uniqueReasons.length === 0;
505
- const result = { checked_at: nowIso(), passed, reasons: uniqueReasons, gate };
550
+ const result = { checked_at: nowIso(), passed, reasons: uniqueReasons, gate, gate_v2: gateV2 };
506
551
  await writeJsonAtomic(path.join(dir, 'qa-gate.evaluated.json'), result);
507
552
  return result;
508
553
  }
@@ -592,9 +637,9 @@ CONTRACT:
592
637
  ${JSON.stringify(contract, null, 2)}
593
638
  ${imageContractText}${appHandoffText}${executionProfileText}
594
639
  VISUAL EVIDENCE CONTRACT:
595
- - For web UI QA, do not set chrome_extension_preflight_passed/ui_chrome_extension_evidence to true unless the Codex Chrome Extension path is ready and ${QA_LOOP_VISUAL_EVIDENCE_ARTIFACT} records a real saved Chrome Extension screenshot artifact with path, sha256, and dimensions.
596
- - If decision-contract.json answers set QA_VISUAL_REVIEW_IMAGEGEN_REQUIRED=${GPT_IMAGE_2_ANNOTATED_REVIEW_REQUIRED_ACK}, use Codex App $imagegen/gpt-image-2 (${CODEX_APP_IMAGE_GENERATION_DOC_URL}) to produce a real generated annotated review image from the Chrome Extension screenshot. Record its path, sha256, model=gpt-image-2, provider=Codex App $imagegen, and source_screenshot_artifact in ${QA_LOOP_VISUAL_EVIDENCE_ARTIFACT} and qa-gate.json.
597
- - Do not substitute prose-only critique, Playwright/Selenium/Puppeteer/Browser Use screenshots, Computer Use browser screenshots, placeholder images, fake fixtures, or direct API fallback as full web UI visual evidence.
640
+ - For UI QA, do not mark live UI evidence true unless qa-loop/qa-surface-selection.json selected the correct @Browser/@Chrome/@Computer surface and action/observation ledgers record real user-like actions.
641
+ - If decision-contract.json answers set QA_VISUAL_REVIEW_IMAGEGEN_REQUIRED=${GPT_IMAGE_2_ANNOTATED_REVIEW_REQUIRED_ACK}, use Codex App $imagegen/gpt-image-2 (${CODEX_APP_IMAGE_GENERATION_DOC_URL}) to produce a real generated annotated review image from the selected-surface source screenshot. Record its path, sha256, model=gpt-image-2, provider=Codex App $imagegen, and source_screenshot_artifact in ${QA_LOOP_VISUAL_EVIDENCE_ARTIFACT} and qa-gate.json.
642
+ - Do not substitute prose-only critique, Playwright/Selenium/Puppeteer screenshots, static screenshots, plugin cache, placeholder images, fake fixtures, or direct API fallback as full UI visual evidence.
598
643
  Previous tail:
599
644
  ${String(previous || '').slice(-2500)}
600
645
  `;
@@ -623,7 +668,7 @@ function qaChecklist(a) {
623
668
  ['preflight.roles', 'Map roles, permissions, protected areas.']
624
669
  ];
625
670
  if (qaUiRequired(a))
626
- cases.push(['ui.chrome_extension_first', CODEX_WEB_VERIFICATION_POLICY], ['ui.navigation', 'Check primary navigation, deep links, back/forward, refresh, and protected routes.'], ['ui.auth', 'Check login, logout, session expiry, unauthorized access, and role-specific visibility.'], ['ui.forms', 'Check required fields, validation, disabled states, success, and failure.'], ['ui.states', 'Check loading, empty, error, retry, offline/timeout, and slow network states.'], ['ui.crud', 'Check allowed create/change flows and block forbidden destructive flows by environment.'], ['ui.responsive', 'Check desktop, tablet, mobile, overflow, long text, and keyboard focus order.'], ['ui.a11y', 'Check labels, focus traps, modals, contrast-sensitive controls, and screen-reader names.'], ['ui.visual', 'Capture evidence for meaningful UI regressions without storing secrets.']);
671
+ cases.push(['ui.surface_router', CODEX_WEB_VERIFICATION_POLICY], ['ui.navigation', 'Check primary navigation, deep links, back/forward, refresh, and protected routes.'], ['ui.auth', 'Check login, logout, session expiry, unauthorized access, and role-specific visibility.'], ['ui.forms', 'Check required fields, validation, disabled states, success, and failure.'], ['ui.states', 'Check loading, empty, error, retry, offline/timeout, and slow network states.'], ['ui.crud', 'Check allowed create/change flows and block forbidden destructive flows by environment.'], ['ui.responsive', 'Check desktop, tablet, mobile, overflow, long text, and keyboard focus order.'], ['ui.a11y', 'Check labels, focus traps, modals, contrast-sensitive controls, and screen-reader names.'], ['ui.visual', 'Capture evidence for meaningful UI regressions without storing secrets.']);
627
672
  if (qaApiRequired(a))
628
673
  cases.push(['api.health', 'Check health/version/readiness endpoints when available.'], ['api.auth', 'Check anonymous, authenticated, expired, and wrong-role access.'], ['api.contract', 'Check status codes, response shape, headers, content type, and error format.'], ['api.validation', 'Check missing, malformed, boundary, duplicate, and over-limit payloads.'], ['api.listing', 'Check pagination, sorting, filters, search, and empty results.'], ['api.mutation', 'Check allowed seeded create/change and forbid deployed destructive flows.'], ['api.idempotency', 'Check retry/idempotency behavior for safe operations.'], ['api.concurrency', 'Check stale change, conflict, and double-submit behavior.'], ['api.failure', 'Check timeout, upstream error, rate-limit, and rollback-visible failure paths.'], ['api.security', 'Check CORS, auth headers, PII redaction, and permission boundaries.']);
629
674
  cases.push(['report.evidence', 'Record pass/fail/blocked/skipped with evidence.'], ['report.corrective_loop', 'Record fixes, rechecks, unresolved findings, deferred blockers.'], ['report.honest', 'Run Honest Mode.']);
@@ -675,6 +720,8 @@ async function missingQaLoopVisualEvidence(dir, gate = {}) {
675
720
  const visual = await readJson(path.join(dir, QA_LOOP_VISUAL_EVIDENCE_ARTIFACT), null);
676
721
  const reasons = [];
677
722
  const uiRequired = gate.ui_e2e_required === true;
723
+ const selectedSurface = gate.ui_selected_surface || gate.qa_surface_selected || (gate.ui_chrome_extension_evidence === true ? 'codex_chrome_extension' : null);
724
+ const expectedSource = evidenceSourceForSurface(selectedSurface) || CODEX_WEB_VERIFICATION_EVIDENCE_SOURCE;
678
725
  if (uiRequired) {
679
726
  const screenshot = visual?.chrome_extension_screenshot || {};
680
727
  if (gate.ui_chrome_extension_screenshot_captured !== true && !positiveVisualStatus(screenshot.status, ['captured', 'attached', 'verified']))
@@ -690,8 +737,8 @@ async function missingQaLoopVisualEvidence(dir, gate = {}) {
690
737
  else
691
738
  reasons.push(...await imageEvidenceFileReasons(dir, screenshotPath, screenshotSha, 'ui_chrome_extension_screenshot', screenshotDims));
692
739
  const screenshotSource = firstNonEmpty(gate.ui_chrome_extension_screenshot_source, screenshot.evidence_source, gate.ui_evidence_source);
693
- if (screenshotSource !== CODEX_WEB_VERIFICATION_EVIDENCE_SOURCE)
694
- reasons.push('ui_chrome_extension_screenshot_source_not_codex_chrome_extension');
740
+ if (screenshotSource !== expectedSource)
741
+ reasons.push(`ui_chrome_extension_screenshot_source_not_${expectedSource}`);
695
742
  }
696
743
  const review = visual?.gpt_image_2_annotated_review || {};
697
744
  const gptImage2ReviewRequired = gate.gpt_image_2_annotated_review_required === true || review.required === true;
@@ -786,4 +833,14 @@ function positiveCount(value) {
786
833
  const n = Number(value || 0);
787
834
  return Number.isFinite(n) && n > 0;
788
835
  }
836
+ function evidenceSourceForSurface(surface) {
837
+ const value = String(surface || '').trim();
838
+ if (value === 'codex_in_app_browser')
839
+ return CODEX_IN_APP_BROWSER_EVIDENCE_SOURCE;
840
+ if (value === 'codex_chrome_extension')
841
+ return CODEX_CHROME_EXTENSION_EVIDENCE_SOURCE;
842
+ if (value === 'codex_computer_use')
843
+ return CODEX_COMPUTER_USE_EVIDENCE_SOURCE;
844
+ return null;
845
+ }
789
846
  //# sourceMappingURL=qa-loop.js.map
@@ -6,6 +6,7 @@ export const FORBIDDEN_RECURSIVE_GATES = new Set([
6
6
  'release:check:dynamic:execute',
7
7
  'release:real-check',
8
8
  'release:publish',
9
+ 'publish:ignore-scripts',
9
10
  'publish:npm',
10
11
  'publish:dry',
11
12
  'prepublishOnly'
@@ -28,15 +28,16 @@ export async function runReleaseGateDag(input) {
28
28
  const preset = input.preset || 'release';
29
29
  const manifest = loadReleaseGateManifest(root);
30
30
  const presetGates = selectReleaseGatePreset(manifest, preset);
31
- const triwikiGraph = input.triwiki !== false && (preset === 'affected' || preset === 'fast' || preset === 'confidence') && input.full !== true
32
- ? computeTriWikiAffectedGraph({ root, tier: preset === 'fast' ? 'affected' : 'confidence', changedSince: input.changedSince || 'auto', ...(input.changedFiles ? { changedFiles: input.changedFiles } : {}) })
33
- : null;
34
31
  const affected = (preset === 'affected' || preset === 'fast' || preset === 'confidence') && input.full !== true
35
32
  ? selectAffectedReleaseGates(root, manifest, presetGates, { changedSince: input.changedSince || 'auto', ...(input.changedFiles ? { changedFiles: input.changedFiles } : {}), preset })
36
33
  : selectAffectedReleaseGates(root, manifest, presetGates, { full: true, preset });
34
+ const rootReleaseSurfaceChanged = affected.selection.changed_files.some((file) => file === 'package.json' || file === 'package-lock.json' || file === 'release-gates.v2.json');
35
+ const triwikiGraph = input.triwiki !== false && !rootReleaseSurfaceChanged && (preset === 'affected' || preset === 'fast' || preset === 'confidence') && input.full !== true
36
+ ? computeTriWikiAffectedGraph({ root, tier: preset === 'fast' ? 'affected' : 'confidence', changedSince: input.changedSince || 'auto', ...(input.changedFiles ? { changedFiles: input.changedFiles } : {}) })
37
+ : null;
37
38
  const triwikiSelectionUsed = Boolean(triwikiGraph);
38
39
  const triwikiConservative = Boolean(triwikiGraph?.conservative_reason);
39
- const triwikiSelectedIds = new Set(triwikiGraph && !triwikiConservative ? triwikiGraph.gates : presetGates.map((gate) => gate.id));
40
+ const triwikiSelectedIds = new Set(triwikiGraph && !triwikiConservative ? triwikiGraph.gates : affected.gates.map((gate) => gate.id));
40
41
  const selected = triwikiGraph
41
42
  ? presetGates.filter((gate) => triwikiSelectedIds.has(gate.id))
42
43
  : affected.gates;
@@ -45,7 +46,7 @@ export async function runReleaseGateDag(input) {
45
46
  affected.selection.mode = 'affected';
46
47
  affected.selection.selected_gate_ids = selected.map((gate) => gate.id);
47
48
  affected.selection.skipped_gate_ids = triwikiSkippedGates;
48
- affected.selection.reasons = Object.fromEntries(selected.map((gate) => [gate.id, triwikiConservative ? `triwiki_conservative:${triwikiGraph.conservative_reason}` : 'triwiki-affected']));
49
+ affected.selection.reasons = Object.fromEntries(selected.map((gate) => [gate.id, triwikiConservative ? `triwiki_conservative_fallback:${triwikiGraph.conservative_reason}` : 'triwiki-affected']));
49
50
  }
50
51
  const selectedIds = new Set(selected.map((gate) => gate.id));
51
52
  const affectedExternalSatisfiedDeps = affected.selection.mode === 'affected'
@@ -2,7 +2,7 @@ import { computeTriWikiAffectedGraph } from '../triwiki/triwiki-affected-graph.j
2
2
  import { buildTriWikiSlaCertificate } from '../triwiki/triwiki-sla-certificate.js';
3
3
  import { planExtremeParallelSchedule } from './extreme-parallel-scheduler.js';
4
4
  export const SLA_SCHEDULER_SCHEMA = 'sks.sla-scheduler.v1';
5
- export function planFiveMinuteSla(root, graph = computeTriWikiAffectedGraph({ root, tier: 'affected' }), slaMs = 300_000) {
5
+ export function planFiveMinuteSla(root, graph = computeTriWikiAffectedGraph({ root, tier: 'affected', includeProofLookup: false }), slaMs = 300_000) {
6
6
  const schedule = planExtremeParallelSchedule(root, graph);
7
7
  const certificate = buildTriWikiSlaCertificate({
8
8
  graph,
@@ -32,15 +32,30 @@ export const FROM_CHAT_IMG_QA_LOOP_ARTIFACT = 'from-chat-img-qa-loop.json';
32
32
  export const FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS = 5;
33
33
  export const USAGE_TOPICS = 'install|setup|bootstrap|root|deps|zellij|tmux|auto-review|team|qa-loop|ppt|image-ux-review|computer-use|goal|fast-mode|research|db|git|codex|codex-app|codex-native|hooks|features|all-features|dfix|commit|commit-and-push|design|imagegen|dollar|context7|xai|pipeline|reasoning|guard|conflicts|versioning|eval|harness|hproof|gx|wiki|wrongness|code-structure|proof-field|skill-dream|rust';
34
34
  export const CODEX_COMPUTER_USE_EVIDENCE_SOURCE = 'codex_computer_use';
35
- export const CODEX_WEB_VERIFICATION_EVIDENCE_SOURCE = 'codex_chrome_extension';
35
+ export const CODEX_IN_APP_BROWSER_EVIDENCE_SOURCE = 'codex_in_app_browser';
36
+ export const CODEX_CHROME_EXTENSION_EVIDENCE_SOURCE = 'codex_chrome_extension';
37
+ export const CODEX_WEB_VERIFICATION_EVIDENCE_SOURCE = CODEX_IN_APP_BROWSER_EVIDENCE_SOURCE;
36
38
  export const CODEX_IMAGEGEN_EVIDENCE_SOURCE = 'codex_app_imagegen_gpt_image_2';
39
+ export const CODEX_IN_APP_BROWSER_DOC_URL = 'https://developers.openai.com/codex/app/browser';
37
40
  export const CODEX_CHROME_EXTENSION_DOC_URL = 'https://developers.openai.com/codex/app/chrome-extension';
41
+ export const CODEX_COMPUTER_USE_DOC_URL = 'https://developers.openai.com/codex/app/computer-use';
42
+ export const CODEX_RECORD_REPLAY_DOC_URL = 'https://developers.openai.com/codex/record-and-replay';
43
+ export const CODEX_APP_SERVER_DOC_URL = 'https://developers.openai.com/codex/app-server';
38
44
  export const CODEX_APP_IMAGE_GENERATION_DOC_URL = 'https://developers.openai.com/codex/app/features#image-generation';
39
45
  export const OPENAI_IMAGE_GENERATION_DOC_URL = 'https://developers.openai.com/api/docs/guides/image-generation';
40
46
  export const OPENAI_CHATGPT_IMAGES_2_DOC_URL = 'https://openai.com/index/introducing-chatgpt-images-2-0/';
41
47
  export const OPENAI_GPT_IMAGE_2_MODEL_DOC_URL = 'https://developers.openai.com/api/docs/models/gpt-image-2';
42
- export const CODEX_WEB_VERIFICATION_POLICY = `Web, browser, localhost, website, webapp, and web-based app verification must use the official Codex Chrome Extension path first (${CODEX_CHROME_EXTENSION_DOC_URL}). Before web UX review, QA-LOOP, browser smoke, authenticated browser checks, or web visual verification proceeds, SKS must verify that the Chrome Extension path is installed/enabled through Codex App plugin readiness; if it is missing, rapidly halt the pipeline, tell the user to install/setup the extension, and resume only after the user explicitly says installation is complete. Do not use Codex Computer Use as browser/web-app verification evidence. Do not substitute Playwright, Selenium, Puppeteer, Browser Use, Chrome MCP, generic browser automation, screenshots fabricated from code, or prose-only checks for the Chrome Extension gate.`;
43
- export const CODEX_COMPUTER_USE_ONLY_POLICY = `Codex Computer Use is reserved for native macOS, desktop-app, OS-settings, and non-web visual tasks such as setting up a Mac app or inspecting a non-browser surface. It must not be used for browser, localhost, website, webapp, or web-based app verification; those routes follow the Chrome Extension policy instead. If live native Computer Use tools are unavailable for a non-web target, mark the native visual evidence unverified instead of fabricating screenshots or substituting browser automation. Codex App readiness/config verification is not target evidence: use Codex-provided control surfaces such as \`codex features list\`, \`codex mcp list\`, \`sks codex-app check\`, remote-control status, and plugin/tool exposure. In Codex App prompts, invoke @Computer or @AppName only for live native Mac/non-web target apps or screens.`;
48
+ export const QA_INTERACTION_SURFACES = Object.freeze([
49
+ 'codex_in_app_browser',
50
+ 'codex_chrome_extension',
51
+ 'codex_computer_use',
52
+ 'codex_app_plugin',
53
+ 'structured_mcp',
54
+ 'shell_or_api_diagnostic'
55
+ ]);
56
+ export const CODEX_QA_SURFACE_ROUTING_POLICY = `Codex QA surface routing follows the official Codex App split: use @Browser / in-app Browser (${CODEX_IN_APP_BROWSER_DOC_URL}) first for localhost, local development servers, file-backed previews, and public pages that do not require sign-in; use @Chrome / Codex Chrome Extension (${CODEX_CHROME_EXTENSION_DOC_URL}) for signed-in websites, cookies, browser profiles, extensions, existing tabs, or internal tools; use @Computer or @AppName (${CODEX_COMPUTER_USE_DOC_URL}) for native macOS/Windows apps, OS settings, cross-app workflows, and GUI-only bugs. Prefer structured Plugins/MCPs for repeatable data operations, then verify rendered user-visible results with Browser, Chrome, or Computer Use. Playwright, Selenium, Puppeteer, Chrome MCP, static screenshots, plugin cache, and final-agent prose are not Codex App live action proof. App Server evidence (${CODEX_APP_SERVER_DOC_URL}) must correlate thread, turn, item/tool events, approvals, diffs, actions, observations, findings, fixes, and same-flow replay before a real QA pass is claimed.`;
57
+ export const CODEX_WEB_VERIFICATION_POLICY = CODEX_QA_SURFACE_ROUTING_POLICY;
58
+ export const CODEX_COMPUTER_USE_ONLY_POLICY = `Codex Computer Use is a live GUI surface for supported macOS and Windows environments, invoked with @Computer or @AppName for native apps, OS settings, browser contexts that truly require GUI-level operation, and cross-app workflows. Do not replace @Browser localhost/public-page checks or @Chrome signed-in checks with Computer Use unless the surface router records a specific GUI-only/cross-app reason. If live Computer Use tools, permissions, or app access are unavailable, mark the affected native/GUI evidence blocked or unverified instead of fabricating screenshots or actions. Codex App readiness/config checks are capability evidence only, not target interaction proof.`;
44
59
  export const IMAGEGEN_SOCIAL_SOURCE_POLICY = 'Use public X/social/community reports only as prompt-quality and workflow-sentiment hints after official OpenAI/Codex docs. Social posts are not capability specs, evidence of tool availability, or proof that a generated asset was created.';
45
60
  export const CODEX_IMAGEGEN_REQUIRED_POLICY = 'Pipeline image generation, raster asset creation/editing, and generated image-review evidence must use real Codex App imagegen/$imagegen with gpt-image-2 when that evidence is required for full verification. For newest-model image requests, prompt explicitly for "ChatGPT Images 2.0 / GPT Image 2.0 with gpt-image-2" instead of relying on generic image-generation wording. Do not substitute placeholder SVG/HTML/CSS, prose-only critique, stock-like stand-ins, manually fabricated files, or missing-output ledgers for requested/generated raster assets or required generated review images. If imagegen/gpt-image-2 is unavailable or generated annotated images cannot be created/linked, record the blocker and cap any closeout at verified_partial/reference-only instead of claiming generated-image evidence or full route verification; that partial closeout requires source screenshots plus hashes, docs evidence, source Image Voxel anchors, and Honest Mode evidence. In Codex App prompts, invoke $imagegen when live image generation is needed; SKS hooks and skills can require the policy but cannot attach missing host image-generation tools to an already-started turn. Official OpenAI/Codex docs are authoritative for capabilities, surfaces, limits, and evidence rules; X/social/community reports may inform prompt style only.';
46
61
  export const DEFAULT_CODEX_APP_PLUGINS = Object.freeze([
@@ -56,7 +71,7 @@ export const RESERVED_CODEX_PLUGIN_SKILL_NAMES = Object.freeze([
56
71
  'browser-use',
57
72
  ...DEFAULT_CODEX_APP_PLUGINS.map(([name]) => name)
58
73
  ].sort());
59
- export const FORBIDDEN_BROWSER_AUTOMATION_RE = /\b(playwright|chrome\s+mcp|browser\s+use|selenium|puppeteer)\b/i;
74
+ export const FORBIDDEN_BROWSER_AUTOMATION_RE = /\b(playwright|chrome\s+mcp|selenium|puppeteer)\b/i;
60
75
  export function evidenceMentionsForbiddenBrowserAutomation(value, seen = new Set()) {
61
76
  if (value == null)
62
77
  return false;
@@ -231,7 +246,7 @@ export function stackCurrentDocsPolicy(commandPrefix = 'sks') {
231
246
  validate_command: `${prefix} wiki validate .sneakoscope/wiki/context-pack.json`,
232
247
  priority: 'must_precede_coding_style_defaults',
233
248
  examples: [
234
- 'Supabase hosted projects should prefer sb_publishable_ and sb_secret_ keys over legacy anon/service_role keys when current docs apply.',
249
+ 'Supabase hosted projects should prefer sb_publishable_ and sb_secret_ keys over legacy anon and service role keys when current docs apply.',
235
250
  'Next.js 16 deprecates the middleware file convention in favor of proxy.ts/proxy.js.',
236
251
  'Vercel Function duration limits, including the 300s default with Fluid Compute, are deployment constraints that must shape long-running server work.'
237
252
  ]
@@ -239,7 +254,7 @@ export function stackCurrentDocsPolicy(commandPrefix = 'sks') {
239
254
  }
240
255
  export function stackCurrentDocsPolicyText(commandPrefix = 'sks') {
241
256
  const policy = stackCurrentDocsPolicy(commandPrefix);
242
- return `Stack current-docs policy: whenever project tech stack is added or a framework/package/runtime/platform version changes, fetch current docs with Context7 (resolve-library-id then query-docs) or official vendor web docs before coding, record the syntax/limits/security guidance as high-priority TriWiki claims in ${policy.memory_path}, run "${policy.refresh_command}", then "${policy.validate_command}". Treat these claims as higher priority than model-memory defaults. Examples include Supabase publishable/secret keys replacing legacy anon/service_role guidance for hosted projects, Next.js 16 proxy.ts/proxy.js replacing the deprecated middleware file convention, avoiding stale webpack defaults when newer framework guidance says otherwise, and Vercel Function duration limits such as the 300s default under Fluid Compute.`;
257
+ return `Stack current-docs policy: whenever project tech stack is added or a framework/package/runtime/platform version changes, fetch current docs with Context7 (resolve-library-id then query-docs) or official vendor web docs before coding, record the syntax/limits/security guidance as high-priority TriWiki claims in ${policy.memory_path}, run "${policy.refresh_command}", then "${policy.validate_command}". Treat these claims as higher priority than model-memory defaults. Examples include Supabase publishable/secret keys replacing legacy anon and service role guidance for hosted projects, Next.js 16 proxy.ts/proxy.js replacing the deprecated middleware file convention, avoiding stale webpack defaults when newer framework guidance says otherwise, and Vercel Function duration limits such as the 300s default under Fluid Compute.`;
243
258
  }
244
259
  export function triwikiContextTrackingText(commandPrefix = 'sks') {
245
260
  const ctx = triwikiContextTracking(commandPrefix);
@@ -549,21 +564,34 @@ export const ROUTES = [
549
564
  cliEntrypoint: 'sks db scan',
550
565
  examples: ['$DB check this migration safely']
551
566
  },
567
+ {
568
+ id: 'MadDB',
569
+ command: '$MAD-DB',
570
+ mode: 'MADDB',
571
+ route: 'first-class MadDB SQL-plane execution',
572
+ description: 'Explicit one-cycle MadDB route. When invoked by $MAD-DB or sks mad-db run|exec|apply-migration, SQL-plane mutations such as CREATE, ALTER, table/schema DROP, column add/drop/rename, INSERT, UPDATE, DELETE including all-row mutations, TRUNCATE, execute_sql, and apply_migration are authorized for the bound Supabase project and must be executed with tool-result plus read-back proof. Supabase project/account/billing/credential control-plane actions remain denied.',
573
+ requiredSkills: ['mad-db', 'db-safety-guard', 'pipeline-runner', 'context7-docs', REFLECTION_SKILL_NAME, 'honest-mode'],
574
+ appSkillAliases: ['mad-db'],
575
+ lifecycle: ['explicit_invocation', 'single_mission_capability_v2', 'ephemeral_write_profile', 'tool_inventory', 'execute_sql_or_apply_migration', 'read_back_verification', 'close_and_read_only_restore', 'post_route_reflection', 'honest_mode'],
576
+ context7Policy: 'required',
577
+ reasoningPolicy: 'xhigh',
578
+ stopGate: 'mad-db-gate.json',
579
+ cliEntrypoint: 'sks mad-db run|exec|apply-migration|status|close|revoke|doctor',
580
+ examples: ['$MAD-DB public.users legacy_code 컬럼 삭제', '$MAD-DB truncate public.staging_events']
581
+ },
552
582
  {
553
583
  id: 'MadSKS',
554
584
  command: '$MAD-SKS',
555
585
  mode: 'MADSKS',
556
586
  route: 'explicit scoped permission-widening modifier',
557
- description: 'Explicit high-risk authorization modifier that can be combined with other $ commands to temporarily open approved target-project scopes such as files, shell, package installs, services, network, Computer Use/browser workflows, generated assets, file permissions, migrations, Supabase MCP DB writes, direct execute SQL, schema cleanup, and normal targeted DB writes for the active invocation, while preserving catastrophic wipe/all-row/project-management, credential-exfiltration, persistent security-weakening, and unrequested fallback safeguards.',
587
+ description: 'Explicit high-risk authorization modifier that can be combined with other $ commands to temporarily open approved target-project scopes such as files, shell, package installs, services, network, Computer Use/browser workflows, generated assets, file permissions, migrations, Supabase MCP DB writes, direct execute SQL, schema cleanup, and normal targeted DB writes for the active invocation, while preserving catastrophic wipe/all-row/project-management, credential-exfiltration, persistent security-weakening, and unrequested fallback safeguards. It is not the first-class MadDB destructive SQL-plane route.',
558
588
  requiredSkills: ['mad-sks', 'db-safety-guard', 'pipeline-runner', 'context7-docs', REFLECTION_SKILL_NAME, 'honest-mode'],
559
- dollarAliases: ['$MAD-DB'],
560
- appSkillAliases: ['mad-db'],
561
589
  lifecycle: ['explicit_invocation', 'auto_sealed_permission_scope', 'scoped_permission_override', 'catastrophic_guard', 'permission_deactivation', 'post_route_reflection', 'honest_mode'],
562
590
  context7Policy: 'required',
563
591
  reasoningPolicy: 'xhigh',
564
592
  stopGate: 'mad-sks-gate.json',
565
593
  cliEntrypoint: 'Codex App prompt route only: $MAD-SKS <task>',
566
- examples: ['$MAD-SKS $Team target project maintenance with package/service/file and DB scopes', '$DB Supabase 점검 $MAD-SKS', '$MAD-DB enable one-cycle DB break-glass only after explicit ack']
594
+ examples: ['$MAD-SKS $Team target project maintenance with package/service/file and DB scopes', '$DB Supabase 점검 $MAD-SKS']
567
595
  },
568
596
  {
569
597
  id: 'GX',
@@ -964,6 +992,8 @@ export function routeRequiresSubagents(route, prompt = '') {
964
992
  return false;
965
993
  if (route.id === 'ImageUXReview')
966
994
  return false;
995
+ if (route.id === 'MadDB')
996
+ return false;
967
997
  if (route.id === 'Research' || route.id === 'AutoResearch')
968
998
  return true;
969
999
  if (route.id === 'Goal')
@@ -996,7 +1026,7 @@ export function simpleGitOnlyRouteId(prompt = '') {
996
1026
  }
997
1027
  export function reflectionRequiredForRoute(route) {
998
1028
  const id = String(route?.id || route?.mode || route?.route || route || '').replace(/^\$/, '');
999
- return /^(team|naruto|shadowclone|shadow-clone|kagebunshin|kage-bunshin|qaloop|qa-loop|ppt|imageuxreview|image-ux-review|research|autoresearch|db|database|madsks|mad-sks|gx)$/i.test(id);
1029
+ return /^(team|naruto|shadowclone|shadow-clone|kagebunshin|kage-bunshin|qaloop|qa-loop|ppt|imageuxreview|image-ux-review|research|autoresearch|db|database|madsks|mad-sks|maddb|mad-db|gx)$/i.test(id);
1000
1030
  }
1001
1031
  export function looksLikeCodeChangingWork(prompt = '') {
1002
1032
  const text = String(prompt || '');
@@ -1038,7 +1068,7 @@ export function routeReasoning(route, prompt = '') {
1038
1068
  const base = ALLOWED_REASONING_EFFORTS.has(route?.reasoningPolicy) ? route.reasoningPolicy : 'medium';
1039
1069
  if (hasFromChatImgSignal(text))
1040
1070
  return reasoning('xhigh', 'from_chat_img_image_work_order_analysis');
1041
- if (/(?:^|\s)sks\s+--mad\b|(?:^|\s)--mad\b|\$MAD-SKS\b|\bmad-sks\b|\bmadsks\b/i.test(text))
1071
+ if (/(?:^|\s)sks\s+--mad\b|(?:^|\s)--mad\b|\$MAD-SKS\b|\$MAD-DB\b|\bmad-sks\b|\bmadsks\b|\bmad-db\b|\bmaddb\b/i.test(text))
1042
1072
  return reasoning('xhigh', 'mad_sks_or_mad_launch_default');
1043
1073
  if (route?.id === 'Team' || route?.id === 'Naruto')
1044
1074
  return teamRouteReasoning(text);
@@ -35,7 +35,8 @@ export function computeTriWikiAffectedGraph(input) {
35
35
  const gatePacks = new Set();
36
36
  for (const impact of selected)
37
37
  gatePacks.add(impact.gate_pack);
38
- const proofLookup = selected.map((impact) => {
38
+ const includeProofLookup = input.includeProofLookup !== false;
39
+ const proofLookup = includeProofLookup ? selected.map((impact) => {
39
40
  const cacheKey = computeTriWikiCacheKey({
40
41
  root: input.root,
41
42
  id: impact.gate_id,
@@ -46,7 +47,7 @@ export function computeTriWikiAffectedGraph(input) {
46
47
  });
47
48
  const hit = readReusableTriWikiProofCard({ root: input.root, subjectId: impact.gate_id, cacheKey: cacheKey.key });
48
49
  return { impact, hit };
49
- });
50
+ }) : [];
50
51
  const reusedProofs = proofLookup
51
52
  .filter((row) => row.hit.hit && row.hit.card && row.hit.path)
52
53
  .map((row) => ({ gate_id: row.impact.gate_id, proof_id: row.hit.card.proof_id, path: row.hit.path }))
@@ -1,2 +1,2 @@
1
- export const PACKAGE_VERSION = '4.1.1';
1
+ export const PACKAGE_VERSION = '4.2.1';
2
2
  //# sourceMappingURL=version.js.map
@@ -115,7 +115,11 @@ function renderTelemetrySlotRows(snapshot) {
115
115
  });
116
116
  }
117
117
  function isMadDbActive(capability) {
118
- if (!capability || capability.enabled !== true || capability.consumed === true)
118
+ if (!capability)
119
+ return false;
120
+ if (capability.schema === 'sks.mad-db-capability.v2' && !['transport_ready', 'active'].includes(String(capability.status || '')))
121
+ return false;
122
+ if (capability.schema !== 'sks.mad-db-capability.v2' && (capability.enabled !== true || capability.consumed === true))
119
123
  return false;
120
124
  const expires = Date.parse(capability.expires_at || '');
121
125
  return Number.isFinite(expires) && expires > Date.now();
@@ -8,6 +8,7 @@ import { sourceSnapshot } from './lib/ensure-dist-fresh.js';
8
8
  const root = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..', '..');
9
9
  const distRoot = path.join(root, 'dist');
10
10
  const issues = [];
11
+ const contractOnlyMarker = 'contract' + '_only';
11
12
  if (!fs.existsSync(distRoot))
12
13
  issues.push('dist_missing');
13
14
  requiredFile('dist/bin/sks.js');
@@ -63,8 +64,8 @@ if (fs.existsSync(distRoot)) {
63
64
  if (!rel.endsWith('.js'))
64
65
  continue;
65
66
  const text = fs.readFileSync(file, 'utf8');
66
- if (text.includes('contract_only'))
67
- issues.push(`contract_only:${rel}`);
67
+ if (text.includes(contractOnlyMarker))
68
+ issues.push(`${contractOnlyMarker}:${rel}`);
68
69
  if (/from\s+['"][^'"]+\.mjs['"]|import\(\s*['"][^'"]+\.mjs['"]\s*\)/.test(text)) {
69
70
  issues.push(`imports_mjs:${rel}`);
70
71
  }
@@ -8,11 +8,12 @@ const manifest = parity.manifest;
8
8
  const dep = pkg.dependencies?.['@openai/codex-sdk'];
9
9
  const lockSdk = lock.packages?.['node_modules/@openai/codex-sdk']?.version;
10
10
  const lockCli = lock.packages?.['node_modules/@openai/codex']?.version;
11
+ const lockRootVersion = lock.packages?.['']?.version || lock.version;
11
12
  assertGate(parity.ok, 'Codex release manifest TS/JSON parity must hold', parity);
12
13
  assertGate(dep === manifest.sdkVersion, 'package.json must pin @openai/codex-sdk exactly to manifest sdkVersion', { dep, sdkVersion: manifest.sdkVersion });
13
14
  assertGate(lockSdk === manifest.sdkVersion, 'package-lock must resolve @openai/codex-sdk to manifest sdkVersion', { lockSdk, sdkVersion: manifest.sdkVersion });
14
15
  assertGate(lockCli === manifest.requiredCliVersion, 'package-lock must resolve @openai/codex to manifest requiredCliVersion', { lockCli, requiredCliVersion: manifest.requiredCliVersion });
15
- assertGate(pkg.version === '4.1.1', 'package version must be 4.1.1', { version: pkg.version });
16
+ assertGate(pkg.version === lockRootVersion, 'package version must match package-lock root version', { version: pkg.version, lockRootVersion });
16
17
  emitGate('codex:0142:manifest', {
17
18
  manifest_sha256: parity.manifest_sha256,
18
19
  target_tag: manifest.targetTag,
@@ -10,6 +10,7 @@ const required = [
10
10
  'codex-control:thread-registry',
11
11
  'codex-control:side-effect-scope',
12
12
  'codex-control:empty-result-retry',
13
+ 'codex-control:model-capacity-fallback',
13
14
  'codex-control:stream-idle-watchdog',
14
15
  'codex-control:tool-call-sequence-repair',
15
16
  'codex-control:keepalive-no-cot-leak'
@@ -0,0 +1,53 @@
1
+ #!/usr/bin/env node
2
+ // @ts-nocheck
3
+ import { assertGate, emitGate, importDist, readText } from './sks-1-18-gate-lib.js';
4
+ const mod = await importDist('core/codex-control/codex-reliability-shield.js');
5
+ let attempts = 0;
6
+ const seenAttempts = [];
7
+ const result = await mod.runWithCodexReliabilityShield(baseTask(), async (attempt) => {
8
+ attempts += 1;
9
+ seenAttempts.push(attempt);
10
+ return {
11
+ ok: false,
12
+ sdkThreadId: '',
13
+ sdkRunId: null,
14
+ events: [{ type: 'turn.failed', message: 'Selected model is at capacity. Please try a different model.' }],
15
+ finalResponse: '',
16
+ structuredOutput: null,
17
+ blockers: ['Selected model is at capacity. Please try a different model.']
18
+ };
19
+ });
20
+ assertGate(attempts === 1, 'model capacity must not retry with downgraded pressure', { attempts, seenAttempts, result });
21
+ assertGate(result.reliabilityShield.ok === false, 'terminal model capacity must remain visible as a blocker', result.reliabilityShield);
22
+ assertGate(result.reliabilityShield.retry_count === 0, 'capacity retry count must stay zero', result.reliabilityShield);
23
+ assertGate(result.reliabilityShield.model_capacity_retry_count === 0, 'model capacity retry must not be counted', result.reliabilityShield);
24
+ assertGate(result.reliabilityShield.selected_model_capacity_fallback === false, 'capacity fallback flag must not be selected', result.reliabilityShield);
25
+ assertGate(result.reliabilityShield.attempts[0].retryable === false, 'capacity attempt must be terminal', result.reliabilityShield.attempts[0]);
26
+ assertGate(result.reliabilityShield.attempts[0].retry_reason === null, 'capacity retry reason must stay null', result.reliabilityShield.attempts[0]);
27
+ assertGate(result.reliabilityShield.attempts[0].blockers.includes('codex_model_capacity_unavailable'), 'capacity blocker must be explicit', result.reliabilityShield.attempts[0]);
28
+ assertGate(mod.isCodexModelCapacityError({ blockers: ['Selected model is at capacity. Please try a different model.'] }, []) === true, 'capacity classifier must recognize common Codex error text');
29
+ const runnerSource = readText('src/core/codex-control/codex-task-runner.ts');
30
+ assertGate(!runnerSource.includes("capacity_fallback_service_tier: 'standard'"), 'capacity fallback must not force standard service tier');
31
+ assertGate(!runnerSource.includes("capacity_fallback_reasoning_effort: 'low'"), 'capacity fallback must not force low reasoning');
32
+ assertGate(!runnerSource.includes('SKS_CODEX_CAPACITY_FALLBACK_MODEL'), 'capacity fallback model override must be removed');
33
+ emitGate('codex-control:model-capacity-fallback', {
34
+ attempts,
35
+ retry_count: result.reliabilityShield.retry_count,
36
+ model_capacity_retry_count: result.reliabilityShield.model_capacity_retry_count
37
+ });
38
+ function baseTask() {
39
+ return {
40
+ route: '$Agent',
41
+ tier: 'worker',
42
+ missionId: 'M-model-capacity-fallback',
43
+ cwd: process.cwd(),
44
+ prompt: 'model capacity fallback fixture',
45
+ outputSchemaId: 'sks.agent-worker-result.v1',
46
+ outputSchema: {},
47
+ sandboxPolicy: 'read-only',
48
+ requestedScopeContract: { read_only: true },
49
+ reliabilityPolicy: { maxEmptyResultRetries: 1, idleTimeoutMs: 5000, timeoutClass: 'short' },
50
+ mutationLedgerRoot: process.cwd()
51
+ };
52
+ }
53
+ //# sourceMappingURL=codex-control-model-capacity-fallback-check.js.map
@@ -114,11 +114,17 @@ const ALLOWLIST = [
114
114
  reason: 'migration journal writes hashes and rollback metadata, not raw secret config values',
115
115
  expires: '3.2.0'
116
116
  },
117
+ {
118
+ file: 'src/core/mad-db/mad-db-runtime-profile.ts',
119
+ pattern: /codex-mad-db\.config\.toml|writeTextAtomic/,
120
+ reason: 'MAD-DB runtime profile writes only a mission-local temporary Codex profile and verifies read-only restoration on close',
121
+ expires: '4.3.0'
122
+ },
117
123
  {
118
124
  file: 'src/core/providers/glm/naruto/glm-naruto-trace.ts',
119
125
  pattern: /mission-result\.json|sanitizeArtifact/,
120
126
  reason: 'GLM Naruto trace writer persists sanitized mission-result proof artifacts, not raw env secret files',
121
- expires: '4.2.0'
127
+ expires: '4.3.0'
122
128
  }
123
129
  ];
124
130
  const sources = listSourceFiles().map((file) => ({