sneakoscope 4.1.1 → 4.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -10
- package/crates/sks-core/Cargo.lock +1 -1
- package/crates/sks-core/Cargo.toml +1 -1
- package/crates/sks-core/src/main.rs +1 -1
- package/dist/bin/sks.js +1 -1
- package/dist/cli/command-registry.js +1 -1
- package/dist/core/auto-review.js +1 -1
- package/dist/core/codex-control/codex-app-server-v2-client.js +86 -2
- package/dist/core/codex-control/codex-reliability-shield.js +26 -5
- package/dist/core/codex-control/codex-task-runner.js +7 -1
- package/dist/core/codex-control/model-call-concurrency.js +1 -1
- package/dist/core/commands/mad-db-command.js +146 -51
- package/dist/core/commands/mad-sks-command.js +15 -31
- package/dist/core/commands/qa-loop-command.js +23 -7
- package/dist/core/db-safety.js +35 -37
- package/dist/core/doctor/supabase-mcp-repair.js +2 -2
- package/dist/core/feature-registry.js +1 -1
- package/dist/core/fsx.js +1 -1
- package/dist/core/hooks-runtime.js +1 -1
- package/dist/core/init.js +5 -4
- package/dist/core/mad-db/mad-db-capability.js +203 -74
- package/dist/core/mad-db/mad-db-coordinator.js +287 -0
- package/dist/core/mad-db/mad-db-executor.js +156 -0
- package/dist/core/mad-db/mad-db-ledger.js +1 -1
- package/dist/core/mad-db/mad-db-lock.js +40 -0
- package/dist/core/mad-db/mad-db-operation-store.js +140 -0
- package/dist/core/mad-db/mad-db-policy-resolver.js +42 -22
- package/dist/core/mad-db/mad-db-policy.js +195 -0
- package/dist/core/mad-db/mad-db-postconditions.js +30 -0
- package/dist/core/mad-db/mad-db-recovery.js +27 -0
- package/dist/core/mad-db/mad-db-result-lifecycle.js +31 -102
- package/dist/core/mad-db/mad-db-runtime-profile.js +121 -0
- package/dist/core/mad-db/mad-db-target.js +64 -0
- package/dist/core/managed-assets/managed-assets-manifest.js +1 -1
- package/dist/core/pipeline-internals/runtime-core.js +40 -0
- package/dist/core/providers/glm/bench/glm-benchmark-types.js +1 -1
- package/dist/core/qa-loop/qa-app-server-driver.js +134 -0
- package/dist/core/qa-loop/qa-contract-v2.js +231 -0
- package/dist/core/qa-loop/qa-gate-v2.js +132 -0
- package/dist/core/qa-loop/qa-runtime-artifacts.js +53 -0
- package/dist/core/qa-loop/qa-surface-router.js +114 -0
- package/dist/core/qa-loop/qa-types.js +18 -0
- package/dist/core/qa-loop.js +83 -26
- package/dist/core/release/gate-manifest.js +1 -0
- package/dist/core/release/release-gate-dag.js +6 -5
- package/dist/core/release/sla-scheduler.js +1 -1
- package/dist/core/routes.js +42 -12
- package/dist/core/triwiki/triwiki-affected-graph.js +3 -2
- package/dist/core/version.js +1 -1
- package/dist/core/zellij/zellij-slot-column-anchor.js +5 -1
- package/dist/scripts/check-dist-runtime.js +3 -2
- package/dist/scripts/codex-0142-manifest-check.js +2 -1
- package/dist/scripts/codex-control-all-pipelines-check.js +1 -0
- package/dist/scripts/codex-control-model-capacity-fallback-check.js +53 -0
- package/dist/scripts/config-managed-merge-callsite-coverage-check.js +7 -1
- package/dist/scripts/loop-directive-check-lib.js +78 -1
- package/dist/scripts/mad-db-capability-check.js +13 -2
- package/dist/scripts/mad-db-command-check.js +7 -5
- package/dist/scripts/mad-db-hook-idempotency-check.js +21 -0
- package/dist/scripts/mad-db-ledger-check.js +2 -1
- package/dist/scripts/mad-db-lifecycle-hook-decision-check.js +5 -4
- package/dist/scripts/mad-db-mad-command-check.js +29 -16
- package/dist/scripts/mad-db-mcp-result-lifecycle-check.js +11 -10
- package/dist/scripts/mad-db-one-cycle-bounded-check.js +15 -18
- package/dist/scripts/mad-db-one-cycle-consumption-check.js +3 -3
- package/dist/scripts/mad-db-operation-lifecycle-blackbox.js +9 -9
- package/dist/scripts/mad-db-operation-lifecycle-ledger-check.js +6 -6
- package/dist/scripts/mad-db-parallel-lifecycle-check.js +24 -0
- package/dist/scripts/mad-db-policy-v2-check.js +20 -0
- package/dist/scripts/mad-db-priority-resolver-check.js +5 -5
- package/dist/scripts/mad-db-real-supabase-e2e.js +166 -0
- package/dist/scripts/mad-db-route-identity-check.js +28 -0
- package/dist/scripts/mad-db-runtime-profile-lifecycle-check.js +24 -0
- package/dist/scripts/mad-db-safety-conflict-matrix-check.js +3 -3
- package/dist/scripts/mad-db-skill-policy-snapshot-check.js +15 -0
- package/dist/scripts/qa-loop-app-server-driver-check.js +74 -0
- package/dist/scripts/qa-loop-surface-router-check.js +49 -0
- package/dist/scripts/release-check-dynamic-execute.js +1 -1
- package/dist/scripts/release-dag-full-coverage-check.js +6 -0
- package/dist/scripts/release-triwiki-first-runner-blackbox.js +5 -1
- package/dist/scripts/runtime-ts-rust-boundary-check.js +1 -1
- package/dist/scripts/triwiki-affected-graph-check.js +2 -2
- package/package.json +18 -5
- package/schemas/mad-db/mad-db-capability.schema.json +92 -19
package/dist/core/qa-loop.js
CHANGED
|
@@ -1,14 +1,18 @@
|
|
|
1
1
|
import path from 'node:path';
|
|
2
2
|
import { exists, nowIso, readJson, readText, writeJsonAtomic, writeTextAtomic, PACKAGE_VERSION } from './fsx.js';
|
|
3
|
-
import { CODEX_APP_IMAGE_GENERATION_DOC_URL, CODEX_IMAGEGEN_REQUIRED_POLICY, CODEX_WEB_VERIFICATION_EVIDENCE_SOURCE, CODEX_WEB_VERIFICATION_POLICY, evidenceMentionsForbiddenBrowserAutomation, evidenceMentionsForbiddenWebComputerUseEvidence } from './routes.js';
|
|
3
|
+
import { CODEX_APP_IMAGE_GENERATION_DOC_URL, CODEX_CHROME_EXTENSION_EVIDENCE_SOURCE, CODEX_COMPUTER_USE_EVIDENCE_SOURCE, CODEX_IMAGEGEN_REQUIRED_POLICY, CODEX_IN_APP_BROWSER_EVIDENCE_SOURCE, CODEX_WEB_VERIFICATION_EVIDENCE_SOURCE, CODEX_WEB_VERIFICATION_POLICY, evidenceMentionsForbiddenBrowserAutomation, evidenceMentionsForbiddenWebComputerUseEvidence } from './routes.js';
|
|
4
4
|
import { appendAgentLedgerEvent, initializeAgentCentralLedger } from './agents/agent-central-ledger.js';
|
|
5
5
|
import { resolveCodexAppExecutionProfile } from './codex-app/codex-app-execution-profile.js';
|
|
6
6
|
import { resolveCodexNativeInvocationPlan } from './codex-native/codex-native-invocation-router.js';
|
|
7
7
|
import { imageDimensions, sha256File } from './wiki-image/image-hash.js';
|
|
8
|
+
import { initializeQaRuntimeArtifacts } from './qa-loop/qa-runtime-artifacts.js';
|
|
9
|
+
import { evaluateQaGateV2 } from './qa-loop/qa-gate-v2.js';
|
|
10
|
+
import { DEFAULT_QA_MAX_CYCLES, QA_GATE_V2_ARTIFACT, QA_SURFACE_SELECTION_ARTIFACT } from './qa-loop/qa-types.js';
|
|
8
11
|
export const QA_LOOP_ROUTE = 'QALoop';
|
|
9
12
|
export const QA_LOOP_VISUAL_EVIDENCE_ARTIFACT = 'qa-loop/visual-evidence.json';
|
|
10
13
|
const QA_REPORT_SUFFIX = 'qa-report.md';
|
|
11
|
-
const
|
|
14
|
+
const UI_SURFACE_ROUTER_ACK = 'use_codex_surface_router_browser_chrome_computer_no_synthetic_evidence';
|
|
15
|
+
const LEGACY_UI_CHROME_EXTENSION_FIRST_ACK = 'use_codex_chrome_extension_first_no_computer_use_for_web_ui_or_mark_unverified';
|
|
12
16
|
const GPT_IMAGE_2_ANNOTATED_REVIEW_REQUIRED_ACK = 'yes_gpt_image_2_annotated_review';
|
|
13
17
|
const IMAGE_FILE_RE = /\.(png|jpe?g|webp|gif)$/i;
|
|
14
18
|
export const QA_NATIVE_AGENT_PERSONAS = Object.freeze([
|
|
@@ -166,7 +170,7 @@ export function inferQaLoopAnswers(prompt = '') {
|
|
|
166
170
|
'검증하지 못한 UI/API 범위는 통과로 주장하지 않고 QA 리포트에 남긴다.'
|
|
167
171
|
];
|
|
168
172
|
if (isUiScope(scope))
|
|
169
|
-
acceptance.push('UI E2E 통과 증거는
|
|
173
|
+
acceptance.push('UI E2E 통과 증거는 surface router가 고른 @Browser/@Chrome/@Computer 실제 action·observation ledger와 필요한 screenshot/hash를 기록해야 한다.');
|
|
170
174
|
if (wantsGptImage2Review)
|
|
171
175
|
acceptance.push('gpt-image-2 annotated review image가 필요한 경우 실제 Codex App $imagegen/gpt-image-2 출력 파일 path, sha256, model, provider를 기록해야 한다.');
|
|
172
176
|
return {
|
|
@@ -181,17 +185,17 @@ export function inferQaLoopAnswers(prompt = '') {
|
|
|
181
185
|
EXTERNAL_SIDE_EFFECT_POLICY: 'block_all_external_side_effects',
|
|
182
186
|
...login,
|
|
183
187
|
CREDENTIAL_STORAGE_ACK: 'never_store_credentials_in_artifacts_or_wiki',
|
|
184
|
-
UI_CHROME_EXTENSION_ACK:
|
|
188
|
+
UI_CHROME_EXTENSION_ACK: UI_SURFACE_ROUTER_ACK,
|
|
185
189
|
QA_VISUAL_REVIEW_IMAGEGEN_REQUIRED: wantsGptImage2Review ? GPT_IMAGE_2_ANNOTATED_REVIEW_REQUIRED_ACK : 'not_required',
|
|
186
190
|
TEAM_MODE_ALLOWED: 'no_parent_only',
|
|
187
|
-
MAX_QA_CYCLES:
|
|
191
|
+
MAX_QA_CYCLES: String(DEFAULT_QA_MAX_CYCLES),
|
|
188
192
|
ACCEPTANCE_CRITERIA: acceptance,
|
|
189
193
|
NON_GOALS: [
|
|
190
194
|
'결제, 실제 이메일/SMS 발송, 관리자 권한 변경, 데이터 삭제, 프로덕션 데이터 변경은 테스트하지 않는다.'
|
|
191
195
|
],
|
|
192
196
|
RISK_BOUNDARY: [
|
|
193
197
|
'실제 사용자 데이터, 인증 권한, 결제, 메시지 발송, 웹훅, 외부 서비스 상태를 생성/수정/삭제하지 않는다.',
|
|
194
|
-
'Codex
|
|
198
|
+
'선택된 Codex App visual surface의 실제 action·observation evidence가 없으면 UI 검증 완료로 주장하지 않는다.',
|
|
195
199
|
'로그인이 필요하지만 임시 테스트 자격증명이 없으면 인증 구간은 차단/미검증으로 기록한다.'
|
|
196
200
|
],
|
|
197
201
|
MID_RUN_UNKNOWN_POLICY: ['preserve_existing_behavior', 'defer_optional_scope', 'block_only_if_no_safe_path']
|
|
@@ -245,7 +249,7 @@ export function qaLoopQuestionSlots() {
|
|
|
245
249
|
{ id: 'TEMP_TEST_CREDENTIALS_READY', question: 'If login is required, are test-only credentials ready to provide ephemerally during the run?', required: true, type: 'enum', options: ['not_required', 'yes_temp_only', 'no_block_authenticated_tests'] },
|
|
246
250
|
{ id: 'TEST_CREDENTIALS_RUNTIME_SOURCE', question: 'If login is required, how will test-only credentials be provided without saving the values?', required: true, type: 'enum', options: ['not_required', 'ephemeral_chat_only', 'environment_variables', 'secret_manager'] },
|
|
247
251
|
{ id: 'CREDENTIAL_STORAGE_ACK', question: 'Acknowledge credential handling policy.', required: true, type: 'enum', options: ['never_store_credentials_in_artifacts_or_wiki'] },
|
|
248
|
-
{ id: 'UI_CHROME_EXTENSION_ACK', question: 'Acknowledge UI E2E evidence policy:
|
|
252
|
+
{ id: 'UI_CHROME_EXTENSION_ACK', question: 'Acknowledge UI E2E evidence policy: QA-LOOP routes local/public web to @Browser, signed-in web to @Chrome, native/cross-app GUI to @Computer, and never treats synthetic artifacts as real proof.', required: true, type: 'enum', options: [UI_SURFACE_ROUTER_ACK, LEGACY_UI_CHROME_EXTENSION_FIRST_ACK] },
|
|
249
253
|
{ id: 'TEAM_MODE_ALLOWED', question: 'May QA-LOOP use Team/subagents where useful?', required: true, type: 'enum', options: ['yes_parallel_where_safe', 'no_parent_only'] },
|
|
250
254
|
{ id: 'MAX_QA_CYCLES', question: 'How many no-question QA cycles are allowed before pausing?', required: true, type: 'string' },
|
|
251
255
|
{ id: 'ACCEPTANCE_CRITERIA', question: 'List the QA completion criteria.', required: true, type: 'array_or_string' },
|
|
@@ -269,8 +273,8 @@ export function validateQaLoopAnswers(schema, answers = {}) {
|
|
|
269
273
|
errors.push({ slot: 'QA_MUTATION_POLICY', error: 'production_deployed_qa_is_read_only_smoke_only' });
|
|
270
274
|
if (answers.DESTRUCTIVE_DEPLOYED_TESTS_ALLOWED !== 'never')
|
|
271
275
|
errors.push({ slot: 'DESTRUCTIVE_DEPLOYED_TESTS_ALLOWED', error: 'destructive_deployed_tests_never_allowed' });
|
|
272
|
-
if (isUiScope(answers.QA_SCOPE) && answers.UI_CHROME_EXTENSION_ACK
|
|
273
|
-
errors.push({ slot: 'UI_CHROME_EXTENSION_ACK', error: '
|
|
276
|
+
if (isUiScope(answers.QA_SCOPE) && ![UI_SURFACE_ROUTER_ACK, LEGACY_UI_CHROME_EXTENSION_FIRST_ACK].includes(answers.UI_CHROME_EXTENSION_ACK))
|
|
277
|
+
errors.push({ slot: 'UI_CHROME_EXTENSION_ACK', error: 'ui_e2e_requires_codex_surface_router_ack' });
|
|
274
278
|
if (answers.LOGIN_REQUIRED === 'yes' && !['yes_temp_only', 'no_block_authenticated_tests'].includes(answers.TEMP_TEST_CREDENTIALS_READY))
|
|
275
279
|
errors.push({ slot: 'TEMP_TEST_CREDENTIALS_READY', error: 'authenticated_tests_require_ephemeral_test_credentials_or_must_be_blocked' });
|
|
276
280
|
if (answers.LOGIN_REQUIRED === 'yes' && answers.TEMP_TEST_CREDENTIALS_READY === 'yes_temp_only' && answers.TEST_CREDENTIALS_RUNTIME_SOURCE === 'not_required')
|
|
@@ -322,6 +326,7 @@ export function defaultQaGate(contract = {}, opts = {}) {
|
|
|
322
326
|
const gptImage2ReviewRequired = qaGptImage2AnnotatedReviewRequired(contract, contract.prompt);
|
|
323
327
|
const reportFile = opts.reportFile || qaReportFilename();
|
|
324
328
|
const corrective = a.QA_CORRECTIVE_POLICY !== 'report_only_no_code_changes';
|
|
329
|
+
const selectedSurface = opts.qaRuntime?.surface?.selected_surface || null;
|
|
325
330
|
return {
|
|
326
331
|
passed: false,
|
|
327
332
|
clarification_contract_sealed: Boolean(contract.sealed_hash),
|
|
@@ -333,6 +338,15 @@ export function defaultQaGate(contract = {}, opts = {}) {
|
|
|
333
338
|
deployed_destructive_tests_blocked: a.TARGET_ENVIRONMENT === 'local_dev_server' || a.DESTRUCTIVE_DEPLOYED_TESTS_ALLOWED === 'never',
|
|
334
339
|
credentials_not_persisted: false,
|
|
335
340
|
ui_e2e_required: uiRequired,
|
|
341
|
+
qa_contract_v2_required: opts.qaRuntime ? true : false,
|
|
342
|
+
qa_surface_selection_artifact: opts.qaRuntime ? QA_SURFACE_SELECTION_ARTIFACT : null,
|
|
343
|
+
qa_gate_v2_artifact: opts.qaRuntime ? QA_GATE_V2_ARTIFACT : null,
|
|
344
|
+
qa_surface_selected: selectedSurface,
|
|
345
|
+
ui_selected_surface: selectedSurface,
|
|
346
|
+
ui_live_surface_preflight_passed: !uiRequired,
|
|
347
|
+
ui_real_action_count: 0,
|
|
348
|
+
ui_observation_count: 0,
|
|
349
|
+
same_flow_replay_complete: false,
|
|
336
350
|
chrome_extension_preflight_passed: !uiRequired,
|
|
337
351
|
ui_chrome_extension_evidence: !uiRequired,
|
|
338
352
|
ui_computer_use_evidence: false,
|
|
@@ -385,6 +399,13 @@ export async function writeQaLoopArtifacts(dir, mission, contract) {
|
|
|
385
399
|
const root = missionRootFromDir(dir);
|
|
386
400
|
const executionProfile = root ? await resolveCodexAppExecutionProfile({ root }).catch(() => null) : null;
|
|
387
401
|
const codexNativeInvocation = root ? await resolveQaCodexNativeInvocation(root, mission.id).catch(() => null) : null;
|
|
402
|
+
const qaRuntime = await initializeQaRuntimeArtifacts(dir, {
|
|
403
|
+
...contract,
|
|
404
|
+
prompt: mission.prompt || contract.prompt,
|
|
405
|
+
mission_id: mission.id || contract.mission_id
|
|
406
|
+
}, {
|
|
407
|
+
missionId: mission.id || contract.mission_id || null
|
|
408
|
+
}).catch(() => null);
|
|
388
409
|
if (executionProfile)
|
|
389
410
|
await writeJsonAtomic(path.join(dir, 'qa-loop', 'execution-profile.json'), executionProfile).catch(() => undefined);
|
|
390
411
|
if (codexNativeInvocation)
|
|
@@ -397,11 +418,20 @@ export async function writeQaLoopArtifacts(dir, mission, contract) {
|
|
|
397
418
|
codex_app_execution_profile: executionProfile ? compactExecutionProfile(executionProfile) : null,
|
|
398
419
|
codex_native_invocation: codexNativeInvocation,
|
|
399
420
|
target: { scope: a.QA_SCOPE, environment: a.TARGET_ENVIRONMENT, base_url: a.TARGET_BASE_URL, api_base_url: a.API_BASE_URL },
|
|
400
|
-
|
|
421
|
+
qa_runtime_v2: qaRuntime ? {
|
|
422
|
+
contract_artifact: 'qa-loop/qa-contract-v2.json',
|
|
423
|
+
surface_selection_artifact: QA_SURFACE_SELECTION_ARTIFACT,
|
|
424
|
+
selected_surface: qaRuntime.surface.selected_surface,
|
|
425
|
+
journey_graph_artifact: 'qa-loop/qa-journey-graph.json',
|
|
426
|
+
gate_artifact: QA_GATE_V2_ARTIFACT
|
|
427
|
+
} : null,
|
|
428
|
+
safety: { mutation_policy: a.QA_MUTATION_POLICY, deployed_destructive_tests_allowed: 'never', credentials: 'temp_only_never_saved', ui_evidence: 'codex_surface_router_live_action_required_for_ui_e2e', visual_review: 'gpt_image_2_annotated_review_required_when_contract_requests_it' },
|
|
401
429
|
checklist
|
|
402
430
|
});
|
|
403
431
|
await writeJsonAtomic(path.join(dir, QA_LOOP_VISUAL_EVIDENCE_ARTIFACT), buildQaLoopVisualEvidenceArtifact(mission, contract));
|
|
404
|
-
await writeJsonAtomic(path.join(dir, 'qa-gate.json'), defaultQaGate(contract, { reportFile, executionProfile, codexNativeInvocation }));
|
|
432
|
+
await writeJsonAtomic(path.join(dir, 'qa-gate.json'), defaultQaGate(contract, { reportFile, executionProfile, codexNativeInvocation, qaRuntime }));
|
|
433
|
+
if (qaRuntime)
|
|
434
|
+
await evaluateQaGateV2(dir).catch(() => undefined);
|
|
405
435
|
await writeTextAtomic(path.join(dir, reportFile), qaReportTemplate(mission, contract, checklist));
|
|
406
436
|
return { checklist_count: checklist.length, report_file: reportFile };
|
|
407
437
|
}
|
|
@@ -441,8 +471,18 @@ export async function ensureQaLoopVisualEvidenceContract(dir, mission = {}, cont
|
|
|
441
471
|
}
|
|
442
472
|
export async function evaluateQaGate(dir) {
|
|
443
473
|
const gate = await readJson(path.join(dir, 'qa-gate.json'), {});
|
|
474
|
+
const surfaceSelection = await readJson(path.join(dir, QA_SURFACE_SELECTION_ARTIFACT), null);
|
|
475
|
+
const selectedSurface = gate.ui_selected_surface || gate.qa_surface_selected || surfaceSelection?.selected_surface || null;
|
|
476
|
+
const expectedEvidenceSource = evidenceSourceForSurface(selectedSurface);
|
|
477
|
+
const gateV2 = gate.qa_contract_v2_required === true ? await evaluateQaGateV2(dir).catch((err) => ({
|
|
478
|
+
passed: false,
|
|
479
|
+
blockers: [`qa_gate_v2_evaluation_failed:${err?.message || String(err)}`],
|
|
480
|
+
unverified: []
|
|
481
|
+
})) : null;
|
|
444
482
|
const reportFile = qaReportFileFromGate(gate);
|
|
445
483
|
const reasons = [];
|
|
484
|
+
if (gateV2 && gateV2.passed !== true)
|
|
485
|
+
reasons.push(...(gateV2.blockers || []));
|
|
446
486
|
for (const key of ['clarification_contract_sealed', 'qa_report_written', 'qa_ledger_complete', 'checklist_completed', 'safety_reviewed', 'deployed_destructive_tests_blocked', 'credentials_not_persisted', 'honest_mode_complete']) {
|
|
447
487
|
if (gate[key] !== true)
|
|
448
488
|
reasons.push(`${key}_missing`);
|
|
@@ -460,17 +500,22 @@ export async function evaluateQaGate(dir) {
|
|
|
460
500
|
if (gate.unsafe_external_side_effects === true)
|
|
461
501
|
reasons.push('unsafe_external_side_effects');
|
|
462
502
|
if (gate.ui_e2e_required === true) {
|
|
463
|
-
if (
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
503
|
+
if (!selectedSurface || selectedSurface === 'codex_chrome_extension') {
|
|
504
|
+
if (gate.chrome_extension_preflight_passed !== true)
|
|
505
|
+
reasons.push('chrome_extension_preflight_missing');
|
|
506
|
+
if (gate.ui_chrome_extension_evidence !== true)
|
|
507
|
+
reasons.push('ui_chrome_extension_evidence_missing');
|
|
508
|
+
}
|
|
509
|
+
else if (gate.ui_live_surface_preflight_passed !== true) {
|
|
510
|
+
reasons.push('ui_live_surface_preflight_missing');
|
|
511
|
+
}
|
|
512
|
+
if (gate.ui_computer_use_evidence === true && selectedSurface !== 'codex_computer_use')
|
|
468
513
|
reasons.push('ui_computer_use_evidence_forbidden_for_web');
|
|
469
|
-
if (gate.ui_evidence_source !==
|
|
470
|
-
reasons.push(
|
|
514
|
+
if (expectedEvidenceSource && gate.ui_evidence_source !== expectedEvidenceSource)
|
|
515
|
+
reasons.push(`ui_evidence_source_not_${expectedEvidenceSource}`);
|
|
471
516
|
if (evidenceMentionsForbiddenBrowserAutomation({ evidence: gate.evidence, notes: gate.notes, ui_evidence_source: gate.ui_evidence_source }))
|
|
472
517
|
reasons.push('forbidden_browser_automation_evidence');
|
|
473
|
-
if (evidenceMentionsForbiddenWebComputerUseEvidence({ evidence: gate.evidence, ui_evidence_source: gate.ui_evidence_source }))
|
|
518
|
+
if (selectedSurface !== 'codex_computer_use' && evidenceMentionsForbiddenWebComputerUseEvidence({ evidence: gate.evidence, ui_evidence_source: gate.ui_evidence_source }))
|
|
474
519
|
reasons.push('computer_use_web_evidence_forbidden');
|
|
475
520
|
reasons.push(...await missingQaLoopVisualEvidence(dir, gate));
|
|
476
521
|
}
|
|
@@ -502,7 +547,7 @@ export async function evaluateQaGate(dir) {
|
|
|
502
547
|
reasons.push('qa_ledger_missing');
|
|
503
548
|
const uniqueReasons = [...new Set(reasons)];
|
|
504
549
|
const passed = gate.passed === true && uniqueReasons.length === 0;
|
|
505
|
-
const result = { checked_at: nowIso(), passed, reasons: uniqueReasons, gate };
|
|
550
|
+
const result = { checked_at: nowIso(), passed, reasons: uniqueReasons, gate, gate_v2: gateV2 };
|
|
506
551
|
await writeJsonAtomic(path.join(dir, 'qa-gate.evaluated.json'), result);
|
|
507
552
|
return result;
|
|
508
553
|
}
|
|
@@ -592,9 +637,9 @@ CONTRACT:
|
|
|
592
637
|
${JSON.stringify(contract, null, 2)}
|
|
593
638
|
${imageContractText}${appHandoffText}${executionProfileText}
|
|
594
639
|
VISUAL EVIDENCE CONTRACT:
|
|
595
|
-
- For
|
|
596
|
-
- If decision-contract.json answers set QA_VISUAL_REVIEW_IMAGEGEN_REQUIRED=${GPT_IMAGE_2_ANNOTATED_REVIEW_REQUIRED_ACK}, use Codex App $imagegen/gpt-image-2 (${CODEX_APP_IMAGE_GENERATION_DOC_URL}) to produce a real generated annotated review image from the
|
|
597
|
-
- Do not substitute prose-only critique, Playwright/Selenium/Puppeteer
|
|
640
|
+
- For UI QA, do not mark live UI evidence true unless qa-loop/qa-surface-selection.json selected the correct @Browser/@Chrome/@Computer surface and action/observation ledgers record real user-like actions.
|
|
641
|
+
- If decision-contract.json answers set QA_VISUAL_REVIEW_IMAGEGEN_REQUIRED=${GPT_IMAGE_2_ANNOTATED_REVIEW_REQUIRED_ACK}, use Codex App $imagegen/gpt-image-2 (${CODEX_APP_IMAGE_GENERATION_DOC_URL}) to produce a real generated annotated review image from the selected-surface source screenshot. Record its path, sha256, model=gpt-image-2, provider=Codex App $imagegen, and source_screenshot_artifact in ${QA_LOOP_VISUAL_EVIDENCE_ARTIFACT} and qa-gate.json.
|
|
642
|
+
- Do not substitute prose-only critique, Playwright/Selenium/Puppeteer screenshots, static screenshots, plugin cache, placeholder images, fake fixtures, or direct API fallback as full UI visual evidence.
|
|
598
643
|
Previous tail:
|
|
599
644
|
${String(previous || '').slice(-2500)}
|
|
600
645
|
`;
|
|
@@ -623,7 +668,7 @@ function qaChecklist(a) {
|
|
|
623
668
|
['preflight.roles', 'Map roles, permissions, protected areas.']
|
|
624
669
|
];
|
|
625
670
|
if (qaUiRequired(a))
|
|
626
|
-
cases.push(['ui.
|
|
671
|
+
cases.push(['ui.surface_router', CODEX_WEB_VERIFICATION_POLICY], ['ui.navigation', 'Check primary navigation, deep links, back/forward, refresh, and protected routes.'], ['ui.auth', 'Check login, logout, session expiry, unauthorized access, and role-specific visibility.'], ['ui.forms', 'Check required fields, validation, disabled states, success, and failure.'], ['ui.states', 'Check loading, empty, error, retry, offline/timeout, and slow network states.'], ['ui.crud', 'Check allowed create/change flows and block forbidden destructive flows by environment.'], ['ui.responsive', 'Check desktop, tablet, mobile, overflow, long text, and keyboard focus order.'], ['ui.a11y', 'Check labels, focus traps, modals, contrast-sensitive controls, and screen-reader names.'], ['ui.visual', 'Capture evidence for meaningful UI regressions without storing secrets.']);
|
|
627
672
|
if (qaApiRequired(a))
|
|
628
673
|
cases.push(['api.health', 'Check health/version/readiness endpoints when available.'], ['api.auth', 'Check anonymous, authenticated, expired, and wrong-role access.'], ['api.contract', 'Check status codes, response shape, headers, content type, and error format.'], ['api.validation', 'Check missing, malformed, boundary, duplicate, and over-limit payloads.'], ['api.listing', 'Check pagination, sorting, filters, search, and empty results.'], ['api.mutation', 'Check allowed seeded create/change and forbid deployed destructive flows.'], ['api.idempotency', 'Check retry/idempotency behavior for safe operations.'], ['api.concurrency', 'Check stale change, conflict, and double-submit behavior.'], ['api.failure', 'Check timeout, upstream error, rate-limit, and rollback-visible failure paths.'], ['api.security', 'Check CORS, auth headers, PII redaction, and permission boundaries.']);
|
|
629
674
|
cases.push(['report.evidence', 'Record pass/fail/blocked/skipped with evidence.'], ['report.corrective_loop', 'Record fixes, rechecks, unresolved findings, deferred blockers.'], ['report.honest', 'Run Honest Mode.']);
|
|
@@ -675,6 +720,8 @@ async function missingQaLoopVisualEvidence(dir, gate = {}) {
|
|
|
675
720
|
const visual = await readJson(path.join(dir, QA_LOOP_VISUAL_EVIDENCE_ARTIFACT), null);
|
|
676
721
|
const reasons = [];
|
|
677
722
|
const uiRequired = gate.ui_e2e_required === true;
|
|
723
|
+
const selectedSurface = gate.ui_selected_surface || gate.qa_surface_selected || (gate.ui_chrome_extension_evidence === true ? 'codex_chrome_extension' : null);
|
|
724
|
+
const expectedSource = evidenceSourceForSurface(selectedSurface) || CODEX_WEB_VERIFICATION_EVIDENCE_SOURCE;
|
|
678
725
|
if (uiRequired) {
|
|
679
726
|
const screenshot = visual?.chrome_extension_screenshot || {};
|
|
680
727
|
if (gate.ui_chrome_extension_screenshot_captured !== true && !positiveVisualStatus(screenshot.status, ['captured', 'attached', 'verified']))
|
|
@@ -690,8 +737,8 @@ async function missingQaLoopVisualEvidence(dir, gate = {}) {
|
|
|
690
737
|
else
|
|
691
738
|
reasons.push(...await imageEvidenceFileReasons(dir, screenshotPath, screenshotSha, 'ui_chrome_extension_screenshot', screenshotDims));
|
|
692
739
|
const screenshotSource = firstNonEmpty(gate.ui_chrome_extension_screenshot_source, screenshot.evidence_source, gate.ui_evidence_source);
|
|
693
|
-
if (screenshotSource !==
|
|
694
|
-
reasons.push(
|
|
740
|
+
if (screenshotSource !== expectedSource)
|
|
741
|
+
reasons.push(`ui_chrome_extension_screenshot_source_not_${expectedSource}`);
|
|
695
742
|
}
|
|
696
743
|
const review = visual?.gpt_image_2_annotated_review || {};
|
|
697
744
|
const gptImage2ReviewRequired = gate.gpt_image_2_annotated_review_required === true || review.required === true;
|
|
@@ -786,4 +833,14 @@ function positiveCount(value) {
|
|
|
786
833
|
const n = Number(value || 0);
|
|
787
834
|
return Number.isFinite(n) && n > 0;
|
|
788
835
|
}
|
|
836
|
+
function evidenceSourceForSurface(surface) {
|
|
837
|
+
const value = String(surface || '').trim();
|
|
838
|
+
if (value === 'codex_in_app_browser')
|
|
839
|
+
return CODEX_IN_APP_BROWSER_EVIDENCE_SOURCE;
|
|
840
|
+
if (value === 'codex_chrome_extension')
|
|
841
|
+
return CODEX_CHROME_EXTENSION_EVIDENCE_SOURCE;
|
|
842
|
+
if (value === 'codex_computer_use')
|
|
843
|
+
return CODEX_COMPUTER_USE_EVIDENCE_SOURCE;
|
|
844
|
+
return null;
|
|
845
|
+
}
|
|
789
846
|
//# sourceMappingURL=qa-loop.js.map
|
|
@@ -28,15 +28,16 @@ export async function runReleaseGateDag(input) {
|
|
|
28
28
|
const preset = input.preset || 'release';
|
|
29
29
|
const manifest = loadReleaseGateManifest(root);
|
|
30
30
|
const presetGates = selectReleaseGatePreset(manifest, preset);
|
|
31
|
-
const triwikiGraph = input.triwiki !== false && (preset === 'affected' || preset === 'fast' || preset === 'confidence') && input.full !== true
|
|
32
|
-
? computeTriWikiAffectedGraph({ root, tier: preset === 'fast' ? 'affected' : 'confidence', changedSince: input.changedSince || 'auto', ...(input.changedFiles ? { changedFiles: input.changedFiles } : {}) })
|
|
33
|
-
: null;
|
|
34
31
|
const affected = (preset === 'affected' || preset === 'fast' || preset === 'confidence') && input.full !== true
|
|
35
32
|
? selectAffectedReleaseGates(root, manifest, presetGates, { changedSince: input.changedSince || 'auto', ...(input.changedFiles ? { changedFiles: input.changedFiles } : {}), preset })
|
|
36
33
|
: selectAffectedReleaseGates(root, manifest, presetGates, { full: true, preset });
|
|
34
|
+
const rootReleaseSurfaceChanged = affected.selection.changed_files.some((file) => file === 'package.json' || file === 'package-lock.json' || file === 'release-gates.v2.json');
|
|
35
|
+
const triwikiGraph = input.triwiki !== false && !rootReleaseSurfaceChanged && (preset === 'affected' || preset === 'fast' || preset === 'confidence') && input.full !== true
|
|
36
|
+
? computeTriWikiAffectedGraph({ root, tier: preset === 'fast' ? 'affected' : 'confidence', changedSince: input.changedSince || 'auto', ...(input.changedFiles ? { changedFiles: input.changedFiles } : {}) })
|
|
37
|
+
: null;
|
|
37
38
|
const triwikiSelectionUsed = Boolean(triwikiGraph);
|
|
38
39
|
const triwikiConservative = Boolean(triwikiGraph?.conservative_reason);
|
|
39
|
-
const triwikiSelectedIds = new Set(triwikiGraph && !triwikiConservative ? triwikiGraph.gates :
|
|
40
|
+
const triwikiSelectedIds = new Set(triwikiGraph && !triwikiConservative ? triwikiGraph.gates : affected.gates.map((gate) => gate.id));
|
|
40
41
|
const selected = triwikiGraph
|
|
41
42
|
? presetGates.filter((gate) => triwikiSelectedIds.has(gate.id))
|
|
42
43
|
: affected.gates;
|
|
@@ -45,7 +46,7 @@ export async function runReleaseGateDag(input) {
|
|
|
45
46
|
affected.selection.mode = 'affected';
|
|
46
47
|
affected.selection.selected_gate_ids = selected.map((gate) => gate.id);
|
|
47
48
|
affected.selection.skipped_gate_ids = triwikiSkippedGates;
|
|
48
|
-
affected.selection.reasons = Object.fromEntries(selected.map((gate) => [gate.id, triwikiConservative ? `
|
|
49
|
+
affected.selection.reasons = Object.fromEntries(selected.map((gate) => [gate.id, triwikiConservative ? `triwiki_conservative_fallback:${triwikiGraph.conservative_reason}` : 'triwiki-affected']));
|
|
49
50
|
}
|
|
50
51
|
const selectedIds = new Set(selected.map((gate) => gate.id));
|
|
51
52
|
const affectedExternalSatisfiedDeps = affected.selection.mode === 'affected'
|
|
@@ -2,7 +2,7 @@ import { computeTriWikiAffectedGraph } from '../triwiki/triwiki-affected-graph.j
|
|
|
2
2
|
import { buildTriWikiSlaCertificate } from '../triwiki/triwiki-sla-certificate.js';
|
|
3
3
|
import { planExtremeParallelSchedule } from './extreme-parallel-scheduler.js';
|
|
4
4
|
export const SLA_SCHEDULER_SCHEMA = 'sks.sla-scheduler.v1';
|
|
5
|
-
export function planFiveMinuteSla(root, graph = computeTriWikiAffectedGraph({ root, tier: 'affected' }), slaMs = 300_000) {
|
|
5
|
+
export function planFiveMinuteSla(root, graph = computeTriWikiAffectedGraph({ root, tier: 'affected', includeProofLookup: false }), slaMs = 300_000) {
|
|
6
6
|
const schedule = planExtremeParallelSchedule(root, graph);
|
|
7
7
|
const certificate = buildTriWikiSlaCertificate({
|
|
8
8
|
graph,
|
package/dist/core/routes.js
CHANGED
|
@@ -32,15 +32,30 @@ export const FROM_CHAT_IMG_QA_LOOP_ARTIFACT = 'from-chat-img-qa-loop.json';
|
|
|
32
32
|
export const FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS = 5;
|
|
33
33
|
export const USAGE_TOPICS = 'install|setup|bootstrap|root|deps|zellij|tmux|auto-review|team|qa-loop|ppt|image-ux-review|computer-use|goal|fast-mode|research|db|git|codex|codex-app|codex-native|hooks|features|all-features|dfix|commit|commit-and-push|design|imagegen|dollar|context7|xai|pipeline|reasoning|guard|conflicts|versioning|eval|harness|hproof|gx|wiki|wrongness|code-structure|proof-field|skill-dream|rust';
|
|
34
34
|
export const CODEX_COMPUTER_USE_EVIDENCE_SOURCE = 'codex_computer_use';
|
|
35
|
-
export const
|
|
35
|
+
export const CODEX_IN_APP_BROWSER_EVIDENCE_SOURCE = 'codex_in_app_browser';
|
|
36
|
+
export const CODEX_CHROME_EXTENSION_EVIDENCE_SOURCE = 'codex_chrome_extension';
|
|
37
|
+
export const CODEX_WEB_VERIFICATION_EVIDENCE_SOURCE = CODEX_IN_APP_BROWSER_EVIDENCE_SOURCE;
|
|
36
38
|
export const CODEX_IMAGEGEN_EVIDENCE_SOURCE = 'codex_app_imagegen_gpt_image_2';
|
|
39
|
+
export const CODEX_IN_APP_BROWSER_DOC_URL = 'https://developers.openai.com/codex/app/browser';
|
|
37
40
|
export const CODEX_CHROME_EXTENSION_DOC_URL = 'https://developers.openai.com/codex/app/chrome-extension';
|
|
41
|
+
export const CODEX_COMPUTER_USE_DOC_URL = 'https://developers.openai.com/codex/app/computer-use';
|
|
42
|
+
export const CODEX_RECORD_REPLAY_DOC_URL = 'https://developers.openai.com/codex/record-and-replay';
|
|
43
|
+
export const CODEX_APP_SERVER_DOC_URL = 'https://developers.openai.com/codex/app-server';
|
|
38
44
|
export const CODEX_APP_IMAGE_GENERATION_DOC_URL = 'https://developers.openai.com/codex/app/features#image-generation';
|
|
39
45
|
export const OPENAI_IMAGE_GENERATION_DOC_URL = 'https://developers.openai.com/api/docs/guides/image-generation';
|
|
40
46
|
export const OPENAI_CHATGPT_IMAGES_2_DOC_URL = 'https://openai.com/index/introducing-chatgpt-images-2-0/';
|
|
41
47
|
export const OPENAI_GPT_IMAGE_2_MODEL_DOC_URL = 'https://developers.openai.com/api/docs/models/gpt-image-2';
|
|
42
|
-
export const
|
|
43
|
-
|
|
48
|
+
export const QA_INTERACTION_SURFACES = Object.freeze([
|
|
49
|
+
'codex_in_app_browser',
|
|
50
|
+
'codex_chrome_extension',
|
|
51
|
+
'codex_computer_use',
|
|
52
|
+
'codex_app_plugin',
|
|
53
|
+
'structured_mcp',
|
|
54
|
+
'shell_or_api_diagnostic'
|
|
55
|
+
]);
|
|
56
|
+
export const CODEX_QA_SURFACE_ROUTING_POLICY = `Codex QA surface routing follows the official Codex App split: use @Browser / in-app Browser (${CODEX_IN_APP_BROWSER_DOC_URL}) first for localhost, local development servers, file-backed previews, and public pages that do not require sign-in; use @Chrome / Codex Chrome Extension (${CODEX_CHROME_EXTENSION_DOC_URL}) for signed-in websites, cookies, browser profiles, extensions, existing tabs, or internal tools; use @Computer or @AppName (${CODEX_COMPUTER_USE_DOC_URL}) for native macOS/Windows apps, OS settings, cross-app workflows, and GUI-only bugs. Prefer structured Plugins/MCPs for repeatable data operations, then verify rendered user-visible results with Browser, Chrome, or Computer Use. Playwright, Selenium, Puppeteer, Chrome MCP, static screenshots, plugin cache, and final-agent prose are not Codex App live action proof. App Server evidence (${CODEX_APP_SERVER_DOC_URL}) must correlate thread, turn, item/tool events, approvals, diffs, actions, observations, findings, fixes, and same-flow replay before a real QA pass is claimed.`;
|
|
57
|
+
export const CODEX_WEB_VERIFICATION_POLICY = CODEX_QA_SURFACE_ROUTING_POLICY;
|
|
58
|
+
export const CODEX_COMPUTER_USE_ONLY_POLICY = `Codex Computer Use is a live GUI surface for supported macOS and Windows environments, invoked with @Computer or @AppName for native apps, OS settings, browser contexts that truly require GUI-level operation, and cross-app workflows. Do not replace @Browser localhost/public-page checks or @Chrome signed-in checks with Computer Use unless the surface router records a specific GUI-only/cross-app reason. If live Computer Use tools, permissions, or app access are unavailable, mark the affected native/GUI evidence blocked or unverified instead of fabricating screenshots or actions. Codex App readiness/config checks are capability evidence only, not target interaction proof.`;
|
|
44
59
|
export const IMAGEGEN_SOCIAL_SOURCE_POLICY = 'Use public X/social/community reports only as prompt-quality and workflow-sentiment hints after official OpenAI/Codex docs. Social posts are not capability specs, evidence of tool availability, or proof that a generated asset was created.';
|
|
45
60
|
export const CODEX_IMAGEGEN_REQUIRED_POLICY = 'Pipeline image generation, raster asset creation/editing, and generated image-review evidence must use real Codex App imagegen/$imagegen with gpt-image-2 when that evidence is required for full verification. For newest-model image requests, prompt explicitly for "ChatGPT Images 2.0 / GPT Image 2.0 with gpt-image-2" instead of relying on generic image-generation wording. Do not substitute placeholder SVG/HTML/CSS, prose-only critique, stock-like stand-ins, manually fabricated files, or missing-output ledgers for requested/generated raster assets or required generated review images. If imagegen/gpt-image-2 is unavailable or generated annotated images cannot be created/linked, record the blocker and cap any closeout at verified_partial/reference-only instead of claiming generated-image evidence or full route verification; that partial closeout requires source screenshots plus hashes, docs evidence, source Image Voxel anchors, and Honest Mode evidence. In Codex App prompts, invoke $imagegen when live image generation is needed; SKS hooks and skills can require the policy but cannot attach missing host image-generation tools to an already-started turn. Official OpenAI/Codex docs are authoritative for capabilities, surfaces, limits, and evidence rules; X/social/community reports may inform prompt style only.';
|
|
46
61
|
export const DEFAULT_CODEX_APP_PLUGINS = Object.freeze([
|
|
@@ -56,7 +71,7 @@ export const RESERVED_CODEX_PLUGIN_SKILL_NAMES = Object.freeze([
|
|
|
56
71
|
'browser-use',
|
|
57
72
|
...DEFAULT_CODEX_APP_PLUGINS.map(([name]) => name)
|
|
58
73
|
].sort());
|
|
59
|
-
export const FORBIDDEN_BROWSER_AUTOMATION_RE = /\b(playwright|chrome\s+mcp|
|
|
74
|
+
export const FORBIDDEN_BROWSER_AUTOMATION_RE = /\b(playwright|chrome\s+mcp|selenium|puppeteer)\b/i;
|
|
60
75
|
export function evidenceMentionsForbiddenBrowserAutomation(value, seen = new Set()) {
|
|
61
76
|
if (value == null)
|
|
62
77
|
return false;
|
|
@@ -231,7 +246,7 @@ export function stackCurrentDocsPolicy(commandPrefix = 'sks') {
|
|
|
231
246
|
validate_command: `${prefix} wiki validate .sneakoscope/wiki/context-pack.json`,
|
|
232
247
|
priority: 'must_precede_coding_style_defaults',
|
|
233
248
|
examples: [
|
|
234
|
-
'Supabase hosted projects should prefer sb_publishable_ and sb_secret_ keys over legacy anon
|
|
249
|
+
'Supabase hosted projects should prefer sb_publishable_ and sb_secret_ keys over legacy anon and service role keys when current docs apply.',
|
|
235
250
|
'Next.js 16 deprecates the middleware file convention in favor of proxy.ts/proxy.js.',
|
|
236
251
|
'Vercel Function duration limits, including the 300s default with Fluid Compute, are deployment constraints that must shape long-running server work.'
|
|
237
252
|
]
|
|
@@ -239,7 +254,7 @@ export function stackCurrentDocsPolicy(commandPrefix = 'sks') {
|
|
|
239
254
|
}
|
|
240
255
|
export function stackCurrentDocsPolicyText(commandPrefix = 'sks') {
|
|
241
256
|
const policy = stackCurrentDocsPolicy(commandPrefix);
|
|
242
|
-
return `Stack current-docs policy: whenever project tech stack is added or a framework/package/runtime/platform version changes, fetch current docs with Context7 (resolve-library-id then query-docs) or official vendor web docs before coding, record the syntax/limits/security guidance as high-priority TriWiki claims in ${policy.memory_path}, run "${policy.refresh_command}", then "${policy.validate_command}". Treat these claims as higher priority than model-memory defaults. Examples include Supabase publishable/secret keys replacing legacy anon
|
|
257
|
+
return `Stack current-docs policy: whenever project tech stack is added or a framework/package/runtime/platform version changes, fetch current docs with Context7 (resolve-library-id then query-docs) or official vendor web docs before coding, record the syntax/limits/security guidance as high-priority TriWiki claims in ${policy.memory_path}, run "${policy.refresh_command}", then "${policy.validate_command}". Treat these claims as higher priority than model-memory defaults. Examples include Supabase publishable/secret keys replacing legacy anon and service role guidance for hosted projects, Next.js 16 proxy.ts/proxy.js replacing the deprecated middleware file convention, avoiding stale webpack defaults when newer framework guidance says otherwise, and Vercel Function duration limits such as the 300s default under Fluid Compute.`;
|
|
243
258
|
}
|
|
244
259
|
export function triwikiContextTrackingText(commandPrefix = 'sks') {
|
|
245
260
|
const ctx = triwikiContextTracking(commandPrefix);
|
|
@@ -549,21 +564,34 @@ export const ROUTES = [
|
|
|
549
564
|
cliEntrypoint: 'sks db scan',
|
|
550
565
|
examples: ['$DB check this migration safely']
|
|
551
566
|
},
|
|
567
|
+
{
|
|
568
|
+
id: 'MadDB',
|
|
569
|
+
command: '$MAD-DB',
|
|
570
|
+
mode: 'MADDB',
|
|
571
|
+
route: 'first-class MadDB SQL-plane execution',
|
|
572
|
+
description: 'Explicit one-cycle MadDB route. When invoked by $MAD-DB or sks mad-db run|exec|apply-migration, SQL-plane mutations such as CREATE, ALTER, table/schema DROP, column add/drop/rename, INSERT, UPDATE, DELETE including all-row mutations, TRUNCATE, execute_sql, and apply_migration are authorized for the bound Supabase project and must be executed with tool-result plus read-back proof. Supabase project/account/billing/credential control-plane actions remain denied.',
|
|
573
|
+
requiredSkills: ['mad-db', 'db-safety-guard', 'pipeline-runner', 'context7-docs', REFLECTION_SKILL_NAME, 'honest-mode'],
|
|
574
|
+
appSkillAliases: ['mad-db'],
|
|
575
|
+
lifecycle: ['explicit_invocation', 'single_mission_capability_v2', 'ephemeral_write_profile', 'tool_inventory', 'execute_sql_or_apply_migration', 'read_back_verification', 'close_and_read_only_restore', 'post_route_reflection', 'honest_mode'],
|
|
576
|
+
context7Policy: 'required',
|
|
577
|
+
reasoningPolicy: 'xhigh',
|
|
578
|
+
stopGate: 'mad-db-gate.json',
|
|
579
|
+
cliEntrypoint: 'sks mad-db run|exec|apply-migration|status|close|revoke|doctor',
|
|
580
|
+
examples: ['$MAD-DB public.users legacy_code 컬럼 삭제', '$MAD-DB truncate public.staging_events']
|
|
581
|
+
},
|
|
552
582
|
{
|
|
553
583
|
id: 'MadSKS',
|
|
554
584
|
command: '$MAD-SKS',
|
|
555
585
|
mode: 'MADSKS',
|
|
556
586
|
route: 'explicit scoped permission-widening modifier',
|
|
557
|
-
description: 'Explicit high-risk authorization modifier that can be combined with other $ commands to temporarily open approved target-project scopes such as files, shell, package installs, services, network, Computer Use/browser workflows, generated assets, file permissions, migrations, Supabase MCP DB writes, direct execute SQL, schema cleanup, and normal targeted DB writes for the active invocation, while preserving catastrophic wipe/all-row/project-management, credential-exfiltration, persistent security-weakening, and unrequested fallback safeguards.',
|
|
587
|
+
description: 'Explicit high-risk authorization modifier that can be combined with other $ commands to temporarily open approved target-project scopes such as files, shell, package installs, services, network, Computer Use/browser workflows, generated assets, file permissions, migrations, Supabase MCP DB writes, direct execute SQL, schema cleanup, and normal targeted DB writes for the active invocation, while preserving catastrophic wipe/all-row/project-management, credential-exfiltration, persistent security-weakening, and unrequested fallback safeguards. It is not the first-class MadDB destructive SQL-plane route.',
|
|
558
588
|
requiredSkills: ['mad-sks', 'db-safety-guard', 'pipeline-runner', 'context7-docs', REFLECTION_SKILL_NAME, 'honest-mode'],
|
|
559
|
-
dollarAliases: ['$MAD-DB'],
|
|
560
|
-
appSkillAliases: ['mad-db'],
|
|
561
589
|
lifecycle: ['explicit_invocation', 'auto_sealed_permission_scope', 'scoped_permission_override', 'catastrophic_guard', 'permission_deactivation', 'post_route_reflection', 'honest_mode'],
|
|
562
590
|
context7Policy: 'required',
|
|
563
591
|
reasoningPolicy: 'xhigh',
|
|
564
592
|
stopGate: 'mad-sks-gate.json',
|
|
565
593
|
cliEntrypoint: 'Codex App prompt route only: $MAD-SKS <task>',
|
|
566
|
-
examples: ['$MAD-SKS $Team target project maintenance with package/service/file and DB scopes', '$DB Supabase 점검 $MAD-SKS'
|
|
594
|
+
examples: ['$MAD-SKS $Team target project maintenance with package/service/file and DB scopes', '$DB Supabase 점검 $MAD-SKS']
|
|
567
595
|
},
|
|
568
596
|
{
|
|
569
597
|
id: 'GX',
|
|
@@ -964,6 +992,8 @@ export function routeRequiresSubagents(route, prompt = '') {
|
|
|
964
992
|
return false;
|
|
965
993
|
if (route.id === 'ImageUXReview')
|
|
966
994
|
return false;
|
|
995
|
+
if (route.id === 'MadDB')
|
|
996
|
+
return false;
|
|
967
997
|
if (route.id === 'Research' || route.id === 'AutoResearch')
|
|
968
998
|
return true;
|
|
969
999
|
if (route.id === 'Goal')
|
|
@@ -996,7 +1026,7 @@ export function simpleGitOnlyRouteId(prompt = '') {
|
|
|
996
1026
|
}
|
|
997
1027
|
export function reflectionRequiredForRoute(route) {
|
|
998
1028
|
const id = String(route?.id || route?.mode || route?.route || route || '').replace(/^\$/, '');
|
|
999
|
-
return /^(team|naruto|shadowclone|shadow-clone|kagebunshin|kage-bunshin|qaloop|qa-loop|ppt|imageuxreview|image-ux-review|research|autoresearch|db|database|madsks|mad-sks|gx)$/i.test(id);
|
|
1029
|
+
return /^(team|naruto|shadowclone|shadow-clone|kagebunshin|kage-bunshin|qaloop|qa-loop|ppt|imageuxreview|image-ux-review|research|autoresearch|db|database|madsks|mad-sks|maddb|mad-db|gx)$/i.test(id);
|
|
1000
1030
|
}
|
|
1001
1031
|
export function looksLikeCodeChangingWork(prompt = '') {
|
|
1002
1032
|
const text = String(prompt || '');
|
|
@@ -1038,7 +1068,7 @@ export function routeReasoning(route, prompt = '') {
|
|
|
1038
1068
|
const base = ALLOWED_REASONING_EFFORTS.has(route?.reasoningPolicy) ? route.reasoningPolicy : 'medium';
|
|
1039
1069
|
if (hasFromChatImgSignal(text))
|
|
1040
1070
|
return reasoning('xhigh', 'from_chat_img_image_work_order_analysis');
|
|
1041
|
-
if (/(?:^|\s)sks\s+--mad\b|(?:^|\s)--mad\b|\$MAD-SKS\b|\bmad-sks\b|\bmadsks\b/i.test(text))
|
|
1071
|
+
if (/(?:^|\s)sks\s+--mad\b|(?:^|\s)--mad\b|\$MAD-SKS\b|\$MAD-DB\b|\bmad-sks\b|\bmadsks\b|\bmad-db\b|\bmaddb\b/i.test(text))
|
|
1042
1072
|
return reasoning('xhigh', 'mad_sks_or_mad_launch_default');
|
|
1043
1073
|
if (route?.id === 'Team' || route?.id === 'Naruto')
|
|
1044
1074
|
return teamRouteReasoning(text);
|
|
@@ -35,7 +35,8 @@ export function computeTriWikiAffectedGraph(input) {
|
|
|
35
35
|
const gatePacks = new Set();
|
|
36
36
|
for (const impact of selected)
|
|
37
37
|
gatePacks.add(impact.gate_pack);
|
|
38
|
-
const
|
|
38
|
+
const includeProofLookup = input.includeProofLookup !== false;
|
|
39
|
+
const proofLookup = includeProofLookup ? selected.map((impact) => {
|
|
39
40
|
const cacheKey = computeTriWikiCacheKey({
|
|
40
41
|
root: input.root,
|
|
41
42
|
id: impact.gate_id,
|
|
@@ -46,7 +47,7 @@ export function computeTriWikiAffectedGraph(input) {
|
|
|
46
47
|
});
|
|
47
48
|
const hit = readReusableTriWikiProofCard({ root: input.root, subjectId: impact.gate_id, cacheKey: cacheKey.key });
|
|
48
49
|
return { impact, hit };
|
|
49
|
-
});
|
|
50
|
+
}) : [];
|
|
50
51
|
const reusedProofs = proofLookup
|
|
51
52
|
.filter((row) => row.hit.hit && row.hit.card && row.hit.path)
|
|
52
53
|
.map((row) => ({ gate_id: row.impact.gate_id, proof_id: row.hit.card.proof_id, path: row.hit.path }))
|
package/dist/core/version.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export const PACKAGE_VERSION = '4.
|
|
1
|
+
export const PACKAGE_VERSION = '4.2.1';
|
|
2
2
|
//# sourceMappingURL=version.js.map
|
|
@@ -115,7 +115,11 @@ function renderTelemetrySlotRows(snapshot) {
|
|
|
115
115
|
});
|
|
116
116
|
}
|
|
117
117
|
function isMadDbActive(capability) {
|
|
118
|
-
if (!capability
|
|
118
|
+
if (!capability)
|
|
119
|
+
return false;
|
|
120
|
+
if (capability.schema === 'sks.mad-db-capability.v2' && !['transport_ready', 'active'].includes(String(capability.status || '')))
|
|
121
|
+
return false;
|
|
122
|
+
if (capability.schema !== 'sks.mad-db-capability.v2' && (capability.enabled !== true || capability.consumed === true))
|
|
119
123
|
return false;
|
|
120
124
|
const expires = Date.parse(capability.expires_at || '');
|
|
121
125
|
return Number.isFinite(expires) && expires > Date.now();
|
|
@@ -8,6 +8,7 @@ import { sourceSnapshot } from './lib/ensure-dist-fresh.js';
|
|
|
8
8
|
const root = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..', '..');
|
|
9
9
|
const distRoot = path.join(root, 'dist');
|
|
10
10
|
const issues = [];
|
|
11
|
+
const contractOnlyMarker = 'contract' + '_only';
|
|
11
12
|
if (!fs.existsSync(distRoot))
|
|
12
13
|
issues.push('dist_missing');
|
|
13
14
|
requiredFile('dist/bin/sks.js');
|
|
@@ -63,8 +64,8 @@ if (fs.existsSync(distRoot)) {
|
|
|
63
64
|
if (!rel.endsWith('.js'))
|
|
64
65
|
continue;
|
|
65
66
|
const text = fs.readFileSync(file, 'utf8');
|
|
66
|
-
if (text.includes(
|
|
67
|
-
issues.push(
|
|
67
|
+
if (text.includes(contractOnlyMarker))
|
|
68
|
+
issues.push(`${contractOnlyMarker}:${rel}`);
|
|
68
69
|
if (/from\s+['"][^'"]+\.mjs['"]|import\(\s*['"][^'"]+\.mjs['"]\s*\)/.test(text)) {
|
|
69
70
|
issues.push(`imports_mjs:${rel}`);
|
|
70
71
|
}
|
|
@@ -8,11 +8,12 @@ const manifest = parity.manifest;
|
|
|
8
8
|
const dep = pkg.dependencies?.['@openai/codex-sdk'];
|
|
9
9
|
const lockSdk = lock.packages?.['node_modules/@openai/codex-sdk']?.version;
|
|
10
10
|
const lockCli = lock.packages?.['node_modules/@openai/codex']?.version;
|
|
11
|
+
const lockRootVersion = lock.packages?.['']?.version || lock.version;
|
|
11
12
|
assertGate(parity.ok, 'Codex release manifest TS/JSON parity must hold', parity);
|
|
12
13
|
assertGate(dep === manifest.sdkVersion, 'package.json must pin @openai/codex-sdk exactly to manifest sdkVersion', { dep, sdkVersion: manifest.sdkVersion });
|
|
13
14
|
assertGate(lockSdk === manifest.sdkVersion, 'package-lock must resolve @openai/codex-sdk to manifest sdkVersion', { lockSdk, sdkVersion: manifest.sdkVersion });
|
|
14
15
|
assertGate(lockCli === manifest.requiredCliVersion, 'package-lock must resolve @openai/codex to manifest requiredCliVersion', { lockCli, requiredCliVersion: manifest.requiredCliVersion });
|
|
15
|
-
assertGate(pkg.version ===
|
|
16
|
+
assertGate(pkg.version === lockRootVersion, 'package version must match package-lock root version', { version: pkg.version, lockRootVersion });
|
|
16
17
|
emitGate('codex:0142:manifest', {
|
|
17
18
|
manifest_sha256: parity.manifest_sha256,
|
|
18
19
|
target_tag: manifest.targetTag,
|
|
@@ -10,6 +10,7 @@ const required = [
|
|
|
10
10
|
'codex-control:thread-registry',
|
|
11
11
|
'codex-control:side-effect-scope',
|
|
12
12
|
'codex-control:empty-result-retry',
|
|
13
|
+
'codex-control:model-capacity-fallback',
|
|
13
14
|
'codex-control:stream-idle-watchdog',
|
|
14
15
|
'codex-control:tool-call-sequence-repair',
|
|
15
16
|
'codex-control:keepalive-no-cot-leak'
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// @ts-nocheck
|
|
3
|
+
import { assertGate, emitGate, importDist, readText } from './sks-1-18-gate-lib.js';
|
|
4
|
+
const mod = await importDist('core/codex-control/codex-reliability-shield.js');
|
|
5
|
+
let attempts = 0;
|
|
6
|
+
const seenAttempts = [];
|
|
7
|
+
const result = await mod.runWithCodexReliabilityShield(baseTask(), async (attempt) => {
|
|
8
|
+
attempts += 1;
|
|
9
|
+
seenAttempts.push(attempt);
|
|
10
|
+
return {
|
|
11
|
+
ok: false,
|
|
12
|
+
sdkThreadId: '',
|
|
13
|
+
sdkRunId: null,
|
|
14
|
+
events: [{ type: 'turn.failed', message: 'Selected model is at capacity. Please try a different model.' }],
|
|
15
|
+
finalResponse: '',
|
|
16
|
+
structuredOutput: null,
|
|
17
|
+
blockers: ['Selected model is at capacity. Please try a different model.']
|
|
18
|
+
};
|
|
19
|
+
});
|
|
20
|
+
assertGate(attempts === 1, 'model capacity must not retry with downgraded pressure', { attempts, seenAttempts, result });
|
|
21
|
+
assertGate(result.reliabilityShield.ok === false, 'terminal model capacity must remain visible as a blocker', result.reliabilityShield);
|
|
22
|
+
assertGate(result.reliabilityShield.retry_count === 0, 'capacity retry count must stay zero', result.reliabilityShield);
|
|
23
|
+
assertGate(result.reliabilityShield.model_capacity_retry_count === 0, 'model capacity retry must not be counted', result.reliabilityShield);
|
|
24
|
+
assertGate(result.reliabilityShield.selected_model_capacity_fallback === false, 'capacity fallback flag must not be selected', result.reliabilityShield);
|
|
25
|
+
assertGate(result.reliabilityShield.attempts[0].retryable === false, 'capacity attempt must be terminal', result.reliabilityShield.attempts[0]);
|
|
26
|
+
assertGate(result.reliabilityShield.attempts[0].retry_reason === null, 'capacity retry reason must stay null', result.reliabilityShield.attempts[0]);
|
|
27
|
+
assertGate(result.reliabilityShield.attempts[0].blockers.includes('codex_model_capacity_unavailable'), 'capacity blocker must be explicit', result.reliabilityShield.attempts[0]);
|
|
28
|
+
assertGate(mod.isCodexModelCapacityError({ blockers: ['Selected model is at capacity. Please try a different model.'] }, []) === true, 'capacity classifier must recognize common Codex error text');
|
|
29
|
+
const runnerSource = readText('src/core/codex-control/codex-task-runner.ts');
|
|
30
|
+
assertGate(!runnerSource.includes("capacity_fallback_service_tier: 'standard'"), 'capacity fallback must not force standard service tier');
|
|
31
|
+
assertGate(!runnerSource.includes("capacity_fallback_reasoning_effort: 'low'"), 'capacity fallback must not force low reasoning');
|
|
32
|
+
assertGate(!runnerSource.includes('SKS_CODEX_CAPACITY_FALLBACK_MODEL'), 'capacity fallback model override must be removed');
|
|
33
|
+
emitGate('codex-control:model-capacity-fallback', {
|
|
34
|
+
attempts,
|
|
35
|
+
retry_count: result.reliabilityShield.retry_count,
|
|
36
|
+
model_capacity_retry_count: result.reliabilityShield.model_capacity_retry_count
|
|
37
|
+
});
|
|
38
|
+
function baseTask() {
|
|
39
|
+
return {
|
|
40
|
+
route: '$Agent',
|
|
41
|
+
tier: 'worker',
|
|
42
|
+
missionId: 'M-model-capacity-fallback',
|
|
43
|
+
cwd: process.cwd(),
|
|
44
|
+
prompt: 'model capacity fallback fixture',
|
|
45
|
+
outputSchemaId: 'sks.agent-worker-result.v1',
|
|
46
|
+
outputSchema: {},
|
|
47
|
+
sandboxPolicy: 'read-only',
|
|
48
|
+
requestedScopeContract: { read_only: true },
|
|
49
|
+
reliabilityPolicy: { maxEmptyResultRetries: 1, idleTimeoutMs: 5000, timeoutClass: 'short' },
|
|
50
|
+
mutationLedgerRoot: process.cwd()
|
|
51
|
+
};
|
|
52
|
+
}
|
|
53
|
+
//# sourceMappingURL=codex-control-model-capacity-fallback-check.js.map
|
|
@@ -114,11 +114,17 @@ const ALLOWLIST = [
|
|
|
114
114
|
reason: 'migration journal writes hashes and rollback metadata, not raw secret config values',
|
|
115
115
|
expires: '3.2.0'
|
|
116
116
|
},
|
|
117
|
+
{
|
|
118
|
+
file: 'src/core/mad-db/mad-db-runtime-profile.ts',
|
|
119
|
+
pattern: /codex-mad-db\.config\.toml|writeTextAtomic/,
|
|
120
|
+
reason: 'MAD-DB runtime profile writes only a mission-local temporary Codex profile and verifies read-only restoration on close',
|
|
121
|
+
expires: '4.3.0'
|
|
122
|
+
},
|
|
117
123
|
{
|
|
118
124
|
file: 'src/core/providers/glm/naruto/glm-naruto-trace.ts',
|
|
119
125
|
pattern: /mission-result\.json|sanitizeArtifact/,
|
|
120
126
|
reason: 'GLM Naruto trace writer persists sanitized mission-result proof artifacts, not raw env secret files',
|
|
121
|
-
expires: '4.
|
|
127
|
+
expires: '4.3.0'
|
|
122
128
|
}
|
|
123
129
|
];
|
|
124
130
|
const sources = listSourceFiles().map((file) => ({
|