sneakoscope 4.2.0 → 4.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -8
- package/crates/sks-core/Cargo.lock +1 -1
- package/crates/sks-core/Cargo.toml +1 -1
- package/crates/sks-core/src/main.rs +1 -1
- package/dist/bin/sks.js +1 -1
- package/dist/cli/command-registry.js +2 -1
- package/dist/cli/ultra-search-command.js +163 -0
- package/dist/cli/xai-command.js +28 -168
- package/dist/core/agents/agent-codex-cockpit.js +3 -3
- package/dist/core/agents/agent-wrongness.js +1 -1
- package/dist/core/codex-control/codex-app-server-v2-client.js +86 -2
- package/dist/core/codex-control/codex-reliability-shield.js +26 -5
- package/dist/core/codex-control/codex-task-runner.js +7 -1
- package/dist/core/codex-control/model-call-concurrency.js +1 -1
- package/dist/core/commands/qa-loop-command.js +23 -7
- package/dist/core/fsx.js +1 -1
- package/dist/core/hooks-runtime.js +1 -1
- package/dist/core/qa-loop/qa-app-server-driver.js +134 -0
- package/dist/core/qa-loop/qa-contract-v2.js +231 -0
- package/dist/core/qa-loop/qa-gate-v2.js +132 -0
- package/dist/core/qa-loop/qa-runtime-artifacts.js +53 -0
- package/dist/core/qa-loop/qa-surface-router.js +114 -0
- package/dist/core/qa-loop/qa-types.js +18 -0
- package/dist/core/qa-loop.js +83 -26
- package/dist/core/release/gate-manifest.js +1 -0
- package/dist/core/release/sla-scheduler.js +1 -1
- package/dist/core/release-parallel-full-coverage.js +1 -1
- package/dist/core/routes.js +22 -6
- package/dist/core/source-intelligence/source-intelligence-policy.js +45 -26
- package/dist/core/source-intelligence/source-intelligence-proof.js +10 -16
- package/dist/core/source-intelligence/source-intelligence-runner.js +56 -42
- package/dist/core/triwiki/triwiki-affected-graph.js +3 -2
- package/dist/core/trust-kernel/trust-report.js +3 -5
- package/dist/core/ultra-search/index.js +3 -0
- package/dist/core/ultra-search/runtime.js +502 -0
- package/dist/core/ultra-search/types.js +3 -0
- package/dist/core/version.js +1 -1
- package/dist/scripts/agent-visual-consistency-check.js +1 -1
- package/dist/scripts/codex-control-all-pipelines-check.js +1 -0
- package/dist/scripts/codex-control-model-capacity-fallback-check.js +53 -0
- package/dist/scripts/config-managed-merge-callsite-coverage-check.js +7 -1
- package/dist/scripts/loop-directive-check-lib.js +78 -1
- package/dist/scripts/qa-loop-app-server-driver-check.js +74 -0
- package/dist/scripts/qa-loop-surface-router-check.js +49 -0
- package/dist/scripts/release-check-dynamic-execute.js +1 -1
- package/dist/scripts/release-metadata-1-19-check.js +2 -2
- package/dist/scripts/release-parallel-check.js +2 -2
- package/dist/scripts/release-parallel-full-coverage-check.js +1 -1
- package/dist/scripts/release-readiness-report.js +6 -6
- package/dist/scripts/runtime-ts-rust-boundary-check.js +1 -1
- package/dist/scripts/sks-1-18-gate-lib.js +2 -2
- package/dist/scripts/source-intelligence-all-modes-check.js +9 -19
- package/dist/scripts/source-intelligence-policy-check.js +6 -6
- package/dist/scripts/triwiki-affected-graph-check.js +2 -2
- package/dist/scripts/ultra-search-provider-interface-check.js +27 -0
- package/package.json +7 -4
- package/dist/core/mcp/xai-mcp-detector.js +0 -157
- package/dist/core/mcp/xai-search-adapter.js +0 -100
- package/dist/scripts/xai-mcp-capability-check.js +0 -14
|
@@ -8,10 +8,12 @@ export class CodexAppServerV2Client {
|
|
|
8
8
|
cwd;
|
|
9
9
|
timeoutMs;
|
|
10
10
|
currentTimeProvider;
|
|
11
|
+
approvalPolicy;
|
|
11
12
|
child = null;
|
|
12
13
|
nextId = 1;
|
|
13
14
|
pending = new Map();
|
|
14
15
|
notifications = [];
|
|
16
|
+
listeners = new Set();
|
|
15
17
|
stdoutBuffer = '';
|
|
16
18
|
stderr = '';
|
|
17
19
|
constructor(options) {
|
|
@@ -21,6 +23,7 @@ export class CodexAppServerV2Client {
|
|
|
21
23
|
this.cwd = options.cwd || process.cwd();
|
|
22
24
|
this.timeoutMs = Number(options.timeoutMs || 20_000);
|
|
23
25
|
this.currentTimeProvider = options.currentTimeProvider || (() => new Date());
|
|
26
|
+
this.approvalPolicy = options.approvalPolicy || {};
|
|
24
27
|
}
|
|
25
28
|
async initialize() {
|
|
26
29
|
this.start();
|
|
@@ -36,18 +39,58 @@ export class CodexAppServerV2Client {
|
|
|
36
39
|
optOutNotificationMethods: []
|
|
37
40
|
}
|
|
38
41
|
});
|
|
39
|
-
this.notify('
|
|
42
|
+
this.notify('initialized', {});
|
|
40
43
|
return result;
|
|
41
44
|
}
|
|
42
45
|
async listThreads(params = {}) {
|
|
43
46
|
return await this.request('thread/list', normalizeThreadListParams(params));
|
|
44
47
|
}
|
|
48
|
+
async startThread(params = {}) {
|
|
49
|
+
return await this.request('thread/start', params);
|
|
50
|
+
}
|
|
51
|
+
async resumeThread(params = {}) {
|
|
52
|
+
return await this.request('thread/resume', params);
|
|
53
|
+
}
|
|
45
54
|
async searchThreads(searchTerm, params = {}) {
|
|
46
55
|
return await this.listThreads({ ...params, searchTerm });
|
|
47
56
|
}
|
|
48
57
|
async readThread(threadId, includeTurns = false) {
|
|
49
58
|
return await this.request('thread/read', { threadId, includeTurns });
|
|
50
59
|
}
|
|
60
|
+
async startTurn(params = {}) {
|
|
61
|
+
return await this.request('turn/start', params);
|
|
62
|
+
}
|
|
63
|
+
async steerTurn(params = {}) {
|
|
64
|
+
return await this.request('turn/steer', params);
|
|
65
|
+
}
|
|
66
|
+
async interruptTurn(params = {}) {
|
|
67
|
+
return await this.request('turn/interrupt', params);
|
|
68
|
+
}
|
|
69
|
+
onEvent(listener) {
|
|
70
|
+
this.listeners.add(listener);
|
|
71
|
+
return () => this.listeners.delete(listener);
|
|
72
|
+
}
|
|
73
|
+
waitForNotification(methods, timeoutMs = this.timeoutMs) {
|
|
74
|
+
const expected = new Set(Array.isArray(methods) ? methods.map(String) : [String(methods)]);
|
|
75
|
+
return new Promise((resolve, reject) => {
|
|
76
|
+
const timer = setTimeout(() => {
|
|
77
|
+
dispose();
|
|
78
|
+
reject(new Error(`Timed out waiting for app-server notification: ${Array.from(expected).join(', ')}`));
|
|
79
|
+
}, timeoutMs);
|
|
80
|
+
timer.unref?.();
|
|
81
|
+
const dispose = this.onEvent((event) => {
|
|
82
|
+
if (event && expected.has(String(event.method || ''))) {
|
|
83
|
+
clearTimeout(timer);
|
|
84
|
+
dispose();
|
|
85
|
+
resolve(event);
|
|
86
|
+
}
|
|
87
|
+
});
|
|
88
|
+
});
|
|
89
|
+
}
|
|
90
|
+
async waitForTurnCompletion(threadId, turnId, timeoutMs = this.timeoutMs) {
|
|
91
|
+
const expected = turnId ? ['turn/completed', 'thread/closed', 'thread/status/changed'] : ['turn/completed', 'thread/closed'];
|
|
92
|
+
return await this.waitForNotification(expected, timeoutMs);
|
|
93
|
+
}
|
|
51
94
|
start() {
|
|
52
95
|
if (this.child)
|
|
53
96
|
return;
|
|
@@ -107,7 +150,14 @@ export class CodexAppServerV2Client {
|
|
|
107
150
|
void this.respondToServerRequest(message);
|
|
108
151
|
}
|
|
109
152
|
else {
|
|
110
|
-
|
|
153
|
+
const event = { ...message, received_at: nowIso() };
|
|
154
|
+
this.notifications.push(event);
|
|
155
|
+
for (const listener of this.listeners) {
|
|
156
|
+
try {
|
|
157
|
+
listener(event);
|
|
158
|
+
}
|
|
159
|
+
catch { }
|
|
160
|
+
}
|
|
111
161
|
}
|
|
112
162
|
}
|
|
113
163
|
}
|
|
@@ -119,6 +169,38 @@ export class CodexAppServerV2Client {
|
|
|
119
169
|
this.write({ jsonrpc: '2.0', id, result: currentTimeResponse(this.currentTimeProvider()) });
|
|
120
170
|
return;
|
|
121
171
|
}
|
|
172
|
+
if (method === 'item/commandExecution/requestApproval' || method === 'commandExecution/requestApproval') {
|
|
173
|
+
this.write({ jsonrpc: '2.0', id, result: this.approvalPolicy.commandExecution?.(message.params) || { decision: 'cancel' } });
|
|
174
|
+
return;
|
|
175
|
+
}
|
|
176
|
+
if (method === 'item/fileChange/requestApproval' || method === 'fileChange/requestApproval') {
|
|
177
|
+
this.write({ jsonrpc: '2.0', id, result: this.approvalPolicy.fileChange?.(message.params) || { decision: 'cancel' } });
|
|
178
|
+
return;
|
|
179
|
+
}
|
|
180
|
+
if (method === 'item/permissions/requestApproval' || method === 'permissions/requestApproval') {
|
|
181
|
+
this.write({ jsonrpc: '2.0', id, result: this.approvalPolicy.permissions?.(message.params) || { permissions: { network: { enabled: false }, fileSystem: { read: [], write: [], entries: [] } }, scope: 'turn', strictAutoReview: true } });
|
|
182
|
+
return;
|
|
183
|
+
}
|
|
184
|
+
if (method === 'item/tool/requestUserInput') {
|
|
185
|
+
this.write({ jsonrpc: '2.0', id, result: this.approvalPolicy.toolRequestUserInput?.(message.params) || { answers: {} } });
|
|
186
|
+
return;
|
|
187
|
+
}
|
|
188
|
+
if (method === 'item/tool/call') {
|
|
189
|
+
this.write({ jsonrpc: '2.0', id, result: this.approvalPolicy.dynamicToolCall?.(message.params) || { contentItems: [], success: false } });
|
|
190
|
+
return;
|
|
191
|
+
}
|
|
192
|
+
if (method === 'mcpServer/elicitation/request') {
|
|
193
|
+
this.write({ jsonrpc: '2.0', id, result: this.approvalPolicy.mcpElicitation?.(message.params) || { contentItems: [], success: false } });
|
|
194
|
+
return;
|
|
195
|
+
}
|
|
196
|
+
if (method === 'attestation/generate') {
|
|
197
|
+
this.write({ jsonrpc: '2.0', id, result: this.approvalPolicy.attestation?.(message.params) || { decision: 'cancel' } });
|
|
198
|
+
return;
|
|
199
|
+
}
|
|
200
|
+
if (method === 'account/chatgptAuthTokens/refresh') {
|
|
201
|
+
this.write({ jsonrpc: '2.0', id, result: this.approvalPolicy.chatgptAuthTokensRefresh?.(message.params) || { ok: false } });
|
|
202
|
+
return;
|
|
203
|
+
}
|
|
122
204
|
this.write({
|
|
123
205
|
jsonrpc: '2.0',
|
|
124
206
|
id,
|
|
@@ -182,6 +264,8 @@ export async function createCodexAppServerV2Client(options = {}) {
|
|
|
182
264
|
clientOptions.timeoutMs = options.timeoutMs;
|
|
183
265
|
if (options.currentTimeProvider !== undefined)
|
|
184
266
|
clientOptions.currentTimeProvider = options.currentTimeProvider;
|
|
267
|
+
if (options.approvalPolicy !== undefined)
|
|
268
|
+
clientOptions.approvalPolicy = options.approvalPolicy;
|
|
185
269
|
return {
|
|
186
270
|
client: new CodexAppServerV2Client(clientOptions),
|
|
187
271
|
runtimeIdentity: runtime.identity
|
|
@@ -37,6 +37,7 @@ export async function runWithCodexReliabilityShield(input, runAttempt) {
|
|
|
37
37
|
break;
|
|
38
38
|
}
|
|
39
39
|
const blockers = attempts.flatMap((attempt) => attempt.blockers);
|
|
40
|
+
const modelCapacityRetryCount = attempts.filter((attempt) => attempt.model_capacity_error && attempt.retryable).length;
|
|
40
41
|
const report = {
|
|
41
42
|
schema: CODEX_RELIABILITY_SHIELD_SCHEMA,
|
|
42
43
|
generated_at: nowIso(),
|
|
@@ -52,6 +53,8 @@ export async function runWithCodexReliabilityShield(input, runAttempt) {
|
|
|
52
53
|
heartbeat_count: attempts.reduce((sum, attempt) => sum + attempt.heartbeat_count, 0),
|
|
53
54
|
repaired_tool_result_count: attempts.reduce((sum, attempt) => sum + attempt.repaired_tool_result_count, 0),
|
|
54
55
|
no_duplicate_streamed_output: attempts.slice(0, -1).every((attempt) => attempt.meaningful_event_count === 0),
|
|
56
|
+
model_capacity_retry_count: modelCapacityRetryCount,
|
|
57
|
+
selected_model_capacity_fallback: selectedAttempt > 1 && modelCapacityRetryCount > 0,
|
|
55
58
|
blockers
|
|
56
59
|
};
|
|
57
60
|
return {
|
|
@@ -61,24 +64,27 @@ export async function runWithCodexReliabilityShield(input, runAttempt) {
|
|
|
61
64
|
}
|
|
62
65
|
export function evaluateCodexReliabilityAttempt(result, events, policy, attempt) {
|
|
63
66
|
const meaningful = events.filter(isMeaningfulEvent);
|
|
64
|
-
const
|
|
67
|
+
const modelCapacity = isCodexModelCapacityError(result, events);
|
|
68
|
+
const fatal = !modelCapacity && hasFatalError(result, events);
|
|
65
69
|
const idle = hasIdleTimeout(events, policy.idleTimeoutMs);
|
|
66
70
|
const empty = events.length === 0 || (!String(result.finalResponse || '').trim() && meaningful.length === 0);
|
|
67
71
|
const partial = meaningful.length > 0 && !result.structuredOutput;
|
|
68
72
|
const blockers = [];
|
|
69
73
|
let retryable = false;
|
|
70
74
|
let retryReason = null;
|
|
71
|
-
if (
|
|
75
|
+
if (modelCapacity)
|
|
76
|
+
blockers.push('codex_model_capacity_unavailable');
|
|
77
|
+
if (!modelCapacity && idle && partial)
|
|
72
78
|
blockers.push('codex_reliability_idle_after_partial_output');
|
|
73
|
-
if (partial && !idle)
|
|
79
|
+
if (!modelCapacity && partial && !idle)
|
|
74
80
|
blockers.push('codex_reliability_partial_output_without_structured_result');
|
|
75
81
|
if (fatal)
|
|
76
82
|
blockers.push('codex_reliability_fatal_error_no_retry');
|
|
77
|
-
if (!fatal && idle && meaningful.length === 0) {
|
|
83
|
+
if (!modelCapacity && !fatal && idle && meaningful.length === 0) {
|
|
78
84
|
retryable = true;
|
|
79
85
|
retryReason = 'stream_idle_before_meaningful_event';
|
|
80
86
|
}
|
|
81
|
-
else if (!fatal && empty) {
|
|
87
|
+
else if (!modelCapacity && !fatal && empty) {
|
|
82
88
|
retryable = true;
|
|
83
89
|
retryReason = 'empty_sdk_result_before_meaningful_event';
|
|
84
90
|
}
|
|
@@ -92,11 +98,26 @@ export function evaluateCodexReliabilityAttempt(result, events, policy, attempt)
|
|
|
92
98
|
retry_reason: retryReason,
|
|
93
99
|
idle_timeout: idle,
|
|
94
100
|
fatal_error: fatal,
|
|
101
|
+
model_capacity_error: modelCapacity,
|
|
102
|
+
capacity_fallback_hint: null,
|
|
95
103
|
repaired_tool_result_count: 0,
|
|
96
104
|
heartbeat_count: 0,
|
|
97
105
|
blockers
|
|
98
106
|
};
|
|
99
107
|
}
|
|
108
|
+
/**
 * Reports whether a Codex attempt failed because the model was at capacity.
 *
 * The check is textual: it joins `result.finalResponse`, every entry of
 * `result.blockers`, and for each event the fields `error.message`,
 * `message`, `item.text` and `raw.failed_event.error.message`, then tests
 * the combined text against a case-insensitive capacity pattern.
 *
 * A missing `result` or a non-array `events` is treated as empty input
 * instead of throwing.
 *
 * @param {object|null|undefined} result - Attempt result.
 * @param {Array<object|null>|null|undefined} events - Events observed during the attempt.
 * @returns {boolean} True when any inspected text matches the capacity pattern.
 */
export function isCodexModelCapacityError(result, events) {
    const eventList = Array.isArray(events) ? events : [];
    const blockerList = Array.isArray(result?.blockers) ? result.blockers : [];
    const text = [
        String(result?.finalResponse || ''),
        ...blockerList,
        ...eventList.map((event) => [
            event?.error?.message,
            event?.message,
            event?.item?.text,
            event?.raw?.failed_event?.error?.message
        ].filter(Boolean).join('\n'))
    ].join('\n');
    return /selected model is at capacity|model(?:\s+[\w.-]+)?\s+is\s+at\s+capacity|try a different model|capacity(?:\s+is)?\s+exhausted|temporarily at capacity/i.test(text);
}
|
|
100
121
|
export function repairToolCallSequence(events) {
|
|
101
122
|
const repaired = [...events];
|
|
102
123
|
const openToolCalls = new Set();
|
|
@@ -118,6 +118,8 @@ export async function runCodexTask(input) {
|
|
|
118
118
|
patchEnvelopePath,
|
|
119
119
|
blockers: finalBlockers,
|
|
120
120
|
reliabilityShield: adapterResult?.reliabilityShield || null,
|
|
121
|
+
capacityFallback: adapterResult?.reliabilityShield?.selected_model_capacity_fallback === true,
|
|
122
|
+
modelCapacityRetryCount: Number(adapterResult?.reliabilityShield?.model_capacity_retry_count || 0),
|
|
121
123
|
ultraRouterDecision: routerDecision,
|
|
122
124
|
outputSchemaId: task.outputSchemaId,
|
|
123
125
|
finalResponse: adapterResult?.finalResponse || '',
|
|
@@ -146,7 +148,11 @@ export async function runCodexTask(input) {
|
|
|
146
148
|
result,
|
|
147
149
|
capability: capability,
|
|
148
150
|
sandbox,
|
|
149
|
-
envProof:
|
|
151
|
+
envProof: {
|
|
152
|
+
...runtime.env.proof,
|
|
153
|
+
capacity_fallback_selected: result.capacityFallback === true,
|
|
154
|
+
model_capacity_retry_count: result.modelCapacityRetryCount
|
|
155
|
+
},
|
|
150
156
|
config: runtime.config,
|
|
151
157
|
reliabilityShield: adapterResult?.reliabilityShield || null,
|
|
152
158
|
routerDecision: routerDecision,
|
|
@@ -56,7 +56,7 @@ export function defaultModelCallBudget(provider) {
|
|
|
56
56
|
const text = String(provider || '');
|
|
57
57
|
if (text === 'local-llm' || text === 'ollama')
|
|
58
58
|
return envInt('SKS_LOCAL_LLM_MAX_PARALLEL_REQUESTS', 4);
|
|
59
|
-
return envInt('SKS_REMOTE_API_PARALLEL_BUDGET',
|
|
59
|
+
return envInt('SKS_REMOTE_API_PARALLEL_BUDGET', 3);
|
|
60
60
|
}
|
|
61
61
|
class ModelCallSemaphoreImpl {
|
|
62
62
|
provider;
|
|
@@ -17,6 +17,8 @@ import { runCodexAppHandoff, qaLoopShouldRequestAppHandoff } from '../codex-app/
|
|
|
17
17
|
import { writeCodex0138CapabilityArtifacts } from '../codex-control/codex-0138-capability.js';
|
|
18
18
|
import { writeCodexAccountUsageArtifacts } from '../usage/codex-account-usage.js';
|
|
19
19
|
import { buildQaLoopBudgetPolicy, selectQaLoopEscalatedEffort } from '../qa-loop/qa-loop-budget-policy.js';
|
|
20
|
+
import { initializeQaRuntimeArtifacts } from '../qa-loop/qa-runtime-artifacts.js';
|
|
21
|
+
import { DEFAULT_QA_MAX_CYCLES, QA_SURFACE_SELECTION_ARTIFACT } from '../qa-loop/qa-types.js';
|
|
20
22
|
import { writeCodexModelEffortCapabilityArtifact } from '../codex-control/codex-model-capabilities.js';
|
|
21
23
|
import { discoverImageArtifactsInDir, writeImageArtifactPathContract } from '../image/image-artifact-path-contract.js';
|
|
22
24
|
import { pluginAppTemplatePolicy } from '../codex-plugins/codex-plugin-json.js';
|
|
@@ -41,7 +43,7 @@ export async function qaLoopCommand(sub, args = []) {
|
|
|
41
43
|
Usage:
|
|
42
44
|
sks qa-loop prepare "target"
|
|
43
45
|
sks qa-loop answer <mission-id|latest> <answers.json>
|
|
44
|
-
sks qa-loop run <mission-id|latest> [--mock] [--max-cycles N] [--app-handoff] [--app-handoff-required] [--app-handoff-launch] [--app-handoff-artifact-only]
|
|
46
|
+
sks qa-loop run <mission-id|latest> [--mock] [--max-cycles N] [--surface auto|codex_in_app_browser|codex_chrome_extension|codex_computer_use] [--report-only] [--app-handoff] [--app-handoff-required] [--app-handoff-launch] [--app-handoff-artifact-only]
|
|
45
47
|
sks qa-loop app-confirm <mission-id|latest> --verdict pass|fail --notes "..."
|
|
46
48
|
sks qa-loop status <mission-id|latest> [--desktop]
|
|
47
49
|
`);
|
|
@@ -126,6 +128,17 @@ async function qaLoopRun(args) {
|
|
|
126
128
|
await writeQaLoopArtifacts(dir, mission, contract);
|
|
127
129
|
else
|
|
128
130
|
await ensureQaLoopVisualEvidenceContract(dir, mission, contract);
|
|
131
|
+
const requestedSurface = readFlagValue(args, '--surface', 'auto');
|
|
132
|
+
const reportOnly = flag(args, '--report-only');
|
|
133
|
+
await initializeQaRuntimeArtifacts(dir, {
|
|
134
|
+
...contract,
|
|
135
|
+
prompt: mission.prompt || contract.prompt,
|
|
136
|
+
mission_id: id
|
|
137
|
+
}, {
|
|
138
|
+
missionId: id,
|
|
139
|
+
requestedSurface,
|
|
140
|
+
reportOnly
|
|
141
|
+
}).catch(() => null);
|
|
129
142
|
const safetyScan = await scanDbSafety(root);
|
|
130
143
|
if (!safetyScan.ok) {
|
|
131
144
|
console.error('QA-LOOP cannot run: SKS safety scan found unsafe project data-tool configuration.');
|
|
@@ -133,7 +146,7 @@ async function qaLoopRun(args) {
|
|
|
133
146
|
process.exitCode = 2;
|
|
134
147
|
return;
|
|
135
148
|
}
|
|
136
|
-
const fallbackCycles = Number.parseInt(contract.answers?.MAX_QA_CYCLES, 10) ||
|
|
149
|
+
const fallbackCycles = Number.parseInt(contract.answers?.MAX_QA_CYCLES, 10) || DEFAULT_QA_MAX_CYCLES;
|
|
137
150
|
const maxCycles = readMaxCycles(args, fallbackCycles);
|
|
138
151
|
const requestedAgents = readBoundedIntegerFlag(args, '--agents', 3, 1, 20);
|
|
139
152
|
const targetActiveSlots = readBoundedIntegerFlag(args, '--target-active-slots', requestedAgents, 1, 20);
|
|
@@ -141,15 +154,18 @@ async function qaLoopRun(args) {
|
|
|
141
154
|
const minimumWorkItems = readBoundedIntegerFlag(args, '--minimum-work-items', targetActiveSlots, 1, 200);
|
|
142
155
|
const maxQueueExpansion = readBoundedIntegerFlag(args, '--max-queue-expansion', 10, 0, 200);
|
|
143
156
|
const profile = readFlagValue(args, '--profile', 'sks-logic-high') || 'sks-logic-high';
|
|
144
|
-
const
|
|
145
|
-
const
|
|
157
|
+
const mock = flag(args, '--mock');
|
|
158
|
+
const sourceFixesEnabled = !reportOnly;
|
|
159
|
+
const writeMode = readFlagValue(args, '--write-mode', sourceFixesEnabled && !mock ? (flag(args, '--parallel-write') ? 'parallel' : 'proof-safe') : 'off');
|
|
160
|
+
const applyPatches = sourceFixesEnabled && !mock && !flag(args, '--no-fix');
|
|
146
161
|
const dryRunPatches = flag(args, '--dry-run-patches') || flag(args, '--dryrun-patches');
|
|
147
162
|
const maxWriteAgents = readBoundedIntegerFlag(args, '--max-write-agents', Math.min(requestedAgents, 5), 1, 20);
|
|
148
|
-
const mock = flag(args, '--mock');
|
|
149
163
|
const qaGate = await readJson(path.join(dir, 'qa-gate.json'), {});
|
|
150
164
|
const reportFile = qaGate.qa_report_file;
|
|
151
165
|
const executionProfile = await readJson(path.join(dir, 'qa-loop', 'execution-profile.json'), null);
|
|
152
166
|
const uiRequired = qaUiRequired(contract.answers || {});
|
|
167
|
+
const surfaceSelection = await readJson(path.join(dir, QA_SURFACE_SELECTION_ARTIFACT), null);
|
|
168
|
+
const selectedSurface = surfaceSelection?.selected_surface || null;
|
|
153
169
|
const gptImage2ReviewRequired = qaGptImage2AnnotatedReviewRequired(contract, mission.prompt);
|
|
154
170
|
const capabilityArtifact = await writeCodex0138CapabilityArtifacts(root, { missionId: id }).catch((err) => ({ error: err?.message || String(err), report: null }));
|
|
155
171
|
const usageArtifact = await writeCodexAccountUsageArtifacts(root, { missionId: id }).catch((err) => ({ error: err?.message || String(err), snapshot: null }));
|
|
@@ -246,7 +262,7 @@ async function qaLoopRun(args) {
|
|
|
246
262
|
return;
|
|
247
263
|
}
|
|
248
264
|
}
|
|
249
|
-
if (uiRequired && !mock) {
|
|
265
|
+
if (uiRequired && !mock && selectedSurface === 'codex_chrome_extension') {
|
|
250
266
|
const chrome = await codexChromeExtensionStatus();
|
|
251
267
|
if (!chrome.ok) {
|
|
252
268
|
const blockedGate = {
|
|
@@ -277,7 +293,7 @@ async function qaLoopRun(args) {
|
|
|
277
293
|
await setCurrent(root, { mission_id: id, mode: 'QALOOP', phase: 'QALOOP_BLOCKED_CHROME_EXTENSION_SETUP_REQUIRED', questions_allowed: true });
|
|
278
294
|
if (flag(args, '--json'))
|
|
279
295
|
return console.log(JSON.stringify({ schema: 'sks.qa-loop-run.v1', ok: false, status: 'blocked', blocker: 'codex_chrome_extension_setup_required', mission_id: id, chrome_extension: chrome, gate: blockedGate }, null, 2));
|
|
280
|
-
console.error('QA-LOOP blocked:
|
|
296
|
+
console.error('QA-LOOP blocked: this journey was routed to @Chrome, but the Codex Chrome Extension is not connected. Install/enable it, then resume.');
|
|
281
297
|
console.error(chrome.docs_url);
|
|
282
298
|
process.exitCode = 2;
|
|
283
299
|
return;
|
package/dist/core/fsx.js
CHANGED
|
@@ -5,7 +5,7 @@ import os from 'node:os';
|
|
|
5
5
|
import crypto from 'node:crypto';
|
|
6
6
|
import { spawn } from 'node:child_process';
|
|
7
7
|
import { fileURLToPath } from 'node:url';
|
|
8
|
-
export const PACKAGE_VERSION = '4.
|
|
8
|
+
export const PACKAGE_VERSION = '4.4.0';
|
|
9
9
|
export const DEFAULT_PROCESS_TAIL_BYTES = 256 * 1024;
|
|
10
10
|
export const DEFAULT_PROCESS_TIMEOUT_MS = 30 * 60 * 1000;
|
|
11
11
|
export function nowIso() {
|
|
@@ -590,7 +590,7 @@ function clarificationAnswerToolAllowed(payload = {}) {
|
|
|
590
590
|
return true;
|
|
591
591
|
if (/\bpipeline\s+answer\b/i.test(command))
|
|
592
592
|
return true;
|
|
593
|
-
return !/\b(npm|git|selftest|packcheck|release:check|publish:dry|publish:npm|doctor|team|qa-loop|wiki|db|test)\b/i.test(command);
|
|
593
|
+
return !/\b(npm|git|selftest|packcheck|release:check|publish:dry|publish:ignore-scripts|publish:npm|doctor|team|qa-loop|wiki|db|test)\b/i.test(command);
|
|
594
594
|
}
|
|
595
595
|
function payloadMentionsAnswersJson(payload = {}) {
|
|
596
596
|
try {
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
import path from 'node:path';
|
|
2
|
+
import { appendJsonlBounded, nowIso, writeJsonAtomic } from '../fsx.js';
|
|
3
|
+
import { QA_ACTION_LEDGER_ARTIFACT, QA_LIVE_SESSION_ARTIFACT, QA_RUNTIME_EVENT_LEDGER_ARTIFACT } from './qa-types.js';
|
|
4
|
+
/**
 * Drives one QA turn through an app-server client and records what happened.
 *
 * Steps: subscribe to client events (when `onEvent` exists), initialize the
 * client (when `initialize` exists), start a thread, start a turn with the
 * prompt, optionally wait for turn completion, then write the event
 * ledgers and the live-session artifact into the mission directory.
 *
 * Failures inside the client interaction are captured as
 * `app_server_driver_failed:<message>` blockers rather than thrown; ledger
 * and artifact writes happen outside that guard.
 *
 * NOTE(review): when the client delivers the completion notification to
 * `onEvent` listeners as well as returning it from `waitForTurnCompletion`,
 * the synthetic `turn/completed` record pushed below looks like it
 * duplicates an event already captured — confirm whether that is intended.
 *
 * @param {object} input - Uses `client`, `cwd`, `prompt`, `timeoutMs`,
 *   `threadStartParams`, `turnStartParams`, `missionDir`, `missionId` and
 *   `surfaceSelection.selected_surface`.
 * @returns {Promise<object>} The session summary that was written to disk.
 */
export async function runQaAppServerDriver(input) {
    const { client } = input;
    const captured = [];
    const unsubscribe = client.onEvent?.((event) => {
        captured.push(event);
    });
    const startedAt = nowIso();
    const blockers = [];
    let threadId = null;
    let turnId = null;
    try {
        await client.initialize?.();
        const threadResult = await client.startThread({
            cwd: input.cwd,
            ...input.threadStartParams
        });
        threadId = extractThreadId(threadResult);
        if (threadId) {
            const turnResult = await client.startTurn({
                threadId,
                cwd: input.cwd,
                input: [{ type: 'text', text: input.prompt }],
                ...input.turnStartParams
            });
            turnId = extractTurnId(turnResult);
            if (!turnId) {
                blockers.push('app_server_turn_id_missing');
            }
            if (client.waitForTurnCompletion) {
                const completion = await client.waitForTurnCompletion(threadId, turnId, input.timeoutMs);
                captured.push({ method: 'turn/completed', params: completion, received_at: nowIso() });
            }
        }
        else {
            blockers.push('app_server_thread_id_missing');
        }
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        blockers.push(`app_server_driver_failed:${reason}`);
    }
    finally {
        // Always detach the listener, even when the run failed.
        unsubscribe?.();
    }
    const surface = input.surfaceSelection.selected_surface;
    await writeAppServerEventLedgers(input.missionDir, input.missionId, surface, threadId, turnId, captured);
    const actionCount = captured.filter(isActionLikeEvent).length;
    const session = {
        schema: 'sks.qa-loop-live-session.v2',
        started_at: startedAt,
        completed_at: nowIso(),
        mission_id: input.missionId,
        status: blockers.length ? 'blocked' : 'completed',
        selected_surface: surface,
        thread_id: threadId,
        turn_id: turnId,
        event_count: captured.length,
        item_event_count: captured.filter(isItemEvent).length,
        action_event_count: actionCount,
        observation_event_count: captured.filter(isObservationLikeEvent).length,
        blockers,
        // Without a single action-like event the run is flagged as unverified.
        unverified: actionCount > 0 ? [] : ['app_server_action_event_unverified'],
        artifacts: {
            runtime_events: QA_RUNTIME_EVENT_LEDGER_ARTIFACT,
            action_ledger: QA_ACTION_LEDGER_ARTIFACT
        }
    };
    await writeJsonAtomic(path.join(input.missionDir, QA_LIVE_SESSION_ARTIFACT), session);
    return session;
}
|
|
68
|
+
/**
 * Appends every captured event to the runtime-event ledger and, for events
 * that `isActionLikeEvent` accepts, also to the action ledger.
 *
 * Events are written one at a time, in order. Each record stores a redacted
 * copy of the event; ids found on the event take precedence over the
 * `threadId` / `turnId` fallbacks.
 *
 * @param {string} missionDir - Directory the ledger paths are resolved against.
 * @param {*} missionId - Stored as `mission_id`.
 * @param {*} surface - Stored as `surface`.
 * @param {string|null} threadId - Fallback `thread_id`.
 * @param {string|null} turnId - Fallback `turn_id`.
 * @param {object[]} events - Captured events.
 * @returns {Promise<void>}
 */
async function writeAppServerEventLedgers(missionDir, missionId, surface, threadId, turnId, events) {
    // Fields shared by both ledger record shapes, in their serialized order.
    const identify = (event, kind) => ({
        mission_id: missionId,
        thread_id: event.params?.threadId || event.threadId || threadId,
        turn_id: event.params?.turnId || event.params?.turn?.id || event.turnId || turnId,
        item_id: event.params?.itemId || event.params?.item?.id || event.itemId || null,
        surface,
        kind,
        status: 'observed'
    });
    for (const event of events) {
        const kind = String(event.method || event.type || event.params?.method || 'app_server_event');
        await appendJsonlBounded(path.join(missionDir, QA_RUNTIME_EVENT_LEDGER_ARTIFACT), {
            schema: 'sks.qa-loop-app-server-event.v2',
            ts: nowIso(),
            ...identify(event, kind),
            data: redactEvent(event)
        });
        if (!isActionLikeEvent(event)) {
            continue;
        }
        await appendJsonlBounded(path.join(missionDir, QA_ACTION_LEDGER_ARTIFACT), {
            schema: 'sks.qa-loop-action.v2',
            ts: nowIso(),
            ...identify(event, kind),
            real: true,
            data: redactEvent(event)
        });
    }
}
|
|
100
|
+
/**
 * Pulls a thread id out of a `startThread` response.
 *
 * Looks at `thread.id`, then `threadId`, then `id`, taking the first truthy one.
 *
 * @param {*} value - Response object (may be null/undefined).
 * @returns {string|null} Trimmed id, or null when none is present.
 */
function extractThreadId(value) {
    const candidate = value?.thread?.id || value?.threadId || value?.id;
    return stringOrNull(candidate);
}
|
|
104
|
+
/**
 * Pulls a turn id out of a `startTurn` response.
 *
 * Looks at `turn.id`, then `turnId`, then `id`, taking the first truthy one.
 *
 * @param {*} value - Response object (may be null/undefined).
 * @returns {string|null} Trimmed id, or null when none is present.
 */
function extractTurnId(value) {
    const candidate = value?.turn?.id || value?.turnId || value?.id;
    return stringOrNull(candidate);
}
|
|
108
|
+
/**
 * Coerces a value to a trimmed string, mapping "nothing" to null.
 *
 * Falsy inputs (including `0` and `false`) and whitespace-only strings
 * yield null.
 *
 * @param {*} value - Value to coerce.
 * @returns {string|null} Trimmed text, or null when it is empty.
 */
function stringOrNull(value) {
    const trimmed = String(value || '').trim();
    if (trimmed === '') {
        return null;
    }
    return trimmed;
}
|
|
112
|
+
/**
 * Tells whether an event's name (its `method`, else its `type`) starts with `item/`.
 *
 * @param {object} event - Captured event.
 * @returns {boolean}
 */
function isItemEvent(event) {
    const name = String(event.method || event.type || '');
    return name.startsWith('item/');
}
|
|
115
|
+
/**
 * Heuristically classifies an event as an action.
 *
 * An event qualifies when its name (`method`, else `type`) starts with
 * `item/` and later contains one of tool / action / commandExecution /
 * computer / browser / chrome, or when the JSON text of `event.params`
 * (else the whole event) contains one of tool / action / click / type /
 * scroll / navigate / screenshot / observation. Both checks ignore case.
 *
 * NOTE(review): the payload check is a plain substring match over the
 * serialized JSON, keys included, so a `"type"` key or a word such as
 * "interaction" also qualifies — confirm this breadth is intended.
 *
 * @param {object} event - Captured event.
 * @returns {boolean}
 */
function isActionLikeEvent(event) {
    const name = String(event.method || event.type || '');
    if (/^item\/.*(?:tool|action|commandExecution|computer|browser|chrome)/i.test(name)) {
        return true;
    }
    const payload = JSON.stringify(event.params || event);
    return /(?:tool|action|click|type|scroll|navigate|screenshot|observation)/i.test(payload);
}
|
|
120
|
+
/**
 * Heuristically classifies an event as an observation.
 *
 * An event qualifies when its name (`method`, else `type`) contains one of
 * observation / completed / screenshot / browser / chrome / computer, or
 * when the JSON text of `event.params` (else the whole event) contains one
 * of observation / screenshot / visual / page / window. Both checks ignore
 * case and are plain substring matches.
 *
 * @param {object} event - Captured event.
 * @returns {boolean}
 */
function isObservationLikeEvent(event) {
    const name = String(event.method || event.type || '');
    if (/observation|completed|screenshot|browser|chrome|computer/i.test(name)) {
        return true;
    }
    const payload = JSON.stringify(event.params || event);
    return /observation|screenshot|visual|page|window/i.test(payload);
}
|
|
125
|
+
/**
 * Returns a JSON-round-tripped copy of an event with secrets masked.
 *
 * A property is replaced by `'[REDACTED]'` when its key contains one of
 * password / passwd / token / secret / cookie / authorization / credential
 * (case-insensitive), or when its value is a string containing a
 * `Bearer <token>` sequence or an `sk-…` style key.
 *
 * The `sk-` pattern is anchored on a word boundary so ordinary text such as
 * "codex-task-runner" or "risk-free" is not mistaken for a key and wiped
 * from the ledger.
 *
 * @param {object} event - Event to copy; must be JSON-serializable.
 * @returns {object} Redacted deep copy.
 */
function redactEvent(event) {
    const secretKey = /(password|passwd|token|secret|cookie|authorization|credential)/i;
    const secretValue = /(Bearer\s+[A-Za-z0-9._-]+|\bsk-[A-Za-z0-9_-]+)/;
    const serialized = JSON.stringify(event, (key, value) => {
        if (secretKey.test(String(key))) {
            return '[REDACTED]';
        }
        if (typeof value === 'string' && secretValue.test(value)) {
            return '[REDACTED]';
        }
        return value;
    });
    return JSON.parse(serialized);
}
|
|
134
|
+
//# sourceMappingURL=qa-app-server-driver.js.map
|