sneakoscope 4.2.0 → 4.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/crates/sks-core/Cargo.lock +1 -1
- package/crates/sks-core/Cargo.toml +1 -1
- package/crates/sks-core/src/main.rs +1 -1
- package/dist/bin/sks.js +1 -1
- package/dist/core/codex-control/codex-app-server-v2-client.js +86 -2
- package/dist/core/codex-control/codex-reliability-shield.js +26 -5
- package/dist/core/codex-control/codex-task-runner.js +7 -1
- package/dist/core/codex-control/model-call-concurrency.js +1 -1
- package/dist/core/commands/qa-loop-command.js +23 -7
- package/dist/core/fsx.js +1 -1
- package/dist/core/hooks-runtime.js +1 -1
- package/dist/core/qa-loop/qa-app-server-driver.js +134 -0
- package/dist/core/qa-loop/qa-contract-v2.js +231 -0
- package/dist/core/qa-loop/qa-gate-v2.js +132 -0
- package/dist/core/qa-loop/qa-runtime-artifacts.js +53 -0
- package/dist/core/qa-loop/qa-surface-router.js +114 -0
- package/dist/core/qa-loop/qa-types.js +18 -0
- package/dist/core/qa-loop.js +83 -26
- package/dist/core/release/gate-manifest.js +1 -0
- package/dist/core/release/sla-scheduler.js +1 -1
- package/dist/core/routes.js +19 -4
- package/dist/core/triwiki/triwiki-affected-graph.js +3 -2
- package/dist/core/version.js +1 -1
- package/dist/scripts/codex-control-all-pipelines-check.js +1 -0
- package/dist/scripts/codex-control-model-capacity-fallback-check.js +53 -0
- package/dist/scripts/config-managed-merge-callsite-coverage-check.js +7 -1
- package/dist/scripts/loop-directive-check-lib.js +78 -1
- package/dist/scripts/qa-loop-app-server-driver-check.js +74 -0
- package/dist/scripts/qa-loop-surface-router-check.js +49 -0
- package/dist/scripts/release-check-dynamic-execute.js +1 -1
- package/dist/scripts/runtime-ts-rust-boundary-check.js +1 -1
- package/dist/scripts/triwiki-affected-graph-check.js +2 -2
- package/package.json +6 -3
package/README.md
CHANGED
|
@@ -35,7 +35,7 @@ Set up this agent project with Sneakoscope Codex. Use [[mandarange/Sneakoscope-C
|
|
|
35
35
|
|
|
36
36
|
## 🚀 Current Release
|
|
37
37
|
|
|
38
|
-
SKS **4.2.
|
|
38
|
+
SKS **4.2.1** stabilizes MadDB SQL-plane execution so explicit `$MAD-DB` and `sks mad-db run|exec|apply-migration` invocations use a first-class, mission-bound break-glass route instead of inheriting `$MAD-SKS` state.
|
|
39
39
|
|
|
40
40
|
What changed in 4.2.0:
|
|
41
41
|
|
|
@@ -866,7 +866,7 @@ npm run release:check
|
|
|
866
866
|
npm run publish:dry
|
|
867
867
|
```
|
|
868
868
|
|
|
869
|
-
`release:check` runs the change-aware affected release gate for ordinary local checks. Publish readiness uses `release:check:full`, which runs the full release DAG and writes a source digest stamp under `.sneakoscope/reports/` so publish commands can verify the same source/dist state. The DAG preserves the 1.18 baseline gates and adds Codex 0.136 compatibility, inherited Codex 0.135/0.134 runner truth, patch swarm runtime truth, transaction journaling, serial conflict rebase, strict strategy-to-patch proof, rollback command proof, Native CLI Session Swarm 5/10/20-process proof, Real Worker Backend Router proof, Codex child overlap proof, model-authored patch-envelope separation, Zellij layout/pane/screen/socket-dir proof, no-subagent-scaling proof, Fast mode default/worker/Codex/MAD propagation proof, Appshots attachment provenance, MCP runtime overlap evidence, task graph expansion, schema-bound follow-up work, actual Agent/Team/Research/QA route blackboxes, scheduler proof hardening, Source Intelligence propagation, Goal mode propagation checks, slot telemetry, update notice, MAD-DB, and Naruto SSOT gates. Broader live gates remain explicit scripts such as `release:real-check`; real Codex patch smoke, real Codex parallel worker proof, and real Zellij proof are optional unless their `SKS_REQUIRE_REAL_*` or `SKS_REQUIRE_ZELLIJ=1` environment variables are set. Generate the human-readable registry with `sks features inventory --write-docs`. Plain `npm publish` uses the `latest` dist-tag. `npm run publish:dry` runs `release:check:full`, verifies the fresh stamp, and then performs provenance/registry and npm dry-run checks. npm's `prepublishOnly` uses `prepublish-release-check-or-fast` to accept that current stamp before the real publish; if the stamp is missing or stale, it runs `release:check:full` once before continuing.
|
|
869
|
+
`release:check` runs the change-aware affected release gate for ordinary local checks. Publish readiness uses `release:check:full`, which runs the full release DAG and writes a source digest stamp under `.sneakoscope/reports/` so publish commands can verify the same source/dist state. The DAG preserves the 1.18 baseline gates and adds Codex 0.136 compatibility, inherited Codex 0.135/0.134 runner truth, patch swarm runtime truth, transaction journaling, serial conflict rebase, strict strategy-to-patch proof, rollback command proof, Native CLI Session Swarm 5/10/20-process proof, Real Worker Backend Router proof, Codex child overlap proof, model-authored patch-envelope separation, Zellij layout/pane/screen/socket-dir proof, no-subagent-scaling proof, Fast mode default/worker/Codex/MAD propagation proof, Appshots attachment provenance, MCP runtime overlap evidence, task graph expansion, schema-bound follow-up work, actual Agent/Team/Research/QA route blackboxes, scheduler proof hardening, Source Intelligence propagation, Goal mode propagation checks, slot telemetry, update notice, MAD-DB, and Naruto SSOT gates. Broader live gates remain explicit scripts such as `release:real-check`; real Codex patch smoke, real Codex parallel worker proof, and real Zellij proof are optional unless their `SKS_REQUIRE_REAL_*` or `SKS_REQUIRE_ZELLIJ=1` environment variables are set. Generate the human-readable registry with `sks features inventory --write-docs`. Plain `npm publish` uses the `latest` dist-tag. `npm run publish:dry` runs `release:check:full`, verifies the fresh stamp, and then performs provenance/registry and npm dry-run checks. `npm run publish:npm` and `npm run release:publish` run the same prepublish gate and then `npm publish --ignore-scripts`, so the real publish path stays strict even when lifecycle scripts are skipped. 
npm's `prepublishOnly` uses `prepublish-release-check-or-fast` to accept that current stamp before the real publish; if the stamp is missing or stale, it runs `release:check:full` once before continuing.
|
|
870
870
|
|
|
871
871
|
Version bumps are manual. Run `sks versioning bump` only when preparing release metadata; SKS will not create `.git/hooks/pre-commit` or auto-bump during ordinary commits.
|
|
872
872
|
|
|
@@ -4,7 +4,7 @@ use std::io::{self, Read, Seek, SeekFrom};
|
|
|
4
4
|
fn main() {
|
|
5
5
|
let mut args = std::env::args().skip(1);
|
|
6
6
|
match args.next().as_deref() {
|
|
7
|
-
Some("--version") => println!("sks-rs 4.2.0"),
|
|
7
|
+
Some("--version") => println!("sks-rs 4.2.1"),
|
|
8
8
|
Some("compact-info") => {
|
|
9
9
|
let mut input = String::new();
|
|
10
10
|
let _ = io::stdin().read_to_string(&mut input);
|
package/dist/bin/sks.js
CHANGED
|
@@ -8,10 +8,12 @@ export class CodexAppServerV2Client {
|
|
|
8
8
|
cwd;
|
|
9
9
|
timeoutMs;
|
|
10
10
|
currentTimeProvider;
|
|
11
|
+
approvalPolicy;
|
|
11
12
|
child = null;
|
|
12
13
|
nextId = 1;
|
|
13
14
|
pending = new Map();
|
|
14
15
|
notifications = [];
|
|
16
|
+
listeners = new Set();
|
|
15
17
|
stdoutBuffer = '';
|
|
16
18
|
stderr = '';
|
|
17
19
|
constructor(options) {
|
|
@@ -21,6 +23,7 @@ export class CodexAppServerV2Client {
|
|
|
21
23
|
this.cwd = options.cwd || process.cwd();
|
|
22
24
|
this.timeoutMs = Number(options.timeoutMs || 20_000);
|
|
23
25
|
this.currentTimeProvider = options.currentTimeProvider || (() => new Date());
|
|
26
|
+
this.approvalPolicy = options.approvalPolicy || {};
|
|
24
27
|
}
|
|
25
28
|
async initialize() {
|
|
26
29
|
this.start();
|
|
@@ -36,18 +39,58 @@ export class CodexAppServerV2Client {
|
|
|
36
39
|
optOutNotificationMethods: []
|
|
37
40
|
}
|
|
38
41
|
});
|
|
39
|
-
this.notify('
|
|
42
|
+
this.notify('initialized', {});
|
|
40
43
|
return result;
|
|
41
44
|
}
|
|
42
45
|
async listThreads(params = {}) {
|
|
43
46
|
return await this.request('thread/list', normalizeThreadListParams(params));
|
|
44
47
|
}
|
|
48
|
+
async startThread(params = {}) {
|
|
49
|
+
return await this.request('thread/start', params);
|
|
50
|
+
}
|
|
51
|
+
async resumeThread(params = {}) {
|
|
52
|
+
return await this.request('thread/resume', params);
|
|
53
|
+
}
|
|
45
54
|
async searchThreads(searchTerm, params = {}) {
|
|
46
55
|
return await this.listThreads({ ...params, searchTerm });
|
|
47
56
|
}
|
|
48
57
|
async readThread(threadId, includeTurns = false) {
|
|
49
58
|
return await this.request('thread/read', { threadId, includeTurns });
|
|
50
59
|
}
|
|
60
|
+
async startTurn(params = {}) {
|
|
61
|
+
return await this.request('turn/start', params);
|
|
62
|
+
}
|
|
63
|
+
async steerTurn(params = {}) {
|
|
64
|
+
return await this.request('turn/steer', params);
|
|
65
|
+
}
|
|
66
|
+
async interruptTurn(params = {}) {
|
|
67
|
+
return await this.request('turn/interrupt', params);
|
|
68
|
+
}
|
|
69
|
+
onEvent(listener) {
|
|
70
|
+
this.listeners.add(listener);
|
|
71
|
+
return () => this.listeners.delete(listener);
|
|
72
|
+
}
|
|
73
|
+
waitForNotification(methods, timeoutMs = this.timeoutMs) {
|
|
74
|
+
const expected = new Set(Array.isArray(methods) ? methods.map(String) : [String(methods)]);
|
|
75
|
+
return new Promise((resolve, reject) => {
|
|
76
|
+
const timer = setTimeout(() => {
|
|
77
|
+
dispose();
|
|
78
|
+
reject(new Error(`Timed out waiting for app-server notification: ${Array.from(expected).join(', ')}`));
|
|
79
|
+
}, timeoutMs);
|
|
80
|
+
timer.unref?.();
|
|
81
|
+
const dispose = this.onEvent((event) => {
|
|
82
|
+
if (event && expected.has(String(event.method || ''))) {
|
|
83
|
+
clearTimeout(timer);
|
|
84
|
+
dispose();
|
|
85
|
+
resolve(event);
|
|
86
|
+
}
|
|
87
|
+
});
|
|
88
|
+
});
|
|
89
|
+
}
|
|
90
|
+
async waitForTurnCompletion(threadId, turnId, timeoutMs = this.timeoutMs) {
|
|
91
|
+
const expected = turnId ? ['turn/completed', 'thread/closed', 'thread/status/changed'] : ['turn/completed', 'thread/closed'];
|
|
92
|
+
return await this.waitForNotification(expected, timeoutMs);
|
|
93
|
+
}
|
|
51
94
|
start() {
|
|
52
95
|
if (this.child)
|
|
53
96
|
return;
|
|
@@ -107,7 +150,14 @@ export class CodexAppServerV2Client {
|
|
|
107
150
|
void this.respondToServerRequest(message);
|
|
108
151
|
}
|
|
109
152
|
else {
|
|
110
|
-
|
|
153
|
+
const event = { ...message, received_at: nowIso() };
|
|
154
|
+
this.notifications.push(event);
|
|
155
|
+
for (const listener of this.listeners) {
|
|
156
|
+
try {
|
|
157
|
+
listener(event);
|
|
158
|
+
}
|
|
159
|
+
catch { }
|
|
160
|
+
}
|
|
111
161
|
}
|
|
112
162
|
}
|
|
113
163
|
}
|
|
@@ -119,6 +169,38 @@ export class CodexAppServerV2Client {
|
|
|
119
169
|
this.write({ jsonrpc: '2.0', id, result: currentTimeResponse(this.currentTimeProvider()) });
|
|
120
170
|
return;
|
|
121
171
|
}
|
|
172
|
+
if (method === 'item/commandExecution/requestApproval' || method === 'commandExecution/requestApproval') {
|
|
173
|
+
this.write({ jsonrpc: '2.0', id, result: this.approvalPolicy.commandExecution?.(message.params) || { decision: 'cancel' } });
|
|
174
|
+
return;
|
|
175
|
+
}
|
|
176
|
+
if (method === 'item/fileChange/requestApproval' || method === 'fileChange/requestApproval') {
|
|
177
|
+
this.write({ jsonrpc: '2.0', id, result: this.approvalPolicy.fileChange?.(message.params) || { decision: 'cancel' } });
|
|
178
|
+
return;
|
|
179
|
+
}
|
|
180
|
+
if (method === 'item/permissions/requestApproval' || method === 'permissions/requestApproval') {
|
|
181
|
+
this.write({ jsonrpc: '2.0', id, result: this.approvalPolicy.permissions?.(message.params) || { permissions: { network: { enabled: false }, fileSystem: { read: [], write: [], entries: [] } }, scope: 'turn', strictAutoReview: true } });
|
|
182
|
+
return;
|
|
183
|
+
}
|
|
184
|
+
if (method === 'item/tool/requestUserInput') {
|
|
185
|
+
this.write({ jsonrpc: '2.0', id, result: this.approvalPolicy.toolRequestUserInput?.(message.params) || { answers: {} } });
|
|
186
|
+
return;
|
|
187
|
+
}
|
|
188
|
+
if (method === 'item/tool/call') {
|
|
189
|
+
this.write({ jsonrpc: '2.0', id, result: this.approvalPolicy.dynamicToolCall?.(message.params) || { contentItems: [], success: false } });
|
|
190
|
+
return;
|
|
191
|
+
}
|
|
192
|
+
if (method === 'mcpServer/elicitation/request') {
|
|
193
|
+
this.write({ jsonrpc: '2.0', id, result: this.approvalPolicy.mcpElicitation?.(message.params) || { contentItems: [], success: false } });
|
|
194
|
+
return;
|
|
195
|
+
}
|
|
196
|
+
if (method === 'attestation/generate') {
|
|
197
|
+
this.write({ jsonrpc: '2.0', id, result: this.approvalPolicy.attestation?.(message.params) || { decision: 'cancel' } });
|
|
198
|
+
return;
|
|
199
|
+
}
|
|
200
|
+
if (method === 'account/chatgptAuthTokens/refresh') {
|
|
201
|
+
this.write({ jsonrpc: '2.0', id, result: this.approvalPolicy.chatgptAuthTokensRefresh?.(message.params) || { ok: false } });
|
|
202
|
+
return;
|
|
203
|
+
}
|
|
122
204
|
this.write({
|
|
123
205
|
jsonrpc: '2.0',
|
|
124
206
|
id,
|
|
@@ -182,6 +264,8 @@ export async function createCodexAppServerV2Client(options = {}) {
|
|
|
182
264
|
clientOptions.timeoutMs = options.timeoutMs;
|
|
183
265
|
if (options.currentTimeProvider !== undefined)
|
|
184
266
|
clientOptions.currentTimeProvider = options.currentTimeProvider;
|
|
267
|
+
if (options.approvalPolicy !== undefined)
|
|
268
|
+
clientOptions.approvalPolicy = options.approvalPolicy;
|
|
185
269
|
return {
|
|
186
270
|
client: new CodexAppServerV2Client(clientOptions),
|
|
187
271
|
runtimeIdentity: runtime.identity
|
|
@@ -37,6 +37,7 @@ export async function runWithCodexReliabilityShield(input, runAttempt) {
|
|
|
37
37
|
break;
|
|
38
38
|
}
|
|
39
39
|
const blockers = attempts.flatMap((attempt) => attempt.blockers);
|
|
40
|
+
const modelCapacityRetryCount = attempts.filter((attempt) => attempt.model_capacity_error && attempt.retryable).length;
|
|
40
41
|
const report = {
|
|
41
42
|
schema: CODEX_RELIABILITY_SHIELD_SCHEMA,
|
|
42
43
|
generated_at: nowIso(),
|
|
@@ -52,6 +53,8 @@ export async function runWithCodexReliabilityShield(input, runAttempt) {
|
|
|
52
53
|
heartbeat_count: attempts.reduce((sum, attempt) => sum + attempt.heartbeat_count, 0),
|
|
53
54
|
repaired_tool_result_count: attempts.reduce((sum, attempt) => sum + attempt.repaired_tool_result_count, 0),
|
|
54
55
|
no_duplicate_streamed_output: attempts.slice(0, -1).every((attempt) => attempt.meaningful_event_count === 0),
|
|
56
|
+
model_capacity_retry_count: modelCapacityRetryCount,
|
|
57
|
+
selected_model_capacity_fallback: selectedAttempt > 1 && modelCapacityRetryCount > 0,
|
|
55
58
|
blockers
|
|
56
59
|
};
|
|
57
60
|
return {
|
|
@@ -61,24 +64,27 @@ export async function runWithCodexReliabilityShield(input, runAttempt) {
|
|
|
61
64
|
}
|
|
62
65
|
export function evaluateCodexReliabilityAttempt(result, events, policy, attempt) {
|
|
63
66
|
const meaningful = events.filter(isMeaningfulEvent);
|
|
64
|
-
const fatal = hasFatalError(result, events);
|
|
67
|
+
const modelCapacity = isCodexModelCapacityError(result, events);
|
|
68
|
+
const fatal = !modelCapacity && hasFatalError(result, events);
|
|
65
69
|
const idle = hasIdleTimeout(events, policy.idleTimeoutMs);
|
|
66
70
|
const empty = events.length === 0 || (!String(result.finalResponse || '').trim() && meaningful.length === 0);
|
|
67
71
|
const partial = meaningful.length > 0 && !result.structuredOutput;
|
|
68
72
|
const blockers = [];
|
|
69
73
|
let retryable = false;
|
|
70
74
|
let retryReason = null;
|
|
71
|
-
if (idle && partial)
|
|
75
|
+
if (modelCapacity)
|
|
76
|
+
blockers.push('codex_model_capacity_unavailable');
|
|
77
|
+
if (!modelCapacity && idle && partial)
|
|
72
78
|
blockers.push('codex_reliability_idle_after_partial_output');
|
|
73
|
-
if (partial && !idle)
|
|
79
|
+
if (!modelCapacity && partial && !idle)
|
|
74
80
|
blockers.push('codex_reliability_partial_output_without_structured_result');
|
|
75
81
|
if (fatal)
|
|
76
82
|
blockers.push('codex_reliability_fatal_error_no_retry');
|
|
77
|
-
if (!fatal && idle && meaningful.length === 0) {
|
|
83
|
+
if (!modelCapacity && !fatal && idle && meaningful.length === 0) {
|
|
78
84
|
retryable = true;
|
|
79
85
|
retryReason = 'stream_idle_before_meaningful_event';
|
|
80
86
|
}
|
|
81
|
-
else if (!fatal && empty) {
|
|
87
|
+
else if (!modelCapacity && !fatal && empty) {
|
|
82
88
|
retryable = true;
|
|
83
89
|
retryReason = 'empty_sdk_result_before_meaningful_event';
|
|
84
90
|
}
|
|
@@ -92,11 +98,26 @@ export function evaluateCodexReliabilityAttempt(result, events, policy, attempt)
|
|
|
92
98
|
retry_reason: retryReason,
|
|
93
99
|
idle_timeout: idle,
|
|
94
100
|
fatal_error: fatal,
|
|
101
|
+
model_capacity_error: modelCapacity,
|
|
102
|
+
capacity_fallback_hint: null,
|
|
95
103
|
repaired_tool_result_count: 0,
|
|
96
104
|
heartbeat_count: 0,
|
|
97
105
|
blockers
|
|
98
106
|
};
|
|
99
107
|
}
|
|
108
|
+
/**
 * Report whether a Codex attempt looks like a "model at capacity" failure.
 *
 * The final response, any `result.blockers` entries, and the message-like
 * fields of every event (`error.message`, `message`, `item.text`,
 * `raw.failed_event.error.message`) are joined with newlines and scanned
 * case-insensitively for known capacity phrases.
 *
 * @param {object} result - Attempt result; `finalResponse` and `blockers` are read.
 * @param {Array<object>} events - Events observed during the attempt.
 * @returns {boolean} True when any capacity phrase is present.
 */
export function isCodexModelCapacityError(result, events) {
    const capacityPattern = /selected model is at capacity|model(?:\s+[\w.-]+)?\s+is\s+at\s+capacity|try a different model|capacity(?:\s+is)?\s+exhausted|temporarily at capacity/i;
    const describeEvent = (event) => {
        const fields = [
            event?.error?.message,
            event?.message,
            event?.item?.text,
            event?.raw?.failed_event?.error?.message
        ];
        return fields.filter(Boolean).join('\n');
    };
    const blockerLines = Array.isArray(result.blockers) ? result.blockers : [];
    const eventLines = events.map((event) => describeEvent(event));
    const haystack = [String(result.finalResponse || ''), ...blockerLines, ...eventLines].join('\n');
    return capacityPattern.test(haystack);
}
|
|
100
121
|
export function repairToolCallSequence(events) {
|
|
101
122
|
const repaired = [...events];
|
|
102
123
|
const openToolCalls = new Set();
|
|
@@ -118,6 +118,8 @@ export async function runCodexTask(input) {
|
|
|
118
118
|
patchEnvelopePath,
|
|
119
119
|
blockers: finalBlockers,
|
|
120
120
|
reliabilityShield: adapterResult?.reliabilityShield || null,
|
|
121
|
+
capacityFallback: adapterResult?.reliabilityShield?.selected_model_capacity_fallback === true,
|
|
122
|
+
modelCapacityRetryCount: Number(adapterResult?.reliabilityShield?.model_capacity_retry_count || 0),
|
|
121
123
|
ultraRouterDecision: routerDecision,
|
|
122
124
|
outputSchemaId: task.outputSchemaId,
|
|
123
125
|
finalResponse: adapterResult?.finalResponse || '',
|
|
@@ -146,7 +148,11 @@ export async function runCodexTask(input) {
|
|
|
146
148
|
result,
|
|
147
149
|
capability: capability,
|
|
148
150
|
sandbox,
|
|
149
|
-
envProof: runtime.env.proof,
|
|
151
|
+
envProof: {
|
|
152
|
+
...runtime.env.proof,
|
|
153
|
+
capacity_fallback_selected: result.capacityFallback === true,
|
|
154
|
+
model_capacity_retry_count: result.modelCapacityRetryCount
|
|
155
|
+
},
|
|
150
156
|
config: runtime.config,
|
|
151
157
|
reliabilityShield: adapterResult?.reliabilityShield || null,
|
|
152
158
|
routerDecision: routerDecision,
|
|
@@ -56,7 +56,7 @@ export function defaultModelCallBudget(provider) {
|
|
|
56
56
|
const text = String(provider || '');
|
|
57
57
|
if (text === 'local-llm' || text === 'ollama')
|
|
58
58
|
return envInt('SKS_LOCAL_LLM_MAX_PARALLEL_REQUESTS', 4);
|
|
59
|
-
return envInt('SKS_REMOTE_API_PARALLEL_BUDGET',
|
|
59
|
+
return envInt('SKS_REMOTE_API_PARALLEL_BUDGET', 3);
|
|
60
60
|
}
|
|
61
61
|
class ModelCallSemaphoreImpl {
|
|
62
62
|
provider;
|
|
@@ -17,6 +17,8 @@ import { runCodexAppHandoff, qaLoopShouldRequestAppHandoff } from '../codex-app/
|
|
|
17
17
|
import { writeCodex0138CapabilityArtifacts } from '../codex-control/codex-0138-capability.js';
|
|
18
18
|
import { writeCodexAccountUsageArtifacts } from '../usage/codex-account-usage.js';
|
|
19
19
|
import { buildQaLoopBudgetPolicy, selectQaLoopEscalatedEffort } from '../qa-loop/qa-loop-budget-policy.js';
|
|
20
|
+
import { initializeQaRuntimeArtifacts } from '../qa-loop/qa-runtime-artifacts.js';
|
|
21
|
+
import { DEFAULT_QA_MAX_CYCLES, QA_SURFACE_SELECTION_ARTIFACT } from '../qa-loop/qa-types.js';
|
|
20
22
|
import { writeCodexModelEffortCapabilityArtifact } from '../codex-control/codex-model-capabilities.js';
|
|
21
23
|
import { discoverImageArtifactsInDir, writeImageArtifactPathContract } from '../image/image-artifact-path-contract.js';
|
|
22
24
|
import { pluginAppTemplatePolicy } from '../codex-plugins/codex-plugin-json.js';
|
|
@@ -41,7 +43,7 @@ export async function qaLoopCommand(sub, args = []) {
|
|
|
41
43
|
Usage:
|
|
42
44
|
sks qa-loop prepare "target"
|
|
43
45
|
sks qa-loop answer <mission-id|latest> <answers.json>
|
|
44
|
-
sks qa-loop run <mission-id|latest> [--mock] [--max-cycles N] [--app-handoff] [--app-handoff-required] [--app-handoff-launch] [--app-handoff-artifact-only]
|
|
46
|
+
sks qa-loop run <mission-id|latest> [--mock] [--max-cycles N] [--surface auto|codex_in_app_browser|codex_chrome_extension|codex_computer_use] [--report-only] [--app-handoff] [--app-handoff-required] [--app-handoff-launch] [--app-handoff-artifact-only]
|
|
45
47
|
sks qa-loop app-confirm <mission-id|latest> --verdict pass|fail --notes "..."
|
|
46
48
|
sks qa-loop status <mission-id|latest> [--desktop]
|
|
47
49
|
`);
|
|
@@ -126,6 +128,17 @@ async function qaLoopRun(args) {
|
|
|
126
128
|
await writeQaLoopArtifacts(dir, mission, contract);
|
|
127
129
|
else
|
|
128
130
|
await ensureQaLoopVisualEvidenceContract(dir, mission, contract);
|
|
131
|
+
const requestedSurface = readFlagValue(args, '--surface', 'auto');
|
|
132
|
+
const reportOnly = flag(args, '--report-only');
|
|
133
|
+
await initializeQaRuntimeArtifacts(dir, {
|
|
134
|
+
...contract,
|
|
135
|
+
prompt: mission.prompt || contract.prompt,
|
|
136
|
+
mission_id: id
|
|
137
|
+
}, {
|
|
138
|
+
missionId: id,
|
|
139
|
+
requestedSurface,
|
|
140
|
+
reportOnly
|
|
141
|
+
}).catch(() => null);
|
|
129
142
|
const safetyScan = await scanDbSafety(root);
|
|
130
143
|
if (!safetyScan.ok) {
|
|
131
144
|
console.error('QA-LOOP cannot run: SKS safety scan found unsafe project data-tool configuration.');
|
|
@@ -133,7 +146,7 @@ async function qaLoopRun(args) {
|
|
|
133
146
|
process.exitCode = 2;
|
|
134
147
|
return;
|
|
135
148
|
}
|
|
136
|
-
const fallbackCycles = Number.parseInt(contract.answers?.MAX_QA_CYCLES, 10) ||
|
|
149
|
+
const fallbackCycles = Number.parseInt(contract.answers?.MAX_QA_CYCLES, 10) || DEFAULT_QA_MAX_CYCLES;
|
|
137
150
|
const maxCycles = readMaxCycles(args, fallbackCycles);
|
|
138
151
|
const requestedAgents = readBoundedIntegerFlag(args, '--agents', 3, 1, 20);
|
|
139
152
|
const targetActiveSlots = readBoundedIntegerFlag(args, '--target-active-slots', requestedAgents, 1, 20);
|
|
@@ -141,15 +154,18 @@ async function qaLoopRun(args) {
|
|
|
141
154
|
const minimumWorkItems = readBoundedIntegerFlag(args, '--minimum-work-items', targetActiveSlots, 1, 200);
|
|
142
155
|
const maxQueueExpansion = readBoundedIntegerFlag(args, '--max-queue-expansion', 10, 0, 200);
|
|
143
156
|
const profile = readFlagValue(args, '--profile', 'sks-logic-high') || 'sks-logic-high';
|
|
144
|
-
const
|
|
145
|
-
const
|
|
157
|
+
const mock = flag(args, '--mock');
|
|
158
|
+
const sourceFixesEnabled = !reportOnly;
|
|
159
|
+
const writeMode = readFlagValue(args, '--write-mode', sourceFixesEnabled && !mock ? (flag(args, '--parallel-write') ? 'parallel' : 'proof-safe') : 'off');
|
|
160
|
+
const applyPatches = sourceFixesEnabled && !mock && !flag(args, '--no-fix');
|
|
146
161
|
const dryRunPatches = flag(args, '--dry-run-patches') || flag(args, '--dryrun-patches');
|
|
147
162
|
const maxWriteAgents = readBoundedIntegerFlag(args, '--max-write-agents', Math.min(requestedAgents, 5), 1, 20);
|
|
148
|
-
const mock = flag(args, '--mock');
|
|
149
163
|
const qaGate = await readJson(path.join(dir, 'qa-gate.json'), {});
|
|
150
164
|
const reportFile = qaGate.qa_report_file;
|
|
151
165
|
const executionProfile = await readJson(path.join(dir, 'qa-loop', 'execution-profile.json'), null);
|
|
152
166
|
const uiRequired = qaUiRequired(contract.answers || {});
|
|
167
|
+
const surfaceSelection = await readJson(path.join(dir, QA_SURFACE_SELECTION_ARTIFACT), null);
|
|
168
|
+
const selectedSurface = surfaceSelection?.selected_surface || null;
|
|
153
169
|
const gptImage2ReviewRequired = qaGptImage2AnnotatedReviewRequired(contract, mission.prompt);
|
|
154
170
|
const capabilityArtifact = await writeCodex0138CapabilityArtifacts(root, { missionId: id }).catch((err) => ({ error: err?.message || String(err), report: null }));
|
|
155
171
|
const usageArtifact = await writeCodexAccountUsageArtifacts(root, { missionId: id }).catch((err) => ({ error: err?.message || String(err), snapshot: null }));
|
|
@@ -246,7 +262,7 @@ async function qaLoopRun(args) {
|
|
|
246
262
|
return;
|
|
247
263
|
}
|
|
248
264
|
}
|
|
249
|
-
if (uiRequired && !mock) {
|
|
265
|
+
if (uiRequired && !mock && selectedSurface === 'codex_chrome_extension') {
|
|
250
266
|
const chrome = await codexChromeExtensionStatus();
|
|
251
267
|
if (!chrome.ok) {
|
|
252
268
|
const blockedGate = {
|
|
@@ -277,7 +293,7 @@ async function qaLoopRun(args) {
|
|
|
277
293
|
await setCurrent(root, { mission_id: id, mode: 'QALOOP', phase: 'QALOOP_BLOCKED_CHROME_EXTENSION_SETUP_REQUIRED', questions_allowed: true });
|
|
278
294
|
if (flag(args, '--json'))
|
|
279
295
|
return console.log(JSON.stringify({ schema: 'sks.qa-loop-run.v1', ok: false, status: 'blocked', blocker: 'codex_chrome_extension_setup_required', mission_id: id, chrome_extension: chrome, gate: blockedGate }, null, 2));
|
|
280
|
-
console.error('QA-LOOP blocked:
|
|
296
|
+
console.error('QA-LOOP blocked: this journey was routed to @Chrome, but the Codex Chrome Extension is not connected. Install/enable it, then resume.');
|
|
281
297
|
console.error(chrome.docs_url);
|
|
282
298
|
process.exitCode = 2;
|
|
283
299
|
return;
|
package/dist/core/fsx.js
CHANGED
|
@@ -5,7 +5,7 @@ import os from 'node:os';
|
|
|
5
5
|
import crypto from 'node:crypto';
|
|
6
6
|
import { spawn } from 'node:child_process';
|
|
7
7
|
import { fileURLToPath } from 'node:url';
|
|
8
|
-
export const PACKAGE_VERSION = '4.2.0';
|
|
8
|
+
export const PACKAGE_VERSION = '4.2.1';
|
|
9
9
|
export const DEFAULT_PROCESS_TAIL_BYTES = 256 * 1024;
|
|
10
10
|
export const DEFAULT_PROCESS_TIMEOUT_MS = 30 * 60 * 1000;
|
|
11
11
|
export function nowIso() {
|
|
@@ -590,7 +590,7 @@ function clarificationAnswerToolAllowed(payload = {}) {
|
|
|
590
590
|
return true;
|
|
591
591
|
if (/\bpipeline\s+answer\b/i.test(command))
|
|
592
592
|
return true;
|
|
593
|
-
return !/\b(npm|git|selftest|packcheck|release:check|publish:dry|publish:npm|doctor|team|qa-loop|wiki|db|test)\b/i.test(command);
|
|
593
|
+
return !/\b(npm|git|selftest|packcheck|release:check|publish:dry|publish:ignore-scripts|publish:npm|doctor|team|qa-loop|wiki|db|test)\b/i.test(command);
|
|
594
594
|
}
|
|
595
595
|
function payloadMentionsAnswersJson(payload = {}) {
|
|
596
596
|
try {
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
import path from 'node:path';
|
|
2
|
+
import { appendJsonlBounded, nowIso, writeJsonAtomic } from '../fsx.js';
|
|
3
|
+
import { QA_ACTION_LEDGER_ARTIFACT, QA_LIVE_SESSION_ARTIFACT, QA_RUNTIME_EVENT_LEDGER_ARTIFACT } from './qa-types.js';
|
|
4
|
+
/**
 * Drive one QA turn through an app-server client and persist what happened.
 *
 * Steps: subscribe to client events, initialize the client, start a thread,
 * start a turn with `input.prompt`, optionally wait for the turn to finish,
 * then append every collected event to the runtime/action ledgers and write
 * the live-session summary artifact.
 *
 * Failures inside the client calls never throw; they are recorded as
 * `app_server_driver_failed:<message>` blockers and the session is written
 * with status `blocked`.
 *
 * @param {object} input
 * @param {object} input.client - Needs `startThread` and `startTurn`;
 *   `onEvent`, `initialize` and `waitForTurnCompletion` are optional.
 * @param {string} input.cwd - Working directory sent with thread and turn start.
 * @param {string} input.prompt - Text sent as the turn input.
 * @param {string} input.missionDir - Directory receiving the artifacts.
 * @param {string} input.missionId - Mission id recorded in every artifact.
 * @param {object} [input.surfaceSelection] - `selected_surface` is recorded.
 * @param {object} [input.threadStartParams] - Extra `thread/start` params.
 * @param {object} [input.turnStartParams] - Extra `turn/start` params.
 * @param {number} [input.timeoutMs] - Timeout passed to `waitForTurnCompletion`.
 * @returns {Promise<object>} The session summary that was written.
 */
export async function runQaAppServerDriver(input) {
    const events = [];
    const dispose = input.client.onEvent?.((event) => {
        events.push(event);
    });
    const startedAt = nowIso();
    const blockers = [];
    // Read once and null-safely: a missing surface selection must not throw
    // after the run, which would lose the ledgers and the session artifact.
    const selectedSurface = input.surfaceSelection?.selected_surface;
    let threadId = null;
    let turnId = null;
    try {
        await input.client.initialize?.();
        const thread = await input.client.startThread({
            cwd: input.cwd,
            ...input.threadStartParams
        });
        threadId = extractThreadId(thread);
        if (!threadId) {
            blockers.push('app_server_thread_id_missing');
        }
        else {
            const turn = await input.client.startTurn({
                threadId,
                cwd: input.cwd,
                input: [{ type: 'text', text: input.prompt }],
                ...input.turnStartParams
            });
            turnId = extractTurnId(turn);
            if (!turnId)
                blockers.push('app_server_turn_id_missing');
            if (input.client.waitForTurnCompletion) {
                const completed = await input.client.waitForTurnCompletion(threadId, turnId, input.timeoutMs);
                // When the client resolves with the very event object our listener
                // already captured, recording it again would double-count it (and
                // nest the whole event under `params`). Only synthesize an entry
                // for clients that report completion without emitting the event.
                if (!events.includes(completed)) {
                    events.push({ method: 'turn/completed', params: completed, received_at: nowIso() });
                }
            }
        }
    }
    catch (err) {
        blockers.push(`app_server_driver_failed:${err instanceof Error ? err.message : String(err)}`);
    }
    finally {
        dispose?.();
    }
    await writeAppServerEventLedgers(input.missionDir, input.missionId, selectedSurface, threadId, turnId, events);
    const session = {
        schema: 'sks.qa-loop-live-session.v2',
        started_at: startedAt,
        completed_at: nowIso(),
        mission_id: input.missionId,
        status: blockers.length ? 'blocked' : 'completed',
        selected_surface: selectedSurface,
        thread_id: threadId,
        turn_id: turnId,
        event_count: events.length,
        item_event_count: events.filter(isItemEvent).length,
        action_event_count: events.filter(isActionLikeEvent).length,
        observation_event_count: events.filter(isObservationLikeEvent).length,
        blockers,
        // Without at least one action-like event the run cannot prove that any
        // real interaction happened, so flag it as unverified.
        unverified: events.some(isActionLikeEvent) ? [] : ['app_server_action_event_unverified'],
        artifacts: {
            runtime_events: QA_RUNTIME_EVENT_LEDGER_ARTIFACT,
            action_ledger: QA_ACTION_LEDGER_ARTIFACT
        }
    };
    await writeJsonAtomic(path.join(input.missionDir, QA_LIVE_SESSION_ARTIFACT), session);
    return session;
}
|
|
68
|
+
/**
 * Append each collected event to the runtime-event ledger and, when the event
 * looks like an action, also to the action ledger (marked `real: true`).
 *
 * Ids found on the event take precedence over the driver-level `threadId`
 * and `turnId`. Payloads are passed through `redactEvent` before being
 * written. Events are written sequentially, in the order given.
 *
 * @param {string} missionDir - Directory containing the ledger files.
 * @param {string} missionId - Mission id stored on every record.
 * @param {*} surface - Selected surface stored on every record.
 * @param {string|null} threadId - Fallback thread id.
 * @param {string|null} turnId - Fallback turn id.
 * @param {Array<object>} events - Events to persist.
 * @returns {Promise<void>}
 */
async function writeAppServerEventLedgers(missionDir, missionId, surface, threadId, turnId, events) {
    // Builds one ledger record; `extra` is spliced in just before `data` so the
    // serialized key order matches for both ledgers.
    const toRecord = (schema, event, kind, extra) => ({
        schema,
        ts: nowIso(),
        mission_id: missionId,
        thread_id: event.params?.threadId || event.threadId || threadId,
        turn_id: event.params?.turnId || event.params?.turn?.id || event.turnId || turnId,
        item_id: event.params?.itemId || event.params?.item?.id || event.itemId || null,
        surface,
        kind,
        status: 'observed',
        ...extra,
        data: redactEvent(event)
    });
    for (let index = 0; index < events.length; index += 1) {
        const event = events[index];
        const kind = String(event.method || event.type || event.params?.method || 'app_server_event');
        const runtimeLedger = path.join(missionDir, QA_RUNTIME_EVENT_LEDGER_ARTIFACT);
        await appendJsonlBounded(runtimeLedger, toRecord('sks.qa-loop-app-server-event.v2', event, kind, {}));
        if (!isActionLikeEvent(event))
            continue;
        const actionLedger = path.join(missionDir, QA_ACTION_LEDGER_ARTIFACT);
        await appendJsonlBounded(actionLedger, toRecord('sks.qa-loop-action.v2', event, kind, { real: true }));
    }
}
|
|
100
|
+
/**
 * Pull a thread id out of a `thread/start`-style response.
 *
 * Looks at `thread.id`, then `threadId`, then `id`, taking the first truthy one.
 *
 * @param {*} value - Response object (may be null/undefined).
 * @returns {string|null} Trimmed id, or null when none is present.
 */
function extractThreadId(value) {
    const candidate = value?.thread?.id || value?.threadId || value?.id;
    return stringOrNull(candidate);
}
|
|
104
|
+
/**
 * Pull a turn id out of a `turn/start`-style response.
 *
 * Looks at `turn.id`, then `turnId`, then `id`, taking the first truthy one.
 *
 * @param {*} value - Response object (may be null/undefined).
 * @returns {string|null} Trimmed id, or null when none is present.
 */
function extractTurnId(value) {
    const candidate = value?.turn?.id || value?.turnId || value?.id;
    return stringOrNull(candidate);
}
|
|
108
|
+
/**
 * Convert a value to a trimmed string, mapping "nothing" to null.
 *
 * Falsy inputs (including `0`) and whitespace-only strings yield null.
 *
 * @param {*} value - Value to convert.
 * @returns {string|null} Trimmed text, or null when empty.
 */
function stringOrNull(value) {
    const trimmed = (value ? String(value) : '').trim();
    return trimmed.length > 0 ? trimmed : null;
}
|
|
112
|
+
/**
 * Tell whether an event's method (or, failing that, its type) starts with `item/`.
 *
 * @param {object} event - Event to classify.
 * @returns {boolean} True for `item/...` events.
 */
function isItemEvent(event) {
    const name = String(event.method || event.type || '');
    return name.startsWith('item/');
}
|
|
115
|
+
/**
 * Heuristically decide whether an event describes a tool call or UI action.
 *
 * An event qualifies when either:
 *  - its method/type is an `item/...` name mentioning a tool, action, command
 *    execution, computer, browser or chrome; or
 *  - its payload (`params`, or the event itself when there are no params)
 *    mentions an action word in a property name or a string value.
 *
 * The payload check walks the object instead of regex-matching serialized
 * JSON. Matching the serialized text made the ubiquitous property name
 * `type` count as the action word "type", so nearly every item event was
 * classified as an action. Now `type` only counts as a whole word inside a
 * string value (e.g. `action: 'type'`), never as a property name. The walk
 * also tolerates cyclic payloads.
 *
 * @param {object} event - Event to classify.
 * @returns {boolean} True when the event looks like an action.
 */
function isActionLikeEvent(event) {
    const method = String(event.method || event.type || '');
    if (/^item\/.*(?:tool|action|commandExecution|computer|browser|chrome)/i.test(method))
        return true;
    const actionKey = /tool|action|click|scroll|navigate|screenshot|observation/i;
    const actionValue = /tool|action|click|\btype\b|scroll|navigate|screenshot|observation/i;
    const visited = new Set();
    const mentionsAction = (node) => {
        if (typeof node === 'string')
            return actionValue.test(node);
        if (node === null || typeof node !== 'object' || visited.has(node))
            return false;
        visited.add(node);
        if (Array.isArray(node))
            return node.some((child) => mentionsAction(child));
        return Object.entries(node).some(([key, child]) => actionKey.test(key) || mentionsAction(child));
    };
    return mentionsAction(event.params || event);
}
|
|
120
|
+
/**
 * Heuristically decide whether an event carries an observation.
 *
 * True when the method/type mentions observation, completed, screenshot,
 * browser, chrome or computer, or when the serialized payload (`params`, or
 * the whole event when there are no params) mentions observation,
 * screenshot, visual, page or window.
 *
 * @param {object} event - Event to classify.
 * @returns {boolean} True when the event looks like an observation.
 */
function isObservationLikeEvent(event) {
    const name = String(event.method || event.type || '');
    if (/observation|completed|screenshot|browser|chrome|computer/i.test(name))
        return true;
    const payloadText = JSON.stringify(event.params || event);
    return /observation|screenshot|visual|page|window/i.test(payloadText);
}
|
|
125
|
+
/**
 * Return a JSON-safe deep copy of an event with secrets replaced by `[REDACTED]`.
 *
 * A value is redacted when:
 *  - its property name contains password, passwd, token, secret, cookie,
 *    authorization or credential (case-insensitive); or
 *  - it is a string containing a `Bearer <token>` sequence or an `sk-...`
 *    style key.
 *
 * The `sk-` check now requires that `sk-` is not preceded by a letter or
 * digit. Previously any string containing those three characters mid-word
 * (`task-1`, `risk-based`, `desk-2`) was redacted wholesale, which erased
 * ordinary ledger evidence. Keys following punctuation, whitespace or `_`
 * are still caught.
 *
 * The copy is made with a JSON round trip, so the input must be
 * JSON-serializable (no cycles).
 *
 * @param {object} event - Event to sanitize.
 * @returns {object} Redacted deep copy.
 */
function redactEvent(event) {
    const sensitiveKey = /(password|passwd|token|secret|cookie|authorization|credential)/i;
    const sensitiveValue = /(Bearer\s+[A-Za-z0-9._-]+|(?<![A-Za-z0-9])sk-[A-Za-z0-9_-]+)/;
    const scrub = (key, value) => {
        if (sensitiveKey.test(String(key)))
            return '[REDACTED]';
        if (typeof value === 'string' && sensitiveValue.test(value))
            return '[REDACTED]';
        return value;
    };
    return JSON.parse(JSON.stringify(event, scrub));
}
|
|
134
|
+
//# sourceMappingURL=qa-app-server-driver.js.map
|