sneakoscope 2.0.15 → 2.0.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -3
- package/crates/sks-core/Cargo.lock +1 -1
- package/crates/sks-core/Cargo.toml +1 -1
- package/crates/sks-core/src/main.rs +1 -1
- package/dist/.sks-build-stamp.json +4 -4
- package/dist/bin/sks.js +1 -1
- package/dist/cli/command-registry.js +1 -1
- package/dist/commands/proof.js +21 -0
- package/dist/commands/zellij-slot-pane.js +7 -1
- package/dist/core/agents/agent-orchestrator.js +68 -3
- package/dist/core/agents/agent-scheduler.js +217 -86
- package/dist/core/agents/agent-schema.js +1 -1
- package/dist/core/agents/native-cli-session-swarm.js +97 -27
- package/dist/core/agents/native-cli-worker.js +56 -7
- package/dist/core/agents/parallel-runtime-proof.js +276 -0
- package/dist/core/agents/runtime-proof-summary.js +75 -0
- package/dist/core/codex-control/codex-task-runner.js +32 -4
- package/dist/core/codex-control/model-call-concurrency.js +106 -0
- package/dist/core/commands/naruto-command.js +65 -8
- package/dist/core/commands/team-command.js +6 -487
- package/dist/core/commands/team-legacy-observe-command.js +182 -0
- package/dist/core/db-safety.js +49 -6
- package/dist/core/feature-registry.js +4 -2
- package/dist/core/fsx.js +1 -1
- package/dist/core/git/git-worktree-capability.js +18 -0
- package/dist/core/git/git-worktree-manager.js +80 -0
- package/dist/core/git/git-worktree-pool.js +4 -0
- package/dist/core/hooks-runtime.js +41 -4
- package/dist/core/init.js +1 -0
- package/dist/core/mad-db/mad-db-capability.js +42 -2
- package/dist/core/mad-db/mad-db-ledger.js +14 -0
- package/dist/core/mad-db/mad-db-policy-resolver.js +2 -0
- package/dist/core/mad-db/mad-db-result-lifecycle.js +136 -0
- package/dist/core/naruto/naruto-concurrency-governor.js +14 -1
- package/dist/core/release/release-gate-affected-selector.js +47 -5
- package/dist/core/release/release-gate-dag.js +5 -1
- package/dist/core/release/release-gate-scheduler.js +2 -1
- package/dist/core/routes.js +3 -1
- package/dist/core/version.js +1 -1
- package/dist/core/zellij/zellij-slot-pane-renderer.js +74 -1
- package/dist/core/zellij/zellij-slot-telemetry.js +81 -3
- package/dist/core/zellij/zellij-ui-mode.js +12 -2
- package/dist/scripts/prepublish-release-check-or-fast.js +3 -3
- package/dist/scripts/release-speed-summary.js +23 -1
- package/package.json +38 -3
- package/schemas/agents/parallel-runtime-proof.schema.json +79 -0
|
@@ -126,18 +126,33 @@ export async function runNativeCliWorker(input = {}) {
|
|
|
126
126
|
});
|
|
127
127
|
await writeJsonAtomic(path.join(workerDir, 'worker-recursion-guard.json'), guard);
|
|
128
128
|
let noPatchReason = null;
|
|
129
|
-
const
|
|
129
|
+
const progressTelemetry = startWorkerProgressTelemetry({
|
|
130
130
|
agentRoot,
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
patchRel,
|
|
131
|
+
heartbeatRel,
|
|
132
|
+
intake,
|
|
134
133
|
agent,
|
|
135
134
|
slice,
|
|
136
|
-
intake: { ...intake, ...input },
|
|
137
135
|
backend,
|
|
138
|
-
|
|
139
|
-
guard
|
|
136
|
+
serviceTier: policy.service_tier
|
|
140
137
|
});
|
|
138
|
+
let routed;
|
|
139
|
+
try {
|
|
140
|
+
routed = await runNativeWorkerBackendRouter({
|
|
141
|
+
agentRoot,
|
|
142
|
+
workerDirRel,
|
|
143
|
+
resultRel,
|
|
144
|
+
patchRel,
|
|
145
|
+
agent,
|
|
146
|
+
slice,
|
|
147
|
+
intake: { ...intake, ...input },
|
|
148
|
+
backend,
|
|
149
|
+
fastModePolicy: policy,
|
|
150
|
+
guard
|
|
151
|
+
});
|
|
152
|
+
}
|
|
153
|
+
finally {
|
|
154
|
+
progressTelemetry.stop();
|
|
155
|
+
}
|
|
141
156
|
const patchEnvelopes = routed.patchEnvelopes;
|
|
142
157
|
if (patchEnvelopes.length) {
|
|
143
158
|
await writeJsonAtomic(path.resolve(agentRoot, patchRel), {
|
|
@@ -330,6 +345,39 @@ export async function runNativeCliWorker(input = {}) {
|
|
|
330
345
|
/**
 * Resolve after roughly `ms` milliseconds.
 *
 * @param {number} ms - Delay passed straight to setTimeout.
 * @returns {Promise<void>} Promise that fulfils (with no value) once the timer fires.
 */
function delay(ms) {
    return new Promise((done) => {
        setTimeout(done, ms);
    });
}
|
|
348
|
+
/**
 * Start a periodic, best-effort progress heartbeat while the worker backend runs.
 *
 * On every tick a `progress` row is appended to the heartbeat JSONL file and a
 * `task_progress` telemetry event is emitted. The interval is read from
 * SKS_ZELLIJ_WORKER_PROGRESS_MS (default 2000 ms) and is never shorter than 500 ms.
 *
 * @param {object} input - Uses agentRoot, heartbeatRel, intake, agent, slice,
 *   backend and serviceTier.
 * @returns {{ stop(): void }} Handle whose stop() cancels the timer.
 */
function startWorkerProgressTelemetry(input) {
    const parsed = Number(process.env.SKS_ZELLIJ_WORKER_PROGRESS_MS || 2000);
    const intervalMs = Math.max(500, Number.isFinite(parsed) ? Math.floor(parsed) : 2000);
    // Rejections were already ignored; also ignore synchronous throws so a
    // telemetry problem inside the timer callback cannot crash the worker.
    const bestEffort = (start) => {
        try {
            Promise.resolve(start()).catch(() => undefined);
        }
        catch {
            // Telemetry is advisory only.
        }
    };
    let tick = 0;
    const timer = setInterval(() => {
        tick += 1;
        const progress = { done: tick, total: 0, label: 'backend running' };
        bestEffort(() => appendJsonl(path.resolve(input.agentRoot, input.heartbeatRel), {
            schema: 'sks.native-cli-worker-heartbeat.v1',
            ts: nowIso(),
            event: 'progress',
            pid: process.pid,
            session_id: input.agent.session_id,
            slot_id: input.agent.slot_id || null,
            generation_index: input.agent.generation_index || null,
            progress
        }));
        bestEffort(() => workerTelemetry(input.agentRoot, input.intake, input.agent, input.slice, {
            eventType: 'task_progress',
            status: 'running',
            backend: input.backend,
            serviceTier: input.serviceTier,
            artifacts: [input.heartbeatRel],
            progress,
            logTail: `backend running ${tick}`
        }));
    }, intervalMs);
    return {
        stop() {
            clearInterval(timer);
        }
    };
}
|
|
333
381
|
function parseNativeCliWorkerArgs(args) {
|
|
334
382
|
return {
|
|
335
383
|
intake: readOption(args, '--intake', ''),
|
|
@@ -389,6 +437,7 @@ async function workerTelemetry(agentRoot, intake, agent, slice, input) {
|
|
|
389
437
|
worktree_path: agent.worktree?.path || slice.worktree?.path || intake.worktree?.path || null,
|
|
390
438
|
task_title: String(slice.description || slice.title || slice.id || 'worker task'),
|
|
391
439
|
current_file: firstString([slice.write_paths?.[0], slice.readonly_paths?.[0], slice.input_files?.[0]]) || null,
|
|
440
|
+
...(input.progress ? { progress: input.progress } : {}),
|
|
392
441
|
artifact_paths: input.artifacts || [],
|
|
393
442
|
log_tail: input.logTail || '',
|
|
394
443
|
blockers: input.blockers || []
|
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
import path from 'node:path';
|
|
2
|
+
import { appendJsonlBounded, ensureDir, nowIso, readJson, readText, writeJsonAtomic } from '../fsx.js';
|
|
3
|
+
export const PARALLEL_RUNTIME_EVENT_SCHEMA = 'sks.parallel-runtime-event.v1';
|
|
4
|
+
export const PARALLEL_RUNTIME_PROOF_SCHEMA = 'sks.parallel-runtime-proof.v1';
|
|
5
|
+
/**
 * Path of the JSONL event log inside the mission's agents directory.
 *
 * @param {string} root - Directory handed to inferAgentsDir.
 * @param {string} missionId - Mission identifier.
 * @returns {string} Path ending in `parallel-runtime.events.jsonl`.
 */
export function parallelRuntimeEventPath(root, missionId) {
    const agentsDir = inferAgentsDir(root, missionId);
    return path.join(agentsDir, 'parallel-runtime.events.jsonl');
}
|
|
8
|
+
/**
 * Path of the proof JSON file inside the mission's agents directory.
 *
 * @param {string} root - Directory handed to inferAgentsDir.
 * @param {string} missionId - Mission identifier.
 * @returns {string} Path ending in `parallel-runtime-proof.json`.
 */
export function parallelRuntimeProofPath(root, missionId) {
    const agentsDir = inferAgentsDir(root, missionId);
    return path.join(agentsDir, 'parallel-runtime-proof.json');
}
|
|
11
|
+
/**
 * Normalize an event and append it to the mission's parallel-runtime event log.
 *
 * @param {string} root - Directory used to locate the agents directory.
 * @param {string} missionId - Mission identifier stamped onto the row.
 * @param {object} event - Raw event fields (event_type, slot_id, pid, ...).
 * @returns {Promise<object>} The normalized row that was appended.
 */
export async function appendParallelRuntimeEvent(root, missionId, event) {
    // Normalize first so the row's timestamp is taken before any file I/O.
    const record = normalizeParallelRuntimeEvent(missionId, event);
    const target = parallelRuntimeEventPath(root, missionId);
    const parentDir = path.dirname(target);
    await ensureDir(parentDir);
    await appendJsonlBounded(target, record);
    return record;
}
|
|
18
|
+
/**
 * Replay the mission's parallel-runtime event log and derive a concurrency proof.
 *
 * Events are sorted by `ms` and replayed to track which workers, processes and
 * model calls are active at each point. From that the function derives peak
 * concurrency, launch spans, a speedup ratio (sum of worker durations divided by
 * wall time) and a list of blockers; the proof passes when no blocker was raised.
 *
 * @param {string} root - Directory used to locate the event log; the scheduler
 *   state is read from `<root>/agent-scheduler-state.json`.
 * @param {string} missionId - Mission identifier.
 * @param {object} [opts] - Optional overrides: requestedWorkers, targetActiveSlots,
 *   proofMode, allowMissingPids, requireWorkerPids, expectedWorkerRuntimeMs,
 *   visiblePanes, minActiveWorkers, minSpeedupRatio, firstBatchLaunchSpanLimitMs.
 * @returns {Promise<object>} Proof object using PARALLEL_RUNTIME_PROOF_SCHEMA.
 */
export async function buildParallelRuntimeProof(root, missionId, opts = {}) {
    const events = await readParallelRuntimeEvents(root, missionId);
    const sorted = events.sort((a, b) => a.ms - b.ms);
    const firstMs = sorted[0]?.ms || Date.now();
    const lastMs = sorted[sorted.length - 1]?.ms || firstMs;
    const workerActive = new Set();
    const processActive = new Set();
    const modelActive = new Set();
    const workerStarts = new Map();
    const workerDurations = [];
    const workerPids = new Set();
    const modelIds = new Set();
    const overlapWindows = [];
    let maxWorkers = 0;
    let maxProcesses = 0;
    let maxModels = 0;
    let previousMs = firstMs;
    let firstBatchLaunchSpanMs = 0;
    // Explicit flag: a first batch that really took 0 ms must not be replaced
    // by the span of a later batch.
    let firstBatchRecorded = false;
    const batchStart = new Map();
    for (const event of sorted) {
        if (event.ms > previousMs) {
            // Close the window that ended at this event using the counts seen so far.
            overlapWindows.push({
                start_ms: previousMs - firstMs,
                end_ms: event.ms - firstMs,
                active_workers: workerActive.size,
                active_model_calls: modelActive.size
            });
            previousMs = event.ms;
        }
        const workerKey = event.session_id || event.slot_id || (event.pid == null ? '' : `pid:${event.pid}`);
        const processKey = event.pid == null ? workerKey : `pid:${event.pid}`;
        if (event.event_type === 'batch_dispatch_started' && event.batch_id)
            batchStart.set(event.batch_id, event.ms);
        if (event.event_type === 'batch_dispatch_completed' && event.batch_id) {
            const started = batchStart.get(event.batch_id);
            if (started != null && !firstBatchRecorded) {
                firstBatchLaunchSpanMs = Math.max(0, event.ms - started);
                firstBatchRecorded = true;
            }
        }
        if (event.event_type === 'worker_launch_invoked' || event.event_type === 'worker_process_spawned') {
            if (workerKey) {
                workerActive.add(workerKey);
                if (!workerStarts.has(workerKey))
                    workerStarts.set(workerKey, event.ms);
            }
            if (event.event_type === 'worker_process_spawned' && processKey)
                processActive.add(processKey);
            if (event.pid != null)
                workerPids.add(event.pid);
        }
        if (event.event_type === 'worker_completed' || event.event_type === 'worker_failed') {
            if (workerKey) {
                workerActive.delete(workerKey);
                const started = workerStarts.get(workerKey);
                if (started != null)
                    workerDurations.push(Math.max(0, event.ms - started));
            }
            if (processKey)
                processActive.delete(processKey);
        }
        if (event.event_type === 'model_call_started') {
            const id = event.model_call_id || event.session_id || `model:${event.ms}:${modelActive.size}`;
            modelActive.add(id);
            modelIds.add(id);
        }
        if (event.event_type === 'model_call_completed') {
            const id = event.model_call_id || event.session_id || '';
            if (id)
                modelActive.delete(id);
        }
        maxWorkers = Math.max(maxWorkers, workerActive.size);
        maxProcesses = Math.max(maxProcesses, processActive.size);
        maxModels = Math.max(maxModels, modelActive.size);
    }
    const requestedWorkers = positiveInt(opts.requestedWorkers, workerStarts.size || workerPids.size || maxWorkers);
    const targetActiveSlots = positiveInt(opts.targetActiveSlots, requestedWorkers);
    const proofMode = opts.proofMode || 'production';
    const allowMissingPids = proofMode === 'in-process-fixture' && opts.allowMissingPids === true;
    const requireWorkerPids = opts.requireWorkerPids ?? (proofMode === 'production' && requestedWorkers >= 16);
    const wallMs = Math.max(0, lastMs - firstMs);
    // Without observed durations, fall back to an estimate per requested worker.
    const sequentialEstimateMs = workerDurations.length
        ? workerDurations.reduce((sum, value) => sum + value, 0)
        : requestedWorkers * positiveInt(opts.expectedWorkerRuntimeMs, 4000);
    const paneEvents = sorted.filter((event) => event.placement === 'zellij-pane');
    const observedPanes = new Set(paneEvents.map((event) => event.slot_id || event.session_id || '')).size;
    const visiblePanes = nonNegativeInt(opts.visiblePanes, observedPanes);
    const launchEvents = sorted.filter((event) => event.event_type === 'worker_launch_invoked' || event.event_type === 'worker_process_spawned');
    const observedHeadlessWorkers = launchEvents.filter((event) => event.placement === 'headless').length;
    const headlessWorkers = Math.max(observedHeadlessWorkers, Math.max(0, targetActiveSlots - visiblePanes));
    const minActiveWorkers = opts.minActiveWorkers === undefined
        ? Math.min(targetActiveSlots, requestedWorkers)
        : nonNegativeInt(opts.minActiveWorkers, Math.min(targetActiveSlots, requestedWorkers));
    const minSpeedup = Number.isFinite(Number(opts.minSpeedupRatio)) ? Number(opts.minSpeedupRatio) : requestedWorkers >= 16 ? 5 : 1;
    const speedupRatio = wallMs > 0 ? Number((sequentialEstimateMs / wallMs).toFixed(3)) : 0;
    // Loop instead of Math.max(...array) so a very long log cannot exceed the
    // engine's argument limit; NaN handling matches the spread form.
    let launchSpanMs = 0;
    if (launchEvents.length) {
        let earliest = Infinity;
        let latest = -Infinity;
        for (const event of launchEvents) {
            earliest = Math.min(earliest, event.ms);
            latest = Math.max(latest, event.ms);
        }
        launchSpanMs = latest - earliest;
    }
    const firstBatchLimit = positiveInt(opts.firstBatchLaunchSpanLimitMs, requestedWorkers >= 16 ? 2500 : 30000);
    const schedulerState = await readJson(path.join(root, 'agent-scheduler-state.json'), null).catch(() => null);
    const coalescedOverlapWindows = coalesceOverlapWindows(overlapWindows);
    const utilizationProofConsistency = buildUtilizationProofConsistency(schedulerState, {
        proofMaxActive: maxWorkers,
        proofWallMs: wallMs,
        proofActiveSlotTimeMs: activeSlotTimeMsFromWindows(coalescedOverlapWindows)
    });
    const blockers = [];
    if (!sorted.length)
        blockers.push('parallel_runtime_events_missing');
    if (minActiveWorkers > 0 && maxWorkers < minActiveWorkers)
        blockers.push('max_observed_active_workers_below_target');
    if (requireWorkerPids && workerPids.size < minActiveWorkers)
        blockers.push('unique_worker_pids_below_target');
    if (requireWorkerPids && workerPids.size === 0)
        blockers.push('unique_worker_pids_missing_in_production_proof');
    if (speedupRatio < minSpeedup)
        blockers.push('speedup_ratio_below_target');
    if (firstBatchLaunchSpanMs > firstBatchLimit)
        blockers.push('first_batch_launch_span_above_limit');
    return {
        schema: PARALLEL_RUNTIME_PROOF_SCHEMA,
        mission_id: missionId,
        generated_at: nowIso(),
        proof_mode: proofMode,
        require_worker_pids: requireWorkerPids,
        allow_missing_pids: allowMissingPids,
        requested_workers: requestedWorkers,
        target_active_slots: targetActiveSlots,
        max_observed_active_workers: maxWorkers,
        max_observed_worker_processes: Math.max(maxProcesses, workerPids.size ? maxProcesses : maxWorkers),
        unique_worker_pids: workerPids.size,
        unique_model_call_ids: modelIds.size,
        max_observed_model_calls: maxModels,
        launch_span_ms: launchSpanMs,
        first_batch_launch_span_ms: firstBatchLaunchSpanMs,
        wall_ms: wallMs,
        sequential_estimate_ms: sequentialEstimateMs,
        speedup_ratio: speedupRatio,
        overlap_windows: coalescedOverlapWindows,
        visible_panes: visiblePanes,
        headless_workers: headlessWorkers,
        utilization_proof_consistency: utilizationProofConsistency,
        passed: blockers.length === 0,
        blockers
    };
}
|
|
160
|
+
/**
 * Build the parallel-runtime proof and persist it next to the event log.
 *
 * @param {string} root - Directory used to locate the agents directory.
 * @param {string} missionId - Mission identifier.
 * @param {object} [opts] - Forwarded unchanged to buildParallelRuntimeProof.
 * @returns {Promise<object>} The proof that was written.
 */
export async function writeParallelRuntimeProof(root, missionId, opts = {}) {
    const result = await buildParallelRuntimeProof(root, missionId, opts);
    const destination = parallelRuntimeProofPath(root, missionId);
    await writeJsonAtomic(destination, result);
    return result;
}
|
|
165
|
+
/**
 * Load the mission's event log and keep only well-formed rows.
 *
 * Blank lines, lines that are not valid JSON, and rows whose `schema` is not
 * PARALLEL_RUNTIME_EVENT_SCHEMA are dropped without raising.
 *
 * @param {string} root - Directory used to locate the event log.
 * @param {string} missionId - Mission identifier.
 * @returns {Promise<object[]>} Parsed rows in file order.
 */
async function readParallelRuntimeEvents(root, missionId) {
    const raw = await readText(parallelRuntimeEventPath(root, missionId), '');
    const rows = [];
    for (const piece of String(raw).split(/\r?\n/)) {
        const line = piece.trim();
        if (!line)
            continue;
        let candidate;
        try {
            candidate = JSON.parse(line);
        }
        catch {
            continue;
        }
        if (candidate?.schema === PARALLEL_RUNTIME_EVENT_SCHEMA)
            rows.push(candidate);
    }
    return rows;
}
|
|
182
|
+
/**
 * Turn a raw event into the row shape stored in the event log.
 *
 * The row is stamped with the current time (`ts` and `ms`). Identifiers are
 * coerced to strings or null, `pid` to an integer or null, and
 * `generation_index` to an integer of at least 1 or null. `worktree_id`,
 * `model_call_id` and `batch_id` appear only when the caller supplied the
 * property; `meta` only when it is an object.
 *
 * @param {string} missionId - Mission identifier stored as `mission_id`.
 * @param {object} event - Raw event fields.
 * @returns {object} Normalized row using PARALLEL_RUNTIME_EVENT_SCHEMA.
 */
function normalizeParallelRuntimeEvent(missionId, event) {
    const textOrNull = (value) => (value == null ? null : String(value));
    let generationIndex = null;
    if (event.generation_index != null)
        generationIndex = Math.max(1, Math.floor(Number(event.generation_index) || 1));
    let pid = null;
    if (event.pid != null && Number.isFinite(Number(event.pid)))
        pid = Math.floor(Number(event.pid));
    const row = {
        schema: PARALLEL_RUNTIME_EVENT_SCHEMA,
        ts: nowIso(),
        ms: Date.now(),
        mission_id: missionId,
        event_type: event.event_type,
        slot_id: textOrNull(event.slot_id),
        generation_index: generationIndex,
        session_id: textOrNull(event.session_id),
        pid,
        backend: String(event.backend || 'unknown'),
        placement: normalizePlacement(event.placement)
    };
    // Optional keys are appended in a fixed order so serialized rows stay stable.
    if (event.worktree_id !== undefined)
        row.worktree_id = textOrNull(event.worktree_id);
    if (event.model_call_id !== undefined)
        row.model_call_id = textOrNull(event.model_call_id);
    if (event.batch_id !== undefined)
        row.batch_id = textOrNull(event.batch_id);
    if (event.meta && typeof event.meta === 'object')
        row.meta = event.meta;
    return row;
}
|
|
201
|
+
/**
 * Map a placement value onto the recognised set.
 *
 * @param {*} value - Candidate placement.
 * @returns {string} 'zellij-pane', 'process' or 'headless' when the value
 *   matches one of them exactly; otherwise 'unknown'.
 */
function normalizePlacement(value) {
    const candidate = String(value || 'unknown');
    const recognised = ['zellij-pane', 'process', 'headless'];
    return recognised.includes(candidate) ? candidate : 'unknown';
}
|
|
207
|
+
/**
 * Coerce a value to an integer of at least 1.
 *
 * @param {*} value - Candidate; used (floored) when it is finite and >= 1.
 * @param {number} fallback - Used (floored, minimum 1) otherwise.
 * @returns {number} Positive integer.
 */
function positiveInt(value, fallback) {
    const numeric = Number(value);
    const usable = Number.isFinite(numeric) && numeric >= 1;
    return usable ? Math.floor(numeric) : Math.max(1, Math.floor(fallback || 1));
}
|
|
213
|
+
/**
 * Coerce a value to an integer of at least 0.
 *
 * null, undefined and blank strings count as "not provided" and use the
 * fallback; Number() would otherwise turn null and '' into 0 and silently
 * override the fallback.
 *
 * @param {*} value - Candidate; used (floored) when it is finite and >= 0.
 * @param {number} fallback - Used (floored, minimum 0) otherwise.
 * @returns {number} Non-negative integer.
 */
function nonNegativeInt(value, fallback) {
    const missing = value == null || (typeof value === 'string' && value.trim() === '');
    const parsed = missing ? Number.NaN : Number(value);
    if (!Number.isFinite(parsed) || parsed < 0)
        return Math.max(0, Math.floor(fallback || 0));
    return Math.floor(parsed);
}
|
|
219
|
+
/**
 * Compare scheduler-reported utilization with the figures derived from events.
 *
 * Without a scheduler state object the result is `ok: true` with zeroed
 * scheduler figures. Otherwise `ok` requires the peak-active counts to differ
 * by at most 1 and either the wall time or the active-slot time to agree
 * within tolerance (25% of the larger value, at least 500 ms). The active-slot
 * comparison also accepts a scheduler figure that is larger than the proof
 * figure by no more than the wall-time delta multiplied by the scheduler's
 * peak-active count.
 *
 * @param {object|null} state - Scheduler state (max_observed_active_slots,
 *   wall_time_ms, active_slot_time_ms).
 * @param {{proofMaxActive:number, proofWallMs:number, proofActiveSlotTimeMs:number}} input
 * @returns {object} Consistency report with deltas and the verdict in `ok`.
 */
function buildUtilizationProofConsistency(state, input) {
    const { proofMaxActive, proofWallMs, proofActiveSlotTimeMs } = input;
    if (!state || typeof state !== 'object') {
        return {
            ok: true,
            scheduler_max_active: 0,
            proof_max_active: proofMaxActive,
            wall_ms_delta: 0,
            scheduler_active_slot_time_ms: 0,
            proof_active_slot_time_ms: proofActiveSlotTimeMs,
            active_slot_time_ms_delta: 0,
            scheduler_observation_delay_tolerance_ms: 0
        };
    }
    const quarterTolerance = (left, right) => Math.max(500, Math.round(Math.max(left, right) * 0.25));
    const schedulerMaxActive = nonNegativeInt(state.max_observed_active_slots, 0);
    const schedulerWallMs = nonNegativeInt(state.wall_time_ms, 0);
    const schedulerActiveSlotTimeMs = nonNegativeInt(state.active_slot_time_ms, 0);
    const wallDelta = Math.abs(schedulerWallMs - proofWallMs);
    const activeSlotDelta = Math.abs(schedulerActiveSlotTimeMs - proofActiveSlotTimeMs);
    const maxActiveDelta = Math.abs(schedulerMaxActive - proofMaxActive);
    const wallToleranceMs = quarterTolerance(schedulerWallMs, proofWallMs);
    const activeSlotToleranceMs = quarterTolerance(schedulerActiveSlotTimeMs, proofActiveSlotTimeMs);
    const observationDelayToleranceMs = Math.max(activeSlotToleranceMs, wallDelta * Math.max(1, schedulerMaxActive));
    const wallConsistent = wallDelta <= wallToleranceMs;
    let activeSlotConsistent = false;
    if (schedulerActiveSlotTimeMs > 0 && proofActiveSlotTimeMs > 0) {
        const withinTolerance = activeSlotDelta <= activeSlotToleranceMs;
        const schedulerAhead = schedulerActiveSlotTimeMs >= proofActiveSlotTimeMs
            && activeSlotDelta <= observationDelayToleranceMs;
        activeSlotConsistent = withinTolerance || schedulerAhead;
    }
    return {
        ok: maxActiveDelta <= 1 && (wallConsistent || activeSlotConsistent),
        scheduler_max_active: schedulerMaxActive,
        proof_max_active: proofMaxActive,
        wall_ms_delta: wallDelta,
        scheduler_active_slot_time_ms: schedulerActiveSlotTimeMs,
        proof_active_slot_time_ms: proofActiveSlotTimeMs,
        active_slot_time_ms_delta: activeSlotDelta,
        scheduler_observation_delay_tolerance_ms: observationDelayToleranceMs
    };
}
|
|
255
|
+
/**
 * Total worker-milliseconds covered by a list of overlap windows.
 *
 * @param {Array<{start_ms:number, end_ms:number, active_workers:number}>} windows
 * @returns {number} Sum of window length times active workers; negative
 *   lengths and negative worker counts contribute 0.
 */
function activeSlotTimeMsFromWindows(windows) {
    let total = 0;
    for (const span of windows) {
        const length = Math.max(0, span.end_ms - span.start_ms);
        const workers = Math.max(0, span.active_workers);
        total += length * workers;
    }
    return total;
}
|
|
258
|
+
/**
 * Trim the raw overlap windows down to the ones worth reporting.
 *
 * Keeps windows with positive length and at least one active worker or model
 * call, preserving order, and caps the result at 2000 entries.
 *
 * @param {Array<object>} windows - Raw windows in chronological order.
 * @returns {Array<object>} Filtered, capped list (same object references).
 */
function coalesceOverlapWindows(windows) {
    const kept = windows.filter((span) => {
        const hasLength = span.end_ms > span.start_ms;
        const hasActivity = span.active_workers > 0 || span.active_model_calls > 0;
        return hasLength && hasActivity;
    });
    return kept.slice(0, 2000);
}
|
|
264
|
+
/**
 * Work out the mission's `agents` directory from whichever directory was passed.
 *
 * Accepted shapes for `root`, checked in order:
 *   1. `<...>/<missionId>/agents` - returned as-is;
 *   2. `<...>/missions/<missionId>` - `agents` is appended;
 *   3. any path below `.sneakoscope/missions/<missionId>/` - truncated to the
 *      mission directory, then `agents` is appended;
 *   4. anything else - treated as the directory containing `.sneakoscope`.
 *
 * @param {string} root - Directory to interpret.
 * @param {string} missionId - Mission identifier.
 * @returns {string} Absolute path of the agents directory.
 */
function inferAgentsDir(root, missionId) {
    const absolute = path.resolve(root);
    const leaf = path.basename(absolute);
    const parentLeaf = path.basename(path.dirname(absolute));
    if (leaf === 'agents' && parentLeaf === missionId) {
        return absolute;
    }
    if (leaf === missionId && parentLeaf === 'missions') {
        return path.join(absolute, 'agents');
    }
    const marker = `${path.sep}.sneakoscope${path.sep}missions${path.sep}${missionId}${path.sep}`;
    const at = absolute.indexOf(marker);
    if (at < 0) {
        return path.join(absolute, '.sneakoscope', 'missions', missionId, 'agents');
    }
    // Drop the marker's trailing separator so the slice ends at the mission directory.
    const missionRoot = absolute.slice(0, at + marker.length - 1);
    return path.join(missionRoot, 'agents');
}
|
|
276
|
+
//# sourceMappingURL=parallel-runtime-proof.js.map
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import path from 'node:path';
|
|
2
|
+
import { findLatestMission, missionDir } from '../mission.js';
|
|
3
|
+
import { readJson, writeJsonAtomic } from '../fsx.js';
|
|
4
|
+
export const RUNTIME_PROOF_SUMMARY_SCHEMA = 'sks.runtime-proof-summary.v1';
|
|
5
|
+
/**
 * Collect the mission's runtime proof artifacts into one summary and write it
 * to `agents/runtime-proof-summary.json`.
 *
 * Reads the parallel proof, scheduler state, session swarm report, Zellij
 * telemetry snapshot and concurrency governor files; each missing file is read
 * as null. Blockers are raised when the parallel proof or the scheduler state
 * is missing, when the proof did not pass, and when the telemetry snapshot is
 * older than 3000 ms or has no usable timestamp.
 *
 * @param {string} root - Root passed to findLatestMission and missionDir.
 * @param {string} [missionIdInput='latest'] - Mission id, or 'latest' to look it up.
 * @returns {Promise<object>} Summary using RUNTIME_PROOF_SUMMARY_SCHEMA.
 * @throws {Error} 'runtime_proof_summary_mission_missing' when no mission id resolves.
 */
export async function buildRuntimeProofSummary(root, missionIdInput = 'latest') {
    const missionId = missionIdInput === 'latest' ? await findLatestMission(root) : missionIdInput;
    if (!missionId)
        throw new Error('runtime_proof_summary_mission_missing');
    const dir = missionDir(root, missionId);
    const agentsDir = path.join(dir, 'agents');
    const parallel = await readJson(path.join(agentsDir, 'parallel-runtime-proof.json'), null);
    const scheduler = await readJson(path.join(agentsDir, 'agent-scheduler-state.json'), null);
    const swarm = await readJson(path.join(agentsDir, 'agent-native-cli-session-swarm.json'), null);
    const telemetry = await readJson(path.join(dir, 'zellij', 'slot-telemetry.snapshot.json'), null);
    const governor = await readJson(path.join(agentsDir, 'naruto-concurrency-governor.json'), null);
    // An unparsable timestamp yields NaN, and NaN > 3000 is false, which would
    // report broken telemetry as fresh. Treat it like a missing timestamp.
    const updatedAtMs = telemetry?.updated_at ? Date.parse(telemetry.updated_at) : Number.NaN;
    const telemetryAgeMs = Number.isFinite(updatedAtMs)
        ? Math.max(0, Date.now() - updatedAtMs)
        : Number.MAX_SAFE_INTEGER;
    const visiblePanes = Number(parallel?.visible_panes ?? swarm?.zellij_pane_worker_sessions ?? telemetryVisiblePaneCount(telemetry) ?? 0);
    const targetActive = Number(scheduler?.target_active_slots ?? parallel?.target_active_slots ?? swarm?.target_active_slots ?? governor?.target_active_slots ?? 0);
    const headlessWorkers = Number(parallel?.headless_workers ?? swarm?.headless_overflow_worker_count ?? Math.max(0, targetActive - visiblePanes));
    // A failed proof must always raise a blocker, even if its own list is empty
    // or malformed; otherwise `ok` could be true while the proof failed.
    const failedProofBlockers = Array.isArray(parallel?.blockers) && parallel.blockers.length
        ? parallel.blockers
        : ['parallel_runtime_proof_failed'];
    const blockers = [
        ...(!parallel ? ['parallel_runtime_proof_missing'] : []),
        ...(!scheduler ? ['agent_scheduler_state_missing'] : []),
        ...(parallel?.passed === false ? failedProofBlockers : []),
        ...(telemetryAgeMs > 3000 ? ['zellij_telemetry_stale'] : [])
    ].map(String);
    const summary = {
        schema: RUNTIME_PROOF_SUMMARY_SCHEMA,
        ok: blockers.length === 0,
        mission_id: missionId,
        generated_at: new Date().toISOString(),
        parallel: {
            max_active_workers: Number(parallel?.max_observed_active_workers || scheduler?.max_observed_active_slots || 0),
            unique_worker_pids: Number(parallel?.unique_worker_pids || uniqueNumbers(swarm?.process_ids).length || 0),
            speedup_ratio: Number(parallel?.speedup_ratio || 0),
            proof_passed: parallel?.passed === true
        },
        ui: {
            visible_panes: visiblePanes,
            headless_workers: headlessWorkers,
            telemetry_age_ms: telemetryAgeMs,
            stale: telemetryAgeMs > 3000
        },
        model_calls: {
            max_observed: Number(parallel?.max_observed_model_calls || 0),
            unique_model_call_ids: Number(parallel?.unique_model_call_ids || 0)
        },
        scheduler: {
            largest_batch_size: Number(scheduler?.largest_batch_size || 0),
            utilization: Number(scheduler?.scheduler_utilization || 0)
        },
        blockers
    };
    await writeJsonAtomic(path.join(agentsDir, 'runtime-proof-summary.json'), summary);
    return summary;
}
|
|
56
|
+
/**
 * Format a runtime proof summary as newline-separated text.
 *
 * @param {object} summary - Object shaped like the result of buildRuntimeProofSummary.
 * @returns {string} One line per metric, plus a `Blockers:` line when any exist.
 */
export function renderRuntimeProofSummary(summary) {
    const { parallel, ui, model_calls: modelCalls, blockers } = summary;
    const ageSeconds = (ui.telemetry_age_ms / 1000).toFixed(1);
    const freshness = ui.stale ? 'stale' : 'fresh';
    const lines = [
        `Parallel proof: ${parallel.proof_passed ? 'passed' : 'blocked'}`,
        `Active workers: ${parallel.max_active_workers}`,
        `Unique PIDs: ${parallel.unique_worker_pids}`,
        `Speedup: ${parallel.speedup_ratio}x`,
        `Visible/headless: ${ui.visible_panes} / ${ui.headless_workers}`,
        `Telemetry: ${freshness} ${ageSeconds}s`,
        `Model calls max: ${modelCalls.max_observed}`
    ];
    if (blockers.length) {
        lines.push(`Blockers: ${blockers.join(', ')}`);
    }
    return lines.join('\n');
}
|
|
68
|
+
/**
 * Count telemetry slots that report a status other than 'headless'.
 *
 * @param {object|null} snapshot - Telemetry snapshot with an optional `slots` object.
 * @returns {number} Number of slots with a truthy, non-'headless' status;
 *   0 when `slots` is missing or not an object.
 */
function telemetryVisiblePaneCount(snapshot) {
    const slots = snapshot?.slots;
    if (!slots || typeof slots !== 'object') {
        return 0;
    }
    let visible = 0;
    for (const row of Object.values(slots)) {
        if (row?.status && row.status !== 'headless') {
            visible += 1;
        }
    }
    return visible;
}
|
|
72
|
+
/**
 * Distinct finite numbers from a list, in first-seen order.
 *
 * @param {*} values - Expected to be an array; anything else yields [].
 * @returns {number[]} Each element converted with Number(), with non-finite
 *   results discarded and duplicates removed.
 */
function uniqueNumbers(values) {
    const seen = new Set();
    if (Array.isArray(values)) {
        for (const raw of values) {
            const numeric = Number(raw);
            if (Number.isFinite(numeric)) {
                seen.add(numeric);
            }
        }
    }
    return [...seen];
}
|
|
75
|
+
//# sourceMappingURL=runtime-proof-summary.js.map
|
|
@@ -16,6 +16,7 @@ import { writeUltraRouterProof } from '../router/router-proof.js';
|
|
|
16
16
|
import { readLocalModelConfig } from '../agents/ollama-worker-config.js';
|
|
17
17
|
import { runLocalLlmTask } from '../local-llm/local-llm-control-adapter.js';
|
|
18
18
|
import { detectPythonCodexSdkCapability, runPythonCodexSdkTask } from './python-codex-sdk-adapter.js';
|
|
19
|
+
import { defaultModelCallBudget, withModelCallSlot } from './model-call-concurrency.js';
|
|
19
20
|
export async function runCodexTask(input) {
|
|
20
21
|
const root = path.resolve(input.mutationLedgerRoot);
|
|
21
22
|
await ensureDir(root);
|
|
@@ -46,7 +47,16 @@ export async function runCodexTask(input) {
|
|
|
46
47
|
];
|
|
47
48
|
let adapterResult = null;
|
|
48
49
|
if (!blockers.length) {
|
|
49
|
-
adapterResult = await
|
|
50
|
+
adapterResult = await withModelCallSlot({
|
|
51
|
+
root,
|
|
52
|
+
missionId: task.missionId,
|
|
53
|
+
provider: 'codex-sdk',
|
|
54
|
+
budget: defaultModelCallBudget('codex-sdk'),
|
|
55
|
+
slotId: task.slotId || null,
|
|
56
|
+
generationIndex: task.generationIndex ?? null,
|
|
57
|
+
sessionId: task.sessionId || null,
|
|
58
|
+
backend: 'codex-sdk'
|
|
59
|
+
}, () => runWithCodexReliabilityShield(task, async () => {
|
|
50
60
|
try {
|
|
51
61
|
return fakeAllowed
|
|
52
62
|
? await runFakeCodexSdkTask(task)
|
|
@@ -63,7 +73,7 @@ export async function runCodexTask(input) {
|
|
|
63
73
|
blockers: ['codex_sdk_run_failed:' + String(err?.message || err)]
|
|
64
74
|
};
|
|
65
75
|
}
|
|
66
|
-
});
|
|
76
|
+
}));
|
|
67
77
|
}
|
|
68
78
|
const events = Array.isArray(adapterResult?.events) ? adapterResult.events : [];
|
|
69
79
|
const translatedEvents = translateCodexSdkEvents(events);
|
|
@@ -153,7 +163,16 @@ async function runPythonControlTask(root, task, schema, routerDecision) {
|
|
|
153
163
|
await ensureDir(runtime.env.env.CODEX_HOME);
|
|
154
164
|
const fakeAllowed = process.env.SKS_PYTHON_CODEX_SDK_FAKE === '1';
|
|
155
165
|
const adapterResult = capability.ok || fakeAllowed
|
|
156
|
-
? await
|
|
166
|
+
? await withModelCallSlot({
|
|
167
|
+
root,
|
|
168
|
+
missionId: task.missionId,
|
|
169
|
+
provider: 'python-codex-sdk',
|
|
170
|
+
budget: defaultModelCallBudget('python-codex-sdk'),
|
|
171
|
+
slotId: task.slotId || null,
|
|
172
|
+
generationIndex: task.generationIndex ?? null,
|
|
173
|
+
sessionId: task.sessionId || null,
|
|
174
|
+
backend: 'python-codex-sdk'
|
|
175
|
+
}, () => runPythonCodexSdkTask(task, { env: runtime.env.env, config: runtime.config }))
|
|
157
176
|
: { ok: false, events: [], translatedEvents: [], finalResponse: '', threadId: '', turnId: '', blockers: capability.blockers, capability };
|
|
158
177
|
const events = Array.isArray(adapterResult.events) ? adapterResult.events : [];
|
|
159
178
|
const translatedEvents = Array.isArray(adapterResult.translatedEvents) ? adapterResult.translatedEvents : [];
|
|
@@ -251,7 +270,16 @@ async function runPythonControlTask(root, task, schema, routerDecision) {
|
|
|
251
270
|
}
|
|
252
271
|
async function runLocalControlTask(root, task, schema, routerDecision) {
|
|
253
272
|
const config = await readLocalModelConfig();
|
|
254
|
-
const adapterResult = await
|
|
273
|
+
const adapterResult = await withModelCallSlot({
|
|
274
|
+
root,
|
|
275
|
+
missionId: task.missionId,
|
|
276
|
+
provider: 'local-llm',
|
|
277
|
+
budget: defaultModelCallBudget('local-llm'),
|
|
278
|
+
slotId: task.slotId || null,
|
|
279
|
+
generationIndex: task.generationIndex ?? null,
|
|
280
|
+
sessionId: task.sessionId || null,
|
|
281
|
+
backend: 'local-llm'
|
|
282
|
+
}, () => runLocalLlmTask(task, { config, outputSchema: schema }));
|
|
255
283
|
for (const event of adapterResult.events || [])
|
|
256
284
|
await appendJsonl(path.join(root, 'local-llm-events.jsonl'), event);
|
|
257
285
|
const structuredOutput = adapterResult.structuredOutput;
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import { appendParallelRuntimeEvent } from '../agents/parallel-runtime-proof.js';
|
|
2
|
+
const semaphores = new Map();
|
|
3
|
+
/**
 * Return the shared semaphore for a provider/budget pair, creating it on first use.
 *
 * Semaphores are cached in the module-level `semaphores` map under the key
 * `<provider>:<budget>`, so callers that use the same provider and budget
 * share one instance.
 *
 * @param {string} [provider] - Provider name; defaults to 'codex-sdk' when falsy.
 * @param {number} [budget] - Maximum concurrent calls; floored, minimum 1.
 *   Non-numeric values fall back to 1.
 * @returns {ModelCallSemaphoreImpl} The cached or newly created semaphore.
 */
export function getModelCallSemaphore(provider, budget) {
    const normalizedProvider = String(provider || 'codex-sdk');
    const requested = Math.floor(Number(budget || 1));
    // Math.max(1, NaN) is NaN, and `active < NaN` is never true, so a
    // non-numeric budget would make every caller queue forever.
    const normalizedBudget = Number.isNaN(requested) ? 1 : Math.max(1, requested);
    const key = `${normalizedProvider}:${normalizedBudget}`;
    const existing = semaphores.get(key);
    if (existing)
        return existing;
    const created = new ModelCallSemaphoreImpl(normalizedProvider, normalizedBudget);
    semaphores.set(key, created);
    return created;
}
|
|
14
|
+
/**
 * Run `fn` while holding a slot of the provider's model-call semaphore, and
 * record `model_call_started` / `model_call_completed` events around it.
 *
 * Event writes are best-effort: a failed append is ignored and never affects
 * the result of `fn`. The completed event is written whether `fn` resolves or
 * rejects.
 *
 * @param {object} input - Uses provider, budget, root, missionId, and the
 *   optional modelCallId, slotId, generationIndex, sessionId, backend.
 * @param {() => Promise<*>} fn - The model call to execute.
 * @returns {Promise<*>} Whatever `fn` resolves to.
 */
export async function withModelCallSlot(input, fn) {
    const semaphore = getModelCallSemaphore(input.provider, input.budget);
    const modelCallId = input.modelCallId || `${input.provider}-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`;
    // Both events share every field except the type and the meta payload.
    const record = (eventType, meta) => appendParallelRuntimeEvent(input.root, input.missionId, {
        event_type: eventType,
        slot_id: input.slotId ?? null,
        generation_index: input.generationIndex ?? null,
        session_id: input.sessionId ?? null,
        model_call_id: modelCallId,
        backend: input.backend || input.provider,
        placement: 'unknown',
        meta
    }).catch(() => undefined);
    return semaphore.run(async () => {
        await record('model_call_started', {
            provider: input.provider,
            budget: semaphore.budget,
            active_model_calls: semaphore.active,
            queued_model_calls: semaphore.queued
        });
        try {
            return await fn();
        }
        finally {
            await record('model_call_completed', {
                provider: input.provider,
                budget: semaphore.budget,
                max_observed_model_calls: semaphore.maxObserved,
                queued_model_calls: semaphore.queued
            });
        }
    });
}
|
|
55
|
+
/**
 * Default concurrency budget for a model provider.
 *
 * @param {string} provider - Provider name.
 * @returns {number} For 'local-llm' and 'ollama', the value of
 *   SKS_LOCAL_LLM_MAX_PARALLEL_REQUESTS (default 4); for every other provider,
 *   the value of SKS_REMOTE_API_PARALLEL_BUDGET (default 12).
 */
export function defaultModelCallBudget(provider) {
    const name = String(provider || '');
    const isLocal = name === 'local-llm' || name === 'ollama';
    return isLocal
        ? envInt('SKS_LOCAL_LLM_MAX_PARALLEL_REQUESTS', 4)
        : envInt('SKS_REMOTE_API_PARALLEL_BUDGET', 12);
}
|
|
61
|
+
/**
 * Counting semaphore that limits concurrent model calls for one provider.
 *
 * Waiters are served in FIFO order. When a slot is released while callers are
 * queued, it is handed directly to the oldest waiter without lowering
 * `active`. That way an acquire() arriving between the release and the
 * waiter's resumption cannot take the same slot and exceed the budget.
 */
class ModelCallSemaphoreImpl {
    /** Provider name this semaphore belongs to. */
    provider;
    /** Maximum number of concurrently held slots. */
    budget;
    /** Slots currently held, including slots already handed to a woken waiter. */
    active = 0;
    /** Callers currently waiting for a slot. */
    queued = 0;
    /** Highest value `active` has reached. */
    maxObserved = 0;
    /** Resolve callbacks of waiting callers, oldest first. */
    waiters = [];
    constructor(provider, budget) {
        this.provider = provider;
        this.budget = budget;
    }
    /**
     * Run `fn` while holding a slot; the slot is released even if `fn` rejects.
     * @param {() => Promise<*>} fn
     * @returns {Promise<*>} Whatever `fn` resolves to.
     */
    async run(fn) {
        await this.acquire();
        try {
            return await fn();
        }
        finally {
            this.release();
        }
    }
    /** Wait until a slot is available and take it. */
    async acquire() {
        if (this.active < this.budget) {
            this.active += 1;
            this.maxObserved = Math.max(this.maxObserved, this.active);
            return;
        }
        this.queued += 1;
        // release() transfers its slot to us, so `active` already counts this caller.
        await new Promise((resolve) => this.waiters.push(resolve));
    }
    /** Give a slot back, or pass it straight to the oldest waiter. */
    release() {
        const next = this.waiters.shift();
        if (next) {
            this.queued = Math.max(0, this.queued - 1);
            next();
            return;
        }
        this.active = Math.max(0, this.active - 1);
    }
}
|
|
100
|
+
/**
 * Read an environment variable as an integer of at least 1.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Returned unchanged when the variable is unset,
 *   non-numeric, or below 1.
 * @returns {number} The floored value of the variable, or the fallback.
 */
function envInt(name, fallback) {
    const numeric = Number(process.env[name]);
    const usable = Number.isFinite(numeric) && numeric >= 1;
    return usable ? Math.floor(numeric) : fallback;
}
|
|
106
|
+
//# sourceMappingURL=model-call-concurrency.js.map
|