sneakoscope 2.0.15 → 2.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/crates/sks-core/Cargo.lock +1 -1
- package/crates/sks-core/Cargo.toml +1 -1
- package/crates/sks-core/src/main.rs +1 -1
- package/dist/.sks-build-stamp.json +4 -4
- package/dist/bin/sks.js +1 -1
- package/dist/cli/command-registry.js +1 -1
- package/dist/core/agents/agent-orchestrator.js +66 -3
- package/dist/core/agents/agent-scheduler.js +204 -86
- package/dist/core/agents/agent-schema.js +1 -1
- package/dist/core/agents/native-cli-session-swarm.js +87 -21
- package/dist/core/agents/parallel-runtime-proof.js +217 -0
- package/dist/core/codex-control/codex-task-runner.js +32 -4
- package/dist/core/codex-control/model-call-concurrency.js +106 -0
- package/dist/core/commands/naruto-command.js +48 -5
- package/dist/core/commands/team-command.js +0 -176
- package/dist/core/db-safety.js +34 -6
- package/dist/core/fsx.js +1 -1
- package/dist/core/git/git-worktree-capability.js +18 -0
- package/dist/core/git/git-worktree-manager.js +80 -0
- package/dist/core/git/git-worktree-pool.js +4 -0
- package/dist/core/mad-db/mad-db-capability.js +33 -1
- package/dist/core/mad-db/mad-db-ledger.js +14 -0
- package/dist/core/mad-db/mad-db-policy-resolver.js +2 -0
- package/dist/core/naruto/naruto-concurrency-governor.js +14 -1
- package/dist/core/version.js +1 -1
- package/dist/core/zellij/zellij-slot-telemetry.js +56 -1
- package/dist/scripts/release-speed-summary.js +2 -0
- package/package.json +25 -1
- package/schemas/agents/parallel-runtime-proof.schema.json +48 -0
|
@@ -10,6 +10,7 @@ import { resolveProviderContext } from '../provider/provider-context.js';
|
|
|
10
10
|
import { buildZellijSlotPaneCommand } from '../zellij/zellij-slot-pane-renderer.js';
|
|
11
11
|
import { resolveZellijUiMode } from '../zellij/zellij-ui-mode.js';
|
|
12
12
|
import { appendZellijSlotTelemetry } from '../zellij/zellij-slot-telemetry.js';
|
|
13
|
+
import { appendParallelRuntimeEvent } from './parallel-runtime-proof.js';
|
|
13
14
|
export const NATIVE_CLI_SESSION_SWARM_SCHEMA = 'sks.agent-native-cli-session-swarm.v1';
|
|
14
15
|
export function createNativeCliSessionSwarmRecorder(root, input) {
|
|
15
16
|
return new NativeCliSessionSwarmRecorder(root, input);
|
|
@@ -175,6 +176,16 @@ class NativeCliSessionSwarmRecorder {
|
|
|
175
176
|
record.pid = child.pid || null;
|
|
176
177
|
record.process_id = child.pid || null;
|
|
177
178
|
record.status = 'running';
|
|
179
|
+
await appendParallelRuntimeEvent(this.root, this.input.missionId, {
|
|
180
|
+
event_type: 'worker_process_spawned',
|
|
181
|
+
slot_id: ctx.agent.slot_id || ctx.agent.id || null,
|
|
182
|
+
generation_index: ctx.agent.generation_index || null,
|
|
183
|
+
session_id: ctx.agent.session_id || null,
|
|
184
|
+
pid: child.pid || null,
|
|
185
|
+
backend: this.input.backend,
|
|
186
|
+
placement: record.worker_placement === 'headless' ? 'headless' : 'process',
|
|
187
|
+
worktree_id: worktree?.id || null
|
|
188
|
+
}).catch(() => undefined);
|
|
178
189
|
await this.telemetry(ctx, {
|
|
179
190
|
eventType: 'worker_spawned',
|
|
180
191
|
status: 'launching',
|
|
@@ -321,6 +332,30 @@ class NativeCliSessionSwarmRecorder {
|
|
|
321
332
|
mode: uiMode,
|
|
322
333
|
watch: true
|
|
323
334
|
});
|
|
335
|
+
const processRun = uiMode === 'full-debug'
|
|
336
|
+
? null
|
|
337
|
+
: await this.spawnCompactSlotWorkerProcess({
|
|
338
|
+
args: input.args,
|
|
339
|
+
cwd: workerCwd,
|
|
340
|
+
env: workerEnv,
|
|
341
|
+
stdoutRel: input.stdoutRel,
|
|
342
|
+
stderrRel: input.stderrRel
|
|
343
|
+
});
|
|
344
|
+
if (processRun?.pid) {
|
|
345
|
+
input.record.pid = processRun.pid;
|
|
346
|
+
input.record.process_id = processRun.pid;
|
|
347
|
+
await appendParallelRuntimeEvent(this.root, this.input.missionId, {
|
|
348
|
+
event_type: 'worker_process_spawned',
|
|
349
|
+
slot_id: slotId,
|
|
350
|
+
generation_index: Number(input.ctx.agent.generation_index || 1),
|
|
351
|
+
session_id: input.ctx.agent.session_id || null,
|
|
352
|
+
pid: processRun.pid,
|
|
353
|
+
backend: this.input.backend,
|
|
354
|
+
placement: 'zellij-pane',
|
|
355
|
+
worktree_id: worktree?.id || null
|
|
356
|
+
}).catch(() => undefined);
|
|
357
|
+
await this.record(input.record);
|
|
358
|
+
}
|
|
324
359
|
let paneRecord;
|
|
325
360
|
try {
|
|
326
361
|
paneRecord = await openWorkerPane({
|
|
@@ -367,7 +402,9 @@ class NativeCliSessionSwarmRecorder {
|
|
|
367
402
|
if (input.zellijReservation)
|
|
368
403
|
this.releaseVisibleZellijReservation(input.zellijReservation);
|
|
369
404
|
}
|
|
370
|
-
const
|
|
405
|
+
const zellijRequired = process.env.SKS_REQUIRE_ZELLIJ === '1';
|
|
406
|
+
const launchBlockers = zellijRequired ? paneRecord.blockers || [] : [];
|
|
407
|
+
const launchWarnings = zellijRequired ? [] : paneRecord.blockers || [];
|
|
371
408
|
input.record.command_line = ['zellij', '--session', sessionName, 'action', 'new-pane', '--direction', paneRecord.direction_applied, '--name', paneRecord.pane_name, '--', 'sh', '-lc', uiMode === 'full-debug' ? '<native-cli-worker-command>' : '<zellij-slot-pane-renderer-command>'];
|
|
372
409
|
input.record.zellij_session_name = sessionName;
|
|
373
410
|
input.record.zellij_pane_id = paneRecord.pane_id || null;
|
|
@@ -385,6 +422,7 @@ class NativeCliSessionSwarmRecorder {
|
|
|
385
422
|
input.record.slot_visualization = uiMode === 'full-debug' ? 'worker-command-pane' : 'zellij-slot-pane-renderer';
|
|
386
423
|
input.record.status = launchBlockers.length ? 'failed' : 'running';
|
|
387
424
|
input.record.blockers = launchBlockers;
|
|
425
|
+
input.record.warnings = [...(input.record.warnings || []), ...launchWarnings];
|
|
388
426
|
await this.telemetry(input.ctx, {
|
|
389
427
|
eventType: 'worker_spawned',
|
|
390
428
|
status: launchBlockers.length ? 'failed' : 'launching',
|
|
@@ -422,27 +460,18 @@ class NativeCliSessionSwarmRecorder {
|
|
|
422
460
|
goal_mode_ref: input.ctx.agent.goal_mode_ref || null
|
|
423
461
|
});
|
|
424
462
|
}
|
|
425
|
-
const
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
stderrRel: input.stderrRel
|
|
463
|
+
const heartbeatSeen = await waitForWorkerHeartbeat(path.join(this.root, input.heartbeatRel), Number(process.env.SKS_ZELLIJ_WORKER_HEARTBEAT_TIMEOUT_MS || 5000));
|
|
464
|
+
if (heartbeatSeen) {
|
|
465
|
+
await this.telemetry(input.ctx, {
|
|
466
|
+
eventType: 'heartbeat',
|
|
467
|
+
status: 'running',
|
|
468
|
+
artifacts: [input.heartbeatRel],
|
|
469
|
+
logTail: await tailFile(path.join(this.root, input.heartbeatRel), 600)
|
|
433
470
|
});
|
|
434
|
-
if (processRun?.pid) {
|
|
435
|
-
input.record.pid = processRun.pid;
|
|
436
|
-
input.record.process_id = processRun.pid;
|
|
437
|
-
await this.record(input.record);
|
|
438
471
|
}
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
status: 'running',
|
|
443
|
-
artifacts: [input.heartbeatRel],
|
|
444
|
-
logTail: await tailFile(path.join(this.root, input.heartbeatRel), 600)
|
|
445
|
-
});
|
|
472
|
+
else {
|
|
473
|
+
input.record.warnings = [...(input.record.warnings || []), 'zellij_worker_heartbeat_missing_launch_warning'];
|
|
474
|
+
}
|
|
446
475
|
await appendJsonl(path.join(this.root, input.workerDirRel, 'zellij-worker-pane-events.jsonl'), {
|
|
447
476
|
schema: 'sks.zellij-worker-pane-event.v1',
|
|
448
477
|
ts: nowIso(),
|
|
@@ -505,8 +534,10 @@ class NativeCliSessionSwarmRecorder {
|
|
|
505
534
|
const heartbeatOk = await hasHeartbeat(path.join(this.root, input.heartbeatRel));
|
|
506
535
|
input.record.blockers = [
|
|
507
536
|
...(parsed ? parsed.blockers || [] : ['zellij_worker_result_timeout']),
|
|
508
|
-
...(heartbeatOk ? [] : [
|
|
537
|
+
...(heartbeatOk ? [] : [])
|
|
509
538
|
];
|
|
539
|
+
if (!heartbeatOk)
|
|
540
|
+
input.record.warnings = [...(input.record.warnings || []), 'zellij_worker_heartbeat_missing'];
|
|
510
541
|
paneRecord = await closeWorkerPane({
|
|
511
542
|
root: this.root,
|
|
512
543
|
paneRecord,
|
|
@@ -593,6 +624,24 @@ class NativeCliSessionSwarmRecorder {
|
|
|
593
624
|
log_tail: input.logTail || '',
|
|
594
625
|
blockers: input.blockers || []
|
|
595
626
|
}).catch(() => undefined);
|
|
627
|
+
const parallelEvent = mapTelemetryToParallelEvent(input.eventType);
|
|
628
|
+
if (parallelEvent) {
|
|
629
|
+
await appendParallelRuntimeEvent(this.root, this.input.missionId, {
|
|
630
|
+
event_type: parallelEvent,
|
|
631
|
+
slot_id: String(ctx.agent?.slot_id || ctx.agent?.id || 'slot-001'),
|
|
632
|
+
generation_index: Number(ctx.agent?.generation_index || 1),
|
|
633
|
+
session_id: ctx.agent?.session_id == null ? null : String(ctx.agent.session_id),
|
|
634
|
+
pid: null,
|
|
635
|
+
backend: this.input.backend,
|
|
636
|
+
placement: normalizeParallelPlacement(ctx.opts?.workerPlacement || this.input.workerPlacement || (input.status === 'headless' ? 'headless' : 'unknown')),
|
|
637
|
+
worktree_id: ctx.agent?.worktree?.id || ctx.slice?.worktree?.id || null,
|
|
638
|
+
meta: {
|
|
639
|
+
status: input.status,
|
|
640
|
+
artifacts: input.artifacts || [],
|
|
641
|
+
blockers: input.blockers || []
|
|
642
|
+
}
|
|
643
|
+
}).catch(() => undefined);
|
|
644
|
+
}
|
|
596
645
|
}
|
|
597
646
|
async persist() {
|
|
598
647
|
this.writeLock = this.writeLock.catch(() => undefined).then(async () => {
|
|
@@ -765,6 +814,23 @@ function firstString(values) {
|
|
|
765
814
|
}
|
|
766
815
|
return null;
|
|
767
816
|
}
|
|
817
|
+
/**
 * Translate a recorder telemetry event type into the event type used in the
 * parallel-runtime event log.
 *
 * @param {*} eventType - Telemetry event type; matched with strict equality.
 * @returns {string|null} The parallel-runtime event type, or `null` when the
 *   telemetry event has no parallel-runtime counterpart.
 */
function mapTelemetryToParallelEvent(eventType) {
    switch (eventType) {
        // These three names are shared verbatim by both event vocabularies.
        case 'slot_reserved':
        case 'worker_completed':
        case 'worker_failed':
            return eventType;
        case 'heartbeat':
            return 'worker_heartbeat_seen';
        default:
            return null;
    }
}
|
|
828
|
+
/**
 * Reduce an arbitrary placement value to one of the placements recognised by
 * the parallel-runtime event log.
 *
 * @param {*} value - Candidate placement; falsy values are treated as empty.
 * @returns {string} `'zellij-pane'`, `'process'`, `'headless'`, or `'unknown'`
 *   for anything else.
 */
function normalizeParallelPlacement(value) {
    const candidate = String(value || '');
    const recognised = ['zellij-pane', 'process', 'headless'];
    return recognised.includes(candidate) ? candidate : 'unknown';
}
|
|
768
834
|
async function tailFile(file, max) {
|
|
769
835
|
try {
|
|
770
836
|
const text = await fs.promises.readFile(file, 'utf8');
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
import path from 'node:path';
|
|
2
|
+
import { appendJsonlBounded, ensureDir, nowIso, readText, writeJsonAtomic } from '../fsx.js';
|
|
3
|
+
export const PARALLEL_RUNTIME_EVENT_SCHEMA = 'sks.parallel-runtime-event.v1';
|
|
4
|
+
export const PARALLEL_RUNTIME_PROOF_SCHEMA = 'sks.parallel-runtime-proof.v1';
|
|
5
|
+
/**
 * Resolve the location of the parallel-runtime event log for a mission.
 *
 * @param {string} root - Root path handed to `inferAgentsDir`.
 * @param {string} missionId - Mission identifier.
 * @returns {string} Path of `parallel-runtime.events.jsonl` inside the
 *   mission's agents directory.
 */
export function parallelRuntimeEventPath(root, missionId) {
    const agentsDir = inferAgentsDir(root, missionId);
    return path.join(agentsDir, 'parallel-runtime.events.jsonl');
}
|
|
8
|
+
/**
 * Resolve the location of the parallel-runtime proof document for a mission.
 *
 * @param {string} root - Root path handed to `inferAgentsDir`.
 * @param {string} missionId - Mission identifier.
 * @returns {string} Path of `parallel-runtime-proof.json` inside the
 *   mission's agents directory.
 */
export function parallelRuntimeProofPath(root, missionId) {
    const agentsDir = inferAgentsDir(root, missionId);
    return path.join(agentsDir, 'parallel-runtime-proof.json');
}
|
|
11
|
+
/**
 * Normalize an event and append it to the mission's parallel-runtime event
 * log, creating the containing directory first.
 *
 * @param {string} root - Root path used to locate the mission's agents directory.
 * @param {string} missionId - Mission identifier stamped onto the row.
 * @param {object} event - Raw event fields (see `normalizeParallelRuntimeEvent`).
 * @returns {Promise<object>} The normalized row that was appended.
 */
export async function appendParallelRuntimeEvent(root, missionId, event) {
    // Normalize before touching the filesystem so the row's timestamp is taken first.
    const normalizedRow = normalizeParallelRuntimeEvent(missionId, event);
    const eventLog = parallelRuntimeEventPath(root, missionId);
    const eventLogDir = path.dirname(eventLog);
    await ensureDir(eventLogDir);
    await appendJsonlBounded(eventLog, normalizedRow);
    return normalizedRow;
}
|
|
18
|
+
/**
 * Replay a mission's parallel-runtime event log and derive a proof document
 * describing how much real parallelism was observed.
 *
 * Events are ordered by their `ms` timestamp and replayed once; the replay
 * tracks which workers, worker processes and model calls are active at each
 * point, records the activity between consecutive timestamps as overlap
 * windows, and finally checks the observations against the thresholds below.
 *
 * @param {string} root - Root path used to locate the mission's event log.
 * @param {string} missionId - Mission identifier.
 * @param {object} [opts] - Optional overrides.
 * @param {number} [opts.requestedWorkers] - Requested worker count; defaults to
 *   the number of distinct workers seen (then distinct pids, then peak workers).
 * @param {number} [opts.targetActiveSlots] - Defaults to the requested workers.
 * @param {number} [opts.expectedWorkerRuntimeMs] - Per-worker runtime (default
 *   4000) used for the sequential estimate when no worker duration was observed.
 * @param {number} [opts.visiblePanes] - Defaults to the number of distinct
 *   slot/session keys seen on `zellij-pane` events.
 * @param {number} [opts.minActiveWorkers] - Minimum peak of concurrently active
 *   workers; defaults to `min(targetActiveSlots, requestedWorkers)`.
 * @param {number} [opts.minSpeedupRatio] - Defaults to 5 for 16+ requested
 *   workers, otherwise 1.
 * @param {number} [opts.firstBatchLaunchSpanLimitMs] - Defaults to 2500 for 16+
 *   requested workers, otherwise 30000.
 * @returns {Promise<object>} Proof object; `passed` is true when `blockers` is empty.
 */
export async function buildParallelRuntimeProof(root, missionId, opts = {}) {
    const events = await readParallelRuntimeEvents(root, missionId);
    const sorted = events.sort((a, b) => a.ms - b.ms);
    const firstMs = sorted[0]?.ms || Date.now();
    const lastMs = sorted[sorted.length - 1]?.ms || firstMs;
    const workerActive = new Set();
    const processActive = new Set();
    const modelActive = new Set();
    // Every worker key that was ever launched (drives the requested-worker default).
    const seenWorkers = new Set();
    // Start time of the run currently in flight for each worker key.
    const openWorkerStarts = new Map();
    // Process keys spawned on behalf of each worker key, so a terminal event
    // that carries no pid can still release the worker's processes.
    const processKeysByWorker = new Map();
    const workerDurations = [];
    const workerPids = new Set();
    const modelIds = new Set();
    const overlapWindows = [];
    const batchStart = new Map();
    const zellijPaneKeys = new Set();
    let maxWorkers = 0;
    let maxProcesses = 0;
    let maxModels = 0;
    let previousMs = firstMs;
    // `null` means "no batch measured yet", so a genuine 0 ms span is kept.
    let firstBatchLaunchSpanMs = null;
    let earliestLaunchMs = null;
    let latestLaunchMs = null;
    let observedHeadlessWorkers = 0;
    for (const event of sorted) {
        if (event.ms > previousMs) {
            // Snapshot the activity that held between the previous timestamp and this one.
            overlapWindows.push({
                start_ms: previousMs - firstMs,
                end_ms: event.ms - firstMs,
                active_workers: workerActive.size,
                active_model_calls: modelActive.size
            });
            previousMs = event.ms;
        }
        const workerKey = event.session_id || event.slot_id || (event.pid == null ? '' : `pid:${event.pid}`);
        const processKey = event.pid == null ? workerKey : `pid:${event.pid}`;
        const isLaunch = event.event_type === 'worker_launch_invoked' || event.event_type === 'worker_process_spawned';
        const isTerminal = event.event_type === 'worker_completed' || event.event_type === 'worker_failed';
        if (event.placement === 'zellij-pane')
            zellijPaneKeys.add(event.slot_id || event.session_id || '');
        if (event.event_type === 'batch_dispatch_started' && event.batch_id)
            batchStart.set(event.batch_id, event.ms);
        if (event.event_type === 'batch_dispatch_completed' && event.batch_id) {
            const started = batchStart.get(event.batch_id);
            if (started != null && firstBatchLaunchSpanMs === null)
                firstBatchLaunchSpanMs = Math.max(0, event.ms - started);
        }
        if (isLaunch) {
            earliestLaunchMs = earliestLaunchMs === null ? event.ms : Math.min(earliestLaunchMs, event.ms);
            latestLaunchMs = latestLaunchMs === null ? event.ms : Math.max(latestLaunchMs, event.ms);
            if (event.placement === 'headless')
                observedHeadlessWorkers += 1;
            if (workerKey) {
                workerActive.add(workerKey);
                seenWorkers.add(workerKey);
                // Keep the earliest launch of the current run (invoke precedes spawn).
                if (!openWorkerStarts.has(workerKey))
                    openWorkerStarts.set(workerKey, event.ms);
            }
            if (event.event_type === 'worker_process_spawned' && processKey) {
                processActive.add(processKey);
                if (workerKey) {
                    const spawned = processKeysByWorker.get(workerKey) || new Set();
                    spawned.add(processKey);
                    processKeysByWorker.set(workerKey, spawned);
                }
            }
            if (event.pid != null)
                workerPids.add(event.pid);
        }
        if (isTerminal) {
            if (workerKey) {
                workerActive.delete(workerKey);
                const started = openWorkerStarts.get(workerKey);
                if (started != null)
                    workerDurations.push(Math.max(0, event.ms - started));
                // Close the run so a reused key (or a duplicate terminal event)
                // is not measured against a stale start time.
                openWorkerStarts.delete(workerKey);
                for (const spawnedKey of processKeysByWorker.get(workerKey) || [])
                    processActive.delete(spawnedKey);
                processKeysByWorker.delete(workerKey);
            }
            if (processKey)
                processActive.delete(processKey);
        }
        if (event.event_type === 'model_call_started') {
            const id = event.model_call_id || event.session_id || `model:${event.ms}:${modelActive.size}`;
            modelActive.add(id);
            modelIds.add(id);
        }
        if (event.event_type === 'model_call_completed') {
            const id = event.model_call_id || event.session_id || '';
            if (id)
                modelActive.delete(id);
        }
        maxWorkers = Math.max(maxWorkers, workerActive.size);
        maxProcesses = Math.max(maxProcesses, processActive.size);
        maxModels = Math.max(maxModels, modelActive.size);
    }
    const requestedWorkers = positiveInt(opts.requestedWorkers, seenWorkers.size || workerPids.size || maxWorkers);
    const targetActiveSlots = positiveInt(opts.targetActiveSlots, requestedWorkers);
    const wallMs = Math.max(0, lastMs - firstMs);
    const sequentialEstimateMs = workerDurations.length
        ? workerDurations.reduce((sum, value) => sum + value, 0)
        : requestedWorkers * positiveInt(opts.expectedWorkerRuntimeMs, 4000);
    const visiblePanes = nonNegativeInt(opts.visiblePanes, zellijPaneKeys.size);
    const headlessWorkers = Math.max(observedHeadlessWorkers, Math.max(0, targetActiveSlots - visiblePanes));
    const minActiveWorkers = opts.minActiveWorkers === undefined
        ? Math.min(targetActiveSlots, requestedWorkers)
        : nonNegativeInt(opts.minActiveWorkers, Math.min(targetActiveSlots, requestedWorkers));
    const minSpeedup = Number.isFinite(Number(opts.minSpeedupRatio)) ? Number(opts.minSpeedupRatio) : requestedWorkers >= 16 ? 5 : 1;
    const speedupRatio = wallMs > 0 ? Number((sequentialEstimateMs / wallMs).toFixed(3)) : 0;
    const launchSpanMs = earliestLaunchMs === null ? 0 : latestLaunchMs - earliestLaunchMs;
    const firstBatchSpanMs = firstBatchLaunchSpanMs ?? 0;
    const firstBatchLimit = positiveInt(opts.firstBatchLaunchSpanLimitMs, requestedWorkers >= 16 ? 2500 : 30000);
    const blockers = [
        ...(sorted.length ? [] : ['parallel_runtime_events_missing']),
        ...(minActiveWorkers <= 0 || maxWorkers >= minActiveWorkers ? [] : ['max_observed_active_workers_below_target']),
        ...(requestedWorkers >= 16 && workerPids.size && workerPids.size < minActiveWorkers ? ['unique_worker_pids_below_target'] : []),
        ...(speedupRatio >= minSpeedup ? [] : ['speedup_ratio_below_target']),
        ...(firstBatchSpanMs <= firstBatchLimit ? [] : ['first_batch_launch_span_above_limit'])
    ];
    return {
        schema: PARALLEL_RUNTIME_PROOF_SCHEMA,
        mission_id: missionId,
        generated_at: nowIso(),
        requested_workers: requestedWorkers,
        target_active_slots: targetActiveSlots,
        max_observed_active_workers: maxWorkers,
        max_observed_worker_processes: Math.max(maxProcesses, workerPids.size ? maxProcesses : maxWorkers),
        unique_worker_pids: workerPids.size,
        unique_model_call_ids: modelIds.size,
        max_observed_model_calls: maxModels,
        launch_span_ms: launchSpanMs,
        first_batch_launch_span_ms: firstBatchSpanMs,
        wall_ms: wallMs,
        sequential_estimate_ms: sequentialEstimateMs,
        speedup_ratio: speedupRatio,
        overlap_windows: coalesceOverlapWindows(overlapWindows),
        visible_panes: visiblePanes,
        headless_workers: headlessWorkers,
        passed: blockers.length === 0,
        blockers
    };
}
|
|
140
|
+
/**
 * Build the parallel-runtime proof for a mission and persist it to the
 * mission's proof file.
 *
 * @param {string} root - Root path used to locate the mission's agents directory.
 * @param {string} missionId - Mission identifier.
 * @param {object} [opts] - Options forwarded unchanged to `buildParallelRuntimeProof`.
 * @returns {Promise<object>} The proof object that was written.
 */
export async function writeParallelRuntimeProof(root, missionId, opts = {}) {
    const proofDocument = await buildParallelRuntimeProof(root, missionId, opts);
    const destination = parallelRuntimeProofPath(root, missionId);
    await writeJsonAtomic(destination, proofDocument);
    return proofDocument;
}
|
|
145
|
+
/**
 * Load the mission's parallel-runtime event log and return its usable rows.
 *
 * The log is read as text (an empty string is the fallback passed to
 * `readText`) and parsed line by line. A line is kept only when it is valid
 * JSON, carries the parallel-runtime event schema, and has a finite numeric
 * `ms` timestamp; consumers sort and subtract on `ms`, so a row without one
 * would poison the ordering and every derived duration with NaN.
 *
 * @param {string} root - Root path used to locate the mission's agents directory.
 * @param {string} missionId - Mission identifier.
 * @returns {Promise<object[]>} Parsed event rows in file order.
 */
async function readParallelRuntimeEvents(root, missionId) {
    const text = await readText(parallelRuntimeEventPath(root, missionId), '');
    const rows = [];
    for (const rawLine of String(text).split(/\r?\n/)) {
        const line = rawLine.trim();
        if (!line)
            continue;
        let parsed;
        try {
            parsed = JSON.parse(line);
        }
        catch {
            // Best-effort log: skip lines that are not valid JSON (e.g. a truncated tail).
            continue;
        }
        if (parsed?.schema === PARALLEL_RUNTIME_EVENT_SCHEMA && Number.isFinite(parsed.ms))
            rows.push(parsed);
    }
    return rows;
}
|
|
162
|
+
/**
 * Build the canonical log row for a parallel-runtime event.
 *
 * Identifier fields are coerced to strings (or `null` when absent), numeric
 * fields are floored, and the optional `worktree_id`, `model_call_id`,
 * `batch_id` and `meta` fields are only present on the row when the caller
 * supplied them.
 *
 * @param {string} missionId - Mission identifier stamped onto the row.
 * @param {object} event - Raw event fields.
 * @returns {object} The normalized row, timestamped at call time.
 */
function normalizeParallelRuntimeEvent(missionId, event) {
    const stringOrNull = (raw) => (raw == null ? null : String(raw));
    const pidNumber = Number(event.pid);
    const row = {
        schema: PARALLEL_RUNTIME_EVENT_SCHEMA,
        ts: nowIso(),
        ms: Date.now(),
        mission_id: missionId,
        event_type: event.event_type,
        slot_id: stringOrNull(event.slot_id),
        // Generations are 1-based; anything unparsable or below 1 collapses to 1.
        generation_index: event.generation_index == null
            ? null
            : Math.max(1, Math.floor(Number(event.generation_index) || 1)),
        session_id: stringOrNull(event.session_id),
        pid: event.pid != null && Number.isFinite(pidNumber) ? Math.floor(pidNumber) : null,
        backend: String(event.backend || 'unknown'),
        placement: normalizePlacement(event.placement)
    };
    // Optional identifiers: `undefined` omits the key, `null` is kept as an explicit null.
    if (event.worktree_id !== undefined)
        row.worktree_id = stringOrNull(event.worktree_id);
    if (event.model_call_id !== undefined)
        row.model_call_id = stringOrNull(event.model_call_id);
    if (event.batch_id !== undefined)
        row.batch_id = stringOrNull(event.batch_id);
    if (event.meta && typeof event.meta === 'object')
        row.meta = event.meta;
    return row;
}
|
|
181
|
+
/**
 * Reduce an arbitrary placement value to one of the placements stored in the
 * parallel-runtime event log.
 *
 * @param {*} value - Candidate placement; falsy values count as `'unknown'`.
 * @returns {string} `'zellij-pane'`, `'process'`, `'headless'`, or `'unknown'`.
 */
function normalizePlacement(value) {
    const candidate = String(value || 'unknown');
    switch (candidate) {
        case 'zellij-pane':
        case 'process':
        case 'headless':
            return candidate;
        default:
            return 'unknown';
    }
}
|
|
187
|
+
/**
 * Coerce a value to an integer of at least 1.
 *
 * @param {*} value - Preferred value; used (floored) when it converts to a
 *   finite number that is at least 1.
 * @param {*} fallback - Used (floored, clamped to at least 1) when `value` is
 *   not usable.
 * @returns {number} A number that is never NaN and never below 1; when
 *   neither argument yields a usable number the result is 1.
 */
function positiveInt(value, fallback) {
    const parsed = Number(value);
    if (Number.isFinite(parsed) && parsed >= 1)
        return Math.floor(parsed);
    // A non-numeric fallback would otherwise leak NaN through Math.max(1, NaN).
    const flooredFallback = Math.floor(Number(fallback));
    return Number.isNaN(flooredFallback) ? 1 : Math.max(1, flooredFallback);
}
|
|
193
|
+
/**
 * Coerce a value to an integer of at least 0.
 *
 * `null`, `undefined` and blank strings are treated as "not provided" and
 * select the fallback; without that guard `Number(null)` and `Number('')`
 * evaluate to 0 and would silently override the fallback with zero.
 *
 * @param {*} value - Preferred value; used (floored) when it is provided and
 *   converts to a finite number that is at least 0.
 * @param {*} fallback - Used (floored, clamped to at least 0) when `value` is
 *   not usable.
 * @returns {number} A number that is never NaN and never below 0; when
 *   neither argument yields a usable number the result is 0.
 */
function nonNegativeInt(value, fallback) {
    const isBlankString = typeof value === 'string' && value.trim() === '';
    const parsed = value == null || isBlankString ? Number.NaN : Number(value);
    if (Number.isFinite(parsed) && parsed >= 0)
        return Math.floor(parsed);
    // A non-numeric fallback would otherwise leak NaN through Math.max(0, NaN).
    const flooredFallback = Math.floor(Number(fallback));
    return Number.isNaN(flooredFallback) ? 0 : Math.max(0, flooredFallback);
}
|
|
199
|
+
/**
 * Select the overlap windows worth reporting.
 *
 * A window is kept when it has positive length and at least one active worker
 * or model call. Windows are not merged; the result is capped at the first
 * 2000 kept windows.
 *
 * @param {Array<{start_ms: number, end_ms: number, active_workers: number, active_model_calls: number}>} windows
 * @returns {Array<object>} A new array holding the kept windows in their original order.
 */
function coalesceOverlapWindows(windows) {
    const kept = [];
    for (const entry of windows) {
        const hasDuration = entry.end_ms > entry.start_ms;
        const hasActivity = entry.active_workers > 0 || entry.active_model_calls > 0;
        if (hasDuration && hasActivity)
            kept.push(entry);
    }
    return kept.slice(0, 2000);
}
|
|
205
|
+
/**
 * Work out the mission's `agents` directory from a root that may already
 * point somewhere inside the mission tree.
 *
 * Checked in order:
 *   1. root is `<missionId>/agents` itself        -> root
 *   2. root is `missions/<missionId>`             -> root/agents
 *   3. root lies below `.sneakoscope/missions/<missionId>/` -> that mission dir + agents
 *   4. otherwise root is a project root           -> root/.sneakoscope/missions/<missionId>/agents
 *
 * @param {string} root - Path to resolve; relative paths are made absolute.
 * @param {string} missionId - Mission identifier.
 * @returns {string} Absolute path of the mission's agents directory.
 */
function inferAgentsDir(root, missionId) {
    const absoluteRoot = path.resolve(root);
    const leaf = path.basename(absoluteRoot);
    const parentLeaf = path.basename(path.dirname(absoluteRoot));
    if (leaf === 'agents' && parentLeaf === missionId) {
        return absoluteRoot;
    }
    if (leaf === missionId && parentLeaf === 'missions') {
        return path.join(absoluteRoot, 'agents');
    }
    const { sep } = path;
    const missionSegment = `${sep}.sneakoscope${sep}missions${sep}${missionId}${sep}`;
    const segmentStart = absoluteRoot.indexOf(missionSegment);
    if (segmentStart < 0) {
        return path.join(absoluteRoot, '.sneakoscope', 'missions', missionId, 'agents');
    }
    // Cut just before the segment's trailing separator to get the mission directory.
    const missionDir = absoluteRoot.slice(0, segmentStart + missionSegment.length - 1);
    return path.join(missionDir, 'agents');
}
|
|
217
|
+
//# sourceMappingURL=parallel-runtime-proof.js.map
|
|
@@ -16,6 +16,7 @@ import { writeUltraRouterProof } from '../router/router-proof.js';
|
|
|
16
16
|
import { readLocalModelConfig } from '../agents/ollama-worker-config.js';
|
|
17
17
|
import { runLocalLlmTask } from '../local-llm/local-llm-control-adapter.js';
|
|
18
18
|
import { detectPythonCodexSdkCapability, runPythonCodexSdkTask } from './python-codex-sdk-adapter.js';
|
|
19
|
+
import { defaultModelCallBudget, withModelCallSlot } from './model-call-concurrency.js';
|
|
19
20
|
export async function runCodexTask(input) {
|
|
20
21
|
const root = path.resolve(input.mutationLedgerRoot);
|
|
21
22
|
await ensureDir(root);
|
|
@@ -46,7 +47,16 @@ export async function runCodexTask(input) {
|
|
|
46
47
|
];
|
|
47
48
|
let adapterResult = null;
|
|
48
49
|
if (!blockers.length) {
|
|
49
|
-
adapterResult = await
|
|
50
|
+
adapterResult = await withModelCallSlot({
|
|
51
|
+
root,
|
|
52
|
+
missionId: task.missionId,
|
|
53
|
+
provider: 'codex-sdk',
|
|
54
|
+
budget: defaultModelCallBudget('codex-sdk'),
|
|
55
|
+
slotId: task.slotId || null,
|
|
56
|
+
generationIndex: task.generationIndex ?? null,
|
|
57
|
+
sessionId: task.sessionId || null,
|
|
58
|
+
backend: 'codex-sdk'
|
|
59
|
+
}, () => runWithCodexReliabilityShield(task, async () => {
|
|
50
60
|
try {
|
|
51
61
|
return fakeAllowed
|
|
52
62
|
? await runFakeCodexSdkTask(task)
|
|
@@ -63,7 +73,7 @@ export async function runCodexTask(input) {
|
|
|
63
73
|
blockers: ['codex_sdk_run_failed:' + String(err?.message || err)]
|
|
64
74
|
};
|
|
65
75
|
}
|
|
66
|
-
});
|
|
76
|
+
}));
|
|
67
77
|
}
|
|
68
78
|
const events = Array.isArray(adapterResult?.events) ? adapterResult.events : [];
|
|
69
79
|
const translatedEvents = translateCodexSdkEvents(events);
|
|
@@ -153,7 +163,16 @@ async function runPythonControlTask(root, task, schema, routerDecision) {
|
|
|
153
163
|
await ensureDir(runtime.env.env.CODEX_HOME);
|
|
154
164
|
const fakeAllowed = process.env.SKS_PYTHON_CODEX_SDK_FAKE === '1';
|
|
155
165
|
const adapterResult = capability.ok || fakeAllowed
|
|
156
|
-
? await
|
|
166
|
+
? await withModelCallSlot({
|
|
167
|
+
root,
|
|
168
|
+
missionId: task.missionId,
|
|
169
|
+
provider: 'python-codex-sdk',
|
|
170
|
+
budget: defaultModelCallBudget('python-codex-sdk'),
|
|
171
|
+
slotId: task.slotId || null,
|
|
172
|
+
generationIndex: task.generationIndex ?? null,
|
|
173
|
+
sessionId: task.sessionId || null,
|
|
174
|
+
backend: 'python-codex-sdk'
|
|
175
|
+
}, () => runPythonCodexSdkTask(task, { env: runtime.env.env, config: runtime.config }))
|
|
157
176
|
: { ok: false, events: [], translatedEvents: [], finalResponse: '', threadId: '', turnId: '', blockers: capability.blockers, capability };
|
|
158
177
|
const events = Array.isArray(adapterResult.events) ? adapterResult.events : [];
|
|
159
178
|
const translatedEvents = Array.isArray(adapterResult.translatedEvents) ? adapterResult.translatedEvents : [];
|
|
@@ -251,7 +270,16 @@ async function runPythonControlTask(root, task, schema, routerDecision) {
|
|
|
251
270
|
}
|
|
252
271
|
async function runLocalControlTask(root, task, schema, routerDecision) {
|
|
253
272
|
const config = await readLocalModelConfig();
|
|
254
|
-
const adapterResult = await
|
|
273
|
+
const adapterResult = await withModelCallSlot({
|
|
274
|
+
root,
|
|
275
|
+
missionId: task.missionId,
|
|
276
|
+
provider: 'local-llm',
|
|
277
|
+
budget: defaultModelCallBudget('local-llm'),
|
|
278
|
+
slotId: task.slotId || null,
|
|
279
|
+
generationIndex: task.generationIndex ?? null,
|
|
280
|
+
sessionId: task.sessionId || null,
|
|
281
|
+
backend: 'local-llm'
|
|
282
|
+
}, () => runLocalLlmTask(task, { config, outputSchema: schema }));
|
|
255
283
|
for (const event of adapterResult.events || [])
|
|
256
284
|
await appendJsonl(path.join(root, 'local-llm-events.jsonl'), event);
|
|
257
285
|
const structuredOutput = adapterResult.structuredOutput;
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import { appendParallelRuntimeEvent } from '../agents/parallel-runtime-proof.js';
|
|
2
|
+
const semaphores = new Map();
|
|
3
|
+
/**
 * Return the shared semaphore for a provider/budget pair, creating it on
 * first use. Semaphores are cached per `provider:budget` key, so callers
 * asking for the same provider with the same budget share one limiter.
 *
 * @param {string} [provider] - Provider name; defaults to `'codex-sdk'` when falsy.
 * @param {number} [budget] - Maximum concurrent calls; floored and clamped to
 *   at least 1. A value that does not convert to a number falls back to 1,
 *   because a NaN budget can never satisfy `active < budget` and would leave
 *   every caller queued forever.
 * @returns {ModelCallSemaphoreImpl} The cached or newly created semaphore.
 */
export function getModelCallSemaphore(provider, budget) {
    const normalizedProvider = String(provider || 'codex-sdk');
    const requestedBudget = Math.floor(Number(budget || 1));
    const normalizedBudget = Number.isNaN(requestedBudget) ? 1 : Math.max(1, requestedBudget);
    const key = `${normalizedProvider}:${normalizedBudget}`;
    const existing = semaphores.get(key);
    if (existing)
        return existing;
    const created = new ModelCallSemaphoreImpl(normalizedProvider, normalizedBudget);
    semaphores.set(key, created);
    return created;
}
|
|
14
|
+
/**
 * Run `fn` while holding a slot of the provider's model-call semaphore, and
 * log a `model_call_started` / `model_call_completed` event pair around it.
 *
 * Event logging is best-effort: a failed append is ignored so that it can
 * never fail or block the model call itself. The completed event is written
 * whether `fn` resolves or throws.
 *
 * @param {object} input - Call description: `provider`, `budget`, `root`,
 *   `missionId`, and optionally `modelCallId`, `slotId`, `generationIndex`,
 *   `sessionId`, `backend`.
 * @param {Function} fn - The model call to execute inside the slot.
 * @returns {Promise<*>} Whatever `fn` resolves to.
 */
export async function withModelCallSlot(input, fn) {
    const semaphore = getModelCallSemaphore(input.provider, input.budget);
    const modelCallId = input.modelCallId || `${input.provider}-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`;
    // Both events share every field except the type and the meta snapshot.
    const logEvent = (eventType, meta) => appendParallelRuntimeEvent(input.root, input.missionId, {
        event_type: eventType,
        slot_id: input.slotId ?? null,
        generation_index: input.generationIndex ?? null,
        session_id: input.sessionId ?? null,
        model_call_id: modelCallId,
        backend: input.backend || input.provider,
        placement: 'unknown',
        meta
    }).catch(() => undefined);
    const guardedCall = async () => {
        // Counters are read here, after the slot was acquired, so they include this call.
        await logEvent('model_call_started', {
            provider: input.provider,
            budget: semaphore.budget,
            active_model_calls: semaphore.active,
            queued_model_calls: semaphore.queued
        });
        try {
            return await fn();
        }
        finally {
            await logEvent('model_call_completed', {
                provider: input.provider,
                budget: semaphore.budget,
                max_observed_model_calls: semaphore.maxObserved,
                queued_model_calls: semaphore.queued
            });
        }
    };
    return semaphore.run(guardedCall);
}
|
|
55
|
+
/**
 * Pick the default concurrency budget for a model provider.
 *
 * Local providers (`'local-llm'`, `'ollama'`) read
 * `SKS_LOCAL_LLM_MAX_PARALLEL_REQUESTS` (default 4); every other provider
 * reads `SKS_REMOTE_API_PARALLEL_BUDGET` (default 12).
 *
 * @param {string} [provider] - Provider name.
 * @returns {number} The budget resolved through `envInt`.
 */
export function defaultModelCallBudget(provider) {
    const name = String(provider || '');
    const isLocalProvider = name === 'local-llm' || name === 'ollama';
    return isLocalProvider
        ? envInt('SKS_LOCAL_LLM_MAX_PARALLEL_REQUESTS', 4)
        : envInt('SKS_REMOTE_API_PARALLEL_BUDGET', 12);
}
|
|
61
|
+
/**
 * Counting semaphore that limits how many model calls run at once for one
 * provider.
 *
 * Fields:
 *   provider    - provider name this limiter belongs to
 *   budget      - maximum number of concurrently held slots
 *   active      - slots currently held
 *   queued      - callers currently waiting for a slot
 *   maxObserved - highest value `active` has reached
 *   waiters     - FIFO queue of resolve callbacks for waiting callers
 */
class ModelCallSemaphoreImpl {
    provider;
    budget;
    active = 0;
    queued = 0;
    maxObserved = 0;
    waiters = [];
    constructor(provider, budget) {
        this.provider = provider;
        this.budget = budget;
    }
    /**
     * Run `fn` while holding a slot; the slot is released whether `fn`
     * resolves or throws.
     *
     * @param {Function} fn - Work to execute inside the slot.
     * @returns {Promise<*>} Whatever `fn` resolves to.
     */
    async run(fn) {
        await this.acquire();
        try {
            return await fn();
        }
        finally {
            this.release();
        }
    }
    /**
     * Take a slot, waiting in FIFO order when the budget is exhausted.
     *
     * @returns {Promise<void>} Resolves once the caller owns a slot.
     */
    async acquire() {
        if (this.active < this.budget) {
            this.active += 1;
            this.maxObserved = Math.max(this.maxObserved, this.active);
            return;
        }
        this.queued += 1;
        // release() transfers its slot to this waiter, so `active` already
        // accounts for it when the promise resolves; nothing to increment here.
        await new Promise((resolve) => this.waiters.push(resolve));
    }
    /**
     * Give a slot back. When a caller is waiting, the slot is handed to it
     * directly instead of being freed: freeing it first would let an
     * `acquire()` that runs before the waiter resumes take the slot as well,
     * pushing `active` above `budget`.
     */
    release() {
        const next = this.waiters.shift();
        if (next) {
            this.queued = Math.max(0, this.queued - 1);
            next();
            return;
        }
        this.active = Math.max(0, this.active - 1);
    }
}
|
|
100
|
+
/**
 * Read a positive integer from an environment variable.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Returned as-is when the variable is unset, not
 *   numeric, or below 1.
 * @returns {number} The variable's value floored to an integer, or `fallback`.
 */
function envInt(name, fallback) {
    const raw = process.env[name];
    const numeric = Number(raw);
    const usable = Number.isFinite(numeric) && numeric >= 1;
    return usable ? Math.floor(numeric) : fallback;
}
|
|
106
|
+
//# sourceMappingURL=model-call-concurrency.js.map
|