sneakoscope 2.0.15 → 2.0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/README.md +5 -3
  2. package/crates/sks-core/Cargo.lock +1 -1
  3. package/crates/sks-core/Cargo.toml +1 -1
  4. package/crates/sks-core/src/main.rs +1 -1
  5. package/dist/.sks-build-stamp.json +4 -4
  6. package/dist/bin/sks.js +1 -1
  7. package/dist/cli/command-registry.js +1 -1
  8. package/dist/commands/proof.js +21 -0
  9. package/dist/commands/zellij-slot-pane.js +7 -1
  10. package/dist/core/agents/agent-orchestrator.js +68 -3
  11. package/dist/core/agents/agent-scheduler.js +217 -86
  12. package/dist/core/agents/agent-schema.js +1 -1
  13. package/dist/core/agents/native-cli-session-swarm.js +97 -27
  14. package/dist/core/agents/native-cli-worker.js +56 -7
  15. package/dist/core/agents/parallel-runtime-proof.js +276 -0
  16. package/dist/core/agents/runtime-proof-summary.js +75 -0
  17. package/dist/core/codex-control/codex-task-runner.js +32 -4
  18. package/dist/core/codex-control/model-call-concurrency.js +106 -0
  19. package/dist/core/commands/naruto-command.js +65 -8
  20. package/dist/core/commands/team-command.js +6 -487
  21. package/dist/core/commands/team-legacy-observe-command.js +182 -0
  22. package/dist/core/db-safety.js +49 -6
  23. package/dist/core/feature-registry.js +4 -2
  24. package/dist/core/fsx.js +1 -1
  25. package/dist/core/git/git-worktree-capability.js +18 -0
  26. package/dist/core/git/git-worktree-manager.js +80 -0
  27. package/dist/core/git/git-worktree-pool.js +4 -0
  28. package/dist/core/hooks-runtime.js +41 -4
  29. package/dist/core/init.js +1 -0
  30. package/dist/core/mad-db/mad-db-capability.js +42 -2
  31. package/dist/core/mad-db/mad-db-ledger.js +14 -0
  32. package/dist/core/mad-db/mad-db-policy-resolver.js +2 -0
  33. package/dist/core/mad-db/mad-db-result-lifecycle.js +136 -0
  34. package/dist/core/naruto/naruto-concurrency-governor.js +14 -1
  35. package/dist/core/release/release-gate-affected-selector.js +47 -5
  36. package/dist/core/release/release-gate-dag.js +5 -1
  37. package/dist/core/release/release-gate-scheduler.js +2 -1
  38. package/dist/core/routes.js +3 -1
  39. package/dist/core/version.js +1 -1
  40. package/dist/core/zellij/zellij-slot-pane-renderer.js +74 -1
  41. package/dist/core/zellij/zellij-slot-telemetry.js +81 -3
  42. package/dist/core/zellij/zellij-ui-mode.js +12 -2
  43. package/dist/scripts/prepublish-release-check-or-fast.js +3 -3
  44. package/dist/scripts/release-speed-summary.js +23 -1
  45. package/package.json +38 -3
  46. package/schemas/agents/parallel-runtime-proof.schema.json +79 -0
@@ -4,6 +4,7 @@ import { MAX_AGENT_COUNT } from './agent-schema.js';
4
4
  import { appendAgentWorkQueueEvent, completeWorkItem, createAgentWorkQueue, enqueueFollowUpWorkItems, leaseNextWorkItem, pendingWorkItems, writeAgentWorkQueue } from './agent-work-queue.js';
5
5
  import { closeWorkerSlotsAfterDrain, createAgentWorkerSlots, markWorkerSlotGenerationClosed, openWorkerSlotGeneration, writeAgentWorkerSlots } from './agent-worker-slot.js';
6
6
  import { closeAgentSessionGeneration, createAgentSessionGeneration, writeAgentSessionGeneration } from './agent-session-generation.js';
7
+ import { appendParallelRuntimeEvent } from './parallel-runtime-proof.js';
7
8
  export const AGENT_SCHEDULER_SCHEMA = 'sks.agent-scheduler.v1';
8
9
  export const AGENT_SCHEDULER_EVENT_SCHEMA = 'sks.agent-scheduler-event.v1';
9
10
  export async function runAgentScheduler(input) {
@@ -19,6 +20,12 @@ export async function runAgentScheduler(input) {
19
20
  });
20
21
  const active = new Map();
21
22
  const results = [];
23
+ const schedulerStartedAt = Date.now();
24
+ let lastUtilizationUpdateMs = schedulerStartedAt;
25
+ let activeSlotTimeMs = 0;
26
+ let batchCounter = 0;
27
+ let batchLaunchSpanTotalMs = 0;
28
+ let batchDispatchInProgress = false;
22
29
  let state = buildState(input.missionId, targetActiveSlots, queue, slots, active, {
23
30
  status: 'initializing',
24
31
  refillDelayMs: input.refillDelayMs || 0,
@@ -27,7 +34,7 @@ export async function runAgentScheduler(input) {
27
34
  await writeAll(input.root, state, slots, queue, active, { event_type: 'scheduler_initialized' }, input.onSchedulerEvent);
28
35
  await refillSlots(null);
29
36
  while (active.size > 0 || pendingWorkItems(queue).length > 0) {
30
- if (active.size === 0 && pendingWorkItems(queue).length > 0) {
37
+ if (!batchDispatchInProgress && active.size === 0 && pendingWorkItems(queue).length > 0) {
31
38
  state.blockers.push('scheduler_pending_queue_without_active_sessions');
32
39
  state.status = 'blocked';
33
40
  await writeAll(input.root, state, slots, queue, active, { event_type: 'scheduler_blocked', pending_count: pendingWorkItems(queue).length }, input.onSchedulerEvent);
@@ -38,6 +45,7 @@ export async function runAgentScheduler(input) {
38
45
  if (!entry)
39
46
  continue;
40
47
  const activeCountBeforeClose = active.size;
48
+ accumulateActiveSlotTime();
41
49
  active.delete(settled.session_id);
42
50
  const resultStatus = settled.result?.status === 'done' ? 'completed' : settled.result?.status === 'blocked' ? 'blocked' : 'failed';
43
51
  completeWorkItem(queue, entry.work_item_id, settled.session_id, resultStatus, settled.error || null);
@@ -65,6 +73,7 @@ export async function runAgentScheduler(input) {
65
73
  const pendingAfterClose = pendingWorkItems(queue).length;
66
74
  if (pendingAfterClose > 0)
67
75
  state.expected_backfill_count += 1;
76
+ updateUtilizationMetrics();
68
77
  await writeAll(input.root, state, slots, queue, active, {
69
78
  event_type: 'session_completed',
70
79
  session_id: settled.session_id,
@@ -80,6 +89,7 @@ export async function runAgentScheduler(input) {
80
89
  closed_at_ms: Date.now()
81
90
  } : null);
82
91
  }
92
+ updateUtilizationMetrics();
83
93
  state.status = 'draining';
84
94
  await writeAll(input.root, state, slots, queue, active, { event_type: 'scheduler_draining' }, input.onSchedulerEvent);
85
95
  slots = closeWorkerSlotsAfterDrain(slots);
@@ -94,6 +104,7 @@ export async function runAgentScheduler(input) {
94
104
  state.all_generations_closed = true;
95
105
  if (!state.pending_queue_drained)
96
106
  state.blockers.push('scheduler_pending_queue_not_drained');
107
+ updateUtilizationMetrics();
97
108
  await writeAll(input.root, state, slots, queue, active, { event_type: 'scheduler_drained' }, input.onSchedulerEvent);
98
109
  return {
99
110
  schema: 'sks.agent-scheduler-result.v1',
@@ -105,9 +116,180 @@ export async function runAgentScheduler(input) {
105
116
  };
106
117
  async function refillSlots(backfill) {
107
118
  state.status = 'running';
119
+ const launches = collectLaunchBatch();
120
+ if (!launches.length)
121
+ return;
122
+ batchDispatchInProgress = true;
123
+ const batchId = `batch-${Date.now().toString(36)}-${batchCounter++}`;
124
+ const batchStart = Date.now();
108
125
  const launchEvents = [];
109
- while (active.size < targetActiveSlots && pendingWorkItems(queue).length > 0) {
110
- const slotIndex = slots.findIndex((slot) => slot.status === 'idle');
126
+ try {
127
+ for (const launch of launches)
128
+ slots[launch.slotIndex] = launch.openedSlot;
129
+ await Promise.all(launches.map((launch) => writeAgentSessionGeneration(input.root, launch.generation)));
130
+ await writeAll(input.root, state, slots, queue, active, {
131
+ event_type: 'batch_dispatch_started',
132
+ batch_id: batchId,
133
+ launch_count: launches.length,
134
+ session_ids: launches.map((launch) => launch.generation.session_id)
135
+ }, input.onSchedulerEvent);
136
+ await appendParallelRuntimeEvent(input.root, input.missionId, {
137
+ event_type: 'batch_dispatch_started',
138
+ slot_id: null,
139
+ generation_index: null,
140
+ session_id: null,
141
+ pid: null,
142
+ backend: 'scheduler',
143
+ placement: 'unknown',
144
+ batch_id: batchId,
145
+ meta: { launch_count: launches.length, active_count_before: active.size }
146
+ }).catch(() => undefined);
147
+ for (const launch of launches) {
148
+ const { slot, openedSlot, generation, agent, workItem } = launch;
149
+ await appendParallelRuntimeEvent(input.root, input.missionId, {
150
+ event_type: 'slot_reserved',
151
+ slot_id: slot.slot_id,
152
+ generation_index: generation.generation_index,
153
+ session_id: generation.session_id,
154
+ pid: null,
155
+ backend: 'scheduler',
156
+ placement: 'unknown',
157
+ batch_id: batchId,
158
+ meta: { work_item_id: workItem.id }
159
+ }).catch(() => undefined);
160
+ await appendParallelRuntimeEvent(input.root, input.missionId, {
161
+ event_type: 'worker_launch_invoked',
162
+ slot_id: slot.slot_id,
163
+ generation_index: generation.generation_index,
164
+ session_id: generation.session_id,
165
+ pid: null,
166
+ backend: 'scheduler',
167
+ placement: 'unknown',
168
+ batch_id: batchId,
169
+ meta: { work_item_id: workItem.id }
170
+ }).catch(() => undefined);
171
+ const promise = Promise.resolve()
172
+ .then(() => input.launchSession({ agent, workItem, generation, slot: openedSlot, queue, state }))
173
+ .then((result) => ({
174
+ result,
175
+ session_id: generation.session_id,
176
+ slot_id: slot.slot_id,
177
+ generation_index: generation.generation_index,
178
+ terminal_close_report_path: path.join(generation.artifact_dir, 'agent-terminal-close-report.json')
179
+ }))
180
+ .catch((err) => ({
181
+ result: {
182
+ schema: 'sks.agent-result.v1',
183
+ mission_id: input.missionId,
184
+ agent_id: agent.id,
185
+ session_id: generation.session_id,
186
+ persona_id: agent.persona_id,
187
+ task_slice_id: workItem.id,
188
+ status: 'failed',
189
+ backend: 'fake',
190
+ summary: err instanceof Error ? err.message : String(err),
191
+ findings: [],
192
+ proposed_changes: [],
193
+ changed_files: [],
194
+ lease_compliance: { ok: true, violations: [] },
195
+ artifacts: [],
196
+ blockers: ['scheduler_launch_failed'],
197
+ confidence: 'failed',
198
+ handoff_notes: '',
199
+ unverified: [],
200
+ writes: [],
201
+ recursion_guard: { ok: true, violations: [] },
202
+ verification: { status: 'failed', checks: [] },
203
+ source_intelligence_refs: input.sourceIntelligenceRefs || null,
204
+ goal_mode_ref: input.goalModeRef || null
205
+ },
206
+ session_id: generation.session_id,
207
+ slot_id: slot.slot_id,
208
+ generation_index: generation.generation_index,
209
+ error: err instanceof Error ? err.message : String(err),
210
+ terminal_close_report_path: path.join(generation.artifact_dir, 'agent-terminal-close-report.json')
211
+ }));
212
+ accumulateActiveSlotTime();
213
+ active.set(generation.session_id, { slot_id: slot.slot_id, work_item_id: workItem.id, session_id: generation.session_id, promise });
214
+ }
215
+ await appendAgentWorkQueueEvent(input.root, 'batch_work_items_dispatched', {
216
+ batch_id: batchId,
217
+ launch_count: launches.length,
218
+ session_ids: launches.map((launch) => launch.generation.session_id),
219
+ work_item_ids: launches.map((launch) => launch.workItem.id)
220
+ });
221
+ for (const launch of launches)
222
+ await appendAgentWorkQueueEvent(input.root, 'work_item_dispatched', { work_item_id: launch.workItem.id, session_id: launch.generation.session_id, slot_id: launch.slot.slot_id });
223
+ if (backfill) {
224
+ const firstLaunch = launches[0];
225
+ const refillLatencyMs = Math.max(0, Date.now() - backfill.closed_at_ms);
226
+ state.backfill_count += 1;
227
+ state.refill_latency_events_ms.push(refillLatencyMs);
228
+ state.refill_latency_p95_ms = percentile95(state.refill_latency_events_ms);
229
+ launchEvents.push({
230
+ event_type: 'backfill_event',
231
+ closed_session_id: backfill.closed_session_id,
232
+ new_session_id: firstLaunch?.generation.session_id || null,
233
+ slot_id: firstLaunch?.slot.slot_id || null,
234
+ batch_id: batchId,
235
+ launch_count: launches.length,
236
+ active_count_before: backfill.active_count_before,
237
+ active_count_after: active.size,
238
+ refill_latency_ms: refillLatencyMs
239
+ });
240
+ backfill = null;
241
+ }
242
+ else {
243
+ for (const launch of launches)
244
+ launchEvents.push({
245
+ event_type: 'session_launched',
246
+ session_id: launch.generation.session_id,
247
+ slot_id: launch.slot.slot_id,
248
+ work_item_id: launch.workItem.id,
249
+ active_count_after: active.size
250
+ });
251
+ }
252
+ if (input.refillDelayMs && input.refillDelayMs > 0)
253
+ await delay(input.refillDelayMs);
254
+ const launchSpanMs = Math.max(0, Date.now() - batchStart);
255
+ batchLaunchSpanTotalMs += launchSpanMs;
256
+ state.batch_dispatch_count += 1;
257
+ state.largest_batch_size = Math.max(state.largest_batch_size, launches.length);
258
+ if (state.first_batch_launch_span_ms === 0)
259
+ state.first_batch_launch_span_ms = launchSpanMs;
260
+ state.average_batch_launch_span_ms = Math.round(batchLaunchSpanTotalMs / Math.max(1, state.batch_dispatch_count));
261
+ updateUtilizationMetrics();
262
+ await appendParallelRuntimeEvent(input.root, input.missionId, {
263
+ event_type: 'batch_dispatch_completed',
264
+ slot_id: null,
265
+ generation_index: null,
266
+ session_id: null,
267
+ pid: null,
268
+ backend: 'scheduler',
269
+ placement: 'unknown',
270
+ batch_id: batchId,
271
+ meta: { launch_count: launches.length, launch_span_ms: launchSpanMs, active_count_after: active.size }
272
+ }).catch(() => undefined);
273
+ await writeAll(input.root, state, slots, queue, active, {
274
+ event_type: 'batch_dispatch_completed',
275
+ batch_id: batchId,
276
+ launch_count: launches.length,
277
+ launch_span_ms: launchSpanMs,
278
+ active_count_after: active.size,
279
+ session_ids: launches.map((launch) => launch.generation.session_id)
280
+ }, input.onSchedulerEvent);
281
+ }
282
+ finally {
283
+ batchDispatchInProgress = false;
284
+ }
285
+ for (const event of launchEvents)
286
+ await appendJsonl(path.join(input.root, 'agent-scheduler-events.jsonl'), { schema: AGENT_SCHEDULER_EVENT_SCHEMA, ts: nowIso(), ...event });
287
+ }
288
+ function collectLaunchBatch() {
289
+ const launches = [];
290
+ const reservedSlots = new Set();
291
+ while (active.size + launches.length < targetActiveSlots && pendingWorkItems(queue).length > 0) {
292
+ const slotIndex = slots.findIndex((slot, index) => slot.status === 'idle' && !reservedSlots.has(index));
111
293
  if (slotIndex < 0)
112
294
  break;
113
295
  const slot = slots[slotIndex];
@@ -133,90 +315,25 @@ export async function runAgentScheduler(input) {
133
315
  goalModeRef: workItem.goal_mode_ref
134
316
  });
135
317
  workItem.running_session_id = generation.session_id;
136
- await writeAgentSessionGeneration(input.root, generation);
137
- const agent = buildAgentForGeneration(slot, generation, workItem);
138
318
  const openedSlot = openWorkerSlotGeneration(slot, generation);
139
- slots[slotIndex] = openedSlot;
140
- await writeAll(input.root, state, slots, queue, active, {
141
- event_type: 'session_launch_started',
142
- session_id: generation.session_id,
143
- slot_id: slot.slot_id,
144
- generation_index: generation.generation_index,
145
- work_item_id: workItem.id
146
- }, input.onSchedulerEvent);
147
- const promise = Promise.resolve()
148
- .then(() => input.launchSession({ agent, workItem, generation, slot: openedSlot, queue, state }))
149
- .then((result) => ({
150
- result,
151
- session_id: generation.session_id,
152
- slot_id: slot.slot_id,
153
- generation_index: generation.generation_index,
154
- terminal_close_report_path: path.join(generation.artifact_dir, 'agent-terminal-close-report.json')
155
- }))
156
- .catch((err) => ({
157
- result: {
158
- schema: 'sks.agent-result.v1',
159
- mission_id: input.missionId,
160
- agent_id: agent.id,
161
- session_id: generation.session_id,
162
- persona_id: agent.persona_id,
163
- task_slice_id: workItem.id,
164
- status: 'failed',
165
- backend: 'fake',
166
- summary: err instanceof Error ? err.message : String(err),
167
- findings: [],
168
- proposed_changes: [],
169
- changed_files: [],
170
- lease_compliance: { ok: true, violations: [] },
171
- artifacts: [],
172
- blockers: ['scheduler_launch_failed'],
173
- confidence: 'failed',
174
- handoff_notes: '',
175
- unverified: [],
176
- writes: [],
177
- recursion_guard: { ok: true, violations: [] },
178
- verification: { status: 'failed', checks: [] },
179
- source_intelligence_refs: input.sourceIntelligenceRefs || null,
180
- goal_mode_ref: input.goalModeRef || null
181
- },
182
- session_id: generation.session_id,
183
- slot_id: slot.slot_id,
184
- generation_index: generation.generation_index,
185
- error: err instanceof Error ? err.message : String(err),
186
- terminal_close_report_path: path.join(generation.artifact_dir, 'agent-terminal-close-report.json')
187
- }));
188
- active.set(generation.session_id, { slot_id: slot.slot_id, work_item_id: workItem.id, session_id: generation.session_id, promise });
189
- await appendAgentWorkQueueEvent(input.root, 'work_item_dispatched', { work_item_id: workItem.id, session_id: generation.session_id, slot_id: slot.slot_id });
190
- if (backfill) {
191
- const refillLatencyMs = Math.max(0, Date.now() - backfill.closed_at_ms);
192
- state.backfill_count += 1;
193
- state.refill_latency_events_ms.push(refillLatencyMs);
194
- state.refill_latency_p95_ms = percentile95(state.refill_latency_events_ms);
195
- launchEvents.push({
196
- event_type: 'backfill_event',
197
- closed_session_id: backfill.closed_session_id,
198
- new_session_id: generation.session_id,
199
- slot_id: slot.slot_id,
200
- active_count_before: backfill.active_count_before,
201
- active_count_after: active.size,
202
- refill_latency_ms: refillLatencyMs
203
- });
204
- backfill = null;
205
- }
206
- else {
207
- launchEvents.push({
208
- event_type: 'session_launched',
209
- session_id: generation.session_id,
210
- slot_id: slot.slot_id,
211
- work_item_id: workItem.id,
212
- active_count_after: active.size
213
- });
214
- }
215
- if (input.refillDelayMs && input.refillDelayMs > 0)
216
- await delay(input.refillDelayMs);
319
+ const agent = buildAgentForGeneration(slot, generation, workItem);
320
+ launches.push({ slotIndex, slot, openedSlot, generation, agent, workItem, provisionalSessionId });
321
+ reservedSlots.add(slotIndex);
217
322
  }
218
- for (const event of launchEvents)
219
- await writeAll(input.root, state, slots, queue, active, event, input.onSchedulerEvent);
323
+ return launches;
324
+ }
325
+ function updateUtilizationMetrics() {
326
+ accumulateActiveSlotTime();
327
+ state.wall_time_ms = Math.max(0, Date.now() - schedulerStartedAt);
328
+ state.active_slot_time_ms = activeSlotTimeMs;
329
+ const denominator = Math.max(1, state.wall_time_ms * targetActiveSlots);
330
+ state.scheduler_utilization = Number(Math.min(1, state.active_slot_time_ms / denominator).toFixed(3));
331
+ }
332
+ function accumulateActiveSlotTime() {
333
+ const now = Date.now();
334
+ const delta = Math.max(0, now - lastUtilizationUpdateMs);
335
+ activeSlotTimeMs += active.size * delta;
336
+ lastUtilizationUpdateMs = now;
220
337
  }
221
338
  }
222
339
  export function normalizeTargetActiveSlots(value, maxActiveSlots = MAX_AGENT_COUNT) {
@@ -261,7 +378,14 @@ function buildState(missionId, targetActiveSlots, queue, slots, active, opts) {
261
378
  pending_queue_drained: pendingCount === 0,
262
379
  all_slots_closed_after_drain: slots.length > 0 && slots.every((slot) => slot.status === 'closed'),
263
380
  all_generations_closed: false,
264
- blockers: [...(previous?.blockers || [])]
381
+ blockers: [...(previous?.blockers || [])],
382
+ batch_dispatch_count: previous?.batch_dispatch_count || 0,
383
+ largest_batch_size: previous?.largest_batch_size || 0,
384
+ first_batch_launch_span_ms: previous?.first_batch_launch_span_ms || 0,
385
+ average_batch_launch_span_ms: previous?.average_batch_launch_span_ms || 0,
386
+ scheduler_utilization: previous?.scheduler_utilization || 0,
387
+ active_slot_time_ms: previous?.active_slot_time_ms || 0,
388
+ wall_time_ms: previous?.wall_time_ms || 0
265
389
  };
266
390
  }
267
391
  async function writeAll(root, currentState, slots, queue, active, event, onSchedulerEvent) {
@@ -289,6 +413,13 @@ async function writeAll(root, currentState, slots, queue, active, event, onSched
289
413
  currentState.blocked = nextState.blocked;
290
414
  currentState.pending_queue_drained = nextState.pending_queue_drained;
291
415
  currentState.all_slots_closed_after_drain = nextState.all_slots_closed_after_drain;
416
+ currentState.batch_dispatch_count = nextState.batch_dispatch_count;
417
+ currentState.largest_batch_size = nextState.largest_batch_size;
418
+ currentState.first_batch_launch_span_ms = nextState.first_batch_launch_span_ms;
419
+ currentState.average_batch_launch_span_ms = nextState.average_batch_launch_span_ms;
420
+ currentState.scheduler_utilization = nextState.scheduler_utilization;
421
+ currentState.active_slot_time_ms = nextState.active_slot_time_ms;
422
+ currentState.wall_time_ms = nextState.wall_time_ms;
292
423
  await writeAgentWorkQueue(root, queue);
293
424
  await writeAgentWorkerSlots(root, slots);
294
425
  await writeJsonAtomic(path.join(root, 'agent-scheduler-state.json'), currentState);
@@ -13,7 +13,7 @@ export const DEFAULT_AGENT_CONCURRENCY = 5;
13
13
  // ceiling to up to 100 concurrent clone sessions. Only the naruto path opts into this
14
14
  // cap; every other roster/scheduler caller keeps MAX_AGENT_COUNT as the default.
15
15
  export const MAX_NARUTO_AGENT_COUNT = 100;
16
- export const DEFAULT_NARUTO_CLONES = 12;
16
+ export const DEFAULT_NARUTO_CLONES = 32;
17
17
  export const AGENT_BACKENDS = ['fake', 'process', 'codex-sdk', 'zellij', 'ollama', 'local-llm'];
18
18
  export function normalizeAgentBackend(input) {
19
19
  const value = String(input || 'codex-sdk');
@@ -8,8 +8,9 @@ import { closeWorkerPane, openWorkerPane } from '../zellij/zellij-worker-pane-ma
8
8
  import { closeWorkerInRightColumn, recordHeadlessWorkerInRightColumn } from '../zellij/zellij-right-column-manager.js';
9
9
  import { resolveProviderContext } from '../provider/provider-context.js';
10
10
  import { buildZellijSlotPaneCommand } from '../zellij/zellij-slot-pane-renderer.js';
11
- import { resolveZellijUiMode } from '../zellij/zellij-ui-mode.js';
11
+ import { resolveZellijWorkerPaneUiMode } from '../zellij/zellij-ui-mode.js';
12
12
  import { appendZellijSlotTelemetry } from '../zellij/zellij-slot-telemetry.js';
13
+ import { appendParallelRuntimeEvent } from './parallel-runtime-proof.js';
13
14
  export const NATIVE_CLI_SESSION_SWARM_SCHEMA = 'sks.agent-native-cli-session-swarm.v1';
14
15
  export function createNativeCliSessionSwarmRecorder(root, input) {
15
16
  return new NativeCliSessionSwarmRecorder(root, input);
@@ -175,6 +176,16 @@ class NativeCliSessionSwarmRecorder {
175
176
  record.pid = child.pid || null;
176
177
  record.process_id = child.pid || null;
177
178
  record.status = 'running';
179
+ await appendParallelRuntimeEvent(this.root, this.input.missionId, {
180
+ event_type: 'worker_process_spawned',
181
+ slot_id: ctx.agent.slot_id || ctx.agent.id || null,
182
+ generation_index: ctx.agent.generation_index || null,
183
+ session_id: ctx.agent.session_id || null,
184
+ pid: child.pid || null,
185
+ backend: this.input.backend,
186
+ placement: record.worker_placement === 'headless' ? 'headless' : 'process',
187
+ worktree_id: worktree?.id || null
188
+ }).catch(() => undefined);
178
189
  await this.telemetry(ctx, {
179
190
  eventType: 'worker_spawned',
180
191
  status: 'launching',
@@ -267,7 +278,8 @@ class NativeCliSessionSwarmRecorder {
267
278
  route: this.input.route,
268
279
  serviceTier: this.input.fastModePolicy.service_tier
269
280
  });
270
- const uiMode = resolveZellijUiMode(Array.isArray(input.ctx.opts.args) ? input.ctx.opts.args : [], process.env);
281
+ const uiMode = resolveZellijWorkerPaneUiMode(Array.isArray(input.ctx.opts.args) ? input.ctx.opts.args : [], process.env);
282
+ const liveWorkerPane = uiMode !== 'compact-slots';
271
283
  const workerEnv = {
272
284
  ...(input.ctx.opts.env || {}),
273
285
  ...fastModeEnv(this.input.fastModePolicy),
@@ -289,7 +301,7 @@ class NativeCliSessionSwarmRecorder {
289
301
  artifacts: [path.join(input.workerDirRel, 'worker-intake.json'), input.heartbeatRel, input.resultRel],
290
302
  logTail: `zellij=${sessionName}`
291
303
  });
292
- const workerCommand = uiMode === 'full-debug'
304
+ const workerCommand = liveWorkerPane
293
305
  ? buildPaneWorkerCommand({
294
306
  args: input.args,
295
307
  stdoutPath: path.join(this.root, input.stdoutRel),
@@ -321,6 +333,30 @@ class NativeCliSessionSwarmRecorder {
321
333
  mode: uiMode,
322
334
  watch: true
323
335
  });
336
+ const processRun = liveWorkerPane
337
+ ? null
338
+ : await this.spawnCompactSlotWorkerProcess({
339
+ args: input.args,
340
+ cwd: workerCwd,
341
+ env: workerEnv,
342
+ stdoutRel: input.stdoutRel,
343
+ stderrRel: input.stderrRel
344
+ });
345
+ if (processRun?.pid) {
346
+ input.record.pid = processRun.pid;
347
+ input.record.process_id = processRun.pid;
348
+ await appendParallelRuntimeEvent(this.root, this.input.missionId, {
349
+ event_type: 'worker_process_spawned',
350
+ slot_id: slotId,
351
+ generation_index: Number(input.ctx.agent.generation_index || 1),
352
+ session_id: input.ctx.agent.session_id || null,
353
+ pid: processRun.pid,
354
+ backend: this.input.backend,
355
+ placement: 'zellij-pane',
356
+ worktree_id: worktree?.id || null
357
+ }).catch(() => undefined);
358
+ await this.record(input.record);
359
+ }
324
360
  let paneRecord;
325
361
  try {
326
362
  paneRecord = await openWorkerPane({
@@ -367,8 +403,10 @@ class NativeCliSessionSwarmRecorder {
367
403
  if (input.zellijReservation)
368
404
  this.releaseVisibleZellijReservation(input.zellijReservation);
369
405
  }
370
- const launchBlockers = paneRecord.blockers || [];
371
- input.record.command_line = ['zellij', '--session', sessionName, 'action', 'new-pane', '--direction', paneRecord.direction_applied, '--name', paneRecord.pane_name, '--', 'sh', '-lc', uiMode === 'full-debug' ? '<native-cli-worker-command>' : '<zellij-slot-pane-renderer-command>'];
406
+ const zellijRequired = process.env.SKS_REQUIRE_ZELLIJ === '1';
407
+ const launchBlockers = zellijRequired ? paneRecord.blockers || [] : [];
408
+ const launchWarnings = zellijRequired ? [] : paneRecord.blockers || [];
409
+ input.record.command_line = ['zellij', '--session', sessionName, 'action', 'new-pane', '--direction', paneRecord.direction_applied, '--name', paneRecord.pane_name, '--', 'sh', '-lc', liveWorkerPane ? '<native-cli-worker-command>' : '<zellij-slot-pane-renderer-command>'];
372
410
  input.record.zellij_session_name = sessionName;
373
411
  input.record.zellij_pane_id = paneRecord.pane_id || null;
374
412
  input.record.zellij_pane_id_source = paneRecord.pane_id_source;
@@ -382,9 +420,10 @@ class NativeCliSessionSwarmRecorder {
382
420
  input.record.provider_context = paneRecord.provider_context;
383
421
  input.record.worktree = worktree;
384
422
  input.record.zellij_ui_mode = uiMode;
385
- input.record.slot_visualization = uiMode === 'full-debug' ? 'worker-command-pane' : 'zellij-slot-pane-renderer';
423
+ input.record.slot_visualization = liveWorkerPane ? 'worker-command-pane' : 'zellij-slot-pane-renderer';
386
424
  input.record.status = launchBlockers.length ? 'failed' : 'running';
387
425
  input.record.blockers = launchBlockers;
426
+ input.record.warnings = [...(input.record.warnings || []), ...launchWarnings];
388
427
  await this.telemetry(input.ctx, {
389
428
  eventType: 'worker_spawned',
390
429
  status: launchBlockers.length ? 'failed' : 'launching',
@@ -422,27 +461,18 @@ class NativeCliSessionSwarmRecorder {
422
461
  goal_mode_ref: input.ctx.agent.goal_mode_ref || null
423
462
  });
424
463
  }
425
- const processRun = uiMode === 'full-debug'
426
- ? null
427
- : await this.spawnCompactSlotWorkerProcess({
428
- args: input.args,
429
- cwd: workerCwd,
430
- env: workerEnv,
431
- stdoutRel: input.stdoutRel,
432
- stderrRel: input.stderrRel
464
+ const heartbeatSeen = await waitForWorkerHeartbeat(path.join(this.root, input.heartbeatRel), Number(process.env.SKS_ZELLIJ_WORKER_HEARTBEAT_TIMEOUT_MS || 5000));
465
+ if (heartbeatSeen) {
466
+ await this.telemetry(input.ctx, {
467
+ eventType: 'heartbeat',
468
+ status: 'running',
469
+ artifacts: [input.heartbeatRel],
470
+ logTail: await tailFile(path.join(this.root, input.heartbeatRel), 600)
433
471
  });
434
- if (processRun?.pid) {
435
- input.record.pid = processRun.pid;
436
- input.record.process_id = processRun.pid;
437
- await this.record(input.record);
438
472
  }
439
- await waitForWorkerHeartbeat(path.join(this.root, input.heartbeatRel), Number(process.env.SKS_ZELLIJ_WORKER_HEARTBEAT_TIMEOUT_MS || 30000));
440
- await this.telemetry(input.ctx, {
441
- eventType: 'heartbeat',
442
- status: 'running',
443
- artifacts: [input.heartbeatRel],
444
- logTail: await tailFile(path.join(this.root, input.heartbeatRel), 600)
445
- });
473
+ else {
474
+ input.record.warnings = [...(input.record.warnings || []), 'zellij_worker_heartbeat_missing_launch_warning'];
475
+ }
446
476
  await appendJsonl(path.join(this.root, input.workerDirRel, 'zellij-worker-pane-events.jsonl'), {
447
477
  schema: 'sks.zellij-worker-pane-event.v1',
448
478
  ts: nowIso(),
@@ -505,8 +535,10 @@ class NativeCliSessionSwarmRecorder {
505
535
  const heartbeatOk = await hasHeartbeat(path.join(this.root, input.heartbeatRel));
506
536
  input.record.blockers = [
507
537
  ...(parsed ? parsed.blockers || [] : ['zellij_worker_result_timeout']),
508
- ...(heartbeatOk ? [] : ['zellij_worker_heartbeat_missing'])
538
+ ...(heartbeatOk ? [] : [])
509
539
  ];
540
+ if (!heartbeatOk)
541
+ input.record.warnings = [...(input.record.warnings || []), 'zellij_worker_heartbeat_missing'];
510
542
  paneRecord = await closeWorkerPane({
511
543
  root: this.root,
512
544
  paneRecord,
@@ -593,6 +625,24 @@ class NativeCliSessionSwarmRecorder {
593
625
  log_tail: input.logTail || '',
594
626
  blockers: input.blockers || []
595
627
  }).catch(() => undefined);
628
+ const parallelEvent = mapTelemetryToParallelEvent(input.eventType);
629
+ if (parallelEvent) {
630
+ await appendParallelRuntimeEvent(this.root, this.input.missionId, {
631
+ event_type: parallelEvent,
632
+ slot_id: String(ctx.agent?.slot_id || ctx.agent?.id || 'slot-001'),
633
+ generation_index: Number(ctx.agent?.generation_index || 1),
634
+ session_id: ctx.agent?.session_id == null ? null : String(ctx.agent.session_id),
635
+ pid: null,
636
+ backend: this.input.backend,
637
+ placement: normalizeParallelPlacement(ctx.opts?.workerPlacement || this.input.workerPlacement || (input.status === 'headless' ? 'headless' : 'unknown')),
638
+ worktree_id: ctx.agent?.worktree?.id || ctx.slice?.worktree?.id || null,
639
+ meta: {
640
+ status: input.status,
641
+ artifacts: input.artifacts || [],
642
+ blockers: input.blockers || []
643
+ }
644
+ }).catch(() => undefined);
645
+ }
596
646
  }
597
647
  async persist() {
598
648
  this.writeLock = this.writeLock.catch(() => undefined).then(async () => {
@@ -692,7 +742,10 @@ export function buildPaneWorkerCommand(input) {
692
742
  const holdMs = Math.max(0, Number(process.env.SKS_ZELLIJ_WORKER_PANE_HOLD_MS || 1500));
693
743
  const hold = holdMs > 0 ? `sleep ${shellQuote(String(Math.min(30, holdMs / 1000)))}` : ':';
694
744
  const header = input.header ? `printf '%s\\n' ${shellQuote(input.header)} | tee -a ${shellQuote(input.stdoutPath)};` : '';
695
- return `${envPrefix.join(' ')} ${header} ${command} >> ${shellQuote(input.stdoutPath)} 2>> ${shellQuote(input.stderrPath)}; code=$?; ${heartbeat}; ${hold}; exit $code`.trim();
745
+ const exitPath = `${input.heartbeatPath}.exit`;
746
+ const visibleCommand = `(${command}; printf '%s' "$?" > ${shellQuote(exitPath)}) 2>&1 | tee -a ${shellQuote(input.stdoutPath)}`;
747
+ const readExit = `code=$(cat ${shellQuote(exitPath)} 2>/dev/null || printf '1'); rm -f ${shellQuote(exitPath)}`;
748
+ return `${envPrefix.join(' ')} ${header} ${visibleCommand}; ${readExit}; ${heartbeat}; ${hold}; exit $code`.trim();
696
749
  }
697
750
  function buildPaneWorkerHeader(input) {
698
751
  return [
@@ -765,6 +818,23 @@ function firstString(values) {
765
818
  }
766
819
  return null;
767
820
  }
821
+ function mapTelemetryToParallelEvent(eventType) {
822
+ if (eventType === 'slot_reserved')
823
+ return 'slot_reserved';
824
+ if (eventType === 'heartbeat')
825
+ return 'worker_heartbeat_seen';
826
+ if (eventType === 'worker_completed')
827
+ return 'worker_completed';
828
+ if (eventType === 'worker_failed')
829
+ return 'worker_failed';
830
+ return null;
831
+ }
832
+ function normalizeParallelPlacement(value) {
833
+ const text = String(value || '');
834
+ if (text === 'zellij-pane' || text === 'process' || text === 'headless')
835
+ return text;
836
+ return 'unknown';
837
+ }
768
838
  async function tailFile(file, max) {
769
839
  try {
770
840
  const text = await fs.promises.readFile(file, 'utf8');