sneakoscope 2.0.16 → 2.0.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -3
- package/crates/sks-core/Cargo.lock +1 -1
- package/crates/sks-core/Cargo.toml +1 -1
- package/crates/sks-core/src/main.rs +1 -1
- package/dist/.sks-build-stamp.json +4 -4
- package/dist/bin/sks.js +1 -1
- package/dist/cli/command-registry.js +1 -1
- package/dist/commands/proof.js +21 -0
- package/dist/commands/zellij-slot-pane.js +7 -1
- package/dist/core/agents/agent-orchestrator.js +3 -1
- package/dist/core/agents/agent-scheduler.js +14 -1
- package/dist/core/agents/native-cli-session-swarm.js +11 -7
- package/dist/core/agents/native-cli-worker.js +56 -7
- package/dist/core/agents/parallel-runtime-proof.js +68 -9
- package/dist/core/agents/runtime-proof-summary.js +75 -0
- package/dist/core/commands/naruto-command.js +17 -3
- package/dist/core/commands/team-command.js +6 -311
- package/dist/core/commands/team-legacy-observe-command.js +182 -0
- package/dist/core/db-safety.js +15 -0
- package/dist/core/feature-registry.js +4 -2
- package/dist/core/fsx.js +1 -1
- package/dist/core/hooks-runtime.js +41 -4
- package/dist/core/init.js +1 -0
- package/dist/core/mad-db/mad-db-capability.js +9 -1
- package/dist/core/mad-db/mad-db-result-lifecycle.js +136 -0
- package/dist/core/release/release-gate-affected-selector.js +47 -5
- package/dist/core/release/release-gate-dag.js +5 -1
- package/dist/core/release/release-gate-scheduler.js +2 -1
- package/dist/core/routes.js +3 -1
- package/dist/core/version.js +1 -1
- package/dist/core/zellij/zellij-slot-pane-renderer.js +74 -1
- package/dist/core/zellij/zellij-slot-telemetry.js +29 -6
- package/dist/core/zellij/zellij-ui-mode.js +12 -2
- package/dist/scripts/prepublish-release-check-or-fast.js +3 -3
- package/dist/scripts/release-speed-summary.js +22 -2
- package/package.json +14 -3
- package/schemas/agents/parallel-runtime-proof.schema.json +31 -0
package/README.md
CHANGED
|
@@ -16,7 +16,7 @@ Set up this agent project with Sneakoscope Codex. Use [[mandarange/Sneakoscope-C
|
|
|
16
16
|
|
|
17
17
|
## Current Release
|
|
18
18
|
|
|
19
|
-
SKS **2.0.
|
|
19
|
+
SKS **2.0.17** is the micro-hardening release for strict production PID proof, true scheduler active-time utilization, live Zellij slot telemetry freshness, Mad-DB result lifecycle audit, and unified runtime/release proof summaries.
|
|
20
20
|
|
|
21
21
|
What changed:
|
|
22
22
|
|
|
@@ -613,7 +613,9 @@ SKS_HERMES=1 sks status --json
|
|
|
613
613
|
|
|
614
614
|
Use these inside Codex App or another agent prompt. They are prompt commands, not terminal commands.
|
|
615
615
|
|
|
616
|
-
Common prompts: `$Team`, `$From-Chat-IMG`, `$with-local-llm-on`, `$with-local-llm-off`, `$DFix`, `$Answer`, `$SKS`, `$QA-LOOP`, `$PPT`, `$Computer-Use`/`$CU`, `$Goal`, `$Research`, `$AutoResearch`, `$DB`, `$MAD-SKS`, `$GX`, `$Wiki`, and `$Help`.
|
|
616
|
+
Common prompts: `$Team`, `$From-Chat-IMG`, `$with-local-llm-on`, `$with-local-llm-off`, `$DFix`, `$Answer`, `$SKS`, `$QA-LOOP`, `$PPT`, `$Computer-Use`/`$CU`, `$Goal`, `$Research`, `$AutoResearch`, `$DB`, `$MAD-SKS`, `$MAD-DB`, `$GX`, `$Wiki`, and `$Help`.
|
|
617
|
+
|
|
618
|
+
`$MAD-DB` is the prompt-visible Mad-DB alias for one-cycle DB break-glass work. It maps to the same guarded MAD-SKS permission route, while the terminal lifecycle remains `sks mad-db status|enable|revoke`; it is not a permanent DB unlock and catastrophic DB safeguards remain active.
|
|
617
619
|
|
|
618
620
|
## Common Workflows
|
|
619
621
|
|
|
@@ -759,7 +761,7 @@ npm run release:check
|
|
|
759
761
|
npm run publish:dry
|
|
760
762
|
```
|
|
761
763
|
|
|
762
|
-
`release:check` runs the change-aware affected release gate for ordinary local checks. Publish readiness uses `release:check:full`, which runs the full release DAG and writes a source digest stamp under `.sneakoscope/reports/` so publish commands can verify the same source/dist state. The DAG preserves the 1.18 baseline gates and adds Codex 0.136 compatibility, inherited Codex 0.135/0.134 runner truth, patch swarm runtime truth, transaction journaling, serial conflict rebase, strict strategy-to-patch proof, rollback command proof, Native CLI Session Swarm 5/10/20-process proof, Real Worker Backend Router proof, Codex child overlap proof, model-authored patch-envelope separation, Zellij layout/pane/screen/socket-dir proof, no-subagent-scaling proof, Fast mode default/worker/Codex/MAD propagation proof, Appshots attachment provenance, MCP runtime overlap evidence, task graph expansion, schema-bound follow-up work, actual Agent/Team/Research/QA route blackboxes, scheduler proof hardening, Source Intelligence propagation, Goal mode propagation checks, slot telemetry, update notice, MAD-DB, and Naruto SSOT gates. Broader live gates remain explicit scripts such as `release:real-check`; real Codex patch smoke, real Codex parallel worker proof, and real Zellij proof are optional unless their `SKS_REQUIRE_REAL_*` or `SKS_REQUIRE_ZELLIJ=1` environment variables are set. Generate the human-readable registry with `sks features inventory --write-docs`. Plain `npm publish` uses the `latest` dist-tag.
|
|
764
|
+
`release:check` runs the change-aware affected release gate for ordinary local checks. Publish readiness uses `release:check:full`, which runs the full release DAG and writes a source digest stamp under `.sneakoscope/reports/` so publish commands can verify the same source/dist state. The DAG preserves the 1.18 baseline gates and adds Codex 0.136 compatibility, inherited Codex 0.135/0.134 runner truth, patch swarm runtime truth, transaction journaling, serial conflict rebase, strict strategy-to-patch proof, rollback command proof, Native CLI Session Swarm 5/10/20-process proof, Real Worker Backend Router proof, Codex child overlap proof, model-authored patch-envelope separation, Zellij layout/pane/screen/socket-dir proof, no-subagent-scaling proof, Fast mode default/worker/Codex/MAD propagation proof, Appshots attachment provenance, MCP runtime overlap evidence, task graph expansion, schema-bound follow-up work, actual Agent/Team/Research/QA route blackboxes, scheduler proof hardening, Source Intelligence propagation, Goal mode propagation checks, slot telemetry, update notice, MAD-DB, and Naruto SSOT gates. Broader live gates remain explicit scripts such as `release:real-check`; real Codex patch smoke, real Codex parallel worker proof, and real Zellij proof are optional unless their `SKS_REQUIRE_REAL_*` or `SKS_REQUIRE_ZELLIJ=1` environment variables are set. Generate the human-readable registry with `sks features inventory --write-docs`. Plain `npm publish` uses the `latest` dist-tag. `npm run publish:dry` runs `release:check:full`, verifies the fresh stamp, and then performs provenance/registry and npm dry-run checks. npm's `prepublishOnly` uses `prepublish-release-check-or-fast` to accept that current stamp before the real publish; if the stamp is missing or stale, it runs `release:check:full` once before continuing.
|
|
763
765
|
|
|
764
766
|
Version bumps are manual. Run `sks versioning bump` only when preparing release metadata; SKS will not create `.git/hooks/pre-commit` or auto-bump during ordinary commits.
|
|
765
767
|
|
|
@@ -4,7 +4,7 @@ use std::io::{self, Read, Seek, SeekFrom};
|
|
|
4
4
|
fn main() {
|
|
5
5
|
let mut args = std::env::args().skip(1);
|
|
6
6
|
match args.next().as_deref() {
|
|
7
|
-
Some("--version") => println!("sks-rs 2.0.
|
|
7
|
+
Some("--version") => println!("sks-rs 2.0.17"),
|
|
8
8
|
Some("compact-info") => {
|
|
9
9
|
let mut input = String::new();
|
|
10
10
|
let _ = io::stdin().read_to_string(&mut input);
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"schema": "sks.dist-build-stamp.v1",
|
|
3
3
|
"package_name": "sneakoscope",
|
|
4
|
-
"package_version": "2.0.
|
|
5
|
-
"source_digest": "
|
|
6
|
-
"source_file_count":
|
|
7
|
-
"built_at_source_time":
|
|
4
|
+
"package_version": "2.0.17",
|
|
5
|
+
"source_digest": "1c6ef84350a97acdfd592ae544ec9054e2ee0a997076e725bc99b771c4693cf2",
|
|
6
|
+
"source_file_count": 2201,
|
|
7
|
+
"built_at_source_time": 1780974441836
|
|
8
8
|
}
|
package/dist/bin/sks.js
CHANGED
|
@@ -119,7 +119,7 @@ export const COMMANDS = {
|
|
|
119
119
|
commit: entry('stable', 'Create a simple git commit', 'dist/commands/commit.js', directCommand(() => import('../commands/commit.js'), 'dist/commands/commit.js')),
|
|
120
120
|
'commit-and-push': entry('stable', 'Create a simple git commit and push', 'dist/commands/commit-and-push.js', directCommand(() => import('../commands/commit-and-push.js'), 'dist/commands/commit-and-push.js')),
|
|
121
121
|
dfix: entry('stable', 'Run DFix diagnose/plan/patch/verify loop', 'dist/core/commands/dfix-command.js', commandArgsCommand(() => import('../core/commands/dfix-command.js'), 'dfixCommand', 'dist/core/commands/dfix-command.js')),
|
|
122
|
-
team: entry('beta', 'Deprecated
|
|
122
|
+
team: entry('beta', 'Deprecated alias. New execution redirects to Naruto; legacy observe/watch remains.', 'dist/core/commands/team-command.js', argsCommand(() => import('../core/commands/team-command.js'), 'team', 'dist/core/commands/team-command.js')),
|
|
123
123
|
agent: entry('beta', 'Run native multi-session agent missions', 'dist/core/commands/agent-command.js', argsCommand(() => import('../core/commands/agent-command.js'), 'agentCommand', 'dist/core/commands/agent-command.js')),
|
|
124
124
|
'with-local-llm': entry('beta', 'Enable or inspect local Ollama worker backend', 'dist/core/commands/local-model-command.js', argsCommand(() => import('../core/commands/local-model-command.js'), 'localModelCommand', 'dist/core/commands/local-model-command.js')),
|
|
125
125
|
naruto: entry('labs', 'Run $Naruto shadow-clone swarm (up to 100 parallel sessions)', 'dist/core/commands/naruto-command.js', argsCommand(() => import('../core/commands/naruto-command.js'), 'narutoCommand', 'dist/core/commands/naruto-command.js')),
|
package/dist/commands/proof.js
CHANGED
|
@@ -8,10 +8,20 @@ import { writeRouteCompletionProof } from '../core/proof/route-adapter.js';
|
|
|
8
8
|
import { finalizeRouteWithProof } from '../core/proof/route-finalizer.js';
|
|
9
9
|
import { renderProofMarkdown, writeCompletionProof } from '../core/proof/proof-writer.js';
|
|
10
10
|
import { validateCompletionProof } from '../core/proof/validation.js';
|
|
11
|
+
import { buildRuntimeProofSummary, renderRuntimeProofSummary } from '../core/agents/runtime-proof-summary.js';
|
|
11
12
|
export async function run(_command, args = []) {
|
|
12
13
|
const root = await projectRoot();
|
|
13
14
|
const action = args[0] || 'show';
|
|
14
15
|
const rest = args.slice(1);
|
|
16
|
+
if (action === 'latest' && !flag(args, '--completion')) {
|
|
17
|
+
const runtime = await tryRuntimeProofSummary(root);
|
|
18
|
+
if (runtime) {
|
|
19
|
+
if (flag(args, '--json'))
|
|
20
|
+
return printJson(runtime);
|
|
21
|
+
console.log(renderRuntimeProofSummary(runtime));
|
|
22
|
+
return;
|
|
23
|
+
}
|
|
24
|
+
}
|
|
15
25
|
if (action === 'show' || action === 'latest') {
|
|
16
26
|
const proof = await withFreshSummaries(root, await readLatestProof(root));
|
|
17
27
|
if (flag(args, '--json') || action === 'latest')
|
|
@@ -122,6 +132,17 @@ export async function run(_command, args = []) {
|
|
|
122
132
|
console.error('Usage: sks proof show|latest|validate|route <mission-id|latest>|finalize <mission-id|latest> [--route route] [--strict] [--mock] [--json]|export --md|repair latest|smoke [--json]');
|
|
123
133
|
process.exitCode = 1;
|
|
124
134
|
}
|
|
135
|
+
/**
 * Best-effort lookup of the latest runtime proof summary.
 *
 * Resolves to whatever buildRuntimeProofSummary(root, 'latest') produced, except that the
 * result is replaced by null when the summary's blockers contain both
 * 'parallel_runtime_proof_missing' and 'agent_scheduler_state_missing', or when building
 * or inspecting the summary throws.
 *
 * @param root - project root forwarded to buildRuntimeProofSummary
 * @returns the summary object, or null
 */
async function tryRuntimeProofSummary(root) {
    // A summary is only discarded when it carries both of these blocker codes.
    const discardWhenAllPresent = ['parallel_runtime_proof_missing', 'agent_scheduler_state_missing'];
    try {
        const latest = await buildRuntimeProofSummary(root, 'latest');
        const nothingToReport = discardWhenAllPresent.every((code) => latest.blockers.includes(code));
        return nothingToReport ? null : latest;
    }
    catch {
        // Every failure (including a summary without a usable blockers list) maps to null.
        return null;
    }
}
|
|
125
146
|
async function withFreshSummaries(root, proof) {
|
|
126
147
|
const evidence = await collectProofEvidence(root);
|
|
127
148
|
return {
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { renderZellijSlotPaneFromArtifacts } from '../core/zellij/zellij-slot-pane-renderer.js';
|
|
1
|
+
import { renderZellijSlotPaneFromArtifacts, renderZellijSlotPaneStatusFromArtifacts } from '../core/zellij/zellij-slot-pane-renderer.js';
|
|
2
2
|
export async function run(_command = 'zellij-slot-pane', args = []) {
|
|
3
3
|
const artifactDir = readOption(args, '--artifact-dir', process.cwd()) || process.cwd();
|
|
4
4
|
const artifactRoot = readOption(args, '--artifact-root', artifactDir) || artifactDir;
|
|
@@ -9,7 +9,13 @@ export async function run(_command = 'zellij-slot-pane', args = []) {
|
|
|
9
9
|
const role = readOption(args, '--role', null);
|
|
10
10
|
const mode = readOption(args, '--mode', 'compact-slots');
|
|
11
11
|
const watch = hasFlag(args, '--watch');
|
|
12
|
+
const json = hasFlag(args, '--json');
|
|
12
13
|
const intervalMs = Math.max(250, Number(readOption(args, '--interval-ms', '1000') || 1000));
|
|
14
|
+
if (json) {
|
|
15
|
+
const status = await renderZellijSlotPaneStatusFromArtifacts({ artifactDir, artifactRoot, missionId, slotId, generationIndex });
|
|
16
|
+
console.log(JSON.stringify(status, null, 2));
|
|
17
|
+
return;
|
|
18
|
+
}
|
|
13
19
|
for (;;) {
|
|
14
20
|
const text = await renderZellijSlotPaneFromArtifacts({ artifactDir, artifactRoot, missionId, slotId, generationIndex, backend, role, mode });
|
|
15
21
|
process.stdout.write('\x1Bc' + text + '\n');
|
|
@@ -476,7 +476,9 @@ export async function runNativeAgentOrchestrator(opts = {}) {
|
|
|
476
476
|
targetActiveSlots,
|
|
477
477
|
visiblePanes: visualLaneCount,
|
|
478
478
|
expectedWorkerRuntimeMs: targetActiveSlots >= 10 ? 8000 : targetActiveSlots >= 2 ? 2000 : 25,
|
|
479
|
-
minActiveWorkers: Math.min(targetActiveSlots, desiredWorkItemCount)
|
|
479
|
+
minActiveWorkers: Math.min(targetActiveSlots, desiredWorkItemCount),
|
|
480
|
+
proofMode: opts.mock === true ? 'mock-process' : 'production',
|
|
481
|
+
requireWorkerPids: opts.nativeCliSwarm !== false && targetActiveSlots >= 16
|
|
480
482
|
});
|
|
481
483
|
const results = scheduler.results;
|
|
482
484
|
const nativeCliSessionProof = await writeNativeCliSessionProof(ledgerRoot, {
|
|
@@ -21,6 +21,8 @@ export async function runAgentScheduler(input) {
|
|
|
21
21
|
const active = new Map();
|
|
22
22
|
const results = [];
|
|
23
23
|
const schedulerStartedAt = Date.now();
|
|
24
|
+
let lastUtilizationUpdateMs = schedulerStartedAt;
|
|
25
|
+
let activeSlotTimeMs = 0;
|
|
24
26
|
let batchCounter = 0;
|
|
25
27
|
let batchLaunchSpanTotalMs = 0;
|
|
26
28
|
let batchDispatchInProgress = false;
|
|
@@ -43,6 +45,7 @@ export async function runAgentScheduler(input) {
|
|
|
43
45
|
if (!entry)
|
|
44
46
|
continue;
|
|
45
47
|
const activeCountBeforeClose = active.size;
|
|
48
|
+
accumulateActiveSlotTime();
|
|
46
49
|
active.delete(settled.session_id);
|
|
47
50
|
const resultStatus = settled.result?.status === 'done' ? 'completed' : settled.result?.status === 'blocked' ? 'blocked' : 'failed';
|
|
48
51
|
completeWorkItem(queue, entry.work_item_id, settled.session_id, resultStatus, settled.error || null);
|
|
@@ -70,6 +73,7 @@ export async function runAgentScheduler(input) {
|
|
|
70
73
|
const pendingAfterClose = pendingWorkItems(queue).length;
|
|
71
74
|
if (pendingAfterClose > 0)
|
|
72
75
|
state.expected_backfill_count += 1;
|
|
76
|
+
updateUtilizationMetrics();
|
|
73
77
|
await writeAll(input.root, state, slots, queue, active, {
|
|
74
78
|
event_type: 'session_completed',
|
|
75
79
|
session_id: settled.session_id,
|
|
@@ -85,6 +89,7 @@ export async function runAgentScheduler(input) {
|
|
|
85
89
|
closed_at_ms: Date.now()
|
|
86
90
|
} : null);
|
|
87
91
|
}
|
|
92
|
+
updateUtilizationMetrics();
|
|
88
93
|
state.status = 'draining';
|
|
89
94
|
await writeAll(input.root, state, slots, queue, active, { event_type: 'scheduler_draining' }, input.onSchedulerEvent);
|
|
90
95
|
slots = closeWorkerSlotsAfterDrain(slots);
|
|
@@ -204,6 +209,7 @@ export async function runAgentScheduler(input) {
|
|
|
204
209
|
error: err instanceof Error ? err.message : String(err),
|
|
205
210
|
terminal_close_report_path: path.join(generation.artifact_dir, 'agent-terminal-close-report.json')
|
|
206
211
|
}));
|
|
212
|
+
accumulateActiveSlotTime();
|
|
207
213
|
active.set(generation.session_id, { slot_id: slot.slot_id, work_item_id: workItem.id, session_id: generation.session_id, promise });
|
|
208
214
|
}
|
|
209
215
|
await appendAgentWorkQueueEvent(input.root, 'batch_work_items_dispatched', {
|
|
@@ -317,11 +323,18 @@ export async function runAgentScheduler(input) {
|
|
|
317
323
|
return launches;
|
|
318
324
|
}
|
|
319
325
|
function updateUtilizationMetrics() {
|
|
326
|
+
accumulateActiveSlotTime();
|
|
320
327
|
state.wall_time_ms = Math.max(0, Date.now() - schedulerStartedAt);
|
|
321
|
-
state.active_slot_time_ms =
|
|
328
|
+
state.active_slot_time_ms = activeSlotTimeMs;
|
|
322
329
|
const denominator = Math.max(1, state.wall_time_ms * targetActiveSlots);
|
|
323
330
|
state.scheduler_utilization = Number(Math.min(1, state.active_slot_time_ms / denominator).toFixed(3));
|
|
324
331
|
}
|
|
332
|
+
function accumulateActiveSlotTime() {
|
|
333
|
+
const now = Date.now();
|
|
334
|
+
const delta = Math.max(0, now - lastUtilizationUpdateMs);
|
|
335
|
+
activeSlotTimeMs += active.size * delta;
|
|
336
|
+
lastUtilizationUpdateMs = now;
|
|
337
|
+
}
|
|
325
338
|
}
|
|
326
339
|
export function normalizeTargetActiveSlots(value, maxActiveSlots = MAX_AGENT_COUNT) {
|
|
327
340
|
const cap = Number.isFinite(Number(maxActiveSlots)) && Number(maxActiveSlots) >= 1 ? Math.floor(Number(maxActiveSlots)) : MAX_AGENT_COUNT;
|
|
@@ -8,7 +8,7 @@ import { closeWorkerPane, openWorkerPane } from '../zellij/zellij-worker-pane-ma
|
|
|
8
8
|
import { closeWorkerInRightColumn, recordHeadlessWorkerInRightColumn } from '../zellij/zellij-right-column-manager.js';
|
|
9
9
|
import { resolveProviderContext } from '../provider/provider-context.js';
|
|
10
10
|
import { buildZellijSlotPaneCommand } from '../zellij/zellij-slot-pane-renderer.js';
|
|
11
|
-
import {
|
|
11
|
+
import { resolveZellijWorkerPaneUiMode } from '../zellij/zellij-ui-mode.js';
|
|
12
12
|
import { appendZellijSlotTelemetry } from '../zellij/zellij-slot-telemetry.js';
|
|
13
13
|
import { appendParallelRuntimeEvent } from './parallel-runtime-proof.js';
|
|
14
14
|
export const NATIVE_CLI_SESSION_SWARM_SCHEMA = 'sks.agent-native-cli-session-swarm.v1';
|
|
@@ -278,7 +278,8 @@ class NativeCliSessionSwarmRecorder {
|
|
|
278
278
|
route: this.input.route,
|
|
279
279
|
serviceTier: this.input.fastModePolicy.service_tier
|
|
280
280
|
});
|
|
281
|
-
const uiMode =
|
|
281
|
+
const uiMode = resolveZellijWorkerPaneUiMode(Array.isArray(input.ctx.opts.args) ? input.ctx.opts.args : [], process.env);
|
|
282
|
+
const liveWorkerPane = uiMode !== 'compact-slots';
|
|
282
283
|
const workerEnv = {
|
|
283
284
|
...(input.ctx.opts.env || {}),
|
|
284
285
|
...fastModeEnv(this.input.fastModePolicy),
|
|
@@ -300,7 +301,7 @@ class NativeCliSessionSwarmRecorder {
|
|
|
300
301
|
artifacts: [path.join(input.workerDirRel, 'worker-intake.json'), input.heartbeatRel, input.resultRel],
|
|
301
302
|
logTail: `zellij=${sessionName}`
|
|
302
303
|
});
|
|
303
|
-
const workerCommand =
|
|
304
|
+
const workerCommand = liveWorkerPane
|
|
304
305
|
? buildPaneWorkerCommand({
|
|
305
306
|
args: input.args,
|
|
306
307
|
stdoutPath: path.join(this.root, input.stdoutRel),
|
|
@@ -332,7 +333,7 @@ class NativeCliSessionSwarmRecorder {
|
|
|
332
333
|
mode: uiMode,
|
|
333
334
|
watch: true
|
|
334
335
|
});
|
|
335
|
-
const processRun =
|
|
336
|
+
const processRun = liveWorkerPane
|
|
336
337
|
? null
|
|
337
338
|
: await this.spawnCompactSlotWorkerProcess({
|
|
338
339
|
args: input.args,
|
|
@@ -405,7 +406,7 @@ class NativeCliSessionSwarmRecorder {
|
|
|
405
406
|
const zellijRequired = process.env.SKS_REQUIRE_ZELLIJ === '1';
|
|
406
407
|
const launchBlockers = zellijRequired ? paneRecord.blockers || [] : [];
|
|
407
408
|
const launchWarnings = zellijRequired ? [] : paneRecord.blockers || [];
|
|
408
|
-
input.record.command_line = ['zellij', '--session', sessionName, 'action', 'new-pane', '--direction', paneRecord.direction_applied, '--name', paneRecord.pane_name, '--', 'sh', '-lc',
|
|
409
|
+
input.record.command_line = ['zellij', '--session', sessionName, 'action', 'new-pane', '--direction', paneRecord.direction_applied, '--name', paneRecord.pane_name, '--', 'sh', '-lc', liveWorkerPane ? '<native-cli-worker-command>' : '<zellij-slot-pane-renderer-command>'];
|
|
409
410
|
input.record.zellij_session_name = sessionName;
|
|
410
411
|
input.record.zellij_pane_id = paneRecord.pane_id || null;
|
|
411
412
|
input.record.zellij_pane_id_source = paneRecord.pane_id_source;
|
|
@@ -419,7 +420,7 @@ class NativeCliSessionSwarmRecorder {
|
|
|
419
420
|
input.record.provider_context = paneRecord.provider_context;
|
|
420
421
|
input.record.worktree = worktree;
|
|
421
422
|
input.record.zellij_ui_mode = uiMode;
|
|
422
|
-
input.record.slot_visualization =
|
|
423
|
+
input.record.slot_visualization = liveWorkerPane ? 'worker-command-pane' : 'zellij-slot-pane-renderer';
|
|
423
424
|
input.record.status = launchBlockers.length ? 'failed' : 'running';
|
|
424
425
|
input.record.blockers = launchBlockers;
|
|
425
426
|
input.record.warnings = [...(input.record.warnings || []), ...launchWarnings];
|
|
@@ -741,7 +742,10 @@ export function buildPaneWorkerCommand(input) {
|
|
|
741
742
|
const holdMs = Math.max(0, Number(process.env.SKS_ZELLIJ_WORKER_PANE_HOLD_MS || 1500));
|
|
742
743
|
const hold = holdMs > 0 ? `sleep ${shellQuote(String(Math.min(30, holdMs / 1000)))}` : ':';
|
|
743
744
|
const header = input.header ? `printf '%s\\n' ${shellQuote(input.header)} | tee -a ${shellQuote(input.stdoutPath)};` : '';
|
|
744
|
-
|
|
745
|
+
const exitPath = `${input.heartbeatPath}.exit`;
|
|
746
|
+
const visibleCommand = `(${command}; printf '%s' "$?" > ${shellQuote(exitPath)}) 2>&1 | tee -a ${shellQuote(input.stdoutPath)}`;
|
|
747
|
+
const readExit = `code=$(cat ${shellQuote(exitPath)} 2>/dev/null || printf '1'); rm -f ${shellQuote(exitPath)}`;
|
|
748
|
+
return `${envPrefix.join(' ')} ${header} ${visibleCommand}; ${readExit}; ${heartbeat}; ${hold}; exit $code`.trim();
|
|
745
749
|
}
|
|
746
750
|
function buildPaneWorkerHeader(input) {
|
|
747
751
|
return [
|
|
@@ -126,18 +126,33 @@ export async function runNativeCliWorker(input = {}) {
|
|
|
126
126
|
});
|
|
127
127
|
await writeJsonAtomic(path.join(workerDir, 'worker-recursion-guard.json'), guard);
|
|
128
128
|
let noPatchReason = null;
|
|
129
|
-
const
|
|
129
|
+
const progressTelemetry = startWorkerProgressTelemetry({
|
|
130
130
|
agentRoot,
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
patchRel,
|
|
131
|
+
heartbeatRel,
|
|
132
|
+
intake,
|
|
134
133
|
agent,
|
|
135
134
|
slice,
|
|
136
|
-
intake: { ...intake, ...input },
|
|
137
135
|
backend,
|
|
138
|
-
|
|
139
|
-
guard
|
|
136
|
+
serviceTier: policy.service_tier
|
|
140
137
|
});
|
|
138
|
+
let routed;
|
|
139
|
+
try {
|
|
140
|
+
routed = await runNativeWorkerBackendRouter({
|
|
141
|
+
agentRoot,
|
|
142
|
+
workerDirRel,
|
|
143
|
+
resultRel,
|
|
144
|
+
patchRel,
|
|
145
|
+
agent,
|
|
146
|
+
slice,
|
|
147
|
+
intake: { ...intake, ...input },
|
|
148
|
+
backend,
|
|
149
|
+
fastModePolicy: policy,
|
|
150
|
+
guard
|
|
151
|
+
});
|
|
152
|
+
}
|
|
153
|
+
finally {
|
|
154
|
+
progressTelemetry.stop();
|
|
155
|
+
}
|
|
141
156
|
const patchEnvelopes = routed.patchEnvelopes;
|
|
142
157
|
if (patchEnvelopes.length) {
|
|
143
158
|
await writeJsonAtomic(path.resolve(agentRoot, patchRel), {
|
|
@@ -330,6 +345,39 @@ export async function runNativeCliWorker(input = {}) {
|
|
|
330
345
|
function delay(ms) {
|
|
331
346
|
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
332
347
|
}
|
|
348
|
+
/**
 * Starts a repeating progress emitter for a running worker backend.
 *
 * On every tick it increments a counter, appends a 'progress' heartbeat record to the
 * JSONL file at path.resolve(input.agentRoot, input.heartbeatRel), and sends a
 * 'task_progress' event through workerTelemetry. Rejections from either call are ignored.
 *
 * The tick period comes from SKS_ZELLIJ_WORKER_PROGRESS_MS (default 2000), floored to an
 * integer and never below 500 ms; a non-numeric value falls back to 2000.
 *
 * @param input - reads agentRoot, heartbeatRel, intake, agent, slice, backend, serviceTier
 * @returns an object whose stop() clears the interval
 */
function startWorkerProgressTelemetry(input) {
    const fallbackIntervalMs = 2000;
    const minimumIntervalMs = 500;
    const requestedMs = Number(process.env.SKS_ZELLIJ_WORKER_PROGRESS_MS || fallbackIntervalMs);
    const usableMs = Number.isFinite(requestedMs) ? Math.floor(requestedMs) : fallbackIntervalMs;
    const intervalMs = Math.max(minimumIntervalMs, usableMs);
    const ignoreRejection = () => undefined;
    let tick = 0;
    const emitProgress = () => {
        tick += 1;
        const progress = { done: tick, total: 0, label: 'backend running' };
        const heartbeatFile = path.resolve(input.agentRoot, input.heartbeatRel);
        const heartbeatRecord = {
            schema: 'sks.native-cli-worker-heartbeat.v1',
            ts: nowIso(),
            event: 'progress',
            pid: process.pid,
            session_id: input.agent.session_id,
            slot_id: input.agent.slot_id || null,
            generation_index: input.agent.generation_index || null,
            progress
        };
        appendJsonl(heartbeatFile, heartbeatRecord).catch(ignoreRejection);
        const telemetryEvent = {
            eventType: 'task_progress',
            status: 'running',
            backend: input.backend,
            serviceTier: input.serviceTier,
            artifacts: [input.heartbeatRel],
            progress,
            logTail: `backend running ${tick}`
        };
        workerTelemetry(input.agentRoot, input.intake, input.agent, input.slice, telemetryEvent).catch(ignoreRejection);
    };
    const timer = setInterval(emitProgress, intervalMs);
    return {
        stop() {
            clearInterval(timer);
        }
    };
}
|
|
333
381
|
function parseNativeCliWorkerArgs(args) {
|
|
334
382
|
return {
|
|
335
383
|
intake: readOption(args, '--intake', ''),
|
|
@@ -389,6 +437,7 @@ async function workerTelemetry(agentRoot, intake, agent, slice, input) {
|
|
|
389
437
|
worktree_path: agent.worktree?.path || slice.worktree?.path || intake.worktree?.path || null,
|
|
390
438
|
task_title: String(slice.description || slice.title || slice.id || 'worker task'),
|
|
391
439
|
current_file: firstString([slice.write_paths?.[0], slice.readonly_paths?.[0], slice.input_files?.[0]]) || null,
|
|
440
|
+
...(input.progress ? { progress: input.progress } : {}),
|
|
392
441
|
artifact_paths: input.artifacts || [],
|
|
393
442
|
log_tail: input.logTail || '',
|
|
394
443
|
blockers: input.blockers || []
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import path from 'node:path';
|
|
2
|
-
import { appendJsonlBounded, ensureDir, nowIso, readText, writeJsonAtomic } from '../fsx.js';
|
|
2
|
+
import { appendJsonlBounded, ensureDir, nowIso, readJson, readText, writeJsonAtomic } from '../fsx.js';
|
|
3
3
|
export const PARALLEL_RUNTIME_EVENT_SCHEMA = 'sks.parallel-runtime-event.v1';
|
|
4
4
|
export const PARALLEL_RUNTIME_PROOF_SCHEMA = 'sks.parallel-runtime-proof.v1';
|
|
5
5
|
export function parallelRuntimeEventPath(root, missionId) {
|
|
@@ -92,6 +92,9 @@ export async function buildParallelRuntimeProof(root, missionId, opts = {}) {
|
|
|
92
92
|
}
|
|
93
93
|
const requestedWorkers = positiveInt(opts.requestedWorkers, workerStarts.size || workerPids.size || maxWorkers);
|
|
94
94
|
const targetActiveSlots = positiveInt(opts.targetActiveSlots, requestedWorkers);
|
|
95
|
+
const proofMode = opts.proofMode || 'production';
|
|
96
|
+
const allowMissingPids = proofMode === 'in-process-fixture' && opts.allowMissingPids === true;
|
|
97
|
+
const requireWorkerPids = opts.requireWorkerPids ?? (proofMode === 'production' && requestedWorkers >= 16);
|
|
95
98
|
const wallMs = Math.max(0, lastMs - firstMs);
|
|
96
99
|
const sequentialEstimateMs = workerDurations.length
|
|
97
100
|
? workerDurations.reduce((sum, value) => sum + value, 0)
|
|
@@ -107,17 +110,33 @@ export async function buildParallelRuntimeProof(root, missionId, opts = {}) {
|
|
|
107
110
|
const launchEvents = sorted.filter((event) => event.event_type === 'worker_launch_invoked' || event.event_type === 'worker_process_spawned');
|
|
108
111
|
const launchSpanMs = launchEvents.length ? Math.max(...launchEvents.map((event) => event.ms)) - Math.min(...launchEvents.map((event) => event.ms)) : 0;
|
|
109
112
|
const firstBatchLimit = positiveInt(opts.firstBatchLaunchSpanLimitMs, requestedWorkers >= 16 ? 2500 : 30000);
|
|
110
|
-
const
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
113
|
+
const schedulerState = await readJson(path.join(root, 'agent-scheduler-state.json'), null).catch(() => null);
|
|
114
|
+
const coalescedOverlapWindows = coalesceOverlapWindows(overlapWindows);
|
|
115
|
+
const utilizationProofConsistency = buildUtilizationProofConsistency(schedulerState, {
|
|
116
|
+
proofMaxActive: maxWorkers,
|
|
117
|
+
proofWallMs: wallMs,
|
|
118
|
+
proofActiveSlotTimeMs: activeSlotTimeMsFromWindows(coalescedOverlapWindows)
|
|
119
|
+
});
|
|
120
|
+
const blockers = [];
|
|
121
|
+
if (!sorted.length)
|
|
122
|
+
blockers.push('parallel_runtime_events_missing');
|
|
123
|
+
if (minActiveWorkers > 0 && maxWorkers < minActiveWorkers)
|
|
124
|
+
blockers.push('max_observed_active_workers_below_target');
|
|
125
|
+
if (requireWorkerPids && workerPids.size < minActiveWorkers)
|
|
126
|
+
blockers.push('unique_worker_pids_below_target');
|
|
127
|
+
if (requireWorkerPids && workerPids.size === 0)
|
|
128
|
+
blockers.push('unique_worker_pids_missing_in_production_proof');
|
|
129
|
+
if (speedupRatio < minSpeedup)
|
|
130
|
+
blockers.push('speedup_ratio_below_target');
|
|
131
|
+
if (firstBatchLaunchSpanMs > firstBatchLimit)
|
|
132
|
+
blockers.push('first_batch_launch_span_above_limit');
|
|
117
133
|
return {
|
|
118
134
|
schema: PARALLEL_RUNTIME_PROOF_SCHEMA,
|
|
119
135
|
mission_id: missionId,
|
|
120
136
|
generated_at: nowIso(),
|
|
137
|
+
proof_mode: proofMode,
|
|
138
|
+
require_worker_pids: requireWorkerPids,
|
|
139
|
+
allow_missing_pids: allowMissingPids,
|
|
121
140
|
requested_workers: requestedWorkers,
|
|
122
141
|
target_active_slots: targetActiveSlots,
|
|
123
142
|
max_observed_active_workers: maxWorkers,
|
|
@@ -130,9 +149,10 @@ export async function buildParallelRuntimeProof(root, missionId, opts = {}) {
|
|
|
130
149
|
wall_ms: wallMs,
|
|
131
150
|
sequential_estimate_ms: sequentialEstimateMs,
|
|
132
151
|
speedup_ratio: speedupRatio,
|
|
133
|
-
overlap_windows:
|
|
152
|
+
overlap_windows: coalescedOverlapWindows,
|
|
134
153
|
visible_panes: visiblePanes,
|
|
135
154
|
headless_workers: headlessWorkers,
|
|
155
|
+
utilization_proof_consistency: utilizationProofConsistency,
|
|
136
156
|
passed: blockers.length === 0,
|
|
137
157
|
blockers
|
|
138
158
|
};
|
|
@@ -196,6 +216,45 @@ function nonNegativeInt(value, fallback) {
|
|
|
196
216
|
return Math.max(0, Math.floor(fallback || 0));
|
|
197
217
|
return Math.floor(parsed);
|
|
198
218
|
}
|
|
219
|
+
/**
 * Compares scheduler-reported utilization numbers with the numbers derived for the proof
 * and returns a consistency report.
 *
 * When `state` is missing or not an object the report is `ok: true` with all scheduler
 * figures and deltas set to 0. Otherwise `ok` requires the max-active counts to differ by
 * at most 1 and either the wall times or the active-slot times to agree within tolerance.
 *
 * @param state - scheduler state; reads max_observed_active_slots, wall_time_ms, active_slot_time_ms
 * @param input - reads proofMaxActive, proofWallMs, proofActiveSlotTimeMs
 * @returns report with ok, scheduler/proof max-active, wall and active-slot deltas, and the
 *          observation-delay tolerance
 */
function buildUtilizationProofConsistency(state, input) {
    const { proofMaxActive, proofWallMs, proofActiveSlotTimeMs } = input;
    // One factory for both return paths so the report keys always appear in the same order.
    const report = (ok, schedulerMaxActive, wallDelta, schedulerActiveSlotTimeMs, activeSlotDelta, delayToleranceMs) => ({
        ok,
        scheduler_max_active: schedulerMaxActive,
        proof_max_active: proofMaxActive,
        wall_ms_delta: wallDelta,
        scheduler_active_slot_time_ms: schedulerActiveSlotTimeMs,
        proof_active_slot_time_ms: proofActiveSlotTimeMs,
        active_slot_time_ms_delta: activeSlotDelta,
        scheduler_observation_delay_tolerance_ms: delayToleranceMs
    });
    if (!state || typeof state !== 'object') {
        return report(true, 0, 0, 0, 0, 0);
    }
    // Tolerance is the larger of 500 ms and 25% of the bigger of the two compared values.
    const quarterTolerance = (left, right) => Math.max(500, Math.round(Math.max(left, right) * 0.25));
    const schedulerMaxActive = nonNegativeInt(state.max_observed_active_slots, 0);
    const schedulerWallMs = nonNegativeInt(state.wall_time_ms, 0);
    const schedulerActiveSlotTimeMs = nonNegativeInt(state.active_slot_time_ms, 0);
    const wallDelta = Math.abs(schedulerWallMs - proofWallMs);
    const activeSlotDelta = Math.abs(schedulerActiveSlotTimeMs - proofActiveSlotTimeMs);
    const maxActiveDelta = Math.abs(schedulerMaxActive - proofMaxActive);
    const wallToleranceMs = quarterTolerance(schedulerWallMs, proofWallMs);
    const activeSlotToleranceMs = quarterTolerance(schedulerActiveSlotTimeMs, proofActiveSlotTimeMs);
    // Wider allowance: the wall-time gap multiplied by the scheduler's max active count (at least 1).
    const observationDelayToleranceMs = Math.max(activeSlotToleranceMs, wallDelta * Math.max(1, schedulerMaxActive));
    const wallConsistent = wallDelta <= wallToleranceMs;
    const bothSidesObserved = schedulerActiveSlotTimeMs > 0 && proofActiveSlotTimeMs > 0;
    const withinBaseTolerance = activeSlotDelta <= activeSlotToleranceMs;
    // The wider allowance only applies when the scheduler figure is not below the proof figure.
    const withinDelayTolerance = schedulerActiveSlotTimeMs >= proofActiveSlotTimeMs
        && activeSlotDelta <= observationDelayToleranceMs;
    const activeSlotConsistent = bothSidesObserved && (withinBaseTolerance || withinDelayTolerance);
    const ok = maxActiveDelta <= 1 && (wallConsistent || activeSlotConsistent);
    return report(ok, schedulerMaxActive, wallDelta, schedulerActiveSlotTimeMs, activeSlotDelta, observationDelayToleranceMs);
}
|
|
255
|
+
/**
 * Sums active slot time over a list of overlap windows: for each window the
 * (non-negative) duration `end_ms - start_ms` multiplied by the (non-negative)
 * `active_workers` count.
 *
 * Windows whose duration or worker count is not a finite number are skipped,
 * so one malformed record cannot turn the whole total into NaN. A non-array
 * argument yields 0.
 *
 * @param {Array<{start_ms: number, end_ms: number, active_workers: number}>} windows
 * @returns {number} Total active slot time in milliseconds.
 */
function activeSlotTimeMsFromWindows(windows) {
    if (!Array.isArray(windows)) {
        return 0;
    }
    let totalMs = 0;
    for (const window of windows) {
        const durationMs = Number(window?.end_ms) - Number(window?.start_ms);
        const activeWorkers = Number(window?.active_workers);
        // Math.max(0, NaN) is NaN, so malformed windows must be dropped explicitly.
        if (!Number.isFinite(durationMs) || !Number.isFinite(activeWorkers)) {
            continue;
        }
        totalMs += Math.max(0, durationMs) * Math.max(0, activeWorkers);
    }
    return totalMs;
}
|
|
199
258
|
function coalesceOverlapWindows(windows) {
|
|
200
259
|
return windows
|
|
201
260
|
.filter((window) => window.end_ms > window.start_ms)
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import path from 'node:path';
|
|
2
|
+
import { findLatestMission, missionDir } from '../mission.js';
|
|
3
|
+
import { readJson, writeJsonAtomic } from '../fsx.js';
|
|
4
|
+
export const RUNTIME_PROOF_SUMMARY_SCHEMA = 'sks.runtime-proof-summary.v1';
|
|
5
|
+
/**
 * Builds a consolidated runtime proof summary for a mission and writes it to
 * `agents/runtime-proof-summary.json` inside the mission directory.
 *
 * The summary is assembled from the parallel runtime proof, scheduler state,
 * native CLI session swarm, concurrency governor and zellij slot telemetry
 * snapshot files; every one of them is optional and read with a `null`
 * fallback.
 *
 * Blockers are recorded when the parallel proof or scheduler state is missing,
 * when the parallel proof reports `passed === false`, and when the telemetry
 * snapshot is older than 3 seconds. A missing snapshot, or one whose
 * `updated_at` cannot be parsed, is treated as maximally stale.
 *
 * @param {string} root Project root passed through to the mission helpers.
 * @param {string} [missionIdInput='latest'] Mission id, or `'latest'` to
 *   resolve the most recent mission.
 * @returns {Promise<object>} The summary object that was written.
 * @throws {Error} `runtime_proof_summary_mission_missing` when no mission id
 *   can be resolved.
 */
export async function buildRuntimeProofSummary(root, missionIdInput = 'latest') {
    const TELEMETRY_STALE_AFTER_MS = 3000;
    const missionId = missionIdInput === 'latest' ? await findLatestMission(root) : missionIdInput;
    if (!missionId) {
        throw new Error('runtime_proof_summary_mission_missing');
    }
    const dir = missionDir(root, missionId);
    const agentsDir = path.join(dir, 'agents');
    // The five inputs are independent of one another, so read them concurrently.
    const [parallel, scheduler, swarm, telemetry, governor] = await Promise.all([
        readJson(path.join(agentsDir, 'parallel-runtime-proof.json'), null),
        readJson(path.join(agentsDir, 'agent-scheduler-state.json'), null),
        readJson(path.join(agentsDir, 'agent-native-cli-session-swarm.json'), null),
        readJson(path.join(dir, 'zellij', 'slot-telemetry.snapshot.json'), null),
        readJson(path.join(agentsDir, 'naruto-concurrency-governor.json'), null)
    ]);
    // Date.parse yields NaN for an unparseable timestamp; NaN would compare as
    // "not stale", so fall back to the same sentinel used for a missing snapshot.
    const telemetryUpdatedAtMs = telemetry?.updated_at ? Date.parse(telemetry.updated_at) : Number.NaN;
    const telemetryAgeMs = Number.isFinite(telemetryUpdatedAtMs)
        ? Math.max(0, Date.now() - telemetryUpdatedAtMs)
        : Number.MAX_SAFE_INTEGER;
    const telemetryStale = telemetryAgeMs > TELEMETRY_STALE_AFTER_MS;
    const visiblePanes = Number(parallel?.visible_panes ?? swarm?.zellij_pane_worker_sessions ?? telemetryVisiblePaneCount(telemetry));
    const targetActive = Number(scheduler?.target_active_slots ?? parallel?.target_active_slots ?? swarm?.target_active_slots ?? governor?.target_active_slots ?? 0);
    const headlessWorkers = Number(parallel?.headless_workers ?? swarm?.headless_overflow_worker_count ?? Math.max(0, targetActive - visiblePanes));
    // A failed proof must always contribute at least one blocker, even when the
    // proof file lists none (or carries a malformed `blockers` value).
    const failedProofBlockers = Array.isArray(parallel?.blockers) && parallel.blockers.length > 0
        ? parallel.blockers
        : ['parallel_runtime_proof_failed'];
    const blockers = [
        ...(!parallel ? ['parallel_runtime_proof_missing'] : []),
        ...(!scheduler ? ['agent_scheduler_state_missing'] : []),
        ...(parallel?.passed === false ? failedProofBlockers : []),
        ...(telemetryStale ? ['zellij_telemetry_stale'] : [])
    ].map(String);
    const summary = {
        schema: RUNTIME_PROOF_SUMMARY_SCHEMA,
        ok: blockers.length === 0,
        mission_id: missionId,
        generated_at: new Date().toISOString(),
        parallel: {
            max_active_workers: Number(parallel?.max_observed_active_workers || scheduler?.max_observed_active_slots || 0),
            unique_worker_pids: Number(parallel?.unique_worker_pids || uniqueNumbers(swarm?.process_ids).length || 0),
            speedup_ratio: Number(parallel?.speedup_ratio || 0),
            proof_passed: parallel?.passed === true
        },
        ui: {
            visible_panes: visiblePanes,
            headless_workers: headlessWorkers,
            telemetry_age_ms: telemetryAgeMs,
            stale: telemetryStale
        },
        model_calls: {
            max_observed: Number(parallel?.max_observed_model_calls || 0),
            unique_model_call_ids: Number(parallel?.unique_model_call_ids || 0)
        },
        scheduler: {
            largest_batch_size: Number(scheduler?.largest_batch_size || 0),
            utilization: Number(scheduler?.scheduler_utilization || 0)
        },
        blockers
    };
    await writeJsonAtomic(path.join(agentsDir, 'runtime-proof-summary.json'), summary);
    return summary;
}
|
|
56
|
+
/**
 * Renders a runtime proof summary as newline-separated, human-readable text.
 *
 * The telemetry line shows the snapshot age in seconds. When the age is the
 * `Number.MAX_SAFE_INTEGER` sentinel or not a finite number, it shows
 * "(age unknown)" rather than a meaningless duration. A trailing "Blockers"
 * line is added only when the summary lists blockers.
 *
 * @param {object} summary Summary with `parallel`, `ui`, `model_calls` and
 *   `blockers` sections.
 * @returns {string} Multi-line report.
 */
export function renderRuntimeProofSummary(summary) {
    const { parallel, ui, model_calls: modelCalls } = summary;
    const blockers = Array.isArray(summary.blockers) ? summary.blockers : [];
    const freshness = ui.stale ? 'stale' : 'fresh';
    const ageMs = Number(ui.telemetry_age_ms);
    // The sentinel age means "no usable telemetry timestamp"; do not print it as seconds.
    const ageKnown = Number.isFinite(ageMs) && ageMs < Number.MAX_SAFE_INTEGER;
    const ageLabel = ageKnown ? `${(ageMs / 1000).toFixed(1)}s` : '(age unknown)';
    const lines = [
        `Parallel proof: ${parallel.proof_passed ? 'passed' : 'blocked'}`,
        `Active workers: ${parallel.max_active_workers}`,
        `Unique PIDs: ${parallel.unique_worker_pids}`,
        `Speedup: ${parallel.speedup_ratio}x`,
        `Visible/headless: ${ui.visible_panes} / ${ui.headless_workers}`,
        `Telemetry: ${freshness} ${ageLabel}`,
        `Model calls max: ${modelCalls.max_observed}`
    ];
    if (blockers.length > 0) {
        lines.push(`Blockers: ${blockers.join(', ')}`);
    }
    return lines.join('\n');
}
|
|
68
|
+
/**
 * Counts the slots in a telemetry snapshot that have a truthy `status` other
 * than `'headless'`.
 *
 * @param {object|null|undefined} snapshot Snapshot whose `slots` property, if
 *   it is an object, is enumerated via `Object.values`.
 * @returns {number} Number of matching slots; 0 when `slots` is absent or not
 *   an object.
 */
function telemetryVisiblePaneCount(snapshot) {
    const slotMap = snapshot?.slots;
    if (!slotMap || typeof slotMap !== 'object') {
        return 0;
    }
    let visible = 0;
    for (const row of Object.values(slotMap)) {
        if (row?.status && row.status !== 'headless') {
            visible += 1;
        }
    }
    return visible;
}
|
|
72
|
+
/**
 * Returns the distinct finite numbers found in `values`, in first-seen order.
 *
 * Only numbers, bigints and non-blank strings are considered. `null`, blank
 * strings, booleans, arrays and other objects are skipped, because `Number()`
 * would coerce several of them to 0 and report a number that was never there.
 *
 * @param {unknown} values Candidate list; anything that is not an array
 *   yields an empty result.
 * @returns {number[]} Unique finite numbers.
 */
function uniqueNumbers(values) {
    if (!Array.isArray(values)) {
        return [];
    }
    const seen = new Set();
    for (const value of values) {
        const isNumericCandidate = typeof value === 'number'
            || typeof value === 'bigint'
            || (typeof value === 'string' && value.trim() !== '');
        if (!isNumericCandidate) {
            continue;
        }
        const parsed = Number(value);
        if (Number.isFinite(parsed)) {
            seen.add(parsed);
        }
    }
    return [...seen];
}
|
|
75
|
+
//# sourceMappingURL=runtime-proof-summary.js.map
|