sneakoscope 4.0.10 → 4.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -1
- package/crates/sks-core/Cargo.lock +1 -1
- package/crates/sks-core/Cargo.toml +1 -1
- package/crates/sks-core/src/main.rs +1 -1
- package/dist/bin/sks.js +1 -1
- package/dist/core/fsx.js +1 -1
- package/dist/core/providers/glm/naruto/glm-naruto-apply-transaction.js +136 -0
- package/dist/core/providers/glm/naruto/glm-naruto-bench.js +97 -14
- package/dist/core/providers/glm/naruto/glm-naruto-command.js +15 -22
- package/dist/core/providers/glm/naruto/glm-naruto-concurrency-governor.js +10 -9
- package/dist/core/providers/glm/naruto/glm-naruto-decomposer.js +54 -4
- package/dist/core/providers/glm/naruto/glm-naruto-final-seal.js +75 -0
- package/dist/core/providers/glm/naruto/glm-naruto-finalizer.js +1 -0
- package/dist/core/providers/glm/naruto/glm-naruto-isolation-policy.js +38 -0
- package/dist/core/providers/glm/naruto/glm-naruto-merge-planner.js +9 -4
- package/dist/core/providers/glm/naruto/glm-naruto-metrics.js +34 -0
- package/dist/core/providers/glm/naruto/glm-naruto-orchestrator.js +136 -23
- package/dist/core/providers/glm/naruto/glm-naruto-scoreboard.js +75 -0
- package/dist/core/providers/glm/naruto/glm-naruto-secret-audit.js +54 -2
- package/dist/core/providers/glm/naruto/glm-naruto-session-id.js +10 -0
- package/dist/core/providers/glm/naruto/glm-naruto-targeted-checks.js +76 -0
- package/dist/core/providers/glm/naruto/glm-naruto-terminal.js +44 -0
- package/dist/core/providers/glm/naruto/glm-naruto-trace.js +97 -18
- package/dist/core/providers/glm/naruto/glm-naruto-usage-extractor.js +31 -0
- package/dist/core/providers/glm/naruto/glm-naruto-worker-artifacts.js +14 -0
- package/dist/core/providers/glm/naruto/glm-naruto-worker-pool.js +113 -21
- package/dist/core/providers/glm/naruto/glm-naruto-worker-runtime.js +97 -29
- package/dist/core/providers/glm/naruto/glm-naruto-worker-scheduler.js +178 -0
- package/dist/core/providers/glm/naruto/glm-naruto-worktree-cleanup.js +20 -0
- package/dist/core/providers/glm/naruto/glm-naruto-worktree-manager.js +57 -0
- package/dist/core/providers/glm/naruto/glm-naruto-worktree-worker.js +74 -0
- package/dist/core/providers/openrouter/openrouter-client.js +1 -1
- package/dist/core/providers/openrouter/openrouter-provider-health.js +3 -2
- package/dist/core/providers/openrouter/openrouter-stream.js +5 -3
- package/dist/core/version.js +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -35,7 +35,15 @@ Set up this agent project with Sneakoscope Codex. Use [[mandarange/Sneakoscope-C
|
|
|
35
35
|
|
|
36
36
|
## 🚀 Current Release
|
|
37
37
|
|
|
38
|
-
SKS **4.0.
|
|
38
|
+
SKS **4.0.12** seals GLM Naruto's production runtime path: worktree workers apply extracted unified diffs only, patch workers launch through a bounded adaptive scheduler, live bench compares true direct GLM against Naruto worker counts, final apply runs dirty-tree and targeted-check guards, and stop-gates reference a final seal artifact.
|
|
39
|
+
|
|
40
|
+
What changed in 4.0.12:
|
|
41
|
+
|
|
42
|
+
- **Extracted worktree patches.** `--worktree` parses `<sks_patch_candidate>` and records candidate/extracted patch hashes before any worker worktree apply.
|
|
43
|
+
- **Adaptive scheduler.** Patch workers use a finite launch queue with provider-health backpressure and retry-once handling for retryable 429/5xx/idle-timeout failures.
|
|
44
|
+
- **True direct-vs-Naruto bench.** `--bench --live --no-apply` compares direct GLM, Naruto 1, 4, 8, and 12 worker cases without fake zero metrics.
|
|
45
|
+
- **Transaction guards.** Final apply blocks dirty touched paths unless `--allow-dirty-apply` is explicit, runs targeted checks, and rolls back on validation failure by default.
|
|
46
|
+
- **Seal artifacts.** GLM Naruto writes `final-seal.json`, stop-gate final-seal evidence, `merge-rationale.md`, and `bench-report.md` for auditability.
|
|
39
47
|
|
|
40
48
|
What changed in 4.0.8:
|
|
41
49
|
|
|
@@ -4,7 +4,7 @@ use std::io::{self, Read, Seek, SeekFrom};
|
|
|
4
4
|
fn main() {
|
|
5
5
|
let mut args = std::env::args().skip(1);
|
|
6
6
|
match args.next().as_deref() {
|
|
7
|
-
Some("--version") => println!("sks-rs 4.0.
|
|
7
|
+
Some("--version") => println!("sks-rs 4.0.12"),
|
|
8
8
|
Some("compact-info") => {
|
|
9
9
|
let mut input = String::new();
|
|
10
10
|
let _ = io::stdin().read_to_string(&mut input);
|
package/dist/bin/sks.js
CHANGED
package/dist/core/fsx.js
CHANGED
|
@@ -5,7 +5,7 @@ import os from 'node:os';
|
|
|
5
5
|
import crypto from 'node:crypto';
|
|
6
6
|
import { spawn } from 'node:child_process';
|
|
7
7
|
import { fileURLToPath } from 'node:url';
|
|
8
|
-
export const PACKAGE_VERSION = '4.0.
|
|
8
|
+
export const PACKAGE_VERSION = '4.0.12';
|
|
9
9
|
export const DEFAULT_PROCESS_TAIL_BYTES = 256 * 1024;
|
|
10
10
|
export const DEFAULT_PROCESS_TIMEOUT_MS = 30 * 60 * 1000;
|
|
11
11
|
export function nowIso() {
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
import { spawn } from 'node:child_process';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import { sha256, writeJsonAtomic, writeTextAtomic } from '../../../fsx.js';
|
|
4
|
+
import { parseUnifiedDiffPatch } from '../glm-patch-parser.js';
|
|
5
|
+
import { combineGlmNarutoPatches } from './glm-naruto-combined-patch.js';
|
|
6
|
+
import { runGlmNarutoTargetedChecks } from './glm-naruto-targeted-checks.js';
|
|
7
|
+
/**
 * Applies the selected GLM Naruto patches to the working tree as one guarded transaction.
 *
 * Flow: snapshot `git status --short` and `git diff --binary`, combine the selected
 * envelopes into one patch, write it to `selected-combined.patch`, then
 *   1. block when the combined patch is empty,
 *   2. block when touched paths are already dirty and `input.allowDirtyApply` is not set,
 *   3. run `git apply --check`, then `git apply`,
 *   4. run the targeted checks and, when they fail, reverse-apply the patch unless
 *      `input.noRollback` is set.
 * The outcome is recorded in `apply-transaction.json` and
 * `apply-transaction-diff-hashes.json` inside `input.artifactDir`.
 *
 * @param {object} input
 * @param {string} input.cwd Directory in which every git command runs.
 * @param {string} input.artifactDir Directory that receives the patch and transaction files.
 * @param {*} input.envelopes Patch envelopes handed to `combineGlmNarutoPatches`.
 * @param {*} input.selectedPatchIds Ids of the patches to combine; echoed in the result.
 * @param {*} input.missionId Mission identifier copied into both artifacts.
 * @param {boolean} [input.allowDirtyApply] Permit applying over dirty touched paths.
 * @param {boolean} [input.noRollback] Skip the reverse apply after failed targeted checks.
 * @param {*} [input.strictChecks] Forwarded to the targeted checks only when defined.
 * @returns {Promise<{ok: boolean, applied: *, patch: string, transaction: object}>}
 *   `ok` is true only when the final status is `'applied'`; `applied` is then
 *   `input.selectedPatchIds`, otherwise an empty array.
 */
export async function runGlmNarutoApplyTransaction(input) {
    const preStatus = await gitText(input.cwd, ['status', '--short']);
    const preDiff = await gitText(input.cwd, ['diff', '--binary']);
    const patch = combineGlmNarutoPatches(input.envelopes, input.selectedPatchIds);
    const parsed = parseUnifiedDiffPatch(patch);
    const dirtyTouchedPaths = await dirtyPaths(input.cwd, parsed.touchedPaths);
    const patchPath = path.join(input.artifactDir, 'selected-combined.patch');
    await writeTextAtomic(patchPath, patch);
    const blockers = [];
    let applyCheckPassed = false;
    let applyPassed = false;
    let targetedChecksPassed = null;
    let rollbackAttempted = false;
    let rollbackPassed = null;
    let finalStatus = 'blocked';
    if (!patch.trim()) {
        blockers.push('combined_patch_empty');
    }
    else if (dirtyTouchedPaths.length > 0 && !input.allowDirtyApply) {
        blockers.push(`dirty_touched_paths_before_apply:${dirtyTouchedPaths.join(',')}`);
    }
    else {
        const checked = await gitApply(input.cwd, patch, ['apply', '--check', '--whitespace=nowarn', '-']);
        applyCheckPassed = checked.code === 0;
        if (!applyCheckPassed) {
            blockers.push(checked.stderr || checked.stdout || 'git_apply_check_failed');
        }
        else {
            const applied = await gitApply(input.cwd, patch, ['apply', '--whitespace=nowarn', '-']);
            applyPassed = applied.code === 0;
            if (!applyPassed) {
                // `git apply` without --reject is atomic: a failed apply leaves the working
                // tree untouched. Reverse-applying here could never succeed and would only
                // record a misleading failed rollback, so the transaction just stays blocked.
                blockers.push(applied.stderr || applied.stdout || 'git_apply_failed');
            }
            else {
                const targeted = await runGlmNarutoTargetedChecks({
                    cwd: input.cwd,
                    touchedPaths: parsed.touchedPaths,
                    artifactDir: input.artifactDir,
                    ...(input.strictChecks !== undefined ? { strictChecks: input.strictChecks } : {})
                });
                targetedChecksPassed = targeted.ok;
                if (targeted.ok) {
                    finalStatus = 'applied';
                }
                else {
                    blockers.push(...targeted.blockers);
                    if (!input.noRollback) {
                        // The patch is on disk but failed validation: undo it.
                        rollbackAttempted = true;
                        const rollback = await gitApply(input.cwd, patch, ['apply', '-R', '--whitespace=nowarn', '-']);
                        rollbackPassed = rollback.code === 0;
                        finalStatus = rollbackPassed ? 'rolled_back' : 'blocked';
                        if (!rollbackPassed)
                            blockers.push(rollback.stderr || rollback.stdout || 'rollback_reverse_patch_failed');
                    }
                }
            }
        }
    }
    const postDiff = await gitText(input.cwd, ['diff', '--binary']);
    // Hash each payload once; both artifacts share the same digests.
    const digests = {
        pre_diff_sha256: sha256(preDiff),
        post_diff_sha256: sha256(postDiff),
        combined_patch_sha256: sha256(patch)
    };
    const transaction = {
        schema: 'sks.glm-naruto-apply-transaction.v1',
        mission_id: input.missionId,
        selected_patch_ids: input.selectedPatchIds,
        touched_paths: parsed.touchedPaths,
        dirty_touched_paths_before_apply: dirtyTouchedPaths,
        dirty_policy: input.allowDirtyApply ? 'allow' : 'block',
        pre_status: preStatus,
        ...digests,
        apply_check_passed: applyCheckPassed,
        apply_passed: applyPassed,
        targeted_checks_passed: targetedChecksPassed,
        rollback_attempted: rollbackAttempted,
        rollback_passed: rollbackPassed,
        final_status: finalStatus,
        blockers
    };
    await writeJsonAtomic(path.join(input.artifactDir, 'apply-transaction.json'), transaction);
    await writeJsonAtomic(path.join(input.artifactDir, 'apply-transaction-diff-hashes.json'), {
        schema: 'sks.glm-naruto-apply-transaction-diff-hashes.v1',
        mission_id: input.missionId,
        ...digests
    });
    const succeeded = finalStatus === 'applied';
    return { ok: succeeded, applied: succeeded ? input.selectedPatchIds : [], patch, transaction };
}
|
|
105
|
+
/**
 * Runs `git <args>` in `cwd` and resolves with everything the command printed to stdout.
 *
 * stderr is discarded and the exit code is not inspected, so a failing command resolves
 * with whatever it managed to print (typically an empty string). The promise never rejects.
 *
 * @param {string} cwd Working directory for the git process.
 * @param {string[]} args Arguments passed to git.
 * @returns {Promise<string>} Captured stdout text.
 */
function gitText(cwd, args) {
    return new Promise((resolve) => {
        const child = spawn('git', [...args], { cwd, stdio: ['ignore', 'pipe', 'ignore'] });
        let stdout = '';
        // Decode on the stream so a multi-byte character split across two chunks is not corrupted.
        child.stdout.setEncoding('utf8');
        child.stdout.on('data', (chunk) => { stdout += chunk; });
        // Spawn failures (git not installed, cwd missing) arrive as an 'error' event;
        // without a listener Node rethrows them as an uncaught exception.
        child.on('error', () => resolve(stdout));
        child.on('close', () => resolve(stdout));
    });
}
|
|
113
|
+
/**
 * Returns the entries that `git status --short -- <paths>` reports for `paths` in `cwd`.
 *
 * Each status line has its first three characters (the two status letters and the
 * separator) removed; for `old -> new` rename lines only the new name is kept.
 *
 * @param {string} cwd Working directory for git.
 * @param {string[]} paths Paths to inspect; an empty list short-circuits to `[]`.
 * @returns {Promise<string[]>} Path text of every reported status line.
 */
async function dirtyPaths(cwd, paths) {
    if (paths.length === 0) {
        return [];
    }
    const report = await gitText(cwd, ['status', '--short', '--', ...paths]);
    const dirty = [];
    for (const entry of report.split(/\r?\n/)) {
        const statusLine = entry.trimEnd();
        if (!statusLine) {
            continue;
        }
        const name = statusLine.slice(3).trim();
        if (name.includes(' -> ')) {
            dirty.push(name.split(' -> ').pop().trim());
        }
        else {
            dirty.push(name);
        }
    }
    return dirty;
}
|
|
125
|
+
/**
 * Runs `git <args>` in `cwd`, feeding `patch` to the process on stdin.
 *
 * The promise never rejects: a process that cannot be started resolves with a
 * non-zero `code` and the spawn error text in `stderr`.
 *
 * @param {string} cwd Working directory for the git process.
 * @param {string} patch Patch text written to stdin.
 * @param {string[]} args Arguments passed to git (callers pass `apply ... -`).
 * @returns {Promise<{code: number|null, stdout: string, stderr: string}>}
 */
function gitApply(cwd, patch, args) {
    return new Promise((resolve) => {
        const child = spawn('git', [...args], { cwd, stdio: ['pipe', 'pipe', 'pipe'] });
        let stdout = '';
        let stderr = '';
        // Decode on the streams so multi-byte characters split across chunks stay intact.
        child.stdout.setEncoding('utf8');
        child.stderr.setEncoding('utf8');
        child.stdout.on('data', (chunk) => { stdout += chunk; });
        child.stderr.on('data', (chunk) => { stderr += chunk; });
        // Spawn failures (git not installed, cwd missing) arrive as an 'error' event;
        // without a listener Node rethrows them as an uncaught exception.
        child.on('error', (error) => resolve({ code: 1, stdout, stderr: stderr || `git_spawn_failed: ${error.message}` }));
        child.on('close', (code) => resolve({ code, stdout, stderr }));
        // git may exit before reading the whole patch, which surfaces as EPIPE on stdin.
        // The exit code and stderr already describe that failure, so the stream error is ignored.
        child.stdin.on('error', () => { });
        child.stdin.end(patch);
    });
}
|
|
136
|
+
//# sourceMappingURL=glm-naruto-apply-transaction.js.map
|
|
@@ -1,21 +1,25 @@
|
|
|
1
|
-
import { nowIso } from '../../../fsx.js';
|
|
1
|
+
import { nowIso, writeTextAtomic } from '../../../fsx.js';
|
|
2
2
|
import path from 'node:path';
|
|
3
3
|
import fsp from 'node:fs/promises';
|
|
4
4
|
import os from 'node:os';
|
|
5
5
|
import { GLM_52_OPENROUTER_MODEL } from '../glm-52-settings.js';
|
|
6
|
+
import { runGlmDirectSpeedRun } from '../glm-direct-run.js';
|
|
6
7
|
import { resolveOpenRouterApiKey } from '../../openrouter/openrouter-secret-store.js';
|
|
7
8
|
import { runGlmNarutoMission } from './glm-naruto-orchestrator.js';
|
|
8
|
-
|
|
9
|
+
import { summarizeGlmNarutoWorkerMetrics } from './glm-naruto-metrics.js';
|
|
10
|
+
export async function runGlmNarutoBench(root, args = [], deps = {}) {
|
|
9
11
|
const live = args.includes('--live');
|
|
10
12
|
const execute = args.includes('--execute');
|
|
11
13
|
const started = Date.now();
|
|
14
|
+
const runDirect = deps.runDirect ?? runGlmDirectSpeedRun;
|
|
15
|
+
const runNaruto = deps.runNaruto ?? runGlmNarutoMission;
|
|
12
16
|
if (execute && !live) {
|
|
13
17
|
return blocked(root, ['execute_requires_live_flag']);
|
|
14
18
|
}
|
|
15
19
|
if (!live) {
|
|
16
20
|
return {
|
|
17
21
|
schema: 'sks.glm-naruto-bench.v1',
|
|
18
|
-
version: '4.0.
|
|
22
|
+
version: '4.0.12',
|
|
19
23
|
generated_at: nowIso(),
|
|
20
24
|
status: 'dry_run',
|
|
21
25
|
model: GLM_52_OPENROUTER_MODEL,
|
|
@@ -38,9 +42,17 @@ export async function runGlmNarutoBench(root, args = []) {
|
|
|
38
42
|
await fsp.mkdir(path.join(fixture, 'src'), { recursive: true });
|
|
39
43
|
await fsp.writeFile(path.join(fixture, 'src', 'bench-target.ts'), 'export const value = 1;\n', 'utf8');
|
|
40
44
|
const cases = [];
|
|
45
|
+
const directStarted = Date.now();
|
|
46
|
+
const direct = await runDirect({
|
|
47
|
+
cwd: fixture,
|
|
48
|
+
task: 'Change src/bench-target.ts so value is 2. Return the smallest patch only.',
|
|
49
|
+
args: ['--bench', '--live', '--dry-run'],
|
|
50
|
+
dryRun: true
|
|
51
|
+
});
|
|
52
|
+
cases.push(directBenchCase(direct, Date.now() - directStarted));
|
|
41
53
|
for (const workers of [1, 4, 8, 12]) {
|
|
42
54
|
const caseStarted = Date.now();
|
|
43
|
-
const result = await
|
|
55
|
+
const result = await runNaruto({
|
|
44
56
|
cwd: fixture,
|
|
45
57
|
task: 'Change src/bench-target.ts so value is 2. Return the smallest patch only.',
|
|
46
58
|
args: ['--bench', '--live', '--no-apply'],
|
|
@@ -48,23 +60,33 @@ export async function runGlmNarutoBench(root, args = []) {
|
|
|
48
60
|
maxWorkers: workers,
|
|
49
61
|
noApply: true
|
|
50
62
|
});
|
|
63
|
+
const traces = await readWorkerTraces(result.artifact_dir);
|
|
64
|
+
const metrics = summarizeGlmNarutoWorkerMetrics(traces);
|
|
51
65
|
cases.push({
|
|
52
|
-
name: workers === 1 ? '
|
|
66
|
+
name: `GLM Naruto ${workers} worker${workers === 1 ? '' : 's'}`,
|
|
67
|
+
kind: 'glm-naruto',
|
|
53
68
|
workers,
|
|
54
69
|
wall_clock_ms: Date.now() - caseStarted,
|
|
55
|
-
p50_ttft_ms:
|
|
56
|
-
p90_ttft_ms:
|
|
70
|
+
p50_ttft_ms: metrics.p50_ttft_ms,
|
|
71
|
+
p90_ttft_ms: metrics.p90_ttft_ms,
|
|
72
|
+
p50_total_ms: metrics.p50_total_ms,
|
|
73
|
+
p90_total_ms: metrics.p90_total_ms,
|
|
57
74
|
candidate_count: result.patch_candidates,
|
|
58
|
-
gate_pass_rate: result.patch_candidates ? result.gate_passed_candidates / result.patch_candidates :
|
|
59
|
-
verifier_pass_rate:
|
|
75
|
+
gate_pass_rate: result.patch_candidates ? result.gate_passed_candidates / result.patch_candidates : null,
|
|
76
|
+
verifier_pass_rate: metrics.verifier_pass_rate,
|
|
60
77
|
merge_success: result.mergeable_candidates > 0,
|
|
61
|
-
|
|
62
|
-
|
|
78
|
+
cached_tokens_sum: metrics.cached_tokens_sum,
|
|
79
|
+
cache_write_tokens_sum: metrics.cache_write_tokens_sum,
|
|
80
|
+
reasoning_tokens_sum: metrics.reasoning_tokens_sum,
|
|
81
|
+
metric_status: metrics.p50_total_ms === null && metrics.p50_ttft_ms === null ? 'unavailable' : 'measured',
|
|
82
|
+
workers_completed: metrics.workers_completed,
|
|
83
|
+
workers_failed: metrics.workers_failed
|
|
63
84
|
});
|
|
64
85
|
}
|
|
86
|
+
await writeBenchReport(root, cases).catch(() => undefined);
|
|
65
87
|
return {
|
|
66
88
|
schema: 'sks.glm-naruto-bench.v1',
|
|
67
|
-
version: '4.0.
|
|
89
|
+
version: '4.0.12',
|
|
68
90
|
generated_at: nowIso(),
|
|
69
91
|
status: 'live',
|
|
70
92
|
model: GLM_52_OPENROUTER_MODEL,
|
|
@@ -74,17 +96,48 @@ export async function runGlmNarutoBench(root, args = []) {
|
|
|
74
96
|
simulated_workers: Math.max(...cases.map((row) => row.workers)),
|
|
75
97
|
simulated_waves: cases.length,
|
|
76
98
|
simulated_patch_candidates: cases.reduce((sum, row) => sum + row.candidate_count, 0),
|
|
77
|
-
simulated_gate_passed: cases.reduce((sum, row) => sum + Math.round(row.candidate_count * row.gate_pass_rate), 0),
|
|
99
|
+
simulated_gate_passed: cases.reduce((sum, row) => sum + Math.round(row.candidate_count * (row.gate_pass_rate ?? 0)), 0),
|
|
78
100
|
simulated_mergeable: cases.filter((row) => row.merge_success).length,
|
|
79
101
|
wall_clock_ms: Date.now() - started
|
|
80
102
|
},
|
|
81
103
|
warnings: ['live_bench_no_apply_temp_repo']
|
|
82
104
|
};
|
|
83
105
|
}
|
|
106
|
+
/**
 * Renders the bench cases as a Markdown table and writes it to
 * `<root>/.sneakoscope/glm-naruto/bench-report.md`.
 *
 * Latency cells that are null/undefined print as `unavailable`; pass-rate cells
 * that are null/undefined print as `n/a`. The case with the smallest
 * `wall_clock_ms` is named as the fastest one.
 *
 * @param {string} root Project root under which the report is written.
 * @param {object[]} cases Bench case rows.
 * @returns {Promise<void>}
 */
async function writeBenchReport(root, cases) {
    const orUnavailable = (value) => String(value ?? 'unavailable');
    const orNotApplicable = (value) => String(value ?? 'n/a');
    const tableLines = [];
    for (const entry of cases) {
        const cells = [
            entry.name,
            entry.kind,
            String(entry.workers),
            String(entry.wall_clock_ms),
            orUnavailable(entry.p50_ttft_ms),
            orUnavailable(entry.p90_ttft_ms),
            orUnavailable(entry.p50_total_ms),
            orUnavailable(entry.p90_total_ms),
            orNotApplicable(entry.gate_pass_rate),
            orNotApplicable(entry.verifier_pass_rate),
            String(entry.metric_status)
        ];
        tableLines.push(`| ${cells.join(' | ')} |`);
    }
    // Sort a copy so the caller's array keeps its order.
    const byWallClock = [...cases].sort((a, b) => a.wall_clock_ms - b.wall_clock_ms);
    const quickest = byWallClock[0] ?? null;
    const fastestLabel = quickest ? quickest.name : 'unavailable';
    const lines = [];
    lines.push('# GLM Naruto Bench Report');
    lines.push('');
    lines.push(`Generated: ${nowIso()}`);
    lines.push(`Model: ${GLM_52_OPENROUTER_MODEL}`);
    lines.push('');
    lines.push('| Case | Kind | Workers | Wall ms | TTFT p50 | TTFT p90 | Total p50 | Total p90 | Gate pass | Verifier pass | Metric status |');
    lines.push('| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | --- |');
    lines.push(...tableLines);
    lines.push('');
    lines.push(`Fastest wall-clock case: ${fastestLabel}`);
    lines.push('Missing usage metrics are reported as `unavailable` or `n/a`, never as fake zero.');
    lines.push('');
    const reportPath = path.join(root, '.sneakoscope', 'glm-naruto', 'bench-report.md');
    await writeTextAtomic(reportPath, lines.join('\n'));
}
|
|
84
137
|
function blocked(root, warnings) {
|
|
85
138
|
return {
|
|
86
139
|
schema: 'sks.glm-naruto-bench.v1',
|
|
87
|
-
version: '4.0.
|
|
140
|
+
version: '4.0.12',
|
|
88
141
|
generated_at: nowIso(),
|
|
89
142
|
status: 'blocked',
|
|
90
143
|
model: GLM_52_OPENROUTER_MODEL,
|
|
@@ -100,4 +153,34 @@ function blocked(root, warnings) {
|
|
|
100
153
|
warnings
|
|
101
154
|
};
|
|
102
155
|
}
|
|
156
|
+
/**
 * Builds the bench row for the direct (non-Naruto) GLM run.
 *
 * Only the wall-clock time is measured for this case, so every latency and token
 * field is null and `metric_status` is `'unavailable'`. A successful run counts as
 * one candidate with a gate pass rate of 1.
 *
 * @param {{ok: *}} result Result of the direct run; only `ok` is read.
 * @param {number} wallClockMs Elapsed wall-clock time of the run.
 * @returns {object} Bench case row.
 */
function directBenchCase(result, wallClockMs) {
    const succeeded = result.ok;
    const nullFields = (keys) => Object.fromEntries(keys.map((key) => [key, null]));
    return {
        name: 'direct GLM speed path',
        kind: 'direct-glm',
        workers: 1,
        wall_clock_ms: wallClockMs,
        ...nullFields(['p50_ttft_ms', 'p90_ttft_ms', 'p50_total_ms', 'p90_total_ms']),
        candidate_count: succeeded ? 1 : 0,
        gate_pass_rate: succeeded ? 1 : null,
        verifier_pass_rate: null,
        merge_success: succeeded,
        ...nullFields(['cached_tokens_sum', 'cache_write_tokens_sum', 'reasoning_tokens_sum']),
        metric_status: 'unavailable'
    };
}
|
|
176
|
+
/**
 * Loads `worker-traces.json` from a mission artifact directory.
 *
 * Reading is best-effort: a missing directory, a missing or unreadable file, or
 * invalid JSON yields an empty list instead of an error. JSON that parses to
 * something other than an array is also treated as "no traces", matching the
 * empty-list fallbacks (assumes the file stores a plain array of trace records —
 * TODO confirm against the trace writer).
 *
 * @param {string|undefined|null} artifactDir Directory expected to contain the trace file.
 * @returns {Promise<object[]>} Parsed traces, or `[]` when none can be read.
 */
async function readWorkerTraces(artifactDir) {
    if (!artifactDir)
        return [];
    try {
        const parsed = JSON.parse(await fsp.readFile(path.join(artifactDir, 'worker-traces.json'), 'utf8'));
        return Array.isArray(parsed) ? parsed : [];
    }
    catch {
        // Deliberate best-effort: callers report absent traces as unavailable metrics.
        return [];
    }
}
|
|
103
186
|
//# sourceMappingURL=glm-naruto-bench.js.map
|
|
@@ -2,7 +2,6 @@ import { flag, readOption, positionalArgs } from '../../../../cli/args.js';
|
|
|
2
2
|
import { printJson } from '../../../../cli/output.js';
|
|
3
3
|
import { runGlmNarutoMission } from './glm-naruto-orchestrator.js';
|
|
4
4
|
import { runGlmNarutoBench } from './glm-naruto-bench.js';
|
|
5
|
-
import { resolveOpenRouterApiKey } from '../../openrouter/openrouter-secret-store.js';
|
|
6
5
|
export async function glmNarutoCommand(args = []) {
|
|
7
6
|
if (flag(args, '--bench')) {
|
|
8
7
|
const result = await runGlmNarutoBench(process.cwd(), args);
|
|
@@ -36,34 +35,20 @@ export async function glmNarutoCommand(args = []) {
|
|
|
36
35
|
process.exitCode = 1;
|
|
37
36
|
return result;
|
|
38
37
|
}
|
|
39
|
-
const key = await resolveOpenRouterApiKey({ env: process.env });
|
|
40
|
-
if (!key.key) {
|
|
41
|
-
const result = {
|
|
42
|
-
schema: 'sks.glm-naruto-result.v1',
|
|
43
|
-
ok: false,
|
|
44
|
-
status: 'blocked',
|
|
45
|
-
mission_id: 'none',
|
|
46
|
-
task,
|
|
47
|
-
model: 'z-ai/glm-5.2',
|
|
48
|
-
gpt_fallback_allowed: false,
|
|
49
|
-
termination_reason: 'glm_missing_openrouter_key',
|
|
50
|
-
blockers: ['glm_missing_openrouter_key'],
|
|
51
|
-
warnings: ['set_OPENROUTER_API_KEY_or_run_sks_--mad_--glm_--repair']
|
|
52
|
-
};
|
|
53
|
-
if (flag(args, '--json'))
|
|
54
|
-
printJson(result);
|
|
55
|
-
else
|
|
56
|
-
console.error('GLM Naruto blocked: missing OpenRouter API key. Run: sks --mad --glm --repair');
|
|
57
|
-
process.exitCode = 1;
|
|
58
|
-
return result;
|
|
59
|
-
}
|
|
60
38
|
const maxWorkers = parseInt(readOption(args, '--clones', readOption(args, '--workers', '12')), 10) || 12;
|
|
61
39
|
const deep = flag(args, '--deep');
|
|
62
40
|
const useJudge = flag(args, '--judge');
|
|
63
41
|
const xhighFinalizer = flag(args, '--xhigh-finalizer');
|
|
64
42
|
const useWorktree = flag(args, '--worktree');
|
|
65
43
|
const patchEnvelopeOnly = flag(args, '--patch-envelope-only');
|
|
44
|
+
const keepWorktrees = flag(args, '--keep-worktrees');
|
|
45
|
+
const cleanupWorktrees = flag(args, '--cleanup-worktrees') || !keepWorktrees;
|
|
46
|
+
const allowPatchEnvelopeFallback = flag(args, '--allow-patch-envelope-fallback');
|
|
66
47
|
const noApply = flag(args, '--no-apply');
|
|
48
|
+
const skipVerifier = flag(args, '--skip-verifier');
|
|
49
|
+
const allowDirtyApply = flag(args, '--allow-dirty-apply');
|
|
50
|
+
const noRollback = flag(args, '--no-rollback');
|
|
51
|
+
const strictChecks = flag(args, '--strict-checks');
|
|
67
52
|
const mergeStrategy = readOption(args, '--merge-strategy', 'deterministic');
|
|
68
53
|
const result = await runGlmNarutoMission({
|
|
69
54
|
cwd: process.cwd(),
|
|
@@ -74,7 +59,15 @@ export async function glmNarutoCommand(args = []) {
|
|
|
74
59
|
useJudge,
|
|
75
60
|
xhighFinalizer,
|
|
76
61
|
useWorktree: useWorktree && !patchEnvelopeOnly,
|
|
62
|
+
patchEnvelopeOnly,
|
|
63
|
+
allowPatchEnvelopeFallback,
|
|
64
|
+
keepWorktrees,
|
|
65
|
+
cleanupWorktrees,
|
|
77
66
|
noApply: noApply || flag(args, '--dry-run'),
|
|
67
|
+
skipVerifier,
|
|
68
|
+
allowDirtyApply,
|
|
69
|
+
noRollback,
|
|
70
|
+
strictChecks,
|
|
78
71
|
mergeStrategy
|
|
79
72
|
});
|
|
80
73
|
if (flag(args, '--json')) {
|
|
@@ -2,24 +2,25 @@ import { GLM_NARUTO_DEFAULTS } from './glm-naruto-types.js';
|
|
|
2
2
|
export function decideConcurrency(input) {
|
|
3
3
|
const maxClones = Math.min(input.operatorMax || GLM_NARUTO_DEFAULTS.max_clones, GLM_NARUTO_DEFAULTS.max_clones);
|
|
4
4
|
const requested = Math.min(input.requestedClones || GLM_NARUTO_DEFAULTS.default_clones, maxClones);
|
|
5
|
-
|
|
5
|
+
const active = input.activeWorkers > 0 ? input.activeWorkers : Math.min(requested, GLM_NARUTO_DEFAULTS.safe_active_start);
|
|
6
|
+
if (input.failureRate > 0.10) {
|
|
6
7
|
return {
|
|
7
|
-
target_active_workers:
|
|
8
|
+
target_active_workers: 0,
|
|
8
9
|
burst_workers: 0,
|
|
9
10
|
backpressure: true,
|
|
10
|
-
reason: '
|
|
11
|
+
reason: 'pause_high_5xx_or_failure_rate'
|
|
11
12
|
};
|
|
12
13
|
}
|
|
13
|
-
if (input.
|
|
14
|
+
if (input.rateLimited429 > 0 || input.ttftP90Ms > 15_000) {
|
|
14
15
|
return {
|
|
15
|
-
target_active_workers: Math.max(1, Math.floor(
|
|
16
|
+
target_active_workers: Math.max(1, Math.floor(active * 0.5)),
|
|
16
17
|
burst_workers: 0,
|
|
17
18
|
backpressure: true,
|
|
18
|
-
reason: '
|
|
19
|
+
reason: 'scale_down_high_latency_or_rate_limit'
|
|
19
20
|
};
|
|
20
21
|
}
|
|
21
|
-
if (input.ttftP90Ms < 5_000 && input.rateLimited429 === 0 && input.
|
|
22
|
-
const target = Math.min(requested,
|
|
22
|
+
if (input.ttftP90Ms < 5_000 && input.rateLimited429 === 0 && input.failureRate < 0.05 && active < requested) {
|
|
23
|
+
const target = Math.min(requested, active + 2);
|
|
23
24
|
return {
|
|
24
25
|
target_active_workers: target,
|
|
25
26
|
burst_workers: Math.min(2, requested - target),
|
|
@@ -28,7 +29,7 @@ export function decideConcurrency(input) {
|
|
|
28
29
|
};
|
|
29
30
|
}
|
|
30
31
|
return {
|
|
31
|
-
target_active_workers: Math.min(
|
|
32
|
+
target_active_workers: Math.min(active, requested),
|
|
32
33
|
burst_workers: 0,
|
|
33
34
|
backpressure: false,
|
|
34
35
|
reason: 'steady_state'
|
|
@@ -5,9 +5,32 @@ export function decomposeTask(input) {
|
|
|
5
5
|
const dependencies = [];
|
|
6
6
|
const mutableShardIds = [];
|
|
7
7
|
const verificationShardIds = [];
|
|
8
|
-
const paths = input
|
|
9
|
-
|
|
10
|
-
|
|
8
|
+
const paths = discoverTargetPaths(input);
|
|
9
|
+
if (paths.length === 0) {
|
|
10
|
+
const scoutShard = {
|
|
11
|
+
id: 'shard-scout-paths',
|
|
12
|
+
kind: 'verification',
|
|
13
|
+
task: `Discover target paths before mutation for: ${input.task}`,
|
|
14
|
+
target_paths: [],
|
|
15
|
+
forbidden_paths: ['.github/', 'dist/', 'node_modules/'],
|
|
16
|
+
base_digest: digestBase(input),
|
|
17
|
+
strategy: 'minimal_patch',
|
|
18
|
+
patches_per_shard: 0,
|
|
19
|
+
max_tokens: 2048,
|
|
20
|
+
reasoning: 'low',
|
|
21
|
+
mutable: false
|
|
22
|
+
};
|
|
23
|
+
return {
|
|
24
|
+
schema: 'sks.glm-naruto-work-graph.v1',
|
|
25
|
+
mission_id: input.missionId,
|
|
26
|
+
task: input.task,
|
|
27
|
+
shards: [scoutShard],
|
|
28
|
+
dependencies,
|
|
29
|
+
parallel_groups: [],
|
|
30
|
+
mutable_shards: [],
|
|
31
|
+
verification_shards: [scoutShard.id]
|
|
32
|
+
};
|
|
33
|
+
}
|
|
11
34
|
let shardIndex = 0;
|
|
12
35
|
for (const targetPath of paths) {
|
|
13
36
|
const shardId = `shard-${shardIndex}`;
|
|
@@ -64,6 +87,33 @@ export function decomposeTask(input) {
|
|
|
64
87
|
verification_shards: verificationShardIds
|
|
65
88
|
};
|
|
66
89
|
}
|
|
90
|
+
/**
 * Collects candidate target paths for a task, de-duplicated in discovery order:
 *   1. every explicitly mentioned path (added as given, not filtered),
 *   2. paths from `git status` short-format lines (the new name for renames),
 *   3. file-like tokens containing a directory separator and a known extension,
 *      found in the task text and the last error text.
 * Sources 2 and 3 are filtered through `isMutableCandidate`.
 *
 * Every input field is optional; a missing field contributes nothing.
 *
 * @param {object} input
 * @param {string[]} [input.mentionedPaths] Paths named explicitly by the caller.
 * @param {string} [input.gitStatus] Output in `git status --short` format.
 * @param {string} [input.task] Task description.
 * @param {string} [input.lastError] Most recent error text.
 * @returns {string[]} Unique candidate paths.
 */
function discoverTargetPaths(input) {
    const statusLine = /^\s*(?:[AMDRCU?!]{1,2})\s+(.+)$/;
    const pathToken = /\b([A-Za-z0-9_.-]+\/[A-Za-z0-9_./-]+\.(?:ts|tsx|js|mjs|cjs|json|md|yml|yaml|toml))\b/g;
    const candidates = new Set();
    for (const mentioned of input.mentionedPaths ?? []) {
        // An empty entry is not a path; keeping it would hide the "no paths found" case.
        if (mentioned)
            candidates.add(mentioned);
    }
    for (const line of (input.gitStatus || '').split(/\r?\n/)) {
        const match = line.match(statusLine);
        if (!match)
            continue;
        // Rename lines read "old -> new"; only the new name is a candidate.
        const file = match[1].split(/\s+->\s+/).pop().trim();
        if (isMutableCandidate(file))
            candidates.add(file);
    }
    for (const source of [input.task || '', input.lastError || '']) {
        for (const match of source.matchAll(pathToken)) {
            if (isMutableCandidate(match[1]))
                candidates.add(match[1]);
        }
    }
    return [...candidates];
}
|
|
110
|
+
/**
 * Decides whether a discovered path may be offered as a mutation target.
 *
 * Rejected: empty values, anything starting with `.github/`, `dist/` or
 * `node_modules/`, and anything ending in `/`.
 *
 * @param {string} file Path to test.
 * @returns {boolean} True when the path is an acceptable target.
 */
function isMutableCandidate(file) {
    if (!file) {
        return false;
    }
    const protectedPrefixes = ['.github/', 'dist/', 'node_modules/'];
    if (protectedPrefixes.some((prefix) => file.startsWith(prefix))) {
        return false;
    }
    return !file.endsWith('/');
}
|
|
67
117
|
function classifyShardKind(path) {
|
|
68
118
|
if (path.includes('test') || path.includes('__tests__') || path.includes('.test.'))
|
|
69
119
|
return 'test_fix';
|
|
@@ -87,7 +137,7 @@ export function validateWorkGraph(graph, isVerifyOnly) {
|
|
|
87
137
|
return { ok: true };
|
|
88
138
|
const mutableCount = graph.mutable_shards.length;
|
|
89
139
|
if (mutableCount === 0)
|
|
90
|
-
return { ok: false, reason: '
|
|
140
|
+
return { ok: false, reason: 'glm_naruto_needs_target_path_context' };
|
|
91
141
|
// Check ratio of mutable shards to total shards (excluding verification shards from the denominator)
|
|
92
142
|
const totalWorkShards = graph.shards.filter(s => s.mutable || s.kind !== 'verification').length;
|
|
93
143
|
const ratio = totalWorkShards > 0 ? mutableCount / totalWorkShards : 0;
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import path from 'node:path';
|
|
2
|
+
import { writeJsonAtomic } from '../../../fsx.js';
|
|
3
|
+
import { GLM_52_OPENROUTER_MODEL } from '../glm-52-settings.js';
|
|
4
|
+
/**
 * Builds the final seal record for a GLM Naruto mission and writes it to
 * `<artifactDir>/final-seal.json`.
 *
 * The seal lists model-lock mismatches (envelopes whose model differs from
 * `GLM_52_OPENROUTER_MODEL` or whose `gpt_fallback_allowed` is not `false`, and
 * traces whose model differs), isolation violations (the policy's blockers when the
 * selected policy is `'blocked'`), scheduler figures, candidate counts, the apply
 * outcome, the secret audit and the stop-gate reference. Its status comes from
 * `finalSealStatus`.
 *
 * @param {object} input
 * @param {*} input.missionId Mission identifier copied into the seal.
 * @param {string} input.artifactDir Directory that receives `final-seal.json`.
 * @param {object[]} input.envelopes Worker patch envelopes.
 * @param {object[]} input.traces Worker traces.
 * @param {{selected: string, blockers?: string[]}} input.isolationPolicy
 * @param {object} input.scheduler Scheduler summary (`queue_drained`, `max_observed_active_workers`, `backpressure_events`).
 * @param {{ok: boolean, findings?: Array}} input.secretAudit
 * @param {*} input.selectedPatchIds Ids of the selected patches.
 * @param {object|null} [input.applyTransaction] Apply transaction record; null or undefined when no apply ran.
 * @param {object} input.result Mission result forwarded to `finalSealStatus`.
 * @param {*} input.stopGatePath Stop-gate location recorded in the seal.
 * @param {*} input.stopGatePassed Stop-gate outcome recorded in the seal.
 * @returns {Promise<{seal: object, path: string, passed: boolean}>}
 */
export async function writeGlmNarutoFinalSeal(input) {
    const mismatches = [
        ...input.envelopes.filter((env) => env.model !== GLM_52_OPENROUTER_MODEL || env.gpt_fallback_allowed !== false).map((env) => `envelope:${env.worker_id}`),
        ...input.traces.filter((trace) => trace.model !== GLM_52_OPENROUTER_MODEL).map((trace) => `trace:${trace.worker_id}`)
    ];
    // A blocked policy without a blockers list still yields an array so the
    // status check and the written seal never see `undefined`.
    const isolationViolations = input.isolationPolicy.selected === 'blocked' ? (input.isolationPolicy.blockers ?? []) : [];
    const status = finalSealStatus({
        result: input.result,
        secretOk: input.secretAudit.ok,
        mismatches,
        isolationViolations,
        queueDrained: input.scheduler.queue_drained
    });
    // Treat an omitted transaction like an explicit null: no apply was attempted.
    const transaction = input.applyTransaction ?? null;
    const seal = {
        schema: 'sks.glm-naruto-final-seal.v1',
        mission_id: input.missionId,
        status,
        model_lock: {
            model: GLM_52_OPENROUTER_MODEL,
            gpt_fallback_allowed: false,
            requests_checked: input.envelopes.length + input.traces.length,
            mismatches
        },
        isolation: {
            selected: input.isolationPolicy.selected,
            workers_write_main_workspace: false,
            violations: isolationViolations
        },
        scheduler: {
            bounded: true,
            max_observed_active_workers: input.scheduler.max_observed_active_workers,
            queue_drained: input.scheduler.queue_drained,
            backpressure_events: input.scheduler.backpressure_events
        },
        candidates: {
            total: input.envelopes.length,
            gate_passed: input.envelopes.filter((env) => env.status === 'gate_passed').length,
            verifier_passed: input.envelopes.filter((env) => env.verification_passed === true).length,
            selected: input.selectedPatchIds
        },
        apply: {
            attempted: transaction !== null,
            transaction_path: transaction ? path.join(input.artifactDir, 'apply-transaction.json') : null,
            final_status: transaction?.final_status ?? null,
            rollback_attempted: transaction?.rollback_attempted ?? false,
            rollback_passed: transaction?.rollback_passed ?? null
        },
        secret_audit: {
            ok: input.secretAudit.ok,
            findings: input.secretAudit.findings ?? []
        },
        stop_gate: {
            path: input.stopGatePath,
            passed: input.stopGatePassed
        }
    };
    const out = path.join(input.artifactDir, 'final-seal.json');
    await writeJsonAtomic(out, seal);
    return { seal, path: out, passed: seal.status === 'passed' };
}
|
|
64
|
+
/**
 * Derives the seal status from the guard results and the mission result.
 *
 * Any failed guard (secret audit not ok, model-lock mismatches, isolation
 * violations, scheduler queue not drained) gives `'blocked'` regardless of the
 * mission result. Otherwise a successful result gives `'passed'`, a
 * `'partial_candidates'` result gives `'partial'`, a `'blocked'` result gives
 * `'blocked'`, and anything else gives `'failed'`.
 *
 * @param {{result: {ok: *, status: *}, secretOk: *, mismatches: Array, isolationViolations: Array, queueDrained: *}} input
 * @returns {'blocked'|'passed'|'partial'|'failed'}
 */
function finalSealStatus(input) {
    const guardsClean = input.secretOk
        && !(input.mismatches.length > 0)
        && !(input.isolationViolations.length > 0)
        && input.queueDrained;
    if (!guardsClean) {
        return 'blocked';
    }
    if (input.result.ok) {
        return 'passed';
    }
    switch (input.result.status) {
        case 'partial_candidates':
            return 'partial';
        case 'blocked':
            return 'blocked';
        default:
            return 'failed';
    }
}
|
|
75
|
+
//# sourceMappingURL=glm-naruto-final-seal.js.map
|
|
@@ -15,6 +15,7 @@ export function finalizeMergePlan(input) {
|
|
|
15
15
|
return planMerge({
|
|
16
16
|
missionId: input.missionId,
|
|
17
17
|
graph: conflictGraph,
|
|
18
|
+
...(input.scoreboard ? { scoreboard: input.scoreboard } : {}),
|
|
18
19
|
strategy,
|
|
19
20
|
...(input.judgeResult ? { judgeRanking: input.judgeResult.ranked_patch_ids } : {})
|
|
20
21
|
});
|