sneakoscope 4.0.8 → 4.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/crates/sks-core/Cargo.lock +1 -1
- package/crates/sks-core/Cargo.toml +1 -1
- package/crates/sks-core/src/main.rs +1 -1
- package/dist/bin/sks.js +1 -1
- package/dist/cli/command-registry.js +1 -0
- package/dist/core/commands/naruto-command.js +25 -0
- package/dist/core/commands/stop-gate-command.js +63 -0
- package/dist/core/fsx.js +1 -1
- package/dist/core/pipeline-internals/runtime-gates.js +31 -4
- package/dist/core/providers/glm/glm-bench.js +4 -4
- package/dist/core/providers/glm/glm-direct-run.js +1 -1
- package/dist/core/providers/glm/glm-latency-trace.js +1 -1
- package/dist/core/providers/glm/glm-request-cache.js +9 -4
- package/dist/core/providers/glm/naruto/glm-naruto-bench.js +47 -12
- package/dist/core/providers/glm/naruto/glm-naruto-combined-patch.js +49 -0
- package/dist/core/providers/glm/naruto/glm-naruto-conflict-graph.js +7 -1
- package/dist/core/providers/glm/naruto/glm-naruto-hunk-conflict.js +16 -0
- package/dist/core/providers/glm/naruto/glm-naruto-hunk-parser.js +36 -0
- package/dist/core/providers/glm/naruto/glm-naruto-judge.js +1 -1
- package/dist/core/providers/glm/naruto/glm-naruto-orchestrator.js +77 -13
- package/dist/core/providers/glm/naruto/glm-naruto-patch-candidate-gate.js +42 -0
- package/dist/core/providers/glm/naruto/glm-naruto-patch-candidate-parser.js +62 -0
- package/dist/core/providers/glm/naruto/glm-naruto-secret-audit.js +39 -0
- package/dist/core/providers/glm/naruto/glm-naruto-trace.js +41 -1
- package/dist/core/providers/glm/naruto/glm-naruto-verifier-output.js +26 -0
- package/dist/core/providers/glm/naruto/glm-naruto-worker-artifacts.js +42 -0
- package/dist/core/providers/glm/naruto/glm-naruto-worker-pool.js +30 -11
- package/dist/core/providers/glm/naruto/glm-naruto-worker-runtime.js +170 -16
- package/dist/core/providers/openrouter/openrouter-stream.js +46 -6
- package/dist/core/stop-gate/stop-gate-check.js +220 -0
- package/dist/core/stop-gate/stop-gate-diagnostics.js +4 -0
- package/dist/core/stop-gate/stop-gate-resolver.js +122 -0
- package/dist/core/stop-gate/stop-gate-types.js +2 -0
- package/dist/core/stop-gate/stop-gate-writer.js +126 -0
- package/dist/core/version.js +1 -1
- package/package.json +1 -1
|
@@ -4,7 +4,7 @@ use std::io::{self, Read, Seek, SeekFrom};
|
|
|
4
4
|
fn main() {
|
|
5
5
|
let mut args = std::env::args().skip(1);
|
|
6
6
|
match args.next().as_deref() {
|
|
7
|
-
Some("--version") => println!("sks-rs 4.0.
|
|
7
|
+
Some("--version") => println!("sks-rs 4.0.9"),
|
|
8
8
|
Some("compact-info") => {
|
|
9
9
|
let mut input = String::new();
|
|
10
10
|
let _ = io::stdin().read_to_string(&mut input);
|
package/dist/bin/sks.js
CHANGED
|
@@ -123,6 +123,7 @@ export const COMMANDS = {
|
|
|
123
123
|
agent: entry('beta', 'Run native multi-session agent missions', 'dist/core/commands/agent-command.js', argsCommand(() => import('../core/commands/agent-command.js'), 'agentCommand', 'dist/core/commands/agent-command.js')),
|
|
124
124
|
'with-local-llm': entry('beta', 'Enable or inspect local Ollama worker backend', 'dist/core/commands/local-model-command.js', argsCommand(() => import('../core/commands/local-model-command.js'), 'localModelCommand', 'dist/core/commands/local-model-command.js')),
|
|
125
125
|
naruto: entry('labs', 'Run $Naruto shadow-clone swarm (up to 100 parallel sessions)', 'dist/core/commands/naruto-command.js', argsCommand(() => import('../core/commands/naruto-command.js'), 'narutoCommand', 'dist/core/commands/naruto-command.js')),
|
|
126
|
+
'stop-gate': entry('beta', 'Check canonical stop-gate resolution for a route/mission', 'dist/core/commands/stop-gate-command.js', commandArgsCommand(() => import('../core/commands/stop-gate-command.js'), 'stopGateCommand', 'dist/core/commands/stop-gate-command.js')),
|
|
126
127
|
loop: entry('labs', 'Dynamic Loop Runtime: plan/run/status/proof loop graphs.', 'dist/core/commands/loop-command.js', subcommand(() => import('../core/commands/loop-command.js'), 'loopCommand', 'dist/core/commands/loop-command.js', 'help')),
|
|
127
128
|
'qa-loop': entry('beta', 'Run QA loop missions', 'dist/core/commands/qa-loop-command.js', subcommand(() => import('../core/commands/qa-loop-command.js'), 'qaLoopCommand', 'dist/core/commands/qa-loop-command.js')),
|
|
128
129
|
research: entry('labs', 'Run research missions', 'dist/core/commands/research-command.js', subcommand(() => import('../core/commands/research-command.js'), 'researchCommand', 'dist/core/commands/research-command.js')),
|
|
@@ -25,6 +25,7 @@ import { evaluateGitWorktreeCapability } from '../git/git-worktree-capability.js
|
|
|
25
25
|
import { buildRuntimeProofSummary, renderRuntimeProofSummary } from '../agents/runtime-proof-summary.js';
|
|
26
26
|
import { writeCodex0138CapabilityArtifacts } from '../codex-control/codex-0138-capability.js';
|
|
27
27
|
import { writeCodex0139CapabilityArtifacts } from '../codex-control/codex-0139-capability.js';
|
|
28
|
+
import { writeFinalStopGate } from '../stop-gate/stop-gate-writer.js';
|
|
28
29
|
const NARUTO_RESULT_SCHEMA = 'sks.naruto-command-result.v1';
|
|
29
30
|
const NARUTO_ROUTE = '$Naruto';
|
|
30
31
|
// $Naruto — Shadow Clone Swarm (影分身 / Kage Bunshin no Jutsu).
|
|
@@ -34,6 +35,11 @@ const NARUTO_ROUTE = '$Naruto';
|
|
|
34
35
|
// writes). The standard 20-agent ceiling is lifted only for this route.
|
|
35
36
|
export async function narutoCommand(commandOrArgs = 'naruto', maybeArgs = []) {
|
|
36
37
|
const args = Array.isArray(commandOrArgs) ? commandOrArgs : maybeArgs;
|
|
38
|
+
// 4.0.9: `sks naruto --glm` delegates to GLM Naruto before legacy Naruto starts.
|
|
39
|
+
if (args.includes('--glm')) {
|
|
40
|
+
const { glmNarutoCommand } = await import('../providers/glm/naruto/glm-naruto-command.js');
|
|
41
|
+
return glmNarutoCommand(args.filter((arg) => arg !== '--glm'));
|
|
42
|
+
}
|
|
37
43
|
const parsed = parseNarutoArgs(args);
|
|
38
44
|
if (parsed.action === 'help')
|
|
39
45
|
return narutoHelp(parsed);
|
|
@@ -450,6 +456,25 @@ async function narutoRun(parsed) {
|
|
|
450
456
|
stop_gate: 'naruto-gate.json',
|
|
451
457
|
prompt: parsed.prompt
|
|
452
458
|
});
|
|
459
|
+
// 4.0.9: Write canonical stop-gate artifacts for hook resolution.
|
|
460
|
+
const narutoGatePassed = result.ok === true && nativeProofOk && finalAccepted && parallelRuntimeOk;
|
|
461
|
+
await writeFinalStopGate({
|
|
462
|
+
root,
|
|
463
|
+
missionId: mission.id,
|
|
464
|
+
route: 'Naruto',
|
|
465
|
+
routeCommand: '$Naruto',
|
|
466
|
+
status: summaryOk ? 'passed' : 'blocked',
|
|
467
|
+
terminal: summaryOk,
|
|
468
|
+
terminalState: summaryOk ? 'completed' : 'blocked',
|
|
469
|
+
evidence: {
|
|
470
|
+
build_passed: summaryOk,
|
|
471
|
+
tests_passed: summaryOk,
|
|
472
|
+
route_evidence_passed: nativeProofOk && finalAccepted,
|
|
473
|
+
native_session_split_evidence: nativeProofOk ? 'native_agent_proof' : null,
|
|
474
|
+
},
|
|
475
|
+
blockers: summaryOk ? [] : [...(result.proof?.blockers || []), ...(parallelRuntimeOk ? [] : ['naruto_parallel_runtime_proof_below_gate'])],
|
|
476
|
+
nativeGateFile: 'naruto-gate.json',
|
|
477
|
+
}).catch(() => null);
|
|
453
478
|
const summary = {
|
|
454
479
|
schema: NARUTO_RESULT_SCHEMA,
|
|
455
480
|
ok: summaryOk,
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import { sksRoot } from '../fsx.js';
|
|
2
|
+
import { checkStopGate } from '../stop-gate/stop-gate-check.js';
|
|
3
|
+
/**
 * CLI handler for `sks stop-gate`.
 *
 * The only supported subcommand is `check`, which is also the default: it is
 * assumed when no argument is given or when the first argument is a flag
 * (for example `sks stop-gate --route Naruto --json`).
 *
 * Recognised options (read via `readOption`): `--route`, `--mission`, `--gate`.
 * `--json` prints the result object as JSON instead of human-readable text.
 *
 * @param {string} command - Command name supplied by the dispatcher (not read here).
 * @param {string[]} [args] - Arguments following the command name.
 * @returns {Promise<object>} The `checkStopGate` result, or an
 *   `sks.stop-gate-command.v1` error descriptor for an unknown subcommand.
 *   Sets `process.exitCode = 1` when the check result action is `continue`.
 */
export async function stopGateCommand(command, args = []) {
    const argv = Array.isArray(args) ? args : [];
    const first = argv[0];
    // A leading flag means the subcommand was omitted, so fall back to `check`
    // instead of reporting the flag itself as an unknown subcommand.
    const subcommandOmitted = !first || String(first).startsWith('-');
    const subcommand = subcommandOmitted ? 'check' : first;
    // Only drop the first token when it really was the literal `check`.
    const rest = first === 'check' ? argv.slice(1) : argv;
    const json = rest.includes('--json');
    const route = readOption(rest, '--route');
    const missionId = readOption(rest, '--mission');
    const gatePath = readOption(rest, '--gate');
    if (subcommand !== 'check') {
        const result = {
            schema: 'sks.stop-gate-command.v1',
            ok: false,
            action: 'continue',
            error: `Unknown subcommand: ${subcommand}. Available: check`,
        };
        if (json) {
            console.log(JSON.stringify(result, null, 2));
        }
        else {
            console.error(`Unknown stop-gate subcommand: ${subcommand}. Use: sks stop-gate check --route Naruto --json`);
        }
        return result;
    }
    const root = await sksRoot();
    // Only forward options that were actually supplied.
    const result = await checkStopGate({
        root,
        ...(route ? { route } : {}),
        ...(missionId ? { missionId } : {}),
        ...(gatePath ? { explicitGatePath: gatePath } : {}),
    });
    if (json) {
        console.log(JSON.stringify(result, null, 2));
    }
    else {
        if (result.action === 'allow_stop') {
            console.log(`stop-gate: allow_stop — gate passed at ${result.gate_path}`);
        }
        else if (result.action === 'hard_blocked') {
            console.log(`stop-gate: hard_blocked — ${result.feedback}`);
        }
        else {
            console.error(`stop-gate: continue — ${result.feedback}`);
        }
        // Diagnostics are read defensively so a result without them still prints.
        const checkedPaths = result.diagnostics?.checked_paths ?? [];
        if (checkedPaths.length > 0) {
            console.log('Checked paths:');
            for (const checkedPath of checkedPaths) {
                console.log(`  ${checkedPath}`);
            }
        }
        if (result.diagnostics?.selected_gate_path) {
            console.log(`Selected gate: ${result.diagnostics.selected_gate_path}`);
        }
    }
    if (result.action === 'continue') {
        process.exitCode = 1;
    }
    return result;
}
|
|
56
|
+
/**
 * Read the value of a command-line option.
 *
 * Two spellings are understood: `--name value` (the following token is taken
 * only when it is non-empty and does not itself start with `--`) and
 * `--name=value`.
 *
 * @param {string[]} args - Tokens to search.
 * @param {string} name - Option name including its leading dashes.
 * @returns {string|undefined} The option value, or `undefined` when absent.
 */
function readOption(args, name) {
    const position = args.indexOf(name);
    if (position !== -1) {
        const following = args[position + 1];
        if (following && !String(following).startsWith('--')) {
            return following;
        }
    }
    // Fall back to the inline `--name=value` spelling.
    const inlinePrefix = name + '=';
    for (const token of args) {
        if (String(token).startsWith(inlinePrefix)) {
            return token.slice(name.length + 1);
        }
    }
    return undefined;
}
|
|
63
|
+
//# sourceMappingURL=stop-gate-command.js.map
|
package/dist/core/fsx.js
CHANGED
|
@@ -5,7 +5,7 @@ import os from 'node:os';
|
|
|
5
5
|
import crypto from 'node:crypto';
|
|
6
6
|
import { spawn } from 'node:child_process';
|
|
7
7
|
import { fileURLToPath } from 'node:url';
|
|
8
|
-
export const PACKAGE_VERSION = '4.0.
|
|
8
|
+
export const PACKAGE_VERSION = '4.0.10';
|
|
9
9
|
export const DEFAULT_PROCESS_TAIL_BYTES = 256 * 1024;
|
|
10
10
|
export const DEFAULT_PROCESS_TIMEOUT_MS = 30 * 60 * 1000;
|
|
11
11
|
export function nowIso() {
|
|
@@ -17,6 +17,7 @@ import { readAgentGateStatus } from '../agents/agent-gate.js';
|
|
|
17
17
|
import { MISTAKE_RECALL_ARTIFACT, mistakeRecallGateStatus } from '../mistake-recall.js';
|
|
18
18
|
import { SSOT_GUARD_ARTIFACT, validateSsotGuardArtifact } from '../safety/ssot-guard.js';
|
|
19
19
|
import { validateTeamRuntimeArtifacts } from '../team-dag.js';
|
|
20
|
+
import { checkStopGate } from '../stop-gate/stop-gate-check.js';
|
|
20
21
|
import { clarificationStopReason, context7Evidence, hasContext7DocsEvidence, hasSubagentEvidence, subagentEvidence, } from './runtime-core.js';
|
|
21
22
|
const REFLECTION_ARTIFACT = 'reflection.md';
|
|
22
23
|
const REFLECTION_GATE = 'reflection-gate.json';
|
|
@@ -233,10 +234,36 @@ export async function evaluateStop(root, state, payload, opts = {}) {
|
|
|
233
234
|
return complianceBlock(root, state, `SKS no-question run is not done. Continue autonomously, fix failing checks, update ${gate.file || 'the active gate file'}, and do not ask the user.${missing}`, { gate: gate.file || 'active-gate', missing: gate.missing });
|
|
234
235
|
}
|
|
235
236
|
if (state?.mission_id && state?.stop_gate && !['none', 'honest_mode', 'clarification-gate'].includes(state.stop_gate)) {
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
237
|
+
// 4.0.9: Use canonical stop-gate resolver first for NARUTO/GLM_NARUTO routes.
|
|
238
|
+
const modeUpper = String(state?.mode || '').toUpperCase();
|
|
239
|
+
const routeUpper = String(state?.route || state?.route_command || '').replace(/^\$/, '').toUpperCase();
|
|
240
|
+
const narutoFamily = modeUpper === 'NARUTO' || routeUpper === 'NARUTO' || routeUpper === 'GLM_NARUTO';
|
|
241
|
+
if (narutoFamily || state.stop_gate === 'stop-gate.json' || state.stop_gate === 'naruto-gate.json') {
|
|
242
|
+
const stopCheck = await checkStopGate({
|
|
243
|
+
root,
|
|
244
|
+
route: state.route || state.mode,
|
|
245
|
+
missionId: state.mission_id,
|
|
246
|
+
explicitGatePath: typeof state.stop_gate_abs_path === 'string' && state.stop_gate_abs_path ? state.stop_gate_abs_path : undefined,
|
|
247
|
+
});
|
|
248
|
+
if (stopCheck.action === 'allow_stop') {
|
|
249
|
+
if (narutoFamily)
|
|
250
|
+
return { continue: true, systemMessage: `SKS: canonical stop-gate passed at ${stopCheck.gate_path}` };
|
|
251
|
+
}
|
|
252
|
+
else if (stopCheck.action === 'hard_blocked') {
|
|
253
|
+
return { continue: true, systemMessage: `SKS: ${stopCheck.feedback}` };
|
|
254
|
+
}
|
|
255
|
+
else {
|
|
256
|
+
const missing = stopCheck.diagnostics.missing_fields?.length ? ` Missing gate fields: ${stopCheck.diagnostics.missing_fields.join(', ')}.` : '';
|
|
257
|
+
const checkedPaths = stopCheck.diagnostics.checked_paths?.length ? ` Checked: ${stopCheck.diagnostics.checked_paths.join(', ')}.` : '';
|
|
258
|
+
return complianceBlock(root, state, `SKS ${state.route_command || state.mode} route cannot stop yet. Pass ${stopCheck.gate_path || state.stop_gate} or record a hard blocker with evidence before finishing.${missing}${checkedPaths}`, { gate: stopCheck.gate_path || state.stop_gate, missing: stopCheck.diagnostics.missing_fields });
|
|
259
|
+
}
|
|
260
|
+
}
|
|
261
|
+
else {
|
|
262
|
+
const gate = await passedActiveGate(root, state);
|
|
263
|
+
if (!gate.ok) {
|
|
264
|
+
const missing = gate.missing?.length ? ` Missing gate fields: ${gate.missing.join(', ')}.` : '';
|
|
265
|
+
return complianceBlock(root, state, `SKS ${state.route_command || state.mode} route cannot stop yet. Pass ${gate.file || state.stop_gate} or record a hard blocker with evidence before finishing.${missing}`, { gate: gate.file || state.stop_gate, missing: gate.missing });
|
|
266
|
+
}
|
|
240
267
|
}
|
|
241
268
|
}
|
|
242
269
|
const proofGate = await routeProofGateStatus(root, state);
|
|
@@ -14,7 +14,7 @@ export async function runGlmBench(root, args = []) {
|
|
|
14
14
|
if (execute && !live) {
|
|
15
15
|
const blocked = {
|
|
16
16
|
schema: 'sks.glm-bench-result.v1',
|
|
17
|
-
version: '4.0.
|
|
17
|
+
version: '4.0.9',
|
|
18
18
|
generated_at: nowIso(),
|
|
19
19
|
status: 'blocked',
|
|
20
20
|
dry_run: true,
|
|
@@ -32,7 +32,7 @@ export async function runGlmBench(root, args = []) {
|
|
|
32
32
|
if (live) {
|
|
33
33
|
const blocked = {
|
|
34
34
|
schema: 'sks.glm-bench-result.v1',
|
|
35
|
-
version: '4.0.
|
|
35
|
+
version: '4.0.9',
|
|
36
36
|
generated_at: nowIso(),
|
|
37
37
|
status: 'blocked',
|
|
38
38
|
dry_run: false,
|
|
@@ -50,7 +50,7 @@ export async function runGlmBench(root, args = []) {
|
|
|
50
50
|
if (execute) {
|
|
51
51
|
const blocked = {
|
|
52
52
|
schema: 'sks.glm-bench-result.v1',
|
|
53
|
-
version: '4.0.
|
|
53
|
+
version: '4.0.9',
|
|
54
54
|
generated_at: nowIso(),
|
|
55
55
|
status: 'blocked',
|
|
56
56
|
dry_run: true,
|
|
@@ -69,7 +69,7 @@ export async function runGlmBench(root, args = []) {
|
|
|
69
69
|
const deepTotals = SYNTHETIC_CASES.map((row) => row.deep.total_ms);
|
|
70
70
|
const result = {
|
|
71
71
|
schema: 'sks.glm-bench-result.v1',
|
|
72
|
-
version: '4.0.
|
|
72
|
+
version: '4.0.9',
|
|
73
73
|
generated_at: nowIso(),
|
|
74
74
|
status: 'dry_run',
|
|
75
75
|
dry_run: true,
|
|
@@ -60,7 +60,7 @@ export async function runGlmDirectSpeedRun(input) {
|
|
|
60
60
|
return result(reason === 'glm_request_timeout' ? 'timeout' : 'failed', controller.state().run_id, input.task, termination.reason, artifactDir, [], [response.error.code], []);
|
|
61
61
|
}
|
|
62
62
|
controller.transition('model_guard');
|
|
63
|
-
const modelGuard = assertGlm52ActualModel(response.value.model
|
|
63
|
+
const modelGuard = assertGlm52ActualModel(response.value.model);
|
|
64
64
|
if (!modelGuard.ok) {
|
|
65
65
|
const termination = controller.terminate('blocked', 'glm_model_mismatch', [modelGuard.code]);
|
|
66
66
|
const artifactDir = await writeGlmRunArtifacts({ cwd: input.cwd, state: controller.state(), termination, contextOmissions: context.omitted });
|
|
@@ -3,8 +3,10 @@ import { SksLruCache } from '../../perf/lru-cache.js';
|
|
|
3
3
|
/**
 * Build a fresh LRU cache for encoded GLM request bodies.
 *
 * @param {number} [maxEntries=128] - Capacity passed to the `SksLruCache` constructor.
 * @returns {SksLruCache} A new cache instance.
 */
export function createGlmEncodedRequestCache(maxEntries = 128) {
    const encodedRequestCache = new SksLruCache(maxEntries);
    return encodedRequestCache;
}
|
|
6
|
-
export function encodeGlmRequestWithCache(
|
|
7
|
-
const
|
|
6
|
+
export function encodeGlmRequestWithCache(input, cache = defaultEncodedRequestCache) {
|
|
7
|
+
const request = 'request' in input ? input.request : input;
|
|
8
|
+
const stringify = 'request' in input && input.stringify ? input.stringify : JSON.stringify;
|
|
9
|
+
const key = 'request' in input && input.cacheKeyParts ? digestRequestCacheKeyParts(input.cacheKeyParts) : digestRequestForCache(request);
|
|
8
10
|
const hit = cache.get(key);
|
|
9
11
|
// Fix 18.2: On cache hit, return stored body without JSON.stringify
|
|
10
12
|
if (hit) {
|
|
@@ -12,11 +14,11 @@ export function encodeGlmRequestWithCache(request, cache = defaultEncodedRequest
|
|
|
12
14
|
return { body: hit.body, entry: hit, cacheHit: true };
|
|
13
15
|
}
|
|
14
16
|
// Even for non-stored bodies, skip re-stringifying by computing from request
|
|
15
|
-
const body =
|
|
17
|
+
const body = stringify(request);
|
|
16
18
|
return { body, entry: hit, cacheHit: true };
|
|
17
19
|
}
|
|
18
20
|
// Cache miss: stringify once
|
|
19
|
-
const body =
|
|
21
|
+
const body = stringify(request);
|
|
20
22
|
if (containsSecretLikeContent(body)) {
|
|
21
23
|
const entry = {
|
|
22
24
|
key,
|
|
@@ -40,6 +42,9 @@ export function encodeGlmRequestWithCache(request, cache = defaultEncodedRequest
|
|
|
40
42
|
cache.set(key, entry);
|
|
41
43
|
return { body, entry, cacheHit: false };
|
|
42
44
|
}
|
|
45
|
+
/**
 * Derive a cache key from caller-supplied key parts.
 *
 * The parts are serialised with `stableStringify` and hashed with SHA-256.
 *
 * @param {*} parts - Value describing the request identity.
 * @returns {string} Hex-encoded SHA-256 digest of the serialised parts.
 */
export function digestRequestCacheKeyParts(parts) {
    const hasher = crypto.createHash('sha256');
    const serialised = stableStringify(parts);
    hasher.update(serialised);
    return hasher.digest('hex');
}
|
|
43
48
|
export function digestRequestForCache(request) {
|
|
44
49
|
const safe = {
|
|
45
50
|
model: request.model,
|
|
@@ -1,6 +1,10 @@
|
|
|
1
|
-
import { nowIso
|
|
1
|
+
import { nowIso } from '../../../fsx.js';
|
|
2
2
|
import path from 'node:path';
|
|
3
|
+
import fsp from 'node:fs/promises';
|
|
4
|
+
import os from 'node:os';
|
|
3
5
|
import { GLM_52_OPENROUTER_MODEL } from '../glm-52-settings.js';
|
|
6
|
+
import { resolveOpenRouterApiKey } from '../../openrouter/openrouter-secret-store.js';
|
|
7
|
+
import { runGlmNarutoMission } from './glm-naruto-orchestrator.js';
|
|
4
8
|
export async function runGlmNarutoBench(root, args = []) {
|
|
5
9
|
const live = args.includes('--live');
|
|
6
10
|
const execute = args.includes('--execute');
|
|
@@ -11,7 +15,7 @@ export async function runGlmNarutoBench(root, args = []) {
|
|
|
11
15
|
if (!live) {
|
|
12
16
|
return {
|
|
13
17
|
schema: 'sks.glm-naruto-bench.v1',
|
|
14
|
-
version: '4.0.
|
|
18
|
+
version: '4.0.10',
|
|
15
19
|
generated_at: nowIso(),
|
|
16
20
|
status: 'dry_run',
|
|
17
21
|
model: GLM_52_OPENROUTER_MODEL,
|
|
@@ -27,29 +31,60 @@ export async function runGlmNarutoBench(root, args = []) {
|
|
|
27
31
|
warnings: ['dry_run_no_live_api_calls']
|
|
28
32
|
};
|
|
29
33
|
}
|
|
30
|
-
|
|
34
|
+
const key = await resolveOpenRouterApiKey({ env: process.env });
|
|
35
|
+
if (!key.key)
|
|
36
|
+
return blocked(root, ['live_bench_requires_openrouter_key']);
|
|
37
|
+
const fixture = await fsp.mkdtemp(path.join(os.tmpdir(), 'sks-glm-naruto-live-bench-'));
|
|
38
|
+
await fsp.mkdir(path.join(fixture, 'src'), { recursive: true });
|
|
39
|
+
await fsp.writeFile(path.join(fixture, 'src', 'bench-target.ts'), 'export const value = 1;\n', 'utf8');
|
|
40
|
+
const cases = [];
|
|
41
|
+
for (const workers of [1, 4, 8, 12]) {
|
|
42
|
+
const caseStarted = Date.now();
|
|
43
|
+
const result = await runGlmNarutoMission({
|
|
44
|
+
cwd: fixture,
|
|
45
|
+
task: 'Change src/bench-target.ts so value is 2. Return the smallest patch only.',
|
|
46
|
+
args: ['--bench', '--live', '--no-apply'],
|
|
47
|
+
missionId: `glm-naruto-live-bench-${workers}-${Date.now()}`,
|
|
48
|
+
maxWorkers: workers,
|
|
49
|
+
noApply: true
|
|
50
|
+
});
|
|
51
|
+
cases.push({
|
|
52
|
+
name: workers === 1 ? 'direct single GLM' : `GLM Naruto ${workers} workers`,
|
|
53
|
+
workers,
|
|
54
|
+
wall_clock_ms: Date.now() - caseStarted,
|
|
55
|
+
p50_ttft_ms: null,
|
|
56
|
+
p90_ttft_ms: null,
|
|
57
|
+
candidate_count: result.patch_candidates,
|
|
58
|
+
gate_pass_rate: result.patch_candidates ? result.gate_passed_candidates / result.patch_candidates : 0,
|
|
59
|
+
verifier_pass_rate: 0,
|
|
60
|
+
merge_success: result.mergeable_candidates > 0,
|
|
61
|
+
cached_tokens: 0,
|
|
62
|
+
cache_write_tokens: 0
|
|
63
|
+
});
|
|
64
|
+
}
|
|
31
65
|
return {
|
|
32
66
|
schema: 'sks.glm-naruto-bench.v1',
|
|
33
|
-
version: '4.0.
|
|
67
|
+
version: '4.0.10',
|
|
34
68
|
generated_at: nowIso(),
|
|
35
|
-
status: '
|
|
69
|
+
status: 'live',
|
|
36
70
|
model: GLM_52_OPENROUTER_MODEL,
|
|
37
71
|
gpt_fallback_allowed: false,
|
|
72
|
+
cases,
|
|
38
73
|
summary: {
|
|
39
|
-
simulated_workers:
|
|
40
|
-
simulated_waves:
|
|
41
|
-
simulated_patch_candidates: 0,
|
|
42
|
-
simulated_gate_passed: 0,
|
|
43
|
-
simulated_mergeable:
|
|
74
|
+
simulated_workers: Math.max(...cases.map((row) => row.workers)),
|
|
75
|
+
simulated_waves: cases.length,
|
|
76
|
+
simulated_patch_candidates: cases.reduce((sum, row) => sum + row.candidate_count, 0),
|
|
77
|
+
simulated_gate_passed: cases.reduce((sum, row) => sum + Math.round(row.candidate_count * row.gate_pass_rate), 0),
|
|
78
|
+
simulated_mergeable: cases.filter((row) => row.merge_success).length,
|
|
44
79
|
wall_clock_ms: Date.now() - started
|
|
45
80
|
},
|
|
46
|
-
warnings: ['
|
|
81
|
+
warnings: ['live_bench_no_apply_temp_repo']
|
|
47
82
|
};
|
|
48
83
|
}
|
|
49
84
|
function blocked(root, warnings) {
|
|
50
85
|
return {
|
|
51
86
|
schema: 'sks.glm-naruto-bench.v1',
|
|
52
|
-
version: '4.0.
|
|
87
|
+
version: '4.0.10',
|
|
53
88
|
generated_at: nowIso(),
|
|
54
89
|
status: 'blocked',
|
|
55
90
|
model: GLM_52_OPENROUTER_MODEL,
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import { checkAndApplyGlmPatch } from '../glm-patch-apply.js';
|
|
2
|
+
/**
 * Combine the patches of the selected worker envelopes into one patch text.
 *
 * Each id in `selectedPatchIds` is matched against an envelope's `worker_id`
 * or `patch_sha256`; ids with no match are ignored. The matched envelopes are
 * ordered by `worker_id`, split into per-file diff sections and merged.
 *
 * @param {Array<object>} envelopes - Envelopes with `worker_id`, `patch_sha256`, `patch`.
 * @param {string[]} selectedPatchIds - Worker ids or patch digests to include.
 * @returns {string} The merged patch text as produced by `mergeDiffSections`.
 */
export function combineGlmNarutoPatches(envelopes, selectedPatchIds) {
    const chosen = [];
    for (const patchId of selectedPatchIds) {
        const match = envelopes.find((env) => env.worker_id === patchId || env.patch_sha256 === patchId);
        if (match) {
            chosen.push(match);
        }
    }
    chosen.sort((a, b) => a.worker_id.localeCompare(b.worker_id));
    const sections = [];
    for (const env of chosen) {
        sections.push(...splitDiffSections(env.patch));
    }
    return mergeDiffSections(sections);
}
|
|
9
|
+
/**
 * Build the combined patch for the selected envelopes and hand it to
 * `checkAndApplyGlmPatch`.
 *
 * @param {object} input
 * @param {Array<object>} input.envelopes - Candidate patch envelopes.
 * @param {string[]} input.selectedPatchIds - Ids of the envelopes to combine.
 * @param {string} input.cwd - Forwarded to `checkAndApplyGlmPatch`.
 * @param {boolean} input.apply - Forwarded to `checkAndApplyGlmPatch`.
 * @returns {Promise<{ok: boolean, patch: string, applied: string[], blocker?: string}>}
 *   On success `applied` is `input.selectedPatchIds`; on failure it is empty
 *   and `blocker` is `combined_patch_empty` or the error code reported by
 *   `checkAndApplyGlmPatch`.
 */
export async function checkAndApplyCombinedGlmNarutoPatch(input) {
    const patch = combineGlmNarutoPatches(input.envelopes, input.selectedPatchIds);
    const blockedWith = (blocker) => ({ ok: false, patch, applied: [], blocker });
    if (patch.trim() === '') {
        return blockedWith('combined_patch_empty');
    }
    const outcome = await checkAndApplyGlmPatch({ cwd: input.cwd, patch, apply: input.apply });
    if (outcome.ok) {
        return { ok: true, patch, applied: input.selectedPatchIds };
    }
    return blockedWith(outcome.error.code);
}
|
|
18
|
+
/**
 * Merge per-file diff sections into a single patch text.
 *
 * Sections that target the same file are grouped in first-seen order. Each
 * group is emitted as the first section's header followed by the hunk lines of
 * every section in the group. File groups are separated by a blank line and
 * the result ends with a single newline.
 *
 * Only trailing line breaks are removed from a group. A trailing line that
 * consists of a single space is a blank context line in unified-diff format,
 * so stripping it would make the last hunk shorter than its header declares.
 *
 * @param {Array<{file: string, header: string[], hunks: string[]}>} sections
 * @returns {string} Merged patch text, or `''` when nothing remains to emit.
 */
function mergeDiffSections(sections) {
    const byFile = new Map();
    for (const section of sections) {
        const group = byFile.get(section.file);
        // Append in place rather than re-copying the group for every section.
        if (group) {
            group.push(section);
        }
        else {
            byFile.set(section.file, [section]);
        }
    }
    const merged = [];
    for (const group of byFile.values()) {
        const lines = [...group[0].header, ...group.flatMap((section) => section.hunks)];
        const text = lines.join('\n').replace(/[\r\n]+$/, '');
        // Skip groups that carry no visible content.
        if (text.trim() !== '') {
            merged.push(text);
        }
    }
    return merged.length > 0 ? `${merged.join('\n\n')}\n` : '';
}
|
|
30
|
+
/**
 * Split a patch into per-file sections at each `diff --git` line.
 *
 * For every section the target file is taken from the `diff --git a/… b/…`
 * line (preferring the `b/` path), falling back to the section's first line.
 * Lines before the first `@@ ` line form the header; the rest are hunk lines.
 *
 * Leading whitespace and trailing line breaks are removed from each section.
 * Other trailing whitespace is kept on purpose: a final line holding a single
 * space is a blank context line in unified-diff format and belongs to the hunk.
 *
 * @param {string} patch - Patch text, possibly covering several files.
 * @returns {Array<{file: string, header: string[], hunks: string[]}>}
 */
function splitDiffSections(patch) {
    const rawSections = patch
        .split(/(?=^diff --git )/m)
        .map((section) => section.trimStart().replace(/[\r\n]+$/, ''))
        // Drop sections that contain nothing but whitespace.
        .filter((section) => section.trim() !== '');
    return rawSections.map((section) => {
        const lines = section.split(/\r?\n/);
        const diff = lines[0]?.match(/^diff --git a\/(.+?) b\/(.+)$/);
        const file = diff?.[2] || diff?.[1] || lines[0] || 'unknown';
        const firstHunk = lines.findIndex((line) => line.startsWith('@@ '));
        if (firstHunk < 0) {
            // Header-only section (no hunks).
            return { file, header: lines, hunks: [] };
        }
        return {
            file,
            header: lines.slice(0, firstHunk),
            hunks: lines.slice(firstHunk),
        };
    });
}
|
|
49
|
+
//# sourceMappingURL=glm-naruto-combined-patch.js.map
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { envelopesHaveHunkConflict } from './glm-naruto-hunk-conflict.js';
|
|
1
2
|
export function buildConflictGraph(envelopes, nodes) {
|
|
2
3
|
const edges = [];
|
|
3
4
|
for (let i = 0; i < nodes.length; i++) {
|
|
@@ -23,11 +24,16 @@ function detectConflict(left, right, envelopes) {
|
|
|
23
24
|
if (leftEnv && rightEnv && leftEnv.base_digest !== rightEnv.base_digest) {
|
|
24
25
|
return { left_patch_id: left.patch_id, right_patch_id: right.patch_id, reason: 'base_digest_mismatch' };
|
|
25
26
|
}
|
|
27
|
+
if (left.shard_id === right.shard_id) {
|
|
28
|
+
return { left_patch_id: left.patch_id, right_patch_id: right.patch_id, reason: 'same_hunk' };
|
|
29
|
+
}
|
|
26
30
|
const leftPaths = new Set(left.target_paths);
|
|
27
31
|
const rightPaths = new Set(right.target_paths);
|
|
28
32
|
const sharedFiles = [...leftPaths].filter((p) => rightPaths.has(p));
|
|
29
33
|
if (sharedFiles.length > 0) {
|
|
30
|
-
if (
|
|
34
|
+
if (leftEnv && rightEnv) {
|
|
35
|
+
if (!envelopesHaveHunkConflict(leftEnv, rightEnv))
|
|
36
|
+
return null;
|
|
31
37
|
return { left_patch_id: left.patch_id, right_patch_id: right.patch_id, reason: 'same_hunk' };
|
|
32
38
|
}
|
|
33
39
|
return { left_patch_id: left.patch_id, right_patch_id: right.patch_id, reason: 'same_file' };
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { hunksOverlap, parseUnifiedDiffHunks } from './glm-naruto-hunk-parser.js';
|
|
2
|
+
/**
 * Decide whether two patch envelopes conflict at hunk level.
 *
 * Both patches are parsed with `parseUnifiedDiffHunks`. If either yields no
 * hunks the answer falls back to whether the envelopes share a target path.
 * Otherwise the envelopes conflict when any pair of hunks overlaps according
 * to `hunksOverlap`.
 *
 * @param {{patch: string, target_paths: string[]}} left
 * @param {{patch: string, target_paths: string[]}} right
 * @returns {boolean}
 */
export function envelopesHaveHunkConflict(left, right) {
    const leftHunks = parseUnifiedDiffHunks(left.patch);
    const rightHunks = parseUnifiedDiffHunks(right.patch);
    const cannotCompareHunks = leftHunks.length === 0 || rightHunks.length === 0;
    if (cannotCompareHunks) {
        return sharesPath(left, right);
    }
    for (const leftHunk of leftHunks) {
        for (const rightHunk of rightHunks) {
            if (hunksOverlap(leftHunk, rightHunk)) {
                return true;
            }
        }
    }
    return false;
}
|
|
9
|
+
/**
 * True when the two envelopes share at least one target path but
 * `envelopesHaveHunkConflict` reports no hunk-level conflict between them.
 *
 * @param {{patch: string, target_paths: string[]}} left
 * @param {{patch: string, target_paths: string[]}} right
 * @returns {boolean}
 */
export function envelopesShareFileButNotHunk(left, right) {
    const touchSameFile = sharesPath(left, right);
    if (!touchSameFile) {
        return touchSameFile;
    }
    return !envelopesHaveHunkConflict(left, right);
}
|
|
12
|
+
/**
 * Report whether any entry of `left.target_paths` also appears in
 * `right.target_paths`.
 *
 * @param {{target_paths: string[]}} left
 * @param {{target_paths: string[]}} right
 * @returns {boolean}
 */
function sharesPath(left, right) {
    const pathsOnRight = new Set(right.target_paths);
    for (const file of left.target_paths) {
        if (pathsOnRight.has(file)) {
            return true;
        }
    }
    return false;
}
|
|
16
|
+
//# sourceMappingURL=glm-naruto-hunk-conflict.js.map
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
/**
 * Extract hunk headers from unified-diff text.
 *
 * The current file is tracked from `diff --git a/… b/…` lines (using the `b/`
 * path) and from `+++ b/…` lines. Every `@@ -old[,n] +new[,n] @@` line seen
 * while a file is known produces one record; a missing count defaults to 1.
 * Hunk headers that appear before any file is known are ignored.
 *
 * @param {string} patch - Unified diff text (LF or CRLF line endings).
 * @returns {Array<{file: string, old_start: number, old_lines: number,
 *   new_start: number, new_lines: number, header: string}>}
 */
export function parseUnifiedDiffHunks(patch) {
    const diffHeaderPattern = /^diff --git a\/(.+?) b\/(.+)$/;
    const newFilePattern = /^\+\+\+ b\/(.+)$/;
    const hunkPattern = /^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)$/;
    // An omitted line count in a hunk header is read as 1.
    const toCount = (raw) => Number(raw || 1);
    const collected = [];
    let activeFile = null;
    patch.split(/\r?\n/).forEach((text) => {
        const diffHeader = diffHeaderPattern.exec(text);
        if (diffHeader?.[2]) {
            activeFile = diffHeader[2];
            return;
        }
        const newFile = newFilePattern.exec(text);
        if (newFile?.[1] && newFile[1] !== '/dev/null') {
            // A `+++` line can never also be a hunk header, so stop here.
            activeFile = newFile[1];
            return;
        }
        const hunkHeader = hunkPattern.exec(text);
        if (!hunkHeader || !activeFile) {
            return;
        }
        collected.push({
            file: activeFile,
            old_start: Number(hunkHeader[1]),
            old_lines: toCount(hunkHeader[2]),
            new_start: Number(hunkHeader[3]),
            new_lines: toCount(hunkHeader[4]),
            header: text,
        });
    });
    return collected;
}
|
|
27
|
+
/**
 * Report whether two parsed hunks touch overlapping line ranges of the same
 * file. Both the old-side and the new-side ranges are compared, and a hunk
 * with a line count below 1 is treated as covering one line.
 *
 * @param {{file: string, old_start: number, old_lines: number, new_start: number, new_lines: number}} left
 * @param {{file: string, old_start: number, old_lines: number, new_start: number, new_lines: number}} right
 * @returns {boolean}
 */
export function hunksOverlap(left, right) {
    if (left.file !== right.file) {
        return false;
    }
    // Inclusive last line of a range that spans at least one line.
    const lastLine = (start, count) => start + Math.max(1, count) - 1;
    const oldSidesOverlap = rangesOverlap(
        left.old_start,
        lastLine(left.old_start, left.old_lines),
        right.old_start,
        lastLine(right.old_start, right.old_lines),
    );
    if (oldSidesOverlap) {
        return oldSidesOverlap;
    }
    return rangesOverlap(
        left.new_start,
        lastLine(left.new_start, left.new_lines),
        right.new_start,
        lastLine(right.new_start, right.new_lines),
    );
}
|
|
33
|
+
/**
 * Test two inclusive ranges `[aStart, aEnd]` and `[bStart, bEnd]` for overlap.
 *
 * @param {number} aStart
 * @param {number} aEnd
 * @param {number} bStart
 * @param {number} bEnd
 * @returns {boolean} True when the ranges share at least one value.
 */
function rangesOverlap(aStart, aEnd, bStart, bEnd) {
    const aBeginsBeforeBEnds = aStart <= bEnd;
    const bBeginsBeforeAEnds = bStart <= aEnd;
    return aBeginsBeforeBEnds && bBeginsBeforeAEnds;
}
|
|
36
|
+
//# sourceMappingURL=glm-naruto-hunk-parser.js.map
|
|
@@ -48,7 +48,7 @@ export async function runGlmJudge(input) {
|
|
|
48
48
|
if (!response.ok) {
|
|
49
49
|
return fallbackJudgeResult(validEnvelopes, [`judge_request_failed:${response.error.code}`]);
|
|
50
50
|
}
|
|
51
|
-
const modelGuard = assertGlm52ActualModel(response.value.model
|
|
51
|
+
const modelGuard = assertGlm52ActualModel(response.value.model);
|
|
52
52
|
if (!modelGuard.ok) {
|
|
53
53
|
return fallbackJudgeResult(validEnvelopes, [`judge_model_guard:${modelGuard.code}`]);
|
|
54
54
|
}
|