kc-beta 0.7.5 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +47 -0
- package/package.json +3 -2
- package/src/agent/engine.js +390 -100
- package/src/agent/pipelines/_advance-hints.js +92 -0
- package/src/agent/pipelines/_milestone-derive.js +247 -13
- package/src/agent/pipelines/skill-authoring.js +30 -1
- package/src/agent/tools/agent-tool.js +2 -2
- package/src/agent/tools/consult-skill.js +15 -0
- package/src/agent/tools/dashboard-render.js +48 -1
- package/src/agent/tools/document-parse.js +31 -2
- package/src/agent/tools/phase-advance.js +17 -13
- package/src/agent/tools/release.js +250 -7
- package/src/agent/tools/sandbox-exec.js +65 -8
- package/src/agent/tools/worker-llm-call.js +95 -15
- package/src/agent/workspace.js +25 -4
- package/src/cli/components.js +4 -1
- package/src/cli/index.js +97 -1
- package/src/config.js +19 -2
- package/src/marathon/driver.js +217 -0
- package/src/marathon/prompts.js +93 -0
- package/template/.env.template +16 -0
- package/template/skills/en/bootstrap-workspace/SKILL.md +14 -0
- package/template/skills/en/quality-control/SKILL.md +9 -0
- package/template/skills/en/skill-authoring/SKILL.md +39 -0
- package/template/skills/en/skill-to-workflow/SKILL.md +53 -0
- package/template/skills/en/work-decomposition/SKILL.md +34 -0
- package/template/skills/phase_skills.yaml +5 -0
- package/template/skills/zh/bootstrap-workspace/SKILL.md +14 -0
- package/template/skills/zh/compliance-judgment/SKILL.md +37 -37
- package/template/skills/zh/document-chunking/SKILL.md +21 -14
- package/template/skills/zh/document-parsing/SKILL.md +65 -65
- package/template/skills/zh/entity-extraction/SKILL.md +68 -68
- package/template/skills/zh/quality-control/SKILL.md +9 -0
- package/template/skills/zh/skill-authoring/SKILL.md +39 -0
- package/template/skills/zh/skill-creator/SKILL.md +204 -200
- package/template/skills/zh/skill-to-workflow/SKILL.md +53 -0
- package/template/skills/zh/tree-processing/SKILL.md +67 -63
- package/template/skills/zh/work-decomposition/SKILL.md +34 -0
- package/template/workflows/common/llm_client.py +168 -0
- package/template/workflows/common/utils.py +132 -0
package/src/agent/engine.js
CHANGED
|
@@ -5,6 +5,9 @@ import {
|
|
|
5
5
|
deriveSkillAuthoringMilestones,
|
|
6
6
|
deriveSkillTestingMilestones,
|
|
7
7
|
} from "./pipelines/_milestone-derive.js";
|
|
8
|
+
import { getPrescriptiveHint } from "./pipelines/_advance-hints.js";
|
|
9
|
+
import { loadEnvFile } from "../config.js";
|
|
10
|
+
import { MarathonDriver } from "../marathon/driver.js";
|
|
8
11
|
import { ContextAssembler } from "./context.js";
|
|
9
12
|
import { ConversationHistory } from "./history.js";
|
|
10
13
|
import { findSafeSplitPoint } from "./message-utils.js";
|
|
@@ -166,6 +169,10 @@ export class AgentEngine {
|
|
|
166
169
|
{ gitAutoCommit: config.gitAutoCommit !== false },
|
|
167
170
|
);
|
|
168
171
|
|
|
172
|
+
// v0.8 P1-B: workspace .env overlay deferred until after eventLog
|
|
173
|
+
// init (see _overlayWorkspaceEnv call below). Workspace dir is
|
|
174
|
+
// known here, but the overlay's audit event needs eventLog.
|
|
175
|
+
|
|
169
176
|
// For sub-agents, persistence (history/events/state) lives under
|
|
170
177
|
// sub_agents/<scope>/ instead of the workspace root. Workspace files
|
|
171
178
|
// (rules/, rule_skills/, workflows/) stay shared.
|
|
@@ -203,6 +210,26 @@ export class AgentEngine {
|
|
|
203
210
|
// Event log (append-only JSONL, source of truth)
|
|
204
211
|
this.eventLog = new EventLog(this.workspace.cwd, { logDir });
|
|
205
212
|
|
|
213
|
+
// v0.8 P1-B: overlay workspace .env onto this.config. cli/index.js
|
|
214
|
+
// calls loadSettings() without a workspace path because the path
|
|
215
|
+
// isn't known until this constructor runs. Result: workspace .env's
|
|
216
|
+
// VLM_TIER1 / OCR_MODEL_TIER1 / TIER1..4 / LANGUAGE were silently
|
|
217
|
+
// ignored, with gc defaults (~/.kc_agent/config.json) winning.
|
|
218
|
+
// 资管 audit § 9.2 finding 7: user's OCR_MODEL_TIER1=zai-org/GLM-4.6V
|
|
219
|
+
// never reached document_parse; error messages quoted gc's
|
|
220
|
+
// Qwen3-VL-235B default. Overlay reads workspace .env, fills in
|
|
221
|
+
// fields where current config came from gc fallback (penv-set values
|
|
222
|
+
// still win because loadSettings applied them).
|
|
223
|
+
try { this._overlayWorkspaceEnv(); } catch { /* best-effort */ }
|
|
224
|
+
|
|
225
|
+
// v0.8.1 P8-A: inline marathon driver. v0.8.0's separate-process
|
|
226
|
+
// kc-marathon CLI + filesystem-watcher IPC died silently when the
|
|
227
|
+
// launching terminal closed (E2E #11 audit). Redesigned as an inline
|
|
228
|
+
// state machine activated via /marathon slash command. No filesystem
|
|
229
|
+
// marker, no inbox.jsonl. Driver instance set by enterMarathonMode(),
|
|
230
|
+
// cleared by exitMarathonMode(). Query via this.marathonDriver.
|
|
231
|
+
this.marathonDriver = null;
|
|
232
|
+
|
|
206
233
|
// Context windowing
|
|
207
234
|
this.contextWindow = new ContextWindow({
|
|
208
235
|
contextLimit: config.kcContextLimit || 200000,
|
|
@@ -216,8 +243,6 @@ export class AgentEngine {
|
|
|
216
243
|
// so they don't get a TaskManager.
|
|
217
244
|
this.taskManager = this._isSubagent ? null : new TaskManager(this.workspace.cwd);
|
|
218
245
|
|
|
219
|
-
// Build all tool instances (but register phase-appropriate ones)
|
|
220
|
-
this._buildTools = this._createAllTools();
|
|
221
246
|
this._phaseSummaries = [];
|
|
222
247
|
|
|
223
248
|
// Pipeline system (meta-meta skills as code)
|
|
@@ -233,8 +258,15 @@ export class AgentEngine {
|
|
|
233
258
|
};
|
|
234
259
|
|
|
235
260
|
// Skill discovery (Claude Code pattern: index in context, full content on demand)
|
|
261
|
+
// v0.7.5 — must initialize BEFORE _createAllTools() because ConsultSkillTool
|
|
262
|
+
// takes this._skillLoader as a constructor arg. Was a v0.7.5 init-order bug:
|
|
263
|
+
// _createAllTools ran first, passed undefined skillLoader to ConsultSkillTool,
|
|
264
|
+
// calls to consult_skill threw "Cannot read properties of undefined".
|
|
236
265
|
this._skillLoader = new SkillLoader(config.language);
|
|
237
266
|
|
|
267
|
+
// Build all tool instances (but register phase-appropriate ones)
|
|
268
|
+
this._buildTools = this._createAllTools();
|
|
269
|
+
|
|
238
270
|
// v0.7.5 G-D1: populate <workspace>/skills/ with the initial phase's
|
|
239
271
|
// available skill set. Symlink with copy fallback. Re-populated on
|
|
240
272
|
// every phase advance/retreat (see _advancePhase).
|
|
@@ -247,20 +279,26 @@ export class AgentEngine {
|
|
|
247
279
|
});
|
|
248
280
|
} catch { /* best-effort; skills/ population is not a critical-path failure */ }
|
|
249
281
|
|
|
282
|
+
// v0.8.1 P10-A: auto-populate <workspace>/workflows/common/llm_client.py
|
|
283
|
+
// from the template. Idempotent (skips if file already exists). Covers
|
|
284
|
+
// the bench-corpus flow where `kc-beta init` was bypassed. v0.8.0
|
|
285
|
+
// shipped this shim as embedded source in skill-to-workflow teaching;
|
|
286
|
+
// E2E #11 audits found BOTH agents ignored the teaching and wrote
|
|
287
|
+
// their own (non-canonical) llm_client.py. Shipping it as a template
|
|
288
|
+
// file the agent finds via filesystem walk is more robust.
|
|
289
|
+
try { this._populateWorkspaceCommonShims(); } catch { /* best-effort */ }
|
|
290
|
+
|
|
250
291
|
// Register tools for initial phase
|
|
251
292
|
this.toolRegistry = new ToolRegistry();
|
|
252
293
|
this._registerToolsForPhase(this.currentPhase);
|
|
253
294
|
|
|
254
|
-
//
|
|
255
|
-
//
|
|
256
|
-
//
|
|
257
|
-
//
|
|
258
|
-
//
|
|
259
|
-
//
|
|
260
|
-
//
|
|
261
|
-
this._lastReady = Object.fromEntries(
|
|
262
|
-
Object.keys(this.pipelines).map((p) => [p, false]),
|
|
263
|
-
);
|
|
295
|
+
// v0.8 P1-D: removed `_lastReady` edge-trigger state. It was the
|
|
296
|
+
// bookkeeping for `_maybeAutoAdvance`, which v0.7.4 G0b decommissioned
|
|
297
|
+
// (all call sites removed because v0.7.3's mid-session auto-advance
|
|
298
|
+
// chain regression was caused by it). The method definition itself
|
|
299
|
+
// is also gone in P1-D. Phase advance is now 100% explicit: agent's
|
|
300
|
+
// `phase_advance` tool or user re-prompt. Resume + rollback paths
|
|
301
|
+
// that previously re-primed `_lastReady` are no-ops now.
|
|
264
302
|
|
|
265
303
|
// B0.1: Heap sampler. Parent engines only — sub-agents share a process
|
|
266
304
|
// with the parent and would double-log. Writes a single JSONL line
|
|
@@ -271,6 +309,111 @@ export class AgentEngine {
|
|
|
271
309
|
this._heapSamplerStop = this._isSubagent ? null : this._startHeapSampler();
|
|
272
310
|
}
|
|
273
311
|
|
|
312
|
+
/**
|
|
313
|
+
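* v0.8 P1-B — documents `_overlayWorkspaceEnv()` (defined further below, after `_populateWorkspaceCommonShims()`): overlay workspace .env onto this.config now that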
|
|
314
|
+
* this.workspace.cwd is known. Only fills in fields where the current
|
|
315
|
+
* config value was a gc fallback (empty OR the gc default) — does NOT
|
|
316
|
+
* override fields that came from process.env (those win at
|
|
317
|
+
* loadSettings() time and stay winning).
|
|
318
|
+
*
|
|
319
|
+
* Without this overlay, workspace .env's VLM_TIER1 / OCR_MODEL_TIER1 /
|
|
320
|
+
* TIER1..4 / LANGUAGE are silently ignored — the v0.7.4 G1b OCR_MODEL_TIER1
|
|
321
|
+
* alias fix landed at the config layer but never reached the runtime
|
|
322
|
+
* because loadSettings() is called without a workspace path.
|
|
323
|
+
*/
|
|
324
|
+
/**
|
|
325
|
+
* v0.8.1 P10-A: copy canonical `workflows/common/*.py` shims from the
|
|
326
|
+
* bundled template if they're missing in the workspace. Provides
|
|
327
|
+
* `llm_client.py` (worker LLM HTTP shim, provider-agnostic) and
|
|
328
|
+
* `utils.py` (strip_annotations + helpers). Idempotent — never
|
|
329
|
+
* overwrites existing files (agent edits stay intact).
|
|
330
|
+
*
|
|
331
|
+
* Runs at engine init. Covers bench-corpus mode where `kc-beta init`
|
|
332
|
+
* doesn't run; init-flow workspaces already have these from copyDir.
|
|
333
|
+
*/
|
|
334
|
+
_populateWorkspaceCommonShims() {
|
|
335
|
+
const __dirname = path.dirname(new URL(import.meta.url).pathname);
|
|
336
|
+
const templateRoot = path.resolve(__dirname, "..", "..", "template", "workflows", "common");
|
|
337
|
+
if (!fs.existsSync(templateRoot)) return;
|
|
338
|
+
|
|
339
|
+
const targetRoot = path.join(this.workspace.cwd, "workflows", "common");
|
|
340
|
+
fs.mkdirSync(targetRoot, { recursive: true });
|
|
341
|
+
|
|
342
|
+
const copied = [];
|
|
343
|
+
const skipped = [];
|
|
344
|
+
for (const entry of fs.readdirSync(templateRoot)) {
|
|
345
|
+
if (!entry.endsWith(".py") || entry.startsWith(".")) continue;
|
|
346
|
+
const srcPath = path.join(templateRoot, entry);
|
|
347
|
+
const dstPath = path.join(targetRoot, entry);
|
|
348
|
+
if (fs.existsSync(dstPath)) {
|
|
349
|
+
skipped.push(entry);
|
|
350
|
+
continue;
|
|
351
|
+
}
|
|
352
|
+
try {
|
|
353
|
+
fs.copyFileSync(srcPath, dstPath);
|
|
354
|
+
copied.push(entry);
|
|
355
|
+
} catch { /* best-effort */ }
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
if (copied.length > 0) {
|
|
359
|
+
try {
|
|
360
|
+
this.eventLog?.append?.("workflows_common_populated", { copied, skipped });
|
|
361
|
+
} catch { /* best-effort */ }
|
|
362
|
+
}
|
|
363
|
+
}
|
|
364
|
+
|
|
365
|
+
_overlayWorkspaceEnv() {
|
|
366
|
+
if (!this.workspace?.cwd) return;
|
|
367
|
+
const envPath = path.join(this.workspace.cwd, ".env");
|
|
368
|
+
if (!fs.existsSync(envPath)) return;
|
|
369
|
+
let wsEnv;
|
|
370
|
+
try { wsEnv = loadEnvFile(envPath); } catch { return; }
|
|
371
|
+
if (!wsEnv || typeof wsEnv !== "object") return;
|
|
372
|
+
|
|
373
|
+
// VLM tiers — workspace .env's VLM_TIER1 / OCR_MODEL_TIER1 wins over
|
|
374
|
+
// gc's vlm_tiers.tier1 default. process.env precedence preserved
|
|
375
|
+
// because loadSettings already applied it; we only fill in slots
|
|
376
|
+
// that fell through to gc-or-empty.
|
|
377
|
+
const overlays = [
|
|
378
|
+
{ configKey: "vlmTier1", envKey: ["VLM_TIER1", "OCR_MODEL_TIER1"] },
|
|
379
|
+
{ configKey: "vlmTier2", envKey: ["VLM_TIER2", "OCR_MODEL_TIER2"] },
|
|
380
|
+
{ configKey: "vlmTier3", envKey: ["VLM_TIER3", "OCR_MODEL_TIER3"] },
|
|
381
|
+
{ configKey: "tier1", envKey: ["TIER1"] },
|
|
382
|
+
{ configKey: "tier2", envKey: ["TIER2"] },
|
|
383
|
+
{ configKey: "tier3", envKey: ["TIER3"] },
|
|
384
|
+
{ configKey: "tier4", envKey: ["TIER4"] },
|
|
385
|
+
{ configKey: "language", envKey: ["LANGUAGE"] },
|
|
386
|
+
];
|
|
387
|
+
|
|
388
|
+
const applied = [];
|
|
389
|
+
for (const { configKey, envKey } of overlays) {
|
|
390
|
+
// Find first non-empty workspace .env value for this config key
|
|
391
|
+
let wsValue = "";
|
|
392
|
+
for (const k of envKey) {
|
|
393
|
+
if (wsEnv[k]) { wsValue = wsEnv[k]; break; }
|
|
394
|
+
}
|
|
395
|
+
if (!wsValue) continue;
|
|
396
|
+
// Skip if process.env has the same key set — penv already won
|
|
397
|
+
const penvWon = envKey.some((k) => process.env[k] && process.env[k] !== wsValue);
|
|
398
|
+
if (penvWon) continue;
|
|
399
|
+
// Apply the workspace value
|
|
400
|
+
if (this.config[configKey] !== wsValue) {
|
|
401
|
+
applied.push({ key: configKey, from: this.config[configKey] || "(empty)", to: wsValue });
|
|
402
|
+
this.config[configKey] = wsValue;
|
|
403
|
+
}
|
|
404
|
+
}
|
|
405
|
+
|
|
406
|
+
// Audit visibility: emit a one-time event listing what was overlaid.
|
|
407
|
+
if (applied.length > 0) {
|
|
408
|
+
try {
|
|
409
|
+
this.eventLog?.append?.("workspace_env_overlay", {
|
|
410
|
+
envPath: path.relative(this.workspace.cwd, envPath),
|
|
411
|
+
fields: applied,
|
|
412
|
+
});
|
|
413
|
+
} catch { /* best-effort */ }
|
|
414
|
+
}
|
|
415
|
+
}
|
|
416
|
+
|
|
274
417
|
/**
|
|
275
418
|
* Start sampling process.memoryUsage() every 60 s into logs/heap.jsonl.
|
|
276
419
|
* Returns a stop fn. Timer is .unref()'d so it never keeps the process
|
|
@@ -280,11 +423,22 @@ export class AgentEngine {
|
|
|
280
423
|
_startHeapSampler() {
|
|
281
424
|
const logDir = path.join(this.workspace.cwd, "logs");
|
|
282
425
|
const logPath = path.join(logDir, "heap.jsonl");
|
|
426
|
+
let stopped = false;
|
|
427
|
+
let lastSampleAt = 0;
|
|
428
|
+
|
|
283
429
|
const sample = () => {
|
|
284
430
|
try {
|
|
285
431
|
const mem = process.memoryUsage();
|
|
432
|
+
const now = Date.now();
|
|
433
|
+
// v0.8 P1-C: track skipped intervals. If more than 90s elapsed
|
|
434
|
+
// since last sample on a 60s cadence, the previous tick was missed
|
|
435
|
+
// (event loop sleep, GC pause, etc.). Surface in the row so the
|
|
436
|
+
// post-mortem audit can detect gaps without needing to compare
|
|
437
|
+
// adjacent timestamps.
|
|
438
|
+
const skippedMs = lastSampleAt > 0 ? (now - lastSampleAt - 60_000) : 0;
|
|
439
|
+
lastSampleAt = now;
|
|
286
440
|
const row = {
|
|
287
|
-
t: new Date().toISOString(),
|
|
441
|
+
t: new Date(now).toISOString(),
|
|
288
442
|
seq: this.eventLog?.currentSeq ?? 0,
|
|
289
443
|
phase: this.currentPhase,
|
|
290
444
|
rssMB: Math.round(mem.rss / 1024 / 1024),
|
|
@@ -301,17 +455,36 @@ export class AgentEngine {
|
|
|
301
455
|
// and the row gets `componentsErr` instead.
|
|
302
456
|
components: this._sampleComponents(),
|
|
303
457
|
};
|
|
458
|
+
if (skippedMs > 0) row.skippedMs = skippedMs;
|
|
304
459
|
fs.mkdirSync(logDir, { recursive: true });
|
|
305
460
|
fs.appendFileSync(logPath, JSON.stringify(row) + "\n", "utf-8");
|
|
306
461
|
} catch { /* never fatal */ }
|
|
307
462
|
};
|
|
463
|
+
|
|
464
|
+
// v0.8 P1-C: self-rescheduling setTimeout instead of setInterval. The
|
|
465
|
+
// 资管 v0.7.5 session shows only 2 heap.jsonl entries (12:39:40 start
|
|
466
|
+
// + 12:40:40 first tick) across an 18-hour run — the unref'd
|
|
467
|
+
// setInterval was somehow dropped between event-loop idle phases.
|
|
468
|
+
// setTimeout reschedules from inside the sample callback, so the
|
|
469
|
+
// timer is re-registered every tick. unref'd so we don't block exit.
|
|
470
|
+
let timeoutHandle = null;
|
|
471
|
+
const scheduleNext = () => {
|
|
472
|
+
if (stopped) return;
|
|
473
|
+
timeoutHandle = setTimeout(() => {
|
|
474
|
+
sample();
|
|
475
|
+
scheduleNext();
|
|
476
|
+
}, 60_000);
|
|
477
|
+
timeoutHandle.unref?.();
|
|
478
|
+
};
|
|
479
|
+
|
|
308
480
|
// Record one sample at startup so we have a baseline even on short runs.
|
|
309
481
|
sample();
|
|
310
|
-
|
|
311
|
-
|
|
482
|
+
scheduleNext();
|
|
483
|
+
|
|
312
484
|
return () => {
|
|
313
485
|
try {
|
|
314
|
-
|
|
486
|
+
stopped = true;
|
|
487
|
+
if (timeoutHandle) clearTimeout(timeoutHandle);
|
|
315
488
|
sample(); // one final sample on shutdown
|
|
316
489
|
} catch { /* ignore */ }
|
|
317
490
|
};
|
|
@@ -428,7 +601,10 @@ export class AgentEngine {
|
|
|
428
601
|
return {
|
|
429
602
|
// Always available (BUILD + DISTILL)
|
|
430
603
|
core: [
|
|
431
|
-
new SandboxExecTool(this.workspace,
|
|
604
|
+
new SandboxExecTool(this.workspace, {
|
|
605
|
+
defaultTimeoutMs: this.config.kcExecDefaultTimeoutMs,
|
|
606
|
+
maxTimeoutMs: this.config.kcExecMaxTimeoutMs,
|
|
607
|
+
}),
|
|
432
608
|
new WorkspaceFileTool(this.workspace, this.versionManager),
|
|
433
609
|
new CopyToWorkspaceTool(this.workspace, {
|
|
434
610
|
largeRefThresholdMB: this.config.largeRefThresholdMB ?? 10,
|
|
@@ -468,7 +644,12 @@ export class AgentEngine {
|
|
|
468
644
|
mineruApiKey: this.config.mineruApiKey,
|
|
469
645
|
llmApiKey: workerApiKey,
|
|
470
646
|
llmBaseUrl: workerBaseUrl,
|
|
647
|
+
// v0.8.1 P9-B: live-read vlmTier1 so workspace_env_overlay
|
|
648
|
+
// changes after tool construction (or mid-run .env edits)
|
|
649
|
+
// reach document_parse. The static `ocrModel` is the
|
|
650
|
+
// construction-time fallback; getOcrModel takes precedence.
|
|
471
651
|
ocrModel: vlmModel,
|
|
652
|
+
getOcrModel: () => this.config.vlmTier1 || vlmModel,
|
|
472
653
|
}),
|
|
473
654
|
new DocumentSearchTool(this.workspace),
|
|
474
655
|
// Group C — chunker/RAG infrastructure ported from AMC app. Core
|
|
@@ -968,16 +1149,9 @@ export class AgentEngine {
|
|
|
968
1149
|
}
|
|
969
1150
|
}
|
|
970
1151
|
|
|
971
|
-
//
|
|
972
|
-
//
|
|
973
|
-
//
|
|
974
|
-
for (const phase of Object.keys(engine.pipelines)) {
|
|
975
|
-
try {
|
|
976
|
-
engine._lastReady[phase] = !!engine.pipelines[phase].exitCriteriaMet?.();
|
|
977
|
-
} catch {
|
|
978
|
-
engine._lastReady[phase] = false;
|
|
979
|
-
}
|
|
980
|
-
}
|
|
1152
|
+
// v0.8 P1-D: removed `_lastReady` re-prime. Was the bookkeeping for
|
|
1153
|
+
// `_maybeAutoAdvance` which v0.7.4 G0b decommissioned. Phase advance
|
|
1154
|
+
// is explicit now; nothing to re-prime on resume.
|
|
981
1155
|
|
|
982
1156
|
engine.eventLog.append("session_resume", {
|
|
983
1157
|
resumedPhase: engine.currentPhase,
|
|
@@ -1086,6 +1260,29 @@ export class AgentEngine {
|
|
|
1086
1260
|
// budget. Better to lose some history than crash with HTTP 400.
|
|
1087
1261
|
messages = this._enforceTokenBudget(messages);
|
|
1088
1262
|
|
|
1263
|
+
// v0.8 P3-A: skill usage counter — emit one skill_byte_send event
|
|
1264
|
+
// per always-loaded skill per LLM send. Captures the cost of having
|
|
1265
|
+
// a skill body inlined in the system prompt (Layer B per design doc).
|
|
1266
|
+
// Agent-blind: events go to events.jsonl only; never surfaced to the
|
|
1267
|
+
// agent's context. consult_skill tool results emit their own
|
|
1268
|
+
// skill_invoked events with via_tool="consult_skill" (already in
|
|
1269
|
+
// place since v0.7.5 G-C4), so we don't double-count those here.
|
|
1270
|
+
try {
|
|
1271
|
+
const { alwaysLoaded } = this._skillLoader.getPhaseSkillSet(this.currentPhase) || {};
|
|
1272
|
+
if (Array.isArray(alwaysLoaded)) {
|
|
1273
|
+
for (const skill of alwaysLoaded) {
|
|
1274
|
+
const body = this._skillLoader.loadSkillBody(skill);
|
|
1275
|
+
if (!body) continue;
|
|
1276
|
+
this.eventLog.append("skill_byte_send", {
|
|
1277
|
+
skill,
|
|
1278
|
+
via: "system_prompt_always_loaded",
|
|
1279
|
+
byte_count: body.length,
|
|
1280
|
+
phase: this.currentPhase,
|
|
1281
|
+
});
|
|
1282
|
+
}
|
|
1283
|
+
}
|
|
1284
|
+
} catch { /* counter is best-effort; never break the turn */ }
|
|
1285
|
+
|
|
1089
1286
|
this.eventLog.append("llm_start", {
|
|
1090
1287
|
model: this.config.kcModel,
|
|
1091
1288
|
messageCount: messages.length,
|
|
@@ -1335,11 +1532,17 @@ export class AgentEngine {
|
|
|
1335
1532
|
// path-matching emission stays only as a fallback for any agent
|
|
1336
1533
|
// that reads a SKILL.md path directly (out of pattern).
|
|
1337
1534
|
try {
|
|
1535
|
+
// v0.8 P1-E: heredoc detection. `cat << 'EOF' > /tmp/skill.md`
|
|
1536
|
+
// matches the read-verb regex but is actually a WRITE — the
|
|
1537
|
+
// heredoc operator `<<` means cat is consuming inline content
|
|
1538
|
+
// (the heredoc body), not a file path. 资管 v0.7.5 audit § 5f
|
|
1539
|
+
// confirmed 1 spurious skill_invoked event of this kind.
|
|
1540
|
+
// Excluding any command with `<<` from the isRead classification.
|
|
1541
|
+
const cmd = String(inputData?.command || "");
|
|
1542
|
+
const isHeredoc = cmd.includes("<<");
|
|
1338
1543
|
const isRead =
|
|
1339
1544
|
(tc.name === "workspace_file" && inputData?.operation === "read") ||
|
|
1340
|
-
(tc.name === "sandbox_exec" && /\b(cat|head|tail|less|grep|view|read)\b/.test(
|
|
1341
|
-
String(inputData?.command || "")
|
|
1342
|
-
));
|
|
1545
|
+
(tc.name === "sandbox_exec" && !isHeredoc && /\b(cat|head|tail|less|grep|view|read)\b/.test(cmd));
|
|
1343
1546
|
if (
|
|
1344
1547
|
!result.isError &&
|
|
1345
1548
|
isRead &&
|
|
@@ -1349,9 +1552,13 @@ export class AgentEngine {
|
|
|
1349
1552
|
// v0.7.5 flat layout: skills/<name>/SKILL.md (workspace scope)
|
|
1350
1553
|
// OR template/skills/<lang>/<name>/SKILL.md (template scope, rare)
|
|
1351
1554
|
// Deep layout backward-compat preserved for any stragglers.
|
|
1555
|
+
// v0.8 P0-B: accept lowercase `skill.md` too — 资管 audit § 3.2
|
|
1556
|
+
// found agents writing lowercase consistently (14/14 rule_skills/).
|
|
1557
|
+
// Limited to exact uppercase OR exact lowercase (no mixed case)
|
|
1558
|
+
// to avoid spurious matches on unrelated files (e.g., `Skill.md`).
|
|
1352
1559
|
const skillMatch = p.match(
|
|
1353
|
-
/(?:template\/)?skills\/(?:[a-z]+\/)?(?:(?:meta-meta|meta|skill-creator)\/)?([a-zA-Z0-9_-]+)\/SKILL\.md\b/
|
|
1354
|
-
) || p.match(/\
|
|
1560
|
+
/(?:template\/)?skills\/(?:[a-z]+\/)?(?:(?:meta-meta|meta|skill-creator)\/)?([a-zA-Z0-9_-]+)\/(?:SKILL|skill)\.md\b/
|
|
1561
|
+
) || p.match(/\b(?:SKILL|skill)\.md\b/);
|
|
1355
1562
|
if (skillMatch) {
|
|
1356
1563
|
const skillName = skillMatch[1] || "(unknown)";
|
|
1357
1564
|
this.eventLog.append("skill_invoked", {
|
|
@@ -1452,10 +1659,12 @@ export class AgentEngine {
|
|
|
1452
1659
|
}
|
|
1453
1660
|
|
|
1454
1661
|
/**
|
|
1455
|
-
* Centralized phase transition (Bug 4).
|
|
1662
|
+
* Centralized phase transition (Bug 4). Two triggers route through here
|
|
1663
|
+
* after v0.7.4 G0b + v0.8 P1-D:
|
|
1456
1664
|
* (1) pipeline.onToolResult returning phase_ready
|
|
1457
|
-
* (2)
|
|
1458
|
-
* (3)
|
|
1665
|
+
* (2) explicit user request via the phase_advance tool
|
|
1666
|
+
* (The historical (3) post-turn auto-check via `_maybeAutoAdvance` was
|
|
1667
|
+
* removed; phase advance is 100% explicit.)
|
|
1459
1668
|
*
|
|
1460
1669
|
* Reachability: by default only forward-by-one transitions per NEXT_PHASE.
|
|
1461
1670
|
* Set `force: true` to allow non-adjacent or backward transitions (e.g. user
|
|
@@ -1533,9 +1742,17 @@ export class AgentEngine {
|
|
|
1533
1742
|
try { criteriaMet = !!fromPipeline?.exitCriteriaMet?.(); } catch { criteriaMet = true; }
|
|
1534
1743
|
if (!criteriaMet) {
|
|
1535
1744
|
const counts = this._buildEngineCountsBlock(this.currentPhase);
|
|
1745
|
+
// v0.8 P0-E: prescriptive hint in the event payload so post-mortem
|
|
1746
|
+
// audits see what the agent was told (matches what phase-advance.js
|
|
1747
|
+
// returns to the LLM).
|
|
1748
|
+
let prescriptive = null;
|
|
1749
|
+
try {
|
|
1750
|
+
prescriptive = getPrescriptiveHint(this.currentPhase, null, counts || "");
|
|
1751
|
+
} catch { /* hint generation is best-effort */ }
|
|
1536
1752
|
this.eventLog.append("phase_advance_refused", {
|
|
1537
1753
|
from: this.currentPhase, to: nextPhase, reason,
|
|
1538
1754
|
hint: "exit criteria not met by engine telemetry",
|
|
1755
|
+
prescriptive_hint: prescriptive,
|
|
1539
1756
|
engineCounts: counts || null,
|
|
1540
1757
|
});
|
|
1541
1758
|
return false;
|
|
@@ -1610,23 +1827,16 @@ export class AgentEngine {
|
|
|
1610
1827
|
});
|
|
1611
1828
|
}
|
|
1612
1829
|
|
|
1613
|
-
// v0.
|
|
1614
|
-
//
|
|
1615
|
-
//
|
|
1616
|
-
// this, the auto-advance edge trigger stays latched true and the
|
|
1617
|
-
// moment the agent returns to fromPhase the engine immediately
|
|
1618
|
-
// bounces them back out — defeating the rollback.
|
|
1619
|
-
if (direction === "rollback" && this._lastReady) {
|
|
1620
|
-
this._lastReady[fromPhase] = false;
|
|
1621
|
-
}
|
|
1830
|
+
// v0.8 P1-D: removed `_lastReady` rollback reset. Was the bookkeeping
|
|
1831
|
+
// for `_maybeAutoAdvance` which v0.7.4 G0b decommissioned. Phase
|
|
1832
|
+
// advance is explicit now; rollback just needs to commit the new phase.
|
|
1622
1833
|
|
|
1623
1834
|
this.saveState();
|
|
1624
1835
|
|
|
1625
1836
|
// B8: Soft signal — surface any sub-agents left running from the prior
|
|
1626
1837
|
// phase so the main agent's next turn can decide whether to kill them.
|
|
1627
|
-
// NOT automated:
|
|
1628
|
-
//
|
|
1629
|
-
// radius. This just informs.
|
|
1838
|
+
// NOT automated: auto-killing would couple lifecycle with blast radius.
|
|
1839
|
+
// This just informs.
|
|
1630
1840
|
try {
|
|
1631
1841
|
const agentTool = this._buildTools?.core?.find((t) => t?.name === "agent_tool");
|
|
1632
1842
|
const runningIds = agentTool?.getRunningTaskIds?.() || [];
|
|
@@ -1826,35 +2036,12 @@ export class AgentEngine {
|
|
|
1826
2036
|
return false;
|
|
1827
2037
|
}
|
|
1828
2038
|
|
|
1829
|
-
|
|
1830
|
-
|
|
1831
|
-
|
|
1832
|
-
|
|
1833
|
-
|
|
1834
|
-
|
|
1835
|
-
_maybeAutoAdvance() {
|
|
1836
|
-
const phase = this.currentPhase;
|
|
1837
|
-
const pipeline = this.pipelines[phase];
|
|
1838
|
-
let nowReady = false;
|
|
1839
|
-
try { nowReady = !!pipeline?.exitCriteriaMet?.(); } catch { nowReady = false; }
|
|
1840
|
-
|
|
1841
|
-
if (!nowReady) {
|
|
1842
|
-
this._lastReady[phase] = false;
|
|
1843
|
-
return null;
|
|
1844
|
-
}
|
|
1845
|
-
// Edge-trigger: nowReady && !wasReady
|
|
1846
|
-
if (this._lastReady[phase]) return null;
|
|
1847
|
-
this._lastReady[phase] = true;
|
|
1848
|
-
|
|
1849
|
-
const next = NEXT_PHASE[phase];
|
|
1850
|
-
if (!next) return null;
|
|
1851
|
-
const advanced = this._advancePhase(next, "exit criteria flipped to met");
|
|
1852
|
-
if (!advanced) return null;
|
|
1853
|
-
return new AgentEvent({
|
|
1854
|
-
type: "pipeline_event",
|
|
1855
|
-
data: { type: "phase_ready", nextPhase: next, message: "exit criteria flipped to met" },
|
|
1856
|
-
});
|
|
1857
|
-
}
|
|
2039
|
+
// v0.8 P1-D: `_maybeAutoAdvance()` deleted. The method auto-fired phase
|
|
2040
|
+
// advance on a false→true flip of `exitCriteriaMet()`, but v0.7.3
|
|
2041
|
+
// showed mid-session auto-advance chains were a regression hazard
|
|
2042
|
+
// (user couldn't review between phases). v0.7.4 G0b removed all call
|
|
2043
|
+
// sites; v0.8 P1-D removes the now-dead method definition + the
|
|
2044
|
+
// `_lastReady` bookkeeping it relied on. Phase advance is 100% explicit.
|
|
1858
2045
|
|
|
1859
2046
|
/**
|
|
1860
2047
|
* Tool-call offloading. If the tool's content exceeds the threshold,
|
|
@@ -2163,27 +2350,38 @@ export class AgentEngine {
|
|
|
2163
2350
|
/** B1: original serial ralph-loop path — one task at a time, shared
|
|
2164
2351
|
* conversation history. Unchanged from pre-v0.6.0 behavior. */
|
|
2165
2352
|
async *_runTaskLoopSerial(userMessage) {
|
|
2166
|
-
//
|
|
2167
|
-
|
|
2168
|
-
|
|
2169
|
-
//
|
|
2170
|
-
//
|
|
2171
|
-
//
|
|
2172
|
-
//
|
|
2173
|
-
//
|
|
2174
|
-
//
|
|
2175
|
-
//
|
|
2176
|
-
// TODO: after the overnight E2E results come in (2026-05-14), decide:
|
|
2177
|
-
// (a) re-enable F5 strict and build marathon as a separate mode
|
|
2178
|
-
// (external driver pattern, e.g., /loop-kc command) — locked
|
|
2179
|
-
// earlier decision per harness-research § 7
|
|
2180
|
-
// (b) keep capture-AFTER permanently and accept multi-phase prompts
|
|
2353
|
+
// v0.8 P5-A: F5 re-enabled, conditional on marathon mode.
|
|
2354
|
+
// - Interactive sessions (marathon NOT active): capture startingPhase
|
|
2355
|
+
// BEFORE the initial runTurn, and exit the loop on ANY phase change
|
|
2356
|
+
// (including within the initial runTurn). One user prompt = one
|
|
2357
|
+
// phase advance. Path (a) per design doc Q5 lean.
|
|
2358
|
+
// - Marathon sessions: the kc-marathon driver provides per-phase
|
|
2359
|
+
// prompts via direct marathonDriver.decideNext() calls, so the engine doesn't need
|
|
2360
|
+
// F5's checkpoint — phase chaining is OK because each chained phase
|
|
2361
|
+
// STILL gets its own driver-emitted prompt next tick.
|
|
2181
2362
|
//
|
|
2182
|
-
//
|
|
2183
|
-
//
|
|
2184
|
-
//
|
|
2185
|
-
//
|
|
2363
|
+
// v0.7.3 demonstrated why F5 matters interactively: auto-chained
|
|
2364
|
+
// phase advances skip the user check-in cycle and broke phase
|
|
2365
|
+
// control in team testing. v0.7.4 G0c first fixed it via
|
|
2366
|
+
// post-initial-runTurn exit; v0.7.5 added the strict capture-BEFORE
|
|
2367
|
+
// refinement; v0.8 P5-A preserves both with the marathon escape;
|
|
2368
|
+
// v0.8.1 P8-A switched marathon-active source from filesystem
|
|
2369
|
+
// marker to inline driver instance.
|
|
2370
|
+
const marathonActive = this.isMarathonActive();
|
|
2186
2371
|
const startingPhase = this.currentPhase;
|
|
2372
|
+
yield* this.runTurn(userMessage);
|
|
2373
|
+
|
|
2374
|
+
// F5 strict gate: if interactive AND phase changed during initial
|
|
2375
|
+
// runTurn, exit immediately (don't auto-continue tasks in the new
|
|
2376
|
+
// phase). Marathon bypasses — driver decides pacing.
|
|
2377
|
+
if (!marathonActive && this.currentPhase !== startingPhase) {
|
|
2378
|
+
this.eventLog.append("ralph_loop_exit", {
|
|
2379
|
+
reason: "f5_strict_initial_turn",
|
|
2380
|
+
from: startingPhase,
|
|
2381
|
+
to: this.currentPhase,
|
|
2382
|
+
});
|
|
2383
|
+
return;
|
|
2384
|
+
}
|
|
2187
2385
|
|
|
2188
2386
|
// Auto-continue through pending tasks (within current phase only)
|
|
2189
2387
|
while (this.taskManager.getNextPending()) {
|
|
@@ -2262,6 +2460,86 @@ export class AgentEngine {
|
|
|
2262
2460
|
break;
|
|
2263
2461
|
}
|
|
2264
2462
|
}
|
|
2463
|
+
|
|
2464
|
+
// v0.8.1 P8-A: marathon mode — inline driver. After the F5 phase-
|
|
2465
|
+
// boundary exit, if marathon is active, query the driver for the
|
|
2466
|
+
// next continuation prompt and run additional turns until the driver
|
|
2467
|
+
// signals stop (null return). State machine logic unchanged from
|
|
2468
|
+
// v0.8.0; the I/O wrapper just shifted from filesystem-watcher to
|
|
2469
|
+
// direct method calls.
|
|
2470
|
+
while (this.marathonDriver) {
|
|
2471
|
+
const turnsSnapshot = this.marathonDriver.turnsThisPhase;
|
|
2472
|
+
const phaseChanged = this.currentPhase !== this.marathonDriver.currentPhase;
|
|
2473
|
+
const milestones = this._buildEngineCountsBlock(this.currentPhase) || {};
|
|
2474
|
+
const decision = this.marathonDriver.decideNext({
|
|
2475
|
+
currentPhase: this.currentPhase,
|
|
2476
|
+
milestones,
|
|
2477
|
+
phaseChanged,
|
|
2478
|
+
errorSeen: false, // engine surfaces errors via tool_result.isError; not propagated here for v0.8.1 MVP
|
|
2479
|
+
turnsThisPhase: turnsSnapshot + 1,
|
|
2480
|
+
});
|
|
2481
|
+
if (!decision) {
|
|
2482
|
+
// Stop condition met — driver returned null
|
|
2483
|
+
this.eventLog.append("marathon_detach", {
|
|
2484
|
+
reason: this.marathonDriver.stopReason || "unknown",
|
|
2485
|
+
decisions: this.marathonDriver.decisionCount,
|
|
2486
|
+
});
|
|
2487
|
+
this.marathonDriver = null;
|
|
2488
|
+
break;
|
|
2489
|
+
}
|
|
2490
|
+
this.eventLog.append("marathon_decision", {
|
|
2491
|
+
template: decision.template,
|
|
2492
|
+
reason: decision.reason,
|
|
2493
|
+
phase: this.currentPhase,
|
|
2494
|
+
});
|
|
2495
|
+
yield* this.runTurn(decision.prompt);
|
|
2496
|
+
// Loop back: another turn just completed; driver gets another decideNext call.
|
|
2497
|
+
}
|
|
2498
|
+
}
|
|
2499
|
+
|
|
2500
|
+
/**
|
|
2501
|
+
* v0.8.1 P8-A: activate marathon mode with a goal-description.
|
|
2502
|
+
* Called from cli/index.js's /marathon slash command handler.
|
|
2503
|
+
* The engine's next runTaskLoop will use marathonDriver.getInitialPrompt()
|
|
2504
|
+
* as the kickoff user message.
|
|
2505
|
+
*
|
|
2506
|
+
* @param {string} goal — the marathon goal description (user-typed)
|
|
2507
|
+
* @param {object} [opts] — {maxWallclockMs?, stuckAfterMs?}
|
|
2508
|
+
* @returns {object} {goal, language, startedAt} for confirmation
|
|
2509
|
+
*/
|
|
2510
|
+
enterMarathonMode(goal, opts = {}) {
|
|
2511
|
+
if (this.marathonDriver) {
|
|
2512
|
+
throw new Error("Marathon already active — use /marathon off to disengage first");
|
|
2513
|
+
}
|
|
2514
|
+
this.marathonDriver = new MarathonDriver({
|
|
2515
|
+
goal,
|
|
2516
|
+
language: this.config.language || "en",
|
|
2517
|
+
maxWallclockMs: opts.maxWallclockMs,
|
|
2518
|
+
stuckAfterMs: opts.stuckAfterMs,
|
|
2519
|
+
});
|
|
2520
|
+
this.eventLog.append("marathon_attach", {
|
|
2521
|
+
goal: goal.slice(0, 200),
|
|
2522
|
+
language: this.config.language || "en",
|
|
2523
|
+
});
|
|
2524
|
+
return this.marathonDriver.getStatus();
|
|
2525
|
+
}
|
|
2526
|
+
|
|
2527
|
+
/** v0.8.1 P8-A: deactivate marathon mode. Returns final status snapshot. */
|
|
2528
|
+
exitMarathonMode(reason = "user_off") {
|
|
2529
|
+
if (!this.marathonDriver) return null;
|
|
2530
|
+
const status = this.marathonDriver.getStatus();
|
|
2531
|
+
this.marathonDriver.stop(reason);
|
|
2532
|
+
this.eventLog.append("marathon_detach", {
|
|
2533
|
+
reason,
|
|
2534
|
+
decisions: this.marathonDriver.decisionCount,
|
|
2535
|
+
});
|
|
2536
|
+
this.marathonDriver = null;
|
|
2537
|
+
return status;
|
|
2538
|
+
}
|
|
2539
|
+
|
|
2540
|
+
/** v0.8.1 P8-A: is marathon mode currently active? (for TUI status bar) */
|
|
2541
|
+
isMarathonActive() {
|
|
2542
|
+
return !!this.marathonDriver && !this.marathonDriver.stopped;
|
|
2265
2543
|
}
|
|
2266
2544
|
|
|
2267
2545
|
/**
|
|
@@ -2282,14 +2560,26 @@ export class AgentEngine {
|
|
|
2282
2560
|
* amortized against the 2-4× wall-clock speedup.
|
|
2283
2561
|
*/
|
|
2284
2562
|
async *_runTaskLoopParallel(userMessage, parallelism) {
|
|
2563
|
+
// v0.8 P5-A: F5 re-enabled, conditional on marathon mode.
|
|
2564
|
+
// Mirror _runTaskLoopSerial — capture startingPhase BEFORE initial
|
|
2565
|
+
// runTurn so phase advance during the initial turn exits the loop
|
|
2566
|
+
// unless marathon is active.
|
|
2567
|
+
// v0.8.1 P8-A: marathon check now uses inline driver instance.
|
|
2568
|
+
const marathonActive = this.isMarathonActive();
|
|
2569
|
+
const startingPhase = this.currentPhase;
|
|
2570
|
+
|
|
2285
2571
|
// Initial turn: main agent reads user request, creates tasks.
|
|
2286
2572
|
yield* this.runTurn(userMessage);
|
|
2287
2573
|
|
|
2288
|
-
|
|
2289
|
-
|
|
2290
|
-
|
|
2291
|
-
|
|
2292
|
-
|
|
2574
|
+
if (!marathonActive && this.currentPhase !== startingPhase) {
|
|
2575
|
+
this.eventLog.append("ralph_loop_exit", {
|
|
2576
|
+
reason: "f5_strict_initial_turn",
|
|
2577
|
+
from: startingPhase,
|
|
2578
|
+
to: this.currentPhase,
|
|
2579
|
+
mode: "parallel",
|
|
2580
|
+
});
|
|
2581
|
+
return;
|
|
2582
|
+
}
|
|
2293
2583
|
|
|
2294
2584
|
const agentTool = this._buildTools.core.find((t) => t?.name === "agent_tool");
|
|
2295
2585
|
if (!agentTool) {
|