kc-beta 0.7.3 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +57 -4
- package/bin/kc-beta.js +20 -6
- package/package.json +3 -2
- package/src/agent/engine.js +493 -132
- package/src/agent/pipelines/_advance-hints.js +92 -0
- package/src/agent/pipelines/_milestone-derive.js +387 -17
- package/src/agent/pipelines/initializer.js +4 -1
- package/src/agent/pipelines/skill-authoring.js +30 -1
- package/src/agent/skill-loader.js +433 -111
- package/src/agent/tools/agent-tool.js +2 -2
- package/src/agent/tools/consult-skill.js +127 -0
- package/src/agent/tools/copy-to-workspace.js +4 -3
- package/src/agent/tools/dashboard-render.js +48 -1
- package/src/agent/tools/document-parse.js +31 -2
- package/src/agent/tools/phase-advance.js +17 -13
- package/src/agent/tools/release.js +378 -8
- package/src/agent/tools/sandbox-exec.js +65 -8
- package/src/agent/tools/worker-llm-call.js +95 -15
- package/src/agent/tools/workspace-file.js +7 -7
- package/src/agent/workspace.js +25 -4
- package/src/cli/components.js +4 -1
- package/src/cli/index.js +97 -1
- package/src/config.js +20 -3
- package/src/marathon/driver.js +217 -0
- package/src/marathon/prompts.js +93 -0
- package/template/.env.template +16 -0
- package/template/AGENT.md +182 -7
- package/template/skills/en/{meta-meta/auto-model-selection → auto-model-selection}/SKILL.md +1 -0
- package/template/skills/en/{meta-meta/bootstrap-workspace → bootstrap-workspace}/SKILL.md +15 -0
- package/template/skills/{zh/meta → en}/compliance-judgment/SKILL.md +1 -0
- package/template/skills/en/{meta/confidence-system → confidence-system}/SKILL.md +1 -0
- package/template/skills/en/{meta/corner-case-management → corner-case-management}/SKILL.md +1 -0
- package/template/skills/en/{meta/cross-document-verification → cross-document-verification}/SKILL.md +1 -0
- package/template/skills/en/{meta-meta/dashboard-reporting → dashboard-reporting}/SKILL.md +1 -0
- package/template/skills/en/{meta/data-sensibility → data-sensibility}/SKILL.md +1 -0
- package/template/skills/{zh/meta → en}/document-chunking/SKILL.md +1 -0
- package/template/skills/en/{meta/document-parsing → document-parsing}/SKILL.md +1 -0
- package/template/skills/{zh/meta → en}/entity-extraction/SKILL.md +1 -0
- package/template/skills/en/{meta-meta/evolution-loop → evolution-loop}/SKILL.md +1 -0
- package/template/skills/en/{meta-meta/pdf-review-dashboard → pdf-review-dashboard}/SKILL.md +1 -0
- package/template/skills/en/{meta-meta/quality-control → quality-control}/SKILL.md +10 -0
- package/template/skills/en/{meta-meta/rule-extraction → rule-extraction}/SKILL.md +1 -0
- package/template/skills/en/{meta-meta/rule-graph → rule-graph}/SKILL.md +1 -0
- package/template/skills/en/{meta-meta/skill-authoring → skill-authoring}/SKILL.md +40 -0
- package/template/skills/en/skill-creator/SKILL.md +2 -1
- package/template/skills/en/{meta-meta/skill-to-workflow → skill-to-workflow}/SKILL.md +58 -4
- package/template/skills/en/{meta-meta/task-decomposition → task-decomposition}/SKILL.md +1 -0
- package/template/skills/en/{meta/tree-processing → tree-processing}/SKILL.md +1 -0
- package/template/skills/en/{meta-meta/version-control → version-control}/SKILL.md +1 -0
- package/template/skills/en/{meta-meta/work-decomposition → work-decomposition}/SKILL.md +51 -6
- package/template/skills/phase_skills.yaml +112 -0
- package/template/skills/zh/{meta-meta/auto-model-selection → auto-model-selection}/SKILL.md +1 -0
- package/template/skills/zh/{meta-meta/bootstrap-workspace → bootstrap-workspace}/SKILL.md +15 -0
- package/template/skills/zh/compliance-judgment/SKILL.md +83 -0
- package/template/skills/zh/{meta/confidence-system → confidence-system}/SKILL.md +1 -0
- package/template/skills/zh/{meta/corner-case-management → corner-case-management}/SKILL.md +1 -0
- package/template/skills/zh/{meta/cross-document-verification → cross-document-verification}/SKILL.md +1 -0
- package/template/skills/zh/{meta-meta/dashboard-reporting → dashboard-reporting}/SKILL.md +1 -0
- package/template/skills/zh/{meta/data-sensibility → data-sensibility}/SKILL.md +1 -0
- package/template/skills/zh/document-chunking/SKILL.md +40 -0
- package/template/skills/zh/document-parsing/SKILL.md +102 -0
- package/template/skills/zh/entity-extraction/SKILL.md +121 -0
- package/template/skills/zh/{meta-meta/evolution-loop → evolution-loop}/SKILL.md +1 -0
- package/template/skills/zh/{meta-meta/pdf-review-dashboard → pdf-review-dashboard}/SKILL.md +1 -0
- package/template/skills/zh/{meta-meta/quality-control → quality-control}/SKILL.md +10 -0
- package/template/skills/zh/{meta-meta/rule-extraction → rule-extraction}/SKILL.md +1 -0
- package/template/skills/zh/{meta-meta/rule-graph → rule-graph}/SKILL.md +1 -0
- package/template/skills/zh/{meta-meta/skill-authoring → skill-authoring}/SKILL.md +40 -0
- package/template/skills/zh/skill-creator/SKILL.md +205 -200
- package/template/skills/zh/skill-to-workflow/SKILL.md +243 -0
- package/template/skills/zh/{meta-meta/task-decomposition → task-decomposition}/SKILL.md +1 -0
- package/template/skills/zh/tree-processing/SKILL.md +126 -0
- package/template/skills/zh/{meta-meta/version-control → version-control}/SKILL.md +1 -0
- package/template/skills/zh/{meta-meta/work-decomposition → work-decomposition}/SKILL.md +49 -4
- package/template/workflows/common/llm_client.py +168 -0
- package/template/workflows/common/utils.py +132 -0
- package/template/CLAUDE.md +0 -150
- package/template/skills/en/meta/compliance-judgment/SKILL.md +0 -82
- package/template/skills/en/meta/document-chunking/SKILL.md +0 -32
- package/template/skills/en/meta/entity-extraction/SKILL.md +0 -120
- package/template/skills/zh/meta/document-parsing/SKILL.md +0 -101
- package/template/skills/zh/meta/tree-processing/SKILL.md +0 -121
- package/template/skills/zh/meta-meta/skill-to-workflow/SKILL.md +0 -188
- /package/template/skills/en/{meta/compliance-judgment → compliance-judgment}/references/output-format.md +0 -0
- /package/template/skills/en/{meta/cross-document-verification → cross-document-verification}/references/contradiction-taxonomy.md +0 -0
- /package/template/skills/en/{meta-meta/dashboard-reporting → dashboard-reporting}/scripts/generate_dashboard.py +0 -0
- /package/template/skills/en/{meta/document-parsing → document-parsing}/references/parser-catalog.md +0 -0
- /package/template/skills/en/{meta-meta/evolution-loop → evolution-loop}/references/convergence-guide.md +0 -0
- /package/template/skills/en/{meta-meta/pdf-review-dashboard → pdf-review-dashboard}/scripts/generate_review.js +0 -0
- /package/template/skills/en/{meta-meta/quality-control → quality-control}/references/qa-layers.md +0 -0
- /package/template/skills/en/{meta-meta/quality-control → quality-control}/references/sampling-strategies.md +0 -0
- /package/template/skills/en/{meta-meta/rule-extraction → rule-extraction}/references/chunking-strategies.md +0 -0
- /package/template/skills/en/{meta-meta/skill-authoring → skill-authoring}/references/skill-format-spec.md +0 -0
- /package/template/skills/en/{meta-meta/skill-to-workflow → skill-to-workflow}/references/worker-llm-catalog.md +0 -0
- /package/template/skills/en/{meta-meta/task-decomposition → task-decomposition}/references/decision-matrix.md +0 -0
- /package/template/skills/en/{meta-meta/version-control → version-control}/references/trace-id-spec.md +0 -0
- /package/template/skills/zh/{meta/compliance-judgment → compliance-judgment}/references/output-format.md +0 -0
- /package/template/skills/zh/{meta/cross-document-verification → cross-document-verification}/references/contradiction-taxonomy.md +0 -0
- /package/template/skills/zh/{meta-meta/dashboard-reporting → dashboard-reporting}/scripts/generate_dashboard.py +0 -0
- /package/template/skills/zh/{meta/document-parsing → document-parsing}/references/parser-catalog.md +0 -0
- /package/template/skills/zh/{meta-meta/evolution-loop → evolution-loop}/references/convergence-guide.md +0 -0
- /package/template/skills/zh/{meta-meta/pdf-review-dashboard → pdf-review-dashboard}/scripts/generate_review.js +0 -0
- /package/template/skills/zh/{meta-meta/quality-control → quality-control}/references/qa-layers.md +0 -0
- /package/template/skills/zh/{meta-meta/quality-control → quality-control}/references/sampling-strategies.md +0 -0
- /package/template/skills/zh/{meta-meta/rule-extraction → rule-extraction}/references/chunking-strategies.md +0 -0
- /package/template/skills/zh/{meta-meta/skill-authoring → skill-authoring}/references/skill-format-spec.md +0 -0
- /package/template/skills/zh/{meta-meta/skill-to-workflow → skill-to-workflow}/references/worker-llm-catalog.md +0 -0
- /package/template/skills/zh/{meta-meta/task-decomposition → task-decomposition}/references/decision-matrix.md +0 -0
- /package/template/skills/zh/{meta-meta/version-control → version-control}/references/trace-id-spec.md +0 -0
package/src/agent/engine.js
CHANGED
|
@@ -5,6 +5,9 @@ import {
|
|
|
5
5
|
deriveSkillAuthoringMilestones,
|
|
6
6
|
deriveSkillTestingMilestones,
|
|
7
7
|
} from "./pipelines/_milestone-derive.js";
|
|
8
|
+
import { getPrescriptiveHint } from "./pipelines/_advance-hints.js";
|
|
9
|
+
import { loadEnvFile } from "../config.js";
|
|
10
|
+
import { MarathonDriver } from "../marathon/driver.js";
|
|
8
11
|
import { ContextAssembler } from "./context.js";
|
|
9
12
|
import { ConversationHistory } from "./history.js";
|
|
10
13
|
import { findSafeSplitPoint } from "./message-utils.js";
|
|
@@ -38,6 +41,7 @@ import { TierDowngradeTool } from "./tools/tier-downgrade.js";
|
|
|
38
41
|
import { AgentTool } from "./tools/agent-tool.js";
|
|
39
42
|
import { WebSearchTool } from "./tools/web-search.js";
|
|
40
43
|
import { TaskCreateTool, TaskUpdateTool, TaskCompleteTool } from "./tools/task-board.js";
|
|
44
|
+
import { ConsultSkillTool } from "./tools/consult-skill.js";
|
|
41
45
|
import { SkillLoader } from "./skill-loader.js";
|
|
42
46
|
import { TaskManager } from "./task-manager.js";
|
|
43
47
|
import { Scheduler } from "./scheduler.js";
|
|
@@ -165,6 +169,10 @@ export class AgentEngine {
|
|
|
165
169
|
{ gitAutoCommit: config.gitAutoCommit !== false },
|
|
166
170
|
);
|
|
167
171
|
|
|
172
|
+
// v0.8 P1-B: workspace .env overlay deferred until after eventLog
|
|
173
|
+
// init (see _overlayWorkspaceEnv call below). Workspace dir is
|
|
174
|
+
// known here, but the overlay's audit event needs eventLog.
|
|
175
|
+
|
|
168
176
|
// For sub-agents, persistence (history/events/state) lives under
|
|
169
177
|
// sub_agents/<scope>/ instead of the workspace root. Workspace files
|
|
170
178
|
// (rules/, rule_skills/, workflows/) stay shared.
|
|
@@ -202,6 +210,26 @@ export class AgentEngine {
|
|
|
202
210
|
// Event log (append-only JSONL, source of truth)
|
|
203
211
|
this.eventLog = new EventLog(this.workspace.cwd, { logDir });
|
|
204
212
|
|
|
213
|
+
// v0.8 P1-B: overlay workspace .env onto this.config. cli/index.js
|
|
214
|
+
// calls loadSettings() without a workspace path because the path
|
|
215
|
+
// isn't known until this constructor runs. Result: workspace .env's
|
|
216
|
+
// VLM_TIER1 / OCR_MODEL_TIER1 / TIER1..4 / LANGUAGE were silently
|
|
217
|
+
// ignored, with gc defaults (~/.kc_agent/config.json) winning.
|
|
218
|
+
// 资管 audit § 9.2 finding 7: user's OCR_MODEL_TIER1=zai-org/GLM-4.6V
|
|
219
|
+
// never reached document_parse; error messages quoted gc's
|
|
220
|
+
// Qwen3-VL-235B default. Overlay reads workspace .env, fills in
|
|
221
|
+
// fields where current config came from gc fallback (penv-set values
|
|
222
|
+
// still win because loadSettings applied them).
|
|
223
|
+
try { this._overlayWorkspaceEnv(); } catch { /* best-effort */ }
|
|
224
|
+
|
|
225
|
+
// v0.8.1 P8-A: inline marathon driver. v0.8.0's separate-process
|
|
226
|
+
// kc-marathon CLI + filesystem-watcher IPC died silently when the
|
|
227
|
+
// launching terminal closed (E2E #11 audit). Redesigned as an inline
|
|
228
|
+
// state machine activated via /marathon slash command. No filesystem
|
|
229
|
+
// marker, no inbox.jsonl. Driver instance set by enterMarathonMode(),
|
|
230
|
+
// cleared by exitMarathonMode(). Query via this.marathonDriver.
|
|
231
|
+
this.marathonDriver = null;
|
|
232
|
+
|
|
205
233
|
// Context windowing
|
|
206
234
|
this.contextWindow = new ContextWindow({
|
|
207
235
|
contextLimit: config.kcContextLimit || 200000,
|
|
@@ -215,8 +243,6 @@ export class AgentEngine {
|
|
|
215
243
|
// so they don't get a TaskManager.
|
|
216
244
|
this.taskManager = this._isSubagent ? null : new TaskManager(this.workspace.cwd);
|
|
217
245
|
|
|
218
|
-
// Build all tool instances (but register phase-appropriate ones)
|
|
219
|
-
this._buildTools = this._createAllTools();
|
|
220
246
|
this._phaseSummaries = [];
|
|
221
247
|
|
|
222
248
|
// Pipeline system (meta-meta skills as code)
|
|
@@ -232,22 +258,47 @@ export class AgentEngine {
|
|
|
232
258
|
};
|
|
233
259
|
|
|
234
260
|
// Skill discovery (Claude Code pattern: index in context, full content on demand)
|
|
261
|
+
// v0.7.5 — must initialize BEFORE _createAllTools() because ConsultSkillTool
|
|
262
|
+
// takes this._skillLoader as a constructor arg. Was a v0.7.5 init-order bug:
|
|
263
|
+
// _createAllTools ran first, passed undefined skillLoader to ConsultSkillTool,
|
|
264
|
+
// calls to consult_skill threw "Cannot read properties of undefined".
|
|
235
265
|
this._skillLoader = new SkillLoader(config.language);
|
|
236
266
|
|
|
267
|
+
// Build all tool instances (but register phase-appropriate ones)
|
|
268
|
+
this._buildTools = this._createAllTools();
|
|
269
|
+
|
|
270
|
+
// v0.7.5 G-D1: populate <workspace>/skills/ with the initial phase's
|
|
271
|
+
// available skill set. Symlink with copy fallback. Re-populated on
|
|
272
|
+
// every phase advance/retreat (see _advancePhase).
|
|
273
|
+
try {
|
|
274
|
+
const res = this._skillLoader.populateWorkspaceSkills(this.workspace.cwd, this.currentPhase);
|
|
275
|
+
this.eventLog?.append?.("skills_populated", {
|
|
276
|
+
phase: res.phase,
|
|
277
|
+
populated: res.populated,
|
|
278
|
+
failures: res.failures,
|
|
279
|
+
});
|
|
280
|
+
} catch { /* best-effort; skills/ population is not a critical-path failure */ }
|
|
281
|
+
|
|
282
|
+
// v0.8.1 P10-A: auto-populate <workspace>/workflows/common/llm_client.py
|
|
283
|
+
// from the template. Idempotent (skips if file already exists). Covers
|
|
284
|
+
// the bench-corpus flow where `kc-beta init` was bypassed. v0.8.0
|
|
285
|
+
// shipped this shim as embedded source in skill-to-workflow teaching;
|
|
286
|
+
// E2E #11 audits found BOTH agents ignored the teaching and wrote
|
|
287
|
+
// their own (non-canonical) llm_client.py. Shipping it as a template
|
|
288
|
+
// file the agent finds via filesystem walk is more robust.
|
|
289
|
+
try { this._populateWorkspaceCommonShims(); } catch { /* best-effort */ }
|
|
290
|
+
|
|
237
291
|
// Register tools for initial phase
|
|
238
292
|
this.toolRegistry = new ToolRegistry();
|
|
239
293
|
this._registerToolsForPhase(this.currentPhase);
|
|
240
294
|
|
|
241
|
-
//
|
|
242
|
-
//
|
|
243
|
-
//
|
|
244
|
-
//
|
|
245
|
-
//
|
|
246
|
-
//
|
|
247
|
-
//
|
|
248
|
-
this._lastReady = Object.fromEntries(
|
|
249
|
-
Object.keys(this.pipelines).map((p) => [p, false]),
|
|
250
|
-
);
|
|
295
|
+
// v0.8 P1-D: removed `_lastReady` edge-trigger state. It was the
|
|
296
|
+
// bookkeeping for `_maybeAutoAdvance`, which v0.7.4 G0b decommissioned
|
|
297
|
+
// (all call sites removed because v0.7.3's mid-session auto-advance
|
|
298
|
+
// chain regression was caused by it). The method definition itself
|
|
299
|
+
// is also gone in P1-D. Phase advance is now 100% explicit: agent's
|
|
300
|
+
// `phase_advance` tool or user re-prompt. Resume + rollback paths
|
|
301
|
+
// that previously re-primed `_lastReady` are no-ops now.
|
|
251
302
|
|
|
252
303
|
// B0.1: Heap sampler. Parent engines only — sub-agents share a process
|
|
253
304
|
// with the parent and would double-log. Writes a single JSONL line
|
|
@@ -258,6 +309,111 @@ export class AgentEngine {
|
|
|
258
309
|
this._heapSamplerStop = this._isSubagent ? null : this._startHeapSampler();
|
|
259
310
|
}
|
|
260
311
|
|
|
312
|
+
/**
|
|
313
|
+
* v0.8 P1-B: overlay workspace .env onto this.config now that
|
|
314
|
+
* this.workspace.cwd is known. Only fills in fields where the current
|
|
315
|
+
* config value was a gc fallback (empty OR the gc default) — does NOT
|
|
316
|
+
* override fields that came from process.env (those win at
|
|
317
|
+
* loadSettings() time and stay winning).
|
|
318
|
+
*
|
|
319
|
+
* Without this overlay, workspace .env's VLM_TIER1 / OCR_MODEL_TIER1 /
|
|
320
|
+
* TIER1..4 / LANGUAGE are silently ignored — the v0.7.4 G1b OCR_MODEL_TIER1
|
|
321
|
+
* alias fix landed at the config layer but never reached the runtime
|
|
322
|
+
* because loadSettings() is called without a workspace path.
|
|
323
|
+
*/
|
|
324
|
+
/**
|
|
325
|
+
* v0.8.1 P10-A: copy canonical `workflows/common/*.py` shims from the
|
|
326
|
+
* bundled template if they're missing in the workspace. Provides
|
|
327
|
+
* `llm_client.py` (worker LLM HTTP shim, provider-agnostic) and
|
|
328
|
+
* `utils.py` (strip_annotations + helpers). Idempotent — never
|
|
329
|
+
* overwrites existing files (agent edits stay intact).
|
|
330
|
+
*
|
|
331
|
+
* Runs at engine init. Covers bench-corpus mode where `kc-beta init`
|
|
332
|
+
* doesn't run; init-flow workspaces already have these from copyDir.
|
|
333
|
+
*/
|
|
334
|
+
_populateWorkspaceCommonShims() {
|
|
335
|
+
const __dirname = path.dirname(new URL(import.meta.url).pathname);
|
|
336
|
+
const templateRoot = path.resolve(__dirname, "..", "..", "template", "workflows", "common");
|
|
337
|
+
if (!fs.existsSync(templateRoot)) return;
|
|
338
|
+
|
|
339
|
+
const targetRoot = path.join(this.workspace.cwd, "workflows", "common");
|
|
340
|
+
fs.mkdirSync(targetRoot, { recursive: true });
|
|
341
|
+
|
|
342
|
+
const copied = [];
|
|
343
|
+
const skipped = [];
|
|
344
|
+
for (const entry of fs.readdirSync(templateRoot)) {
|
|
345
|
+
if (!entry.endsWith(".py") || entry.startsWith(".")) continue;
|
|
346
|
+
const srcPath = path.join(templateRoot, entry);
|
|
347
|
+
const dstPath = path.join(targetRoot, entry);
|
|
348
|
+
if (fs.existsSync(dstPath)) {
|
|
349
|
+
skipped.push(entry);
|
|
350
|
+
continue;
|
|
351
|
+
}
|
|
352
|
+
try {
|
|
353
|
+
fs.copyFileSync(srcPath, dstPath);
|
|
354
|
+
copied.push(entry);
|
|
355
|
+
} catch { /* best-effort */ }
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
if (copied.length > 0) {
|
|
359
|
+
try {
|
|
360
|
+
this.eventLog?.append?.("workflows_common_populated", { copied, skipped });
|
|
361
|
+
} catch { /* best-effort */ }
|
|
362
|
+
}
|
|
363
|
+
}
|
|
364
|
+
|
|
365
|
+
_overlayWorkspaceEnv() {
|
|
366
|
+
if (!this.workspace?.cwd) return;
|
|
367
|
+
const envPath = path.join(this.workspace.cwd, ".env");
|
|
368
|
+
if (!fs.existsSync(envPath)) return;
|
|
369
|
+
let wsEnv;
|
|
370
|
+
try { wsEnv = loadEnvFile(envPath); } catch { return; }
|
|
371
|
+
if (!wsEnv || typeof wsEnv !== "object") return;
|
|
372
|
+
|
|
373
|
+
// VLM tiers — workspace .env's VLM_TIER1 / OCR_MODEL_TIER1 wins over
|
|
374
|
+
// gc's vlm_tiers.tier1 default. process.env precedence preserved
|
|
375
|
+
// because loadSettings already applied it; we only fill in slots
|
|
376
|
+
// that fell through to gc-or-empty.
|
|
377
|
+
const overlays = [
|
|
378
|
+
{ configKey: "vlmTier1", envKey: ["VLM_TIER1", "OCR_MODEL_TIER1"] },
|
|
379
|
+
{ configKey: "vlmTier2", envKey: ["VLM_TIER2", "OCR_MODEL_TIER2"] },
|
|
380
|
+
{ configKey: "vlmTier3", envKey: ["VLM_TIER3", "OCR_MODEL_TIER3"] },
|
|
381
|
+
{ configKey: "tier1", envKey: ["TIER1"] },
|
|
382
|
+
{ configKey: "tier2", envKey: ["TIER2"] },
|
|
383
|
+
{ configKey: "tier3", envKey: ["TIER3"] },
|
|
384
|
+
{ configKey: "tier4", envKey: ["TIER4"] },
|
|
385
|
+
{ configKey: "language", envKey: ["LANGUAGE"] },
|
|
386
|
+
];
|
|
387
|
+
|
|
388
|
+
const applied = [];
|
|
389
|
+
for (const { configKey, envKey } of overlays) {
|
|
390
|
+
// Find first non-empty workspace .env value for this config key
|
|
391
|
+
let wsValue = "";
|
|
392
|
+
for (const k of envKey) {
|
|
393
|
+
if (wsEnv[k]) { wsValue = wsEnv[k]; break; }
|
|
394
|
+
}
|
|
395
|
+
if (!wsValue) continue;
|
|
396
|
+
// Skip if process.env has the same key set — penv already won
|
|
397
|
+
const penvWon = envKey.some((k) => process.env[k] && process.env[k] !== wsValue);
|
|
398
|
+
if (penvWon) continue;
|
|
399
|
+
// Apply the workspace value
|
|
400
|
+
if (this.config[configKey] !== wsValue) {
|
|
401
|
+
applied.push({ key: configKey, from: this.config[configKey] || "(empty)", to: wsValue });
|
|
402
|
+
this.config[configKey] = wsValue;
|
|
403
|
+
}
|
|
404
|
+
}
|
|
405
|
+
|
|
406
|
+
// Audit visibility: emit a one-time event listing what was overlaid.
|
|
407
|
+
if (applied.length > 0) {
|
|
408
|
+
try {
|
|
409
|
+
this.eventLog?.append?.("workspace_env_overlay", {
|
|
410
|
+
envPath: path.relative(this.workspace.cwd, envPath),
|
|
411
|
+
fields: applied,
|
|
412
|
+
});
|
|
413
|
+
} catch { /* best-effort */ }
|
|
414
|
+
}
|
|
415
|
+
}
|
|
416
|
+
|
|
261
417
|
/**
|
|
262
418
|
* Start sampling process.memoryUsage() every 60 s into logs/heap.jsonl.
|
|
263
419
|
* Returns a stop fn. Timer is .unref()'d so it never keeps the process
|
|
@@ -267,11 +423,22 @@ export class AgentEngine {
|
|
|
267
423
|
_startHeapSampler() {
|
|
268
424
|
const logDir = path.join(this.workspace.cwd, "logs");
|
|
269
425
|
const logPath = path.join(logDir, "heap.jsonl");
|
|
426
|
+
let stopped = false;
|
|
427
|
+
let lastSampleAt = 0;
|
|
428
|
+
|
|
270
429
|
const sample = () => {
|
|
271
430
|
try {
|
|
272
431
|
const mem = process.memoryUsage();
|
|
432
|
+
const now = Date.now();
|
|
433
|
+
// v0.8 P1-C: track timer drift. Any time elapsed beyond the 60s
|
|
434
|
+
// cadence since the last sample means this tick fired late or a tick
|
|
435
|
+
// was missed (event loop sleep, GC pause, etc.). Surface it in the row
|
|
436
|
+
// as skippedMs so the post-mortem audit can detect gaps without
|
|
437
|
+
// needing to compare adjacent timestamps.
|
|
438
|
+
const skippedMs = lastSampleAt > 0 ? (now - lastSampleAt - 60_000) : 0;
|
|
439
|
+
lastSampleAt = now;
|
|
273
440
|
const row = {
|
|
274
|
-
t: new Date().toISOString(),
|
|
441
|
+
t: new Date(now).toISOString(),
|
|
275
442
|
seq: this.eventLog?.currentSeq ?? 0,
|
|
276
443
|
phase: this.currentPhase,
|
|
277
444
|
rssMB: Math.round(mem.rss / 1024 / 1024),
|
|
@@ -288,17 +455,36 @@ export class AgentEngine {
|
|
|
288
455
|
// and the row gets `componentsErr` instead.
|
|
289
456
|
components: this._sampleComponents(),
|
|
290
457
|
};
|
|
458
|
+
if (skippedMs > 0) row.skippedMs = skippedMs;
|
|
291
459
|
fs.mkdirSync(logDir, { recursive: true });
|
|
292
460
|
fs.appendFileSync(logPath, JSON.stringify(row) + "\n", "utf-8");
|
|
293
461
|
} catch { /* never fatal */ }
|
|
294
462
|
};
|
|
463
|
+
|
|
464
|
+
// v0.8 P1-C: self-rescheduling setTimeout instead of setInterval. The
|
|
465
|
+
// 资管 v0.7.5 session shows only 2 heap.jsonl entries (12:39:40 start
|
|
466
|
+
// + 12:40:40 first tick) across an 18-hour run — the unref'd
|
|
467
|
+
// setInterval was somehow dropped between event-loop idle phases.
|
|
468
|
+
// setTimeout reschedules from inside the sample callback, so the
|
|
469
|
+
// timer is re-registered every tick. unref'd so we don't block exit.
|
|
470
|
+
let timeoutHandle = null;
|
|
471
|
+
const scheduleNext = () => {
|
|
472
|
+
if (stopped) return;
|
|
473
|
+
timeoutHandle = setTimeout(() => {
|
|
474
|
+
sample();
|
|
475
|
+
scheduleNext();
|
|
476
|
+
}, 60_000);
|
|
477
|
+
timeoutHandle.unref?.();
|
|
478
|
+
};
|
|
479
|
+
|
|
295
480
|
// Record one sample at startup so we have a baseline even on short runs.
|
|
296
481
|
sample();
|
|
297
|
-
|
|
298
|
-
|
|
482
|
+
scheduleNext();
|
|
483
|
+
|
|
299
484
|
return () => {
|
|
300
485
|
try {
|
|
301
|
-
|
|
486
|
+
stopped = true;
|
|
487
|
+
if (timeoutHandle) clearTimeout(timeoutHandle);
|
|
302
488
|
sample(); // one final sample on shutdown
|
|
303
489
|
} catch { /* ignore */ }
|
|
304
490
|
};
|
|
@@ -415,7 +601,10 @@ export class AgentEngine {
|
|
|
415
601
|
return {
|
|
416
602
|
// Always available (BUILD + DISTILL)
|
|
417
603
|
core: [
|
|
418
|
-
new SandboxExecTool(this.workspace,
|
|
604
|
+
new SandboxExecTool(this.workspace, {
|
|
605
|
+
defaultTimeoutMs: this.config.kcExecDefaultTimeoutMs,
|
|
606
|
+
maxTimeoutMs: this.config.kcExecMaxTimeoutMs,
|
|
607
|
+
}),
|
|
419
608
|
new WorkspaceFileTool(this.workspace, this.versionManager),
|
|
420
609
|
new CopyToWorkspaceTool(this.workspace, {
|
|
421
610
|
largeRefThresholdMB: this.config.largeRefThresholdMB ?? 10,
|
|
@@ -455,7 +644,12 @@ export class AgentEngine {
|
|
|
455
644
|
mineruApiKey: this.config.mineruApiKey,
|
|
456
645
|
llmApiKey: workerApiKey,
|
|
457
646
|
llmBaseUrl: workerBaseUrl,
|
|
647
|
+
// v0.8.1 P9-B: live-read vlmTier1 so workspace_env_overlay
|
|
648
|
+
// changes after tool construction (or mid-run .env edits)
|
|
649
|
+
// reach document_parse. The static `ocrModel` is the
|
|
650
|
+
// construction-time fallback; getOcrModel takes precedence.
|
|
458
651
|
ocrModel: vlmModel,
|
|
652
|
+
getOcrModel: () => this.config.vlmTier1 || vlmModel,
|
|
459
653
|
}),
|
|
460
654
|
new DocumentSearchTool(this.workspace),
|
|
461
655
|
// Group C — chunker/RAG infrastructure ported from AMC app. Core
|
|
@@ -476,16 +670,27 @@ export class AgentEngine {
|
|
|
476
670
|
() => this.currentPhase,
|
|
477
671
|
),
|
|
478
672
|
new WebSearchTool(this.config.tavilyApiKey),
|
|
479
|
-
// v0.7.
|
|
480
|
-
//
|
|
481
|
-
//
|
|
482
|
-
//
|
|
483
|
-
// (taskManager
|
|
673
|
+
// v0.7.4 (re-applied from v0.7.3 G2b): TaskCreate /
|
|
674
|
+
// TaskUpdate / TaskComplete — agent populates the
|
|
675
|
+
// Ralph-loop queue for the CURRENT phase only. Phase
|
|
676
|
+
// boundaries exit the loop (v0.7.4 G0c). Skipped for
|
|
677
|
+
// subagents (taskManager null in subagent scope).
|
|
484
678
|
...(this.taskManager ? [
|
|
485
679
|
new TaskCreateTool(this.workspace, this.taskManager),
|
|
486
680
|
new TaskUpdateTool(this.workspace, this.taskManager),
|
|
487
681
|
new TaskCompleteTool(this.workspace, this.taskManager),
|
|
488
682
|
] : []),
|
|
683
|
+
// v0.7.5: consult_skill loads a meta-skill body into conversation
|
|
684
|
+
// history on demand. Always-loaded skills are already in the
|
|
685
|
+
// system prompt via SkillLoader.formatForContext; this tool covers
|
|
686
|
+
// the "available" set for the current phase. Both main + subagents
|
|
687
|
+
// register their own — each has its own skillLoader + phase.
|
|
688
|
+
new ConsultSkillTool(
|
|
689
|
+
this.workspace,
|
|
690
|
+
this._skillLoader,
|
|
691
|
+
() => this.currentPhase,
|
|
692
|
+
this.eventLog,
|
|
693
|
+
),
|
|
489
694
|
],
|
|
490
695
|
// Distillation+ only (DISTILL mode)
|
|
491
696
|
distill: [
|
|
@@ -944,16 +1149,9 @@ export class AgentEngine {
|
|
|
944
1149
|
}
|
|
945
1150
|
}
|
|
946
1151
|
|
|
947
|
-
//
|
|
948
|
-
//
|
|
949
|
-
//
|
|
950
|
-
for (const phase of Object.keys(engine.pipelines)) {
|
|
951
|
-
try {
|
|
952
|
-
engine._lastReady[phase] = !!engine.pipelines[phase].exitCriteriaMet?.();
|
|
953
|
-
} catch {
|
|
954
|
-
engine._lastReady[phase] = false;
|
|
955
|
-
}
|
|
956
|
-
}
|
|
1152
|
+
// v0.8 P1-D: removed `_lastReady` re-prime. Was the bookkeeping for
|
|
1153
|
+
// `_maybeAutoAdvance` which v0.7.4 G0b decommissioned. Phase advance
|
|
1154
|
+
// is explicit now; nothing to re-prime on resume.
|
|
957
1155
|
|
|
958
1156
|
engine.eventLog.append("session_resume", {
|
|
959
1157
|
resumedPhase: engine.currentPhase,
|
|
@@ -1062,6 +1260,29 @@ export class AgentEngine {
|
|
|
1062
1260
|
// budget. Better to lose some history than crash with HTTP 400.
|
|
1063
1261
|
messages = this._enforceTokenBudget(messages);
|
|
1064
1262
|
|
|
1263
|
+
// v0.8 P3-A: skill usage counter — emit one skill_byte_send event
|
|
1264
|
+
// per always-loaded skill per LLM send. Captures the cost of having
|
|
1265
|
+
// a skill body inlined in the system prompt (Layer B per design doc).
|
|
1266
|
+
// Agent-blind: events go to events.jsonl only; never surfaced to the
|
|
1267
|
+
// agent's context. consult_skill tool results emit their own
|
|
1268
|
+
// skill_invoked events with via_tool="consult_skill" (already in
|
|
1269
|
+
// place since v0.7.5 G-C4), so we don't double-count those here.
|
|
1270
|
+
try {
|
|
1271
|
+
const { alwaysLoaded } = this._skillLoader.getPhaseSkillSet(this.currentPhase) || {};
|
|
1272
|
+
if (Array.isArray(alwaysLoaded)) {
|
|
1273
|
+
for (const skill of alwaysLoaded) {
|
|
1274
|
+
const body = this._skillLoader.loadSkillBody(skill);
|
|
1275
|
+
if (!body) continue;
|
|
1276
|
+
this.eventLog.append("skill_byte_send", {
|
|
1277
|
+
skill,
|
|
1278
|
+
via: "system_prompt_always_loaded",
|
|
1279
|
+
byte_count: body.length,
|
|
1280
|
+
phase: this.currentPhase,
|
|
1281
|
+
});
|
|
1282
|
+
}
|
|
1283
|
+
}
|
|
1284
|
+
} catch { /* counter is best-effort; never break the turn */ }
|
|
1285
|
+
|
|
1065
1286
|
this.eventLog.append("llm_start", {
|
|
1066
1287
|
model: this.config.kcModel,
|
|
1067
1288
|
messageCount: messages.length,
|
|
@@ -1196,11 +1417,11 @@ export class AgentEngine {
|
|
|
1196
1417
|
}
|
|
1197
1418
|
this._totalTurns = (this._totalTurns || 0) + 1;
|
|
1198
1419
|
|
|
1199
|
-
//
|
|
1200
|
-
//
|
|
1201
|
-
// tool
|
|
1202
|
-
|
|
1203
|
-
|
|
1420
|
+
// v0.7.4 G0b: removed `_maybeAutoAdvance()` auto-fire here.
|
|
1421
|
+
// Phase advance is now 100% explicit (agent's `phase_advance`
|
|
1422
|
+
// tool, or user re-prompt). v0.7.3 phase-control regression
|
|
1423
|
+
// was caused by this edge-triggered auto-advance firing mid-
|
|
1424
|
+
// session and chaining into next phase without user check-in.
|
|
1204
1425
|
|
|
1205
1426
|
this.eventLog.append("turn_complete", {});
|
|
1206
1427
|
this.saveState();
|
|
@@ -1289,23 +1510,55 @@ export class AgentEngine {
|
|
|
1289
1510
|
|
|
1290
1511
|
this.eventLog.append("tool_result", {
|
|
1291
1512
|
name: tc.name,
|
|
1513
|
+
input: inputData,
|
|
1292
1514
|
output: result.content || "",
|
|
1293
1515
|
isError: result.isError,
|
|
1294
1516
|
traceId: offload?.traceId || null,
|
|
1295
1517
|
});
|
|
1296
1518
|
|
|
1297
|
-
//
|
|
1298
|
-
//
|
|
1299
|
-
//
|
|
1300
|
-
//
|
|
1301
|
-
|
|
1519
|
+
// v0.7.5 (G-F4): added `input` above so events.jsonl carries the
|
|
1520
|
+
// tool inputs (v0.7.4 G1c only patched the AgentEvent yield path,
|
|
1521
|
+
// missed the persistence path — audit confirmed 0/453 + 0/946
|
|
1522
|
+
// tool_result events had `input` in v0.7.4 sessions).
|
|
1523
|
+
|
|
1524
|
+
// D3a: trace skill invocations. v0.7.5 (G-C6): only fire on
|
|
1525
|
+
// READS of meta-skill paths. Writes to rule_skills/<id>/SKILL.md
|
|
1526
|
+
// during skill_authoring are NOT skill invocations — they're the
|
|
1527
|
+
// agent producing its own deliverable. The old "(unknown)" spam
|
|
1528
|
+
// (100% of events in v0.7.1 + v0.7.4 sessions) is gone.
|
|
1529
|
+
//
|
|
1530
|
+
// Note: meta-skill body reads now happen via consult_skill, which
|
|
1531
|
+
// emits skill_invoked itself (with the real skill name). This
|
|
1532
|
+
// path-matching emission stays only as a fallback for any agent
|
|
1533
|
+
// that reads a SKILL.md path directly (out of pattern).
|
|
1302
1534
|
try {
|
|
1535
|
+
// v0.8 P1-E: heredoc detection. `cat << 'EOF' > /tmp/skill.md`
|
|
1536
|
+
// matches the read-verb regex but is actually a WRITE — the
|
|
1537
|
+
// heredoc operator `<<` means cat is consuming inline content
|
|
1538
|
+
// (the heredoc body), not a file path. 资管 v0.7.5 audit § 5f
|
|
1539
|
+
// confirmed 1 spurious skill_invoked event of this kind.
|
|
1540
|
+
// Excluding any command with `<<` from the isRead classification.
|
|
1541
|
+
const cmd = String(inputData?.command || "");
|
|
1542
|
+
const isHeredoc = cmd.includes("<<");
|
|
1543
|
+
const isRead =
|
|
1544
|
+
(tc.name === "workspace_file" && inputData?.operation === "read") ||
|
|
1545
|
+
(tc.name === "sandbox_exec" && !isHeredoc && /\b(cat|head|tail|less|grep|view|read)\b/.test(cmd));
|
|
1303
1546
|
if (
|
|
1304
1547
|
!result.isError &&
|
|
1548
|
+
isRead &&
|
|
1305
1549
|
(tc.name === "workspace_file" || tc.name === "sandbox_exec")
|
|
1306
1550
|
) {
|
|
1307
1551
|
const p = String(inputData?.path || inputData?.command || "");
|
|
1308
|
-
|
|
1552
|
+
// v0.7.5 flat layout: skills/<name>/SKILL.md (workspace scope)
|
|
1553
|
+
// OR template/skills/<lang>/<name>/SKILL.md (template scope, rare)
|
|
1554
|
+
// Deep layout backward-compat preserved for any stragglers.
|
|
1555
|
+
// v0.8 P0-B: accept lowercase `skill.md` too — 资管 audit § 3.2
|
|
1556
|
+
// found agents writing lowercase consistently (14/14 rule_skills/).
|
|
1557
|
+
// Limited to exact uppercase OR exact lowercase (no mixed case)
|
|
1558
|
+
// to avoid spurious matches on unrelated files (e.g., `Skill.md`).
|
|
1559
|
+
const skillMatch = p.match(
|
|
1560
|
+
/(?:template\/)?skills\/(?:[a-z]+\/)?(?:(?:meta-meta|meta|skill-creator)\/)?([a-zA-Z0-9_-]+)\/(?:SKILL|skill)\.md\b/
|
|
1561
|
+
) || p.match(/\b(?:SKILL|skill)\.md\b/);
|
|
1309
1562
|
if (skillMatch) {
|
|
1310
1563
|
const skillName = skillMatch[1] || "(unknown)";
|
|
1311
1564
|
this.eventLog.append("skill_invoked", {
|
|
@@ -1386,12 +1639,9 @@ export class AgentEngine {
|
|
|
1386
1639
|
}
|
|
1387
1640
|
}
|
|
1388
1641
|
|
|
1389
|
-
//
|
|
1390
|
-
//
|
|
1391
|
-
//
|
|
1392
|
-
// just need to act on it eagerly.
|
|
1393
|
-
const ev = this._maybeAutoAdvance();
|
|
1394
|
-
if (ev) yield ev;
|
|
1642
|
+
// v0.7.4 G0b: removed post-tool `_maybeAutoAdvance()` call.
|
|
1643
|
+
// Phase advance is now 100% explicit. See `_runTaskLoopSerial`
|
|
1644
|
+
// phase-change-exit guard for the loop-level checkpoint.
|
|
1395
1645
|
|
|
1396
1646
|
} catch (err) {
|
|
1397
1647
|
// A8: If the LLM client tagged the stream termination reason, pass
|
|
@@ -1409,10 +1659,12 @@ export class AgentEngine {
|
|
|
1409
1659
|
}
|
|
1410
1660
|
|
|
1411
1661
|
/**
|
|
1412
|
-
* Centralized phase transition (Bug 4).
|
|
1662
|
+
* Centralized phase transition (Bug 4). Two triggers route through here
|
|
1663
|
+
* after v0.7.4 G0b + v0.8 P1-D:
|
|
1413
1664
|
* (1) pipeline.onToolResult returning phase_ready
|
|
1414
|
-
* (2)
|
|
1415
|
-
* (3)
|
|
1665
|
+
* (2) explicit request via the agent's phase_advance tool
|
|
1666
|
+
* (The historical (3) post-turn auto-check via `_maybeAutoAdvance` was
|
|
1667
|
+
* removed; phase advance is 100% explicit.)
|
|
1416
1668
|
*
|
|
1417
1669
|
* Reachability: by default only forward-by-one transitions per NEXT_PHASE.
|
|
1418
1670
|
* Set `force: true` to allow non-adjacent or backward transitions (e.g. user
|
|
@@ -1490,9 +1742,17 @@ export class AgentEngine {
|
|
|
1490
1742
|
try { criteriaMet = !!fromPipeline?.exitCriteriaMet?.(); } catch { criteriaMet = true; }
|
|
1491
1743
|
if (!criteriaMet) {
|
|
1492
1744
|
const counts = this._buildEngineCountsBlock(this.currentPhase);
|
|
1745
|
+
// v0.8 P0-E: prescriptive hint in the event payload so post-mortem
|
|
1746
|
+
// audits see what the agent was told (matches what phase-advance.js
|
|
1747
|
+
// returns to the LLM).
|
|
1748
|
+
let prescriptive = null;
|
|
1749
|
+
try {
|
|
1750
|
+
prescriptive = getPrescriptiveHint(this.currentPhase, null, counts || "");
|
|
1751
|
+
} catch { /* hint generation is best-effort */ }
|
|
1493
1752
|
this.eventLog.append("phase_advance_refused", {
|
|
1494
1753
|
from: this.currentPhase, to: nextPhase, reason,
|
|
1495
1754
|
hint: "exit criteria not met by engine telemetry",
|
|
1755
|
+
prescriptive_hint: prescriptive,
|
|
1496
1756
|
engineCounts: counts || null,
|
|
1497
1757
|
});
|
|
1498
1758
|
return false;
|
|
@@ -1541,6 +1801,20 @@ export class AgentEngine {
|
|
|
1541
1801
|
this.workspace.setPhase(this.currentPhase);
|
|
1542
1802
|
this._createTasksForPhase(this.currentPhase);
|
|
1543
1803
|
|
|
1804
|
+
// v0.7.5 G-D2: re-populate <workspace>/skills/ with the new phase's
|
|
1805
|
+
// available set. Symlinks are wiped + recreated. Agent's `ls skills/`
|
|
1806
|
+
// and any read-by-path reflects the current phase's allowlist.
|
|
1807
|
+
try {
|
|
1808
|
+
const res = this._skillLoader?.populateWorkspaceSkills(this.workspace.cwd, this.currentPhase);
|
|
1809
|
+
if (res) {
|
|
1810
|
+
this.eventLog.append("skills_populated", {
|
|
1811
|
+
phase: res.phase,
|
|
1812
|
+
populated: res.populated,
|
|
1813
|
+
failures: res.failures,
|
|
1814
|
+
});
|
|
1815
|
+
}
|
|
1816
|
+
} catch { /* best-effort */ }
|
|
1817
|
+
|
|
1544
1818
|
// v0.7.0 N (#94): give the entered pipeline a chance to do
|
|
1545
1819
|
// phase-entry setup. Used by finalization to copy the release
|
|
1546
1820
|
// template into output/releases/v1/. Other pipelines are no-ops.
|
|
@@ -1553,23 +1827,16 @@ export class AgentEngine {
|
|
|
1553
1827
|
});
|
|
1554
1828
|
}
|
|
1555
1829
|
|
|
1556
|
-
// v0.
|
|
1557
|
-
//
|
|
1558
|
-
//
|
|
1559
|
-
// this, the auto-advance edge trigger stays latched true and the
|
|
1560
|
-
// moment the agent returns to fromPhase the engine immediately
|
|
1561
|
-
// bounces them back out — defeating the rollback.
|
|
1562
|
-
if (direction === "rollback" && this._lastReady) {
|
|
1563
|
-
this._lastReady[fromPhase] = false;
|
|
1564
|
-
}
|
|
1830
|
+
// v0.8 P1-D: removed `_lastReady` rollback reset. Was the bookkeeping
|
|
1831
|
+
// for `_maybeAutoAdvance` which v0.7.4 G0b decommissioned. Phase
|
|
1832
|
+
// advance is explicit now; rollback just needs to commit the new phase.
|
|
1565
1833
|
|
|
1566
1834
|
this.saveState();
|
|
1567
1835
|
|
|
1568
1836
|
// B8: Soft signal — surface any sub-agents left running from the prior
|
|
1569
1837
|
// phase so the main agent's next turn can decide whether to kill them.
|
|
1570
|
-
// NOT automated:
|
|
1571
|
-
//
|
|
1572
|
-
// radius. This just informs.
|
|
1838
|
+
// NOT automated: auto-killing would couple lifecycle with blast radius.
|
|
1839
|
+
// This just informs.
|
|
1573
1840
|
try {
|
|
1574
1841
|
const agentTool = this._buildTools?.core?.find((t) => t?.name === "agent_tool");
|
|
1575
1842
|
const runningIds = agentTool?.getRunningTaskIds?.() || [];
|
|
@@ -1769,35 +2036,12 @@ export class AgentEngine {
|
|
|
1769
2036
|
return false;
|
|
1770
2037
|
}
|
|
1771
2038
|
|
|
1772
|
-
|
|
1773
|
-
|
|
1774
|
-
|
|
1775
|
-
|
|
1776
|
-
|
|
1777
|
-
|
|
1778
|
-
_maybeAutoAdvance() {
|
|
1779
|
-
const phase = this.currentPhase;
|
|
1780
|
-
const pipeline = this.pipelines[phase];
|
|
1781
|
-
let nowReady = false;
|
|
1782
|
-
try { nowReady = !!pipeline?.exitCriteriaMet?.(); } catch { nowReady = false; }
|
|
1783
|
-
|
|
1784
|
-
if (!nowReady) {
|
|
1785
|
-
this._lastReady[phase] = false;
|
|
1786
|
-
return null;
|
|
1787
|
-
}
|
|
1788
|
-
// Edge-trigger: nowReady && !wasReady
|
|
1789
|
-
if (this._lastReady[phase]) return null;
|
|
1790
|
-
this._lastReady[phase] = true;
|
|
1791
|
-
|
|
1792
|
-
const next = NEXT_PHASE[phase];
|
|
1793
|
-
if (!next) return null;
|
|
1794
|
-
const advanced = this._advancePhase(next, "exit criteria flipped to met");
|
|
1795
|
-
if (!advanced) return null;
|
|
1796
|
-
return new AgentEvent({
|
|
1797
|
-
type: "pipeline_event",
|
|
1798
|
-
data: { type: "phase_ready", nextPhase: next, message: "exit criteria flipped to met" },
|
|
1799
|
-
});
|
|
1800
|
-
}
|
|
2039
|
+
// v0.8 P1-D: `_maybeAutoAdvance()` deleted. The method auto-fired phase
|
|
2040
|
+
// advance on a false→true flip of `exitCriteriaMet()`, but v0.7.3
|
|
2041
|
+
// showed mid-session auto-advance chains were a regression hazard
|
|
2042
|
+
// (user couldn't review between phases). v0.7.4 G0b removed all call
|
|
2043
|
+
// sites; v0.8 P1-D removes the now-dead method definition + the
|
|
2044
|
+
// `_lastReady` bookkeeping it relied on. Phase advance is 100% explicit.
|
|
1801
2045
|
|
|
1802
2046
|
/**
|
|
1803
2047
|
* Tool-call offloading. If the tool's content exceeds the threshold,
|
|
@@ -2106,10 +2350,40 @@ export class AgentEngine {
|
|
|
2106
2350
|
/** B1: original serial ralph-loop path — one task at a time, shared
|
|
2107
2351
|
* conversation history. Unchanged from pre-v0.6.0 behavior. */
|
|
2108
2352
|
async *_runTaskLoopSerial(userMessage) {
|
|
2109
|
-
//
|
|
2353
|
+
// v0.8 P5-A: F5 re-enabled, conditional on marathon mode.
|
|
2354
|
+
// - Interactive sessions (marathon NOT active): capture startingPhase
|
|
2355
|
+
// BEFORE the initial runTurn, and exit the loop on ANY phase change
|
|
2356
|
+
// (including within the initial runTurn). One user prompt = one
|
|
2357
|
+
// phase advance. Path (a) per design doc Q5 lean.
|
|
2358
|
+
// - Marathon sessions: the kc-marathon driver provides per-phase
|
|
2359
|
+
// prompts (inline via decideNext() since v0.8.1 P8-A), so the engine doesn't need
|
|
2360
|
+
// F5's checkpoint — phase chaining is OK because each chained phase
|
|
2361
|
+
// STILL gets its own driver-emitted prompt next tick.
|
|
2362
|
+
//
|
|
2363
|
+
// v0.7.3 demonstrated why F5 matters interactively: auto-chained
|
|
2364
|
+
// phase advances skip the user check-in cycle and broke phase
|
|
2365
|
+
// control in team testing. v0.7.4 G0c first fixed it via
|
|
2366
|
+
// post-initial-runTurn exit; v0.7.5 added the strict capture-BEFORE
|
|
2367
|
+
// refinement; v0.8 P5-A preserves both with the marathon escape;
|
|
2368
|
+
// v0.8.1 P8-A switched marathon-active source from filesystem
|
|
2369
|
+
// marker to inline driver instance.
|
|
2370
|
+
const marathonActive = this.isMarathonActive();
|
|
2371
|
+
const startingPhase = this.currentPhase;
|
|
2110
2372
|
yield* this.runTurn(userMessage);
|
|
2111
2373
|
|
|
2112
|
-
//
|
|
2374
|
+
// F5 strict gate: if interactive AND phase changed during initial
|
|
2375
|
+
// runTurn, exit immediately (don't auto-continue tasks in the new
|
|
2376
|
+
// phase). Marathon bypasses — driver decides pacing.
|
|
2377
|
+
if (!marathonActive && this.currentPhase !== startingPhase) {
|
|
2378
|
+
this.eventLog.append("ralph_loop_exit", {
|
|
2379
|
+
reason: "f5_strict_initial_turn",
|
|
2380
|
+
from: startingPhase,
|
|
2381
|
+
to: this.currentPhase,
|
|
2382
|
+
});
|
|
2383
|
+
return;
|
|
2384
|
+
}
|
|
2385
|
+
|
|
2386
|
+
// Auto-continue through pending tasks (within current phase only)
|
|
2113
2387
|
while (this.taskManager.getNextPending()) {
|
|
2114
2388
|
// v0.7.0 #93: budget-aware compact threshold. The old
|
|
2115
2389
|
// `messages.length > 15` was message-count-based and frozen
|
|
@@ -2170,28 +2444,102 @@ export class AgentEngine {
|
|
|
2170
2444
|
},
|
|
2171
2445
|
});
|
|
2172
2446
|
|
|
2173
|
-
//
|
|
2174
|
-
//
|
|
2175
|
-
//
|
|
2176
|
-
//
|
|
2177
|
-
|
|
2178
|
-
|
|
2179
|
-
|
|
2180
|
-
|
|
2181
|
-
|
|
2182
|
-
|
|
2183
|
-
|
|
2184
|
-
|
|
2185
|
-
|
|
2186
|
-
|
|
2187
|
-
|
|
2188
|
-
|
|
2189
|
-
|
|
2190
|
-
|
|
2191
|
-
|
|
2192
|
-
|
|
2447
|
+
// v0.7.4 G0c: phase boundary = user checkpoint. Exit the
|
|
2448
|
+
// loop if the agent advanced phase during this task —
|
|
2449
|
+
// even if pre-created tasks for the new phase are queued.
|
|
2450
|
+
// User sees current state and explicitly re-prompts to
|
|
2451
|
+
// begin the next phase. Marathon-style end-to-end
|
|
2452
|
+
// autonomy is handled by the marathon driver loop that
|
|
2453
|
+
// follows this task loop, not by auto-continuing here.
|
|
2454
|
+
if (this.currentPhase !== startingPhase) {
|
|
2455
|
+
this.eventLog.append("ralph_loop_exit", {
|
|
2456
|
+
reason: "phase_changed",
|
|
2457
|
+
from: startingPhase,
|
|
2458
|
+
to: this.currentPhase,
|
|
2459
|
+
});
|
|
2460
|
+
break;
|
|
2461
|
+
}
|
|
2462
|
+
}
|
|
2463
|
+
|
|
2464
|
+
// v0.8.1 P8-A: marathon mode — inline driver. After the F5 phase-
|
|
2465
|
+
// boundary exit, if marathon is active, query the driver for the
|
|
2466
|
+
// next continuation prompt and run additional turns until the driver
|
|
2467
|
+
// signals stop (null return). State machine logic unchanged from
|
|
2468
|
+
// v0.8.0; the I/O wrapper just shifted from filesystem-watcher to
|
|
2469
|
+
// direct method calls.
|
|
2470
|
+
while (this.marathonDriver) {
|
|
2471
|
+
const turnsSnapshot = this.marathonDriver.turnsThisPhase;
|
|
2472
|
+
const phaseChanged = this.currentPhase !== this.marathonDriver.currentPhase;
|
|
2473
|
+
const milestones = this._buildEngineCountsBlock(this.currentPhase) || {};
|
|
2474
|
+
const decision = this.marathonDriver.decideNext({
|
|
2475
|
+
currentPhase: this.currentPhase,
|
|
2476
|
+
milestones,
|
|
2477
|
+
phaseChanged,
|
|
2478
|
+
errorSeen: false, // engine surfaces errors via tool_result.isError; not propagated here for v0.8.1 MVP
|
|
2479
|
+
turnsThisPhase: turnsSnapshot + 1,
|
|
2480
|
+
});
|
|
2481
|
+
if (!decision) {
|
|
2482
|
+
// Stop condition met — driver returned null
|
|
2483
|
+
this.eventLog.append("marathon_detach", {
|
|
2484
|
+
reason: this.marathonDriver.stopReason || "unknown",
|
|
2485
|
+
decisions: this.marathonDriver.decisionCount,
|
|
2486
|
+
});
|
|
2487
|
+
this.marathonDriver = null;
|
|
2488
|
+
break;
|
|
2193
2489
|
}
|
|
2490
|
+
this.eventLog.append("marathon_decision", {
|
|
2491
|
+
template: decision.template,
|
|
2492
|
+
reason: decision.reason,
|
|
2493
|
+
phase: this.currentPhase,
|
|
2494
|
+
});
|
|
2495
|
+
yield* this.runTurn(decision.prompt);
|
|
2496
|
+
// Loop back: another turn just completed; driver gets another decideNext call.
|
|
2497
|
+
}
|
|
2498
|
+
}
|
|
2499
|
+
|
|
2500
|
+
/**
|
|
2501
|
+
* v0.8.1 P8-A: activate marathon mode with a goal-description.
|
|
2502
|
+
* Called from cli/index.js's /marathon slash command handler.
|
|
2503
|
+
* The engine's next runTaskLoop will use marathonDriver.getInitialPrompt()
|
|
2504
|
+
* as the kickoff user message.
|
|
2505
|
+
*
|
|
2506
|
+
* @param {string} goal — the marathon goal description (user-typed)
|
|
2507
|
+
* @param {object} [opts] — {maxWallclockMs?, stuckAfterMs?}
|
|
2508
|
+
* @returns {object} {goal, language, startedAt} for confirmation
|
|
2509
|
+
*/
|
|
2510
|
+
enterMarathonMode(goal, opts = {}) {
|
|
2511
|
+
if (this.marathonDriver) {
|
|
2512
|
+
throw new Error("Marathon already active — use /marathon off to disengage first");
|
|
2194
2513
|
}
|
|
2514
|
+
this.marathonDriver = new MarathonDriver({
|
|
2515
|
+
goal,
|
|
2516
|
+
language: this.config.language || "en",
|
|
2517
|
+
maxWallclockMs: opts.maxWallclockMs,
|
|
2518
|
+
stuckAfterMs: opts.stuckAfterMs,
|
|
2519
|
+
});
|
|
2520
|
+
this.eventLog.append("marathon_attach", {
|
|
2521
|
+
goal: goal.slice(0, 200),
|
|
2522
|
+
language: this.config.language || "en",
|
|
2523
|
+
});
|
|
2524
|
+
return this.marathonDriver.getStatus();
|
|
2525
|
+
}
|
|
2526
|
+
|
|
2527
|
+
/** v0.8.1 P8-A: deactivate marathon mode. Returns the status snapshot captured just before the driver is stopped, or null if marathon was not active. */
|
|
2528
|
+
exitMarathonMode(reason = "user_off") {
|
|
2529
|
+
if (!this.marathonDriver) return null;
|
|
2530
|
+
const status = this.marathonDriver.getStatus();
|
|
2531
|
+
this.marathonDriver.stop(reason);
|
|
2532
|
+
this.eventLog.append("marathon_detach", {
|
|
2533
|
+
reason,
|
|
2534
|
+
decisions: this.marathonDriver.decisionCount,
|
|
2535
|
+
});
|
|
2536
|
+
this.marathonDriver = null;
|
|
2537
|
+
return status;
|
|
2538
|
+
}
|
|
2539
|
+
|
|
2540
|
+
/** v0.8.1 P8-A: is marathon mode currently active? (used by the task loops' F5 gate and the TUI status bar) */
|
|
2541
|
+
isMarathonActive() {
|
|
2542
|
+
return !!this.marathonDriver && !this.marathonDriver.stopped;
|
|
2195
2543
|
}
|
|
2196
2544
|
|
|
2197
2545
|
/**
|
|
@@ -2212,9 +2560,27 @@ export class AgentEngine {
|
|
|
2212
2560
|
* amortized against the 2-4× wall-clock speedup.
|
|
2213
2561
|
*/
|
|
2214
2562
|
async *_runTaskLoopParallel(userMessage, parallelism) {
|
|
2563
|
+
// v0.8 P5-A: F5 re-enabled, conditional on marathon mode.
|
|
2564
|
+
// Mirror _runTaskLoopSerial — capture startingPhase BEFORE initial
|
|
2565
|
+
// runTurn so phase advance during the initial turn exits the loop
|
|
2566
|
+
// unless marathon is active.
|
|
2567
|
+
// v0.8.1 P8-A: marathon check now uses inline driver instance.
|
|
2568
|
+
const marathonActive = this.isMarathonActive();
|
|
2569
|
+
const startingPhase = this.currentPhase;
|
|
2570
|
+
|
|
2215
2571
|
// Initial turn: main agent reads user request, creates tasks.
|
|
2216
2572
|
yield* this.runTurn(userMessage);
|
|
2217
2573
|
|
|
2574
|
+
if (!marathonActive && this.currentPhase !== startingPhase) {
|
|
2575
|
+
this.eventLog.append("ralph_loop_exit", {
|
|
2576
|
+
reason: "f5_strict_initial_turn",
|
|
2577
|
+
from: startingPhase,
|
|
2578
|
+
to: this.currentPhase,
|
|
2579
|
+
mode: "parallel",
|
|
2580
|
+
});
|
|
2581
|
+
return;
|
|
2582
|
+
}
|
|
2583
|
+
|
|
2218
2584
|
const agentTool = this._buildTools.core.find((t) => t?.name === "agent_tool");
|
|
2219
2585
|
if (!agentTool) {
|
|
2220
2586
|
// Shouldn't happen (agent_tool is core), but fall back safely.
|
|
@@ -2239,6 +2605,9 @@ export class AgentEngine {
|
|
|
2239
2605
|
const inFlight = new Map();
|
|
2240
2606
|
|
|
2241
2607
|
const dispatch = async () => {
|
|
2608
|
+
// v0.7.4 G0c: stop dispatching if phase changed since loop start.
|
|
2609
|
+
// In-flight workers complete naturally; queue stays untouched.
|
|
2610
|
+
if (this.currentPhase !== startingPhase) return;
|
|
2242
2611
|
while (inFlight.size < parallelism) {
|
|
2243
2612
|
const task = this.taskManager.claimNextPending(`pool${inFlight.size}`);
|
|
2244
2613
|
if (!task) return;
|
|
@@ -2374,23 +2743,15 @@ export class AgentEngine {
|
|
|
2374
2743
|
|
|
2375
2744
|
this.saveState();
|
|
2376
2745
|
|
|
2377
|
-
//
|
|
2378
|
-
|
|
2379
|
-
|
|
2380
|
-
|
|
2381
|
-
|
|
2382
|
-
|
|
2383
|
-
|
|
2384
|
-
|
|
2385
|
-
|
|
2386
|
-
if (advanced) {
|
|
2387
|
-
yield new AgentEvent({
|
|
2388
|
-
type: "pipeline_event",
|
|
2389
|
-
data: { type: "phase_ready", nextPhase: next, message: "all phase tasks done; exit criteria met" },
|
|
2390
|
-
});
|
|
2391
|
-
}
|
|
2392
|
-
}
|
|
2393
|
-
}
|
|
2746
|
+
// v0.7.4 G0c: if phase changed during the parallel run, log the
|
|
2747
|
+
// checkpoint event for the audit trail. No auto-advance — that
|
|
2748
|
+
// belongs to the agent (phase_advance tool) or user re-prompt.
|
|
2749
|
+
if (this.currentPhase !== startingPhase) {
|
|
2750
|
+
this.eventLog.append("ralph_loop_exit", {
|
|
2751
|
+
reason: "phase_changed",
|
|
2752
|
+
from: startingPhase,
|
|
2753
|
+
to: this.currentPhase,
|
|
2754
|
+
});
|
|
2394
2755
|
}
|
|
2395
2756
|
}
|
|
2396
2757
|
|