kc-beta 0.3.2 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/agent/confidence-scorer.js +8 -0
- package/src/agent/context-window.js +7 -2
- package/src/agent/context.js +25 -0
- package/src/agent/corner-case-registry.js +5 -0
- package/src/agent/engine.js +564 -76
- package/src/agent/event-log.js +15 -2
- package/src/agent/history.js +91 -23
- package/src/agent/pipelines/initializer.js +3 -6
- package/src/agent/retry.js +9 -1
- package/src/agent/rule-catalog-normalize.js +37 -0
- package/src/agent/scheduler.js +276 -0
- package/src/agent/session-state.js +11 -2
- package/src/agent/task-manager.js +5 -0
- package/src/agent/tools/agent-tool.js +57 -14
- package/src/agent/tools/archive-file.js +94 -0
- package/src/agent/tools/copy-to-workspace.js +140 -0
- package/src/agent/tools/phase-advance.js +60 -0
- package/src/agent/tools/release.js +323 -0
- package/src/agent/tools/rule-catalog.js +56 -4
- package/src/agent/tools/schedule-fetch.js +118 -0
- package/src/agent/tools/snapshot.js +101 -0
- package/src/agent/tools/workspace-file.js +10 -7
- package/src/agent/version-manager.js +29 -120
- package/src/agent/workspace.js +127 -4
- package/src/cli/components.js +68 -12
- package/src/cli/index.js +147 -15
- package/src/config.js +10 -1
- package/src/model-tiers.json +5 -5
- package/template/release-runtime/README.md.tmpl +84 -0
- package/template/release-runtime/kc_runtime/__init__.py +2 -0
- package/template/release-runtime/kc_runtime/confidence.py +93 -0
- package/template/release-runtime/kc_runtime/dashboard.py +208 -0
- package/template/release-runtime/render_dashboard.py +49 -0
- package/template/release-runtime/run.py +230 -0
- package/template/release-runtime/serve.sh +15 -0
- package/template/skills/en/meta-meta/bootstrap-workspace/SKILL.md +11 -0
- package/template/skills/en/meta-meta/quality-control/SKILL.md +13 -1
- package/template/skills/en/meta-meta/skill-to-workflow/SKILL.md +8 -0
- package/template/skills/en/meta-meta/task-decomposition/SKILL.md +13 -0
- package/template/skills/en/meta-meta/version-control/SKILL.md +13 -0
- package/template/skills/zh/meta-meta/bootstrap-workspace/SKILL.md +11 -0
- package/template/skills/zh/meta-meta/quality-control/SKILL.md +12 -0
- package/template/skills/zh/meta-meta/skill-to-workflow/SKILL.md +8 -0
- package/template/skills/zh/meta-meta/task-decomposition/SKILL.md +16 -0
- package/template/skills/zh/meta-meta/version-control/SKILL.md +13 -0
- package/template/workspace.gitignore +22 -0
package/src/agent/engine.js
CHANGED
|
@@ -4,12 +4,19 @@ import { AgentEvent } from "./events.js";
|
|
|
4
4
|
import { ContextAssembler } from "./context.js";
|
|
5
5
|
import { ConversationHistory } from "./history.js";
|
|
6
6
|
import { Workspace } from "./workspace.js";
|
|
7
|
+
import { normalizeRuleCatalog } from "./rule-catalog-normalize.js";
|
|
7
8
|
import { VersionManager } from "./version-manager.js";
|
|
8
9
|
import { CornerCaseRegistry } from "./corner-case-registry.js";
|
|
9
10
|
import { ConfidenceScorer } from "./confidence-scorer.js";
|
|
10
11
|
import { ToolRegistry } from "./tools/registry.js";
|
|
11
12
|
import { SandboxExecTool } from "./tools/sandbox-exec.js";
|
|
12
13
|
import { WorkspaceFileTool } from "./tools/workspace-file.js";
|
|
14
|
+
import { CopyToWorkspaceTool } from "./tools/copy-to-workspace.js";
|
|
15
|
+
import { SnapshotTool } from "./tools/snapshot.js";
|
|
16
|
+
import { ArchiveFileTool } from "./tools/archive-file.js";
|
|
17
|
+
import { ScheduleFetchTool } from "./tools/schedule-fetch.js";
|
|
18
|
+
import { ReleaseTool } from "./tools/release.js";
|
|
19
|
+
import { PhaseAdvanceTool } from "./tools/phase-advance.js";
|
|
13
20
|
import { DocumentParseTool } from "./tools/document-parse.js";
|
|
14
21
|
import { DocumentSearchTool } from "./tools/document-search.js";
|
|
15
22
|
import { WorkerLLMCallTool } from "./tools/worker-llm-call.js";
|
|
@@ -23,6 +30,7 @@ import { AgentTool } from "./tools/agent-tool.js";
|
|
|
23
30
|
import { WebSearchTool } from "./tools/web-search.js";
|
|
24
31
|
import { SkillLoader } from "./skill-loader.js";
|
|
25
32
|
import { TaskManager } from "./task-manager.js";
|
|
33
|
+
import { Scheduler } from "./scheduler.js";
|
|
26
34
|
import { Phase } from "./pipelines/index.js";
|
|
27
35
|
import { ProjectInitializer } from "./pipelines/initializer.js";
|
|
28
36
|
import { RuleExtractionPipeline } from "./pipelines/extraction.js";
|
|
@@ -35,9 +43,25 @@ import { ContextWindow } from "./context-window.js";
|
|
|
35
43
|
import { SessionState } from "./session-state.js";
|
|
36
44
|
import { estimateTokens, estimateMessagesTokens } from "./token-counter.js";
|
|
37
45
|
|
|
46
|
+
// Fallback cap on conductor-LLM output tokens, used whenever
// `config.kcMaxTokens` is unset. SOTA models (GLM-5, Claude Sonnet 4) handle
// this comfortably. Override via KC_MAX_TOKENS env or kc_max_tokens in the
// global config.
const DEFAULT_KC_MAX_TOKENS = 64 * 1024;
|
|
50
|
+
|
|
38
51
|
// DISTILL mode: the set of phases in which worker-LLM tools are available.
const DISTILL_PHASES = new Set()
  .add(Phase.DISTILLATION)
  .add(Phase.PRODUCTION_QC);
|
|
40
53
|
|
|
54
|
+
// Successor lookup for the linear phase order, used by auto-advance (Bug 4).
// The final phase (PRODUCTION_QC) has no entry because nothing follows it.
// Exported so the TUI's /phase slash command (src/cli/index.js) can call
// _advancePhase with the right successor without re-declaring the map.
export const NEXT_PHASE = Object.fromEntries([
  [Phase.BOOTSTRAP, Phase.EXTRACTION],
  [Phase.EXTRACTION, Phase.SKILL_AUTHORING],
  [Phase.SKILL_AUTHORING, Phase.SKILL_TESTING],
  [Phase.SKILL_TESTING, Phase.DISTILLATION],
  [Phase.DISTILLATION, Phase.PRODUCTION_QC],
]);
|
|
64
|
+
|
|
41
65
|
/**
|
|
42
66
|
* The KC Agent conversation engine.
|
|
43
67
|
*
|
|
@@ -52,40 +76,85 @@ export class AgentEngine {
|
|
|
52
76
|
* @param {import('./llm-client.js').LLMClient} opts.client
|
|
53
77
|
* @param {object} opts.config - Settings from loadSettings()
|
|
54
78
|
* @param {string} [opts.sessionId]
|
|
79
|
+
* @param {string} [opts.subagentScope] - When set, persistence is isolated to
|
|
80
|
+
* sub_agents/<scope>/ inside the workspace. Used by `agent_tool` to spawn
|
|
81
|
+
* children that share workspace files but don't trash parent's history /
|
|
82
|
+
* tasks / session-state. (Bug 2)
|
|
83
|
+
* @param {string} [opts.initialPhase] - When set, the engine starts in this phase
|
|
84
|
+
* instead of BOOTSTRAP. Used by sub-agents to inherit parent's phase so they
|
|
85
|
+
* get the right tools registered. (Bug 2)
|
|
55
86
|
*/
|
|
56
|
-
constructor({ client, config, sessionId }) {
|
|
87
|
+
constructor({ client, config, sessionId, subagentScope, initialPhase }) {
|
|
57
88
|
this.client = client;
|
|
58
89
|
this.config = config;
|
|
59
90
|
this.context = new ContextAssembler();
|
|
91
|
+
this._isSubagent = !!subagentScope;
|
|
92
|
+
this._subagentScope = subagentScope || null;
|
|
60
93
|
|
|
61
94
|
// Workspace + structural components
|
|
62
|
-
this.workspace = new Workspace(
|
|
63
|
-
|
|
95
|
+
this.workspace = new Workspace(
|
|
96
|
+
config.kcWorkspaceRoot,
|
|
97
|
+
sessionId,
|
|
98
|
+
config.projectDir,
|
|
99
|
+
{ gitAutoCommit: config.gitAutoCommit !== false },
|
|
100
|
+
);
|
|
101
|
+
|
|
102
|
+
// For sub-agents, persistence (history/events/state) lives under
|
|
103
|
+
// sub_agents/<scope>/ instead of the workspace root. Workspace files
|
|
104
|
+
// (rules/, rule_skills/, workflows/) stay shared.
|
|
105
|
+
let conversationDir, logDir, statePath;
|
|
106
|
+
if (this._isSubagent) {
|
|
107
|
+
// Defense-in-depth: even though agent_tool sanitizes task_id against
|
|
108
|
+
// VALID_TASK_ID, an attacker reaching engine construction through
|
|
109
|
+
// another path (e.g. future callers) must not escape the workspace.
|
|
110
|
+
const scopeRoot = path.resolve(this.workspace.cwd, "sub_agents", subagentScope);
|
|
111
|
+
const wsRoot = path.resolve(this.workspace.cwd);
|
|
112
|
+
if (scopeRoot !== wsRoot && !scopeRoot.startsWith(wsRoot + path.sep)) {
|
|
113
|
+
throw new Error(`sub-agent scope escapes workspace: ${subagentScope}`);
|
|
114
|
+
}
|
|
115
|
+
// Also reject the scopeRoot being the workspace root itself, since that
|
|
116
|
+
// would defeat isolation.
|
|
117
|
+
if (scopeRoot === wsRoot || scopeRoot === path.resolve(wsRoot, "sub_agents")) {
|
|
118
|
+
throw new Error(`sub-agent scope must be a unique subfolder, got: ${subagentScope}`);
|
|
119
|
+
}
|
|
120
|
+
fs.mkdirSync(scopeRoot, { recursive: true });
|
|
121
|
+
conversationDir = path.join(scopeRoot, "conversation");
|
|
122
|
+
logDir = path.join(scopeRoot, "logs");
|
|
123
|
+
statePath = path.join(scopeRoot, "session-state.json");
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
const initialPhaseValue = initialPhase || Phase.BOOTSTRAP;
|
|
127
|
+
this.workspace.setPhase(initialPhaseValue);
|
|
128
|
+
this.history = new ConversationHistory(this.workspace.cwd, {
|
|
129
|
+
conversationDir,
|
|
130
|
+
maxMessageTokens: this.config.maxMessageTokens,
|
|
131
|
+
});
|
|
64
132
|
this.versionManager = new VersionManager(this.workspace.cwd);
|
|
65
133
|
this.cornerCases = new CornerCaseRegistry(this.workspace.cwd);
|
|
66
134
|
this.confidence = new ConfidenceScorer(this.workspace.cwd, this.cornerCases);
|
|
67
135
|
|
|
68
136
|
// Event log (append-only JSONL, source of truth)
|
|
69
|
-
this.eventLog = new EventLog(this.workspace.cwd);
|
|
137
|
+
this.eventLog = new EventLog(this.workspace.cwd, { logDir });
|
|
70
138
|
|
|
71
139
|
// Context windowing
|
|
72
140
|
this.contextWindow = new ContextWindow({
|
|
73
141
|
contextLimit: config.kcContextLimit || 200000,
|
|
74
|
-
reserveForResponse: config.kcMaxTokens ||
|
|
142
|
+
reserveForResponse: config.kcMaxTokens || DEFAULT_KC_MAX_TOKENS,
|
|
75
143
|
});
|
|
76
144
|
|
|
77
145
|
// Session state persistence
|
|
78
|
-
this.sessionState = new SessionState(this.workspace.cwd);
|
|
146
|
+
this.sessionState = new SessionState(this.workspace.cwd, { statePath });
|
|
79
147
|
|
|
80
|
-
// Task manager (ralph-loop)
|
|
81
|
-
|
|
148
|
+
// Task manager (ralph-loop) — sub-agents don't queue further sub-tasks,
|
|
149
|
+
// so they don't get a TaskManager.
|
|
150
|
+
this.taskManager = this._isSubagent ? null : new TaskManager(this.workspace.cwd);
|
|
82
151
|
|
|
83
152
|
// Build all tool instances (but register phase-appropriate ones)
|
|
84
153
|
this._buildTools = this._createAllTools();
|
|
85
154
|
this._phaseSummaries = [];
|
|
86
155
|
|
|
87
156
|
// Pipeline system (meta-meta skills as code)
|
|
88
|
-
this.currentPhase =
|
|
157
|
+
this.currentPhase = initialPhaseValue;
|
|
89
158
|
this.pipelines = {
|
|
90
159
|
[Phase.BOOTSTRAP]: new ProjectInitializer(this.workspace),
|
|
91
160
|
[Phase.EXTRACTION]: new RuleExtractionPipeline(this.workspace),
|
|
@@ -101,6 +170,17 @@ export class AgentEngine {
|
|
|
101
170
|
// Register tools for initial phase
|
|
102
171
|
this.toolRegistry = new ToolRegistry();
|
|
103
172
|
this._registerToolsForPhase(this.currentPhase);
|
|
173
|
+
|
|
174
|
+
// Edge-trigger state for _maybeAutoAdvance. Initialize to false for every
|
|
175
|
+
// phase so the first real false→true flip inside onToolResult triggers an
|
|
176
|
+
// advance — even when the user launches from a pre-populated workspace
|
|
177
|
+
// whose exit criteria already happen to be met at boot.
|
|
178
|
+
// resume() re-primes this from the restored pipeline state (after importState),
|
|
179
|
+
// which is the correct behaviour there: resumed sessions that were already
|
|
180
|
+
// past this phase shouldn't re-fire.
|
|
181
|
+
this._lastReady = Object.fromEntries(
|
|
182
|
+
Object.keys(this.pipelines).map((p) => [p, false]),
|
|
183
|
+
);
|
|
104
184
|
}
|
|
105
185
|
|
|
106
186
|
/**
|
|
@@ -127,6 +207,14 @@ export class AgentEngine {
|
|
|
127
207
|
core: [
|
|
128
208
|
new SandboxExecTool(this.workspace, this.config.kcExecTimeout),
|
|
129
209
|
new WorkspaceFileTool(this.workspace, this.versionManager),
|
|
210
|
+
new CopyToWorkspaceTool(this.workspace, {
|
|
211
|
+
largeRefThresholdMB: this.config.largeRefThresholdMB ?? 10,
|
|
212
|
+
}),
|
|
213
|
+
new SnapshotTool(this.workspace),
|
|
214
|
+
new ArchiveFileTool(this.workspace),
|
|
215
|
+
new ScheduleFetchTool(this.workspace),
|
|
216
|
+
new ReleaseTool(this.workspace, { kcVersion: "0.5.2" }),
|
|
217
|
+
new PhaseAdvanceTool((to, reason, opts) => this._advancePhase(to, reason, opts)),
|
|
130
218
|
new DocumentParseTool(this.workspace, {
|
|
131
219
|
mineruApiUrl: this.config.mineruApiUrl,
|
|
132
220
|
mineruApiKey: this.config.mineruApiKey,
|
|
@@ -138,9 +226,14 @@ export class AgentEngine {
|
|
|
138
226
|
new RuleCatalogTool(this.workspace),
|
|
139
227
|
new EvolutionCycleTool(this.workspace, this.cornerCases),
|
|
140
228
|
new DashboardRenderTool(this.workspace),
|
|
141
|
-
new AgentTool(
|
|
142
|
-
|
|
143
|
-
|
|
229
|
+
new AgentTool(
|
|
230
|
+
this.workspace,
|
|
231
|
+
({ sessionId, subagentScope, initialPhase }) => new AgentEngine({
|
|
232
|
+
client: this.client, config: this.config,
|
|
233
|
+
sessionId, subagentScope, initialPhase,
|
|
234
|
+
}),
|
|
235
|
+
() => this.currentPhase,
|
|
236
|
+
),
|
|
144
237
|
new WebSearchTool(this.config.tavilyApiKey),
|
|
145
238
|
],
|
|
146
239
|
// Distillation+ only (DISTILL mode)
|
|
@@ -204,9 +297,11 @@ export class AgentEngine {
|
|
|
204
297
|
);
|
|
205
298
|
}
|
|
206
299
|
|
|
207
|
-
// Task progress (ralph-loop)
|
|
208
|
-
|
|
209
|
-
|
|
300
|
+
// Task progress (ralph-loop) — skipped for sub-agents (no taskManager)
|
|
301
|
+
if (this.taskManager) {
|
|
302
|
+
const taskContext = this.taskManager.describeForContext();
|
|
303
|
+
if (taskContext) lines.push("", taskContext);
|
|
304
|
+
}
|
|
210
305
|
|
|
211
306
|
return lines.join("\n");
|
|
212
307
|
}
|
|
@@ -233,9 +328,126 @@ export class AgentEngine {
|
|
|
233
328
|
};
|
|
234
329
|
}
|
|
235
330
|
|
|
331
|
+
/**
|
|
332
|
+
* Run the windowing check immediately after a tool result appends to
|
|
333
|
+
* history. Called from runTurn() so that a large tool result can't sit in
|
|
334
|
+
* history past the threshold until the next LLM-loop iteration, where a
|
|
335
|
+
* stream-abort could then trap the context in a bloated state.
|
|
336
|
+
*
|
|
337
|
+
* Safe to call frequently — contextWindow.window() fast-paths when under
|
|
338
|
+
* the trigger fraction.
|
|
339
|
+
*/
|
|
340
|
+
_maybeWindowAfterToolResult() {
|
|
341
|
+
if (!this.contextWindow) return;
|
|
342
|
+
const windowed = this.contextWindow.window(this.history.messages, this._phaseSummaries);
|
|
343
|
+
if (windowed.wasWindowed) {
|
|
344
|
+
this.history.messages = windowed.messages;
|
|
345
|
+
this.eventLog.append("context_windowed", {
|
|
346
|
+
removed: windowed.removedCount,
|
|
347
|
+
trigger: "post_tool_result",
|
|
348
|
+
});
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
// Heap-pressure diagnostic. The TUI has its own virtualization + tool-
|
|
352
|
+
// output truncation (Bug 3 fixes), so Ink itself should never OOM. If we
|
|
353
|
+
// still see high heap usage, something else is leaking — log it once per
|
|
354
|
+
// pressure-crossing so operators can investigate without flooding logs.
|
|
355
|
+
try {
|
|
356
|
+
const mem = process.memoryUsage();
|
|
357
|
+
const frac = mem.heapUsed / (mem.heapTotal || 1);
|
|
358
|
+
if (frac > 0.80 && !this._memPressureLogged) {
|
|
359
|
+
this._memPressureLogged = true;
|
|
360
|
+
this.eventLog.append("memory_pressure", {
|
|
361
|
+
heapUsedMB: Math.round(mem.heapUsed / 1024 / 1024),
|
|
362
|
+
heapTotalMB: Math.round(mem.heapTotal / 1024 / 1024),
|
|
363
|
+
rssMB: Math.round(mem.rss / 1024 / 1024),
|
|
364
|
+
historyLength: this.history.messages.length,
|
|
365
|
+
});
|
|
366
|
+
} else if (frac < 0.60 && this._memPressureLogged) {
|
|
367
|
+
this._memPressureLogged = false; // re-arm for next crossing
|
|
368
|
+
}
|
|
369
|
+
} catch { /* process.memoryUsage failures are non-fatal */ }
|
|
370
|
+
}
|
|
371
|
+
|
|
372
|
+
/**
|
|
373
|
+
* Pre-flight hard ceiling (Bug 1). After windowing, if the message
|
|
374
|
+
* array's total token count still exceeds the model's input budget,
|
|
375
|
+
* drop oldest user-bounded blocks until under budget.
|
|
376
|
+
*
|
|
377
|
+
* Drops in BLOCK units — a block is `user(N) + everything until the
|
|
378
|
+
* next user`. This guarantees the head after a drop is always either a
|
|
379
|
+
* user message or empty, satisfying Anthropic's "first message must use
|
|
380
|
+
* the user role" requirement and OpenAI's tool-call adjacency rules.
|
|
381
|
+
*
|
|
382
|
+
* Treats the compaction summary pair (user with `[Previous conversation
|
|
383
|
+
* summary]` or `[Context Summary` marker, followed by assistant ack) as
|
|
384
|
+
* sticky — it represents prior LLM-summarized work and should outlive
|
|
385
|
+
* any normal turn.
|
|
386
|
+
*/
|
|
387
|
+
_enforceTokenBudget(messages) {
|
|
388
|
+
const limit = this.config.kcContextLimit || 200000;
|
|
389
|
+
const reserve = this.config.kcMaxTokens || DEFAULT_KC_MAX_TOKENS;
|
|
390
|
+
const budget = limit - reserve;
|
|
391
|
+
let totalTokens = estimateMessagesTokens(messages);
|
|
392
|
+
if (totalTokens <= budget) return messages;
|
|
393
|
+
|
|
394
|
+
// Sticky region: system + (optional summary user + ack assistant)
|
|
395
|
+
let stickyEnd = messages[0]?.role === "system" ? 1 : 0;
|
|
396
|
+
const sumMarkers = ["[Previous conversation summary]", "[Context Summary"];
|
|
397
|
+
const hasSummaryAt = (i) =>
|
|
398
|
+
messages[i]?.role === "user" &&
|
|
399
|
+
typeof messages[i].content === "string" &&
|
|
400
|
+
sumMarkers.some((m) => messages[i].content.startsWith(m));
|
|
401
|
+
if (hasSummaryAt(stickyEnd)) {
|
|
402
|
+
stickyEnd++;
|
|
403
|
+
if (messages[stickyEnd]?.role === "assistant") stickyEnd++;
|
|
404
|
+
}
|
|
405
|
+
|
|
406
|
+
let droppedCount = 0;
|
|
407
|
+
let droppedTokens = 0;
|
|
408
|
+
|
|
409
|
+
// Drop user-bounded blocks. A block starts at messages[stickyEnd]
|
|
410
|
+
// (expected to be a user message in normal flow) and runs up to (not
|
|
411
|
+
// including) the next user message — or to the end of array.
|
|
412
|
+
while (totalTokens > budget && messages.length > stickyEnd) {
|
|
413
|
+
const blockStart = stickyEnd;
|
|
414
|
+
let blockEnd = blockStart + 1;
|
|
415
|
+
while (blockEnd < messages.length && messages[blockEnd].role !== "user") blockEnd++;
|
|
416
|
+
// If this block goes to end-of-array, there's no following user to anchor
|
|
417
|
+
// the head — dropping it would leave just [system, (summary)?]. Stop and
|
|
418
|
+
// let the LLM call attempt; the API will surface a clear error if even
|
|
419
|
+
// sticky alone is over budget.
|
|
420
|
+
if (blockEnd === messages.length) break;
|
|
421
|
+
const removed = messages.splice(blockStart, blockEnd - blockStart);
|
|
422
|
+
droppedCount += removed.length;
|
|
423
|
+
droppedTokens += removed.reduce((a, m) => a + estimateTokens(JSON.stringify(m)), 0);
|
|
424
|
+
totalTokens = estimateMessagesTokens(messages);
|
|
425
|
+
}
|
|
426
|
+
|
|
427
|
+
// Defensive postcondition: head after sticky must be a user message or
|
|
428
|
+
// the array must end at sticky. Block-drop should make this trivially true,
|
|
429
|
+
// but if the input was malformed (e.g., already started with a non-user),
|
|
430
|
+
// clean up here so we never send an Anthropic-invalid sequence.
|
|
431
|
+
while (messages.length > stickyEnd && messages[stickyEnd].role !== "user") {
|
|
432
|
+
messages.splice(stickyEnd, 1);
|
|
433
|
+
droppedCount++;
|
|
434
|
+
}
|
|
435
|
+
|
|
436
|
+
if (droppedCount > 0) {
|
|
437
|
+
this.eventLog.append("context_truncated", {
|
|
438
|
+
droppedCount,
|
|
439
|
+
droppedTokens,
|
|
440
|
+
finalTokens: totalTokens,
|
|
441
|
+
budget,
|
|
442
|
+
});
|
|
443
|
+
}
|
|
444
|
+
return messages;
|
|
445
|
+
}
|
|
446
|
+
|
|
236
447
|
/**
|
|
237
448
|
* Compact conversation history by summarizing older messages via LLM.
|
|
238
|
-
* Keeps the most recent messages intact.
|
|
449
|
+
* Keeps the most recent messages intact. (Bug 1: now chunked — never sends
|
|
450
|
+
* a single oversized prompt to the summarizer LLM.)
|
|
239
451
|
* @param {object} [opts]
|
|
240
452
|
* @param {number} [opts.recentCount=20] - Number of recent messages to keep
|
|
241
453
|
* @returns {Promise<{removedCount: number, retainedCount: number, summaryTokens: number}|null>}
|
|
@@ -246,46 +458,20 @@ export class AgentEngine {
|
|
|
246
458
|
const olderMessages = this.history.messages.slice(0, -recentCount);
|
|
247
459
|
const recentMessages = this.history.messages.slice(-recentCount);
|
|
248
460
|
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
const summaryResp = await this.client.chat({
|
|
252
|
-
model: this.config.kcModel,
|
|
253
|
-
messages: [
|
|
254
|
-
{
|
|
255
|
-
role: "system",
|
|
256
|
-
content:
|
|
257
|
-
"You are a conversation summarizer. Produce a concise summary of the following conversation. " +
|
|
258
|
-
"Focus on: decisions made, files created or modified, current state of work, key findings, " +
|
|
259
|
-
"unresolved questions. Be specific about file paths, rule IDs, and results. Keep under 2000 tokens.",
|
|
260
|
-
},
|
|
261
|
-
{
|
|
262
|
-
role: "user",
|
|
263
|
-
content: `Summarize this conversation:\n\n${JSON.stringify(olderMessages)}`,
|
|
264
|
-
},
|
|
265
|
-
],
|
|
266
|
-
maxTokens: 2048,
|
|
267
|
-
});
|
|
268
|
-
summary = summaryResp.choices?.[0]?.message?.content || null;
|
|
269
|
-
} catch {
|
|
270
|
-
// LLM summary failed — do mechanical fallback
|
|
271
|
-
summary = null;
|
|
272
|
-
}
|
|
461
|
+
const CHUNK_BUDGET = 30000; // tokens per summarization request
|
|
462
|
+
const chunks = this._chunkMessages(olderMessages, CHUNK_BUDGET);
|
|
273
463
|
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
const
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
lines.push(`- User: ${(msg.content || "").slice(0, 100)}`);
|
|
280
|
-
} else if (msg.role === "assistant" && msg.tool_calls) {
|
|
281
|
-
for (const tc of msg.tool_calls) {
|
|
282
|
-
lines.push(`- Tool call: ${tc.function?.name}`);
|
|
283
|
-
}
|
|
284
|
-
}
|
|
285
|
-
}
|
|
286
|
-
summary = lines.join("\n");
|
|
464
|
+
const partials = [];
|
|
465
|
+
for (let i = 0; i < chunks.length; i++) {
|
|
466
|
+
const chunk = chunks[i];
|
|
467
|
+
const partial = await this._summarizeChunk(chunk, i, chunks.length);
|
|
468
|
+
partials.push(partial);
|
|
287
469
|
}
|
|
288
470
|
|
|
471
|
+
const summary = partials.length === 1
|
|
472
|
+
? partials[0]
|
|
473
|
+
: "## Compacted history (multi-part)\n\n" + partials.map((p, i) => `### Part ${i + 1}\n${p}`).join("\n\n");
|
|
474
|
+
|
|
289
475
|
// Replace history
|
|
290
476
|
this.history._messages = [
|
|
291
477
|
{ role: "user", content: `[Previous conversation summary]\n${summary}` },
|
|
@@ -298,6 +484,7 @@ export class AgentEngine {
|
|
|
298
484
|
this.eventLog.append("compact", {
|
|
299
485
|
removedCount: olderMessages.length,
|
|
300
486
|
retainedCount: recentMessages.length,
|
|
487
|
+
chunkCount: chunks.length,
|
|
301
488
|
summary,
|
|
302
489
|
});
|
|
303
490
|
|
|
@@ -308,6 +495,81 @@ export class AgentEngine {
|
|
|
308
495
|
};
|
|
309
496
|
}
|
|
310
497
|
|
|
498
|
+
/**
|
|
499
|
+
* Split a flat message list into chunks where each chunk's serialized JSON
|
|
500
|
+
* fits within tokenBudget. Chunks are turn-aligned where possible (a single
|
|
501
|
+
* user→assistant→tool sequence won't be split mid-turn unless that single
|
|
502
|
+
* turn alone exceeds the budget; in that case it gets its own oversized
|
|
503
|
+
* chunk and the LLM call may fail → mechanical fallback fires).
|
|
504
|
+
*/
|
|
505
|
+
_chunkMessages(messages, tokenBudget) {
|
|
506
|
+
const chunks = [];
|
|
507
|
+
let current = [];
|
|
508
|
+
let currentTokens = 0;
|
|
509
|
+
for (const msg of messages) {
|
|
510
|
+
const mTokens = estimateTokens(JSON.stringify(msg));
|
|
511
|
+
if (current.length > 0 && currentTokens + mTokens > tokenBudget) {
|
|
512
|
+
chunks.push(current);
|
|
513
|
+
current = [];
|
|
514
|
+
currentTokens = 0;
|
|
515
|
+
}
|
|
516
|
+
current.push(msg);
|
|
517
|
+
currentTokens += mTokens;
|
|
518
|
+
}
|
|
519
|
+
if (current.length > 0) chunks.push(current);
|
|
520
|
+
return chunks;
|
|
521
|
+
}
|
|
522
|
+
|
|
523
|
+
/**
|
|
524
|
+
* Summarize one chunk via the conductor LLM. On failure (incl. context-length
|
|
525
|
+
* errors that the chunked split should usually prevent), fall back to a
|
|
526
|
+
* mechanical summary so we always produce *something*.
|
|
527
|
+
*/
|
|
528
|
+
async _summarizeChunk(chunk, idx, total) {
|
|
529
|
+
const partLabel = total > 1 ? ` (part ${idx + 1}/${total})` : "";
|
|
530
|
+
try {
|
|
531
|
+
const resp = await this.client.chat({
|
|
532
|
+
model: this.config.kcModel,
|
|
533
|
+
messages: [
|
|
534
|
+
{
|
|
535
|
+
role: "system",
|
|
536
|
+
content:
|
|
537
|
+
"You are a conversation summarizer. Produce a concise summary of the following conversation excerpt. " +
|
|
538
|
+
"Focus on: decisions made, files created or modified, current state of work, key findings, " +
|
|
539
|
+
"unresolved questions. Be specific about file paths, rule IDs, and results. Keep under 1500 tokens.",
|
|
540
|
+
},
|
|
541
|
+
{
|
|
542
|
+
role: "user",
|
|
543
|
+
content: `Summarize this conversation excerpt${partLabel}:\n\n${JSON.stringify(chunk)}`,
|
|
544
|
+
},
|
|
545
|
+
],
|
|
546
|
+
maxTokens: 1800,
|
|
547
|
+
});
|
|
548
|
+
const text = resp.choices?.[0]?.message?.content;
|
|
549
|
+
if (text) return text;
|
|
550
|
+
} catch {
|
|
551
|
+
// fall through to mechanical
|
|
552
|
+
}
|
|
553
|
+
return this._mechanicalSummary(chunk, partLabel);
|
|
554
|
+
}
|
|
555
|
+
|
|
556
|
+
_mechanicalSummary(chunk, partLabel) {
|
|
557
|
+
const lines = [`Mechanical summary${partLabel}:`];
|
|
558
|
+
for (const msg of chunk) {
|
|
559
|
+
if (msg.role === "user" && typeof msg.content === "string") {
|
|
560
|
+
lines.push(`- User: ${msg.content.slice(0, 120).replace(/\s+/g, " ")}`);
|
|
561
|
+
} else if (msg.role === "assistant") {
|
|
562
|
+
if (typeof msg.content === "string" && msg.content) {
|
|
563
|
+
lines.push(`- Assistant: ${msg.content.slice(0, 120).replace(/\s+/g, " ")}`);
|
|
564
|
+
}
|
|
565
|
+
for (const tc of msg.tool_calls || []) {
|
|
566
|
+
lines.push(`- Tool call: ${tc.function?.name || "?"}`);
|
|
567
|
+
}
|
|
568
|
+
}
|
|
569
|
+
}
|
|
570
|
+
return lines.join("\n");
|
|
571
|
+
}
|
|
572
|
+
|
|
311
573
|
/**
|
|
312
574
|
* Restore an engine from a persisted session.
|
|
313
575
|
* @param {object} opts
|
|
@@ -325,6 +587,7 @@ export class AgentEngine {
|
|
|
325
587
|
engine.currentPhase = data.currentPhase || Phase.BOOTSTRAP;
|
|
326
588
|
engine._phaseSummaries = data.phaseSummaries || [];
|
|
327
589
|
engine._registerToolsForPhase(engine.currentPhase);
|
|
590
|
+
engine.workspace.setPhase(engine.currentPhase);
|
|
328
591
|
|
|
329
592
|
// Restore project directory from saved state
|
|
330
593
|
if (data.projectDir) {
|
|
@@ -342,6 +605,17 @@ export class AgentEngine {
|
|
|
342
605
|
}
|
|
343
606
|
}
|
|
344
607
|
|
|
608
|
+
// Re-prime _lastReady AFTER importState so it reflects the restored
|
|
609
|
+
// pipeline milestones, not the empty defaults from constructor.
|
|
610
|
+
// (Bug 5 fix — without this, resume reignites auto-advance.)
|
|
611
|
+
for (const phase of Object.keys(engine.pipelines)) {
|
|
612
|
+
try {
|
|
613
|
+
engine._lastReady[phase] = !!engine.pipelines[phase].exitCriteriaMet?.();
|
|
614
|
+
} catch {
|
|
615
|
+
engine._lastReady[phase] = false;
|
|
616
|
+
}
|
|
617
|
+
}
|
|
618
|
+
|
|
345
619
|
engine.eventLog.append("session_resume", {
|
|
346
620
|
resumedPhase: engine.currentPhase,
|
|
347
621
|
resumedFromSeq: data.lastEventSeq,
|
|
@@ -358,6 +632,56 @@ export class AgentEngine {
|
|
|
358
632
|
this.sessionState.save(this);
|
|
359
633
|
}
|
|
360
634
|
|
|
635
|
+
/**
|
|
636
|
+
* Rename the workspace folder and cascade the new path to every persistence
|
|
637
|
+
* subsystem that captured `workspace.cwd` at construction time (Bug 3).
|
|
638
|
+
* Without this cascade, subsystems keep writing to the OLD path even
|
|
639
|
+
* though the directory has moved on disk — the user sees the renamed dir
|
|
640
|
+
* "die" while the old dir keeps growing.
|
|
641
|
+
*
|
|
642
|
+
* Also regenerates Block 9 cron wrapper scripts which bake in absolute
|
|
643
|
+
* paths to the workspace. Returns information for the TUI to surface
|
|
644
|
+
* (incl. whether the user needs to re-install crontab lines).
|
|
645
|
+
*
|
|
646
|
+
* @param {string} newName
|
|
647
|
+
* @returns {{ sessionId: string, oldCwd: string, newCwd: string,
|
|
648
|
+
* scheduleWrappersRegenerated: string[],
|
|
649
|
+
* scheduleWrappersSkipped: string[] }}
|
|
650
|
+
*/
|
|
651
|
+
renameSession(newName) {
|
|
652
|
+
const r = this.workspace.rename(newName);
|
|
653
|
+
if (r.changed) {
|
|
654
|
+
// Cascade to every subsystem that captured workspace.cwd
|
|
655
|
+
this.history._setWorkspacePath?.(r.newCwd);
|
|
656
|
+
this.eventLog._setWorkspacePath?.(r.newCwd);
|
|
657
|
+
this.sessionState._setWorkspacePath?.(r.newCwd);
|
|
658
|
+
this.taskManager?._setWorkspacePath?.(r.newCwd);
|
|
659
|
+
this.confidence._setWorkspacePath?.(r.newCwd);
|
|
660
|
+
this.cornerCases._setWorkspacePath?.(r.newCwd);
|
|
661
|
+
}
|
|
662
|
+
|
|
663
|
+
// Regenerate cron wrapper scripts — they bake absolute paths to WORKSPACE,
|
|
664
|
+
// INPUT_DIR, LOG_FILE, so rename invalidates them. The Scheduler is
|
|
665
|
+
// workspace-bound (created on demand inside the schedule_fetch tool), so
|
|
666
|
+
// construct a fresh one against the renamed workspace.
|
|
667
|
+
let scheduleResult = { regenerated: [], disabled: [], failed: [] };
|
|
668
|
+
try {
|
|
669
|
+
const sched = new Scheduler(this.workspace);
|
|
670
|
+
scheduleResult = sched.regenerateAllWrappers();
|
|
671
|
+
} catch {
|
|
672
|
+
// Best effort — never let scheduler issues block the rename
|
|
673
|
+
}
|
|
674
|
+
|
|
675
|
+
return {
|
|
676
|
+
sessionId: r.sessionId,
|
|
677
|
+
oldCwd: r.oldCwd,
|
|
678
|
+
newCwd: r.newCwd,
|
|
679
|
+
scheduleWrappersRegenerated: scheduleResult.regenerated,
|
|
680
|
+
scheduleWrappersDisabled: scheduleResult.disabled,
|
|
681
|
+
scheduleWrappersFailed: scheduleResult.failed,
|
|
682
|
+
};
|
|
683
|
+
}
|
|
684
|
+
|
|
361
685
|
/**
|
|
362
686
|
* Run one conversation turn. Yields AgentEvent objects.
|
|
363
687
|
* Loops: LLM call -> tool execution -> LLM call ... until no tool calls.
|
|
@@ -383,7 +707,7 @@ export class AgentEngine {
|
|
|
383
707
|
while (true) {
|
|
384
708
|
// Apply context windowing before sending to LLM
|
|
385
709
|
const windowed = this.contextWindow.window(this.history.messages, this._phaseSummaries);
|
|
386
|
-
|
|
710
|
+
let messages = [{ role: "system", content: systemPrompt }, ...windowed.messages];
|
|
387
711
|
|
|
388
712
|
if (windowed.wasWindowed) {
|
|
389
713
|
this.eventLog.append("context_windowed", {
|
|
@@ -392,6 +716,12 @@ export class AgentEngine {
|
|
|
392
716
|
});
|
|
393
717
|
}
|
|
394
718
|
|
|
719
|
+
// Pre-flight hard ceiling (Bug 1 P0). Even after windowing, if the
|
|
720
|
+
// request still exceeds the model's input budget (e.g., recent messages
|
|
721
|
+
// alone are too big), drop the oldest user-bounded blocks until under
|
|
722
|
+
// budget. Better to lose some history than crash with HTTP 400.
|
|
723
|
+
messages = this._enforceTokenBudget(messages);
|
|
724
|
+
|
|
395
725
|
this.eventLog.append("llm_start", {
|
|
396
726
|
model: this.config.kcModel,
|
|
397
727
|
messageCount: messages.length,
|
|
@@ -448,6 +778,12 @@ export class AgentEngine {
|
|
|
448
778
|
});
|
|
449
779
|
|
|
450
780
|
if (toolCallsAcc.size === 0) {
|
|
781
|
+
// Bug 4 trigger (1): re-check phase criteria at end of every turn —
|
|
782
|
+
// KC may have advanced state via conversation alone, without any
|
|
783
|
+
// tool that the pipeline narrowly watches.
|
|
784
|
+
const advancedEv = this._maybeAutoAdvance();
|
|
785
|
+
if (advancedEv) yield advancedEv;
|
|
786
|
+
|
|
451
787
|
this.eventLog.append("turn_complete", {});
|
|
452
788
|
this.saveState();
|
|
453
789
|
yield new AgentEvent({ type: "turn_complete" });
|
|
@@ -466,52 +802,57 @@ export class AgentEngine {
|
|
|
466
802
|
|
|
467
803
|
const result = await this.toolRegistry.execute(tc.name, inputData);
|
|
468
804
|
|
|
805
|
+
// Tool-call offloading: large outputs go to logs/tool_results/<traceId>.txt;
|
|
806
|
+
// history holds head + tail with a pointer. Event log keeps the full output
|
|
807
|
+
// (it's append-only and the source of truth).
|
|
808
|
+
const offload = this._maybeOffload(tc.name, result);
|
|
809
|
+
const historyContent = offload ? offload.digest : (result.content || "");
|
|
810
|
+
|
|
469
811
|
this.eventLog.append("tool_result", {
|
|
470
812
|
name: tc.name,
|
|
471
|
-
output: result.content
|
|
813
|
+
output: result.content || "",
|
|
472
814
|
isError: result.isError,
|
|
815
|
+
traceId: offload?.traceId || null,
|
|
473
816
|
});
|
|
474
817
|
yield new AgentEvent({
|
|
475
818
|
type: "tool_result",
|
|
476
819
|
name: tc.name,
|
|
477
|
-
output:
|
|
820
|
+
output: historyContent,
|
|
478
821
|
isError: result.isError,
|
|
479
822
|
});
|
|
480
823
|
|
|
481
824
|
this.history.addRaw({
|
|
482
825
|
role: "tool",
|
|
483
826
|
tool_call_id: tc.id,
|
|
484
|
-
content:
|
|
827
|
+
content: historyContent,
|
|
485
828
|
});
|
|
486
829
|
|
|
830
|
+
// Post-tool-result safety net: check for context pressure RIGHT NOW
|
|
831
|
+
// rather than waiting for the next LLM-loop iteration. A large tool
|
|
832
|
+
// result that tips history over the threshold used to sit there
|
|
833
|
+
// until the next turn, and if the stream aborted in between the
|
|
834
|
+
// user saw "CTX: 210% / stream terminated" with no recovery.
|
|
835
|
+
this._maybeWindowAfterToolResult();
|
|
836
|
+
|
|
487
837
|
// Pipeline controller: update state and re-register tools on phase change
|
|
488
838
|
if (pipeline?.onToolResult) {
|
|
489
839
|
const pEvent = pipeline.onToolResult(tc.name, inputData, result);
|
|
490
840
|
if (pEvent) {
|
|
491
841
|
if (pEvent.type === "phase_ready" && pEvent.nextPhase) {
|
|
492
|
-
|
|
493
|
-
this._phaseSummaries.push(phaseSummary);
|
|
494
|
-
this.eventLog.append("phase_transition", {
|
|
495
|
-
from: this.currentPhase,
|
|
496
|
-
to: pEvent.nextPhase,
|
|
497
|
-
summary: phaseSummary,
|
|
498
|
-
});
|
|
499
|
-
this.currentPhase = pEvent.nextPhase;
|
|
500
|
-
this._registerToolsForPhase(this.currentPhase);
|
|
501
|
-
|
|
502
|
-
// Ralph-loop: create per-rule tasks for the new phase
|
|
503
|
-
this._createTasksForPhase(this.currentPhase);
|
|
504
|
-
|
|
505
|
-
this.saveState();
|
|
842
|
+
this._advancePhase(pEvent.nextPhase, pEvent.message || "exit criteria met");
|
|
506
843
|
}
|
|
507
|
-
yield new AgentEvent({
|
|
508
|
-
type: "pipeline_event",
|
|
509
|
-
data: pEvent,
|
|
510
|
-
});
|
|
844
|
+
yield new AgentEvent({ type: "pipeline_event", data: pEvent });
|
|
511
845
|
}
|
|
512
846
|
}
|
|
513
847
|
}
|
|
514
848
|
|
|
849
|
+
// Bug 4 fix: re-check exit criteria after every tool-result loop, not
|
|
850
|
+
// just from pipeline.onToolResult. The pipeline's describeState() (called
|
|
851
|
+
// on every turn) already re-scans, so exitCriteriaMet() is accurate; we
|
|
852
|
+
// just need to act on it eagerly.
|
|
853
|
+
const ev = this._maybeAutoAdvance();
|
|
854
|
+
if (ev) yield ev;
|
|
855
|
+
|
|
515
856
|
} catch (err) {
|
|
516
857
|
this.eventLog.append("error", { message: err.message });
|
|
517
858
|
yield new AgentEvent({ type: "error", message: err.message });
|
|
@@ -520,17 +861,123 @@ export class AgentEngine {
|
|
|
520
861
|
}
|
|
521
862
|
}
|
|
522
863
|
|
|
864
|
+
/**
|
|
865
|
+
* Centralized phase transition (Bug 4). All three triggers route through here:
|
|
866
|
+
* (1) pipeline.onToolResult returning phase_ready
|
|
867
|
+
* (2) post-turn auto-check via _maybeAutoAdvance
|
|
868
|
+
* (3) explicit user request via the phase_advance tool
|
|
869
|
+
*
|
|
870
|
+
* Reachability: by default only forward-by-one transitions per NEXT_PHASE.
|
|
871
|
+
* Set `force: true` to allow non-adjacent or backward transitions (e.g. user
|
|
872
|
+
* explicitly requests a regression for testing). The refusal is logged.
|
|
873
|
+
*
|
|
874
|
+
* Idempotent — calling with the current phase is a no-op.
|
|
875
|
+
*/
|
|
876
|
+
_advancePhase(nextPhase, reason = "", { force = false } = {}) {
|
|
877
|
+
if (!nextPhase || nextPhase === this.currentPhase) return false;
|
|
878
|
+
|
|
879
|
+
const expected = NEXT_PHASE[this.currentPhase];
|
|
880
|
+
if (!force && nextPhase !== expected) {
|
|
881
|
+
this.eventLog.append("phase_advance_refused", {
|
|
882
|
+
from: this.currentPhase, to: nextPhase, reason,
|
|
883
|
+
hint: expected ? `expected next phase is '${expected}' — pass force:true to override`
|
|
884
|
+
: `${this.currentPhase} is the terminal phase`,
|
|
885
|
+
});
|
|
886
|
+
return false;
|
|
887
|
+
}
|
|
888
|
+
|
|
889
|
+
const phaseSummary = `[${this.currentPhase.toUpperCase()} → ${nextPhase.toUpperCase()}]: ${reason}${force && nextPhase !== expected ? " (forced)" : ""}`;
|
|
890
|
+
this._phaseSummaries.push(phaseSummary);
|
|
891
|
+
this.eventLog.append("phase_transition", {
|
|
892
|
+
from: this.currentPhase,
|
|
893
|
+
to: nextPhase,
|
|
894
|
+
reason,
|
|
895
|
+
forced: force && nextPhase !== expected,
|
|
896
|
+
});
|
|
897
|
+
this.currentPhase = nextPhase;
|
|
898
|
+
this._registerToolsForPhase(this.currentPhase);
|
|
899
|
+
this.workspace.setPhase(this.currentPhase);
|
|
900
|
+
this._createTasksForPhase(this.currentPhase);
|
|
901
|
+
this.saveState();
|
|
902
|
+
return true;
|
|
903
|
+
}
|
|
904
|
+
|
|
905
|
+
/**
|
|
906
|
+
* Bug 4 trigger (1) auto-detect, edge-triggered (Bug 5): only fires on a
|
|
907
|
+
* fresh false → true flip in `exitCriteriaMet()`. Sessions resumed in an
|
|
908
|
+
* already-met state do nothing; users iterating in a phase whose criteria
|
|
909
|
+
* have been met for a while do nothing. Real new evidence is required.
|
|
910
|
+
*/
|
|
911
|
+
_maybeAutoAdvance() {
|
|
912
|
+
const phase = this.currentPhase;
|
|
913
|
+
const pipeline = this.pipelines[phase];
|
|
914
|
+
let nowReady = false;
|
|
915
|
+
try { nowReady = !!pipeline?.exitCriteriaMet?.(); } catch { nowReady = false; }
|
|
916
|
+
|
|
917
|
+
if (!nowReady) {
|
|
918
|
+
this._lastReady[phase] = false;
|
|
919
|
+
return null;
|
|
920
|
+
}
|
|
921
|
+
// Edge-trigger: nowReady && !wasReady
|
|
922
|
+
if (this._lastReady[phase]) return null;
|
|
923
|
+
this._lastReady[phase] = true;
|
|
924
|
+
|
|
925
|
+
const next = NEXT_PHASE[phase];
|
|
926
|
+
if (!next) return null;
|
|
927
|
+
const advanced = this._advancePhase(next, "exit criteria flipped to met");
|
|
928
|
+
if (!advanced) return null;
|
|
929
|
+
return new AgentEvent({
|
|
930
|
+
type: "pipeline_event",
|
|
931
|
+
data: { type: "phase_ready", nextPhase: next, message: "exit criteria flipped to met" },
|
|
932
|
+
});
|
|
933
|
+
}
|
|
934
|
+
|
|
935
|
+
/**
|
|
936
|
+
* Tool-call offloading. If the tool's content exceeds the threshold,
|
|
937
|
+
* write the full content to logs/tool_results/<traceId>.txt and return a
|
|
938
|
+
* digest (head + tail) with a pointer. Otherwise return null (caller uses
|
|
939
|
+
* full content).
|
|
940
|
+
*/
|
|
941
|
+
_maybeOffload(toolName, result) {
|
|
942
|
+
const content = result.content || "";
|
|
943
|
+
if (!content) return null;
|
|
944
|
+
const threshold = result.isError
|
|
945
|
+
? (this.config.toolOutputOffloadErrorTokens ?? 500)
|
|
946
|
+
: (this.config.toolOutputOffloadTokens ?? 2000);
|
|
947
|
+
const tokens = estimateTokens(content);
|
|
948
|
+
if (tokens <= threshold) return null;
|
|
949
|
+
|
|
950
|
+
const safeToolName = String(toolName || "tool").replace(/[^A-Za-z0-9_-]/g, "_");
|
|
951
|
+
const traceId = this.versionManager.generateTraceId(safeToolName, "result");
|
|
952
|
+
const offloadDir = path.join(this.workspace.cwd, "logs", "tool_results");
|
|
953
|
+
try {
|
|
954
|
+
fs.mkdirSync(offloadDir, { recursive: true });
|
|
955
|
+
fs.writeFileSync(path.join(offloadDir, `${traceId}.txt`), content, "utf-8");
|
|
956
|
+
} catch {
|
|
957
|
+
// If we can't write the offload file, fall back to keeping full content in context.
|
|
958
|
+
return null;
|
|
959
|
+
}
|
|
960
|
+
|
|
961
|
+
const HEAD = 800, TAIL = 800;
|
|
962
|
+
const truncatedNote = `\n\n[…truncated, ${tokens} tokens; full at logs/tool_results/${traceId}.txt — read with workspace_file if needed…]\n\n`;
|
|
963
|
+
const digest = content.length > HEAD + TAIL
|
|
964
|
+
? content.slice(0, HEAD) + truncatedNote + content.slice(-TAIL)
|
|
965
|
+
: content + truncatedNote;
|
|
966
|
+
return { traceId, digest };
|
|
967
|
+
}
|
|
968
|
+
|
|
523
969
|
/**
|
|
524
970
|
* Create per-rule tasks when entering a new phase.
|
|
525
971
|
* Reads the rule catalog and creates one task per rule for the given phase.
|
|
526
972
|
*/
|
|
527
973
|
_createTasksForPhase(phase) {
|
|
974
|
+
if (!this.taskManager) return; // Sub-agents don't manage tasks
|
|
528
975
|
const catalogPath = path.join(this.workspace.cwd, "rules", "catalog.json");
|
|
529
976
|
if (!fs.existsSync(catalogPath)) return;
|
|
530
977
|
|
|
531
978
|
try {
|
|
532
979
|
const catalog = JSON.parse(fs.readFileSync(catalogPath, "utf-8"));
|
|
533
|
-
const rules =
|
|
980
|
+
const rules = normalizeRuleCatalog(catalog);
|
|
534
981
|
if (rules.length > 0) {
|
|
535
982
|
this.taskManager.createRuleTasks(rules, phase);
|
|
536
983
|
}
|
|
@@ -546,6 +993,12 @@ export class AgentEngine {
|
|
|
546
993
|
* @yields {AgentEvent}
|
|
547
994
|
*/
|
|
548
995
|
async *runTaskLoop(userMessage) {
|
|
996
|
+
// Sub-agents don't run task loops — they execute one task and exit
|
|
997
|
+
if (!this.taskManager) {
|
|
998
|
+
yield* this.runTurn(userMessage);
|
|
999
|
+
return;
|
|
1000
|
+
}
|
|
1001
|
+
|
|
549
1002
|
// Run the initial turn (user's request)
|
|
550
1003
|
yield* this.runTurn(userMessage);
|
|
551
1004
|
|
|
@@ -593,6 +1046,41 @@ export class AgentEngine {
|
|
|
593
1046
|
progress: this.taskManager.progress,
|
|
594
1047
|
},
|
|
595
1048
|
});
|
|
1049
|
+
|
|
1050
|
+
// Bug 4 trigger (2): auto-advance when all phase tasks are done AND
|
|
1051
|
+
// the pipeline's exit criteria are also met (Bug 5 fix — task state
|
|
1052
|
+
// alone is a ralph-loop convenience, not authoritative phase signal;
|
|
1053
|
+
// tasks could be marked skipped manually or by an editor).
|
|
1054
|
+
if (this._allCurrentPhaseTasksComplete()) {
|
|
1055
|
+
const pipeline = this.pipelines[this.currentPhase];
|
|
1056
|
+
let exitMet = false;
|
|
1057
|
+
try { exitMet = !!pipeline?.exitCriteriaMet?.(); } catch { exitMet = false; }
|
|
1058
|
+
if (exitMet) {
|
|
1059
|
+
const next = NEXT_PHASE[this.currentPhase];
|
|
1060
|
+
if (next) {
|
|
1061
|
+
const advanced = this._advancePhase(next, "all current-phase tasks completed + exit criteria met");
|
|
1062
|
+
if (advanced) {
|
|
1063
|
+
yield new AgentEvent({
|
|
1064
|
+
type: "pipeline_event",
|
|
1065
|
+
data: { type: "phase_ready", nextPhase: next, message: "all phase tasks done; exit criteria met" },
|
|
1066
|
+
});
|
|
1067
|
+
}
|
|
1068
|
+
}
|
|
1069
|
+
}
|
|
1070
|
+
}
|
|
596
1071
|
}
|
|
597
1072
|
}
|
|
1073
|
+
|
|
1074
|
+
/**
|
|
1075
|
+
* True when every task tagged with the current phase is in a terminal state
|
|
1076
|
+
* (completed | failed | skipped) and at least one such task exists. Used by
|
|
1077
|
+
* runTaskLoop's auto-advance trigger.
|
|
1078
|
+
*/
|
|
1079
|
+
_allCurrentPhaseTasksComplete() {
|
|
1080
|
+
if (!this.taskManager) return false;
|
|
1081
|
+
const phase = this.currentPhase;
|
|
1082
|
+
const phaseTasks = this.taskManager.getAllTasks().filter((t) => t.phase === phase);
|
|
1083
|
+
if (phaseTasks.length === 0) return false;
|
|
1084
|
+
return phaseTasks.every((t) => t.status === "completed" || t.status === "failed" || t.status === "skipped");
|
|
1085
|
+
}
|
|
598
1086
|
}
|