kc-beta 0.5.6 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/QUICKSTART.md +17 -4
- package/README.md +58 -11
- package/bin/kc-beta.js +35 -1
- package/package.json +1 -1
- package/src/agent/bundle-tree.js +553 -0
- package/src/agent/context.js +40 -1
- package/src/agent/engine.js +644 -28
- package/src/agent/llm-client.js +67 -18
- package/src/agent/pipelines/finalization.js +186 -0
- package/src/agent/pipelines/index.js +8 -0
- package/src/agent/pipelines/initializer.js +40 -0
- package/src/agent/pipelines/skill-authoring.js +100 -6
- package/src/agent/skill-loader.js +54 -4
- package/src/agent/task-manager.js +66 -3
- package/src/agent/tools/agent-tool.js +283 -35
- package/src/agent/tools/bundle-search.js +146 -0
- package/src/agent/tools/document-chunk.js +246 -0
- package/src/agent/tools/document-classify.js +311 -0
- package/src/agent/tools/document-parse.js +8 -1
- package/src/agent/tools/phase-advance.js +30 -7
- package/src/agent/tools/registry.js +10 -0
- package/src/agent/tools/rule-catalog.js +17 -3
- package/src/agent/tools/sandbox-exec.js +30 -0
- package/src/agent/workspace.js +168 -14
- package/src/cli/components.js +165 -17
- package/src/cli/index.js +166 -19
- package/src/cli/meme.js +58 -0
- package/src/config.js +39 -2
- package/src/providers.js +26 -0
- package/template/skills/en/meta-meta/evolution-loop/SKILL.md +13 -1
- package/template/skills/en/meta-meta/rule-extraction/SKILL.md +74 -0
- package/template/skills/zh/meta-meta/evolution-loop/SKILL.md +7 -1
- package/template/skills/zh/meta-meta/rule-extraction/SKILL.md +73 -0
package/src/agent/engine.js
CHANGED
|
@@ -19,6 +19,9 @@ import { ReleaseTool } from "./tools/release.js";
|
|
|
19
19
|
import { PhaseAdvanceTool } from "./tools/phase-advance.js";
|
|
20
20
|
import { DocumentParseTool } from "./tools/document-parse.js";
|
|
21
21
|
import { DocumentSearchTool } from "./tools/document-search.js";
|
|
22
|
+
import { DocumentChunkTool } from "./tools/document-chunk.js";
|
|
23
|
+
import { BundleSearchTool } from "./tools/bundle-search.js";
|
|
24
|
+
import { DocumentClassifyTool } from "./tools/document-classify.js";
|
|
22
25
|
import { WorkerLLMCallTool } from "./tools/worker-llm-call.js";
|
|
23
26
|
import { WorkflowRunTool } from "./tools/workflow-run.js";
|
|
24
27
|
import { RuleCatalogTool } from "./tools/rule-catalog.js";
|
|
@@ -38,6 +41,7 @@ import { SkillAuthoringPipeline } from "./pipelines/skill-authoring.js";
|
|
|
38
41
|
import { SkillTestingPipeline } from "./pipelines/skill-testing.js";
|
|
39
42
|
import { DistillationEngine as DistillationPipeline } from "./pipelines/distillation.js";
|
|
40
43
|
import { ProductionQCPipeline } from "./pipelines/production-qc.js";
|
|
44
|
+
import { FinalizationPipeline } from "./pipelines/finalization.js";
|
|
41
45
|
import { EventLog } from "./event-log.js";
|
|
42
46
|
import { ContextWindow } from "./context-window.js";
|
|
43
47
|
import { SessionState } from "./session-state.js";
|
|
@@ -48,8 +52,10 @@ import { estimateTokens, estimateMessagesTokens } from "./token-counter.js";
|
|
|
48
52
|
// or kc_max_tokens in the global config.
|
|
49
53
|
const DEFAULT_KC_MAX_TOKENS = 65536;
|
|
50
54
|
|
|
51
|
-
// Phases where worker LLM tools are available (DISTILL mode)
|
|
52
|
-
|
|
55
|
+
// Phases where worker LLM tools are available (DISTILL mode).
|
|
56
|
+
// E1: FINALIZATION inherits worker-LLM access so one-last-pass validation
|
|
57
|
+
// runs + dashboard_render + workflow_run stay usable during packaging.
|
|
58
|
+
const DISTILL_PHASES = new Set([Phase.DISTILLATION, Phase.PRODUCTION_QC, Phase.FINALIZATION]);
|
|
53
59
|
|
|
54
60
|
// Linear phase order — used by auto-advance (Bug 4). Last phase has no successor.
|
|
55
61
|
// Exported so the TUI's /phase slash command (src/cli/index.js) can call
|
|
@@ -60,6 +66,7 @@ export const NEXT_PHASE = {
|
|
|
60
66
|
[Phase.SKILL_AUTHORING]: Phase.SKILL_TESTING,
|
|
61
67
|
[Phase.SKILL_TESTING]: Phase.DISTILLATION,
|
|
62
68
|
[Phase.DISTILLATION]: Phase.PRODUCTION_QC,
|
|
69
|
+
[Phase.PRODUCTION_QC]: Phase.FINALIZATION, // E1: new 7th phase
|
|
63
70
|
};
|
|
64
71
|
|
|
65
72
|
/**
|
|
@@ -162,6 +169,7 @@ export class AgentEngine {
|
|
|
162
169
|
[Phase.SKILL_TESTING]: new SkillTestingPipeline(this.workspace),
|
|
163
170
|
[Phase.DISTILLATION]: new DistillationPipeline(this.workspace),
|
|
164
171
|
[Phase.PRODUCTION_QC]: new ProductionQCPipeline(this.workspace),
|
|
172
|
+
[Phase.FINALIZATION]: new FinalizationPipeline(this.workspace), // E1
|
|
165
173
|
};
|
|
166
174
|
|
|
167
175
|
// Skill discovery (Claude Code pattern: index in context, full content on demand)
|
|
@@ -181,6 +189,61 @@ export class AgentEngine {
|
|
|
181
189
|
this._lastReady = Object.fromEntries(
|
|
182
190
|
Object.keys(this.pipelines).map((p) => [p, false]),
|
|
183
191
|
);
|
|
192
|
+
|
|
193
|
+
// B0.1: Heap sampler. Parent engines only — sub-agents share a process
|
|
194
|
+
// with the parent and would double-log. Writes a single JSONL line
|
|
195
|
+
// per minute to <workspace>/logs/heap.jsonl with the numbers needed
|
|
196
|
+
// to diagnose RSS creep (heapUsed/heapTotal/external/rss/arrayBuffers,
|
|
197
|
+
// plus active task count and history length). Always on, ~60 bytes
|
|
198
|
+
// per minute to disk.
|
|
199
|
+
this._heapSamplerStop = this._isSubagent ? null : this._startHeapSampler();
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
/**
|
|
203
|
+
* Start sampling process.memoryUsage() every 60 s into logs/heap.jsonl.
|
|
204
|
+
* Returns a stop fn. Timer is .unref()'d so it never keeps the process
|
|
205
|
+
* alive by itself. Failures are silently suppressed — this is a
|
|
206
|
+
* diagnostic, not a correctness feature.
|
|
207
|
+
*/
|
|
208
|
+
_startHeapSampler() {
|
|
209
|
+
const logDir = path.join(this.workspace.cwd, "logs");
|
|
210
|
+
const logPath = path.join(logDir, "heap.jsonl");
|
|
211
|
+
const sample = () => {
|
|
212
|
+
try {
|
|
213
|
+
const mem = process.memoryUsage();
|
|
214
|
+
const row = {
|
|
215
|
+
t: new Date().toISOString(),
|
|
216
|
+
seq: this.eventLog?.currentSeq ?? 0,
|
|
217
|
+
phase: this.currentPhase,
|
|
218
|
+
rssMB: Math.round(mem.rss / 1024 / 1024),
|
|
219
|
+
heapUsedMB: Math.round(mem.heapUsed / 1024 / 1024),
|
|
220
|
+
heapTotalMB: Math.round(mem.heapTotal / 1024 / 1024),
|
|
221
|
+
externalMB: Math.round((mem.external || 0) / 1024 / 1024),
|
|
222
|
+
arrayBuffersMB: Math.round((mem.arrayBuffers || 0) / 1024 / 1024),
|
|
223
|
+
historyLen: this.history?.messages?.length ?? 0,
|
|
224
|
+
tasksPending: this.taskManager?.progress?.pending ?? 0,
|
|
225
|
+
tasksInProgress: this.taskManager?.progress?.inProgress ?? 0,
|
|
226
|
+
};
|
|
227
|
+
fs.mkdirSync(logDir, { recursive: true });
|
|
228
|
+
fs.appendFileSync(logPath, JSON.stringify(row) + "\n", "utf-8");
|
|
229
|
+
} catch { /* never fatal */ }
|
|
230
|
+
};
|
|
231
|
+
// Record one sample at startup so we have a baseline even on short runs.
|
|
232
|
+
sample();
|
|
233
|
+
const timer = setInterval(sample, 60_000);
|
|
234
|
+
timer.unref?.();
|
|
235
|
+
return () => {
|
|
236
|
+
try {
|
|
237
|
+
clearInterval(timer);
|
|
238
|
+
sample(); // one final sample on shutdown
|
|
239
|
+
} catch { /* ignore */ }
|
|
240
|
+
};
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
/** Stop background diagnostics. Call on graceful shutdown. */
|
|
244
|
+
stop() {
|
|
245
|
+
try { this._heapSamplerStop?.(); } catch { /* ignore */ }
|
|
246
|
+
this._heapSamplerStop = null;
|
|
184
247
|
}
|
|
185
248
|
|
|
186
249
|
/**
|
|
@@ -214,7 +277,10 @@ export class AgentEngine {
|
|
|
214
277
|
new ArchiveFileTool(this.workspace),
|
|
215
278
|
new ScheduleFetchTool(this.workspace),
|
|
216
279
|
new ReleaseTool(this.workspace, { kcVersion: "0.5.2" }),
|
|
217
|
-
new PhaseAdvanceTool(
|
|
280
|
+
new PhaseAdvanceTool(
|
|
281
|
+
(to, reason, opts) => this._advancePhase(to, reason, opts),
|
|
282
|
+
() => this.currentPhase, // H1: tool reads phase BEFORE its own call
|
|
283
|
+
),
|
|
218
284
|
new DocumentParseTool(this.workspace, {
|
|
219
285
|
mineruApiUrl: this.config.mineruApiUrl,
|
|
220
286
|
mineruApiKey: this.config.mineruApiKey,
|
|
@@ -223,6 +289,12 @@ export class AgentEngine {
|
|
|
223
289
|
ocrModel: vlmModel,
|
|
224
290
|
}),
|
|
225
291
|
new DocumentSearchTool(this.workspace),
|
|
292
|
+
// Group C — chunker/RAG infrastructure ported from AMC app. Core
|
|
293
|
+
// tools (not phase-gated): useful from BOOTSTRAP through FINALIZATION
|
|
294
|
+
// for any doc-heavy project, not just rule extraction.
|
|
295
|
+
new DocumentChunkTool(this.workspace),
|
|
296
|
+
new BundleSearchTool(this.workspace),
|
|
297
|
+
new DocumentClassifyTool(this.workspace, this.config),
|
|
226
298
|
new RuleCatalogTool(this.workspace),
|
|
227
299
|
new EvolutionCycleTool(this.workspace, this.cornerCases),
|
|
228
300
|
new DashboardRenderTool(this.workspace),
|
|
@@ -313,7 +385,7 @@ export class AgentEngine {
|
|
|
313
385
|
getContextStats() {
|
|
314
386
|
const systemPrompt = this.context.build({
|
|
315
387
|
agentMd: this._readAgentMd(),
|
|
316
|
-
skillIndex: this._skillLoader.formatForContext(),
|
|
388
|
+
skillIndex: this._skillLoader.formatForContext(this.currentPhase),
|
|
317
389
|
pipelineState: this.pipelines[this.currentPhase]?.describeState?.() || null,
|
|
318
390
|
workspaceState: this._buildWorkspaceState(),
|
|
319
391
|
});
|
|
@@ -353,21 +425,37 @@ export class AgentEngine {
|
|
|
353
425
|
|
|
354
426
|
// Heap-pressure diagnostic. The TUI has its own virtualization + tool-
|
|
355
427
|
// output truncation (Bug 3 fixes), so Ink itself should never OOM. If we
|
|
356
|
-
// still see high heap usage, something else is leaking
|
|
357
|
-
//
|
|
428
|
+
// still see high heap usage, something else is leaking.
|
|
429
|
+
//
|
|
430
|
+
// A9: Original design logged once per pressure-crossing (edge-triggered),
|
|
431
|
+
// which went silent for 17h during E2E #3 while RSS climbed to 3.8GB.
|
|
432
|
+
// Now: still edge-trigger on entry (noisy otherwise), but ALSO re-emit
|
|
433
|
+
// every 15min while we're still above the threshold, so an operator
|
|
434
|
+
// watching logs after hour 4 still sees the signal. Drops to silent on
|
|
435
|
+
// recovery below 0.60.
|
|
358
436
|
try {
|
|
359
437
|
const mem = process.memoryUsage();
|
|
360
438
|
const frac = mem.heapUsed / (mem.heapTotal || 1);
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
439
|
+
const now = Date.now();
|
|
440
|
+
const REPRESS_INTERVAL_MS = 15 * 60 * 1000;
|
|
441
|
+
if (frac > 0.80) {
|
|
442
|
+
const firstCrossing = !this._memPressureLogged;
|
|
443
|
+
const dueForRepress = this._memPressureLastEmittedAt &&
|
|
444
|
+
(now - this._memPressureLastEmittedAt) >= REPRESS_INTERVAL_MS;
|
|
445
|
+
if (firstCrossing || dueForRepress) {
|
|
446
|
+
this._memPressureLogged = true;
|
|
447
|
+
this._memPressureLastEmittedAt = now;
|
|
448
|
+
this.eventLog.append("memory_pressure", {
|
|
449
|
+
heapUsedMB: Math.round(mem.heapUsed / 1024 / 1024),
|
|
450
|
+
heapTotalMB: Math.round(mem.heapTotal / 1024 / 1024),
|
|
451
|
+
rssMB: Math.round(mem.rss / 1024 / 1024),
|
|
452
|
+
historyLength: this.history.messages.length,
|
|
453
|
+
kind: firstCrossing ? "crossing" : "sustained",
|
|
454
|
+
});
|
|
455
|
+
}
|
|
369
456
|
} else if (frac < 0.60 && this._memPressureLogged) {
|
|
370
457
|
this._memPressureLogged = false; // re-arm for next crossing
|
|
458
|
+
this._memPressureLastEmittedAt = null;
|
|
371
459
|
}
|
|
372
460
|
} catch { /* process.memoryUsage failures are non-fatal */ }
|
|
373
461
|
}
|
|
@@ -701,7 +789,7 @@ export class AgentEngine {
|
|
|
701
789
|
|
|
702
790
|
const systemPrompt = this.context.build({
|
|
703
791
|
agentMd: this._readAgentMd(),
|
|
704
|
-
skillIndex: this._skillLoader.formatForContext(),
|
|
792
|
+
skillIndex: this._skillLoader.formatForContext(this.currentPhase),
|
|
705
793
|
pipelineState,
|
|
706
794
|
workspaceState: this._buildWorkspaceState(),
|
|
707
795
|
});
|
|
@@ -781,6 +869,30 @@ export class AgentEngine {
|
|
|
781
869
|
});
|
|
782
870
|
|
|
783
871
|
if (toolCallsAcc.size === 0) {
|
|
872
|
+
// A3: Empty-response guard. If the LLM returned no content AND no
|
|
873
|
+
// tool calls, count it. Two in a row almost always means the
|
|
874
|
+
// provider is silently failing (context exceeded, rate-limit
|
|
875
|
+
// corruption, auth expired) and continuing wastes tokens + time.
|
|
876
|
+
// Reset on any non-empty turn. Reason-tagged so /status can
|
|
877
|
+
// surface the running rate.
|
|
878
|
+
if (!collectedText || !collectedText.trim()) {
|
|
879
|
+
this._consecutiveEmptyResponses = (this._consecutiveEmptyResponses || 0) + 1;
|
|
880
|
+
this._totalEmptyResponses = (this._totalEmptyResponses || 0) + 1;
|
|
881
|
+
if (this._consecutiveEmptyResponses >= 2) {
|
|
882
|
+
const message =
|
|
883
|
+
`LLM returned empty response ${this._consecutiveEmptyResponses}× in a row — ` +
|
|
884
|
+
`likely context-length exceeded or provider-side silent failure. ` +
|
|
885
|
+
`Stopping this turn to prevent runaway API spend.`;
|
|
886
|
+
this.eventLog.append("error", { message, kind: "empty_response_streak" });
|
|
887
|
+
yield new AgentEvent({ type: "error", message });
|
|
888
|
+
this._consecutiveEmptyResponses = 0; // reset so next /run isn't blocked
|
|
889
|
+
return;
|
|
890
|
+
}
|
|
891
|
+
} else {
|
|
892
|
+
this._consecutiveEmptyResponses = 0;
|
|
893
|
+
}
|
|
894
|
+
this._totalTurns = (this._totalTurns || 0) + 1;
|
|
895
|
+
|
|
784
896
|
// Bug 4 trigger (1): re-check phase criteria at end of every turn —
|
|
785
897
|
// KC may have advanced state via conversation alone, without any
|
|
786
898
|
// tool that the pipeline narrowly watches.
|
|
@@ -793,6 +905,10 @@ export class AgentEngine {
|
|
|
793
905
|
return;
|
|
794
906
|
}
|
|
795
907
|
|
|
908
|
+
// A3: A turn with tool_calls or content is not empty — reset streak.
|
|
909
|
+
this._consecutiveEmptyResponses = 0;
|
|
910
|
+
this._totalTurns = (this._totalTurns || 0) + 1;
|
|
911
|
+
|
|
796
912
|
// Tool execution loop
|
|
797
913
|
for (const tc of toolCallsAcc.values()) {
|
|
798
914
|
let inputData = {};
|
|
@@ -803,6 +919,12 @@ export class AgentEngine {
|
|
|
803
919
|
this.eventLog.append("tool_start", { name: tc.name, input: inputData });
|
|
804
920
|
yield new AgentEvent({ type: "tool_start", name: tc.name, input: inputData });
|
|
805
921
|
|
|
922
|
+
// A1: Capture phase BEFORE tool execution. Some tools — notably
|
|
923
|
+
// phase_advance — mutate this.currentPhase via a callback without
|
|
924
|
+
// yielding any AgentEvent, so the TUI's status bar never gets the
|
|
925
|
+
// signal. We diff after execute() and emit a synthetic
|
|
926
|
+
// pipeline_event so subscribers can sync.
|
|
927
|
+
const beforePhase = this.currentPhase;
|
|
806
928
|
const result = await this.toolRegistry.execute(tc.name, inputData);
|
|
807
929
|
|
|
808
930
|
// Tool-call offloading: large outputs go to logs/tool_results/<traceId>.txt;
|
|
@@ -817,6 +939,29 @@ export class AgentEngine {
|
|
|
817
939
|
isError: result.isError,
|
|
818
940
|
traceId: offload?.traceId || null,
|
|
819
941
|
});
|
|
942
|
+
|
|
943
|
+
// D3a: trace skill invocations. When the agent reads a SKILL.md via
|
|
944
|
+
// workspace_file (the canonical way KC "uses" a skill, since skills
|
|
945
|
+
// are progressively-disclosed markdown), emit a skill_invoked event.
|
|
946
|
+
// Makes "which skills did KC actually consult?" answerable in post-run
|
|
947
|
+
// analysis — before this, skills were opaque to the event log.
|
|
948
|
+
try {
|
|
949
|
+
if (
|
|
950
|
+
!result.isError &&
|
|
951
|
+
(tc.name === "workspace_file" || tc.name === "sandbox_exec")
|
|
952
|
+
) {
|
|
953
|
+
const p = String(inputData?.path || inputData?.command || "");
|
|
954
|
+
const skillMatch = p.match(/(?:template\/)?skills\/[a-z-]+\/(?:meta-meta|meta|skill-creator)\/([a-zA-Z0-9_-]+)(?:\/SKILL\.md|\/)?|\bSKILL\.md\b/);
|
|
955
|
+
if (skillMatch) {
|
|
956
|
+
const skillName = skillMatch[1] || "(unknown)";
|
|
957
|
+
this.eventLog.append("skill_invoked", {
|
|
958
|
+
skill: skillName,
|
|
959
|
+
via_tool: tc.name,
|
|
960
|
+
phase: this.currentPhase,
|
|
961
|
+
});
|
|
962
|
+
}
|
|
963
|
+
}
|
|
964
|
+
} catch { /* never let tracing break a tool call */ }
|
|
820
965
|
yield new AgentEvent({
|
|
821
966
|
type: "tool_result",
|
|
822
967
|
name: tc.name,
|
|
@@ -837,6 +982,22 @@ export class AgentEngine {
|
|
|
837
982
|
// user saw "CTX: 210% / stream terminated" with no recovery.
|
|
838
983
|
this._maybeWindowAfterToolResult();
|
|
839
984
|
|
|
985
|
+
// A1: If the tool mutated the phase (e.g. phase_advance), emit the
|
|
986
|
+
// signal the TUI and pipelines need to re-sync state. Runs BEFORE
|
|
987
|
+
// pipeline.onToolResult so the fresh phase is active if the pipeline
|
|
988
|
+
// itself wants to react to the transition.
|
|
989
|
+
if (this.currentPhase !== beforePhase) {
|
|
990
|
+
yield new AgentEvent({
|
|
991
|
+
type: "pipeline_event",
|
|
992
|
+
data: {
|
|
993
|
+
type: "phase_changed",
|
|
994
|
+
from: beforePhase,
|
|
995
|
+
nextPhase: this.currentPhase,
|
|
996
|
+
reason: `via ${tc.name}`,
|
|
997
|
+
},
|
|
998
|
+
});
|
|
999
|
+
}
|
|
1000
|
+
|
|
840
1001
|
// Pipeline controller: update state and re-register tools on phase change
|
|
841
1002
|
if (pipeline?.onToolResult) {
|
|
842
1003
|
const pEvent = pipeline.onToolResult(tc.name, inputData, result);
|
|
@@ -857,8 +1018,15 @@ export class AgentEngine {
|
|
|
857
1018
|
if (ev) yield ev;
|
|
858
1019
|
|
|
859
1020
|
} catch (err) {
|
|
860
|
-
|
|
861
|
-
|
|
1021
|
+
// A8: If the LLM client tagged the stream termination reason, pass
|
|
1022
|
+
// it through. Upstream log consumers + the TUI can then distinguish
|
|
1023
|
+
// "provider returned 429" from "socket died mid-token" from "SSE
|
|
1024
|
+
// buffer exploded" — today they're all just "Error: ...".
|
|
1025
|
+
const payload = { message: err.message };
|
|
1026
|
+
if (err.streamTermination) payload.kind = err.streamTermination;
|
|
1027
|
+
if (err.status) payload.status = err.status;
|
|
1028
|
+
this.eventLog.append("error", payload);
|
|
1029
|
+
yield new AgentEvent({ type: "error", message: err.message, ...payload });
|
|
862
1030
|
return;
|
|
863
1031
|
}
|
|
864
1032
|
}
|
|
@@ -897,11 +1065,31 @@ export class AgentEngine {
|
|
|
897
1065
|
reason,
|
|
898
1066
|
forced: force && nextPhase !== expected,
|
|
899
1067
|
});
|
|
1068
|
+
const fromPhase = this.currentPhase;
|
|
900
1069
|
this.currentPhase = nextPhase;
|
|
901
1070
|
this._registerToolsForPhase(this.currentPhase);
|
|
902
1071
|
this.workspace.setPhase(this.currentPhase);
|
|
903
1072
|
this._createTasksForPhase(this.currentPhase);
|
|
904
1073
|
this.saveState();
|
|
1074
|
+
|
|
1075
|
+
// B8: Soft signal — surface any sub-agents left running from the prior
|
|
1076
|
+
// phase so the main agent's next turn can decide whether to kill them.
|
|
1077
|
+
// NOT automated: phase_advance can fire from _maybeAutoAdvance on a
|
|
1078
|
+
// criteria-flip, and auto-killing would couple lifecycle with blast
|
|
1079
|
+
// radius. This just informs.
|
|
1080
|
+
try {
|
|
1081
|
+
const agentTool = this._buildTools?.core?.find((t) => t?.name === "agent_tool");
|
|
1082
|
+
const runningIds = agentTool?.getRunningTaskIds?.() || [];
|
|
1083
|
+
if (runningIds.length > 0) {
|
|
1084
|
+
this.eventLog.append("stale_subagents", {
|
|
1085
|
+
from_phase: fromPhase,
|
|
1086
|
+
to_phase: nextPhase,
|
|
1087
|
+
running_task_ids: runningIds,
|
|
1088
|
+
hint: "These sub-agents were dispatched during the prior phase. Consider operation=poll to check status, or operation=kill to abort if stale.",
|
|
1089
|
+
});
|
|
1090
|
+
}
|
|
1091
|
+
} catch { /* never let signal emission break phase advance */ }
|
|
1092
|
+
|
|
905
1093
|
return true;
|
|
906
1094
|
}
|
|
907
1095
|
|
|
@@ -972,6 +1160,16 @@ export class AgentEngine {
|
|
|
972
1160
|
/**
|
|
973
1161
|
* Create per-rule tasks when entering a new phase.
|
|
974
1162
|
* Reads the rule catalog and creates one task per rule for the given phase.
|
|
1163
|
+
*
|
|
1164
|
+
* D6: For skill_authoring / skill_testing, filter rules via the bundle
|
|
1165
|
+
* classification cache (`cache/bundles/<hash>.classification.json`,
|
|
1166
|
+
* written by document_classify). Rules whose `applicable_product_types`
|
|
1167
|
+
* or `report_types` don't overlap with the bundle's classification get
|
|
1168
|
+
* SKIPPED at task-creation time — we don't mutate catalog.json to mark
|
|
1169
|
+
* them not_applicable, we just keep them out of the task queue. The
|
|
1170
|
+
* finalization phase (Group E) will report them in the coverage
|
|
1171
|
+
* artifact as "not applicable to this bundle." Conservative default:
|
|
1172
|
+
* if no classification exists, include all rules (pre-B9 behavior).
|
|
975
1173
|
*/
|
|
976
1174
|
_createTasksForPhase(phase) {
|
|
977
1175
|
if (!this.taskManager) return; // Sub-agents don't manage tasks
|
|
@@ -980,28 +1178,258 @@ export class AgentEngine {
|
|
|
980
1178
|
|
|
981
1179
|
try {
|
|
982
1180
|
const catalog = JSON.parse(fs.readFileSync(catalogPath, "utf-8"));
|
|
983
|
-
|
|
984
|
-
if (rules.length
|
|
985
|
-
|
|
1181
|
+
let rules = normalizeRuleCatalog(catalog);
|
|
1182
|
+
if (rules.length === 0) return;
|
|
1183
|
+
|
|
1184
|
+
// D6: applicability pre-filter (skill phases only — bootstrap/extraction
|
|
1185
|
+
// have no task creation here per A6).
|
|
1186
|
+
if (phase === "skill_authoring" || phase === "skill_testing") {
|
|
1187
|
+
const classification = this._loadBundleClassification();
|
|
1188
|
+
if (classification) {
|
|
1189
|
+
const before = rules.length;
|
|
1190
|
+
rules = rules.filter((r) => this._ruleAppliesToBundle(r, classification));
|
|
1191
|
+
if (rules.length < before) {
|
|
1192
|
+
this.eventLog.append("applicability_prefilter", {
|
|
1193
|
+
phase,
|
|
1194
|
+
classification: {
|
|
1195
|
+
product_type: classification.product_type,
|
|
1196
|
+
report_type: classification.report_type,
|
|
1197
|
+
source: classification.source,
|
|
1198
|
+
},
|
|
1199
|
+
rules_before: before,
|
|
1200
|
+
rules_after: rules.length,
|
|
1201
|
+
skipped: before - rules.length,
|
|
1202
|
+
});
|
|
1203
|
+
}
|
|
1204
|
+
}
|
|
986
1205
|
}
|
|
1206
|
+
this.taskManager.createRuleTasks(rules, phase);
|
|
987
1207
|
} catch { /* skip if catalog can't be read */ }
|
|
988
1208
|
}
|
|
989
1209
|
|
|
1210
|
+
/**
|
|
1211
|
+
* D6: Load the most recent bundle classification cache, if one exists.
|
|
1212
|
+
* Written by the `document_classify` tool. Returns null if no cache or
|
|
1213
|
+
* unreadable — callers must treat null as "all rules apply."
|
|
1214
|
+
*/
|
|
1215
|
+
_loadBundleClassification() {
|
|
1216
|
+
const cacheDir = path.join(this.workspace.cwd, "cache", "bundles");
|
|
1217
|
+
if (!fs.existsSync(cacheDir)) return null;
|
|
1218
|
+
let entries;
|
|
1219
|
+
try { entries = fs.readdirSync(cacheDir); }
|
|
1220
|
+
catch { return null; }
|
|
1221
|
+
const files = entries
|
|
1222
|
+
.filter((n) => n.endsWith(".classification.json"))
|
|
1223
|
+
.map((n) => {
|
|
1224
|
+
const p = path.join(cacheDir, n);
|
|
1225
|
+
try { return { path: p, mtime: fs.statSync(p).mtimeMs }; }
|
|
1226
|
+
catch { return null; }
|
|
1227
|
+
})
|
|
1228
|
+
.filter(Boolean)
|
|
1229
|
+
.sort((a, b) => b.mtime - a.mtime);
|
|
1230
|
+
if (files.length === 0) return null;
|
|
1231
|
+
try { return JSON.parse(fs.readFileSync(files[0].path, "utf-8")); }
|
|
1232
|
+
catch { return null; }
|
|
1233
|
+
}
|
|
1234
|
+
|
|
1235
|
+
/**
|
|
1236
|
+
* D6: Rule-applicability check mirroring the AMC app's `applies_to`.
|
|
1237
|
+
* Conservative: returns true when we don't have enough info to
|
|
1238
|
+
* confidently skip (missing fields on rule, or classification with
|
|
1239
|
+
* empty product/report).
|
|
1240
|
+
*/
|
|
1241
|
+
_ruleAppliesToBundle(rule, classification) {
|
|
1242
|
+
const docProduct = classification?.product_type || "";
|
|
1243
|
+
const docReport = classification?.report_type || "";
|
|
1244
|
+
const ruleProducts = rule.applicable_product_types || rule.applicable_sections || [];
|
|
1245
|
+
const ruleReports = rule.report_types || [];
|
|
1246
|
+
|
|
1247
|
+
const allProducts = ruleProducts.length === 0 ||
|
|
1248
|
+
ruleProducts.some((x) => x === "全部" || x === "all" || x === "");
|
|
1249
|
+
const allReports = ruleReports.length === 0 ||
|
|
1250
|
+
ruleReports.some((x) => x === "全部" || x === "all" || x === "");
|
|
1251
|
+
if (allProducts && allReports) return true;
|
|
1252
|
+
|
|
1253
|
+
const productOk = allProducts || (
|
|
1254
|
+
docProduct && ruleProducts.some((rp) => rp.includes(docProduct) || docProduct.includes(rp))
|
|
1255
|
+
);
|
|
1256
|
+
const reportOk = allReports || (
|
|
1257
|
+
docReport && ruleReports.some((rr) => rr.includes(docReport) || docReport.includes(rr))
|
|
1258
|
+
);
|
|
1259
|
+
|
|
1260
|
+
// Unknown classification → don't prefilter, let the agent judge.
|
|
1261
|
+
if (!docProduct && !docReport) return true;
|
|
1262
|
+
return productOk && reportOk;
|
|
1263
|
+
}
|
|
1264
|
+
|
|
1265
|
+
/**
|
|
1266
|
+
* D1: Enrich a skill_authoring / skill_testing task prompt with the
|
|
1267
|
+
* rule's source context — reads `source_chunk_ids` back-refs from
|
|
1268
|
+
* catalog.json (populated by extraction) and fetches chunk text from
|
|
1269
|
+
* the most recent BundleTree cache. Falls back to the minimal prompt
|
|
1270
|
+
* when catalog / cache aren't available.
|
|
1271
|
+
*
|
|
1272
|
+
* Previously the task prompt was ONE line — "Continue with next task:
|
|
1273
|
+
* ${title}" — leaving the skill-author agent to re-read the rule and
|
|
1274
|
+
* re-find its evidence per task. Auto-attach saves the LLM turn
|
|
1275
|
+
* needed for document_search on every task, and ensures the author
|
|
1276
|
+
* sees the exact regulation text the extractor used to justify the
|
|
1277
|
+
* rule.
|
|
1278
|
+
*
|
|
1279
|
+
* @param {{id: string, title: string, ruleId?: string, phase: string}} task
|
|
1280
|
+
* @returns {string}
|
|
1281
|
+
*/
|
|
1282
|
+
_buildEnrichedTaskPrompt(task) {
|
|
1283
|
+
const fallback = `Continue with next task: ${task.title}` +
|
|
1284
|
+
(task.ruleId ? ` (rule: ${task.ruleId})` : "");
|
|
1285
|
+
|
|
1286
|
+
// Only enrich for rule-anchored phases
|
|
1287
|
+
if (task.phase !== "skill_authoring" && task.phase !== "skill_testing") {
|
|
1288
|
+
return fallback;
|
|
1289
|
+
}
|
|
1290
|
+
if (!task.ruleId) return fallback;
|
|
1291
|
+
|
|
1292
|
+
// Find the rule in catalog.json
|
|
1293
|
+
const catalogPath = path.join(this.workspace.cwd, "rules", "catalog.json");
|
|
1294
|
+
if (!fs.existsSync(catalogPath)) return fallback;
|
|
1295
|
+
let rules;
|
|
1296
|
+
try {
|
|
1297
|
+
rules = normalizeRuleCatalog(JSON.parse(fs.readFileSync(catalogPath, "utf-8")));
|
|
1298
|
+
} catch { return fallback; }
|
|
1299
|
+
const rule = rules.find((r) => r.id === task.ruleId);
|
|
1300
|
+
if (!rule) return fallback;
|
|
1301
|
+
|
|
1302
|
+
// Assemble the enriched brief. Every section is optional — when a
|
|
1303
|
+
// back-ref or cache is missing, just skip that section rather than
|
|
1304
|
+
// failing back to the minimal prompt.
|
|
1305
|
+
const lines = [];
|
|
1306
|
+
lines.push(`# Task: ${task.title}`);
|
|
1307
|
+
lines.push("");
|
|
1308
|
+
lines.push(`## Rule ${rule.id}`);
|
|
1309
|
+
if (rule.source_ref) lines.push(`Source: ${rule.source_ref}`);
|
|
1310
|
+
if (rule.severity) lines.push(`Severity: ${rule.severity}`);
|
|
1311
|
+
if (rule.description) lines.push(`\n${rule.description}`);
|
|
1312
|
+
if (rule.falsifiability_statement) lines.push(`\n**Falsifiability**: ${rule.falsifiability_statement}`);
|
|
1313
|
+
if (rule.test_case_stub) lines.push(`**Test stub**: ${rule.test_case_stub}`);
|
|
1314
|
+
|
|
1315
|
+
// D1: if rule has source_chunk_ids AND a BundleTree cache exists,
|
|
1316
|
+
// pull chunk text inline so the author doesn't need to call
|
|
1317
|
+
// bundle_search manually. Bounded to ~3000 tokens total to avoid
|
|
1318
|
+
// blowing the author's context budget.
|
|
1319
|
+
const chunkIds = Array.isArray(rule.source_chunk_ids) ? rule.source_chunk_ids : [];
|
|
1320
|
+
if (chunkIds.length > 0) {
|
|
1321
|
+
const chunks = this._loadChunksFromBundleCache(chunkIds);
|
|
1322
|
+
if (chunks.length > 0) {
|
|
1323
|
+
lines.push("");
|
|
1324
|
+
lines.push("## Source context");
|
|
1325
|
+
let totalChars = 0;
|
|
1326
|
+
const MAX_CHARS = 7500; // ~3000 CJK tokens
|
|
1327
|
+
for (const ch of chunks) {
|
|
1328
|
+
const header = `### ${ch.title || ch.chunk_id} · ${ch.source_file} p.${(ch.page_range || [1, 1]).join("-")}`;
|
|
1329
|
+
const body = (ch.content || "").trim();
|
|
1330
|
+
const block = `${header}\n${body}\n`;
|
|
1331
|
+
if (totalChars + block.length > MAX_CHARS) {
|
|
1332
|
+
lines.push(`\n[…${chunks.length - chunks.indexOf(ch)} more source chunks truncated; use bundle_search to retrieve them…]`);
|
|
1333
|
+
break;
|
|
1334
|
+
}
|
|
1335
|
+
lines.push("");
|
|
1336
|
+
lines.push(block);
|
|
1337
|
+
totalChars += block.length;
|
|
1338
|
+
}
|
|
1339
|
+
}
|
|
1340
|
+
}
|
|
1341
|
+
|
|
1342
|
+
// Sibling rules (same source_ref prefix) — helps the author see the
|
|
1343
|
+
// surrounding catalog and avoid re-implementing cross-referenced logic.
|
|
1344
|
+
const siblings = this._findSiblingRuleIds(rule, rules);
|
|
1345
|
+
if (siblings.length > 0) {
|
|
1346
|
+
lines.push("");
|
|
1347
|
+
lines.push(`## Sibling rules (same regulation section)`);
|
|
1348
|
+
lines.push(siblings.map((id) => `- ${id}`).join("\n"));
|
|
1349
|
+
}
|
|
1350
|
+
|
|
1351
|
+
lines.push("");
|
|
1352
|
+
lines.push("Write the skill to `rule_skills/<rule_id>/SKILL.md` + detect script. Prefer 1 rule = 1 skill dir (use `check_rNNN_rMMM.py` naming ONLY when rules share evidence and fail together).");
|
|
1353
|
+
|
|
1354
|
+
return lines.join("\n");
|
|
1355
|
+
}
|
|
1356
|
+
|
|
1357
|
+
/** D1: Load chunk text from the most recent BundleTree cache. */
|
|
1358
|
+
_loadChunksFromBundleCache(chunkIds) {
|
|
1359
|
+
const cacheDir = path.join(this.workspace.cwd, "cache", "bundles");
|
|
1360
|
+
if (!fs.existsSync(cacheDir)) return [];
|
|
1361
|
+
let entries;
|
|
1362
|
+
try { entries = fs.readdirSync(cacheDir); }
|
|
1363
|
+
catch { return []; }
|
|
1364
|
+
const candidates = entries
|
|
1365
|
+
.filter((n) => n.endsWith(".json") && !n.endsWith(".classification.json"))
|
|
1366
|
+
.map((n) => {
|
|
1367
|
+
const p = path.join(cacheDir, n);
|
|
1368
|
+
try { return { path: p, mtime: fs.statSync(p).mtimeMs }; }
|
|
1369
|
+
catch { return null; }
|
|
1370
|
+
})
|
|
1371
|
+
.filter(Boolean)
|
|
1372
|
+
.sort((a, b) => b.mtime - a.mtime);
|
|
1373
|
+
if (candidates.length === 0) return [];
|
|
1374
|
+
let tree;
|
|
1375
|
+
try { tree = JSON.parse(fs.readFileSync(candidates[0].path, "utf-8")); }
|
|
1376
|
+
catch { return []; }
|
|
1377
|
+
const out = [];
|
|
1378
|
+
for (const cid of chunkIds) {
|
|
1379
|
+
const ch = tree.chunks?.[cid];
|
|
1380
|
+
if (ch) out.push(ch);
|
|
1381
|
+
}
|
|
1382
|
+
return out;
|
|
1383
|
+
}
|
|
1384
|
+
|
|
1385
|
+
/** D1: Rules that share the same regulation article (naive: source_ref prefix). */
|
|
1386
|
+
_findSiblingRuleIds(rule, allRules) {
|
|
1387
|
+
if (!rule.source_ref) return [];
|
|
1388
|
+
const prefix = rule.source_ref.split(/[第条款项]/)[0].trim();
|
|
1389
|
+
if (!prefix) return [];
|
|
1390
|
+
return allRules
|
|
1391
|
+
.filter((r) => r.id !== rule.id && (r.source_ref || "").startsWith(prefix))
|
|
1392
|
+
.slice(0, 8)
|
|
1393
|
+
.map((r) => r.id);
|
|
1394
|
+
}
|
|
1395
|
+
|
|
990
1396
|
/**
|
|
991
1397
|
* Ralph-loop: run a turn, then auto-continue through pending tasks.
|
|
992
1398
|
* Compacts context aggressively between tasks to prevent context blowup.
|
|
993
1399
|
* If no tasks exist, behaves identically to runTurn().
|
|
994
1400
|
*
|
|
995
1401
|
* @param {string} userMessage
|
|
1402
|
+
* @param {{parallelism?: number}} [opts] — B1: optional parallel mode.
|
|
1403
|
+
* N > 1 dispatches tasks through N concurrent subagents (using the
|
|
1404
|
+
* agent_tool infrastructure from B8). Clamped to `effectiveParallelism`
|
|
1405
|
+
* from config.js — which silently downgrades to 1 unless
|
|
1406
|
+
* KC_PARALLELISM_VERIFIED=1 is set AND heap.jsonl shows flat RSS
|
|
1407
|
+
* (B0.6 guard; prevents accidental $100+ runaway runs).
|
|
996
1408
|
* @yields {AgentEvent}
|
|
997
1409
|
*/
|
|
998
|
-
async *runTaskLoop(userMessage) {
|
|
1410
|
+
async *runTaskLoop(userMessage, opts = {}) {
|
|
999
1411
|
// Sub-agents don't run task loops — they execute one task and exit
|
|
1000
1412
|
if (!this.taskManager) {
|
|
1001
1413
|
yield* this.runTurn(userMessage);
|
|
1002
1414
|
return;
|
|
1003
1415
|
}
|
|
1004
1416
|
|
|
1417
|
+
// B1: resolve effective parallelism. Caller opts override config.
|
|
1418
|
+
const requested = Number.isFinite(opts.parallelism)
|
|
1419
|
+
? Math.max(1, Math.min(8, opts.parallelism))
|
|
1420
|
+
: (this.config.effectiveParallelism?.() ?? 1);
|
|
1421
|
+
|
|
1422
|
+
if (requested > 1) {
|
|
1423
|
+
yield* this._runTaskLoopParallel(userMessage, requested);
|
|
1424
|
+
return;
|
|
1425
|
+
}
|
|
1426
|
+
|
|
1427
|
+
yield* this._runTaskLoopSerial(userMessage);
|
|
1428
|
+
}
|
|
1429
|
+
|
|
1430
|
+
/** B1: original serial ralph-loop path — one task at a time, shared
|
|
1431
|
+
* conversation history. Unchanged from pre-v0.6.0 behavior. */
|
|
1432
|
+
async *_runTaskLoopSerial(userMessage) {
|
|
1005
1433
|
// Run the initial turn (user's request)
|
|
1006
1434
|
yield* this.runTurn(userMessage);
|
|
1007
1435
|
|
|
@@ -1015,8 +1443,11 @@ export class AgentEngine {
|
|
|
1015
1443
|
await this.compact({ recentCount: 8 });
|
|
1016
1444
|
}
|
|
1017
1445
|
|
|
1018
|
-
|
|
1019
|
-
|
|
1446
|
+
// B2: atomic claim — for serial we could use getNextPending, but
|
|
1447
|
+
// using claimNextPending gives us consistent state fields (worker
|
|
1448
|
+
// label, startedAt) whether in serial or parallel mode.
|
|
1449
|
+
const task = this.taskManager.claimNextPending("serial");
|
|
1450
|
+
if (!task) break;
|
|
1020
1451
|
|
|
1021
1452
|
// Yield task progress event for TUI
|
|
1022
1453
|
yield new AgentEvent({
|
|
@@ -1030,14 +1461,15 @@ export class AgentEngine {
|
|
|
1030
1461
|
},
|
|
1031
1462
|
});
|
|
1032
1463
|
|
|
1033
|
-
//
|
|
1034
|
-
|
|
1035
|
-
|
|
1464
|
+
// D1: synthesize a task-focused prompt, enriched with rule source
|
|
1465
|
+
// context (rule NL + source_ref + chunk text + sibling ids) when
|
|
1466
|
+
// the catalog + BundleTree cache are available. Falls back to the
|
|
1467
|
+
// minimal "Continue with next task" line otherwise.
|
|
1468
|
+
const taskPrompt = this._buildEnrichedTaskPrompt(task);
|
|
1036
1469
|
|
|
1037
1470
|
yield* this.runTurn(taskPrompt);
|
|
1038
1471
|
|
|
1039
|
-
this.taskManager.
|
|
1040
|
-
this.taskManager.save();
|
|
1472
|
+
this.taskManager.markDone(task.id);
|
|
1041
1473
|
this.saveState();
|
|
1042
1474
|
|
|
1043
1475
|
yield new AgentEvent({
|
|
@@ -1074,6 +1506,190 @@ export class AgentEngine {
|
|
|
1074
1506
|
}
|
|
1075
1507
|
}
|
|
1076
1508
|
|
|
1509
|
+
/**
 * B1: Parallel ralph-loop — N concurrent subagents each executing one
 * task at a time, claimed atomically from TaskManager.
 *
 * Implementation: leverages B8's agent_tool infrastructure. Each worker
 * slot is a sub-engine with its own heap-isolated history; workspace
 * writes are serialized through B9's file locks. The main engine acts
 * as dispatcher — it claims tasks and spawns subagents, then waits.
 *
 * Chosen over in-process history-forking because: (a) sub-engines are
 * already heap-isolated (good under B0's RSS-safety regime); (b)
 * kill authority from B8 applies uniformly; (c) no runTurn refactor
 * needed — the engine's conversation-state assumptions stay intact.
 * Trade-off: each task pays a cold-start cost (re-read AGENT.md,
 * skill index, pipeline state). For 100+ task sessions this is
 * amortized against the 2-4× wall-clock speedup.
 *
 * @param {string} userMessage - initial user request; the first runTurn
 *   lets the main agent create/refine tasks before workers are spawned.
 * @param {number} parallelism - max concurrent workers (caller clamps).
 * @yields {AgentEvent} task_progress / pipeline_event / error events.
 */
async *_runTaskLoopParallel(userMessage, parallelism) {
  // Initial turn: main agent reads user request, creates tasks.
  yield* this.runTurn(userMessage);

  // NOTE(review): assumes `this._buildTools.core` is the registered core
  // tool array (agent_tool included) — confirm against tool registry.
  const agentTool = this._buildTools.core.find((t) => t?.name === "agent_tool");
  if (!agentTool) {
    // Shouldn't happen (agent_tool is core), but fall back safely.
    yield new AgentEvent({
      type: "error",
      message: "agent_tool not registered; parallel mode requires it. Falling back to serial.",
    });
    // Empty message: tasks were already created by the runTurn above.
    yield* this._runTaskLoopSerial("");
    return;
  }

  // Event queue so concurrent workers can yield progress through a
  // single async-generator consumer. push-style with a notifier.
  const eventQueue = [];
  let notify = null;
  const enq = (ev) => {
    eventQueue.push(ev);
    // Wake the drain loop if it is parked on eventArrival. Clearing
    // `notify` before calling prevents double-resolution.
    if (notify) { const n = notify; notify = null; n(); }
  };

  // In-flight: subagent task_id → { task, promise }
  const inFlight = new Map();

  // Claim pending tasks and spawn workers until the pool is full or
  // the backlog is empty. Re-invoked after each worker completion.
  const dispatch = async () => {
    while (inFlight.size < parallelism) {
      const task = this.taskManager.claimNextPending(`pool${inFlight.size}`);
      if (!task) return;

      // `[...inFlight.keys()].length` is equivalent to `inFlight.size`;
      // the label reflects the slot index at claim time only.
      const workerLabel = `pool${[...inFlight.keys()].length}`;
      const subId = `pool_${task.id}`.replace(/[^A-Za-z0-9_-]/g, "_").slice(0, 60);

      // D1: build the enriched brief with source context. Parallel workers
      // are subagents — each with zero conversation history, so the brief
      // must carry everything they need. Even more important to have
      // source context inline vs. expecting them to call document_search.
      const enriched = this._buildEnrichedTaskPrompt(task);
      const brief =
        enriched +
        `\n\nNOTE (parallel worker): write outputs via workspace_file or ` +
        `rule_catalog — do NOT write to shared coordination files ` +
        `(rules/catalog.json, rules/manifest.json) via sandbox_exec; they're ` +
        `lock-protected and bypassing the lock will race with other workers.`;

      enq(new AgentEvent({
        type: "task_progress",
        data: {
          taskId: task.id, title: task.title, ruleId: task.ruleId,
          status: "in_progress", worker: workerLabel,
          progress: this.taskManager.progress,
        },
      }));

      // Spawn via the tool's public API. agent_tool writes status.txt,
      // abort controller, etc. We read _runningTasks to get a promise
      // handle we can await.
      const spawnRes = await agentTool.execute({
        operation: "spawn",
        task_description: brief,
        task_id: subId,
      });

      if (spawnRes.isError) {
        this.taskManager.markFailed(task.id, `spawn failed: ${spawnRes.content}`);
        enq(new AgentEvent({
          type: "task_progress",
          data: { taskId: task.id, status: "failed", worker: workerLabel },
        }));
        continue;
      }

      // NOTE(review): reaches into agent_tool's private `_runningTasks`
      // map — consider a public accessor on the tool instead.
      const entry = agentTool._runningTasks.get(subId);
      if (!entry) {
        // Sub-agent completed synchronously (no events) — mark done.
        // NOTE(review): a synchronous completion is assumed successful;
        // confirm agent_tool cannot finish synchronously with a failure.
        this.taskManager.markDone(task.id);
        enq(new AgentEvent({
          type: "task_progress",
          data: { taskId: task.id, status: "completed", worker: workerLabel },
        }));
        continue;
      }

      // Normalize success/failure into a plain result object so the
      // Promise.race below never rejects.
      const trackedPromise = entry.promise.then(
        () => ({ taskId: task.id, subId, ok: true }),
        (e) => ({ taskId: task.id, subId, ok: false, error: e?.message || String(e) }),
      );
      inFlight.set(subId, { task, workerLabel, promise: trackedPromise });
    }
  };

  // Prime the pool
  await dispatch();

  // Drain events + replenish until queue is empty and all in-flight done.
  while (inFlight.size > 0 || eventQueue.length > 0) {
    // Drain all queued events first
    while (eventQueue.length > 0) yield eventQueue.shift();

    if (inFlight.size === 0) break;

    // Wait for either the next event OR a worker to complete.
    // A stale `notify` left over from a worker-win iteration only
    // resolves an already-abandoned promise — harmless.
    const workerCompletion = Promise.race([...inFlight.values()].map((v) => v.promise));
    const eventArrival = new Promise((resolve) => { notify = () => resolve("event"); });
    const winner = await Promise.race([
      workerCompletion.then((done) => ({ kind: "worker", done })),
      eventArrival.then(() => ({ kind: "event" })),
    ]);

    if (winner.kind === "worker") {
      const { taskId, subId, ok, error } = winner.done;
      const entry = inFlight.get(subId);
      inFlight.delete(subId);

      if (ok) {
        this.taskManager.markDone(taskId);
        enq(new AgentEvent({
          type: "task_progress",
          data: {
            taskId, status: "completed",
            worker: entry?.workerLabel,
            progress: this.taskManager.progress,
          },
        }));
      } else {
        this.taskManager.markFailed(taskId, error);
        enq(new AgentEvent({
          type: "task_progress",
          data: {
            taskId, status: "failed",
            worker: entry?.workerLabel,
            error,
            progress: this.taskManager.progress,
          },
        }));
      }

      // Refill the pool. If no pending tasks left, in-flight drains naturally.
      await dispatch();
    }
    // event winner: loop re-iterates and drains eventQueue
  }

  this.saveState();

  // After all workers done, check for phase auto-advance (same as serial path).
  if (this._allCurrentPhaseTasksComplete()) {
    const pipeline = this.pipelines[this.currentPhase];
    let exitMet = false;
    try { exitMet = !!pipeline?.exitCriteriaMet?.(); } catch { exitMet = false; }
    if (exitMet) {
      const next = NEXT_PHASE[this.currentPhase];
      if (next) {
        const advanced = this._advancePhase(next, "all parallel tasks completed + exit criteria met");
        if (advanced) {
          yield new AgentEvent({
            type: "pipeline_event",
            data: { type: "phase_ready", nextPhase: next, message: "all phase tasks done; exit criteria met" },
          });
        }
      }
    }
  }
}
|
|
1692
|
+
|
|
1077
1693
|
/**
|
|
1078
1694
|
* True when every task tagged with the current phase is in a terminal state
|
|
1079
1695
|
* (completed | failed | skipped) and at least one such task exists. Used by
|