kc-beta 0.6.2 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +81 -0
- package/LICENSE-COMMERCIAL.md +125 -0
- package/README.md +21 -3
- package/package.json +14 -5
- package/src/agent/context-window.js +9 -12
- package/src/agent/context.js +14 -1
- package/src/agent/document-parser.js +169 -0
- package/src/agent/engine.js +382 -19
- package/src/agent/history/event-history.js +222 -0
- package/src/agent/llm-client.js +55 -0
- package/src/agent/message-utils.js +63 -0
- package/src/agent/pipelines/_milestone-derive.js +566 -0
- package/src/agent/pipelines/base.js +21 -0
- package/src/agent/pipelines/distillation.js +28 -15
- package/src/agent/pipelines/extraction.js +130 -36
- package/src/agent/pipelines/finalization.js +178 -11
- package/src/agent/pipelines/index.js +6 -1
- package/src/agent/pipelines/initializer.js +74 -8
- package/src/agent/pipelines/production-qc.js +31 -44
- package/src/agent/pipelines/skill-authoring.js +97 -80
- package/src/agent/pipelines/skill-testing.js +106 -23
- package/src/agent/retry.js +10 -2
- package/src/agent/scheduler.js +14 -2
- package/src/agent/session-state.js +18 -1
- package/src/agent/skill-loader.js +13 -7
- package/src/agent/skill-validator.js +19 -5
- package/src/agent/task-manager.js +61 -5
- package/src/agent/tools/document-chunk.js +21 -9
- package/src/agent/tools/phase-advance.js +37 -5
- package/src/agent/tools/release.js +51 -9
- package/src/agent/tools/rule-catalog.js +11 -1
- package/src/agent/tools/workspace-file.js +32 -0
- package/src/agent/workspace.js +39 -1
- package/src/cli/components.js +64 -14
- package/src/cli/index.js +62 -3
- package/src/cli/meme.js +26 -25
- package/src/config.js +65 -22
- package/src/model-tiers.json +24 -8
- package/src/providers.js +42 -0
- package/template/release/v1/README.md.tmpl +108 -0
- package/template/release/v1/catalog.json.tmpl +4 -0
- package/template/release/v1/kc_runtime/__init__.py +11 -0
- package/template/release/v1/kc_runtime/confidence.py +63 -0
- package/template/release/v1/kc_runtime/doc_parser.py +127 -0
- package/template/release/v1/manifest.json.tmpl +11 -0
- package/template/release/v1/render_dashboard.py +117 -0
- package/template/release/v1/run.py +212 -0
- package/template/release/v1/serve.sh +17 -0
- package/template/skills/en/meta-meta/work-decomposition/SKILL.md +326 -0
- package/template/skills/en/skill-creator/SKILL.md +1 -1
- package/template/skills/zh/meta-meta/work-decomposition/SKILL.md +321 -0
- package/template/skills/zh/skill-creator/SKILL.md +1 -1
package/src/agent/engine.js
CHANGED
|
@@ -1,8 +1,13 @@
|
|
|
1
1
|
import fs from "node:fs";
|
|
2
2
|
import path from "node:path";
|
|
3
3
|
import { AgentEvent } from "./events.js";
|
|
4
|
+
import {
|
|
5
|
+
deriveSkillAuthoringMilestones,
|
|
6
|
+
deriveSkillTestingMilestones,
|
|
7
|
+
} from "./pipelines/_milestone-derive.js";
|
|
4
8
|
import { ContextAssembler } from "./context.js";
|
|
5
9
|
import { ConversationHistory } from "./history.js";
|
|
10
|
+
import { findSafeSplitPoint } from "./message-utils.js";
|
|
6
11
|
import { Workspace } from "./workspace.js";
|
|
7
12
|
import { normalizeRuleCatalog } from "./rule-catalog-normalize.js";
|
|
8
13
|
import { VersionManager } from "./version-manager.js";
|
|
@@ -52,6 +57,45 @@ import { estimateTokens, estimateMessagesTokens } from "./token-counter.js";
|
|
|
52
57
|
// or kc_max_tokens in the global config.
|
|
53
58
|
const DEFAULT_KC_MAX_TOKENS = 65536;
|
|
54
59
|
|
|
60
|
+
/**
 * v0.6.3.1: Tolerant JSON parse for streamed tool-call arguments. When LLMs
 * (esp. SiliconFlow GLM-5.1 in E2E #5) hit max_tokens mid-arguments, the
 * stream returns truncated JSON missing N closing braces or quotes. Strict
 * parse fails; old code silently dropped to {} which masked the actual issue.
 *
 * Strategy:
 *   1. Try strict JSON.parse (fast path, most calls).
 *   2. On failure, scan the payload once while tracking string state
 *      (including backslash escapes) and the stack of still-open `{` / `[`
 *      containers. Braces, brackets and quotes that appear INSIDE string
 *      values are therefore not miscounted.
 *   3. If between 1 and BRACE_RECOVERY_BUDGET containers are still open,
 *      append the matching closers (preceded by a closing `"` when the
 *      truncation landed mid-string) and parse again. A dangling backslash
 *      at the very end of a truncated string is dropped first, because it
 *      would otherwise escape the closing quote we add.
 *   4. If still failing, return the ORIGINAL strict-parse error so the caller
 *      can surface it to the agent.
 *
 * @param {unknown} raw - Accumulated `arguments` text of one tool call.
 * @returns {{ok: true, value: any, recovered?: number} | {ok: false, error: string}}
 *   `recovered` is only present when padding was required and holds the
 *   number of characters that were appended.
 */
const BRACE_RECOVERY_BUDGET = 4;

function parseToolArgsTolerant(raw) {
  if (typeof raw !== "string") return { ok: false, error: "arguments not a string" };
  if (raw === "") return { ok: true, value: {} };

  // Fast path: well-formed payloads never reach the recovery scan.
  let strictError;
  try {
    return { ok: true, value: JSON.parse(raw) };
  } catch (e) {
    strictError = e;
  }
  const failure = { ok: false, error: strictError?.message || "JSON parse failed" };

  // Single pass: `pendingClosers` holds the closer for every container that
  // is still open at the point where the stream was cut off.
  const pendingClosers = [];
  let inString = false;
  let escapePending = false;
  for (let i = 0; i < raw.length; i++) {
    const ch = raw[i];
    if (inString) {
      if (escapePending) escapePending = false;
      else if (ch === "\\") escapePending = true;
      else if (ch === '"') inString = false;
      continue;
    }
    if (ch === '"') inString = true;
    else if (ch === "{") pendingClosers.push("}");
    else if (ch === "[") pendingClosers.push("]");
    else if (ch === "}" || ch === "]") pendingClosers.pop();
  }

  // Nothing structural to close, or too much missing to trust a repair.
  if (pendingClosers.length === 0 || pendingClosers.length > BRACE_RECOVERY_BUDGET) {
    return failure;
  }

  // escapePending can only be true while inside a string.
  const base = escapePending ? raw.slice(0, -1) : raw;
  const suffix = (inString ? '"' : "") + pendingClosers.reverse().join("");
  try {
    return { ok: true, value: JSON.parse(base + suffix), recovered: suffix.length };
  } catch {
    return failure;
  }
}
|
|
98
|
+
|
|
55
99
|
// Phases where worker LLM tools are available (DISTILL mode).
|
|
56
100
|
// E1: FINALIZATION inherits worker-LLM access so one-last-pass validation
|
|
57
101
|
// runs + dashboard_render + workflow_run stay usable during packaging.
|
|
@@ -379,7 +423,21 @@ export class AgentEngine {
|
|
|
379
423
|
new ScheduleFetchTool(this.workspace),
|
|
380
424
|
new ReleaseTool(this.workspace, { kcVersion: "0.5.2" }),
|
|
381
425
|
new PhaseAdvanceTool(
|
|
382
|
-
|
|
426
|
+
// v0.7.1 2c: advanceFn returns rich `{advanced, engineCounts?}`
|
|
427
|
+
// so the tool's refusal text can surface the engine telemetry
|
|
428
|
+
// that motivated the refusal. Internal callers of
|
|
429
|
+
// `_advancePhase` continue to use the bool return value
|
|
430
|
+
// directly; only this lambda wraps for the LLM-facing tool.
|
|
431
|
+
(to, reason, opts) => {
|
|
432
|
+
const advanced = this._advancePhase(to, reason, opts);
|
|
433
|
+
if (!advanced) {
|
|
434
|
+
let engineCounts = null;
|
|
435
|
+
try { engineCounts = this._buildEngineCountsBlock(this.currentPhase); }
|
|
436
|
+
catch { /* defensive */ }
|
|
437
|
+
return { advanced: false, engineCounts };
|
|
438
|
+
}
|
|
439
|
+
return { advanced: true };
|
|
440
|
+
},
|
|
383
441
|
() => this.currentPhase, // H1: tool reads phase BEFORE its own call
|
|
384
442
|
// v0.6.2 J1: surface running subagents so the tool can refuse
|
|
385
443
|
// advance until the agent explicitly acknowledges them.
|
|
@@ -462,6 +520,27 @@ export class AgentEngine {
|
|
|
462
520
|
return "";
|
|
463
521
|
}
|
|
464
522
|
|
|
523
|
+
/**
|
|
524
|
+
* v0.7.0 B3: Read rules/PATTERNS.md (project memory) for surfacing in
|
|
525
|
+
* the system prompt. Only loaded for phases where the agent owns
|
|
526
|
+
* decomposition decisions (skill_authoring + skill_testing — the two
|
|
527
|
+
* phases the work-decomposition skill operates in). Capped at ~5 KB
|
|
528
|
+
* so it stays trivial token-wise; if the file is larger, we truncate
|
|
529
|
+
* to the first 5 KB and append a "...truncated" marker so the agent
|
|
530
|
+
* knows to prune.
|
|
531
|
+
*/
|
|
532
|
+
_readProjectMemory() {
|
|
533
|
+
if (!["skill_authoring", "skill_testing"].includes(this.currentPhase)) return null;
|
|
534
|
+
const p = path.join(this.workspace.cwd, "rules", "PATTERNS.md");
|
|
535
|
+
try {
|
|
536
|
+
if (!fs.existsSync(p)) return null;
|
|
537
|
+
const raw = fs.readFileSync(p, "utf-8");
|
|
538
|
+
const CAP = 5 * 1024;
|
|
539
|
+
if (raw.length <= CAP) return raw;
|
|
540
|
+
return raw.slice(0, CAP) + "\n\n…truncated at 5 KB — prune the least-actionable entries (work-decomposition skill: Sizing).";
|
|
541
|
+
} catch { return null; }
|
|
542
|
+
}
|
|
543
|
+
|
|
465
544
|
/**
|
|
466
545
|
* Build the workspace/project directory state string for the system prompt.
|
|
467
546
|
*/
|
|
@@ -501,6 +580,7 @@ export class AgentEngine {
|
|
|
501
580
|
skillIndex: this._skillLoader.formatForContext(this.currentPhase),
|
|
502
581
|
pipelineState: this.pipelines[this.currentPhase]?.describeState?.() || null,
|
|
503
582
|
workspaceState: this._buildWorkspaceState(),
|
|
583
|
+
projectMemory: this._readProjectMemory(),
|
|
504
584
|
});
|
|
505
585
|
const systemTokens = estimateTokens(systemPrompt);
|
|
506
586
|
const messageTokens = estimateMessagesTokens(this.history.messages);
|
|
@@ -659,8 +739,18 @@ export class AgentEngine {
|
|
|
659
739
|
async compact({ recentCount = 20 } = {}) {
|
|
660
740
|
if (this.history.messages.length <= recentCount) return null;
|
|
661
741
|
|
|
662
|
-
|
|
663
|
-
|
|
742
|
+
// v0.6.3.1: tool-pair atomicity. Naive slice(-recentCount) can land on
|
|
743
|
+
// a tool message (whose assistant_with_tool_calls is in the older batch
|
|
744
|
+
// about to be summarized) OR put the split between an assistant with
|
|
745
|
+
// tool_calls and its tool results. Either creates an orphan that
|
|
746
|
+
// DeepSeek's strict API rejects with 400. Walk the split point forward
|
|
747
|
+
// until BOTH (recent[0] isn't tool) AND (older[-1] isn't
|
|
748
|
+
// assistant_with_tool_calls).
|
|
749
|
+
const desiredSplit = this.history.messages.length - recentCount;
|
|
750
|
+
const splitPoint = findSafeSplitPoint(this.history.messages, desiredSplit);
|
|
751
|
+
const olderMessages = this.history.messages.slice(0, splitPoint);
|
|
752
|
+
const recentMessages = this.history.messages.slice(splitPoint);
|
|
753
|
+
if (olderMessages.length === 0) return null; // nothing safely summarizable
|
|
664
754
|
|
|
665
755
|
const CHUNK_BUDGET = 30000; // tokens per summarization request
|
|
666
756
|
const chunks = this._chunkMessages(olderMessages, CHUNK_BUDGET);
|
|
@@ -793,6 +883,39 @@ export class AgentEngine {
|
|
|
793
883
|
engine._registerToolsForPhase(engine.currentPhase);
|
|
794
884
|
engine.workspace.setPhase(engine.currentPhase);
|
|
795
885
|
|
|
886
|
+
// v0.6.3.1: detect whether prior turns of this session used reasoning
|
|
887
|
+
// mode, so the field-consistency invariant continues across resume.
|
|
888
|
+
// Without this, the first assistant turn after resume might lack
|
|
889
|
+
// reasoning_content even though earlier turns have it, and DeepSeek's
|
|
890
|
+
// strict-mode rejects with 400.
|
|
891
|
+
try {
|
|
892
|
+
const msgs = engine.history?.messages || [];
|
|
893
|
+
engine._sessionUsesReasoning = msgs.some(
|
|
894
|
+
(m) => m?.role === "assistant" && "reasoning_content" in m,
|
|
895
|
+
);
|
|
896
|
+
// One-shot migration: backfill empty reasoning_content on assistant
|
|
897
|
+
// messages that are missing the field. Pre-v0.6.3.1 sessions could
|
|
898
|
+
// accumulate "holes" (turns where the model skipped reasoning) that
|
|
899
|
+
// poison the conversation for resume. A single empty string on each
|
|
900
|
+
// hole is enough to satisfy DeepSeek's field-consistency rule.
|
|
901
|
+
if (engine._sessionUsesReasoning) {
|
|
902
|
+
let patched = 0;
|
|
903
|
+
for (const m of msgs) {
|
|
904
|
+
if (m?.role === "assistant" && !("reasoning_content" in m)) {
|
|
905
|
+
m.reasoning_content = "";
|
|
906
|
+
patched++;
|
|
907
|
+
}
|
|
908
|
+
}
|
|
909
|
+
if (patched > 0) {
|
|
910
|
+
engine.history._save?.();
|
|
911
|
+
engine.eventLog.append("reasoning_content_backfilled", {
|
|
912
|
+
count: patched,
|
|
913
|
+
reason: "v0.6.3.1 migration on resume",
|
|
914
|
+
});
|
|
915
|
+
}
|
|
916
|
+
}
|
|
917
|
+
} catch { /* never let resume break on this */ }
|
|
918
|
+
|
|
796
919
|
// Restore project directory from saved state
|
|
797
920
|
if (data.projectDir) {
|
|
798
921
|
if (fs.existsSync(data.projectDir)) {
|
|
@@ -905,6 +1028,7 @@ export class AgentEngine {
|
|
|
905
1028
|
skillIndex: this._skillLoader.formatForContext(this.currentPhase),
|
|
906
1029
|
pipelineState,
|
|
907
1030
|
workspaceState: this._buildWorkspaceState(),
|
|
1031
|
+
projectMemory: this._readProjectMemory(),
|
|
908
1032
|
});
|
|
909
1033
|
const tools = this.toolRegistry.schemasOpenai();
|
|
910
1034
|
|
|
@@ -933,6 +1057,19 @@ export class AgentEngine {
|
|
|
933
1057
|
|
|
934
1058
|
try {
|
|
935
1059
|
let collectedText = "";
|
|
1060
|
+
// v0.7.0 L (#76): Anthropic-only — accumulator for the
|
|
1061
|
+
// signature_delta blob that proves the thinking content came
|
|
1062
|
+
// from Anthropic's model. Required alongside thinking text on
|
|
1063
|
+
// multi-turn replay.
|
|
1064
|
+
let collectedReasoningSignature = "";
|
|
1065
|
+
// v0.6.3: hybrid reasoning models (GLM-5.1, DeepSeek v4, MiMo v2.5,
|
|
1066
|
+
// Qwen3, ...) stream `delta.reasoning_content` separately from
|
|
1067
|
+
// `delta.content`. DeepSeek's strict API requires this field to be
|
|
1068
|
+
// round-tripped on subsequent assistant messages or it rejects the
|
|
1069
|
+
// request with "reasoning_content in the thinking mode must be passed
|
|
1070
|
+
// back". Even providers that don't enforce this (SiliconFlow) still
|
|
1071
|
+
// benefit from preservation — without it, prior reasoning is wasted.
|
|
1072
|
+
let collectedReasoning = "";
|
|
936
1073
|
/** @type {Map<number, {id: string, name: string, arguments: string}>} */
|
|
937
1074
|
const toolCallsAcc = new Map();
|
|
938
1075
|
|
|
@@ -952,6 +1089,22 @@ export class AgentEngine {
|
|
|
952
1089
|
collectedText += delta.content;
|
|
953
1090
|
}
|
|
954
1091
|
|
|
1092
|
+
// v0.6.3: capture reasoning_content from the same delta. Emit a
|
|
1093
|
+
// separate event type so the TUI can optionally render thinking
|
|
1094
|
+
// (today it's silently consumed; round-trip is the priority fix).
|
|
1095
|
+
if (delta.reasoning_content) {
|
|
1096
|
+
yield new AgentEvent({ type: "reasoning_delta", text: delta.reasoning_content });
|
|
1097
|
+
collectedReasoning += delta.reasoning_content;
|
|
1098
|
+
}
|
|
1099
|
+
|
|
1100
|
+
// v0.7.0 L (#76): Anthropic-only signature_delta. Carries the
|
|
1101
|
+
// opaque proof-of-thinking blob that strict-mode multi-turn
|
|
1102
|
+
// requires alongside the thinking text. OpenAI-shape providers
|
|
1103
|
+
// never emit this delta; it's a no-op for them.
|
|
1104
|
+
if (delta.reasoning_signature) {
|
|
1105
|
+
collectedReasoningSignature += delta.reasoning_signature;
|
|
1106
|
+
}
|
|
1107
|
+
|
|
955
1108
|
if (delta.tool_calls) {
|
|
956
1109
|
for (const tcDelta of delta.tool_calls) {
|
|
957
1110
|
const idx = tcDelta.index;
|
|
@@ -968,6 +1121,31 @@ export class AgentEngine {
|
|
|
968
1121
|
|
|
969
1122
|
// Log the complete assistant message (coalesced, not per-delta)
|
|
970
1123
|
const assistantMsg = { role: "assistant", content: collectedText || null };
|
|
1124
|
+
// v0.6.3: persist reasoning_content on the assistant message so it
|
|
1125
|
+
// round-trips on the next request. history.addRaw spreads the input,
|
|
1126
|
+
// preserving unknown fields; OpenAI body builder doesn't strip them.
|
|
1127
|
+
//
|
|
1128
|
+
// v0.6.3.1: DeepSeek's strict-mode rule is FIELD CONSISTENCY, not
|
|
1129
|
+
// field content — once any assistant turn in the conversation has
|
|
1130
|
+
// reasoning_content, every subsequent assistant turn must also have
|
|
1131
|
+
// it (empty string OK; missing the field rejects with 400). Hybrid
|
|
1132
|
+
// reasoning models sometimes skip reasoning on trivial follow-through
|
|
1133
|
+
// tool calls, leaving collectedReasoning="". Track at session level:
|
|
1134
|
+
// once we see ANY reasoning, keep setting the field (possibly empty)
|
|
1135
|
+
// for the rest of the session. Providers that don't use the field
|
|
1136
|
+
// ignore it silently.
|
|
1137
|
+
if (collectedReasoning) {
|
|
1138
|
+
assistantMsg.reasoning_content = collectedReasoning;
|
|
1139
|
+
this._sessionUsesReasoning = true;
|
|
1140
|
+
} else if (this._sessionUsesReasoning) {
|
|
1141
|
+
assistantMsg.reasoning_content = "";
|
|
1142
|
+
}
|
|
1143
|
+
// v0.7.0 L (#76): persist Anthropic signature alongside thinking.
|
|
1144
|
+
// Always stored together — if either is missing, _buildAnthropicBody
|
|
1145
|
+
// skips the thinking-block replay (would be rejected as malformed).
|
|
1146
|
+
if (collectedReasoningSignature) {
|
|
1147
|
+
assistantMsg.reasoning_signature = collectedReasoningSignature;
|
|
1148
|
+
}
|
|
971
1149
|
if (toolCallsAcc.size > 0) {
|
|
972
1150
|
assistantMsg.tool_calls = Array.from(toolCallsAcc.values()).map((tc) => ({
|
|
973
1151
|
id: tc.id,
|
|
@@ -1024,10 +1202,61 @@ export class AgentEngine {
|
|
|
1024
1202
|
|
|
1025
1203
|
// Tool execution loop
|
|
1026
1204
|
for (const tc of toolCallsAcc.values()) {
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1205
|
+
// v0.6.3.1: tool-argument JSON parsing used to be `try { parse } catch {}`
|
|
1206
|
+
// — silently falling back to {} on any parse failure. E2E #5 GLM
|
|
1207
|
+
// session showed this firing 100+ times: SiliconFlow streaming
|
|
1208
|
+
// truncates GLM-5.1 tool_call arguments by ~1 closing brace
|
|
1209
|
+
// (likely max_tokens cutoff mid-args), the silent fallback shipped
|
|
1210
|
+
// {} to the tool, and the tool returned generic "(empty)" errors
|
|
1211
|
+
// which the agent kept retrying without understanding why.
|
|
1212
|
+
//
|
|
1213
|
+
// Fix: try strict parse, then attempt brace-balance recovery (cheap
|
|
1214
|
+
// — recovers from the common single-brace-truncation case), and if
|
|
1215
|
+
// that fails, surface a structured error to the agent so it can
|
|
1216
|
+
// see what it sent and self-correct.
|
|
1217
|
+
let inputData = null;
|
|
1218
|
+
let argParseError = null;
|
|
1219
|
+
if (tc.arguments) {
|
|
1220
|
+
const recovery = parseToolArgsTolerant(tc.arguments);
|
|
1221
|
+
if (recovery.ok) {
|
|
1222
|
+
inputData = recovery.value;
|
|
1223
|
+
if (recovery.recovered) {
|
|
1224
|
+
this.eventLog.append("tool_args_recovered", {
|
|
1225
|
+
name: tc.name,
|
|
1226
|
+
added_chars: recovery.recovered,
|
|
1227
|
+
original_len: tc.arguments.length,
|
|
1228
|
+
});
|
|
1229
|
+
}
|
|
1230
|
+
} else {
|
|
1231
|
+
argParseError = recovery.error;
|
|
1232
|
+
}
|
|
1233
|
+
} else {
|
|
1234
|
+
inputData = {};
|
|
1235
|
+
}
|
|
1236
|
+
|
|
1237
|
+
// If arguments were unparseable, skip execution and return a tool
|
|
1238
|
+
// result that tells the agent what went wrong. Engine's tool result
|
|
1239
|
+
// loop continues so the rest of the assistant's tool_calls in this
|
|
1240
|
+
// turn still execute.
|
|
1241
|
+
if (argParseError) {
|
|
1242
|
+
const preview = (tc.arguments || "").slice(0, 200);
|
|
1243
|
+
const errMsg =
|
|
1244
|
+
`Tool arguments were malformed JSON for ${tc.name}. ` +
|
|
1245
|
+
`Likely streaming truncation by the model (provider cut tokens mid-output). ` +
|
|
1246
|
+
`Parser error: ${argParseError}. ` +
|
|
1247
|
+
`First 200 chars of what was received: ${preview}${tc.arguments && tc.arguments.length > 200 ? "..." : ""}. ` +
|
|
1248
|
+
`Retry the call with shorter / simpler arguments — the model may have hit max_tokens partway through encoding.`;
|
|
1249
|
+
this.eventLog.append("tool_args_parse_failed", {
|
|
1250
|
+
name: tc.name,
|
|
1251
|
+
error: argParseError,
|
|
1252
|
+
raw_args_len: (tc.arguments || "").length,
|
|
1253
|
+
raw_preview: preview,
|
|
1254
|
+
});
|
|
1255
|
+
yield new AgentEvent({ type: "tool_start", name: tc.name, input: { _parse_error: argParseError } });
|
|
1256
|
+
yield new AgentEvent({ type: "tool_result", name: tc.name, output: errMsg, isError: true });
|
|
1257
|
+
this.history.addRaw({ role: "tool", tool_call_id: tc.id, content: errMsg });
|
|
1258
|
+
continue;
|
|
1259
|
+
}
|
|
1031
1260
|
|
|
1032
1261
|
this.eventLog.append("tool_start", { name: tc.name, input: inputData });
|
|
1033
1262
|
yield new AgentEvent({ type: "tool_start", name: tc.name, input: inputData });
|
|
@@ -1082,10 +1311,31 @@ export class AgentEngine {
|
|
|
1082
1311
|
isError: result.isError,
|
|
1083
1312
|
});
|
|
1084
1313
|
|
|
1314
|
+
// v0.6.3 (#74): phase-misfit nudge. Ask the current pipeline whether
|
|
1315
|
+
// this tool call looks like work that belongs to a different phase.
|
|
1316
|
+
// If so, append a `<system-reminder>` tag to the tool result content
|
|
1317
|
+
// (same convention as task-tools and auto-memory reminders). The
|
|
1318
|
+
// agent sees this on its next turn and can self-check whether to
|
|
1319
|
+
// call phase_advance. Only fires for non-error results — failed
|
|
1320
|
+
// tool calls have their own error message and don't need the nudge.
|
|
1321
|
+
let nudgedContent = historyContent;
|
|
1322
|
+
try {
|
|
1323
|
+
const pipelineForPhase = this.pipelines?.[beforePhase];
|
|
1324
|
+
const hint = pipelineForPhase?.phaseMisfitHint?.(tc.name, inputData, result);
|
|
1325
|
+
if (hint && !result.isError) {
|
|
1326
|
+
nudgedContent = `${historyContent}\n\n<system-reminder>\nPhase-misfit detected: ${hint}\n</system-reminder>`;
|
|
1327
|
+
this.eventLog.append("phase_misfit_hint", {
|
|
1328
|
+
phase: beforePhase,
|
|
1329
|
+
tool: tc.name,
|
|
1330
|
+
hint,
|
|
1331
|
+
});
|
|
1332
|
+
}
|
|
1333
|
+
} catch { /* never let the nudge logic break the tool loop */ }
|
|
1334
|
+
|
|
1085
1335
|
this.history.addRaw({
|
|
1086
1336
|
role: "tool",
|
|
1087
1337
|
tool_call_id: tc.id,
|
|
1088
|
-
content:
|
|
1338
|
+
content: nudgedContent,
|
|
1089
1339
|
});
|
|
1090
1340
|
|
|
1091
1341
|
// Post-tool-result safety net: check for context pressure RIGHT NOW
|
|
@@ -1162,14 +1412,81 @@ export class AgentEngine {
|
|
|
1162
1412
|
|
|
1163
1413
|
const expected = NEXT_PHASE[this.currentPhase];
|
|
1164
1414
|
if (!force && nextPhase !== expected) {
|
|
1415
|
+
// v0.7.0 A3: event-log hint stays factual (records what the gate
|
|
1416
|
+
// saw) — the LLM-facing refusal text in phase-advance.js no longer
|
|
1417
|
+
// advertises force:true. Hint kept here for post-mortem audit.
|
|
1165
1418
|
this.eventLog.append("phase_advance_refused", {
|
|
1166
1419
|
from: this.currentPhase, to: nextPhase, reason,
|
|
1167
|
-
hint: expected ? `
|
|
1420
|
+
hint: expected ? `non-adjacent transition; immediate next phase is '${expected}'`
|
|
1168
1421
|
: `${this.currentPhase} is the terminal phase`,
|
|
1169
1422
|
});
|
|
1170
1423
|
return false;
|
|
1171
1424
|
}
|
|
1172
1425
|
|
|
1426
|
+
// v0.7.0 A5: reconcile per-rule tasks against disk artifacts before
|
|
1427
|
+
// checking exit criteria. Catches the E2E #5 DS pattern (tasks.json
|
|
1428
|
+
// showed 70/70 done while only 56 dirs / 36 with check_*.py existed):
|
|
1429
|
+
// markDone() is fire-and-forget today, so the agent can claim
|
|
1430
|
+
// completion that didn't materialize. Reconcile flips back to
|
|
1431
|
+
// pending if the helper-derived ruleIdsCovered set doesn't include
|
|
1432
|
+
// the task's ruleId. A "force"d advance bypasses reconcile too —
|
|
1433
|
+
// the gate already gives the agent / user that escape.
|
|
1434
|
+
if (!force && this.taskManager && this.workspace) {
|
|
1435
|
+
try {
|
|
1436
|
+
const sa = deriveSkillAuthoringMilestones(this.workspace);
|
|
1437
|
+
const covered = new Set(sa.ruleIdsCovered);
|
|
1438
|
+
const tm = deriveSkillTestingMilestones(this.workspace);
|
|
1439
|
+
const tested = new Set(tm.skillsTested);
|
|
1440
|
+
const r = this.taskManager.reconcileAgainstDisk((task) => {
|
|
1441
|
+
if (task.phase === "skill_authoring") return covered.has(task.ruleId);
|
|
1442
|
+
if (task.phase === "skill_testing") return tested.has(task.ruleId);
|
|
1443
|
+
return true; // unknown phase — leave alone
|
|
1444
|
+
});
|
|
1445
|
+
if (r.flippedBack.length > 0) {
|
|
1446
|
+
this.eventLog.append("tasks_reconciled", {
|
|
1447
|
+
from_phase: this.currentPhase,
|
|
1448
|
+
target_phase: nextPhase,
|
|
1449
|
+
flipped_back: r.flippedBack,
|
|
1450
|
+
count: r.flippedBack.length,
|
|
1451
|
+
inspected: r.reconciled,
|
|
1452
|
+
});
|
|
1453
|
+
}
|
|
1454
|
+
} catch { /* never let reconcile break advance */ }
|
|
1455
|
+
}
|
|
1456
|
+
|
|
1457
|
+
// v0.6.3: HARD-TRACKING GATE — refuse forward advance unless the source
|
|
1458
|
+
// phase's exit criteria are met by engine telemetry. v0.6.1 added the
|
|
1459
|
+
// engineCounts block to phase summaries (observation) but never wired
|
|
1460
|
+
// exitCriteriaMet() into the gate (enforcement). E2E #5 surfaced the
|
|
1461
|
+
// gap: MiMo advanced rule_extraction → skill_authoring with
|
|
1462
|
+
// rulesExtracted=0 in engine telemetry because rule_catalog had been
|
|
1463
|
+
// writing to a stranded post-rename path AND nothing checked the gate.
|
|
1464
|
+
//
|
|
1465
|
+
// Forward-only enforcement: rollbacks (_advancePhase from a later phase
|
|
1466
|
+
// to an earlier one with force:true) are an explicit escape, not a
|
|
1467
|
+
// criteria check — the rolled-from phase doesn't need to be "complete".
|
|
1468
|
+
// force:true also bypasses (matches existing escape pattern: user/agent
|
|
1469
|
+
// explicitly chose to skip).
|
|
1470
|
+
if (!force) {
|
|
1471
|
+
const fromIdx = PHASE_ORDER.indexOf(this.currentPhase);
|
|
1472
|
+
const toIdx = PHASE_ORDER.indexOf(nextPhase);
|
|
1473
|
+
const isForward = fromIdx >= 0 && toIdx >= 0 && toIdx > fromIdx;
|
|
1474
|
+
if (isForward) {
|
|
1475
|
+
const fromPipeline = this.pipelines?.[this.currentPhase];
|
|
1476
|
+
let criteriaMet = true;
|
|
1477
|
+
try { criteriaMet = !!fromPipeline?.exitCriteriaMet?.(); } catch { criteriaMet = true; }
|
|
1478
|
+
if (!criteriaMet) {
|
|
1479
|
+
const counts = this._buildEngineCountsBlock(this.currentPhase);
|
|
1480
|
+
this.eventLog.append("phase_advance_refused", {
|
|
1481
|
+
from: this.currentPhase, to: nextPhase, reason,
|
|
1482
|
+
hint: "exit criteria not met by engine telemetry",
|
|
1483
|
+
engineCounts: counts || null,
|
|
1484
|
+
});
|
|
1485
|
+
return false;
|
|
1486
|
+
}
|
|
1487
|
+
}
|
|
1488
|
+
}
|
|
1489
|
+
|
|
1173
1490
|
// v0.6.2 J2: detect rollback direction. PHASE_ORDER is a linear array
|
|
1174
1491
|
// of all phases; if target index < current index, this is a rollback
|
|
1175
1492
|
// (e.g., production_qc → skill_authoring after gates revealed gaps).
|
|
@@ -1185,9 +1502,15 @@ export class AgentEngine {
|
|
|
1185
1502
|
const engineCounts = this._buildEngineCountsBlock(this.currentPhase);
|
|
1186
1503
|
const mismatchPrefix = this._detectSummaryMismatch(reason, this.currentPhase) ? "⚠️ POSSIBLE MISMATCH: " : "";
|
|
1187
1504
|
const directionTag = direction === "rollback" ? " [ROLLBACK]" : "";
|
|
1505
|
+
// v0.7.0 A2: forced is now `!!force` (honest), not the old
|
|
1506
|
+
// `force && nextPhase !== expected` which masked every adjacent-forward
|
|
1507
|
+
// force in the audit log. E2E #5 had 12/12 force-bypasses but the event
|
|
1508
|
+
// log read 0 forced because every transition was to the immediate next
|
|
1509
|
+
// phase. Truth in audit logs first; refinement (forward-vs-non-adjacent
|
|
1510
|
+
// distinction) lives in the `direction` field.
|
|
1188
1511
|
const phaseSummary =
|
|
1189
1512
|
`[${this.currentPhase.toUpperCase()} → ${nextPhase.toUpperCase()}]${directionTag}: ${mismatchPrefix}${reason}` +
|
|
1190
|
-
(force
|
|
1513
|
+
(force ? " (forced)" : "") +
|
|
1191
1514
|
(engineCounts ? `\n (engine) ${engineCounts}` : "");
|
|
1192
1515
|
this._phaseSummaries.push(phaseSummary);
|
|
1193
1516
|
this.eventLog.append("phase_transition", {
|
|
@@ -1197,7 +1520,7 @@ export class AgentEngine {
|
|
|
1197
1520
|
direction,
|
|
1198
1521
|
engineCounts: engineCounts || null,
|
|
1199
1522
|
possibleMismatch: !!mismatchPrefix,
|
|
1200
|
-
forced: force
|
|
1523
|
+
forced: !!force,
|
|
1201
1524
|
});
|
|
1202
1525
|
const fromPhase = this.currentPhase;
|
|
1203
1526
|
this.currentPhase = nextPhase;
|
|
@@ -1205,6 +1528,18 @@ export class AgentEngine {
|
|
|
1205
1528
|
this.workspace.setPhase(this.currentPhase);
|
|
1206
1529
|
this._createTasksForPhase(this.currentPhase);
|
|
1207
1530
|
|
|
1531
|
+
// v0.7.0 N (#94): give the entered pipeline a chance to do
|
|
1532
|
+
// phase-entry setup. Used by finalization to copy the release
|
|
1533
|
+
// template into output/releases/v1/. Other pipelines are no-ops.
|
|
1534
|
+
// Wrapped so a failure here can't trap the phase advance.
|
|
1535
|
+
try { this.pipelines[this.currentPhase]?.onPhaseEnter?.({ fromPhase, workspace: this.workspace }); }
|
|
1536
|
+
catch (e) {
|
|
1537
|
+
this.eventLog.append("phase_enter_hook_failed", {
|
|
1538
|
+
phase: this.currentPhase,
|
|
1539
|
+
error: e?.message || String(e),
|
|
1540
|
+
});
|
|
1541
|
+
}
|
|
1542
|
+
|
|
1208
1543
|
// v0.6.2 J2: on rollback, reset the rolled-FROM phase's lastReady
|
|
1209
1544
|
// edge-trigger so that if the agent revisits it and re-flips
|
|
1210
1545
|
// exit-criteria true, _maybeAutoAdvance will fire correctly. Without
|
|
@@ -1298,7 +1633,7 @@ export class AgentEngine {
|
|
|
1298
1633
|
const parts = [];
|
|
1299
1634
|
try {
|
|
1300
1635
|
switch (fromPhase) {
|
|
1301
|
-
case "
|
|
1636
|
+
case "rule_extraction": {
|
|
1302
1637
|
const total = pipeline._catalogRuleCount?.() ?? pipeline.rulesExtracted?.length ?? 0;
|
|
1303
1638
|
parts.push(`rulesExtracted: ${pipeline.rulesExtracted?.length ?? 0}`);
|
|
1304
1639
|
parts.push(`rulesWithChunkRefs: ${pipeline.rulesWithChunkRefs?.length ?? 0}/${total}`);
|
|
@@ -1746,11 +2081,23 @@ export class AgentEngine {
|
|
|
1746
2081
|
|
|
1747
2082
|
// Auto-continue through pending tasks
|
|
1748
2083
|
while (this.taskManager.getNextPending()) {
|
|
1749
|
-
//
|
|
2084
|
+
// v0.7.0 #93: budget-aware compact threshold. The old
|
|
2085
|
+
// `messages.length > 15` was message-count-based and frozen
|
|
2086
|
+
// from when KC ran on smaller contexts. With 200K+ budgets it
|
|
2087
|
+
// fired on every iteration of any non-trivial task — E2E #5
|
|
2088
|
+
// GLM saw 76 memory_pressure events and DS saw 46 because
|
|
2089
|
+
// compact pre-empted natural windowing. Replace with token-
|
|
2090
|
+
// budget threshold (default 60% of context, configurable via
|
|
2091
|
+
// KC_COMPACT_THRESHOLD_TOKENS) so compact runs when there's
|
|
2092
|
+
// actual pressure, not just when message count crossed an
|
|
2093
|
+
// ancient heuristic.
|
|
1750
2094
|
const stats = this.getContextStats();
|
|
2095
|
+
const thresholdTokens = parseInt(
|
|
2096
|
+
process.env.KC_COMPACT_THRESHOLD_TOKENS || "0", 10,
|
|
2097
|
+
) || Math.round((this.config.kcContextLimit || 200000) * 0.6);
|
|
1751
2098
|
if (stats.percentage > 70) {
|
|
1752
2099
|
await this.compact();
|
|
1753
|
-
} else if (
|
|
2100
|
+
} else if (stats.totalTokens > thresholdTokens) {
|
|
1754
2101
|
await this.compact({ recentCount: 8 });
|
|
1755
2102
|
}
|
|
1756
2103
|
|
|
@@ -1919,10 +2266,18 @@ export class AgentEngine {
|
|
|
1919
2266
|
continue;
|
|
1920
2267
|
}
|
|
1921
2268
|
|
|
1922
|
-
|
|
1923
|
-
|
|
1924
|
-
|
|
1925
|
-
|
|
2269
|
+
// v0.7.0 H1: trackedPromise covers both fulfilled and rejected
|
|
2270
|
+
// paths (second arg). The .catch tail is belt-and-braces in case
|
|
2271
|
+
// the .then callbacks themselves throw — without it, a JSON
|
|
2272
|
+
// serialization throw inside the success-arm callback would
|
|
2273
|
+
// surface as UnhandledPromiseRejection and crash strict-mode
|
|
2274
|
+
// Node. We never want a worker error to take the engine down.
|
|
2275
|
+
const trackedPromise = entry.promise
|
|
2276
|
+
.then(
|
|
2277
|
+
() => ({ taskId: task.id, subId, ok: true }),
|
|
2278
|
+
(e) => ({ taskId: task.id, subId, ok: false, error: e?.message || String(e) }),
|
|
2279
|
+
)
|
|
2280
|
+
.catch((e) => ({ taskId: task.id, subId, ok: false, error: `tracked-promise threw: ${e?.message || String(e)}` }));
|
|
1926
2281
|
inFlight.set(subId, { task, workerLabel, promise: trackedPromise });
|
|
1927
2282
|
}
|
|
1928
2283
|
};
|
|
@@ -1937,7 +2292,15 @@ export class AgentEngine {
|
|
|
1937
2292
|
|
|
1938
2293
|
if (inFlight.size === 0) break;
|
|
1939
2294
|
|
|
1940
|
-
// Wait for either the next event OR a worker to complete
|
|
2295
|
+
// Wait for either the next event OR a worker to complete.
|
|
2296
|
+
//
|
|
2297
|
+
// v0.7.0 C1 note: losers in Promise.race() keep their .then()
|
|
2298
|
+
// chains active and resolve into garbage objects. That's the
|
|
2299
|
+
// intended JS Promise behavior — rejections are still handled,
|
|
2300
|
+
// memory drops at GC. The audit was overstated; no actual hang
|
|
2301
|
+
// or leak. Each loop iteration rebuilds the race from current
|
|
2302
|
+
// inFlight.values() so stale promises from prior iterations
|
|
2303
|
+
// are naturally re-observed (they've already resolved by then).
|
|
1941
2304
|
const workerCompletion = Promise.race([...inFlight.values()].map((v) => v.promise));
|
|
1942
2305
|
const eventArrival = new Promise((resolve) => { notify = () => resolve("event"); });
|
|
1943
2306
|
const winner = await Promise.race([
|