kc-beta 0.6.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +81 -0
- package/LICENSE-COMMERCIAL.md +125 -0
- package/README.md +21 -3
- package/package.json +14 -5
- package/src/agent/context-window.js +9 -12
- package/src/agent/context.js +14 -1
- package/src/agent/document-parser.js +169 -0
- package/src/agent/engine.js +367 -18
- package/src/agent/history/event-history.js +222 -0
- package/src/agent/llm-client.js +55 -0
- package/src/agent/message-utils.js +63 -0
- package/src/agent/pipelines/_milestone-derive.js +511 -0
- package/src/agent/pipelines/base.js +21 -0
- package/src/agent/pipelines/distillation.js +28 -15
- package/src/agent/pipelines/extraction.js +103 -36
- package/src/agent/pipelines/finalization.js +178 -11
- package/src/agent/pipelines/index.js +6 -1
- package/src/agent/pipelines/initializer.js +74 -8
- package/src/agent/pipelines/production-qc.js +31 -44
- package/src/agent/pipelines/skill-authoring.js +97 -80
- package/src/agent/pipelines/skill-testing.js +67 -23
- package/src/agent/retry.js +10 -2
- package/src/agent/scheduler.js +14 -2
- package/src/agent/session-state.js +18 -1
- package/src/agent/skill-loader.js +13 -7
- package/src/agent/skill-validator.js +19 -5
- package/src/agent/task-manager.js +61 -5
- package/src/agent/tools/document-chunk.js +21 -9
- package/src/agent/tools/phase-advance.js +18 -3
- package/src/agent/tools/release.js +51 -9
- package/src/agent/tools/rule-catalog.js +11 -1
- package/src/agent/tools/workspace-file.js +32 -0
- package/src/agent/workspace.js +39 -1
- package/src/cli/components.js +64 -14
- package/src/cli/index.js +62 -3
- package/src/cli/meme.js +26 -25
- package/src/config.js +65 -22
- package/src/model-tiers.json +24 -8
- package/src/providers.js +42 -0
- package/template/release/v1/README.md.tmpl +108 -0
- package/template/release/v1/catalog.json.tmpl +4 -0
- package/template/release/v1/kc_runtime/__init__.py +11 -0
- package/template/release/v1/kc_runtime/confidence.py +63 -0
- package/template/release/v1/kc_runtime/doc_parser.py +127 -0
- package/template/release/v1/manifest.json.tmpl +11 -0
- package/template/release/v1/render_dashboard.py +117 -0
- package/template/release/v1/run.py +212 -0
- package/template/release/v1/serve.sh +17 -0
- package/template/skills/en/meta-meta/work-decomposition/SKILL.md +266 -0
- package/template/skills/en/skill-creator/SKILL.md +1 -1
- package/template/skills/zh/meta-meta/work-decomposition/SKILL.md +264 -0
- package/template/skills/zh/skill-creator/SKILL.md +1 -1
package/src/agent/engine.js
CHANGED
|
@@ -1,8 +1,13 @@
|
|
|
1
1
|
import fs from "node:fs";
|
|
2
2
|
import path from "node:path";
|
|
3
3
|
import { AgentEvent } from "./events.js";
|
|
4
|
+
import {
|
|
5
|
+
deriveSkillAuthoringMilestones,
|
|
6
|
+
deriveSkillTestingMilestones,
|
|
7
|
+
} from "./pipelines/_milestone-derive.js";
|
|
4
8
|
import { ContextAssembler } from "./context.js";
|
|
5
9
|
import { ConversationHistory } from "./history.js";
|
|
10
|
+
import { findSafeSplitPoint } from "./message-utils.js";
|
|
6
11
|
import { Workspace } from "./workspace.js";
|
|
7
12
|
import { normalizeRuleCatalog } from "./rule-catalog-normalize.js";
|
|
8
13
|
import { VersionManager } from "./version-manager.js";
|
|
@@ -52,6 +57,45 @@ import { estimateTokens, estimateMessagesTokens } from "./token-counter.js";
|
|
|
52
57
|
// or kc_max_tokens in the global config.
|
|
53
58
|
const DEFAULT_KC_MAX_TOKENS = 65536;
|
|
54
59
|
|
|
60
|
+
/**
 * v0.6.3.1: Tolerant JSON parse for streamed tool-call arguments. When LLMs
 * (esp. SiliconFlow GLM-5.1 in E2E #5) hit max_tokens mid-arguments, the
 * stream returns truncated JSON missing N closing braces or quotes. Strict
 * parse fails; old code silently dropped to {} which masked the actual issue.
 *
 * Strategy:
 *   1. Try strict JSON.parse (fast path, most calls).
 *   2. On failure, scan the text once (string- and escape-aware) to find
 *      which containers / string are still open at EOF, and append exactly
 *      the closers needed — at most BRACE_RECOVERY_BUDGET `}` / `]`
 *      characters, plus a `"` when the cut landed inside a string.
 *   3. If still failing, return error so caller surfaces it to the agent.
 *
 * Returns { ok: true, value, recovered? } | { ok: false, error }.
 * `recovered` is the number of characters appended during recovery.
 */
const BRACE_RECOVERY_BUDGET = 4;

/**
 * Compute a completed candidate for a JSON text that was cut off at EOF.
 *
 * Braces, brackets and quotes that appear inside string values are ignored,
 * and backslash escapes are honoured, so tool arguments carrying code or
 * templates do not skew the count.
 *
 * @param {string} raw - The truncated JSON text.
 * @returns {{ text: string, added: number } | null} The candidate text and
 *   the number of appended characters, or null when the input is not a pure
 *   truncation (mismatched closer, nothing left open) or needs more than
 *   BRACE_RECOVERY_BUDGET container closers.
 */
function completeTruncatedJson(raw) {
  const closers = [];
  let inString = false;
  let escaped = false;

  for (let i = 0; i < raw.length; i++) {
    const ch = raw[i];
    if (inString) {
      if (escaped) escaped = false;
      else if (ch === "\\") escaped = true;
      else if (ch === '"') inString = false;
      continue;
    }
    if (ch === '"') inString = true;
    else if (ch === "{") closers.push("}");
    else if (ch === "[") closers.push("]");
    else if (ch === "}" || ch === "]") {
      // A closer that does not match the innermost open container means the
      // text is malformed, not truncated — appending cannot repair it.
      if (closers.pop() !== ch) return null;
    }
  }

  if (!inString && closers.length === 0) return null;
  if (closers.length > BRACE_RECOVERY_BUDGET) return null;

  // A dangling backslash at EOF would turn the appended quote into an
  // escaped quote and leave the string open; drop the half-written escape.
  const base = inString && escaped ? raw.slice(0, -1) : raw;
  const suffix = (inString ? '"' : "") + closers.reverse().join("");
  return { text: base + suffix, added: suffix.length };
}

/**
 * Parse streamed tool-call arguments, repairing simple EOF truncation.
 *
 * @param {unknown} raw - The accumulated `arguments` string of a tool call.
 * @returns {{ ok: true, value: any, recovered?: number } | { ok: false, error: string }}
 *   `recovered` is present only when closers had to be appended; `error` is
 *   the strict parser's message when neither parse succeeds.
 */
function parseToolArgsTolerant(raw) {
  if (typeof raw !== "string") return { ok: false, error: "arguments not a string" };
  if (raw === "") return { ok: true, value: {} };

  // Fast path
  let strictError;
  try {
    return { ok: true, value: JSON.parse(raw) };
  } catch (e) {
    strictError = e;
  }

  // Recovery: close whatever the scan found open at EOF.
  const repaired = completeTruncatedJson(raw);
  if (repaired) {
    try {
      return { ok: true, value: JSON.parse(repaired.text), recovered: repaired.added };
    } catch {
      /* fall through — report the original strict-parse error */
    }
  }
  return { ok: false, error: strictError?.message || "JSON parse failed" };
}
|
|
98
|
+
|
|
55
99
|
// Phases where worker LLM tools are available (DISTILL mode).
|
|
56
100
|
// E1: FINALIZATION inherits worker-LLM access so one-last-pass validation
|
|
57
101
|
// runs + dashboard_render + workflow_run stay usable during packaging.
|
|
@@ -462,6 +506,27 @@ export class AgentEngine {
|
|
|
462
506
|
return "";
|
|
463
507
|
}
|
|
464
508
|
|
|
509
|
+
/**
|
|
510
|
+
* v0.7.0 B3: Read rules/PATTERNS.md (project memory) for surfacing in
|
|
511
|
+
* the system prompt. Only loaded for phases where the agent owns
|
|
512
|
+
* decomposition decisions (skill_authoring + skill_testing — the two
|
|
513
|
+
* phases the work-decomposition skill operates in). Capped at ~5 KB
|
|
514
|
+
* so it stays trivial token-wise; if the file is larger, we truncate
|
|
515
|
+
* to the first 5 KB and append a "...truncated" marker so the agent
|
|
516
|
+
* knows to prune.
|
|
517
|
+
*/
|
|
518
|
+
_readProjectMemory() {
|
|
519
|
+
if (!["skill_authoring", "skill_testing"].includes(this.currentPhase)) return null;
|
|
520
|
+
const p = path.join(this.workspace.cwd, "rules", "PATTERNS.md");
|
|
521
|
+
try {
|
|
522
|
+
if (!fs.existsSync(p)) return null;
|
|
523
|
+
const raw = fs.readFileSync(p, "utf-8");
|
|
524
|
+
const CAP = 5 * 1024;
|
|
525
|
+
if (raw.length <= CAP) return raw;
|
|
526
|
+
return raw.slice(0, CAP) + "\n\n…truncated at 5 KB — prune the least-actionable entries (work-decomposition skill: Sizing).";
|
|
527
|
+
} catch { return null; }
|
|
528
|
+
}
|
|
529
|
+
|
|
465
530
|
/**
|
|
466
531
|
* Build the workspace/project directory state string for the system prompt.
|
|
467
532
|
*/
|
|
@@ -501,6 +566,7 @@ export class AgentEngine {
|
|
|
501
566
|
skillIndex: this._skillLoader.formatForContext(this.currentPhase),
|
|
502
567
|
pipelineState: this.pipelines[this.currentPhase]?.describeState?.() || null,
|
|
503
568
|
workspaceState: this._buildWorkspaceState(),
|
|
569
|
+
projectMemory: this._readProjectMemory(),
|
|
504
570
|
});
|
|
505
571
|
const systemTokens = estimateTokens(systemPrompt);
|
|
506
572
|
const messageTokens = estimateMessagesTokens(this.history.messages);
|
|
@@ -659,8 +725,18 @@ export class AgentEngine {
|
|
|
659
725
|
async compact({ recentCount = 20 } = {}) {
|
|
660
726
|
if (this.history.messages.length <= recentCount) return null;
|
|
661
727
|
|
|
662
|
-
|
|
663
|
-
|
|
728
|
+
// v0.6.3.1: tool-pair atomicity. Naive slice(-recentCount) can land on
|
|
729
|
+
// a tool message (whose assistant_with_tool_calls is in the older batch
|
|
730
|
+
// about to be summarized) OR put the split between an assistant with
|
|
731
|
+
// tool_calls and its tool results. Either creates an orphan that
|
|
732
|
+
// DeepSeek's strict API rejects with 400. Walk the split point forward
|
|
733
|
+
// until BOTH (recent[0] isn't tool) AND (older[-1] isn't
|
|
734
|
+
// assistant_with_tool_calls).
|
|
735
|
+
const desiredSplit = this.history.messages.length - recentCount;
|
|
736
|
+
const splitPoint = findSafeSplitPoint(this.history.messages, desiredSplit);
|
|
737
|
+
const olderMessages = this.history.messages.slice(0, splitPoint);
|
|
738
|
+
const recentMessages = this.history.messages.slice(splitPoint);
|
|
739
|
+
if (olderMessages.length === 0) return null; // nothing safely summarizable
|
|
664
740
|
|
|
665
741
|
const CHUNK_BUDGET = 30000; // tokens per summarization request
|
|
666
742
|
const chunks = this._chunkMessages(olderMessages, CHUNK_BUDGET);
|
|
@@ -793,6 +869,39 @@ export class AgentEngine {
|
|
|
793
869
|
engine._registerToolsForPhase(engine.currentPhase);
|
|
794
870
|
engine.workspace.setPhase(engine.currentPhase);
|
|
795
871
|
|
|
872
|
+
// v0.6.3.1: detect whether prior turns of this session used reasoning
|
|
873
|
+
// mode, so the field-consistency invariant continues across resume.
|
|
874
|
+
// Without this, the first assistant turn after resume might lack
|
|
875
|
+
// reasoning_content even though earlier turns have it, and DeepSeek's
|
|
876
|
+
// strict-mode rejects with 400.
|
|
877
|
+
try {
|
|
878
|
+
const msgs = engine.history?.messages || [];
|
|
879
|
+
engine._sessionUsesReasoning = msgs.some(
|
|
880
|
+
(m) => m?.role === "assistant" && "reasoning_content" in m,
|
|
881
|
+
);
|
|
882
|
+
// One-shot migration: backfill empty reasoning_content on assistant
|
|
883
|
+
// messages that are missing the field. Pre-v0.6.3.1 sessions could
|
|
884
|
+
// accumulate "holes" (turns where the model skipped reasoning) that
|
|
885
|
+
// poison the conversation for resume. A single empty string on each
|
|
886
|
+
// hole is enough to satisfy DeepSeek's field-consistency rule.
|
|
887
|
+
if (engine._sessionUsesReasoning) {
|
|
888
|
+
let patched = 0;
|
|
889
|
+
for (const m of msgs) {
|
|
890
|
+
if (m?.role === "assistant" && !("reasoning_content" in m)) {
|
|
891
|
+
m.reasoning_content = "";
|
|
892
|
+
patched++;
|
|
893
|
+
}
|
|
894
|
+
}
|
|
895
|
+
if (patched > 0) {
|
|
896
|
+
engine.history._save?.();
|
|
897
|
+
engine.eventLog.append("reasoning_content_backfilled", {
|
|
898
|
+
count: patched,
|
|
899
|
+
reason: "v0.6.3.1 migration on resume",
|
|
900
|
+
});
|
|
901
|
+
}
|
|
902
|
+
}
|
|
903
|
+
} catch { /* never let resume break on this */ }
|
|
904
|
+
|
|
796
905
|
// Restore project directory from saved state
|
|
797
906
|
if (data.projectDir) {
|
|
798
907
|
if (fs.existsSync(data.projectDir)) {
|
|
@@ -905,6 +1014,7 @@ export class AgentEngine {
|
|
|
905
1014
|
skillIndex: this._skillLoader.formatForContext(this.currentPhase),
|
|
906
1015
|
pipelineState,
|
|
907
1016
|
workspaceState: this._buildWorkspaceState(),
|
|
1017
|
+
projectMemory: this._readProjectMemory(),
|
|
908
1018
|
});
|
|
909
1019
|
const tools = this.toolRegistry.schemasOpenai();
|
|
910
1020
|
|
|
@@ -933,6 +1043,19 @@ export class AgentEngine {
|
|
|
933
1043
|
|
|
934
1044
|
try {
|
|
935
1045
|
let collectedText = "";
|
|
1046
|
+
// v0.7.0 L (#76): Anthropic-only — accumulator for the
|
|
1047
|
+
// signature_delta blob that proves the thinking content came
|
|
1048
|
+
// from Anthropic's model. Required alongside thinking text on
|
|
1049
|
+
// multi-turn replay.
|
|
1050
|
+
let collectedReasoningSignature = "";
|
|
1051
|
+
// v0.6.3: hybrid reasoning models (GLM-5.1, DeepSeek v4, MiMo v2.5,
|
|
1052
|
+
// Qwen3, ...) stream `delta.reasoning_content` separately from
|
|
1053
|
+
// `delta.content`. DeepSeek's strict API requires this field to be
|
|
1054
|
+
// round-tripped on subsequent assistant messages or it rejects the
|
|
1055
|
+
// request with "reasoning_content in the thinking mode must be passed
|
|
1056
|
+
// back". Even providers that don't enforce this (SiliconFlow) still
|
|
1057
|
+
// benefit from preservation — without it, prior reasoning is wasted.
|
|
1058
|
+
let collectedReasoning = "";
|
|
936
1059
|
/** @type {Map<number, {id: string, name: string, arguments: string}>} */
|
|
937
1060
|
const toolCallsAcc = new Map();
|
|
938
1061
|
|
|
@@ -952,6 +1075,22 @@ export class AgentEngine {
|
|
|
952
1075
|
collectedText += delta.content;
|
|
953
1076
|
}
|
|
954
1077
|
|
|
1078
|
+
// v0.6.3: capture reasoning_content from the same delta. Emit a
|
|
1079
|
+
// separate event type so the TUI can optionally render thinking
|
|
1080
|
+
// (today it's silently consumed; round-trip is the priority fix).
|
|
1081
|
+
if (delta.reasoning_content) {
|
|
1082
|
+
yield new AgentEvent({ type: "reasoning_delta", text: delta.reasoning_content });
|
|
1083
|
+
collectedReasoning += delta.reasoning_content;
|
|
1084
|
+
}
|
|
1085
|
+
|
|
1086
|
+
// v0.7.0 L (#76): Anthropic-only signature_delta. Carries the
|
|
1087
|
+
// opaque proof-of-thinking blob that strict-mode multi-turn
|
|
1088
|
+
// requires alongside the thinking text. OpenAI-shape providers
|
|
1089
|
+
// never emit this delta; it's a no-op for them.
|
|
1090
|
+
if (delta.reasoning_signature) {
|
|
1091
|
+
collectedReasoningSignature += delta.reasoning_signature;
|
|
1092
|
+
}
|
|
1093
|
+
|
|
955
1094
|
if (delta.tool_calls) {
|
|
956
1095
|
for (const tcDelta of delta.tool_calls) {
|
|
957
1096
|
const idx = tcDelta.index;
|
|
@@ -968,6 +1107,31 @@ export class AgentEngine {
|
|
|
968
1107
|
|
|
969
1108
|
// Log the complete assistant message (coalesced, not per-delta)
|
|
970
1109
|
const assistantMsg = { role: "assistant", content: collectedText || null };
|
|
1110
|
+
// v0.6.3: persist reasoning_content on the assistant message so it
|
|
1111
|
+
// round-trips on the next request. history.addRaw spreads the input,
|
|
1112
|
+
// preserving unknown fields; OpenAI body builder doesn't strip them.
|
|
1113
|
+
//
|
|
1114
|
+
// v0.6.3.1: DeepSeek's strict-mode rule is FIELD CONSISTENCY, not
|
|
1115
|
+
// field content — once any assistant turn in the conversation has
|
|
1116
|
+
// reasoning_content, every subsequent assistant turn must also have
|
|
1117
|
+
// it (empty string OK; missing the field rejects with 400). Hybrid
|
|
1118
|
+
// reasoning models sometimes skip reasoning on trivial follow-through
|
|
1119
|
+
// tool calls, leaving collectedReasoning="". Track at session level:
|
|
1120
|
+
// once we see ANY reasoning, keep setting the field (possibly empty)
|
|
1121
|
+
// for the rest of the session. Providers that don't use the field
|
|
1122
|
+
// ignore it silently.
|
|
1123
|
+
if (collectedReasoning) {
|
|
1124
|
+
assistantMsg.reasoning_content = collectedReasoning;
|
|
1125
|
+
this._sessionUsesReasoning = true;
|
|
1126
|
+
} else if (this._sessionUsesReasoning) {
|
|
1127
|
+
assistantMsg.reasoning_content = "";
|
|
1128
|
+
}
|
|
1129
|
+
// v0.7.0 L (#76): persist Anthropic signature alongside thinking.
|
|
1130
|
+
// Always stored together — if either is missing, _buildAnthropicBody
|
|
1131
|
+
// skips the thinking-block replay (would be rejected as malformed).
|
|
1132
|
+
if (collectedReasoningSignature) {
|
|
1133
|
+
assistantMsg.reasoning_signature = collectedReasoningSignature;
|
|
1134
|
+
}
|
|
971
1135
|
if (toolCallsAcc.size > 0) {
|
|
972
1136
|
assistantMsg.tool_calls = Array.from(toolCallsAcc.values()).map((tc) => ({
|
|
973
1137
|
id: tc.id,
|
|
@@ -1024,10 +1188,61 @@ export class AgentEngine {
|
|
|
1024
1188
|
|
|
1025
1189
|
// Tool execution loop
|
|
1026
1190
|
for (const tc of toolCallsAcc.values()) {
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1191
|
+
// v0.6.3.1: tool-argument JSON parsing used to be `try { parse } catch {}`
|
|
1192
|
+
// — silently falling back to {} on any parse failure. E2E #5 GLM
|
|
1193
|
+
// session showed this firing 100+ times: SiliconFlow streaming
|
|
1194
|
+
// truncates GLM-5.1 tool_call arguments by ~1 closing brace
|
|
1195
|
+
// (likely max_tokens cutoff mid-args), the silent fallback shipped
|
|
1196
|
+
// {} to the tool, and the tool returned generic "(empty)" errors
|
|
1197
|
+
// which the agent kept retrying without understanding why.
|
|
1198
|
+
//
|
|
1199
|
+
// Fix: try strict parse, then attempt brace-balance recovery (cheap
|
|
1200
|
+
// — recovers from the common single-brace-truncation case), and if
|
|
1201
|
+
// that fails, surface a structured error to the agent so it can
|
|
1202
|
+
// see what it sent and self-correct.
|
|
1203
|
+
let inputData = null;
|
|
1204
|
+
let argParseError = null;
|
|
1205
|
+
if (tc.arguments) {
|
|
1206
|
+
const recovery = parseToolArgsTolerant(tc.arguments);
|
|
1207
|
+
if (recovery.ok) {
|
|
1208
|
+
inputData = recovery.value;
|
|
1209
|
+
if (recovery.recovered) {
|
|
1210
|
+
this.eventLog.append("tool_args_recovered", {
|
|
1211
|
+
name: tc.name,
|
|
1212
|
+
added_chars: recovery.recovered,
|
|
1213
|
+
original_len: tc.arguments.length,
|
|
1214
|
+
});
|
|
1215
|
+
}
|
|
1216
|
+
} else {
|
|
1217
|
+
argParseError = recovery.error;
|
|
1218
|
+
}
|
|
1219
|
+
} else {
|
|
1220
|
+
inputData = {};
|
|
1221
|
+
}
|
|
1222
|
+
|
|
1223
|
+
// If arguments were unparseable, skip execution and return a tool
|
|
1224
|
+
// result that tells the agent what went wrong. Engine's tool result
|
|
1225
|
+
// loop continues so the rest of the assistant's tool_calls in this
|
|
1226
|
+
// turn still execute.
|
|
1227
|
+
if (argParseError) {
|
|
1228
|
+
const preview = (tc.arguments || "").slice(0, 200);
|
|
1229
|
+
const errMsg =
|
|
1230
|
+
`Tool arguments were malformed JSON for ${tc.name}. ` +
|
|
1231
|
+
`Likely streaming truncation by the model (provider cut tokens mid-output). ` +
|
|
1232
|
+
`Parser error: ${argParseError}. ` +
|
|
1233
|
+
`First 200 chars of what was received: ${preview}${tc.arguments && tc.arguments.length > 200 ? "..." : ""}. ` +
|
|
1234
|
+
`Retry the call with shorter / simpler arguments — the model may have hit max_tokens partway through encoding.`;
|
|
1235
|
+
this.eventLog.append("tool_args_parse_failed", {
|
|
1236
|
+
name: tc.name,
|
|
1237
|
+
error: argParseError,
|
|
1238
|
+
raw_args_len: (tc.arguments || "").length,
|
|
1239
|
+
raw_preview: preview,
|
|
1240
|
+
});
|
|
1241
|
+
yield new AgentEvent({ type: "tool_start", name: tc.name, input: { _parse_error: argParseError } });
|
|
1242
|
+
yield new AgentEvent({ type: "tool_result", name: tc.name, output: errMsg, isError: true });
|
|
1243
|
+
this.history.addRaw({ role: "tool", tool_call_id: tc.id, content: errMsg });
|
|
1244
|
+
continue;
|
|
1245
|
+
}
|
|
1031
1246
|
|
|
1032
1247
|
this.eventLog.append("tool_start", { name: tc.name, input: inputData });
|
|
1033
1248
|
yield new AgentEvent({ type: "tool_start", name: tc.name, input: inputData });
|
|
@@ -1082,10 +1297,31 @@ export class AgentEngine {
|
|
|
1082
1297
|
isError: result.isError,
|
|
1083
1298
|
});
|
|
1084
1299
|
|
|
1300
|
+
// v0.6.3 (#74): phase-misfit nudge. Ask the current pipeline whether
|
|
1301
|
+
// this tool call looks like work that belongs to a different phase.
|
|
1302
|
+
// If so, append a `<system-reminder>` tag to the tool result content
|
|
1303
|
+
// (same convention as task-tools and auto-memory reminders). The
|
|
1304
|
+
// agent sees this on its next turn and can self-check whether to
|
|
1305
|
+
// call phase_advance. Only fires for non-error results — failed
|
|
1306
|
+
// tool calls have their own error message and don't need the nudge.
|
|
1307
|
+
let nudgedContent = historyContent;
|
|
1308
|
+
try {
|
|
1309
|
+
const pipelineForPhase = this.pipelines?.[beforePhase];
|
|
1310
|
+
const hint = pipelineForPhase?.phaseMisfitHint?.(tc.name, inputData, result);
|
|
1311
|
+
if (hint && !result.isError) {
|
|
1312
|
+
nudgedContent = `${historyContent}\n\n<system-reminder>\nPhase-misfit detected: ${hint}\n</system-reminder>`;
|
|
1313
|
+
this.eventLog.append("phase_misfit_hint", {
|
|
1314
|
+
phase: beforePhase,
|
|
1315
|
+
tool: tc.name,
|
|
1316
|
+
hint,
|
|
1317
|
+
});
|
|
1318
|
+
}
|
|
1319
|
+
} catch { /* never let the nudge logic break the tool loop */ }
|
|
1320
|
+
|
|
1085
1321
|
this.history.addRaw({
|
|
1086
1322
|
role: "tool",
|
|
1087
1323
|
tool_call_id: tc.id,
|
|
1088
|
-
content:
|
|
1324
|
+
content: nudgedContent,
|
|
1089
1325
|
});
|
|
1090
1326
|
|
|
1091
1327
|
// Post-tool-result safety net: check for context pressure RIGHT NOW
|
|
@@ -1162,14 +1398,81 @@ export class AgentEngine {
|
|
|
1162
1398
|
|
|
1163
1399
|
const expected = NEXT_PHASE[this.currentPhase];
|
|
1164
1400
|
if (!force && nextPhase !== expected) {
|
|
1401
|
+
// v0.7.0 A3: event-log hint stays factual (records what the gate
|
|
1402
|
+
// saw) — the LLM-facing refusal text in phase-advance.js no longer
|
|
1403
|
+
// advertises force:true. Hint kept here for post-mortem audit.
|
|
1165
1404
|
this.eventLog.append("phase_advance_refused", {
|
|
1166
1405
|
from: this.currentPhase, to: nextPhase, reason,
|
|
1167
|
-
hint: expected ? `
|
|
1406
|
+
hint: expected ? `non-adjacent transition; immediate next phase is '${expected}'`
|
|
1168
1407
|
: `${this.currentPhase} is the terminal phase`,
|
|
1169
1408
|
});
|
|
1170
1409
|
return false;
|
|
1171
1410
|
}
|
|
1172
1411
|
|
|
1412
|
+
// v0.7.0 A5: reconcile per-rule tasks against disk artifacts before
|
|
1413
|
+
// checking exit criteria. Catches the E2E #5 DS pattern (tasks.json
|
|
1414
|
+
// showed 70/70 done while only 56 dirs / 36 with check_*.py existed):
|
|
1415
|
+
// markDone() is fire-and-forget today, so the agent can claim
|
|
1416
|
+
// completion that didn't materialize. Reconcile flips back to
|
|
1417
|
+
// pending if the helper-derived ruleIdsCovered set doesn't include
|
|
1418
|
+
// the task's ruleId. A "force"d advance bypasses reconcile too —
|
|
1419
|
+
// the gate already gives the agent / user that escape.
|
|
1420
|
+
if (!force && this.taskManager && this.workspace) {
|
|
1421
|
+
try {
|
|
1422
|
+
const sa = deriveSkillAuthoringMilestones(this.workspace);
|
|
1423
|
+
const covered = new Set(sa.ruleIdsCovered);
|
|
1424
|
+
const tm = deriveSkillTestingMilestones(this.workspace);
|
|
1425
|
+
const tested = new Set(tm.skillsTested);
|
|
1426
|
+
const r = this.taskManager.reconcileAgainstDisk((task) => {
|
|
1427
|
+
if (task.phase === "skill_authoring") return covered.has(task.ruleId);
|
|
1428
|
+
if (task.phase === "skill_testing") return tested.has(task.ruleId);
|
|
1429
|
+
return true; // unknown phase — leave alone
|
|
1430
|
+
});
|
|
1431
|
+
if (r.flippedBack.length > 0) {
|
|
1432
|
+
this.eventLog.append("tasks_reconciled", {
|
|
1433
|
+
from_phase: this.currentPhase,
|
|
1434
|
+
target_phase: nextPhase,
|
|
1435
|
+
flipped_back: r.flippedBack,
|
|
1436
|
+
count: r.flippedBack.length,
|
|
1437
|
+
inspected: r.reconciled,
|
|
1438
|
+
});
|
|
1439
|
+
}
|
|
1440
|
+
} catch { /* never let reconcile break advance */ }
|
|
1441
|
+
}
|
|
1442
|
+
|
|
1443
|
+
// v0.6.3: HARD-TRACKING GATE — refuse forward advance unless the source
|
|
1444
|
+
// phase's exit criteria are met by engine telemetry. v0.6.1 added the
|
|
1445
|
+
// engineCounts block to phase summaries (observation) but never wired
|
|
1446
|
+
// exitCriteriaMet() into the gate (enforcement). E2E #5 surfaced the
|
|
1447
|
+
// gap: MiMo advanced rule_extraction → skill_authoring with
|
|
1448
|
+
// rulesExtracted=0 in engine telemetry because rule_catalog had been
|
|
1449
|
+
// writing to a stranded post-rename path AND nothing checked the gate.
|
|
1450
|
+
//
|
|
1451
|
+
// Forward-only enforcement: rollbacks (_advancePhase from a later phase
|
|
1452
|
+
// to an earlier one with force:true) are an explicit escape, not a
|
|
1453
|
+
// criteria check — the rolled-from phase doesn't need to be "complete".
|
|
1454
|
+
// force:true also bypasses (matches existing escape pattern: user/agent
|
|
1455
|
+
// explicitly chose to skip).
|
|
1456
|
+
if (!force) {
|
|
1457
|
+
const fromIdx = PHASE_ORDER.indexOf(this.currentPhase);
|
|
1458
|
+
const toIdx = PHASE_ORDER.indexOf(nextPhase);
|
|
1459
|
+
const isForward = fromIdx >= 0 && toIdx >= 0 && toIdx > fromIdx;
|
|
1460
|
+
if (isForward) {
|
|
1461
|
+
const fromPipeline = this.pipelines?.[this.currentPhase];
|
|
1462
|
+
let criteriaMet = true;
|
|
1463
|
+
try { criteriaMet = !!fromPipeline?.exitCriteriaMet?.(); } catch { criteriaMet = true; }
|
|
1464
|
+
if (!criteriaMet) {
|
|
1465
|
+
const counts = this._buildEngineCountsBlock(this.currentPhase);
|
|
1466
|
+
this.eventLog.append("phase_advance_refused", {
|
|
1467
|
+
from: this.currentPhase, to: nextPhase, reason,
|
|
1468
|
+
hint: "exit criteria not met by engine telemetry",
|
|
1469
|
+
engineCounts: counts || null,
|
|
1470
|
+
});
|
|
1471
|
+
return false;
|
|
1472
|
+
}
|
|
1473
|
+
}
|
|
1474
|
+
}
|
|
1475
|
+
|
|
1173
1476
|
// v0.6.2 J2: detect rollback direction. PHASE_ORDER is a linear array
|
|
1174
1477
|
// of all phases; if target index < current index, this is a rollback
|
|
1175
1478
|
// (e.g., production_qc → skill_authoring after gates revealed gaps).
|
|
@@ -1185,9 +1488,15 @@ export class AgentEngine {
|
|
|
1185
1488
|
const engineCounts = this._buildEngineCountsBlock(this.currentPhase);
|
|
1186
1489
|
const mismatchPrefix = this._detectSummaryMismatch(reason, this.currentPhase) ? "⚠️ POSSIBLE MISMATCH: " : "";
|
|
1187
1490
|
const directionTag = direction === "rollback" ? " [ROLLBACK]" : "";
|
|
1491
|
+
// v0.7.0 A2: forced is now `!!force` (honest), not the old
|
|
1492
|
+
// `force && nextPhase !== expected` which masked every adjacent-forward
|
|
1493
|
+
// force in the audit log. E2E #5 had 12/12 force-bypasses but the event
|
|
1494
|
+
// log read 0 forced because every transition was to the immediate next
|
|
1495
|
+
// phase. Truth in audit logs first; refinement (forward-vs-non-adjacent
|
|
1496
|
+
// distinction) lives in the `direction` field.
|
|
1188
1497
|
const phaseSummary =
|
|
1189
1498
|
`[${this.currentPhase.toUpperCase()} → ${nextPhase.toUpperCase()}]${directionTag}: ${mismatchPrefix}${reason}` +
|
|
1190
|
-
(force
|
|
1499
|
+
(force ? " (forced)" : "") +
|
|
1191
1500
|
(engineCounts ? `\n (engine) ${engineCounts}` : "");
|
|
1192
1501
|
this._phaseSummaries.push(phaseSummary);
|
|
1193
1502
|
this.eventLog.append("phase_transition", {
|
|
@@ -1197,7 +1506,7 @@ export class AgentEngine {
|
|
|
1197
1506
|
direction,
|
|
1198
1507
|
engineCounts: engineCounts || null,
|
|
1199
1508
|
possibleMismatch: !!mismatchPrefix,
|
|
1200
|
-
forced: force
|
|
1509
|
+
forced: !!force,
|
|
1201
1510
|
});
|
|
1202
1511
|
const fromPhase = this.currentPhase;
|
|
1203
1512
|
this.currentPhase = nextPhase;
|
|
@@ -1205,6 +1514,18 @@ export class AgentEngine {
|
|
|
1205
1514
|
this.workspace.setPhase(this.currentPhase);
|
|
1206
1515
|
this._createTasksForPhase(this.currentPhase);
|
|
1207
1516
|
|
|
1517
|
+
// v0.7.0 N (#94): give the entered pipeline a chance to do
|
|
1518
|
+
// phase-entry setup. Used by finalization to copy the release
|
|
1519
|
+
// template into output/releases/v1/. Other pipelines are no-ops.
|
|
1520
|
+
// Wrapped so a failure here can't trap the phase advance.
|
|
1521
|
+
try { this.pipelines[this.currentPhase]?.onPhaseEnter?.({ fromPhase, workspace: this.workspace }); }
|
|
1522
|
+
catch (e) {
|
|
1523
|
+
this.eventLog.append("phase_enter_hook_failed", {
|
|
1524
|
+
phase: this.currentPhase,
|
|
1525
|
+
error: e?.message || String(e),
|
|
1526
|
+
});
|
|
1527
|
+
}
|
|
1528
|
+
|
|
1208
1529
|
// v0.6.2 J2: on rollback, reset the rolled-FROM phase's lastReady
|
|
1209
1530
|
// edge-trigger so that if the agent revisits it and re-flips
|
|
1210
1531
|
// exit-criteria true, _maybeAutoAdvance will fire correctly. Without
|
|
@@ -1298,7 +1619,7 @@ export class AgentEngine {
|
|
|
1298
1619
|
const parts = [];
|
|
1299
1620
|
try {
|
|
1300
1621
|
switch (fromPhase) {
|
|
1301
|
-
case "
|
|
1622
|
+
case "rule_extraction": {
|
|
1302
1623
|
const total = pipeline._catalogRuleCount?.() ?? pipeline.rulesExtracted?.length ?? 0;
|
|
1303
1624
|
parts.push(`rulesExtracted: ${pipeline.rulesExtracted?.length ?? 0}`);
|
|
1304
1625
|
parts.push(`rulesWithChunkRefs: ${pipeline.rulesWithChunkRefs?.length ?? 0}/${total}`);
|
|
@@ -1746,11 +2067,23 @@ export class AgentEngine {
|
|
|
1746
2067
|
|
|
1747
2068
|
// Auto-continue through pending tasks
|
|
1748
2069
|
while (this.taskManager.getNextPending()) {
|
|
1749
|
-
//
|
|
2070
|
+
// v0.7.0 #93: budget-aware compact threshold. The old
|
|
2071
|
+
// `messages.length > 15` was message-count-based and frozen
|
|
2072
|
+
// from when KC ran on smaller contexts. With 200K+ budgets it
|
|
2073
|
+
// fired on every iteration of any non-trivial task — E2E #5
|
|
2074
|
+
// GLM saw 76 memory_pressure events and DS saw 46 because
|
|
2075
|
+
// compact pre-empted natural windowing. Replace with token-
|
|
2076
|
+
// budget threshold (default 60% of context, configurable via
|
|
2077
|
+
// KC_COMPACT_THRESHOLD_TOKENS) so compact runs when there's
|
|
2078
|
+
// actual pressure, not just when message count crossed an
|
|
2079
|
+
// ancient heuristic.
|
|
1750
2080
|
const stats = this.getContextStats();
|
|
2081
|
+
const thresholdTokens = parseInt(
|
|
2082
|
+
process.env.KC_COMPACT_THRESHOLD_TOKENS || "0", 10,
|
|
2083
|
+
) || Math.round((this.config.kcContextLimit || 200000) * 0.6);
|
|
1751
2084
|
if (stats.percentage > 70) {
|
|
1752
2085
|
await this.compact();
|
|
1753
|
-
} else if (
|
|
2086
|
+
} else if (stats.totalTokens > thresholdTokens) {
|
|
1754
2087
|
await this.compact({ recentCount: 8 });
|
|
1755
2088
|
}
|
|
1756
2089
|
|
|
@@ -1919,10 +2252,18 @@ export class AgentEngine {
|
|
|
1919
2252
|
continue;
|
|
1920
2253
|
}
|
|
1921
2254
|
|
|
1922
|
-
|
|
1923
|
-
|
|
1924
|
-
|
|
1925
|
-
|
|
2255
|
+
// v0.7.0 H1: trackedPromise covers both fulfilled and rejected
|
|
2256
|
+
// paths (second arg). The .catch tail is belt-and-braces in case
|
|
2257
|
+
// the .then callbacks themselves throw — without it, a JSON
|
|
2258
|
+
// serialization throw inside the success-arm callback would
|
|
2259
|
+
// surface as UnhandledPromiseRejection and crash strict-mode
|
|
2260
|
+
// Node. We never want a worker error to take the engine down.
|
|
2261
|
+
const trackedPromise = entry.promise
|
|
2262
|
+
.then(
|
|
2263
|
+
() => ({ taskId: task.id, subId, ok: true }),
|
|
2264
|
+
(e) => ({ taskId: task.id, subId, ok: false, error: e?.message || String(e) }),
|
|
2265
|
+
)
|
|
2266
|
+
.catch((e) => ({ taskId: task.id, subId, ok: false, error: `tracked-promise threw: ${e?.message || String(e)}` }));
|
|
1926
2267
|
inFlight.set(subId, { task, workerLabel, promise: trackedPromise });
|
|
1927
2268
|
}
|
|
1928
2269
|
};
|
|
@@ -1937,7 +2278,15 @@ export class AgentEngine {
|
|
|
1937
2278
|
|
|
1938
2279
|
if (inFlight.size === 0) break;
|
|
1939
2280
|
|
|
1940
|
-
// Wait for either the next event OR a worker to complete
|
|
2281
|
+
// Wait for either the next event OR a worker to complete.
|
|
2282
|
+
//
|
|
2283
|
+
// v0.7.0 C1 note: losers in Promise.race() keep their .then()
|
|
2284
|
+
// chains active and resolve into garbage objects. That's the
|
|
2285
|
+
// intended JS Promise behavior — rejections are still handled,
|
|
2286
|
+
// memory drops at GC. The audit was overstated; no actual hang
|
|
2287
|
+
// or leak. Each loop iteration rebuilds the race from current
|
|
2288
|
+
// inFlight.values() so stale promises from prior iterations
|
|
2289
|
+
// are naturally re-observed (they've already resolved by then).
|
|
1941
2290
|
const workerCompletion = Promise.race([...inFlight.values()].map((v) => v.promise));
|
|
1942
2291
|
const eventArrival = new Promise((resolve) => { notify = () => resolve("event"); });
|
|
1943
2292
|
const winner = await Promise.race([
|