kc-beta 0.6.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +81 -0
- package/LICENSE-COMMERCIAL.md +125 -0
- package/README.md +21 -3
- package/package.json +14 -5
- package/src/agent/context-window.js +9 -12
- package/src/agent/context.js +14 -1
- package/src/agent/document-parser.js +169 -0
- package/src/agent/engine.js +367 -18
- package/src/agent/history/event-history.js +222 -0
- package/src/agent/llm-client.js +55 -0
- package/src/agent/message-utils.js +63 -0
- package/src/agent/pipelines/_milestone-derive.js +511 -0
- package/src/agent/pipelines/base.js +21 -0
- package/src/agent/pipelines/distillation.js +28 -15
- package/src/agent/pipelines/extraction.js +103 -36
- package/src/agent/pipelines/finalization.js +178 -11
- package/src/agent/pipelines/index.js +6 -1
- package/src/agent/pipelines/initializer.js +74 -8
- package/src/agent/pipelines/production-qc.js +31 -44
- package/src/agent/pipelines/skill-authoring.js +97 -80
- package/src/agent/pipelines/skill-testing.js +67 -23
- package/src/agent/retry.js +10 -2
- package/src/agent/scheduler.js +14 -2
- package/src/agent/session-state.js +18 -1
- package/src/agent/skill-loader.js +13 -7
- package/src/agent/skill-validator.js +19 -5
- package/src/agent/task-manager.js +61 -5
- package/src/agent/tools/document-chunk.js +21 -9
- package/src/agent/tools/phase-advance.js +18 -3
- package/src/agent/tools/release.js +51 -9
- package/src/agent/tools/rule-catalog.js +11 -1
- package/src/agent/tools/workspace-file.js +32 -0
- package/src/agent/workspace.js +39 -1
- package/src/cli/components.js +64 -14
- package/src/cli/index.js +62 -3
- package/src/cli/meme.js +26 -25
- package/src/config.js +65 -22
- package/src/model-tiers.json +24 -8
- package/src/providers.js +42 -0
- package/template/release/v1/README.md.tmpl +108 -0
- package/template/release/v1/catalog.json.tmpl +4 -0
- package/template/release/v1/kc_runtime/__init__.py +11 -0
- package/template/release/v1/kc_runtime/confidence.py +63 -0
- package/template/release/v1/kc_runtime/doc_parser.py +127 -0
- package/template/release/v1/manifest.json.tmpl +11 -0
- package/template/release/v1/render_dashboard.py +117 -0
- package/template/release/v1/run.py +212 -0
- package/template/release/v1/serve.sh +17 -0
- package/template/skills/en/meta-meta/work-decomposition/SKILL.md +266 -0
- package/template/skills/en/skill-creator/SKILL.md +1 -1
- package/template/skills/zh/meta-meta/work-decomposition/SKILL.md +264 -0
- package/template/skills/zh/skill-creator/SKILL.md +1 -1
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* v0.7.0 E1m (#90): minimal event-atomic context.
|
|
3
|
+
*
|
|
4
|
+
* History is conceptually a sequence of *events*. Each event encapsulates
|
|
5
|
+
* one or more chat messages that must travel together — splitting one
|
|
6
|
+
* event mid-flight produces the orphan-tool / orphan-tool_calls failure
|
|
7
|
+
* mode that DeepSeek's strict API rejects with HTTP 400.
|
|
8
|
+
*
|
|
9
|
+
* Yibo's framing (E2E #5 post-mortem): "history message and context
|
|
10
|
+
* management... should be managed by events, like agent message, llm
|
|
11
|
+
* call, tool use, etc. By design, a cut in the middle of an event
|
|
12
|
+
* shouldn't happen."
|
|
13
|
+
*
|
|
14
|
+
* Scope (v0.7.0 minimal): events are a *derived view*, computed from
|
|
15
|
+
* the existing flat messages array on demand. The flat array stays as
|
|
16
|
+
* the canonical store. compact() and windowing use event boundaries
|
|
17
|
+
* to find safe cut points; they never split mid-event.
|
|
18
|
+
*
|
|
19
|
+
* Future v0.8.x may invert this and make events the canonical store.
|
|
20
|
+
* The reversible helpers (messagesToEvents / eventsToMessages) make
|
|
21
|
+
* that migration cheap when the time comes.
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
export const EventType = Object.freeze({
|
|
25
|
+
USER_TURN: "user_turn",
|
|
26
|
+
ASSISTANT_TURN: "assistant_turn",
|
|
27
|
+
TOOL_CALL_PAIR: "tool_call_pair",
|
|
28
|
+
SYSTEM_REMINDER: "system_reminder",
|
|
29
|
+
});
|
|
30
|
+
|
|
31
|
+
/**
|
|
32
|
+
* Group a flat OpenAI-shape messages array into atomic events.
|
|
33
|
+
*
|
|
34
|
+
* Event types and shapes:
|
|
35
|
+
* user_turn — { type, messages: [{role: "user", ...}] }
|
|
36
|
+
* assistant_turn — { type, messages: [{role: "assistant", content: "...", reasoning_content?, ...}] }
|
|
37
|
+
* (no tool_calls; if tool_calls present, becomes tool_call_pair)
|
|
38
|
+
* tool_call_pair — { type, messages: [
|
|
39
|
+
* {role: "assistant", tool_calls: [...], ...},
|
|
40
|
+
* {role: "tool", tool_call_id: ...},
|
|
41
|
+
* {role: "tool", tool_call_id: ...}, // 1+ tool results
|
|
42
|
+
* ] }
|
|
43
|
+
* system_reminder — { type, messages: [{role: "system", content: "..."}] }
|
|
44
|
+
* (mid-session system messages — kept for
|
|
45
|
+
* v0.6.3 phase-misfit-nudge etc.; the bootstrap
|
|
46
|
+
* system prompt is NOT in messages, lives separately)
|
|
47
|
+
*
|
|
48
|
+
* Unmatched tool messages (no preceding assistant_with_tool_calls)
|
|
49
|
+
* become a degenerate one-message tool_call_pair with no anchor. They
|
|
50
|
+
* mark a problematic split — caller can decide whether to drop or warn.
|
|
51
|
+
*
|
|
52
|
+
* @param {Array<object>} messages - flat OpenAI-shape array
|
|
53
|
+
* @returns {Array<{type: string, messages: object[], startIdx: number, endIdx: number}>}
|
|
54
|
+
* Events with original-index ranges so callers can map event boundaries
|
|
55
|
+
* back to slice cut points in the source messages array.
|
|
56
|
+
*/
|
|
57
|
+
export function messagesToEvents(messages) {
|
|
58
|
+
const events = [];
|
|
59
|
+
let i = 0;
|
|
60
|
+
while (i < messages.length) {
|
|
61
|
+
const m = messages[i];
|
|
62
|
+
if (!m || typeof m !== "object") { i++; continue; }
|
|
63
|
+
|
|
64
|
+
if (m.role === "system") {
|
|
65
|
+
events.push({
|
|
66
|
+
type: EventType.SYSTEM_REMINDER,
|
|
67
|
+
messages: [m],
|
|
68
|
+
startIdx: i,
|
|
69
|
+
endIdx: i,
|
|
70
|
+
});
|
|
71
|
+
i++;
|
|
72
|
+
continue;
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
if (m.role === "user") {
|
|
76
|
+
events.push({
|
|
77
|
+
type: EventType.USER_TURN,
|
|
78
|
+
messages: [m],
|
|
79
|
+
startIdx: i,
|
|
80
|
+
endIdx: i,
|
|
81
|
+
});
|
|
82
|
+
i++;
|
|
83
|
+
continue;
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
if (m.role === "assistant") {
|
|
87
|
+
const hasToolCalls = Array.isArray(m.tool_calls) && m.tool_calls.length > 0;
|
|
88
|
+
if (!hasToolCalls) {
|
|
89
|
+
events.push({
|
|
90
|
+
type: EventType.ASSISTANT_TURN,
|
|
91
|
+
messages: [m],
|
|
92
|
+
startIdx: i,
|
|
93
|
+
endIdx: i,
|
|
94
|
+
});
|
|
95
|
+
i++;
|
|
96
|
+
continue;
|
|
97
|
+
}
|
|
98
|
+
// Assistant with tool_calls — collect the matching tool result(s)
|
|
99
|
+
// that follow. Tool results may not appear in tool_calls order;
|
|
100
|
+
// we just consume contiguous tool messages until a non-tool
|
|
101
|
+
// appears or the array ends. Real OpenAI/Anthropic tool result
|
|
102
|
+
// sequences are always contiguous and immediate.
|
|
103
|
+
const expected = new Set(m.tool_calls.map((tc) => tc.id));
|
|
104
|
+
const group = [m];
|
|
105
|
+
let j = i + 1;
|
|
106
|
+
while (j < messages.length && messages[j]?.role === "tool") {
|
|
107
|
+
group.push(messages[j]);
|
|
108
|
+
// Don't enforce match strictly — Anthropic-format collapse
|
|
109
|
+
// can produce tool messages with synthesized IDs. Just consume
|
|
110
|
+
// contiguously; consumer of the event can validate IDs if needed.
|
|
111
|
+
j++;
|
|
112
|
+
}
|
|
113
|
+
events.push({
|
|
114
|
+
type: EventType.TOOL_CALL_PAIR,
|
|
115
|
+
messages: group,
|
|
116
|
+
startIdx: i,
|
|
117
|
+
endIdx: j - 1,
|
|
118
|
+
// Diagnostic: did we collect all expected tool results?
|
|
119
|
+
completePair: expected.size === 0 ||
|
|
120
|
+
group.slice(1).every((tm) => expected.has(tm.tool_call_id)),
|
|
121
|
+
});
|
|
122
|
+
i = j;
|
|
123
|
+
continue;
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
if (m.role === "tool") {
|
|
127
|
+
// Orphan tool message (no preceding assistant_with_tool_calls).
|
|
128
|
+
// Record as degenerate event so callers can spot + handle.
|
|
129
|
+
events.push({
|
|
130
|
+
type: EventType.TOOL_CALL_PAIR,
|
|
131
|
+
messages: [m],
|
|
132
|
+
startIdx: i,
|
|
133
|
+
endIdx: i,
|
|
134
|
+
completePair: false,
|
|
135
|
+
orphan: true,
|
|
136
|
+
});
|
|
137
|
+
i++;
|
|
138
|
+
continue;
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
// Unknown role — pass through as a singleton event with the role
|
|
142
|
+
// as type. Defensive: don't drop.
|
|
143
|
+
events.push({
|
|
144
|
+
type: m.role || "unknown",
|
|
145
|
+
messages: [m],
|
|
146
|
+
startIdx: i,
|
|
147
|
+
endIdx: i,
|
|
148
|
+
});
|
|
149
|
+
i++;
|
|
150
|
+
}
|
|
151
|
+
return events;
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
/**
 * Inverse of messagesToEvents: flatten a sequence of events back into
 * one OpenAI-shape messages array (same message object references, new
 * outer array). Used by callers that work in the events space and need
 * to hand a flat messages array to the LLM client.
 *
 * Entries that are null/undefined, or whose `messages` field is not an
 * array, contribute nothing.
 *
 * @param {Array<object>} events
 * @returns {Array<object>}
 */
export function eventsToMessages(events) {
  return events.flatMap((ev) => (Array.isArray(ev?.messages) ? ev.messages : []));
}
|
|
171
|
+
|
|
172
|
+
/**
 * Find the message index of the first event boundary at or after
 * `desiredSplit` such that splitting there produces two halves where
 * neither half contains a partial event.
 *
 * Used by compact() and windowing as the canonical cut-point chooser.
 * Backwards-compatible drop-in for findSafeSplitPoint (same signature
 * + same return semantics: a boundary >= the clamped desiredSplit).
 *
 * Algorithm: convert messages to events, then scan forward for the
 * first event whose startIdx >= the clamped desiredSplit and return
 * that startIdx. Because events partition the array contiguously, that
 * index is an event boundary; every earlier event — including the one
 * containing desiredSplit when the naive cut falls mid-event — lands
 * intact in the "older" half. If desiredSplit clamps to 0, return 0;
 * if it clamps to messages.length, or every event starts before it,
 * return messages.length.
 *
 * (NOTE(review): an earlier draft of this doc described returning
 * "endIdx + 1 of the last event ending before desiredSplit", which
 * would cut *before* a mid-split event and return a value below
 * desiredSplit — that contradicts both the ">= desiredSplit" contract
 * and the implementation below.)
 *
 * @param {Array<object>} messages
 * @param {number} desiredSplit - the cut point you'd take naïvely
 * @returns {number} a cut point that lands on an event boundary
 */
export function findEventBoundary(messages, desiredSplit) {
  if (!Array.isArray(messages) || messages.length === 0) return 0;
  const target = Math.max(0, Math.min(desiredSplit, messages.length));
  if (target === 0) return 0;
  if (target >= messages.length) return messages.length;

  const events = messagesToEvents(messages);
  // Walk forward — find the first event whose startIdx >= target.
  // The cut goes BEFORE that event (so the prior event is intact in
  // the "older" half). If no event satisfies, all events are before
  // target → cut at messages.length.
  for (const ev of events) {
    if (ev.startIdx >= target) return ev.startIdx;
  }
  return messages.length;
}
|
|
207
|
+
|
|
208
|
+
/**
 * Diagnostic: tally event types present in a messages array. Used by
 * tests and the heap analyzer to surface event-shape statistics.
 *
 * @param {Array<object>} messages
 * @returns {Record<string, number>} map of event type → occurrence count
 */
export function countEvents(messages) {
  const tally = {};
  for (const { type } of messagesToEvents(messages)) {
    tally[type] = (tally[type] ?? 0) + 1;
  }
  return tally;
}
|
package/src/agent/llm-client.js
CHANGED
|
@@ -88,6 +88,18 @@ export class LLMClient {
|
|
|
88
88
|
const body = { model, messages, stream };
|
|
89
89
|
if (maxTokens) body.max_tokens = maxTokens;
|
|
90
90
|
if (tools && tools.length > 0) body.tools = tools;
|
|
91
|
+
|
|
92
|
+
// Hybrid reasoning models (GLM-5.1, DeepSeek v4, MiMo v2.5, Qwen3, ...)
|
|
93
|
+
// emit `reasoning_content` by default. KC's history layer doesn't round-
|
|
94
|
+
// trip that field yet, so DeepSeek's strict API rejects subsequent turns
|
|
95
|
+
// ("reasoning_content in the thinking mode must be passed back to the
|
|
96
|
+
// API"). Set KC_DISABLE_THINKING=1 to opt out cleanly — providers that
|
|
97
|
+
// don't recognize the field ignore it. Proper round-trip support is a
|
|
98
|
+
// v0.6.3 item.
|
|
99
|
+
if (process.env.KC_DISABLE_THINKING === "1" || process.env.KC_DISABLE_THINKING === "true") {
|
|
100
|
+
body.thinking = { type: "disabled" };
|
|
101
|
+
}
|
|
102
|
+
|
|
91
103
|
return body;
|
|
92
104
|
}
|
|
93
105
|
|
|
@@ -111,6 +123,18 @@ export class LLMClient {
|
|
|
111
123
|
} else if (msg.role === "assistant" && msg.tool_calls) {
|
|
112
124
|
// Convert OpenAI tool_calls to Anthropic content blocks
|
|
113
125
|
const content = [];
|
|
126
|
+
// v0.7.0 L (#76): replay thinking block FIRST when prior turn
|
|
127
|
+
// produced one. Anthropic strict-mode requires the signature
|
|
128
|
+
// alongside the thinking text — drop either and the API rejects
|
|
129
|
+
// multi-turn. The thinking block belongs at the top of the
|
|
130
|
+
// assistant content array, before text and tool_use blocks.
|
|
131
|
+
if (msg.reasoning_content && msg.reasoning_signature) {
|
|
132
|
+
content.push({
|
|
133
|
+
type: "thinking",
|
|
134
|
+
thinking: msg.reasoning_content,
|
|
135
|
+
signature: msg.reasoning_signature,
|
|
136
|
+
});
|
|
137
|
+
}
|
|
114
138
|
if (msg.content) content.push({ type: "text", text: msg.content });
|
|
115
139
|
for (const tc of msg.tool_calls) {
|
|
116
140
|
let input = {};
|
|
@@ -123,6 +147,16 @@ export class LLMClient {
|
|
|
123
147
|
});
|
|
124
148
|
}
|
|
125
149
|
filteredMessages.push({ role: "assistant", content });
|
|
150
|
+
} else if (msg.role === "assistant" && msg.reasoning_content && msg.reasoning_signature) {
|
|
151
|
+
// v0.7.0 L: assistant turn with thinking but no tool_calls — wrap
|
|
152
|
+
// content as the dual-block form so the thinking block round-trips.
|
|
153
|
+
const content = [{
|
|
154
|
+
type: "thinking",
|
|
155
|
+
thinking: msg.reasoning_content,
|
|
156
|
+
signature: msg.reasoning_signature,
|
|
157
|
+
}];
|
|
158
|
+
if (msg.content) content.push({ type: "text", text: msg.content });
|
|
159
|
+
filteredMessages.push({ role: "assistant", content });
|
|
126
160
|
} else {
|
|
127
161
|
filteredMessages.push(msg);
|
|
128
162
|
}
|
|
@@ -447,6 +481,27 @@ export class LLMClient {
|
|
|
447
481
|
}],
|
|
448
482
|
};
|
|
449
483
|
}
|
|
484
|
+
// v0.7.0 L (#76): Anthropic streams reasoning ("thinking") as its
|
|
485
|
+
// own content block type. Normalize to the same `reasoning_content`
|
|
486
|
+
// field the OpenAI-compatible providers (DeepSeek, GLM-5.1, MiMo)
|
|
487
|
+
// already use, so engine.js's v0.6.3 round-trip path handles it
|
|
488
|
+
// without an Anthropic-specific branch.
|
|
489
|
+
//
|
|
490
|
+
// Anthropic also emits a signature_delta that proves the thinking
|
|
491
|
+
// came from Anthropic's model — the next-turn body MUST include it
|
|
492
|
+
// alongside the thinking text for strict-mode multi-turn to work.
|
|
493
|
+
// We carry it through as `reasoning_signature` (custom field) so
|
|
494
|
+
// _buildAnthropicBody can re-attach it.
|
|
495
|
+
if (delta?.type === "thinking_delta") {
|
|
496
|
+
return {
|
|
497
|
+
choices: [{ delta: { reasoning_content: delta.thinking } }],
|
|
498
|
+
};
|
|
499
|
+
}
|
|
500
|
+
if (delta?.type === "signature_delta") {
|
|
501
|
+
return {
|
|
502
|
+
choices: [{ delta: { reasoning_signature: delta.signature } }],
|
|
503
|
+
};
|
|
504
|
+
}
|
|
450
505
|
return null;
|
|
451
506
|
}
|
|
452
507
|
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Message-array utilities shared by engine.js (compact) and
|
|
3
|
+
* context-window.js (windowing). Lives in its own module to avoid
|
|
4
|
+
* the circular import that would result if either of those imported
|
|
5
|
+
* from the other.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { findEventBoundary } from "./history/event-history.js";
|
|
9
|
+
|
|
10
|
+
/**
 * Find a split point in a message array that won't create orphan tool
 * messages or orphan tool_calls.
 *
 * v0.7.0 E1m (#90): now delegates to findEventBoundary, which operates
 * on derived event boundaries from `history/event-history.js`. The
 * legacy heuristic check (orphan-tool / unpaired-tool_calls walk) is
 * kept as belt-and-braces defense — if the event helper for some
 * reason returns a position that still has a local orphan, the legacy
 * walk forwards past it.
 *
 * Invariant for a clean split at index `s`:
 *   - messages[s] is not role:"tool" (would orphan a tool result whose
 *     preceding assistant_with_tool_calls got summarized into the
 *     older slice)
 *   - messages[s-1] is not role:"assistant" with tool_calls (would
 *     orphan the tool_calls because their tool results sit at start of
 *     recent and the older-side summary breaks the pairing)
 *
 * E2E #5 (2026-04-28) surfaced this: compact() reduced 84 msgs → 12 with
 * msg[2] being an orphan tool message → DeepSeek 400 every subsequent
 * turn. v0.6.3.1 fixed it via the heuristic; v0.7.0 makes the event
 * structure explicit so future event types (sub-agent results, etc.)
 * extend the model rather than the heuristic.
 *
 * @param {Array<object>} messages
 * @param {number} desiredSplit - the split point you'd take naïvely
 * @returns {number} a safe split point ≥ desiredSplit
 */
export function findSafeSplitPoint(messages, desiredSplit) {
  // Primary: ask the event helper for the next event boundary at or
  // after desiredSplit. If events are well-formed (which they always
  // are when produced by the engine's own history.addRaw path), this
  // lands on a clean boundary by construction.
  const clamped = Math.max(0, Math.min(desiredSplit, messages.length));
  let cut = findEventBoundary(messages, clamped);

  // Defense-in-depth: legacy heuristic walk catches the edge case where
  // the messages array contains a manually-injected orphan (e.g., from
  // a prior buggy compact, or an externally-edited messages.json).
  // Cheap to keep and prevents regressions if a future event-type
  // addition has a bug.
  for (; cut < messages.length; cut++) {
    const recentFirst = messages[cut];
    const olderLast = cut > 0 ? messages[cut - 1] : null;
    const startsWithOrphanTool = recentFirst?.role === "tool";
    const endsWithDanglingCalls =
      olderLast?.role === "assistant" &&
      Array.isArray(olderLast?.tool_calls) &&
      olderLast.tool_calls.length > 0;
    if (!startsWithOrphanTool && !endsWithDanglingCalls) break;
  }
  return cut;
}
|