kc-beta 0.8.1 → 0.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/agent/context.js +17 -1
- package/src/agent/engine.js +85 -8
- package/src/agent/llm-client.js +24 -1
- package/src/agent/pipelines/_milestone-derive.js +78 -7
- package/src/agent/pipelines/skill-authoring.js +19 -2
- package/src/agent/tools/release.js +94 -1
- package/src/cli/index.js +28 -7
- package/template/.env.template +1 -1
- package/template/AGENT.md +2 -2
- package/template/skills/en/auto-model-selection/SKILL.md +55 -35
- package/template/skills/en/bootstrap-workspace/SKILL.md +13 -0
- package/template/skills/en/compliance-judgment/SKILL.md +14 -0
- package/template/skills/en/confidence-system/SKILL.md +30 -8
- package/template/skills/en/corner-case-management/SKILL.md +53 -33
- package/template/skills/en/cross-document-verification/SKILL.md +88 -83
- package/template/skills/en/dashboard-reporting/SKILL.md +91 -66
- package/template/skills/en/dashboard-reporting/scripts/generate_dashboard.py +1 -1
- package/template/skills/en/data-sensibility/SKILL.md +19 -12
- package/template/skills/en/document-chunking/SKILL.md +99 -15
- package/template/skills/en/entity-extraction/SKILL.md +14 -4
- package/template/skills/en/quality-control/SKILL.md +14 -0
- package/template/skills/en/rule-extraction/SKILL.md +92 -94
- package/template/skills/en/rule-extraction/references/chunking-strategies.md +7 -78
- package/template/skills/en/skill-authoring/SKILL.md +52 -8
- package/template/skills/en/skill-creator/SKILL.md +25 -3
- package/template/skills/en/skill-to-workflow/SKILL.md +23 -4
- package/template/skills/en/task-decomposition/SKILL.md +1 -1
- package/template/skills/en/tree-processing/SKILL.md +1 -1
- package/template/skills/en/version-control/SKILL.md +15 -0
- package/template/skills/en/work-decomposition/SKILL.md +21 -35
- package/template/skills/zh/auto-model-selection/SKILL.md +54 -33
- package/template/skills/zh/bootstrap-workspace/SKILL.md +13 -0
- package/template/skills/zh/compliance-judgment/SKILL.md +14 -0
- package/template/skills/zh/compliance-judgment/references/output-format.md +62 -62
- package/template/skills/zh/confidence-system/SKILL.md +34 -9
- package/template/skills/zh/corner-case-management/SKILL.md +71 -104
- package/template/skills/zh/cross-document-verification/SKILL.md +90 -195
- package/template/skills/zh/cross-document-verification/references/contradiction-taxonomy.md +36 -36
- package/template/skills/zh/dashboard-reporting/SKILL.md +82 -232
- package/template/skills/zh/dashboard-reporting/scripts/generate_dashboard.py +1 -1
- package/template/skills/zh/data-sensibility/SKILL.md +13 -0
- package/template/skills/zh/document-chunking/SKILL.md +96 -20
- package/template/skills/zh/document-parsing/references/parser-catalog.md +26 -26
- package/template/skills/zh/entity-extraction/SKILL.md +14 -4
- package/template/skills/zh/evolution-loop/references/convergence-guide.md +38 -38
- package/template/skills/zh/quality-control/SKILL.md +14 -0
- package/template/skills/zh/quality-control/references/qa-layers.md +65 -65
- package/template/skills/zh/quality-control/references/sampling-strategies.md +49 -49
- package/template/skills/zh/rule-extraction/SKILL.md +199 -188
- package/template/skills/zh/rule-extraction/references/chunking-strategies.md +5 -78
- package/template/skills/zh/skill-authoring/SKILL.md +108 -69
- package/template/skills/zh/skill-authoring/references/skill-format-spec.md +39 -39
- package/template/skills/zh/skill-creator/SKILL.md +71 -61
- package/template/skills/zh/skill-creator/references/schemas.md +60 -60
- package/template/skills/zh/skill-to-workflow/SKILL.md +24 -5
- package/template/skills/zh/skill-to-workflow/references/worker-llm-catalog.md +24 -24
- package/template/skills/zh/task-decomposition/SKILL.md +1 -1
- package/template/skills/zh/task-decomposition/references/decision-matrix.md +54 -54
- package/template/skills/zh/tree-processing/SKILL.md +1 -1
- package/template/skills/zh/version-control/SKILL.md +15 -0
- package/template/skills/zh/version-control/references/trace-id-spec.md +34 -34
- package/template/skills/zh/work-decomposition/SKILL.md +21 -33
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "kc-beta",
|
|
3
|
-
"version": "0.8.1",
|
|
3
|
+
"version": "0.8.3",
|
|
4
4
|
"description": "KC Agent — LLM document verification agent (pure Node.js CLI). Dual-licensed: PolyForm Noncommercial 1.0.0 for personal/noncommercial use; commercial license required for enterprise production. See LICENSE and LICENSE-COMMERCIAL.md.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
package/src/agent/context.js
CHANGED
|
@@ -152,10 +152,26 @@ export class ContextAssembler {
|
|
|
152
152
|
* @param {string} [opts.projectMemory] - v0.7.0 B3: rules/PATTERNS.md
|
|
153
153
|
* content. Capped at ~5 KB by the caller. Surfaced for phases the
|
|
154
154
|
* work-decomposition skill operates in (skill_authoring + skill_testing).
|
|
155
|
+
* @param {string} [opts.marathonGoal] - v0.8.2 P12-A: the active marathon
|
|
156
|
+
* goal text. Pinned at the system-prompt layer (never windowed) for the
|
|
157
|
+
* duration of the marathon session. Surfaced only when marathon mode is
|
|
158
|
+
* active; absent otherwise. Fixes the v0.8.1 regression where the goal
|
|
159
|
+
* user_message got evicted by context_windowed before distillation, so
|
|
160
|
+
* agents reverted to default behavior mid-run.
|
|
155
161
|
* @returns {string}
|
|
156
162
|
*/
|
|
157
|
-
build({ agentMd, pipelineState, workspaceState, skillIndex, projectMemory } = {}) {
|
|
163
|
+
build({ agentMd, pipelineState, workspaceState, skillIndex, projectMemory, marathonGoal } = {}) {
|
|
158
164
|
const parts = [AGENT_IDENTITY];
|
|
165
|
+
if (marathonGoal) {
|
|
166
|
+
parts.push(
|
|
167
|
+
"## Marathon goal (pinned for the duration of this session)\n\n" +
|
|
168
|
+
marathonGoal.trim() + "\n\n" +
|
|
169
|
+
"You are running in marathon mode — no manual user check-ins between " +
|
|
170
|
+
"phases. This goal is your north star; keep returning to it as you " +
|
|
171
|
+
"advance through the pipeline. If a continuation prompt focuses on " +
|
|
172
|
+
"phase mechanics, the goal above tells you *why*.",
|
|
173
|
+
);
|
|
174
|
+
}
|
|
159
175
|
if (agentMd) parts.push(agentMd);
|
|
160
176
|
if (skillIndex) parts.push(skillIndex);
|
|
161
177
|
if (projectMemory) {
|
package/src/agent/engine.js
CHANGED
|
@@ -229,6 +229,17 @@ export class AgentEngine {
|
|
|
229
229
|
// marker, no inbox.jsonl. Driver instance set by enterMarathonMode(),
|
|
230
230
|
// cleared by exitMarathonMode(). Query via this.marathonDriver.
|
|
231
231
|
this.marathonDriver = null;
|
|
232
|
+
// v0.8.2 P12-A: marathon goal text. Pinned at system-prompt level via
|
|
233
|
+
// ContextAssembler so it survives context_windowed eviction (the v0.8.1
|
|
234
|
+
// regression). Stored alongside marathonDriver lifecycle.
|
|
235
|
+
this.marathonGoal = null;
|
|
236
|
+
// v0.8.2 P12-B: shared user-input queue between TUI and engine. The TUI
|
|
237
|
+
// queues mid-run typed messages here; the marathon decision loop drains
|
|
238
|
+
// this queue BEFORE asking the driver for a continuation, so user
|
|
239
|
+
// interrupts always win over driver autonomy. Fixes the v0.8.1 silent
|
|
240
|
+
// queue-starvation where /marathon mode kept the user message in a
|
|
241
|
+
// TUI-local queue that never reached the engine.
|
|
242
|
+
this.inputQueue = [];
|
|
232
243
|
|
|
233
244
|
// Context windowing
|
|
234
245
|
this.contextWindow = new ContextWindow({
|
|
@@ -461,12 +472,16 @@ export class AgentEngine {
|
|
|
461
472
|
} catch { /* never fatal */ }
|
|
462
473
|
};
|
|
463
474
|
|
|
464
|
-
// v0.8 P1-C: self-rescheduling setTimeout instead of setInterval.
|
|
465
|
-
//
|
|
466
|
-
//
|
|
467
|
-
//
|
|
468
|
-
//
|
|
469
|
-
//
|
|
475
|
+
// v0.8 P1-C: self-rescheduling setTimeout instead of setInterval.
|
|
476
|
+
// v0.8.3 P21-B4: removed .unref() — both 资管 + 贷款 v0.8.2 sessions
|
|
477
|
+
// showed only 1 line in heap.jsonl across 7+ hour runs even with
|
|
478
|
+
// self-rescheduling setTimeout. The .unref'd timer was apparently
|
|
479
|
+
// being dropped by Node's event-loop housekeeping despite the
|
|
480
|
+
// process being kept alive by stdin / React render loop / other
|
|
481
|
+
// refs. The cost of dropping .unref() is that on a graceful exit
|
|
482
|
+
// path that doesn't call engine.stop(), the timer can delay exit
|
|
483
|
+
// by up to 60s. We accept this — engine.stop() is the canonical
|
|
484
|
+
// shutdown path and it clears the timer via clearTimeout.
|
|
470
485
|
let timeoutHandle = null;
|
|
471
486
|
const scheduleNext = () => {
|
|
472
487
|
if (stopped) return;
|
|
@@ -474,7 +489,6 @@ export class AgentEngine {
|
|
|
474
489
|
sample();
|
|
475
490
|
scheduleNext();
|
|
476
491
|
}, 60_000);
|
|
477
|
-
timeoutHandle.unref?.();
|
|
478
492
|
};
|
|
479
493
|
|
|
480
494
|
// Record one sample at startup so we have a baseline even on short runs.
|
|
@@ -798,6 +812,7 @@ export class AgentEngine {
|
|
|
798
812
|
pipelineState: this.pipelines[this.currentPhase]?.describeState?.() || null,
|
|
799
813
|
workspaceState: this._buildWorkspaceState(),
|
|
800
814
|
projectMemory: this._readProjectMemory(),
|
|
815
|
+
marathonGoal: this.marathonGoal,
|
|
801
816
|
});
|
|
802
817
|
const systemTokens = estimateTokens(systemPrompt);
|
|
803
818
|
const messageTokens = estimateMessagesTokens(this.history.messages);
|
|
@@ -1239,6 +1254,7 @@ export class AgentEngine {
|
|
|
1239
1254
|
pipelineState,
|
|
1240
1255
|
workspaceState: this._buildWorkspaceState(),
|
|
1241
1256
|
projectMemory: this._readProjectMemory(),
|
|
1257
|
+
marathonGoal: this.marathonGoal,
|
|
1242
1258
|
});
|
|
1243
1259
|
const tools = this.toolRegistry.schemasOpenai();
|
|
1244
1260
|
|
|
@@ -2468,6 +2484,18 @@ export class AgentEngine {
|
|
|
2468
2484
|
// v0.8.0; the I/O wrapper just shifted from filesystem-watcher to
|
|
2469
2485
|
// direct method calls.
|
|
2470
2486
|
while (this.marathonDriver) {
|
|
2487
|
+
// v0.8.2 P12-B: user-input queue priority. Drain queued user messages
|
|
2488
|
+
// FIRST so mid-run nudges always win over driver autonomy. Fixes the
|
|
2489
|
+
// v0.8.1 silent queue-starvation: the TUI used to queue messages in a
|
|
2490
|
+
// local ref that only drained after runTurn() returned, but the
|
|
2491
|
+
// marathon loop never returns while the driver is active. Now the
|
|
2492
|
+
// engine owns the queue; TUI hands off via queueUserInput().
|
|
2493
|
+
const queuedUserInput = this._drainNextQueuedUserInput();
|
|
2494
|
+
if (queuedUserInput) {
|
|
2495
|
+
yield* this.runTurn(queuedUserInput);
|
|
2496
|
+
continue;
|
|
2497
|
+
}
|
|
2498
|
+
|
|
2471
2499
|
const turnsSnapshot = this.marathonDriver.turnsThisPhase;
|
|
2472
2500
|
const phaseChanged = this.currentPhase !== this.marathonDriver.currentPhase;
|
|
2473
2501
|
const milestones = this._buildEngineCountsBlock(this.currentPhase) || {};
|
|
@@ -2485,6 +2513,7 @@ export class AgentEngine {
|
|
|
2485
2513
|
decisions: this.marathonDriver.decisionCount,
|
|
2486
2514
|
});
|
|
2487
2515
|
this.marathonDriver = null;
|
|
2516
|
+
this.marathonGoal = null;
|
|
2488
2517
|
break;
|
|
2489
2518
|
}
|
|
2490
2519
|
this.eventLog.append("marathon_decision", {
|
|
@@ -2493,7 +2522,8 @@ export class AgentEngine {
|
|
|
2493
2522
|
phase: this.currentPhase,
|
|
2494
2523
|
});
|
|
2495
2524
|
yield* this.runTurn(decision.prompt);
|
|
2496
|
-
// Loop back: another turn just completed;
|
|
2525
|
+
// Loop back: another turn just completed; engine queue + driver both
|
|
2526
|
+
// get another chance via the next iteration's drain-then-decide.
|
|
2497
2527
|
}
|
|
2498
2528
|
}
|
|
2499
2529
|
|
|
@@ -2511,6 +2541,7 @@ export class AgentEngine {
|
|
|
2511
2541
|
if (this.marathonDriver) {
|
|
2512
2542
|
throw new Error("Marathon already active — use /marathon off to disengage first");
|
|
2513
2543
|
}
|
|
2544
|
+
this.marathonGoal = goal;
|
|
2514
2545
|
this.marathonDriver = new MarathonDriver({
|
|
2515
2546
|
goal,
|
|
2516
2547
|
language: this.config.language || "en",
|
|
@@ -2534,6 +2565,7 @@ export class AgentEngine {
|
|
|
2534
2565
|
decisions: this.marathonDriver.decisionCount,
|
|
2535
2566
|
});
|
|
2536
2567
|
this.marathonDriver = null;
|
|
2568
|
+
this.marathonGoal = null;
|
|
2537
2569
|
return status;
|
|
2538
2570
|
}
|
|
2539
2571
|
|
|
@@ -2542,6 +2574,51 @@ export class AgentEngine {
|
|
|
2542
2574
|
return !!this.marathonDriver && !this.marathonDriver.stopped;
|
|
2543
2575
|
}
|
|
2544
2576
|
|
|
2577
|
+
/**
|
|
2578
|
+
* v0.8.2 P12-B: queue a user-typed message for the engine to pick up at
|
|
2579
|
+
* the next turn boundary. Called by the TUI when the user types during an
|
|
2580
|
+
* in-flight marathon turn. The marathon decision loop drains this queue
|
|
2581
|
+
* BEFORE asking the driver for a continuation, so user interrupts always
|
|
2582
|
+
* win over driver autonomy.
|
|
2583
|
+
*
|
|
2584
|
+
* @param {string} text — user-typed message
|
|
2585
|
+
*/
|
|
2586
|
+
queueUserInput(text) {
|
|
2587
|
+
if (!text || typeof text !== "string") return;
|
|
2588
|
+
this.inputQueue.push(text);
|
|
2589
|
+
this.eventLog.append("user_input_queued", {
|
|
2590
|
+
preview: text.slice(0, 100),
|
|
2591
|
+
queueDepth: this.inputQueue.length,
|
|
2592
|
+
marathonActive: this.isMarathonActive(),
|
|
2593
|
+
});
|
|
2594
|
+
}
|
|
2595
|
+
|
|
2596
|
+
/**
|
|
2597
|
+
* v0.8.2 P12-B: drain the next queued user input, or null if empty.
|
|
2598
|
+
* Internal helper for the marathon decision loop.
|
|
2599
|
+
*
|
|
2600
|
+
* @returns {string|null}
|
|
2601
|
+
*/
|
|
2602
|
+
_drainNextQueuedUserInput() {
|
|
2603
|
+
if (this.inputQueue.length === 0) return null;
|
|
2604
|
+
const text = this.inputQueue.shift();
|
|
2605
|
+
this.eventLog.append("user_input_drained", {
|
|
2606
|
+
preview: text.slice(0, 100),
|
|
2607
|
+
queueDepth: this.inputQueue.length,
|
|
2608
|
+
});
|
|
2609
|
+
return text;
|
|
2610
|
+
}
|
|
2611
|
+
|
|
2612
|
+
/**
|
|
2613
|
+
* v0.8.2 P12-B: query the queue depth without draining.
|
|
2614
|
+
* Used by TUI to display "Queued (N waiting)" indicator.
|
|
2615
|
+
*
|
|
2616
|
+
* @returns {number}
|
|
2617
|
+
*/
|
|
2618
|
+
getQueueDepth() {
|
|
2619
|
+
return this.inputQueue.length;
|
|
2620
|
+
}
|
|
2621
|
+
|
|
2545
2622
|
/**
|
|
2546
2623
|
* B1: Parallel ralph-loop — N concurrent subagents each executing one
|
|
2547
2624
|
* task at a time, claimed atomically from TaskManager.
|
package/src/agent/llm-client.js
CHANGED
|
@@ -32,6 +32,16 @@ export class LLMClient {
|
|
|
32
32
|
this.baseUrl = baseUrl.replace(/\/+$/, "");
|
|
33
33
|
this.authType = authType;
|
|
34
34
|
this.apiFormat = apiFormat;
|
|
35
|
+
// v0.8.2 P14-A: request-level timeout for fetch. SiliconFlow GLM-5.1
|
|
36
|
+
// streams hung 8h+ overnight in E2E #12 with no HTTP-level cutoff.
|
|
37
|
+
// 10 min ceiling (configurable via KC_LLM_REQUEST_TIMEOUT_MS) lets the
|
|
38
|
+
// marathon driver's `error: terminated` → recovery path kick in within
|
|
39
|
+
// minutes instead of hours when the upstream stalls a request without
|
|
40
|
+
// closing the TCP connection.
|
|
41
|
+
const envTimeout = parseInt(process.env.KC_LLM_REQUEST_TIMEOUT_MS || "0", 10);
|
|
42
|
+
this.requestTimeoutMs = Number.isFinite(envTimeout) && envTimeout > 0
|
|
43
|
+
? envTimeout
|
|
44
|
+
: 10 * 60 * 1000;
|
|
35
45
|
}
|
|
36
46
|
|
|
37
47
|
/**
|
|
@@ -196,10 +206,15 @@ export class LLMClient {
|
|
|
196
206
|
let resp;
|
|
197
207
|
try {
|
|
198
208
|
resp = await withRetry(async () => {
|
|
209
|
+
// v0.8.2 P14-A: AbortSignal.timeout for stream connect + per-chunk
|
|
210
|
+
// forward progress. Hung streams (SiliconFlow GLM-5.1 overnight,
|
|
211
|
+
// E2E #12) abort within requestTimeoutMs and surface as an error
|
|
212
|
+
// event the marathon driver can recover from.
|
|
199
213
|
const r = await fetch(this._getEndpoint(), {
|
|
200
214
|
method: "POST",
|
|
201
215
|
headers: this._buildHeaders(),
|
|
202
216
|
body: JSON.stringify(body),
|
|
217
|
+
signal: AbortSignal.timeout(this.requestTimeoutMs),
|
|
203
218
|
});
|
|
204
219
|
if (!r.ok) {
|
|
205
220
|
const text = await r.text();
|
|
@@ -215,7 +230,13 @@ export class LLMClient {
|
|
|
215
230
|
// A8: Any pre-stream failure (network, auth, 4xx/5xx after retry) is
|
|
216
231
|
// tagged and re-thrown. Engine's outer catch sees exactly one tagged
|
|
217
232
|
// error event.
|
|
218
|
-
|
|
233
|
+
// v0.8.2 P14-A: AbortError from AbortSignal.timeout marks request_timeout
|
|
234
|
+
// distinctly so audits can count these vs. generic connect errors.
|
|
235
|
+
if (err.name === "TimeoutError" || err.name === "AbortError") {
|
|
236
|
+
err.streamTermination = "request_timeout";
|
|
237
|
+
} else if (!err.streamTermination) {
|
|
238
|
+
err.streamTermination = "connect_error";
|
|
239
|
+
}
|
|
219
240
|
throw err;
|
|
220
241
|
}
|
|
221
242
|
|
|
@@ -256,10 +277,12 @@ export class LLMClient {
|
|
|
256
277
|
const body = this._buildNonStreamBody({ model, messages, maxTokens });
|
|
257
278
|
|
|
258
279
|
const resp = await withRetry(async () => {
|
|
280
|
+
// v0.8.2 P14-A: same request-level timeout as streamChat for symmetry.
|
|
259
281
|
const r = await fetch(this._getEndpoint(), {
|
|
260
282
|
method: "POST",
|
|
261
283
|
headers: this._buildHeaders(),
|
|
262
284
|
body: JSON.stringify(body),
|
|
285
|
+
signal: AbortSignal.timeout(this.requestTimeoutMs),
|
|
263
286
|
});
|
|
264
287
|
if (!r.ok) {
|
|
265
288
|
const text = await r.text();
|
package/src/agent/pipelines/_milestone-derive.js
CHANGED
|
@@ -156,13 +156,33 @@ function sha256OfFile(p) {
|
|
|
156
156
|
} catch { return null; }
|
|
157
157
|
}
|
|
158
158
|
|
|
159
|
-
// Normalize a rule id
|
|
159
|
+
// Normalize a rule id to a canonical form for dedup + comparison.
|
|
160
|
+
// Accepts two shapes:
|
|
161
|
+
// Bare-numeric: "R14" / "r014" / "R0014" → "R014"
|
|
162
|
+
// Compound: "R01-01" / "R01_01" / "R001-001" → "R001-01"
|
|
163
|
+
// (zero-pads the major part to 3 digits and the minor part
|
|
164
|
+
// to at least 2 digits; uses dash separator canonically)
|
|
160
165
|
// Returns null for non-matching strings (e.g., thematic skill names like
|
|
161
|
-
// "account_identity" — those stay as-is
|
|
162
|
-
|
|
166
|
+
// "account_identity" — those stay as-is and don't get credited via this
|
|
167
|
+
// path; their credit comes from frontmatter `source_rules:` instead).
|
|
168
|
+
//
|
|
169
|
+
// v0.8.3 P20-B2: compound form added. E2E #13 资管 used `R01-01`..`R07-01`
|
|
170
|
+
// naturally following the regulation's subsection numbering; v0.8.2's
|
|
171
|
+
// bare-only regex returned null for all 15 dirs → `rulesCovered: 0/15`
|
|
172
|
+
// → engine refused natural skill_testing advance.
|
|
173
|
+
export function canonicalRuleId(s) {
|
|
163
174
|
if (typeof s !== "string") return null;
|
|
164
|
-
const
|
|
165
|
-
|
|
175
|
+
const trimmed = s.trim();
|
|
176
|
+
// Compound form: R01-01, R01_01, R001-005, etc.
|
|
177
|
+
const compound = trimmed.match(/^R0*(\d+)[-_](\d+)$/i);
|
|
178
|
+
if (compound) {
|
|
179
|
+
const major = String(parseInt(compound[1], 10)).padStart(3, "0");
|
|
180
|
+
const minor = String(parseInt(compound[2], 10)).padStart(2, "0");
|
|
181
|
+
return `R${major}-${minor}`;
|
|
182
|
+
}
|
|
183
|
+
// Bare-numeric form
|
|
184
|
+
const bare = trimmed.match(/^R0*(\d+)$/i);
|
|
185
|
+
if (bare) return `R${String(parseInt(bare[1], 10)).padStart(3, "0")}`;
|
|
166
186
|
return null;
|
|
167
187
|
}
|
|
168
188
|
|
|
@@ -193,9 +213,16 @@ export function deriveRuleExtractionMilestones(workspace) {
|
|
|
193
213
|
|
|
194
214
|
// rulesExtracted: every rule object across every JSON file in rules/
|
|
195
215
|
// that has a non-empty `id` field. catalog.json is canonical but agents
|
|
196
|
-
// sometimes fan out to per-rule files (E2E #5 DS)
|
|
216
|
+
// sometimes fan out to per-rule files (E2E #5 DS) — or write SIBLING
|
|
217
|
+
// files with the same IDs plus additional metadata (E2E #13 资管's
|
|
218
|
+
// `rules/difficulty.json` added judgment-type classifications and
|
|
219
|
+
// doubled the count from 15 → 30 because the engine pushed IDs without
|
|
220
|
+
// dedup). v0.8.3 P20-B1: dedup by ID across all rules/*.json files.
|
|
221
|
+
// First-seen wins for chunk-ref counting (catalog.json is read first
|
|
222
|
+
// by alphabetical / fs order in most cases).
|
|
197
223
|
const rulesExtracted = [];
|
|
198
224
|
const rulesWithChunkRefs = [];
|
|
225
|
+
const seenIds = new Set();
|
|
199
226
|
if (dirExists(rulesDir)) {
|
|
200
227
|
for (const e of listChildFiles(rulesDir)) {
|
|
201
228
|
if (!e.name.endsWith(".json")) continue;
|
|
@@ -204,8 +231,21 @@ export function deriveRuleExtractionMilestones(workspace) {
|
|
|
204
231
|
const items = Array.isArray(data) ? data : (data.rules || []);
|
|
205
232
|
for (const r of items) {
|
|
206
233
|
if (r && typeof r.id === "string" && r.id.length) {
|
|
234
|
+
if (seenIds.has(r.id)) continue; // v0.8.3 P20-B1 dedup
|
|
235
|
+
seenIds.add(r.id);
|
|
207
236
|
rulesExtracted.push(r.id);
|
|
208
|
-
|
|
237
|
+
// v0.8.2 P13-C: accept any of three field names for chunk
|
|
238
|
+
// references. Engine historically looked only for
|
|
239
|
+
// `source_chunk_ids`, but 贷款 v0.8.1 + 资管 v0.8.1 catalogs
|
|
240
|
+
// wrote `chunk_ids` (the shorter form agents naturally pick
|
|
241
|
+
// from the rule-extraction skill examples). `chunk_refs` is
|
|
242
|
+
// a legacy alias from older audit docs. Any non-empty match
|
|
243
|
+
// counts.
|
|
244
|
+
const chunks = (Array.isArray(r.source_chunk_ids) && r.source_chunk_ids)
|
|
245
|
+
|| (Array.isArray(r.chunk_ids) && r.chunk_ids)
|
|
246
|
+
|| (Array.isArray(r.chunk_refs) && r.chunk_refs)
|
|
247
|
+
|| null;
|
|
248
|
+
if (chunks && chunks.length > 0) {
|
|
209
249
|
rulesWithChunkRefs.push(r.id);
|
|
210
250
|
}
|
|
211
251
|
}
|
|
@@ -331,6 +371,37 @@ export function deriveSkillAuthoringMilestones(workspace) {
|
|
|
331
371
|
}
|
|
332
372
|
} catch { /* best-effort */ }
|
|
333
373
|
}
|
|
374
|
+
|
|
375
|
+
// v0.8.2 P13-D: also credit rule_ids declared in rule_mapping.json.
|
|
376
|
+
// 资管 v0.8.1 wrote 6 thematic-overlay dirs (R01_periodic_report,
|
|
377
|
+
// R02_custodian_core, etc.) each containing a rule_mapping.json that
|
|
378
|
+
// maps rule_ids to engine-level check function names. The dirs have
|
|
379
|
+
// no own check.py because the actual implementation lives in
|
|
380
|
+
// workspace-root verify_v*.py. Without recognizing rule_mapping.json,
|
|
381
|
+
// the engine treats them as orphan dirs.
|
|
382
|
+
//
|
|
383
|
+
// Rule-id formats in the wild include both bare-numeric (R01, R027)
|
|
384
|
+
// and compound (R01-05, R02-08). canonicalRuleId() normalizes both
|
|
385
|
+
// forms; as a fallback we also accept a raw, non-canonicalizable key
|
|
386
|
+
// that looks like a rule id (matches R\d+ optionally followed by
|
|
387
|
+
// `-` or `_` and more digits).
|
|
388
|
+
try {
|
|
389
|
+
const mappingPath = path.join(skillPath, "rule_mapping.json");
|
|
390
|
+
if (fileExists(mappingPath)) {
|
|
391
|
+
const mapping = readJsonSafe(mappingPath);
|
|
392
|
+
if (mapping && typeof mapping === "object" && !Array.isArray(mapping)) {
|
|
393
|
+
for (const key of Object.keys(mapping)) {
|
|
394
|
+
const canon = canonicalRuleId(key);
|
|
395
|
+
if (canon) {
|
|
396
|
+
ruleIdsCovered.add(canon);
|
|
397
|
+
} else if (/^R0*\d+[-_]?\d*$/i.test(key.trim())) {
|
|
398
|
+
// Rule-id-shaped key that canonicalRuleId() did not normalize (e.g. a trailing separator, "R01-") — preserve as-is
|
|
399
|
+
ruleIdsCovered.add(key.trim());
|
|
400
|
+
}
|
|
401
|
+
}
|
|
402
|
+
}
|
|
403
|
+
}
|
|
404
|
+
} catch { /* best-effort */ }
|
|
334
405
|
}
|
|
335
406
|
|
|
336
407
|
// v0.8 P2-F (item 22): count stub-shaped check.py files. Pairs with
|
package/src/agent/pipelines/skill-authoring.js
CHANGED
|
@@ -3,7 +3,7 @@ import path from "node:path";
|
|
|
3
3
|
import { Phase, PipelineEvent } from "./index.js";
|
|
4
4
|
import { Pipeline } from "./base.js";
|
|
5
5
|
import { SkillValidator } from "../skill-validator.js";
|
|
6
|
-
import { deriveSkillAuthoringMilestones } from "./_milestone-derive.js";
|
|
6
|
+
import { deriveSkillAuthoringMilestones, canonicalRuleId } from "./_milestone-derive.js";
|
|
7
7
|
|
|
8
8
|
export class SkillAuthoringPipeline extends Pipeline {
|
|
9
9
|
/**
|
|
@@ -37,14 +37,31 @@ export class SkillAuthoringPipeline extends Pipeline {
|
|
|
37
37
|
}
|
|
38
38
|
|
|
39
39
|
_loadRules() {
|
|
40
|
+
// v0.8.3 P20-B1+B2: dedup rule IDs across all rules/*.json files AND
|
|
41
|
+
// canonicalize them so the rulesCovered comparison against
|
|
42
|
+
// ruleIdsCovered (which now goes through canonicalRuleId) works for
|
|
43
|
+
// BOTH bare-numeric (R14) AND compound (R01-01, R02-03) forms.
|
|
44
|
+
// E2E #13 资管 used compound IDs + wrote a sibling difficulty.json;
|
|
45
|
+
// the raw-string + no-dedup pre-v0.8.3 path produced rulesCovered:
|
|
46
|
+
// 0/30 (compound IDs unmatched + double-counted).
|
|
40
47
|
this.totalRules = [];
|
|
48
|
+
const seen = new Set();
|
|
41
49
|
const rulesDir = path.join(this._workspace.cwd, "rules");
|
|
42
50
|
if (!fs.existsSync(rulesDir)) return;
|
|
43
51
|
for (const f of fs.readdirSync(rulesDir).filter((f) => f.endsWith(".json"))) {
|
|
44
52
|
try {
|
|
45
53
|
const data = JSON.parse(fs.readFileSync(path.join(rulesDir, f), "utf-8"));
|
|
46
54
|
const rules = Array.isArray(data) ? data : (data.rules || []);
|
|
47
|
-
for (const r of rules) {
|
|
55
|
+
for (const r of rules) {
|
|
56
|
+
if (!r || !r.id) continue;
|
|
57
|
+
// Canonicalize to match ruleIdsCovered which is built from
|
|
58
|
+
// canonicalRuleId() output. If canonicalRuleId returns null
|
|
59
|
+
// (non-rule-shaped string), preserve the raw trimmed string.
|
|
60
|
+
const canon = canonicalRuleId(r.id) || String(r.id).trim();
|
|
61
|
+
if (seen.has(canon)) continue;
|
|
62
|
+
seen.add(canon);
|
|
63
|
+
this.totalRules.push(canon);
|
|
64
|
+
}
|
|
48
65
|
} catch { /* skip */ }
|
|
49
66
|
}
|
|
50
67
|
}
|
package/src/agent/tools/release.js
CHANGED
|
@@ -763,7 +763,100 @@ export class ReleaseTool extends BaseTool {
|
|
|
763
763
|
}
|
|
764
764
|
}
|
|
765
765
|
|
|
766
|
-
// 5) Fallback (belt-and-suspenders per v0.8 plan Risk #7):
|
|
766
|
+
// 5) v0.8.2 P13-A: doc-keyed → rules-keyed nested shape.
|
|
767
|
+
// 贷款 v0.8.1 wrote skill_test_v*_results.json + v2_hybrid_results.json
|
|
768
|
+
// + run_all_checks.json all with this shape:
|
|
769
|
+
// {
|
|
770
|
+
// "<doc_filename>": {
|
|
771
|
+
// "channel": "...", "expected": "PASS"|"FAIL",
|
|
772
|
+
// "rules": {
|
|
773
|
+
// "R01": {"rule_id": "R01", "verdict": "PASS", "confidence": 0.95, "method": "regex"},
|
|
774
|
+
// "R02": {...}
|
|
775
|
+
// }
|
|
776
|
+
// },
|
|
777
|
+
// ...
|
|
778
|
+
// }
|
|
779
|
+
// The optional outer "results" wrapper from v2_full_regression.json
|
|
780
|
+
// (which nests this further) is unwrapped via d.results || d.
|
|
781
|
+
if (tally.size === 0) {
|
|
782
|
+
for (const f of files) {
|
|
783
|
+
if (!/qc|verdict|result|test/i.test(f.name)) continue;
|
|
784
|
+
try {
|
|
785
|
+
const d = JSON.parse(fs.readFileSync(f.path, "utf-8"));
|
|
786
|
+
const root = d?.results || d;
|
|
787
|
+
if (!root || typeof root !== "object" || Array.isArray(root)) continue;
|
|
788
|
+
let matched = false;
|
|
789
|
+
for (const docKey of Object.keys(root)) {
|
|
790
|
+
const docEntry = root[docKey];
|
|
791
|
+
if (!docEntry || typeof docEntry !== "object") continue;
|
|
792
|
+
const rulesMap = docEntry.rules;
|
|
793
|
+
if (!rulesMap || typeof rulesMap !== "object" || Array.isArray(rulesMap)) continue;
|
|
794
|
+
for (const rid of Object.keys(rulesMap)) {
|
|
795
|
+
if (!isRuleId(rid)) continue;
|
|
796
|
+
const r = rulesMap[rid];
|
|
797
|
+
if (!r || typeof r !== "object") continue;
|
|
798
|
+
const verdict = (r.verdict || r.result_type || r.status || "").toString().toUpperCase();
|
|
799
|
+
if (verdict === "PASS") { bump(rid, "pass"); matched = true; }
|
|
800
|
+
else if (verdict === "FAIL") { bump(rid, "fail"); matched = true; }
|
|
801
|
+
else if (verdict === "NOT_APPLICABLE" || verdict === "NA") { bump(rid, "na"); matched = true; }
|
|
802
|
+
}
|
|
803
|
+
}
|
|
804
|
+
if (matched) {
|
|
805
|
+
sourceFiles.push(path.relative(this._workspace.cwd, f.path));
|
|
806
|
+
break;
|
|
807
|
+
}
|
|
808
|
+
} catch { /* skip non-JSON */ }
|
|
809
|
+
}
|
|
810
|
+
}
|
|
811
|
+
|
|
812
|
+
// 6) v0.8.3 P22-B6: top-level array of {doc_id, results: [{rule_id, status}]}.
|
|
813
|
+
// 资管 v0.8.2 wrote `output/skill_test_v*.json` + `workflow_v*_results.json`
|
|
814
|
+
// + `evolution_round*.json` all with this shape:
|
|
815
|
+
// [
|
|
816
|
+
// {
|
|
817
|
+
// "doc_id": "<doc-filename>",
|
|
818
|
+
// "results": [
|
|
819
|
+
// {"rule_id": "R01-01", "status": "WARNING", "found_fields": {...}},
|
|
820
|
+
// {"rule_id": "R01-02", "status": "PASS", ...},
|
|
821
|
+
// ...
|
|
822
|
+
// ]
|
|
823
|
+
// },
|
|
824
|
+
// ...
|
|
825
|
+
// ]
|
|
826
|
+
// Distinct from Shape 5: top-level is an ARRAY (not object), and the
|
|
827
|
+
// per-rule data lives in `results: [...]` (an array of rule outcomes)
|
|
828
|
+
// rather than `rules: {<rule>: ...}` (object keyed by rule).
|
|
829
|
+
if (tally.size === 0) {
|
|
830
|
+
for (const f of files) {
|
|
831
|
+
if (!/qc|verdict|result|test|evolution|workflow/i.test(f.name)) continue;
|
|
832
|
+
try {
|
|
833
|
+
const d = JSON.parse(fs.readFileSync(f.path, "utf-8"));
|
|
834
|
+
if (!Array.isArray(d)) continue;
|
|
835
|
+
let matched = false;
|
|
836
|
+
for (const docEntry of d) {
|
|
837
|
+
if (!docEntry || typeof docEntry !== "object") continue;
|
|
838
|
+
const results = docEntry.results;
|
|
839
|
+
if (!Array.isArray(results)) continue;
|
|
840
|
+
for (const r of results) {
|
|
841
|
+
if (!r || typeof r !== "object") continue;
|
|
842
|
+
const rid = r.rule_id || r.ruleId || r.id;
|
|
843
|
+
if (!isRuleId(rid)) continue;
|
|
844
|
+
const verdict = (r.status || r.verdict || r.result_type || "").toString().toUpperCase();
|
|
845
|
+
if (verdict === "PASS") { bump(rid, "pass"); matched = true; }
|
|
846
|
+
else if (verdict === "FAIL") { bump(rid, "fail"); matched = true; }
|
|
847
|
+
else if (verdict === "WARNING") { bump(rid, "pass"); matched = true; } // WARNING counts as pass (per existing shape conventions)
|
|
848
|
+
else if (verdict === "NOT_APPLICABLE" || verdict === "NA") { bump(rid, "na"); matched = true; }
|
|
849
|
+
}
|
|
850
|
+
}
|
|
851
|
+
if (matched) {
|
|
852
|
+
sourceFiles.push(path.relative(this._workspace.cwd, f.path));
|
|
853
|
+
break;
|
|
854
|
+
}
|
|
855
|
+
} catch { /* skip non-JSON */ }
|
|
856
|
+
}
|
|
857
|
+
}
|
|
858
|
+
|
|
859
|
+
// 7) Fallback (belt-and-suspenders per v0.8 plan Risk #7):
|
|
767
860
|
// walk any output/*.json with a top-level rule_id-keyed shape that has
|
|
768
861
|
// verdict-like leaf objects. Catches future schema drift before the
|
|
769
862
|
// next audit cycle.
|
package/src/cli/index.js
CHANGED
|
@@ -716,12 +716,27 @@ function App({ engine, config }) {
|
|
|
716
716
|
}
|
|
717
717
|
|
|
718
718
|
if (streamingRef.current) {
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
719
|
+
// v0.8.2 P12-B: in marathon mode, hand off to engine's input queue
|
|
720
|
+
// instead of the TUI-local queueRef. The engine's marathon decision
|
|
721
|
+
// loop drains it FIRST at each turn boundary, so the user's nudge
|
|
722
|
+
// wins over the driver's continuation. Outside marathon, keep the
|
|
723
|
+
// existing TUI-local queue (drained after runTurn returns).
|
|
724
|
+
const marathonActive = engineRef.current?.isMarathonActive?.() ?? false;
|
|
725
|
+
if (marathonActive && engineRef.current?.queueUserInput) {
|
|
726
|
+
engineRef.current.queueUserInput(trimmed);
|
|
727
|
+
const depth = engineRef.current.getQueueDepth?.() ?? 1;
|
|
728
|
+
addMessage({
|
|
729
|
+
role: "system",
|
|
730
|
+
content: `⏳ Queued for marathon (${depth} waiting). Will be sent before the next driver continuation.`,
|
|
731
|
+
});
|
|
732
|
+
} else {
|
|
733
|
+
queueRef.current.push(trimmed);
|
|
734
|
+
setQueueSize(queueRef.current.length); // F2
|
|
735
|
+
addMessage({
|
|
736
|
+
role: "system",
|
|
737
|
+
content: `⏳ Queued (${queueRef.current.length} waiting). Will be sent to KC on next turn boundary.`,
|
|
738
|
+
});
|
|
739
|
+
}
|
|
725
740
|
} else {
|
|
726
741
|
runTurn(trimmed);
|
|
727
742
|
}
|
|
@@ -850,9 +865,15 @@ export async function main({ languageOverride } = {}) {
|
|
|
850
865
|
// Capture user's project directory (CWD at launch)
|
|
851
866
|
config.projectDir = process.cwd();
|
|
852
867
|
|
|
853
|
-
// Session-only language override (does NOT persist to config)
|
|
868
|
+
// Session-only language override (does NOT persist to config).
|
|
869
|
+
// v0.8.3 P20-B3 (Task #218): also set process.env.LANGUAGE so the
|
|
870
|
+
// engine's _overlayWorkspaceEnv() penvWon check honors the CLI flag.
|
|
871
|
+
// Pre-v0.8.3, workspace .env LANGUAGE=en would overwrite a CLI --zh
|
|
872
|
+
// override during engine construction because the overlay only
|
|
873
|
+
// checked process.env, not in-memory config.language.
|
|
854
874
|
if (languageOverride) {
|
|
855
875
|
config.language = languageOverride;
|
|
876
|
+
process.env.LANGUAGE = languageOverride;
|
|
856
877
|
}
|
|
857
878
|
|
|
858
879
|
if (!config.llmApiKey) {
|
package/template/.env.template
CHANGED
package/template/AGENT.md
CHANGED
|
@@ -15,7 +15,7 @@ update as you learn about this specific business scenario.
|
|
|
15
15
|
|
|
16
16
|
---
|
|
17
17
|
|
|
18
|
-
# KC
|
|
18
|
+
# KC — Document Verification Workspace
|
|
19
19
|
|
|
20
20
|
## What This Workspace Is
|
|
21
21
|
|
|
@@ -93,7 +93,7 @@ The skill body is the methodology. Skills convey philosophy and decision framewo
|
|
|
93
93
|
|
|
94
94
|
---
|
|
95
95
|
|
|
96
|
-
# KC
|
|
96
|
+
# KC — 文档核查工作区
|
|
97
97
|
|
|
98
98
|
> **技能优先级**: meta-meta 技能是架构层面 —— 当指导冲突时,
|
|
99
99
|
> meta-meta 凌驾于 meta (技法层面) 之上。架构师的框架约束技法。
|