kc-beta 0.8.1 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. package/package.json +1 -1
  2. package/src/agent/context.js +17 -1
  3. package/src/agent/engine.js +85 -8
  4. package/src/agent/llm-client.js +24 -1
  5. package/src/agent/pipelines/_milestone-derive.js +78 -7
  6. package/src/agent/pipelines/skill-authoring.js +19 -2
  7. package/src/agent/tools/release.js +94 -1
  8. package/src/cli/index.js +28 -7
  9. package/template/.env.template +1 -1
  10. package/template/AGENT.md +2 -2
  11. package/template/skills/en/auto-model-selection/SKILL.md +55 -35
  12. package/template/skills/en/bootstrap-workspace/SKILL.md +13 -0
  13. package/template/skills/en/compliance-judgment/SKILL.md +14 -0
  14. package/template/skills/en/confidence-system/SKILL.md +30 -8
  15. package/template/skills/en/corner-case-management/SKILL.md +53 -33
  16. package/template/skills/en/cross-document-verification/SKILL.md +88 -83
  17. package/template/skills/en/dashboard-reporting/SKILL.md +91 -66
  18. package/template/skills/en/dashboard-reporting/scripts/generate_dashboard.py +1 -1
  19. package/template/skills/en/data-sensibility/SKILL.md +19 -12
  20. package/template/skills/en/document-chunking/SKILL.md +99 -15
  21. package/template/skills/en/entity-extraction/SKILL.md +14 -4
  22. package/template/skills/en/quality-control/SKILL.md +14 -0
  23. package/template/skills/en/rule-extraction/SKILL.md +92 -94
  24. package/template/skills/en/rule-extraction/references/chunking-strategies.md +7 -78
  25. package/template/skills/en/skill-authoring/SKILL.md +52 -8
  26. package/template/skills/en/skill-creator/SKILL.md +25 -3
  27. package/template/skills/en/skill-to-workflow/SKILL.md +23 -4
  28. package/template/skills/en/task-decomposition/SKILL.md +1 -1
  29. package/template/skills/en/tree-processing/SKILL.md +1 -1
  30. package/template/skills/en/version-control/SKILL.md +15 -0
  31. package/template/skills/en/work-decomposition/SKILL.md +21 -35
  32. package/template/skills/zh/auto-model-selection/SKILL.md +54 -33
  33. package/template/skills/zh/bootstrap-workspace/SKILL.md +13 -0
  34. package/template/skills/zh/compliance-judgment/SKILL.md +14 -0
  35. package/template/skills/zh/compliance-judgment/references/output-format.md +62 -62
  36. package/template/skills/zh/confidence-system/SKILL.md +34 -9
  37. package/template/skills/zh/corner-case-management/SKILL.md +71 -104
  38. package/template/skills/zh/cross-document-verification/SKILL.md +90 -195
  39. package/template/skills/zh/cross-document-verification/references/contradiction-taxonomy.md +36 -36
  40. package/template/skills/zh/dashboard-reporting/SKILL.md +82 -232
  41. package/template/skills/zh/dashboard-reporting/scripts/generate_dashboard.py +1 -1
  42. package/template/skills/zh/data-sensibility/SKILL.md +13 -0
  43. package/template/skills/zh/document-chunking/SKILL.md +96 -20
  44. package/template/skills/zh/document-parsing/references/parser-catalog.md +26 -26
  45. package/template/skills/zh/entity-extraction/SKILL.md +14 -4
  46. package/template/skills/zh/evolution-loop/references/convergence-guide.md +38 -38
  47. package/template/skills/zh/quality-control/SKILL.md +14 -0
  48. package/template/skills/zh/quality-control/references/qa-layers.md +65 -65
  49. package/template/skills/zh/quality-control/references/sampling-strategies.md +49 -49
  50. package/template/skills/zh/rule-extraction/SKILL.md +199 -188
  51. package/template/skills/zh/rule-extraction/references/chunking-strategies.md +5 -78
  52. package/template/skills/zh/skill-authoring/SKILL.md +108 -69
  53. package/template/skills/zh/skill-authoring/references/skill-format-spec.md +39 -39
  54. package/template/skills/zh/skill-creator/SKILL.md +71 -61
  55. package/template/skills/zh/skill-creator/references/schemas.md +60 -60
  56. package/template/skills/zh/skill-to-workflow/SKILL.md +24 -5
  57. package/template/skills/zh/skill-to-workflow/references/worker-llm-catalog.md +24 -24
  58. package/template/skills/zh/task-decomposition/SKILL.md +1 -1
  59. package/template/skills/zh/task-decomposition/references/decision-matrix.md +54 -54
  60. package/template/skills/zh/tree-processing/SKILL.md +1 -1
  61. package/template/skills/zh/version-control/SKILL.md +15 -0
  62. package/template/skills/zh/version-control/references/trace-id-spec.md +34 -34
  63. package/template/skills/zh/work-decomposition/SKILL.md +21 -33
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "kc-beta",
3
- "version": "0.8.1",
3
+ "version": "0.8.3",
4
4
  "description": "KC Agent — LLM document verification agent (pure Node.js CLI). Dual-licensed: PolyForm Noncommercial 1.0.0 for personal/noncommercial use; commercial license required for enterprise production. See LICENSE and LICENSE-COMMERCIAL.md.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -152,10 +152,26 @@ export class ContextAssembler {
152
152
  * @param {string} [opts.projectMemory] - v0.7.0 B3: rules/PATTERNS.md
153
153
  * content. Capped at ~5 KB by the caller. Surfaced for phases the
154
154
  * work-decomposition skill operates in (skill_authoring + skill_testing).
155
+ * @param {string} [opts.marathonGoal] - v0.8.2 P12-A: the active marathon
156
+ * goal text. Pinned at the system-prompt layer (never windowed) for the
157
+ * duration of the marathon session. Surfaced only when marathon mode is
158
+ * active; absent otherwise. Fixes the v0.8.1 regression where the goal
159
+ * user_message got evicted by context_windowed before distillation, so
160
+ * agents reverted to default behavior mid-run.
155
161
  * @returns {string}
156
162
  */
157
- build({ agentMd, pipelineState, workspaceState, skillIndex, projectMemory } = {}) {
163
+ build({ agentMd, pipelineState, workspaceState, skillIndex, projectMemory, marathonGoal } = {}) {
158
164
  const parts = [AGENT_IDENTITY];
165
+ if (marathonGoal) {
166
+ parts.push(
167
+ "## Marathon goal (pinned for the duration of this session)\n\n" +
168
+ marathonGoal.trim() + "\n\n" +
169
+ "You are running in marathon mode — no manual user check-ins between " +
170
+ "phases. This goal is your north star; keep returning to it as you " +
171
+ "advance through the pipeline. If a continuation prompt focuses on " +
172
+ "phase mechanics, the goal above tells you *why*.",
173
+ );
174
+ }
159
175
  if (agentMd) parts.push(agentMd);
160
176
  if (skillIndex) parts.push(skillIndex);
161
177
  if (projectMemory) {
@@ -229,6 +229,17 @@ export class AgentEngine {
229
229
  // marker, no inbox.jsonl. Driver instance set by enterMarathonMode(),
230
230
  // cleared by exitMarathonMode(). Query via this.marathonDriver.
231
231
  this.marathonDriver = null;
232
+ // v0.8.2 P12-A: marathon goal text. Pinned at system-prompt level via
233
+ // ContextAssembler so it survives context_windowed eviction (the v0.8.1
234
+ // regression). Stored alongside marathonDriver lifecycle.
235
+ this.marathonGoal = null;
236
+ // v0.8.2 P12-B: shared user-input queue between TUI and engine. The TUI
237
+ // queues mid-run typed messages here; the marathon decision loop drains
238
+ // this queue BEFORE asking the driver for a continuation, so user
239
+ // interrupts always win over driver autonomy. Fixes the v0.8.1 silent
240
+ // queue-starvation where /marathon mode kept the user message in a
241
+ // TUI-local queue that never reached the engine.
242
+ this.inputQueue = [];
232
243
 
233
244
  // Context windowing
234
245
  this.contextWindow = new ContextWindow({
@@ -461,12 +472,16 @@ export class AgentEngine {
461
472
  } catch { /* never fatal */ }
462
473
  };
463
474
 
464
- // v0.8 P1-C: self-rescheduling setTimeout instead of setInterval. The
465
- // 资管 v0.7.5 session shows only 2 heap.jsonl entries (12:39:40 start
466
- // + 12:40:40 first tick) across an 18-hour run the unref'd
467
- // setInterval was somehow dropped between event-loop idle phases.
468
- // setTimeout reschedules from inside the sample callback, so the
469
- // timer is re-registered every tick. unref'd so we don't block exit.
475
+ // v0.8 P1-C: self-rescheduling setTimeout instead of setInterval.
476
+ // v0.8.3 P21-B4: removed .unref() — both 资管 + 贷款 v0.8.2 sessions
477
+ // showed only 1 line in heap.jsonl across 7+ hour runs even with
478
+ // self-rescheduling setTimeout. The .unref'd timer was apparently
479
+ // being dropped by Node's event-loop housekeeping despite the
480
+ // process being kept alive by stdin / React render loop / other
481
+ // refs. The cost of dropping .unref() is that on a graceful exit
482
+ // path that doesn't call engine.stop(), the timer can delay exit
483
+ // by up to 60s. We accept this — engine.stop() is the canonical
484
+ // shutdown path and it clears the timer via clearTimeout.
470
485
  let timeoutHandle = null;
471
486
  const scheduleNext = () => {
472
487
  if (stopped) return;
@@ -474,7 +489,6 @@ export class AgentEngine {
474
489
  sample();
475
490
  scheduleNext();
476
491
  }, 60_000);
477
- timeoutHandle.unref?.();
478
492
  };
479
493
 
480
494
  // Record one sample at startup so we have a baseline even on short runs.
@@ -798,6 +812,7 @@ export class AgentEngine {
798
812
  pipelineState: this.pipelines[this.currentPhase]?.describeState?.() || null,
799
813
  workspaceState: this._buildWorkspaceState(),
800
814
  projectMemory: this._readProjectMemory(),
815
+ marathonGoal: this.marathonGoal,
801
816
  });
802
817
  const systemTokens = estimateTokens(systemPrompt);
803
818
  const messageTokens = estimateMessagesTokens(this.history.messages);
@@ -1239,6 +1254,7 @@ export class AgentEngine {
1239
1254
  pipelineState,
1240
1255
  workspaceState: this._buildWorkspaceState(),
1241
1256
  projectMemory: this._readProjectMemory(),
1257
+ marathonGoal: this.marathonGoal,
1242
1258
  });
1243
1259
  const tools = this.toolRegistry.schemasOpenai();
1244
1260
 
@@ -2468,6 +2484,18 @@ export class AgentEngine {
2468
2484
  // v0.8.0; the I/O wrapper just shifted from filesystem-watcher to
2469
2485
  // direct method calls.
2470
2486
  while (this.marathonDriver) {
2487
+ // v0.8.2 P12-B: user-input queue priority. Drain queued user messages
2488
+ // FIRST so mid-run nudges always win over driver autonomy. Fixes the
2489
+ // v0.8.1 silent queue-starvation: the TUI used to queue messages in a
2490
+ // local ref that only drained after runTurn() returned, but the
2491
+ // marathon loop never returns while the driver is active. Now the
2492
+ // engine owns the queue; TUI hands off via queueUserInput().
2493
+ const queuedUserInput = this._drainNextQueuedUserInput();
2494
+ if (queuedUserInput) {
2495
+ yield* this.runTurn(queuedUserInput);
2496
+ continue;
2497
+ }
2498
+
2471
2499
  const turnsSnapshot = this.marathonDriver.turnsThisPhase;
2472
2500
  const phaseChanged = this.currentPhase !== this.marathonDriver.currentPhase;
2473
2501
  const milestones = this._buildEngineCountsBlock(this.currentPhase) || {};
@@ -2485,6 +2513,7 @@ export class AgentEngine {
2485
2513
  decisions: this.marathonDriver.decisionCount,
2486
2514
  });
2487
2515
  this.marathonDriver = null;
2516
+ this.marathonGoal = null;
2488
2517
  break;
2489
2518
  }
2490
2519
  this.eventLog.append("marathon_decision", {
@@ -2493,7 +2522,8 @@ export class AgentEngine {
2493
2522
  phase: this.currentPhase,
2494
2523
  });
2495
2524
  yield* this.runTurn(decision.prompt);
2496
- // Loop back: another turn just completed; driver gets another decideNext call.
2525
+ // Loop back: another turn just completed; engine queue + driver both
2526
+ // get another chance via the next iteration's drain-then-decide.
2497
2527
  }
2498
2528
  }
2499
2529
 
@@ -2511,6 +2541,7 @@ export class AgentEngine {
2511
2541
  if (this.marathonDriver) {
2512
2542
  throw new Error("Marathon already active — use /marathon off to disengage first");
2513
2543
  }
2544
+ this.marathonGoal = goal;
2514
2545
  this.marathonDriver = new MarathonDriver({
2515
2546
  goal,
2516
2547
  language: this.config.language || "en",
@@ -2534,6 +2565,7 @@ export class AgentEngine {
2534
2565
  decisions: this.marathonDriver.decisionCount,
2535
2566
  });
2536
2567
  this.marathonDriver = null;
2568
+ this.marathonGoal = null;
2537
2569
  return status;
2538
2570
  }
2539
2571
 
@@ -2542,6 +2574,51 @@ export class AgentEngine {
2542
2574
  return !!this.marathonDriver && !this.marathonDriver.stopped;
2543
2575
  }
2544
2576
 
2577
+ /**
2578
+ * v0.8.2 P12-B: queue a user-typed message for the engine to pick up at
2579
+ * the next turn boundary. Called by the TUI when the user types during an
2580
+ * in-flight marathon turn. The marathon decision loop drains this queue
2581
+ * BEFORE asking the driver for a continuation, so user interrupts always
2582
+ * win over driver autonomy.
2583
+ *
2584
+ * @param {string} text — user-typed message
2585
+ */
2586
+ queueUserInput(text) {
2587
+ if (!text || typeof text !== "string") return;
2588
+ this.inputQueue.push(text);
2589
+ this.eventLog.append("user_input_queued", {
2590
+ preview: text.slice(0, 100),
2591
+ queueDepth: this.inputQueue.length,
2592
+ marathonActive: this.isMarathonActive(),
2593
+ });
2594
+ }
2595
+
2596
+ /**
2597
+ * v0.8.2 P12-B: drain the next queued user input, or null if empty.
2598
+ * Internal helper for the marathon decision loop.
2599
+ *
2600
+ * @returns {string|null}
2601
+ */
2602
+ _drainNextQueuedUserInput() {
2603
+ if (this.inputQueue.length === 0) return null;
2604
+ const text = this.inputQueue.shift();
2605
+ this.eventLog.append("user_input_drained", {
2606
+ preview: text.slice(0, 100),
2607
+ queueDepth: this.inputQueue.length,
2608
+ });
2609
+ return text;
2610
+ }
2611
+
2612
+ /**
2613
+ * v0.8.2 P12-B: query the queue depth without draining.
2614
+ * Used by TUI to display "Queued (N waiting)" indicator.
2615
+ *
2616
+ * @returns {number}
2617
+ */
2618
+ getQueueDepth() {
2619
+ return this.inputQueue.length;
2620
+ }
2621
+
2545
2622
  /**
2546
2623
  * B1: Parallel ralph-loop — N concurrent subagents each executing one
2547
2624
  * task at a time, claimed atomically from TaskManager.
@@ -32,6 +32,16 @@ export class LLMClient {
32
32
  this.baseUrl = baseUrl.replace(/\/+$/, "");
33
33
  this.authType = authType;
34
34
  this.apiFormat = apiFormat;
35
+ // v0.8.2 P14-A: request-level timeout for fetch. SiliconFlow GLM-5.1
36
+ // streams hung 8h+ overnight in E2E #12 with no HTTP-level cutoff.
37
+ // 10 min ceiling (configurable via KC_LLM_REQUEST_TIMEOUT_MS) lets the
38
+ // marathon driver's `error: terminated` → recovery path kick in within
39
+ // minutes instead of hours when the upstream stalls a request without
40
+ // closing the TCP connection.
41
+ const envTimeout = parseInt(process.env.KC_LLM_REQUEST_TIMEOUT_MS || "0", 10);
42
+ this.requestTimeoutMs = Number.isFinite(envTimeout) && envTimeout > 0
43
+ ? envTimeout
44
+ : 10 * 60 * 1000;
35
45
  }
36
46
 
37
47
  /**
@@ -196,10 +206,15 @@ export class LLMClient {
196
206
  let resp;
197
207
  try {
198
208
  resp = await withRetry(async () => {
209
+ // v0.8.2 P14-A: AbortSignal.timeout caps the whole request (connect +
210
+ // streaming) at a fixed total deadline; it does not reset per chunk.
211
+ // Hung streams (SiliconFlow GLM-5.1 overnight, E2E #12) abort within
212
+ // requestTimeoutMs and surface as an error the marathon driver can recover from.
199
213
  const r = await fetch(this._getEndpoint(), {
200
214
  method: "POST",
201
215
  headers: this._buildHeaders(),
202
216
  body: JSON.stringify(body),
217
+ signal: AbortSignal.timeout(this.requestTimeoutMs),
203
218
  });
204
219
  if (!r.ok) {
205
220
  const text = await r.text();
@@ -215,7 +230,13 @@ export class LLMClient {
215
230
  // A8: Any pre-stream failure (network, auth, 4xx/5xx after retry) is
216
231
  // tagged and re-thrown. Engine's outer catch sees exactly one tagged
217
232
  // error event.
218
- if (!err.streamTermination) err.streamTermination = "connect_error";
233
+ // v0.8.2 P14-A: AbortSignal.timeout rejects with a TimeoutError (AbortError
234
+ // is accepted too); tag both as request_timeout so audits can count these vs. generic connect errors.
235
+ if (err.name === "TimeoutError" || err.name === "AbortError") {
236
+ err.streamTermination = "request_timeout";
237
+ } else if (!err.streamTermination) {
238
+ err.streamTermination = "connect_error";
239
+ }
219
240
  throw err;
220
241
  }
221
242
 
@@ -256,10 +277,12 @@ export class LLMClient {
256
277
  const body = this._buildNonStreamBody({ model, messages, maxTokens });
257
278
 
258
279
  const resp = await withRetry(async () => {
280
+ // v0.8.2 P14-A: same request-level timeout as streamChat for symmetry.
259
281
  const r = await fetch(this._getEndpoint(), {
260
282
  method: "POST",
261
283
  headers: this._buildHeaders(),
262
284
  body: JSON.stringify(body),
285
+ signal: AbortSignal.timeout(this.requestTimeoutMs),
263
286
  });
264
287
  if (!r.ok) {
265
288
  const text = await r.text();
@@ -156,13 +156,33 @@ function sha256OfFile(p) {
156
156
  } catch { return null; }
157
157
  }
158
158
 
159
- // Normalize a rule id like "R14" / "r014" / "R0014" to canonical "R014".
159
+ // Normalize a rule id to a canonical form for dedup + comparison.
160
+ // Accepts two shapes:
161
+ // Bare-numeric: "R14" / "r014" / "R0014" → "R014"
162
+ // Compound: "R01-01" / "R01_01" / "R001-005" → "R001-005"
163
+ // (zero-pads the major part to 3 digits; preserves the
164
+ // minor part numerically; uses dash separator canonically)
160
165
  // Returns null for non-matching strings (e.g., thematic skill names like
161
- // "account_identity" — those stay as-is via the second branch).
162
- function canonicalRuleId(s) {
166
+ // "account_identity" — those stay as-is and don't get credited via this
167
+ // path; their credit comes from frontmatter `source_rules:` instead).
168
+ //
169
+ // v0.8.3 P20-B2: compound form added. E2E #13 资管 used `R01-01`..`R07-01`
170
+ // naturally following the regulation's subsection numbering; v0.8.2's
171
+ // bare-only regex returned null for all 15 dirs → `rulesCovered: 0/15`
172
+ // → engine refused natural skill_testing advance.
173
+ export function canonicalRuleId(s) {
163
174
  if (typeof s !== "string") return null;
164
- const m = s.match(/^R0*(\d+)$/i);
165
- if (m) return `R${String(parseInt(m[1], 10)).padStart(3, "0")}`;
175
+ const trimmed = s.trim();
176
+ // Compound form: R01-01, R01_01, R001-005, etc.
177
+ const compound = trimmed.match(/^R0*(\d+)[-_](\d+)$/i);
178
+ if (compound) {
179
+ const major = String(parseInt(compound[1], 10)).padStart(3, "0");
180
+ const minor = String(parseInt(compound[2], 10)).padStart(2, "0");
181
+ return `R${major}-${minor}`;
182
+ }
183
+ // Bare-numeric form
184
+ const bare = trimmed.match(/^R0*(\d+)$/i);
185
+ if (bare) return `R${String(parseInt(bare[1], 10)).padStart(3, "0")}`;
166
186
  return null;
167
187
  }
168
188
 
@@ -193,9 +213,16 @@ export function deriveRuleExtractionMilestones(workspace) {
193
213
 
194
214
  // rulesExtracted: every rule object across every JSON file in rules/
195
215
  // that has a non-empty `id` field. catalog.json is canonical but agents
196
- // sometimes fan out to per-rule files (E2E #5 DS).
216
+ // sometimes fan out to per-rule files (E2E #5 DS) — or write SIBLING
217
+ // files with the same IDs plus additional metadata (E2E #13 资管's
218
+ // `rules/difficulty.json` added judgment-type classifications and
219
+ // doubled the count from 15 → 30 because the engine pushed IDs without
220
+ // dedup). v0.8.3 P20-B1: dedup by ID across all rules/*.json files.
221
+ // First-seen wins for chunk-ref counting (catalog.json is read first
222
+ // by alphabetical / fs order in most cases).
197
223
  const rulesExtracted = [];
198
224
  const rulesWithChunkRefs = [];
225
+ const seenIds = new Set();
199
226
  if (dirExists(rulesDir)) {
200
227
  for (const e of listChildFiles(rulesDir)) {
201
228
  if (!e.name.endsWith(".json")) continue;
@@ -204,8 +231,21 @@ export function deriveRuleExtractionMilestones(workspace) {
204
231
  const items = Array.isArray(data) ? data : (data.rules || []);
205
232
  for (const r of items) {
206
233
  if (r && typeof r.id === "string" && r.id.length) {
234
+ if (seenIds.has(r.id)) continue; // v0.8.3 P20-B1 dedup
235
+ seenIds.add(r.id);
207
236
  rulesExtracted.push(r.id);
208
- if (Array.isArray(r.source_chunk_ids) && r.source_chunk_ids.length > 0) {
237
+ // v0.8.2 P13-C: accept any of three field names for chunk
238
+ // references. Engine historically looked only for
239
+ // `source_chunk_ids`, but 贷款 v0.8.1 + 资管 v0.8.1 catalogs
240
+ // wrote `chunk_ids` (the shorter form agents naturally pick
241
+ // from the rule-extraction skill examples). `chunk_refs` is
242
+ // a legacy alias from older audit docs. Any non-empty match
243
+ // counts.
244
+ const chunks = (Array.isArray(r.source_chunk_ids) && r.source_chunk_ids)
245
+ || (Array.isArray(r.chunk_ids) && r.chunk_ids)
246
+ || (Array.isArray(r.chunk_refs) && r.chunk_refs)
247
+ || null;
248
+ if (chunks && chunks.length > 0) {
209
249
  rulesWithChunkRefs.push(r.id);
210
250
  }
211
251
  }
@@ -331,6 +371,37 @@ export function deriveSkillAuthoringMilestones(workspace) {
331
371
  }
332
372
  } catch { /* best-effort */ }
333
373
  }
374
+
375
+ // v0.8.2 P13-D: also credit rule_ids declared in rule_mapping.json.
376
+ // 资管 v0.8.1 wrote 6 thematic-overlay dirs (R01_periodic_report,
377
+ // R02_custodian_core, etc.) each containing a rule_mapping.json that
378
+ // maps rule_ids to engine-level check function names. The dirs have
379
+ // no own check.py because the actual implementation lives in
380
+ // workspace-root verify_v*.py. Without recognizing rule_mapping.json,
381
+ // the engine treats them as orphan dirs.
382
+ //
383
+ // Rule-id formats in the wild include both bare-numeric (R01, R027)
384
+ // and compound (R01-05, R02-08). canonicalRuleId() normalizes both
385
+ // forms, so the raw-key fallback below only catches rule-like keys it
386
+ // rejects (R\d+ followed by a trailing `-` or `_` with no minor
387
+ // digits, e.g. "R01-"); those are added trimmed but not normalized.
388
+ try {
389
+ const mappingPath = path.join(skillPath, "rule_mapping.json");
390
+ if (fileExists(mappingPath)) {
391
+ const mapping = readJsonSafe(mappingPath);
392
+ if (mapping && typeof mapping === "object" && !Array.isArray(mapping)) {
393
+ for (const key of Object.keys(mapping)) {
394
+ const canon = canonicalRuleId(key);
395
+ if (canon) {
396
+ ruleIdsCovered.add(canon);
397
+ } else if (/^R0*\d+[-_]?\d*$/i.test(key.trim())) {
398
+ // Rule-like key that canonicalRuleId() rejected (e.g. "R01-") — preserve as-is
399
+ ruleIdsCovered.add(key.trim());
400
+ }
401
+ }
402
+ }
403
+ }
404
+ } catch { /* best-effort */ }
334
405
  }
335
406
 
336
407
  // v0.8 P2-F (item 22): count stub-shaped check.py files. Pairs with
@@ -3,7 +3,7 @@ import path from "node:path";
3
3
  import { Phase, PipelineEvent } from "./index.js";
4
4
  import { Pipeline } from "./base.js";
5
5
  import { SkillValidator } from "../skill-validator.js";
6
- import { deriveSkillAuthoringMilestones } from "./_milestone-derive.js";
6
+ import { deriveSkillAuthoringMilestones, canonicalRuleId } from "./_milestone-derive.js";
7
7
 
8
8
  export class SkillAuthoringPipeline extends Pipeline {
9
9
  /**
@@ -37,14 +37,31 @@ export class SkillAuthoringPipeline extends Pipeline {
37
37
  }
38
38
 
39
39
  _loadRules() {
40
+ // v0.8.3 P20-B1+B2: dedup rule IDs across all rules/*.json files AND
41
+ // canonicalize them so the rulesCovered comparison against
42
+ // ruleIdsCovered (which now goes through canonicalRuleId) works for
43
+ // BOTH bare-numeric (R14) AND compound (R01-01, R02-03) forms.
44
+ // E2E #13 资管 used compound IDs + wrote a sibling difficulty.json;
45
+ // the raw-string + no-dedup pre-v0.8.3 path produced rulesCovered:
46
+ // 0/30 (compound IDs unmatched + double-counted).
40
47
  this.totalRules = [];
48
+ const seen = new Set();
41
49
  const rulesDir = path.join(this._workspace.cwd, "rules");
42
50
  if (!fs.existsSync(rulesDir)) return;
43
51
  for (const f of fs.readdirSync(rulesDir).filter((f) => f.endsWith(".json"))) {
44
52
  try {
45
53
  const data = JSON.parse(fs.readFileSync(path.join(rulesDir, f), "utf-8"));
46
54
  const rules = Array.isArray(data) ? data : (data.rules || []);
47
- for (const r of rules) { if (r.id) this.totalRules.push(r.id); }
55
+ for (const r of rules) {
56
+ if (!r || !r.id) continue;
57
+ // Canonicalize to match ruleIdsCovered which is built from
58
+ // canonicalRuleId() output. If canonicalRuleId returns null
59
+ // (non-rule-shaped string), preserve the raw trimmed string.
60
+ const canon = canonicalRuleId(r.id) || String(r.id).trim();
61
+ if (seen.has(canon)) continue;
62
+ seen.add(canon);
63
+ this.totalRules.push(canon);
64
+ }
48
65
  } catch { /* skip */ }
49
66
  }
50
67
  }
@@ -763,7 +763,100 @@ export class ReleaseTool extends BaseTool {
763
763
  }
764
764
  }
765
765
 
766
- // 5) Fallback (belt-and-suspenders per v0.8 plan Risk #7):
766
+ // 5) v0.8.2 P13-A: doc-keyed rules-keyed nested shape.
767
+ // 贷款 v0.8.1 wrote skill_test_v*_results.json + v2_hybrid_results.json
768
+ // + run_all_checks.json all with this shape:
769
+ // {
770
+ // "<doc_filename>": {
771
+ // "channel": "...", "expected": "PASS"|"FAIL",
772
+ // "rules": {
773
+ // "R01": {"rule_id": "R01", "verdict": "PASS", "confidence": 0.95, "method": "regex"},
774
+ // "R02": {...}
775
+ // }
776
+ // },
777
+ // ...
778
+ // }
779
+ // The optional outer "results" wrapper from v2_full_regression.json
780
+ // (which nests this further) is unwrapped via d.results || d.
781
+ if (tally.size === 0) {
782
+ for (const f of files) {
783
+ if (!/qc|verdict|result|test/i.test(f.name)) continue;
784
+ try {
785
+ const d = JSON.parse(fs.readFileSync(f.path, "utf-8"));
786
+ const root = d?.results || d;
787
+ if (!root || typeof root !== "object" || Array.isArray(root)) continue;
788
+ let matched = false;
789
+ for (const docKey of Object.keys(root)) {
790
+ const docEntry = root[docKey];
791
+ if (!docEntry || typeof docEntry !== "object") continue;
792
+ const rulesMap = docEntry.rules;
793
+ if (!rulesMap || typeof rulesMap !== "object" || Array.isArray(rulesMap)) continue;
794
+ for (const rid of Object.keys(rulesMap)) {
795
+ if (!isRuleId(rid)) continue;
796
+ const r = rulesMap[rid];
797
+ if (!r || typeof r !== "object") continue;
798
+ const verdict = (r.verdict || r.result_type || r.status || "").toString().toUpperCase();
799
+ if (verdict === "PASS") { bump(rid, "pass"); matched = true; }
800
+ else if (verdict === "FAIL") { bump(rid, "fail"); matched = true; }
801
+ else if (verdict === "NOT_APPLICABLE" || verdict === "NA") { bump(rid, "na"); matched = true; }
802
+ }
803
+ }
804
+ if (matched) {
805
+ sourceFiles.push(path.relative(this._workspace.cwd, f.path));
806
+ break;
807
+ }
808
+ } catch { /* skip non-JSON */ }
809
+ }
810
+ }
811
+
812
+ // 6) v0.8.3 P22-B6: top-level array of {doc_id, results: [{rule_id, status}]}.
813
+ // 资管 v0.8.2 wrote `output/skill_test_v*.json` + `workflow_v*_results.json`
814
+ // + `evolution_round*.json` all with this shape:
815
+ // [
816
+ // {
817
+ // "doc_id": "<doc-filename>",
818
+ // "results": [
819
+ // {"rule_id": "R01-01", "status": "WARNING", "found_fields": {...}},
820
+ // {"rule_id": "R01-02", "status": "PASS", ...},
821
+ // ...
822
+ // ]
823
+ // },
824
+ // ...
825
+ // ]
826
+ // Distinct from Shape 5: top-level is an ARRAY (not object), and the
827
+ // per-rule data lives in `results: [...]` (an array of rule outcomes)
828
+ // rather than `rules: {<rule>: ...}` (object keyed by rule).
829
+ if (tally.size === 0) {
830
+ for (const f of files) {
831
+ if (!/qc|verdict|result|test|evolution|workflow/i.test(f.name)) continue;
832
+ try {
833
+ const d = JSON.parse(fs.readFileSync(f.path, "utf-8"));
834
+ if (!Array.isArray(d)) continue;
835
+ let matched = false;
836
+ for (const docEntry of d) {
837
+ if (!docEntry || typeof docEntry !== "object") continue;
838
+ const results = docEntry.results;
839
+ if (!Array.isArray(results)) continue;
840
+ for (const r of results) {
841
+ if (!r || typeof r !== "object") continue;
842
+ const rid = r.rule_id || r.ruleId || r.id;
843
+ if (!isRuleId(rid)) continue;
844
+ const verdict = (r.status || r.verdict || r.result_type || "").toString().toUpperCase();
845
+ if (verdict === "PASS") { bump(rid, "pass"); matched = true; }
846
+ else if (verdict === "FAIL") { bump(rid, "fail"); matched = true; }
847
+ else if (verdict === "WARNING") { bump(rid, "pass"); matched = true; } // WARNING counts as pass (per existing shape conventions)
848
+ else if (verdict === "NOT_APPLICABLE" || verdict === "NA") { bump(rid, "na"); matched = true; }
849
+ }
850
+ }
851
+ if (matched) {
852
+ sourceFiles.push(path.relative(this._workspace.cwd, f.path));
853
+ break;
854
+ }
855
+ } catch { /* skip non-JSON */ }
856
+ }
857
+ }
858
+
859
+ // 7) Fallback (belt-and-suspenders per v0.8 plan Risk #7):
767
860
  // walk any output/*.json with a top-level rule_id-keyed shape that has
768
861
  // verdict-like leaf objects. Catches future schema drift before the
769
862
  // next audit cycle.
package/src/cli/index.js CHANGED
@@ -716,12 +716,27 @@ function App({ engine, config }) {
716
716
  }
717
717
 
718
718
  if (streamingRef.current) {
719
- queueRef.current.push(trimmed);
720
- setQueueSize(queueRef.current.length); // F2
721
- addMessage({
722
- role: "system",
723
- content: `⏳ Queued (${queueRef.current.length} waiting). Will be sent to KC on next turn boundary.`,
724
- });
719
+ // v0.8.2 P12-B: in marathon mode, hand off to engine's input queue
720
+ // instead of the TUI-local queueRef. The engine's marathon decision
721
+ // loop drains it FIRST at each turn boundary, so the user's nudge
722
+ // wins over the driver's continuation. Outside marathon, keep the
723
+ // existing TUI-local queue (drained after runTurn returns).
724
+ const marathonActive = engineRef.current?.isMarathonActive?.() ?? false;
725
+ if (marathonActive && engineRef.current?.queueUserInput) {
726
+ engineRef.current.queueUserInput(trimmed);
727
+ const depth = engineRef.current.getQueueDepth?.() ?? 1;
728
+ addMessage({
729
+ role: "system",
730
+ content: `⏳ Queued for marathon (${depth} waiting). Will be sent before the next driver continuation.`,
731
+ });
732
+ } else {
733
+ queueRef.current.push(trimmed);
734
+ setQueueSize(queueRef.current.length); // F2
735
+ addMessage({
736
+ role: "system",
737
+ content: `⏳ Queued (${queueRef.current.length} waiting). Will be sent to KC on next turn boundary.`,
738
+ });
739
+ }
725
740
  } else {
726
741
  runTurn(trimmed);
727
742
  }
@@ -850,9 +865,15 @@ export async function main({ languageOverride } = {}) {
850
865
  // Capture user's project directory (CWD at launch)
851
866
  config.projectDir = process.cwd();
852
867
 
853
- // Session-only language override (does NOT persist to config)
868
+ // Session-only language override (does NOT persist to config).
869
+ // v0.8.3 P20-B3 (Task #218): also set process.env.LANGUAGE so the
870
+ // engine's _overlayWorkspaceEnv() penvWon check honors the CLI flag.
871
+ // Pre-v0.8.3, workspace .env LANGUAGE=en would overwrite a CLI --zh
872
+ // override during engine construction because the overlay only
873
+ // checked process.env, not in-memory config.language.
854
874
  if (languageOverride) {
855
875
  config.language = languageOverride;
876
+ process.env.LANGUAGE = languageOverride;
856
877
  }
857
878
 
858
879
  if (!config.llmApiKey) {
@@ -1,4 +1,4 @@
1
- # === KC Reborn Configuration ===
1
+ # === KC Configuration ===
2
2
 
3
3
  # Language: en | zh
4
4
  LANGUAGE=en
package/template/AGENT.md CHANGED
@@ -15,7 +15,7 @@ update as you learn about this specific business scenario.
15
15
 
16
16
  ---
17
17
 
18
- # KC Reborn — Document Verification Workspace
18
+ # KC — Document Verification Workspace
19
19
 
20
20
  ## What This Workspace Is
21
21
 
@@ -93,7 +93,7 @@ The skill body is the methodology. Skills convey philosophy and decision framewo
93
93
 
94
94
  ---
95
95
 
96
- # KC Reborn — 文档核查工作区
96
+ # KC — 文档核查工作区
97
97
 
98
98
  > **技能优先级**: meta-meta 技能是架构层面 —— 当指导冲突时,
99
99
  > meta-meta 凌驾于 meta (技法层面) 之上。架构师的框架约束技法。