kc-beta 0.3.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/package.json +1 -1
  2. package/src/agent/confidence-scorer.js +8 -0
  3. package/src/agent/context-window.js +7 -2
  4. package/src/agent/context.js +25 -0
  5. package/src/agent/corner-case-registry.js +5 -0
  6. package/src/agent/engine.js +564 -76
  7. package/src/agent/event-log.js +15 -2
  8. package/src/agent/history.js +91 -23
  9. package/src/agent/pipelines/initializer.js +3 -6
  10. package/src/agent/retry.js +9 -1
  11. package/src/agent/rule-catalog-normalize.js +37 -0
  12. package/src/agent/scheduler.js +276 -0
  13. package/src/agent/session-state.js +11 -2
  14. package/src/agent/task-manager.js +5 -0
  15. package/src/agent/tools/agent-tool.js +57 -14
  16. package/src/agent/tools/archive-file.js +94 -0
  17. package/src/agent/tools/copy-to-workspace.js +140 -0
  18. package/src/agent/tools/phase-advance.js +60 -0
  19. package/src/agent/tools/release.js +323 -0
  20. package/src/agent/tools/rule-catalog.js +56 -4
  21. package/src/agent/tools/schedule-fetch.js +118 -0
  22. package/src/agent/tools/snapshot.js +101 -0
  23. package/src/agent/tools/workspace-file.js +10 -7
  24. package/src/agent/version-manager.js +29 -120
  25. package/src/agent/workspace.js +127 -4
  26. package/src/cli/components.js +68 -12
  27. package/src/cli/index.js +147 -15
  28. package/src/config.js +10 -1
  29. package/src/model-tiers.json +5 -5
  30. package/template/release-runtime/README.md.tmpl +84 -0
  31. package/template/release-runtime/kc_runtime/__init__.py +2 -0
  32. package/template/release-runtime/kc_runtime/confidence.py +93 -0
  33. package/template/release-runtime/kc_runtime/dashboard.py +208 -0
  34. package/template/release-runtime/render_dashboard.py +49 -0
  35. package/template/release-runtime/run.py +230 -0
  36. package/template/release-runtime/serve.sh +15 -0
  37. package/template/skills/en/meta-meta/bootstrap-workspace/SKILL.md +11 -0
  38. package/template/skills/en/meta-meta/quality-control/SKILL.md +13 -1
  39. package/template/skills/en/meta-meta/skill-to-workflow/SKILL.md +8 -0
  40. package/template/skills/en/meta-meta/task-decomposition/SKILL.md +13 -0
  41. package/template/skills/en/meta-meta/version-control/SKILL.md +13 -0
  42. package/template/skills/zh/meta-meta/bootstrap-workspace/SKILL.md +11 -0
  43. package/template/skills/zh/meta-meta/quality-control/SKILL.md +12 -0
  44. package/template/skills/zh/meta-meta/skill-to-workflow/SKILL.md +8 -0
  45. package/template/skills/zh/meta-meta/task-decomposition/SKILL.md +16 -0
  46. package/template/skills/zh/meta-meta/version-control/SKILL.md +13 -0
  47. package/template/workspace.gitignore +22 -0
package/src/cli/index.js CHANGED
@@ -2,9 +2,10 @@ import React, { useState, useEffect, useCallback, useRef } from "react";
2
2
  import { render, Box, Text, useApp, useInput } from "ink";
3
3
  import { loadSettings } from "../config.js";
4
4
  import { LLMClient } from "../agent/llm-client.js";
5
- import { AgentEngine } from "../agent/engine.js";
5
+ import { AgentEngine, NEXT_PHASE } from "../agent/engine.js";
6
6
  import { Workspace } from "../agent/workspace.js";
7
7
  import { ConversationHistory } from "../agent/history.js";
8
+ import { Scheduler } from "../agent/scheduler.js";
8
9
  import {
9
10
  WelcomeBanner,
10
11
  StatusBar,
@@ -17,6 +18,18 @@ import {
17
18
 
18
19
  const h = React.createElement;
19
20
 
21
+ // Only the last N messages stay in the Ink render tree. Older messages
22
+ // remain in React state (so /compact can summarize them) but aren't
23
+ // diffed on every keystroke — this is what keeps long sessions responsive
24
+ // and prevents the 4 GB heap OOM observed in the v0.5.3 E2E test.
25
+ // Full conversation is persisted to logs/events.jsonl on every event,
26
+ // so dropping from render is purely visual.
27
+ const VISIBLE_WINDOW = 50;
28
+
29
+ // How many recent messages render their ToolBlock with full preview.
30
+ // Older ToolBlocks show header only. Both still persist full output to disk.
31
+ const RECENT_TOOL_WINDOW = 10;
32
+
20
33
  /**
21
34
  * Main KC Agent CLI App using Ink (React for terminals).
22
35
  */
@@ -158,6 +171,8 @@ function App({ engine, config }) {
158
171
  " /help Show this help\n" +
159
172
  " /status Show session info, model, phase, workspace\n" +
160
173
  " /tasks Show task progress\n" +
174
+ " /phase [sub] advance | status | <name> — manual phase override\n" +
175
+ " /schedule Show scheduled ingestion jobs and recent log lines\n" +
161
176
  " /clear Clear conversation history (keep workspace)\n" +
162
177
  " /compact Summarize older messages to reduce context\n" +
163
178
  " /sessions List all sessions\n" +
@@ -193,6 +208,79 @@ function App({ engine, config }) {
193
208
  });
194
209
  return true;
195
210
 
211
+ case "/phase": {
212
+ // User-driven phase override. Useful when auto-advance fails to fire
213
+ // or when debugging. Subcommands:
214
+ // /phase → current phase (alias: /phase status)
215
+ // /phase advance | next → move to NEXT_PHASE[current]
216
+ // /phase <name> → force-jump to any phase (forward or back)
217
+ const engine = engineRef.current;
218
+ const sub = (parts[1] || "").toLowerCase();
219
+
220
+ if (!sub || sub === "status") {
221
+ const next = NEXT_PHASE[engine.currentPhase];
222
+ addMessage({
223
+ role: "system",
224
+ content:
225
+ `Current phase: ${engine.currentPhase.toUpperCase()}` +
226
+ (next ? ` (next auto: ${next})` : " (final phase)"),
227
+ });
228
+ return true;
229
+ }
230
+
231
+ if (sub === "advance" || sub === "next") {
232
+ const next = NEXT_PHASE[engine.currentPhase];
233
+ if (!next) {
234
+ addMessage({ role: "system", content: `Already in final phase (${engine.currentPhase}).` });
235
+ return true;
236
+ }
237
+ const ok = engine._advancePhase(next, "manual /phase advance");
238
+ addMessage({
239
+ role: "system",
240
+ content: ok
241
+ ? `→ phase advanced to ${next.toUpperCase()}.`
242
+ : `Failed to advance from ${engine.currentPhase}.`,
243
+ });
244
+ updateContextStats();
245
+ return true;
246
+ }
247
+
248
+ // /phase <name> — force-jump. Uses {force:true} to allow backward jumps.
249
+ const ok = engine._advancePhase(sub, "manual /phase <name>", { force: true });
250
+ addMessage({
251
+ role: "system",
252
+ content: ok
253
+ ? `→ phase set to ${sub.toUpperCase()}.`
254
+ : `Unknown phase: ${sub}. Valid: bootstrap, extraction, skill_authoring, skill_testing, distillation, production_qc`,
255
+ });
256
+ updateContextStats();
257
+ return true;
258
+ }
259
+
260
+ case "/schedule": {
261
+ const sched = new Scheduler(engineRef.current.workspace);
262
+ const jobs = sched.list();
263
+ if (jobs.length === 0) {
264
+ addMessage({ role: "system", content: "No scheduled ingestion jobs. Ask KC to set one up via the schedule_fetch tool." });
265
+ } else {
266
+ const lines = jobs.map((j) => {
267
+ const status = j.enabled ? "✓ enabled" : "· disabled";
268
+ const hint = j.cron_hint ? ` cron: ${j.cron_hint}` : " cron: (not set)";
269
+ return ` ${status} ${j.id}\n${hint}\n cmd: ${j.command}`;
270
+ });
271
+ const tail = sched.tailLog(8);
272
+ const pending = sched.pendingInputCount();
273
+ addMessage({
274
+ role: "system",
275
+ content:
276
+ `Scheduled jobs:\n${lines.join("\n\n")}\n\n` +
277
+ `Pending in input/: ${pending} file(s)` +
278
+ (tail ? `\n\nlogs/ingest.log (last 8):\n${tail}` : ""),
279
+ });
280
+ }
281
+ return true;
282
+ }
283
+
196
284
  case "/clear":
197
285
  engineRef.current.history = new ConversationHistory(engineRef.current.workspace.cwd);
198
286
  setMessages([]);
@@ -202,15 +290,22 @@ function App({ engine, config }) {
202
290
 
203
291
  case "/compact": {
204
292
  addMessage({ role: "system", content: "Compacting conversation history..." });
205
- // Run compact asynchronously
206
293
  (async () => {
207
294
  try {
208
295
  const result = await engineRef.current.compact();
209
296
  if (result) {
210
- addMessage({
297
+ // Claude Code pattern: after successful compact, clear the
298
+ // visible TUI messages and start fresh with a single summary
299
+ // line. The underlying engine.history already contains the
300
+ // compact-summary message pair; the TUI doesn't need to keep
301
+ // showing the pre-compact history (it's on disk in
302
+ // logs/events.jsonl anyway) and clearing it immediately frees
303
+ // Ink render-tree memory — fixing the lag that builds up over
304
+ // long sessions.
305
+ setMessages([{
211
306
  role: "system",
212
- content: `Compacted: removed ${result.removedCount} messages, kept ${result.retainedCount}. Summary: ~${result.summaryTokens} tokens.`,
213
- });
307
+ content: `✓ 上下文已压缩:合并了 ${result.removedCount} 条早期消息(摘要约 ${result.summaryTokens} tokens,保留最近 ${result.retainedCount} 条)`,
308
+ }]);
214
309
  } else {
215
310
  addMessage({ role: "system", content: "Nothing to compact (conversation is short enough)." });
216
311
  }
@@ -227,9 +322,22 @@ function App({ engine, config }) {
227
322
  addMessage({ role: "system", content: "Usage: /rename <new_name>" });
228
323
  } else {
229
324
  try {
230
- const newId = engineRef.current.workspace.rename(arg);
231
- setSessionId(newId);
232
- addMessage({ role: "system", content: `Session renamed to: ${newId}` });
325
+ const r = engineRef.current.renameSession(arg);
326
+ setSessionId(r.sessionId);
327
+ const lines = [`Session renamed to: ${r.sessionId}`];
328
+ if (r.scheduleWrappersRegenerated.length > 0) {
329
+ lines.push(
330
+ `${r.scheduleWrappersRegenerated.length} cron wrapper script(s) regenerated.`,
331
+ `If you'd installed crontab lines for the OLD path, re-install via 'schedule_fetch print_crontab'.`,
332
+ );
333
+ }
334
+ if (r.scheduleWrappersFailed && r.scheduleWrappersFailed.length > 0) {
335
+ const ids = r.scheduleWrappersFailed.map((f) => f.id).join(", ");
336
+ lines.push(
337
+ `⚠ ${r.scheduleWrappersFailed.length} wrapper script(s) failed to regenerate (${ids}). Check workspace/scripts/ingest/ and disk space.`,
338
+ );
339
+ }
340
+ addMessage({ role: "system", content: lines.join("\n") });
233
341
  } catch (err) {
234
342
  addMessage({ role: "system", content: `Rename failed: ${err.message}` });
235
343
  }
@@ -342,36 +450,52 @@ function App({ engine, config }) {
342
450
 
343
451
  return h(Box, { flexDirection: "column" },
344
452
  // Welcome banner
345
- showWelcome ? h(WelcomeBanner, { projectDir: config.projectDir }) : null,
453
+ showWelcome ? h(WelcomeBanner, {
454
+ projectDir: config.projectDir,
455
+ pendingInputCount: (() => {
456
+ try { return new Scheduler(engineRef.current.workspace).pendingInputCount(); }
457
+ catch { return 0; }
458
+ })(),
459
+ }) : null,
346
460
 
347
461
  // Task dashboard (ralph-loop)
348
462
  taskList.length > 0 ? h(TaskDashboard, { tasks: taskList, progress: taskProgress }) : null,
349
463
 
350
- // Message history
351
- ...messages.map((msg, i) => {
464
+ // Message history (virtualized — only last VISIBLE_WINDOW render).
465
+ // Hidden-count hint for earlier messages, so users know the full
466
+ // history still exists (on disk) even though the TUI is slim.
467
+ messages.length > VISIBLE_WINDOW ? h(Box, { key: "hidden-hint" },
468
+ h(Text, { dimColor: true },
469
+ `— 前 ${messages.length - VISIBLE_WINDOW} 条消息已折叠,完整记录在 logs/events.jsonl —`),
470
+ ) : null,
471
+ ...messages.slice(-VISIBLE_WINDOW).map((msg, i, arr) => {
472
+ // Global index (for stable React keys) vs visible index (for isRecent).
473
+ const globalIdx = messages.length - arr.length + i;
474
+ const visibleIdx = arr.length - 1 - i; // 0 = most recent
352
475
  if (msg.role === "user") {
353
- return h(Box, { key: `msg-${i}` },
476
+ return h(Box, { key: `msg-${globalIdx}` },
354
477
  h(Text, { dimColor: true }, "❯ "),
355
478
  h(Text, null, msg.content),
356
479
  );
357
480
  }
358
481
  if (msg.role === "agent") {
359
- return h(Box, { key: `msg-${i}` },
482
+ return h(Box, { key: `msg-${globalIdx}` },
360
483
  h(Text, null, msg.content),
361
484
  );
362
485
  }
363
486
  if (msg.role === "tool") {
364
487
  return h(ToolBlock, {
365
- key: `msg-${i}`,
488
+ key: `msg-${globalIdx}`,
366
489
  name: msg.toolName,
367
490
  input: msg.toolInput,
368
491
  output: msg.toolOutput,
369
492
  isError: msg.toolIsError,
370
493
  isRunning: false,
494
+ isRecent: visibleIdx < RECENT_TOOL_WINDOW,
371
495
  });
372
496
  }
373
497
  if (msg.role === "system") {
374
- return h(Box, { key: `msg-${i}` },
498
+ return h(Box, { key: `msg-${globalIdx}` },
375
499
  h(Text, { dimColor: true }, msg.content),
376
500
  );
377
501
  }
@@ -436,6 +560,14 @@ export async function main({ languageOverride } = {}) {
436
560
  console.log(`\x1b[33m${msg}\x1b[0m\n`);
437
561
  }
438
562
 
563
+ // Warn if git is missing — Block 11 file system relies on git for version history.
564
+ if (config.gitAutoCommit !== false && !Workspace.isGitInstalled()) {
565
+ const msg = config.language === "zh"
566
+ ? " ⚠ 未检测到 git。本会话将不记录版本历史。安装 git 以启用自动提交。"
567
+ : " ⚠ git not found — version history disabled this session. Install git to enable auto-commit.";
568
+ console.log(`\x1b[33m${msg}\x1b[0m\n`);
569
+ }
570
+
439
571
  const client = new LLMClient({
440
572
  apiKey: config.llmApiKey,
441
573
  baseUrl: config.llmBaseUrl,
package/src/config.js CHANGED
@@ -65,7 +65,7 @@ export function loadSettings(workspacePath) {
65
65
  llmApiKey: env.LLM_API_KEY || env.SILICONFLOW_API_KEY || gc.api_key || "",
66
66
  llmBaseUrl: env.LLM_BASE_URL || env.SILICONFLOW_BASE_URL || gc.base_url || "https://api.siliconflow.cn/v1",
67
67
  kcModel: gc.conductor_model || "glm-5",
68
- kcMaxTokens: 65536,
68
+ kcMaxTokens: parseInt(env.KC_MAX_TOKENS || gc.kc_max_tokens?.toString() || "65536", 10),
69
69
 
70
70
  // Tier models (from .env or global config tiers)
71
71
  tier1: env.TIER1 || gc.tiers?.tier1 || "",
@@ -111,6 +111,15 @@ export function loadSettings(workspacePath) {
111
111
 
112
112
  // Context management
113
113
  kcContextLimit: parseInt(env.KC_CONTEXT_LIMIT || "200000", 10),
114
+ toolOutputOffloadTokens: parseInt(env.TOOL_OUTPUT_OFFLOAD_TOKENS || gc.tool_output_offload_tokens?.toString() || "2000", 10),
115
+ toolOutputOffloadErrorTokens: parseInt(env.TOOL_OUTPUT_OFFLOAD_ERROR_TOKENS || gc.tool_output_offload_error_tokens?.toString() || "500", 10),
116
+ maxMessageTokens: parseInt(env.MAX_MESSAGE_TOKENS || gc.max_message_tokens?.toString() || "60000", 10),
117
+
118
+ // File system (Block 11)
119
+ gitAutoCommit: (env.GIT_AUTO_COMMIT ?? gc.git_auto_commit ?? true) !== false &&
120
+ (env.GIT_AUTO_COMMIT !== "false") &&
121
+ (gc.git_auto_commit !== false),
122
+ largeRefThresholdMB: parseInt(env.LARGE_REF_THRESHOLD_MB || gc.large_ref_threshold_mb?.toString() || "10", 10),
114
123
 
115
124
  // Language
116
125
  language: env.LANGUAGE || gc.language || "en",
package/src/model-tiers.json CHANGED
@@ -2,17 +2,17 @@
2
2
  "_comment": "Model selections per provider. LLM tiers 1-4, VLM tiers 1-3. Edit this file directly to update model assignments.",
3
3
 
4
4
  "siliconflow": {
5
- "conductor": "Pro/zai-org/GLM-5",
5
+ "conductor": "Pro/zai-org/GLM-5.1",
6
6
  "llm": {
7
- "tier1": "Pro/zai-org/GLM-5, Pro/moonshotai/Kimi-K2.5",
7
+ "tier1": "Pro/zai-org/GLM-5.1, Pro/moonshotai/Kimi-K2.5",
8
8
  "tier2": "Pro/deepseek-ai/DeepSeek-V3.2, Pro/MiniMaxAI/MiniMax-M2.5",
9
9
  "tier3": "Qwen/Qwen3.5-122B-A10B",
10
10
  "tier4": "Qwen/Qwen3.5-35B-A3B"
11
11
  },
12
12
  "vlm": {
13
- "tier1": "Pro/Qwen/Qwen2.5-VL-72B-Instruct",
14
- "tier2": "Qwen/Qwen2.5-VL-32B-Instruct",
15
- "tier3": "Qwen/Qwen2.5-VL-7B-Instruct"
13
+ "tier1": "Qwen/Qwen3-VL-235B-A22B-Instruct",
14
+ "tier2": "Qwen/Qwen3-VL-30B-A3B-Instruct",
15
+ "tier3": "Qwen/Qwen3-VL-8B-Instruct"
16
16
  }
17
17
  },
18
18
 
package/template/release-runtime/README.md.tmpl ADDED
@@ -0,0 +1,84 @@
1
+ # {LABEL} — KC Verification Release
2
+
3
+ Generated: {CREATED_AT}
4
+ Snapshot tag: `{SNAPSHOT_TAG}`
5
+ Commit: `{SNAPSHOT_COMMIT}`
6
+ Built by: kc-beta {KC_VERSION}
7
+
8
+ {NOTES_BLOCK}
9
+
10
+ This bundle is self-contained. It runs without `kc-beta` installed — only Python 3 and a worker LLM API key are required.
11
+
12
+ ## What's in here
13
+
14
+ ```
15
+ manifest.json — release metadata (rules, models, snapshot tag)
16
+ README.md — this file
17
+ run.py — standalone driver, runs all rules
18
+ render_dashboard.py — re-render an HTML dashboard from a result JSON
19
+ serve.sh — optional helper, serves this dir over local HTTP
20
+ kc_runtime/ — bundled Python helpers (confidence scoring, dashboard)
21
+ workflows/ — pinned per-rule Python workflows + prompts
22
+ fixtures/ — sample inputs (if KC selected any)
23
+ glossary.json — project entity vocabulary at release time
24
+ catalog.json — rule catalog at release time
25
+ corner_cases.json — known corner cases (used by confidence scoring)
26
+ confidence_calibration.json — per-rule historical accuracy
27
+ models.json — worker LLM tier→model assignments
28
+ ```
29
+
30
+ ## Run a verification
31
+
32
+ ```sh
33
+ export LLM_API_KEY="sk-..."
34
+ export LLM_BASE_URL="https://api.siliconflow.cn/v1" # or your provider
35
+ export TIER1="..." # comma-separated model list
36
+ export TIER2="..."
37
+
38
+ python run.py /path/to/document.pdf > result.json
39
+ ```
40
+
41
+ Each rule's workflow runs against the document; results are aggregated into a single JSON.
42
+
43
+ ### Useful flags
44
+
45
+ ```sh
46
+ python run.py doc.pdf --rule R001 # run only one rule
47
+ python run.py doc.pdf --output result.json # write to a file
48
+ python run.py doc.pdf --dashboard # also emit an HTML dashboard
49
+ ```
50
+
51
+ ### Re-render a dashboard
52
+
53
+ ```sh
54
+ python render_dashboard.py result.json
55
+ # → result.html alongside the JSON
56
+ ```
57
+
58
+ ### Browse dashboards in a browser
59
+
60
+ ```sh
61
+ ./serve.sh
62
+ # → http://localhost:8080/result.html
63
+ ```
64
+
65
+ ## Rules in this release
66
+
67
+ {RULES_LIST}
68
+
69
+ ## Reproducibility
70
+
71
+ The release bundle is regenerable from the snapshot tag:
72
+
73
+ ```sh
74
+ git checkout {SNAPSHOT_TAG}
75
+ # then run kc-beta and ask it to release({label: "{LABEL}"}) again
76
+ ```
77
+
78
+ The `manifest.json` records the exact commit (`{SNAPSHOT_COMMIT}`) so you can verify what's running.
79
+
80
+ ## Caveats
81
+
82
+ - Workflows call worker LLMs. Costs depend on your provider; the bundle does not enforce a budget.
83
+ - Workflow output for each rule is preserved in `result.raw[*]` for audit. If you need full audit history with KC's event log + corner-case registry, work from the source workspace, not this bundle.
84
+ - The bundle does not sandbox the Python workflows it runs. Treat it like any other executable: run it only if you trust its source.
package/template/release-runtime/kc_runtime/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ # KC release-runtime support package.
2
+ # Bundled into every release. Self-contained, no external dependencies.
package/template/release-runtime/kc_runtime/confidence.py ADDED
@@ -0,0 +1,93 @@
1
+ """
2
+ Confidence scorer — Python port of src/agent/confidence-scorer.js.
3
+
4
+ Composite formula: confidence = method_prior * source_presence
5
+ * historical_accuracy * (1 - corner_proximity)
6
+
7
+ Identical to the JS scorer used inside KC, so release runs produce the same
8
+ confidence values KC produces in-workspace.
9
+
10
+ Note on rounding: JS Math.round() is half-up, Python's round() is half-to-even
11
+ (banker's rounding). We use a half-up implementation here to match JS exactly.
12
+ """
13
+
14
+ import math
15
+
16
+
17
+ def _round3_halfup(x):
18
+ """Round x to 3 decimals, half-up (matches JS Math.round)."""
19
+ return math.floor(x * 1000 + 0.5) / 1000
20
+
21
+
22
+ DEFAULT_PRIORS = {
23
+ "regex": 0.95,
24
+ "python": 0.90,
25
+ "llm": 0.75,
26
+ "ocr": 0.65,
27
+ "fallback": 0.50,
28
+ }
29
+
30
+
31
+ def score(rule_id, extracted_value, source_text="", method="llm",
32
+ document="", priors=None, historical=None, corner_cases=None):
33
+ """
34
+ Compute composite confidence score (0.0 - 1.0).
35
+
36
+ rule_id: rule identifier
37
+ extracted_value: the value the workflow extracted (string)
38
+ source_text: optional surrounding text from the document
39
+ method: "regex" | "python" | "llm" | "ocr" | "fallback"
40
+ document: document name / path (used for corner-case proximity)
41
+ priors: dict overriding DEFAULT_PRIORS
42
+ historical: dict of {rule_id: accuracy} from confidence_calibration.json
43
+ corner_cases: list/dict from corner_cases.json registry
44
+ """
45
+ p = priors or DEFAULT_PRIORS
46
+ method_prior = p.get(method, p.get("fallback", 0.50))
47
+
48
+ source_presence = 1.0
49
+ if source_text and extracted_value:
50
+ source_presence = 1.0 if str(extracted_value) in source_text else 0.7
51
+
52
+ hist = (historical or {}).get(rule_id, 0.8)
53
+
54
+ corner_proximity = _corner_proximity(corner_cases, document, rule_id)
55
+
56
+ confidence = method_prior * source_presence * hist * (1.0 - corner_proximity)
57
+ confidence = max(0.0, min(1.0, confidence))
58
+ return _round3_halfup(confidence)
59
+
60
+
61
+ def band(confidence):
62
+ """Classify confidence into low/medium/high band — matches JS getBand()."""
63
+ if confidence >= 0.8:
64
+ return "high"
65
+ if confidence >= 0.5:
66
+ return "medium"
67
+ return "low"
68
+
69
+
70
+ def _corner_proximity(corner_cases, document, rule_id):
71
+ """Mirror CornerCaseRegistry.match: count entries matching this doc + rule.
72
+ Each match adds 0.1 (capped at 0.3). Schema is intentionally loose — KC's
73
+ JS registry stores entries with optional `document_pattern` and `rule_id`
74
+ fields; we replicate the same matching semantics here.
75
+ """
76
+ if not corner_cases or not document:
77
+ return 0.0
78
+ entries = corner_cases if isinstance(corner_cases, list) else corner_cases.get("entries", [])
79
+ if not entries:
80
+ return 0.0
81
+
82
+ matches = 0
83
+ for e in entries:
84
+ if not isinstance(e, dict):
85
+ continue
86
+ if e.get("rule_id") and e.get("rule_id") != rule_id:
87
+ continue
88
+ pattern = e.get("document_pattern") or e.get("document") or ""
89
+ if pattern and pattern not in document:
90
+ continue
91
+ matches += 1
92
+
93
+ return min(0.3, 0.1 * matches)