kc-beta 0.6.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/LICENSE +81 -0
  2. package/LICENSE-COMMERCIAL.md +125 -0
  3. package/README.md +21 -3
  4. package/package.json +14 -5
  5. package/src/agent/context-window.js +9 -12
  6. package/src/agent/context.js +14 -1
  7. package/src/agent/document-parser.js +169 -0
  8. package/src/agent/engine.js +499 -20
  9. package/src/agent/history/event-history.js +222 -0
  10. package/src/agent/llm-client.js +55 -0
  11. package/src/agent/message-utils.js +63 -0
  12. package/src/agent/pipelines/_milestone-derive.js +511 -0
  13. package/src/agent/pipelines/base.js +21 -0
  14. package/src/agent/pipelines/distillation.js +28 -15
  15. package/src/agent/pipelines/extraction.js +103 -36
  16. package/src/agent/pipelines/finalization.js +178 -11
  17. package/src/agent/pipelines/index.js +6 -1
  18. package/src/agent/pipelines/initializer.js +74 -8
  19. package/src/agent/pipelines/production-qc.js +31 -44
  20. package/src/agent/pipelines/skill-authoring.js +152 -80
  21. package/src/agent/pipelines/skill-testing.js +67 -23
  22. package/src/agent/retry.js +10 -2
  23. package/src/agent/scheduler.js +14 -2
  24. package/src/agent/session-state.js +35 -2
  25. package/src/agent/skill-loader.js +13 -7
  26. package/src/agent/skill-validator.js +163 -0
  27. package/src/agent/task-manager.js +61 -5
  28. package/src/agent/tools/_workflow-result-schema.js +249 -0
  29. package/src/agent/tools/document-chunk.js +21 -9
  30. package/src/agent/tools/phase-advance.js +52 -6
  31. package/src/agent/tools/release.js +51 -9
  32. package/src/agent/tools/rule-catalog.js +11 -1
  33. package/src/agent/tools/workflow-run.js +9 -4
  34. package/src/agent/tools/workspace-file.js +32 -0
  35. package/src/agent/workspace.js +61 -0
  36. package/src/cli/components.js +64 -14
  37. package/src/cli/index.js +62 -3
  38. package/src/cli/meme.js +26 -25
  39. package/src/config.js +65 -22
  40. package/src/model-tiers.json +48 -0
  41. package/src/providers.js +87 -0
  42. package/template/release/v1/README.md.tmpl +108 -0
  43. package/template/release/v1/catalog.json.tmpl +4 -0
  44. package/template/release/v1/kc_runtime/__init__.py +11 -0
  45. package/template/release/v1/kc_runtime/confidence.py +63 -0
  46. package/template/release/v1/kc_runtime/doc_parser.py +127 -0
  47. package/template/release/v1/manifest.json.tmpl +11 -0
  48. package/template/release/v1/render_dashboard.py +117 -0
  49. package/template/release/v1/run.py +212 -0
  50. package/template/release/v1/serve.sh +17 -0
  51. package/template/skills/en/meta-meta/skill-authoring/SKILL.md +19 -0
  52. package/template/skills/en/meta-meta/work-decomposition/SKILL.md +266 -0
  53. package/template/skills/en/skill-creator/SKILL.md +1 -1
  54. package/template/skills/zh/meta-meta/skill-authoring/SKILL.md +19 -0
  55. package/template/skills/zh/meta-meta/work-decomposition/SKILL.md +264 -0
  56. package/template/skills/zh/skill-creator/SKILL.md +1 -1
package/src/config.js CHANGED
@@ -23,8 +23,20 @@ function loadGlobalConfig() {
23
23
  */
24
24
  function loadEnvFile(envPath) {
25
25
  if (!fs.existsSync(envPath)) return {};
26
+ // v0.7.0 H9: defend bootstrap against a .env that exists but isn't
27
+ // readable (permission denied, unexpected directory, encoding error,
28
+ // race with concurrent write). Old code threw and crashed config
29
+ // bootstrap before the CLI was even up — return empty {} on any
30
+ // read failure so the user sees the more actionable
31
+ // "no API key configured" error from loadSettings instead.
32
+ let raw;
33
+ try {
34
+ raw = fs.readFileSync(envPath, "utf-8");
35
+ } catch {
36
+ return {};
37
+ }
26
38
  const env = {};
27
- const lines = fs.readFileSync(envPath, "utf-8").split("\n");
39
+ const lines = raw.split("\n");
28
40
  for (const line of lines) {
29
41
  const trimmed = line.trim();
30
42
  if (!trimmed || trimmed.startsWith("#")) continue;
@@ -51,8 +63,13 @@ export function loadSettings(workspacePath) {
51
63
  const gc = loadGlobalConfig();
52
64
  const env = workspacePath ? loadEnvFile(path.join(workspacePath, ".env")) : {};
53
65
 
66
+ // Session-scoped overrides (process.env). Internal knob for benchmarking
67
+ // — lets a single launch swap conductor/workspace/context without touching
68
+ // ~/.kc_agent/config.json. Not exposed in --help or onboard.
69
+ const penv = process.env;
70
+
54
71
  // Resolve provider metadata for authType/apiFormat defaults
55
- const provider = gc.provider || "siliconflow";
72
+ const provider = penv.KC_PROVIDER || gc.provider || "siliconflow";
56
73
  const providerDef = getProviderById(provider);
57
74
 
58
75
  const settings = {
@@ -61,10 +78,10 @@ export function loadSettings(workspacePath) {
61
78
  authType: gc.auth_type || providerDef?.authType || "bearer",
62
79
  apiFormat: gc.api_format || providerDef?.apiFormat || "openai",
63
80
 
64
- // Conductor LLM (generic keys with legacy fallback)
65
- llmApiKey: env.LLM_API_KEY || env.SILICONFLOW_API_KEY || gc.api_key || "",
66
- llmBaseUrl: env.LLM_BASE_URL || env.SILICONFLOW_BASE_URL || gc.base_url || "https://api.siliconflow.cn/v1",
67
- kcModel: gc.conductor_model || "glm-5",
81
+ // Conductor LLM (process.env wins workspace .env → global config)
82
+ llmApiKey: penv.KC_LLM_API_KEY || env.LLM_API_KEY || env.SILICONFLOW_API_KEY || gc.api_key || "",
83
+ llmBaseUrl: penv.KC_LLM_BASE_URL || env.LLM_BASE_URL || env.SILICONFLOW_BASE_URL || gc.base_url || "https://api.siliconflow.cn/v1",
84
+ kcModel: penv.KC_CONDUCTOR_MODEL || gc.conductor_model || "glm-5",
68
85
  kcMaxTokens: parseInt(env.KC_MAX_TOKENS || gc.kc_max_tokens?.toString() || "65536", 10),
69
86
 
70
87
  // Tier models (from .env or global config tiers)
@@ -78,10 +95,10 @@ export function loadSettings(workspacePath) {
78
95
  vlmTier2: env.VLM_TIER2 || gc.vlm_tiers?.tier2 || "",
79
96
  vlmTier3: env.VLM_TIER3 || gc.vlm_tiers?.tier3 || "",
80
97
 
81
- // Worker LLM — optional, defaults to conductor config
82
- workerProvider: gc.worker_provider || "",
83
- workerApiKey: env.WORKER_API_KEY || gc.worker_api_key || "",
84
- workerBaseUrl: env.WORKER_BASE_URL || gc.worker_base_url || "",
98
+ // Worker LLM — optional, defaults to conductor config (process.env wins)
99
+ workerProvider: penv.KC_WORKER_PROVIDER || gc.worker_provider || "",
100
+ workerApiKey: penv.KC_WORKER_API_KEY || env.WORKER_API_KEY || gc.worker_api_key || "",
101
+ workerBaseUrl: penv.KC_WORKER_BASE_URL || env.WORKER_BASE_URL || gc.worker_base_url || "",
85
102
  workerAuthType: gc.worker_auth_type || "",
86
103
  workerApiFormat: gc.worker_api_format || "",
87
104
 
@@ -89,8 +106,8 @@ export function loadSettings(workspacePath) {
89
106
  mineruApiUrl: env.MINERU_API_URL || "",
90
107
  mineruApiKey: env.MINERU_API_KEY || "",
91
108
 
92
- // Workspace
93
- kcWorkspaceRoot: gc.workspace_root || path.join(os.homedir(), ".kc_agent", "workspaces"),
109
+ // Workspace (process.env wins — for parallel benchmark runs)
110
+ kcWorkspaceRoot: penv.KC_WORKSPACE_ROOT || gc.workspace_root || path.join(os.homedir(), ".kc_agent", "workspaces"),
94
111
  kcExecTimeout: parseInt(env.KC_EXEC_TIMEOUT || "30", 10),
95
112
 
96
113
  // Accuracy thresholds
@@ -110,16 +127,42 @@ export function loadSettings(workspacePath) {
110
127
  tavilyApiKey: env.TAVILY_API_KEY || gc.tavily_api_key || "",
111
128
 
112
129
  // Context management — A2: prefer per-provider cap from providers.js
113
- // over the generic 200000 default. KC_CONTEXT_LIMIT env still wins.
114
- // gc.kc_context_limit (global config) is next. Then provider.contextLimit.
115
- // Then a safe 200000 fallback for unknown/custom providers.
116
- kcContextLimit: parseInt(
117
- env.KC_CONTEXT_LIMIT ||
118
- gc.kc_context_limit?.toString() ||
119
- providerDef?.contextLimit?.toString() ||
120
- "200000",
121
- 10,
122
- ),
130
+ // over the generic 200000 default. process.env.KC_CONTEXT_LIMIT wins
131
+ // (session-scoped override for benchmarking long-context models without
132
+ // editing global config), then workspace .env, then global config, then
133
+ // provider.contextLimit, then a safe 200000 fallback.
134
+ //
135
+ // v0.7.0 E3 (#96): providerContextCap is the deployment hard ceiling
136
+ // (e.g., SiliconFlow's GLM-5.1 caps at 202_752 despite the model's
137
+ // native 1M). Effective contextLimit = min(user-requested,
138
+ // providerContextCap). E2E #5 GLM hit HTTP 413 because user set
139
+ // KC_CONTEXT_LIMIT=400000 but the deployment refused at ~203k.
140
+ // The cap is applied AFTER user-priority resolution so the user
141
+ // can't accidentally bypass it.
142
+ kcContextLimit: (() => {
143
+ const requested = parseInt(
144
+ penv.KC_CONTEXT_LIMIT ||
145
+ env.KC_CONTEXT_LIMIT ||
146
+ gc.kc_context_limit?.toString() ||
147
+ providerDef?.contextLimit?.toString() ||
148
+ "200000",
149
+ 10,
150
+ );
151
+ const cap = providerDef?.providerContextCap;
152
+ if (typeof cap === "number" && cap > 0 && requested > cap) {
153
+ // Surface a one-time warning so users notice the clamp without
154
+ // burying it in events.jsonl.
155
+ // eslint-disable-next-line no-console
156
+ console.warn(
157
+ `[config] KC_CONTEXT_LIMIT=${requested} clamped to ${cap} ` +
158
+ `(provider ${providerDef.id} hardCap). E2E #5 hit HTTP 413 at ` +
159
+ `~203k on SiliconFlow GLM-5.1; cap protects against deployment ` +
160
+ `hard-ceiling rejections.`,
161
+ );
162
+ return cap;
163
+ }
164
+ return requested;
165
+ })(),
123
166
  toolOutputOffloadTokens: parseInt(env.TOOL_OUTPUT_OFFLOAD_TOKENS || gc.tool_output_offload_tokens?.toString() || "2000", 10),
124
167
  toolOutputOffloadErrorTokens: parseInt(env.TOOL_OUTPUT_OFFLOAD_ERROR_TOKENS || gc.tool_output_offload_error_tokens?.toString() || "500", 10),
125
168
  maxMessageTokens: parseInt(env.MAX_MESSAGE_TOKENS || gc.max_message_tokens?.toString() || "60000", 10),
@@ -123,6 +123,54 @@
123
123
  }
124
124
  },
125
125
 
126
+ "deepseek": {
127
+ "_comment": "DeepSeek v4 family — flagship pro + cheap flash. Native 1M context but KC caps to 200K.",
128
+ "conductor": "deepseek-v4-pro",
129
+ "llm": {
130
+ "tier1": "deepseek-v4-pro",
131
+ "tier2": "deepseek-v4-pro",
132
+ "tier3": "deepseek-v4-flash",
133
+ "tier4": "deepseek-v4-flash"
134
+ },
135
+ "vlm": {
136
+ "tier1": "",
137
+ "tier2": "",
138
+ "tier3": ""
139
+ }
140
+ },
141
+
142
+ "tencent": {
143
+ "_comment": "Tencent Hunyuan via Lkeap plan endpoint. hy3-preview is the hidden flagship (not in /models listing but accepts requests). hunyuan-t1 is a thinking model — if used as conductor, ensure v0.6.3.1 reasoning_content roundtrip is in place.",
144
+ "conductor": "hy3-preview",
145
+ "llm": {
146
+ "tier1": "hy3-preview, hunyuan-t1",
147
+ "tier2": "hunyuan-turbos, hunyuan-2.0-thinking",
148
+ "tier3": "hunyuan-2.0-instruct, tc-code-latest",
149
+ "tier4": "tc-code-latest"
150
+ },
151
+ "vlm": {
152
+ "tier1": "",
153
+ "tier2": "",
154
+ "tier3": ""
155
+ }
156
+ },
157
+
158
+ "xiaomi": {
159
+ "_comment": "Xiaomi MiMo coding plan — flagship Pro + standard + multimodal Omni. Native 1M context but KC caps to 200K. TTS variants excluded (no KC use case). Endpoint normalizes IDs to lowercase — must match exactly.",
160
+ "conductor": "mimo-v2.5-pro",
161
+ "llm": {
162
+ "tier1": "mimo-v2.5-pro",
163
+ "tier2": "mimo-v2.5",
164
+ "tier3": "mimo-v2-pro",
165
+ "tier4": "mimo-v2-pro"
166
+ },
167
+ "vlm": {
168
+ "tier1": "mimo-v2-omni",
169
+ "tier2": "mimo-v2-omni",
170
+ "tier3": ""
171
+ }
172
+ },
173
+
126
174
  "openrouter": {
127
175
  "conductor": "anthropic/claude-sonnet-4-20250514",
128
176
  "llm": {
package/src/providers.js CHANGED
@@ -47,6 +47,14 @@ const PROVIDERS = [
47
47
  apiFormat: "openai",
48
48
  modelsEndpoint: "/models",
49
49
  contextLimit: 200000, // GLM-5.1, Kimi-K2.5 — 200K native
50
+ // v0.7.0 E3 (#96): provider hardCap. SiliconFlow's GLM-5.1
51
+ // deployment caps prompts at ~202,752 tokens despite the model's
52
+ // native 1M — E2E #5 GLM hit HTTP 413 at 203,363 tokens with
53
+ // KC_CONTEXT_LIMIT=400000 set. providerContextCap protects against
54
+ // user-set context limits exceeding the deployment hard ceiling.
55
+ // Effective limit becomes min(providerContextCap, modelContextLimit,
56
+ // KC_CONTEXT_LIMIT). When undefined, no provider cap applied.
57
+ providerContextCap: 200000,
50
58
  defaultModel: getTierConfig("siliconflow").conductor || "glm-5",
51
59
  defaultTiers: getTierConfig("siliconflow").llm,
52
60
  defaultVlm: getTierConfig("siliconflow").vlm,
@@ -211,6 +219,85 @@ const PROVIDERS = [
211
219
  zh: "MiniMax",
212
220
  },
213
221
  },
222
+ {
223
+ id: "deepseek",
224
+ name: "DeepSeek",
225
+ baseUrl: "https://api.deepseek.com",
226
+ authType: "bearer",
227
+ apiFormat: "openai",
228
+ modelsEndpoint: "/models",
229
+ contextLimit: 200000, // KC cap — DeepSeek v4 is native 1M; we cap to 200K
230
+ defaultModel: getTierConfig("deepseek").conductor || "deepseek-v4-pro",
231
+ defaultTiers: getTierConfig("deepseek").llm,
232
+ defaultVlm: getTierConfig("deepseek").vlm,
233
+ curatedModels: [
234
+ { id: "deepseek-v4-pro", ownedBy: "deepseek" },
235
+ { id: "deepseek-v4-flash", ownedBy: "deepseek" },
236
+ ],
237
+ labels: {
238
+ en: "DeepSeek (v4 family)",
239
+ zh: "DeepSeek(v4 系列)",
240
+ },
241
+ },
242
+ {
243
+ id: "xiaomi",
244
+ name: "Xiaomi MiMo",
245
+ baseUrl: "https://token-plan-cn.xiaomimimo.com/v1",
246
+ authType: "bearer",
247
+ apiFormat: "openai",
248
+ modelsEndpoint: null, // Xiaomi coding-plan endpoint, no /models — use curated list
249
+ supportsCodingPlanKey: true,
250
+ contextLimit: 200000, // KC cap — MiMo V2.5 is native 1M
251
+ defaultModel: getTierConfig("xiaomi").conductor || "MiMo-V2.5-Pro",
252
+ defaultTiers: getTierConfig("xiaomi").llm,
253
+ defaultVlm: getTierConfig("xiaomi").vlm,
254
+ curatedModels: [
255
+ { id: "MiMo-V2.5-Pro", ownedBy: "xiaomi" },
256
+ { id: "MiMo-V2.5", ownedBy: "xiaomi" },
257
+ { id: "MiMo-V2-Pro", ownedBy: "xiaomi" },
258
+ { id: "MiMo-V2-Omni", ownedBy: "xiaomi" }, // multimodal
259
+ // TTS variants (MiMo-V2.5-TTS, *-VoiceClone, *-VoiceDesign, MiMo-V2-TTS)
260
+ // intentionally excluded — KC has no TTS use case.
261
+ ],
262
+ labels: {
263
+ en: "Xiaomi MiMo (V2.5 family, coding plan)",
264
+ zh: "小米 MiMo(V2.5 系列,编程计划)",
265
+ },
266
+ },
267
+ {
268
+ // Tencent Hunyuan via the Lkeap "plan" coding-token endpoint. The /models
269
+ // endpoint exposes a multi-vendor menu (glm-5.x, kimi-k2.5, minimax,
270
+ // hunyuan-*, tc-code-latest); hy3-preview is a hidden flagship that
271
+ // accepts requests but doesn't appear in /models. Curated list reflects
272
+ // what was advertised + the preview model the user has access to.
273
+ id: "tencent",
274
+ name: "Tencent Hunyuan",
275
+ baseUrl: "https://api.lkeap.cloud.tencent.com/plan/v3",
276
+ authType: "bearer",
277
+ apiFormat: "openai",
278
+ modelsEndpoint: "/models",
279
+ supportsCodingPlanKey: true,
280
+ contextLimit: 200000, // hy3-preview is officially 256K; keep below cap with margin
281
+ defaultModel: getTierConfig("tencent").conductor || "hy3-preview",
282
+ defaultTiers: getTierConfig("tencent").llm,
283
+ defaultVlm: getTierConfig("tencent").vlm,
284
+ curatedModels: [
285
+ { id: "hy3-preview", ownedBy: "tencent" }, // hidden flagship
286
+ { id: "hunyuan-t1", ownedBy: "tencent" }, // thinking model
287
+ { id: "hunyuan-turbos", ownedBy: "tencent" },
288
+ { id: "hunyuan-2.0-thinking", ownedBy: "tencent" },
289
+ { id: "hunyuan-2.0-instruct", ownedBy: "tencent" },
290
+ { id: "tc-code-latest", ownedBy: "tencent" },
291
+ // Multi-vendor pass-throughs on the same plan key:
292
+ { id: "glm-5.1", ownedBy: "system" },
293
+ { id: "kimi-k2.5", ownedBy: "system" },
294
+ { id: "minimax-m2.7", ownedBy: "system" },
295
+ ],
296
+ labels: {
297
+ en: "Tencent Hunyuan (Lkeap plan)",
298
+ zh: "腾讯混元(Lkeap 编程计划)",
299
+ },
300
+ },
214
301
  {
215
302
  id: "openrouter",
216
303
  name: "OpenRouter",
@@ -0,0 +1,108 @@
1
+ # KC Verification Release — v1
2
+
3
+ This bundle is a self-contained verification system produced by KC's
4
+ finalization phase. It runs without KC's CLI installed.
5
+
6
+ ## Project
7
+
8
+ - **Generated by**: KC v{{kc_version}}
9
+ - **Session**: `{{session_id}}`
10
+ - **Generated at**: {{generated_at}}
11
+ - **Rules**: {{rule_count}}
12
+ - **Workflows**: {{workflow_count}}
13
+
14
+ ## What this does
15
+
16
+ {{project_description}}
17
+
18
+ ## How to run
19
+
20
+ ### Prerequisites
21
+
22
+ ```
23
+ python3 >= 3.9
24
+ # Optional native parsers (recommended; falls back to LibreOffice if missing):
25
+ pip install pypdf python-docx
26
+ ```
27
+
28
+ ### Single-document smoke test
29
+
30
+ ```bash
31
+ python3 run.py --doc /path/to/document.pdf
32
+ ```
33
+
34
+ ### Full batch
35
+
36
+ ```bash
37
+ python3 run.py /path/to/input_dir/
38
+ # results land in output/results/<doc_stem>.json
39
+ # summary in output/results/summary.json
40
+ ```
41
+
42
+ ### Filter by rule
43
+
44
+ ```bash
45
+ python3 run.py /path/to/input_dir/ --rules R001,R005,R012
46
+ ```
47
+
48
+ ### Render dashboard
49
+
50
+ ```bash
51
+ python3 render_dashboard.py output/results/ > dashboard.html
52
+ ./serve.sh # http://localhost:8765/dashboard.html
53
+ ```
54
+
55
+ ## Layout
56
+
57
+ ```
58
+ release/v1/
59
+ ├── run.py # entry point
60
+ ├── render_dashboard.py # HTML dashboard renderer
61
+ ├── serve.sh # local http server shim
62
+ ├── manifest.json # populated bundle manifest
63
+ ├── catalog.json # populated rule catalog
64
+ ├── confidence_calibration.json # historical accuracy per rule (for confidence calibration)
65
+ ├── README.md # this file
66
+ ├── kc_runtime/
67
+ │ ├── __init__.py
68
+ │ ├── doc_parser.py # PDF/DOCX/TXT → text
69
+ │ └── confidence.py # calibration helpers
70
+ └── workflows/
71
+ └── <rule_id>/workflow_v1.py
72
+ ```
73
+
74
+ ## Workflow contract
75
+
76
+ Each `workflows/<rule_id>/workflow_v1.py` is a standalone Python script:
77
+
78
+ - Takes a document path on `sys.argv[1]`
79
+ - Emits a single JSON line on stdout containing the verdict
80
+ - Exit code 0 on success, non-zero on workflow-internal error
81
+
82
+ Verdict shape:
83
+
84
+ ```json
85
+ {
86
+ "rule_id": "R001",
87
+ "verdict": "PASS|FAIL|PARTIAL|NOT_APPLICABLE|UNDETERMINED|ERROR",
88
+ "confidence": 0.0,
89
+ "reason": "human-readable explanation",
90
+ "evidence": ["snippet 1", "snippet 2"]
91
+ }
92
+ ```
93
+
94
+ ## Known limitations
95
+
96
+ {{known_limitations}}
97
+
98
+ ## License
99
+
100
+ This bundle is licensed under the same terms as KC itself
101
+ (PolyForm Noncommercial 1.0.0). For commercial use, see KC's
102
+ LICENSE-COMMERCIAL.md.
103
+
104
+ ---
105
+
106
+ *Re-running this bundle on a new document set is the recommended path.
107
+ For methodology changes (new rules, threshold tuning), re-run KC's
108
+ distillation + production_qc phases and re-emit a fresh release.*
@@ -0,0 +1,4 @@
1
+ [
2
+ /* Populated by KC finalization from rules/catalog.json. Each entry: */
3
+ /* { "id": "R001", "title": "...", "description": "...", "source_ref": "..." } */
4
+ ]
@@ -0,0 +1,11 @@
1
+ """KC release runtime — v1.
2
+
3
+ Minimal Python helpers used by run.py to dispatch verification
4
+ workflows. Designed to be drop-in self-contained: stdlib + a handful
5
+ of optional native parsers (pypdf, python-docx) for document
6
+ parsing. Falls back to plaintext + LibreOffice CLI if natives
7
+ unavailable — never crashes the run on a missing dep.
8
+ """
9
+
10
+ __version__ = "1.0.0"
11
+ __all__ = ["doc_parser", "confidence"]
@@ -0,0 +1,63 @@
1
+ """
2
+ Confidence calibration helpers for the release runtime.
3
+
4
+ Workflows return raw verdicts with a self-reported confidence score.
5
+ This module re-weights that score against the historical accuracy
6
+ captured during KC's distillation phase, so users see calibrated
7
+ confidence rather than the agent's prior. Falls back to identity
8
+ when no calibration data is available.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+
14
def calibrate(verdict: dict, historical: dict) -> dict:
    """
    Adjust verdict["confidence"] using historical accuracy for the rule.

    Schema for `historical`:
        {
            "historical_accuracy": {
                "<rule_id>": {"accuracy": float in [0, 1], "n_samples": int},
                ...
            }
        }

    Blend: confidence = raw * (1 - w) + raw * accuracy * w, with
    w = min(0.5, n_samples / 100). Small n_samples keeps w near 0, so
    the workflow's raw self-reported score dominates (avoids
    over-correcting on a weak prior); large n_samples shifts up to half
    the weight onto the historical accuracy, dampening confidence
    whenever accuracy < 1. Verdicts with no rule_id, or rules with no
    calibration entry, are returned unchanged (identity fallback).

    Returns a new dict (the input verdict is not mutated) carrying
    "confidence_raw" (the original score) and
    "confidence_calibrated": True.
    """
    rule_id = verdict.get("rule_id")
    if not rule_id:
        return verdict

    hist = historical.get("historical_accuracy", {}).get(rule_id)
    if not hist:
        return verdict

    accuracy = float(hist.get("accuracy", 1.0))
    n_samples = int(hist.get("n_samples", 0))

    raw = float(verdict.get("confidence", 0.5))

    # Clamp the blend weight to [0, 0.5]: a negative n_samples from a
    # corrupt calibration file must not produce a negative weight and
    # amplify confidence above the raw score.
    weight = min(0.5, max(0.0, n_samples / 100.0))
    calibrated = raw * (1 - weight) + raw * accuracy * weight

    out = dict(verdict)
    out["confidence"] = round(calibrated, 4)
    out["confidence_raw"] = raw
    out["confidence_calibrated"] = True
    return out
55
+
56
+
57
def confidence_band(score: float) -> str:
    """Translate a numeric confidence score into a verbal band.

    >= 0.8 yields "high", >= 0.5 yields "medium", anything lower "low".
    """
    for threshold, band in ((0.8, "high"), (0.5, "medium")):
        if score >= threshold:
            return band
    return "low"
@@ -0,0 +1,127 @@
1
+ """
2
+ Minimal document parser for the release runtime.
3
+
4
+ Strategy: try native Python parsers first (pypdf, python-docx),
5
+ fall back to LibreOffice CLI if natives unavailable AND lo is on
6
+ PATH, finally fall back to UTF-8 plaintext read. Each strategy
7
+ records what it tried via the result dict so workflows can decide
8
+ whether to trust the text.
9
+
10
+ This is a release-time helper — KC's CLI mode uses its own document
11
+ parsing pipeline (src/agent/document-parser.js + LibreOffice).
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ import shutil
18
+ import subprocess
19
+ from pathlib import Path
20
+
21
+
22
def preflight(doc: Path) -> dict:
    """
    Cheap parseability probe for a document path.

    Returns a small status dict so workflows can skip unreadable files
    up front instead of spending a worker-LLM call on them:
    {"ok": False, "reason": "not_found" | "not_file" | "empty", "path": ...}
    on failure, or {"ok": True, "path": ..., "size_bytes": ...} on success.
    """
    path_str = str(doc)
    if not doc.exists():
        return {"ok": False, "reason": "not_found", "path": path_str}
    if not doc.is_file():
        return {"ok": False, "reason": "not_file", "path": path_str}
    size = doc.stat().st_size
    if size == 0:
        return {"ok": False, "reason": "empty", "path": path_str}
    return {"ok": True, "path": path_str, "size_bytes": size}
35
+
36
+
37
def extract_text(doc: Path) -> dict:
    """
    Pull text out of a document. Returns:
        { "text": "...", "via": "<strategy>", "ok": bool, "error"?: str }

    Strategy order:
      1. Suffix-specific native parser (pypdf for .pdf, python-docx for .docx)
      2. LibreOffice CLI fallback for office formats, if soffice is on PATH
      3. Plain UTF-8 read, then GBK (common in Chinese corpora)

    Non-decoding I/O errors on the plaintext read (missing file,
    permissions) propagate to the caller — preflight() is expected to
    gate those before this is called.
    """
    ext = doc.suffix.lower()

    native_text = None
    strategy = ""
    if ext == ".pdf":
        native_text = _try_pypdf(doc)
        strategy = "pypdf"
    elif ext == ".docx":
        native_text = _try_python_docx(doc)
        strategy = "python-docx"
    if native_text is not None:
        return {"text": native_text, "via": strategy, "ok": True}

    # LibreOffice fallback for office formats the natives couldn't handle.
    if ext in {".pdf", ".doc", ".docx", ".odt", ".rtf"}:
        lo_text = _try_libreoffice(doc)
        if lo_text is not None:
            return {"text": lo_text, "via": "libreoffice", "ok": True}

    # Plaintext fallback (covers .txt, .md, .csv, .json, etc.)
    try:
        return {"text": doc.read_text(encoding="utf-8"), "via": "plaintext_utf8", "ok": True}
    except UnicodeDecodeError:
        try:
            return {"text": doc.read_text(encoding="gbk"), "via": "plaintext_gbk", "ok": True}
        except Exception as exc:
            return {"text": "", "via": "none", "ok": False, "error": str(exc)}
74
+
75
+
76
+ # --- internals ---
77
+
78
+
79
+ def _try_pypdf(doc: Path):
80
+ try:
81
+ import pypdf # type: ignore
82
+ except ImportError:
83
+ return None
84
+ try:
85
+ reader = pypdf.PdfReader(str(doc))
86
+ return "\n".join(page.extract_text() or "" for page in reader.pages)
87
+ except Exception:
88
+ return None
89
+
90
+
91
+ def _try_python_docx(doc: Path):
92
+ try:
93
+ import docx # python-docx
94
+ except ImportError:
95
+ return None
96
+ try:
97
+ d = docx.Document(str(doc))
98
+ parts = [p.text for p in d.paragraphs]
99
+ for table in d.tables:
100
+ for row in table.rows:
101
+ for cell in row.cells:
102
+ if cell.text:
103
+ parts.append(cell.text)
104
+ return "\n".join(parts)
105
+ except Exception:
106
+ return None
107
+
108
+
109
+ def _try_libreoffice(doc: Path):
110
+ soffice = shutil.which("soffice") or shutil.which("libreoffice")
111
+ if not soffice:
112
+ return None
113
+ out_dir = doc.parent / ".kc-lo-out"
114
+ out_dir.mkdir(exist_ok=True)
115
+ try:
116
+ subprocess.run(
117
+ [soffice, "--headless", "--convert-to", "txt", "--outdir", str(out_dir), str(doc)],
118
+ capture_output=True,
119
+ timeout=60,
120
+ check=True,
121
+ )
122
+ txt_path = out_dir / (doc.stem + ".txt")
123
+ if txt_path.exists():
124
+ return txt_path.read_text(encoding="utf-8")
125
+ except Exception:
126
+ return None
127
+ return None
@@ -0,0 +1,11 @@
1
+ {
2
+ "release_version": "v1",
3
+ "kc_version": "{{kc_version}}",
4
+ "generated_at": "{{generated_at}}",
5
+ "session_id": "{{session_id}}",
6
+ "rules_count": {{rule_count}},
7
+ "workflows_count": {{workflow_count}},
8
+ "workflows": {},
9
+ "calibration_source": "confidence_calibration.json",
10
+ "documentation": "README.md"
11
+ }