kc-beta 0.3.2 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/agent/confidence-scorer.js +8 -0
- package/src/agent/context-window.js +7 -2
- package/src/agent/context.js +25 -0
- package/src/agent/corner-case-registry.js +5 -0
- package/src/agent/engine.js +564 -76
- package/src/agent/event-log.js +15 -2
- package/src/agent/history.js +91 -23
- package/src/agent/pipelines/initializer.js +3 -6
- package/src/agent/retry.js +9 -1
- package/src/agent/rule-catalog-normalize.js +37 -0
- package/src/agent/scheduler.js +276 -0
- package/src/agent/session-state.js +11 -2
- package/src/agent/task-manager.js +5 -0
- package/src/agent/tools/agent-tool.js +57 -14
- package/src/agent/tools/archive-file.js +94 -0
- package/src/agent/tools/copy-to-workspace.js +140 -0
- package/src/agent/tools/phase-advance.js +60 -0
- package/src/agent/tools/release.js +323 -0
- package/src/agent/tools/rule-catalog.js +56 -4
- package/src/agent/tools/schedule-fetch.js +118 -0
- package/src/agent/tools/snapshot.js +101 -0
- package/src/agent/tools/workspace-file.js +10 -7
- package/src/agent/version-manager.js +29 -120
- package/src/agent/workspace.js +127 -4
- package/src/cli/components.js +68 -12
- package/src/cli/index.js +147 -15
- package/src/config.js +10 -1
- package/src/model-tiers.json +5 -5
- package/template/release-runtime/README.md.tmpl +84 -0
- package/template/release-runtime/kc_runtime/__init__.py +2 -0
- package/template/release-runtime/kc_runtime/confidence.py +93 -0
- package/template/release-runtime/kc_runtime/dashboard.py +208 -0
- package/template/release-runtime/render_dashboard.py +49 -0
- package/template/release-runtime/run.py +230 -0
- package/template/release-runtime/serve.sh +15 -0
- package/template/skills/en/meta-meta/bootstrap-workspace/SKILL.md +11 -0
- package/template/skills/en/meta-meta/quality-control/SKILL.md +13 -1
- package/template/skills/en/meta-meta/skill-to-workflow/SKILL.md +8 -0
- package/template/skills/en/meta-meta/task-decomposition/SKILL.md +13 -0
- package/template/skills/en/meta-meta/version-control/SKILL.md +13 -0
- package/template/skills/zh/meta-meta/bootstrap-workspace/SKILL.md +11 -0
- package/template/skills/zh/meta-meta/quality-control/SKILL.md +12 -0
- package/template/skills/zh/meta-meta/skill-to-workflow/SKILL.md +8 -0
- package/template/skills/zh/meta-meta/task-decomposition/SKILL.md +16 -0
- package/template/skills/zh/meta-meta/version-control/SKILL.md +13 -0
- package/template/workspace.gitignore +22 -0
package/src/cli/index.js
CHANGED
|
@@ -2,9 +2,10 @@ import React, { useState, useEffect, useCallback, useRef } from "react";
|
|
|
2
2
|
import { render, Box, Text, useApp, useInput } from "ink";
|
|
3
3
|
import { loadSettings } from "../config.js";
|
|
4
4
|
import { LLMClient } from "../agent/llm-client.js";
|
|
5
|
-
import { AgentEngine } from "../agent/engine.js";
|
|
5
|
+
import { AgentEngine, NEXT_PHASE } from "../agent/engine.js";
|
|
6
6
|
import { Workspace } from "../agent/workspace.js";
|
|
7
7
|
import { ConversationHistory } from "../agent/history.js";
|
|
8
|
+
import { Scheduler } from "../agent/scheduler.js";
|
|
8
9
|
import {
|
|
9
10
|
WelcomeBanner,
|
|
10
11
|
StatusBar,
|
|
@@ -17,6 +18,18 @@ import {
|
|
|
17
18
|
|
|
18
19
|
const h = React.createElement;
|
|
19
20
|
|
|
21
|
+
// Only the last N messages stay in the Ink render tree. Older messages
|
|
22
|
+
// remain in React state (so /compact can summarize them) but aren't
|
|
23
|
+
// diffed on every keystroke — this is what keeps long sessions responsive
|
|
24
|
+
// and prevents the 4 GB heap OOM observed in the v0.5.3 E2E test.
|
|
25
|
+
// Full conversation is persisted to logs/events.jsonl on every event,
|
|
26
|
+
// so dropping from render is purely visual.
|
|
27
|
+
const VISIBLE_WINDOW = 50;
|
|
28
|
+
|
|
29
|
+
// How many recent messages render their ToolBlock with full preview.
|
|
30
|
+
// Older ToolBlocks show header only. Both still persist full output to disk.
|
|
31
|
+
const RECENT_TOOL_WINDOW = 10;
|
|
32
|
+
|
|
20
33
|
/**
|
|
21
34
|
* Main KC Agent CLI App using Ink (React for terminals).
|
|
22
35
|
*/
|
|
@@ -158,6 +171,8 @@ function App({ engine, config }) {
|
|
|
158
171
|
" /help Show this help\n" +
|
|
159
172
|
" /status Show session info, model, phase, workspace\n" +
|
|
160
173
|
" /tasks Show task progress\n" +
|
|
174
|
+
" /phase [sub] advance | status | <name> — manual phase override\n" +
|
|
175
|
+
" /schedule Show scheduled ingestion jobs and recent log lines\n" +
|
|
161
176
|
" /clear Clear conversation history (keep workspace)\n" +
|
|
162
177
|
" /compact Summarize older messages to reduce context\n" +
|
|
163
178
|
" /sessions List all sessions\n" +
|
|
@@ -193,6 +208,79 @@ function App({ engine, config }) {
|
|
|
193
208
|
});
|
|
194
209
|
return true;
|
|
195
210
|
|
|
211
|
+
case "/phase": {
|
|
212
|
+
// User-driven phase override. Useful when auto-advance fails to fire
|
|
213
|
+
// or when debugging. Subcommands:
|
|
214
|
+
// /phase → current phase (alias: /phase status)
|
|
215
|
+
// /phase advance | next → move to NEXT_PHASE[current]
|
|
216
|
+
// /phase <name> → force-jump to any phase (forward or back)
|
|
217
|
+
const engine = engineRef.current;
|
|
218
|
+
const sub = (parts[1] || "").toLowerCase();
|
|
219
|
+
|
|
220
|
+
if (!sub || sub === "status") {
|
|
221
|
+
const next = NEXT_PHASE[engine.currentPhase];
|
|
222
|
+
addMessage({
|
|
223
|
+
role: "system",
|
|
224
|
+
content:
|
|
225
|
+
`Current phase: ${engine.currentPhase.toUpperCase()}` +
|
|
226
|
+
(next ? ` (next auto: ${next})` : " (final phase)"),
|
|
227
|
+
});
|
|
228
|
+
return true;
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
if (sub === "advance" || sub === "next") {
|
|
232
|
+
const next = NEXT_PHASE[engine.currentPhase];
|
|
233
|
+
if (!next) {
|
|
234
|
+
addMessage({ role: "system", content: `Already in final phase (${engine.currentPhase}).` });
|
|
235
|
+
return true;
|
|
236
|
+
}
|
|
237
|
+
const ok = engine._advancePhase(next, "manual /phase advance");
|
|
238
|
+
addMessage({
|
|
239
|
+
role: "system",
|
|
240
|
+
content: ok
|
|
241
|
+
? `→ phase advanced to ${next.toUpperCase()}.`
|
|
242
|
+
: `Failed to advance from ${engine.currentPhase}.`,
|
|
243
|
+
});
|
|
244
|
+
updateContextStats();
|
|
245
|
+
return true;
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
// /phase <name> — force-jump. Uses {force:true} to allow backward jumps.
|
|
249
|
+
const ok = engine._advancePhase(sub, "manual /phase <name>", { force: true });
|
|
250
|
+
addMessage({
|
|
251
|
+
role: "system",
|
|
252
|
+
content: ok
|
|
253
|
+
? `→ phase set to ${sub.toUpperCase()}.`
|
|
254
|
+
: `Unknown phase: ${sub}. Valid: bootstrap, extraction, skill_authoring, skill_testing, distillation, production_qc`,
|
|
255
|
+
});
|
|
256
|
+
updateContextStats();
|
|
257
|
+
return true;
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
case "/schedule": {
|
|
261
|
+
const sched = new Scheduler(engineRef.current.workspace);
|
|
262
|
+
const jobs = sched.list();
|
|
263
|
+
if (jobs.length === 0) {
|
|
264
|
+
addMessage({ role: "system", content: "No scheduled ingestion jobs. Ask KC to set one up via the schedule_fetch tool." });
|
|
265
|
+
} else {
|
|
266
|
+
const lines = jobs.map((j) => {
|
|
267
|
+
const status = j.enabled ? "✓ enabled" : "· disabled";
|
|
268
|
+
const hint = j.cron_hint ? ` cron: ${j.cron_hint}` : " cron: (not set)";
|
|
269
|
+
return ` ${status} ${j.id}\n${hint}\n cmd: ${j.command}`;
|
|
270
|
+
});
|
|
271
|
+
const tail = sched.tailLog(8);
|
|
272
|
+
const pending = sched.pendingInputCount();
|
|
273
|
+
addMessage({
|
|
274
|
+
role: "system",
|
|
275
|
+
content:
|
|
276
|
+
`Scheduled jobs:\n${lines.join("\n\n")}\n\n` +
|
|
277
|
+
`Pending in input/: ${pending} file(s)` +
|
|
278
|
+
(tail ? `\n\nlogs/ingest.log (last 8):\n${tail}` : ""),
|
|
279
|
+
});
|
|
280
|
+
}
|
|
281
|
+
return true;
|
|
282
|
+
}
|
|
283
|
+
|
|
196
284
|
case "/clear":
|
|
197
285
|
engineRef.current.history = new ConversationHistory(engineRef.current.workspace.cwd);
|
|
198
286
|
setMessages([]);
|
|
@@ -202,15 +290,22 @@ function App({ engine, config }) {
|
|
|
202
290
|
|
|
203
291
|
case "/compact": {
|
|
204
292
|
addMessage({ role: "system", content: "Compacting conversation history..." });
|
|
205
|
-
// Run compact asynchronously
|
|
206
293
|
(async () => {
|
|
207
294
|
try {
|
|
208
295
|
const result = await engineRef.current.compact();
|
|
209
296
|
if (result) {
|
|
210
|
-
|
|
297
|
+
// Claude Code pattern: after successful compact, clear the
|
|
298
|
+
// visible TUI messages and start fresh with a single summary
|
|
299
|
+
// line. The underlying engine.history already contains the
|
|
300
|
+
// compact-summary message pair; the TUI doesn't need to keep
|
|
301
|
+
// showing the pre-compact history (it's on disk in
|
|
302
|
+
// logs/events.jsonl anyway) and clearing it immediately frees
|
|
303
|
+
// Ink render-tree memory — fixing the lag that builds up over
|
|
304
|
+
// long sessions.
|
|
305
|
+
setMessages([{
|
|
211
306
|
role: "system",
|
|
212
|
-
content:
|
|
213
|
-
});
|
|
307
|
+
content: `✓ 上下文已压缩:合并了 ${result.removedCount} 条早期消息(摘要约 ${result.summaryTokens} tokens,保留最近 ${result.retainedCount} 条)`,
|
|
308
|
+
}]);
|
|
214
309
|
} else {
|
|
215
310
|
addMessage({ role: "system", content: "Nothing to compact (conversation is short enough)." });
|
|
216
311
|
}
|
|
@@ -227,9 +322,22 @@ function App({ engine, config }) {
|
|
|
227
322
|
addMessage({ role: "system", content: "Usage: /rename <new_name>" });
|
|
228
323
|
} else {
|
|
229
324
|
try {
|
|
230
|
-
const
|
|
231
|
-
setSessionId(
|
|
232
|
-
|
|
325
|
+
const r = engineRef.current.renameSession(arg);
|
|
326
|
+
setSessionId(r.sessionId);
|
|
327
|
+
const lines = [`Session renamed to: ${r.sessionId}`];
|
|
328
|
+
if (r.scheduleWrappersRegenerated.length > 0) {
|
|
329
|
+
lines.push(
|
|
330
|
+
`${r.scheduleWrappersRegenerated.length} cron wrapper script(s) regenerated.`,
|
|
331
|
+
`If you'd installed crontab lines for the OLD path, re-install via 'schedule_fetch print_crontab'.`,
|
|
332
|
+
);
|
|
333
|
+
}
|
|
334
|
+
if (r.scheduleWrappersFailed && r.scheduleWrappersFailed.length > 0) {
|
|
335
|
+
const ids = r.scheduleWrappersFailed.map((f) => f.id).join(", ");
|
|
336
|
+
lines.push(
|
|
337
|
+
`⚠ ${r.scheduleWrappersFailed.length} wrapper script(s) failed to regenerate (${ids}). Check workspace/scripts/ingest/ and disk space.`,
|
|
338
|
+
);
|
|
339
|
+
}
|
|
340
|
+
addMessage({ role: "system", content: lines.join("\n") });
|
|
233
341
|
} catch (err) {
|
|
234
342
|
addMessage({ role: "system", content: `Rename failed: ${err.message}` });
|
|
235
343
|
}
|
|
@@ -342,36 +450,52 @@ function App({ engine, config }) {
|
|
|
342
450
|
|
|
343
451
|
return h(Box, { flexDirection: "column" },
|
|
344
452
|
// Welcome banner
|
|
345
|
-
showWelcome ? h(WelcomeBanner, {
|
|
453
|
+
showWelcome ? h(WelcomeBanner, {
|
|
454
|
+
projectDir: config.projectDir,
|
|
455
|
+
pendingInputCount: (() => {
|
|
456
|
+
try { return new Scheduler(engineRef.current.workspace).pendingInputCount(); }
|
|
457
|
+
catch { return 0; }
|
|
458
|
+
})(),
|
|
459
|
+
}) : null,
|
|
346
460
|
|
|
347
461
|
// Task dashboard (ralph-loop)
|
|
348
462
|
taskList.length > 0 ? h(TaskDashboard, { tasks: taskList, progress: taskProgress }) : null,
|
|
349
463
|
|
|
350
|
-
// Message history
|
|
351
|
-
|
|
464
|
+
// Message history (virtualized — only last VISIBLE_WINDOW render).
|
|
465
|
+
// Hidden-count hint for earlier messages, so users know the full
|
|
466
|
+
// history still exists (on disk) even though the TUI is slim.
|
|
467
|
+
messages.length > VISIBLE_WINDOW ? h(Box, { key: "hidden-hint" },
|
|
468
|
+
h(Text, { dimColor: true },
|
|
469
|
+
`— 前 ${messages.length - VISIBLE_WINDOW} 条消息已折叠,完整记录在 logs/events.jsonl —`),
|
|
470
|
+
) : null,
|
|
471
|
+
...messages.slice(-VISIBLE_WINDOW).map((msg, i, arr) => {
|
|
472
|
+
// Global index (for stable React keys) vs visible index (for isRecent).
|
|
473
|
+
const globalIdx = messages.length - arr.length + i;
|
|
474
|
+
const visibleIdx = arr.length - 1 - i; // 0 = most recent
|
|
352
475
|
if (msg.role === "user") {
|
|
353
|
-
return h(Box, { key: `msg-${
|
|
476
|
+
return h(Box, { key: `msg-${globalIdx}` },
|
|
354
477
|
h(Text, { dimColor: true }, "❯ "),
|
|
355
478
|
h(Text, null, msg.content),
|
|
356
479
|
);
|
|
357
480
|
}
|
|
358
481
|
if (msg.role === "agent") {
|
|
359
|
-
return h(Box, { key: `msg-${
|
|
482
|
+
return h(Box, { key: `msg-${globalIdx}` },
|
|
360
483
|
h(Text, null, msg.content),
|
|
361
484
|
);
|
|
362
485
|
}
|
|
363
486
|
if (msg.role === "tool") {
|
|
364
487
|
return h(ToolBlock, {
|
|
365
|
-
key: `msg-${
|
|
488
|
+
key: `msg-${globalIdx}`,
|
|
366
489
|
name: msg.toolName,
|
|
367
490
|
input: msg.toolInput,
|
|
368
491
|
output: msg.toolOutput,
|
|
369
492
|
isError: msg.toolIsError,
|
|
370
493
|
isRunning: false,
|
|
494
|
+
isRecent: visibleIdx < RECENT_TOOL_WINDOW,
|
|
371
495
|
});
|
|
372
496
|
}
|
|
373
497
|
if (msg.role === "system") {
|
|
374
|
-
return h(Box, { key: `msg-${
|
|
498
|
+
return h(Box, { key: `msg-${globalIdx}` },
|
|
375
499
|
h(Text, { dimColor: true }, msg.content),
|
|
376
500
|
);
|
|
377
501
|
}
|
|
@@ -436,6 +560,14 @@ export async function main({ languageOverride } = {}) {
|
|
|
436
560
|
console.log(`\x1b[33m${msg}\x1b[0m\n`);
|
|
437
561
|
}
|
|
438
562
|
|
|
563
|
+
// Warn if git is missing — Block 11 file system relies on git for version history.
|
|
564
|
+
if (config.gitAutoCommit !== false && !Workspace.isGitInstalled()) {
|
|
565
|
+
const msg = config.language === "zh"
|
|
566
|
+
? " ⚠ 未检测到 git。本会话将不记录版本历史。安装 git 以启用自动提交。"
|
|
567
|
+
: " ⚠ git not found — version history disabled this session. Install git to enable auto-commit.";
|
|
568
|
+
console.log(`\x1b[33m${msg}\x1b[0m\n`);
|
|
569
|
+
}
|
|
570
|
+
|
|
439
571
|
const client = new LLMClient({
|
|
440
572
|
apiKey: config.llmApiKey,
|
|
441
573
|
baseUrl: config.llmBaseUrl,
|
package/src/config.js
CHANGED
|
@@ -65,7 +65,7 @@ export function loadSettings(workspacePath) {
|
|
|
65
65
|
llmApiKey: env.LLM_API_KEY || env.SILICONFLOW_API_KEY || gc.api_key || "",
|
|
66
66
|
llmBaseUrl: env.LLM_BASE_URL || env.SILICONFLOW_BASE_URL || gc.base_url || "https://api.siliconflow.cn/v1",
|
|
67
67
|
kcModel: gc.conductor_model || "glm-5",
|
|
68
|
-
kcMaxTokens: 65536,
|
|
68
|
+
kcMaxTokens: parseInt(env.KC_MAX_TOKENS || gc.kc_max_tokens?.toString() || "65536", 10),
|
|
69
69
|
|
|
70
70
|
// Tier models (from .env or global config tiers)
|
|
71
71
|
tier1: env.TIER1 || gc.tiers?.tier1 || "",
|
|
@@ -111,6 +111,15 @@ export function loadSettings(workspacePath) {
|
|
|
111
111
|
|
|
112
112
|
// Context management
|
|
113
113
|
kcContextLimit: parseInt(env.KC_CONTEXT_LIMIT || "200000", 10),
|
|
114
|
+
toolOutputOffloadTokens: parseInt(env.TOOL_OUTPUT_OFFLOAD_TOKENS || gc.tool_output_offload_tokens?.toString() || "2000", 10),
|
|
115
|
+
toolOutputOffloadErrorTokens: parseInt(env.TOOL_OUTPUT_OFFLOAD_ERROR_TOKENS || gc.tool_output_offload_error_tokens?.toString() || "500", 10),
|
|
116
|
+
maxMessageTokens: parseInt(env.MAX_MESSAGE_TOKENS || gc.max_message_tokens?.toString() || "60000", 10),
|
|
117
|
+
|
|
118
|
+
// File system (Block 11)
|
|
119
|
+
gitAutoCommit: (env.GIT_AUTO_COMMIT ?? gc.git_auto_commit ?? true) !== false &&
|
|
120
|
+
(env.GIT_AUTO_COMMIT !== "false") &&
|
|
121
|
+
(gc.git_auto_commit !== false),
|
|
122
|
+
largeRefThresholdMB: parseInt(env.LARGE_REF_THRESHOLD_MB || gc.large_ref_threshold_mb?.toString() || "10", 10),
|
|
114
123
|
|
|
115
124
|
// Language
|
|
116
125
|
language: env.LANGUAGE || gc.language || "en",
|
package/src/model-tiers.json
CHANGED
|
@@ -2,17 +2,17 @@
|
|
|
2
2
|
"_comment": "Model selections per provider. LLM tiers 1-4, VLM tiers 1-3. Edit this file directly to update model assignments.",
|
|
3
3
|
|
|
4
4
|
"siliconflow": {
|
|
5
|
-
"conductor": "Pro/zai-org/GLM-5",
|
|
5
|
+
"conductor": "Pro/zai-org/GLM-5.1",
|
|
6
6
|
"llm": {
|
|
7
|
-
"tier1": "Pro/zai-org/GLM-5, Pro/moonshotai/Kimi-K2.5",
|
|
7
|
+
"tier1": "Pro/zai-org/GLM-5.1, Pro/moonshotai/Kimi-K2.5",
|
|
8
8
|
"tier2": "Pro/deepseek-ai/DeepSeek-V3.2, Pro/MiniMaxAI/MiniMax-M2.5",
|
|
9
9
|
"tier3": "Qwen/Qwen3.5-122B-A10B",
|
|
10
10
|
"tier4": "Qwen/Qwen3.5-35B-A3B"
|
|
11
11
|
},
|
|
12
12
|
"vlm": {
|
|
13
|
-
"tier1": "
|
|
14
|
-
"tier2": "Qwen/
|
|
15
|
-
"tier3": "Qwen/
|
|
13
|
+
"tier1": "Qwen/Qwen3-VL-235B-A22B-Instruct",
|
|
14
|
+
"tier2": "Qwen/Qwen3-VL-30B-A3B-Instruct",
|
|
15
|
+
"tier3": "Qwen/Qwen3-VL-8B-Instruct"
|
|
16
16
|
}
|
|
17
17
|
},
|
|
18
18
|
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# {LABEL} — KC Verification Release
|
|
2
|
+
|
|
3
|
+
Generated: {CREATED_AT}
|
|
4
|
+
Snapshot tag: `{SNAPSHOT_TAG}`
|
|
5
|
+
Commit: `{SNAPSHOT_COMMIT}`
|
|
6
|
+
Built by: kc-beta {KC_VERSION}
|
|
7
|
+
|
|
8
|
+
{NOTES_BLOCK}
|
|
9
|
+
|
|
10
|
+
This bundle is self-contained. It runs without `kc-beta` installed — only Python 3 and a worker LLM API key are required.
|
|
11
|
+
|
|
12
|
+
## What's in here
|
|
13
|
+
|
|
14
|
+
```
|
|
15
|
+
manifest.json — release metadata (rules, models, snapshot tag)
|
|
16
|
+
README.md — this file
|
|
17
|
+
run.py — standalone driver, runs all rules
|
|
18
|
+
render_dashboard.py — re-render an HTML dashboard from a result JSON
|
|
19
|
+
serve.sh — optional helper, serves this dir over local HTTP
|
|
20
|
+
kc_runtime/ — bundled Python helpers (confidence scoring, dashboard)
|
|
21
|
+
workflows/ — pinned per-rule Python workflows + prompts
|
|
22
|
+
fixtures/ — sample inputs (if KC selected any)
|
|
23
|
+
glossary.json — project entity vocabulary at release time
|
|
24
|
+
catalog.json — rule catalog at release time
|
|
25
|
+
corner_cases.json — known corner cases (used by confidence scoring)
|
|
26
|
+
confidence_calibration.json — per-rule historical accuracy
|
|
27
|
+
models.json — worker LLM tier→model assignments
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Run a verification
|
|
31
|
+
|
|
32
|
+
```sh
|
|
33
|
+
export LLM_API_KEY="sk-..."
|
|
34
|
+
export LLM_BASE_URL="https://api.siliconflow.cn/v1" # or your provider
|
|
35
|
+
export TIER1="..." # comma-separated model list
|
|
36
|
+
export TIER2="..."
|
|
37
|
+
|
|
38
|
+
python run.py /path/to/document.pdf > result.json
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Each rule's workflow runs against the document; results are aggregated into a single JSON.
|
|
42
|
+
|
|
43
|
+
### Useful flags
|
|
44
|
+
|
|
45
|
+
```sh
|
|
46
|
+
python run.py doc.pdf --rule R001 # run only one rule
|
|
47
|
+
python run.py doc.pdf --output result.json # write to a file
|
|
48
|
+
python run.py doc.pdf --dashboard # also emit an HTML dashboard
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### Re-render a dashboard
|
|
52
|
+
|
|
53
|
+
```sh
|
|
54
|
+
python render_dashboard.py result.json
|
|
55
|
+
# → result.html alongside the JSON
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### Browse dashboards in a browser
|
|
59
|
+
|
|
60
|
+
```sh
|
|
61
|
+
./serve.sh
|
|
62
|
+
# → http://localhost:8080/result.html
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## Rules in this release
|
|
66
|
+
|
|
67
|
+
{RULES_LIST}
|
|
68
|
+
|
|
69
|
+
## Reproducibility
|
|
70
|
+
|
|
71
|
+
The release bundle can be regenerated from the snapshot tag:
|
|
72
|
+
|
|
73
|
+
```sh
|
|
74
|
+
git checkout {SNAPSHOT_TAG}
|
|
75
|
+
# then run kc-beta and ask it to release({label: "{LABEL}"}) again
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
The `manifest.json` records the exact commit (`{SNAPSHOT_COMMIT}`) so you can verify what's running.
|
|
79
|
+
|
|
80
|
+
## Caveats
|
|
81
|
+
|
|
82
|
+
- Workflows call worker LLMs. Costs depend on your provider; the bundle does not enforce a budget.
|
|
83
|
+
- Workflow output for each rule is preserved in `result.raw[*]` for audit. If you need the full audit history (KC's event log and corner-case registry), work from the source workspace rather than this bundle.
|
|
84
|
+
- The bundle does not sandbox the Python workflows it runs. Only run a bundle you trust, as you would any other executable.
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Confidence scorer — Python port of src/agent/confidence-scorer.js.
|
|
3
|
+
|
|
4
|
+
Composite formula: confidence = method_prior * source_presence
|
|
5
|
+
* historical_accuracy * (1 - corner_proximity)
|
|
6
|
+
|
|
7
|
+
Identical to the JS scorer used inside KC, so release runs produce the same
|
|
8
|
+
confidence values KC produces in-workspace.
|
|
9
|
+
|
|
10
|
+
Note on rounding: JS Math.round() is half-up, Python's round() is half-to-even
|
|
11
|
+
(banker's rounding). We use a half-up implementation here to match JS exactly.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import math
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _round3_halfup(x):
|
|
18
|
+
"""Round x to 3 decimals, half-up (matches JS Math.round)."""
|
|
19
|
+
return math.floor(x * 1000 + 0.5) / 1000
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
DEFAULT_PRIORS = {
|
|
23
|
+
"regex": 0.95,
|
|
24
|
+
"python": 0.90,
|
|
25
|
+
"llm": 0.75,
|
|
26
|
+
"ocr": 0.65,
|
|
27
|
+
"fallback": 0.50,
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def score(rule_id, extracted_value, source_text="", method="llm",
          document="", priors=None, historical=None, corner_cases=None):
    """
    Return the composite confidence for one extracted value, in [0.0, 1.0].

    The result is the product
        method_prior * source_presence * historical_accuracy * (1 - corner_proximity)
    clamped to [0, 1] and rounded to three decimals.

    rule_id: rule identifier (key into `historical`, filter for corner cases)
    extracted_value: the value the workflow extracted
    source_text: optional surrounding text from the document
    method: extraction method name, looked up in the prior table
    document: document name / path, forwarded to the corner-case matcher
    priors: prior table used INSTEAD of DEFAULT_PRIORS when non-empty
    historical: {rule_id: accuracy}; rules without an entry use 0.8
    corner_cases: corner-case registry (list, or dict with "entries")
    """
    prior_table = priors if priors else DEFAULT_PRIORS
    if method in prior_table:
        method_prior = prior_table[method]
    else:
        # Unknown method: use the table's own fallback, or 0.50 if it has none.
        method_prior = prior_table.get("fallback", 0.50)

    # The presence penalty applies only when both the value and the source
    # text are non-empty and the value is not found verbatim in the text.
    can_check = bool(source_text) and bool(extracted_value)
    if can_check and str(extracted_value) not in source_text:
        source_presence = 0.7
    else:
        source_presence = 1.0

    calibration = historical if historical else {}
    hist = calibration.get(rule_id, 0.8)

    corner_penalty = _corner_proximity(corner_cases, document, rule_id)

    raw = method_prior * source_presence * hist * (1.0 - corner_penalty)
    bounded = min(1.0, raw)
    bounded = max(0.0, bounded)
    return _round3_halfup(bounded)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def band(confidence):
    """Map a confidence value to "high" (>= 0.8), "medium" (>= 0.5) or "low".

    Intended to mirror the JS getBand() thresholds.
    """
    # Thresholds are checked from highest to lowest; the first one met wins.
    for floor_value, label in ((0.8, "high"), (0.5, "medium")):
        if confidence >= floor_value:
            return label
    return "low"
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _corner_proximity(corner_cases, document, rule_id):
|
|
71
|
+
"""Mirror CornerCaseRegistry.match: count entries matching this doc + rule.
|
|
72
|
+
Each match adds 0.1 (capped at 0.3). Schema is intentionally loose — KC's
|
|
73
|
+
JS registry stores entries with optional `document_pattern` and `rule_id`
|
|
74
|
+
fields; we replicate the same matching semantics here.
|
|
75
|
+
"""
|
|
76
|
+
if not corner_cases or not document:
|
|
77
|
+
return 0.0
|
|
78
|
+
entries = corner_cases if isinstance(corner_cases, list) else corner_cases.get("entries", [])
|
|
79
|
+
if not entries:
|
|
80
|
+
return 0.0
|
|
81
|
+
|
|
82
|
+
matches = 0
|
|
83
|
+
for e in entries:
|
|
84
|
+
if not isinstance(e, dict):
|
|
85
|
+
continue
|
|
86
|
+
if e.get("rule_id") and e.get("rule_id") != rule_id:
|
|
87
|
+
continue
|
|
88
|
+
pattern = e.get("document_pattern") or e.get("document") or ""
|
|
89
|
+
if pattern and pattern not in document:
|
|
90
|
+
continue
|
|
91
|
+
matches += 1
|
|
92
|
+
|
|
93
|
+
return min(0.3, 0.1 * matches)
|