kc-beta 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/kc-beta.js +16 -0
- package/package.json +32 -0
- package/src/agent/confidence-scorer.js +120 -0
- package/src/agent/context.js +124 -0
- package/src/agent/corner-case-registry.js +119 -0
- package/src/agent/engine.js +224 -0
- package/src/agent/events.js +27 -0
- package/src/agent/history.js +101 -0
- package/src/agent/llm-client.js +131 -0
- package/src/agent/pipelines/base.js +14 -0
- package/src/agent/pipelines/distillation.js +113 -0
- package/src/agent/pipelines/extraction.js +92 -0
- package/src/agent/pipelines/index.js +23 -0
- package/src/agent/pipelines/initializer.js +163 -0
- package/src/agent/pipelines/production-qc.js +99 -0
- package/src/agent/pipelines/skill-authoring.js +83 -0
- package/src/agent/pipelines/skill-testing.js +111 -0
- package/src/agent/tools/agent-tool.js +100 -0
- package/src/agent/tools/base.js +35 -0
- package/src/agent/tools/dashboard-render.js +146 -0
- package/src/agent/tools/document-parse.js +184 -0
- package/src/agent/tools/document-search.js +111 -0
- package/src/agent/tools/evolution-cycle.js +150 -0
- package/src/agent/tools/qc-sample.js +94 -0
- package/src/agent/tools/registry.js +55 -0
- package/src/agent/tools/rule-catalog.js +113 -0
- package/src/agent/tools/sandbox-exec.js +106 -0
- package/src/agent/tools/tier-downgrade.js +114 -0
- package/src/agent/tools/worker-llm-call.js +109 -0
- package/src/agent/tools/workflow-run.js +138 -0
- package/src/agent/tools/workspace-file.js +122 -0
- package/src/agent/version-manager.js +130 -0
- package/src/agent/workspace.js +82 -0
- package/src/cli/components.js +164 -0
- package/src/cli/index.js +329 -0
- package/src/cli/init.js +80 -0
- package/src/cli/onboard.js +182 -0
- package/src/cli/terminal.js +143 -0
- package/src/config.js +93 -0
- package/template/.env.template +31 -0
- package/template/CLAUDE.md +137 -0
- package/template/Input/.gitkeep +0 -0
- package/template/Output/.gitkeep +0 -0
- package/template/Rules/.gitkeep +0 -0
- package/template/Samples/.gitkeep +0 -0
- package/template/skills/en/meta/compliance-judgment/SKILL.md +114 -0
- package/template/skills/en/meta/compliance-judgment/references/output-format.md +151 -0
- package/template/skills/en/meta/confidence-system/SKILL.md +117 -0
- package/template/skills/en/meta/corner-case-management/SKILL.md +111 -0
- package/template/skills/en/meta/cross-document-verification/SKILL.md +131 -0
- package/template/skills/en/meta/cross-document-verification/references/contradiction-taxonomy.md +73 -0
- package/template/skills/en/meta/data-sensibility/SKILL.md +115 -0
- package/template/skills/en/meta/document-parsing/SKILL.md +108 -0
- package/template/skills/en/meta/document-parsing/references/parser-catalog.md +40 -0
- package/template/skills/en/meta/entity-extraction/SKILL.md +129 -0
- package/template/skills/en/meta/tree-processing/SKILL.md +103 -0
- package/template/skills/en/meta-meta/bootstrap-workspace/SKILL.md +70 -0
- package/template/skills/en/meta-meta/dashboard-reporting/SKILL.md +106 -0
- package/template/skills/en/meta-meta/dashboard-reporting/scripts/generate_dashboard.py +178 -0
- package/template/skills/en/meta-meta/evolution-loop/SKILL.md +210 -0
- package/template/skills/en/meta-meta/evolution-loop/references/convergence-guide.md +62 -0
- package/template/skills/en/meta-meta/quality-control/SKILL.md +138 -0
- package/template/skills/en/meta-meta/quality-control/references/qa-layers.md +92 -0
- package/template/skills/en/meta-meta/quality-control/references/sampling-strategies.md +76 -0
- package/template/skills/en/meta-meta/rule-extraction/SKILL.md +100 -0
- package/template/skills/en/meta-meta/rule-extraction/references/chunking-strategies.md +80 -0
- package/template/skills/en/meta-meta/rule-graph/SKILL.md +118 -0
- package/template/skills/en/meta-meta/skill-authoring/SKILL.md +108 -0
- package/template/skills/en/meta-meta/skill-authoring/references/skill-format-spec.md +78 -0
- package/template/skills/en/meta-meta/skill-to-workflow/SKILL.md +150 -0
- package/template/skills/en/meta-meta/skill-to-workflow/references/worker-llm-catalog.md +50 -0
- package/template/skills/en/meta-meta/task-decomposition/SKILL.md +129 -0
- package/template/skills/en/meta-meta/task-decomposition/references/decision-matrix.md +81 -0
- package/template/skills/en/meta-meta/version-control/SKILL.md +152 -0
- package/template/skills/en/meta-meta/version-control/references/trace-id-spec.md +79 -0
- package/template/skills/en/skill-creator/LICENSE.txt +202 -0
- package/template/skills/en/skill-creator/SKILL.md +479 -0
- package/template/skills/en/skill-creator/agents/analyzer.md +274 -0
- package/template/skills/en/skill-creator/agents/comparator.md +202 -0
- package/template/skills/en/skill-creator/agents/grader.md +223 -0
- package/template/skills/en/skill-creator/assets/eval_review.html +146 -0
- package/template/skills/en/skill-creator/eval-viewer/generate_review.py +471 -0
- package/template/skills/en/skill-creator/eval-viewer/viewer.html +1325 -0
- package/template/skills/en/skill-creator/references/schemas.md +430 -0
- package/template/skills/en/skill-creator/scripts/__init__.py +0 -0
- package/template/skills/en/skill-creator/scripts/aggregate_benchmark.py +401 -0
- package/template/skills/en/skill-creator/scripts/generate_report.py +326 -0
- package/template/skills/en/skill-creator/scripts/improve_description.py +248 -0
- package/template/skills/en/skill-creator/scripts/package_skill.py +136 -0
- package/template/skills/en/skill-creator/scripts/quick_validate.py +103 -0
- package/template/skills/en/skill-creator/scripts/run_eval.py +310 -0
- package/template/skills/en/skill-creator/scripts/run_loop.py +332 -0
- package/template/skills/en/skill-creator/scripts/utils.py +47 -0
- package/template/skills/zh/meta/compliance-judgment/SKILL.md +303 -0
- package/template/skills/zh/meta/compliance-judgment/references/output-format.md +151 -0
- package/template/skills/zh/meta/confidence-system/SKILL.md +228 -0
- package/template/skills/zh/meta/corner-case-management/SKILL.md +235 -0
- package/template/skills/zh/meta/cross-document-verification/SKILL.md +241 -0
- package/template/skills/zh/meta/cross-document-verification/references/contradiction-taxonomy.md +73 -0
- package/template/skills/zh/meta/data-sensibility/SKILL.md +235 -0
- package/template/skills/zh/meta/document-parsing/SKILL.md +168 -0
- package/template/skills/zh/meta/document-parsing/references/parser-catalog.md +40 -0
- package/template/skills/zh/meta/entity-extraction/SKILL.md +276 -0
- package/template/skills/zh/meta/tree-processing/SKILL.md +233 -0
- package/template/skills/zh/meta-meta/bootstrap-workspace/SKILL.md +147 -0
- package/template/skills/zh/meta-meta/dashboard-reporting/SKILL.md +281 -0
- package/template/skills/zh/meta-meta/dashboard-reporting/scripts/generate_dashboard.py +178 -0
- package/template/skills/zh/meta-meta/evolution-loop/SKILL.md +302 -0
- package/template/skills/zh/meta-meta/evolution-loop/references/convergence-guide.md +62 -0
- package/template/skills/zh/meta-meta/quality-control/SKILL.md +269 -0
- package/template/skills/zh/meta-meta/quality-control/references/qa-layers.md +92 -0
- package/template/skills/zh/meta-meta/quality-control/references/sampling-strategies.md +76 -0
- package/template/skills/zh/meta-meta/rule-extraction/SKILL.md +208 -0
- package/template/skills/zh/meta-meta/rule-extraction/references/chunking-strategies.md +80 -0
- package/template/skills/zh/meta-meta/rule-graph/SKILL.md +203 -0
- package/template/skills/zh/meta-meta/skill-authoring/SKILL.md +235 -0
- package/template/skills/zh/meta-meta/skill-authoring/references/skill-format-spec.md +78 -0
- package/template/skills/zh/meta-meta/skill-to-workflow/SKILL.md +275 -0
- package/template/skills/zh/meta-meta/skill-to-workflow/references/worker-llm-catalog.md +50 -0
- package/template/skills/zh/meta-meta/task-decomposition/SKILL.md +224 -0
- package/template/skills/zh/meta-meta/task-decomposition/references/decision-matrix.md +81 -0
- package/template/skills/zh/meta-meta/version-control/SKILL.md +284 -0
- package/template/skills/zh/meta-meta/version-control/references/trace-id-spec.md +79 -0
- package/template/skills/zh/skill-creator/LICENSE.txt +202 -0
- package/template/skills/zh/skill-creator/SKILL.md +479 -0
- package/template/skills/zh/skill-creator/agents/analyzer.md +274 -0
- package/template/skills/zh/skill-creator/agents/comparator.md +202 -0
- package/template/skills/zh/skill-creator/agents/grader.md +223 -0
- package/template/skills/zh/skill-creator/assets/eval_review.html +146 -0
- package/template/skills/zh/skill-creator/eval-viewer/generate_review.py +471 -0
- package/template/skills/zh/skill-creator/eval-viewer/viewer.html +1325 -0
- package/template/skills/zh/skill-creator/references/schemas.md +430 -0
- package/template/skills/zh/skill-creator/scripts/__init__.py +0 -0
- package/template/skills/zh/skill-creator/scripts/aggregate_benchmark.py +401 -0
- package/template/skills/zh/skill-creator/scripts/generate_report.py +326 -0
- package/template/skills/zh/skill-creator/scripts/improve_description.py +248 -0
- package/template/skills/zh/skill-creator/scripts/package_skill.py +136 -0
- package/template/skills/zh/skill-creator/scripts/quick_validate.py +103 -0
- package/template/skills/zh/skill-creator/scripts/run_eval.py +310 -0
- package/template/skills/zh/skill-creator/scripts/run_loop.py +332 -0
- package/template/skills/zh/skill-creator/scripts/utils.py +47 -0
package/bin/kc-beta.js
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
// First CLI argument (if any) selects which entry point to launch.
const subcommand = process.argv[2];

// Subcommand name -> [module path relative to this file, exported entry function].
// A Map is used so that arbitrary user input can never hit Object.prototype keys.
const ENTRY_POINTS = new Map([
  ["onboard", ["../src/cli/onboard.js", "onboard"]],
  ["setup", ["../src/cli/onboard.js", "onboard"]],
  ["init", ["../src/cli/init.js", "init"]],
]);

// Anything else (including no subcommand at all) starts the main CLI.
const DEFAULT_ENTRY = ["../src/cli/index.js", "main"];

(async () => {
  const [modulePath, exportName] = ENTRY_POINTS.get(subcommand) ?? DEFAULT_ENTRY;
  // Modules are loaded lazily so only the selected entry point is evaluated.
  const cliModule = await import(modulePath);
  const run = cliModule[exportName];
  await run();
})();
|
package/package.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "kc-beta",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "KC Agent — LLM document verification agent (pure Node.js CLI)",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"bin": {
|
|
7
|
+
"kc-beta": "bin/kc-beta.js"
|
|
8
|
+
},
|
|
9
|
+
"files": [
|
|
10
|
+
"bin/",
|
|
11
|
+
"src/",
|
|
12
|
+
"template/"
|
|
13
|
+
],
|
|
14
|
+
"engines": {
|
|
15
|
+
"node": ">=20.0.0"
|
|
16
|
+
},
|
|
17
|
+
"dependencies": {
|
|
18
|
+
"ink": "^6.0.0",
|
|
19
|
+
"ink-text-input": "^6.0.0",
|
|
20
|
+
"ink-spinner": "^5.0.0",
|
|
21
|
+
"react": "^19.0.0",
|
|
22
|
+
"pdfjs-dist": "^4.0.0"
|
|
23
|
+
},
|
|
24
|
+
"keywords": [
|
|
25
|
+
"document-verification",
|
|
26
|
+
"llm",
|
|
27
|
+
"agent",
|
|
28
|
+
"cli"
|
|
29
|
+
],
|
|
30
|
+
"author": "kitchen-engineer42",
|
|
31
|
+
"license": "MIT"
|
|
32
|
+
}
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
|
|
4
|
+
/** Default prior confidence per extraction method; `fallback` is used for unknown methods. */
const DEFAULT_PRIORS = {
  regex: 0.95,
  python: 0.90,
  llm: 0.75,
  ocr: 0.65,
  fallback: 0.50,
};

/** Historical accuracy assumed for a rule that has never been calibrated. */
const DEFAULT_HISTORICAL_ACCURACY = 0.8;

/**
 * Keep only the own, finite, numeric entries of a plain object.
 * Anything that is not a plain object (null, arrays, primitives) yields `{}`.
 * @param {unknown} candidate
 * @returns {Record<string, number>}
 */
function pickFiniteNumbers(candidate) {
  const picked = {};
  if (candidate === null || typeof candidate !== "object" || Array.isArray(candidate)) {
    return picked;
  }
  for (const [key, value] of Object.entries(candidate)) {
    if (typeof value === "number" && Number.isFinite(value)) picked[key] = value;
  }
  return picked;
}

/**
 * Structural component: composite confidence scoring for verification results.
 *
 * Formula: confidence = method_prior x source_presence x historical_accuracy x (1 - corner_case_proximity)
 */
export class ConfidenceScorer {
  /**
   * Reads optional prior overrides from `<workspace>/.env` and previously saved
   * calibration data from `<workspace>/confidence_calibration.json`.
   * @param {string} workspacePath
   * @param {import('./corner-case-registry.js').CornerCaseRegistry} [cornerCases]
   */
  constructor(workspacePath, cornerCases) {
    this._workspace = workspacePath;
    this._cornerCases = cornerCases || null;
    this._priors = { ...DEFAULT_PRIORS };
    /** @type {Record<string, number>} rule_id → accuracy */
    this._historical = {};
    this._calibrationPath = path.join(workspacePath, "confidence_calibration.json");
    this._loadConfig();
    this._loadCalibration();
  }

  /**
   * Merge `CONFIDENCE_PRIORS=<json object>` lines from the workspace `.env`
   * into the priors. Malformed lines and non-numeric values are ignored so a
   * bad config can never turn scores into NaN.
   */
  _loadConfig() {
    const envPath = path.join(this._workspace, ".env");
    if (!fs.existsSync(envPath)) return;

    const prefix = "CONFIDENCE_PRIORS=";
    for (const line of fs.readFileSync(envPath, "utf-8").split("\n")) {
      if (!line.startsWith(prefix)) continue;

      // Take everything after the FIRST "=" — the JSON payload itself may contain "=".
      let raw = line.slice(prefix.length).trim();
      // JSON never starts with a single quote, so a matching pair is shell-style quoting.
      if (raw.length >= 2 && raw.startsWith("'") && raw.endsWith("'")) {
        raw = raw.slice(1, -1);
      }

      let custom;
      try {
        custom = JSON.parse(raw);
      } catch {
        continue; // best-effort: an unparsable override keeps the current priors
      }
      Object.assign(this._priors, pickFiniteNumbers(custom));
    }
  }

  /** Load per-rule historical accuracy saved by a previous `calibrate()` call, if any. */
  _loadCalibration() {
    if (!fs.existsSync(this._calibrationPath)) return;
    try {
      const data = JSON.parse(fs.readFileSync(this._calibrationPath, "utf-8"));
      this._historical = pickFiniteNumbers(data?.historical_accuracy);
    } catch {
      /* best-effort: an unreadable calibration file leaves the defaults in place */
    }
  }

  /**
   * Compute composite confidence score (0.0 - 1.0), rounded to three decimals.
   * @param {object} opts
   * @param {string} opts.ruleId
   * @param {string} opts.extractedValue
   * @param {string} [opts.sourceText]
   * @param {string} [opts.extractionMethod]
   * @param {string} [opts.documentName]
   * @returns {number}
   */
  score({ ruleId, extractedValue, sourceText = "", extractionMethod = "llm", documentName = "" }) {
    // Own-property checks keep names like "constructor" from resolving to prototype members.
    const methodPrior = Object.hasOwn(this._priors, extractionMethod)
      ? this._priors[extractionMethod]
      : this._priors.fallback;

    // Penalise values that cannot be found verbatim in the supplied source text.
    let sourcePresence = 1.0;
    if (sourceText && extractedValue) {
      sourcePresence = sourceText.includes(extractedValue) ? 1.0 : 0.7;
    }

    const historical = Object.hasOwn(this._historical, ruleId)
      ? this._historical[ruleId]
      : DEFAULT_HISTORICAL_ACCURACY;

    // Each matching corner case costs 10%, capped at 30%.
    let cornerProximity = 0.0;
    if (this._cornerCases && documentName) {
      const matches = this._cornerCases.match(documentName, ruleId);
      if (matches.length > 0) {
        cornerProximity = Math.min(0.3, 0.1 * matches.length);
      }
    }

    const confidence = methodPrior * sourcePresence * historical * (1.0 - cornerProximity);
    return Math.round(Math.max(0.0, Math.min(1.0, confidence)) * 1000) / 1000;
  }

  /**
   * Auto-calibrate after QC cycle: blends each rule's observed accuracy (70%)
   * with its previous historical value (30%) and persists the result.
   * Entries without a usable numeric `actual_accuracy` are skipped.
   * @param {Record<string, {predicted_avg?: number, actual_accuracy?: number}>} qcResults
   */
  calibrate(qcResults) {
    for (const [ruleId, data] of Object.entries(qcResults)) {
      const reported = data?.actual_accuracy;
      if (reported == null) continue;
      const actual = Number(reported);
      if (!Number.isFinite(actual)) continue;

      const old = Object.hasOwn(this._historical, ruleId)
        ? this._historical[ruleId]
        : DEFAULT_HISTORICAL_ACCURACY;
      this._historical[ruleId] = Math.round((0.7 * actual + 0.3 * old) * 1000) / 1000;
    }
    this._saveCalibration();
  }

  /** Write the current historical accuracies to the calibration file. */
  _saveCalibration() {
    fs.writeFileSync(
      this._calibrationPath,
      JSON.stringify({ historical_accuracy: this._historical }, null, 2),
      "utf-8",
    );
  }

  /**
   * Classify confidence into low/medium/high band.
   * @param {number} confidence
   * @returns {string} "high" (>= 0.8), "medium" (>= 0.5) or "low"
   */
  getBand(confidence) {
    if (confidence >= 0.8) return "high";
    if (confidence >= 0.5) return "medium";
    return "low";
  }
}
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
// Static part of the system prompt: agent persona, tool descriptions and methodology.
// A trailing backslash inside the template literal joins the next line without a newline.
const AGENT_IDENTITY = `\
You are KC Agent, a document verification coding agent. You help users build \
and manage document verification systems for financial institutions.

You are direct and technical. You think step by step. When you don't know \
something, you say so.

## Tools

You have the following tools:

- **sandbox_exec**: Execute shell commands in your workspace directory. Use this \
to run Python scripts, install packages, list files, etc. Pipes and redirects work.

- **workspace_file**: Read, write, or list files in your workspace. Operations: \
read (path), write (path + content), list (optional path).

- **document_parse**: Parse documents (PDF, DOCX, images) and extract text. \
Internally uses an escalation chain: text extraction → API parser → OCR models. \
Starts cheap, escalates if needed. You don't choose the method — the tool handles it. \
Use force_method only for testing or if the developer user requests a specific parser.

- **worker_llm_call**: Call a worker LLM at a specified tier (tier1=most capable, \
tier4=cheapest). Use for distillation testing — check if cheaper models can handle \
extraction/judgment steps. Returns response with model used and token counts.

- **workflow_run**: Execute a distilled workflow against a document. Automatically \
attaches confidence scores and trace IDs. Results saved to output/results/.

- **tier_downgrade**: Test a workflow step at a lower tier. Compares accuracy at \
target tier vs. current baseline. Recommends downgrade if accuracy stays above threshold.

- **evolution_cycle**: Run one diagnose-classify-fix iteration. Classifies failures \
as systemic (>threshold) or corner case (<threshold). Routes corner cases to registry \
automatically. Checks for repeated failure patterns across iterations.

- **document_search**: Search for text across workspace documents. Supports plain text \
and regex. Returns matching passages with file path and line number.

- **rule_catalog**: CRUD on the rule registry. Enforces required fields (id, source_ref, \
description). Operations: create, read, update, delete, list.

- **qc_sample**: Draw adaptive sample from production results for review. Stratifies \
by confidence band. All low-confidence reviewed, medium sampled, high spot-checked.

- **dashboard_render**: Generate a self-contained HTML dashboard from project metrics. \
Shows rules, confidence distribution, evolution history, QC results.

- **agent_tool**: Spawn a sub-agent for independent parallel work. Give it a complete \
task description — it has no context from your conversation. Sub-agent writes results \
to sub_agents/{task_id}/. Use for parallel rule testing, batch processing, etc.

Use tools to do real work. Write code to files, then run it. Check results by \
reading output. Don't guess — verify.

## Methodology

### Document Parsing
- Start with the simplest parser (text extraction). Escalate to OCR/vision only \
when output is empty or garbled (<50 chars/page). Simple parsers fail less.
- Once a parser works for a document type, lock it in. Don't re-evaluate unless \
downstream extraction fails.
- Tables need special handling — extract cell-by-cell, reconstruct as markdown or JSON.

### Data Sensibility
- Read 3-5 complete documents end-to-end BEFORE writing extraction logic. Read raw \
parsed text, not PDF viewer. This saves hours of debugging bad assumptions.
- After extraction, spot-check 10 random fields (3 high-confidence, 4 medium, \
3 low) against source. If >1 out of 10 is wrong, STOP — don't continue.
- Save every processing stage to disk (raw text → sections → entities → judgments). \
Disk is cheap; debugging without intermediates is guesswork.

### Rule Extraction
- One rule = one pass/fail outcome. If a rule can produce two independent results, \
split it. Rules must be self-contained and scoped to where in the document to look.
- Work top-down (onion peeler): major areas → chapters → sections → atomic rules. \
Stop when rules are atomic and testable.
- Handle ambiguity explicitly. Extract as understood, note ambiguities, ask the \
developer user. Ambiguous rules are often the most important — don't skip them.
- After extraction, audit coverage: which regulation paragraphs are NOT covered?

### Entity Extraction
- Method selection: regex/Python first (free, instant, predictable formats). LLM \
only when semantic understanding is required. Hybrid: regex first, LLM fallback.
- Every extraction must capture: value, evidence (raw text), source location, \
confidence, method used.
- Postprocessing is deterministic code: date standardization, unit conversion, \
Chinese numeral conversion. Build as reusable Python functions.

### Evolution Loop
- The cycle: test → observe → diagnose → classify → fix → retest → log.
- Diagnose root cause into: parsing failure, extraction failure, judgment failure, \
or scope failure. Each drives different fixes.
- Systemic issue (>10% of docs) → rewrite code/prompts. Corner case (<10%) → \
record in corner_cases.json with detection + resolution. Do NOT patch main \
workflow for corner cases.
- Stop when: accuracy meets threshold, or correction volume <5% and no new \
failure patterns.

### Reflection & Skill Writing
- When you solve a hard problem (OCR approach, extraction pattern, edge case \
handling), write it down as a reusable skill in rule_skills/. Future sessions \
and rules benefit from your discoveries.
- Skills capture methodology, not just code. Describe WHEN to use this approach, \
WHY it works, and WHAT to watch out for.`;

/**
 * Assembles the system prompt sent to the LLM.
 * Sections, in order: the fixed agent identity/methodology text, then the
 * optional pipeline state, then the optional workspace state.
 */
export class ContextAssembler {
  /**
   * @param {object} [opts]
   * @param {string} [opts.pipelineState] - appended only when truthy
   * @param {string} [opts.workspaceState] - appended only when truthy
   * @returns {string} the sections separated by one blank line
   */
  build({ pipelineState, workspaceState } = {}) {
    // The identity section (index 0) is always kept; optional sections are dropped when falsy.
    return [AGENT_IDENTITY, pipelineState, workspaceState]
      .filter((section, position) => position === 0 || Boolean(section))
      .join("\n\n");
  }
}
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
|
|
4
|
+
/**
 * Structural component: first-class data structure for edge cases.
 * Corner cases (<10% failure rate) are stored here instead of patching
 * main workflows. The EvolutionController routes failures here automatically.
 * Persists to workspace/corner_cases.json.
 */
export class CornerCaseRegistry {
  /**
   * Loads any previously persisted cases from `<workspacePath>/corner_cases.json`.
   * @param {string} workspacePath
   */
  constructor(workspacePath) {
    this._path = path.join(workspacePath, "corner_cases.json");
    /** @type {Array<CornerCase>} */
    this._cases = [];
    this._load();
  }

  /**
   * Build a canonical in-memory CornerCase from either the persisted
   * snake_case shape or the camelCase shape, filling in defaults.
   * Used by both `_load()` and `add()` so a case behaves the same before
   * and after a save/reload round trip.
   * @param {object} entry
   * @returns {CornerCase}
   */
  static _normalize(entry) {
    return {
      id: entry.id,
      ruleId: entry.rule_id || entry.ruleId,
      detectionPattern: entry.detection_pattern || entry.detectionPattern || "",
      resolution: entry.resolution || "",
      affectedDocuments: entry.affected_documents || entry.affectedDocuments || [],
      discoveryDate: entry.discovery_date || entry.discoveryDate || new Date().toISOString(),
      status: entry.status || "active",
    };
  }

  /** Read the registry file; a missing, unreadable or non-array file yields an empty registry. */
  _load() {
    if (!fs.existsSync(this._path)) return;
    try {
      const data = JSON.parse(fs.readFileSync(this._path, "utf-8"));
      // Skip malformed entries instead of discarding every valid case alongside them.
      this._cases = Array.isArray(data)
        ? data
            .filter((e) => e !== null && typeof e === "object")
            .map((e) => CornerCaseRegistry._normalize(e))
        : [];
    } catch {
      this._cases = [];
    }
  }

  /** Persist all cases in snake_case form. */
  _save() {
    const data = this._cases.map((c) => ({
      id: c.id,
      rule_id: c.ruleId,
      detection_pattern: c.detectionPattern,
      resolution: c.resolution,
      affected_documents: c.affectedDocuments,
      discovery_date: c.discoveryDate,
      status: c.status,
    }));
    fs.writeFileSync(this._path, JSON.stringify(data, null, 2), "utf-8");
  }

  /**
   * Add or update a corner case. Deduplicates by id.
   * The case is stored as a normalized copy (missing `status` becomes
   * "active", missing `discoveryDate` becomes now); the argument itself is
   * not modified.
   * @param {CornerCase} cornerCase
   */
  add(cornerCase) {
    const normalized = CornerCaseRegistry._normalize(cornerCase);
    const idx = this._cases.findIndex((c) => c.id === normalized.id);
    if (idx >= 0) {
      this._cases[idx] = normalized;
    } else {
      this._cases.push(normalized);
    }
    this._save();
  }

  /**
   * @param {string} caseId
   * @returns {CornerCase|null}
   */
  get(caseId) {
    return this._cases.find((c) => c.id === caseId) || null;
  }

  /**
   * Active cases registered for one rule.
   * @param {string} ruleId
   * @returns {Array<CornerCase>}
   */
  getByRule(ruleId) {
    return this._cases.filter((c) => c.ruleId === ruleId && c.status === "active");
  }

  /** @returns {Array<CornerCase>} every case whose status is "active" */
  allActive() {
    return this._cases.filter((c) => c.status === "active");
  }

  /** @returns {number} total number of cases, regardless of status */
  count() {
    return this._cases.length;
  }

  /**
   * Check if a document matches any known corner case patterns for a rule.
   * A pattern matches when it occurs, case-insensitively, as a substring of
   * the document name.
   * @param {string} documentName
   * @param {string} ruleId
   * @returns {Array<CornerCase>}
   */
  match(documentName, ruleId) {
    const haystack = String(documentName ?? "").toLowerCase();
    return this.getByRule(ruleId).filter(
      (c) =>
        typeof c.detectionPattern === "string" &&
        c.detectionPattern !== "" &&
        haystack.includes(c.detectionPattern.toLowerCase()),
    );
  }
}
|
|
109
|
+
|
|
110
|
+
/**
|
|
111
|
+
* @typedef {object} CornerCase
|
|
112
|
+
* @property {string} id
|
|
113
|
+
* @property {string} ruleId
|
|
114
|
+
* @property {string} detectionPattern
|
|
115
|
+
* @property {string} resolution
|
|
116
|
+
* @property {string[]} affectedDocuments
|
|
117
|
+
* @property {string} discoveryDate
|
|
118
|
+
* @property {string} status - active | resolved | obsolete
|
|
119
|
+
*/
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
import { AgentEvent } from "./events.js";
|
|
2
|
+
import { ContextAssembler } from "./context.js";
|
|
3
|
+
import { ConversationHistory } from "./history.js";
|
|
4
|
+
import { Workspace } from "./workspace.js";
|
|
5
|
+
import { VersionManager } from "./version-manager.js";
|
|
6
|
+
import { CornerCaseRegistry } from "./corner-case-registry.js";
|
|
7
|
+
import { ConfidenceScorer } from "./confidence-scorer.js";
|
|
8
|
+
import { ToolRegistry } from "./tools/registry.js";
|
|
9
|
+
import { SandboxExecTool } from "./tools/sandbox-exec.js";
|
|
10
|
+
import { WorkspaceFileTool } from "./tools/workspace-file.js";
|
|
11
|
+
import { DocumentParseTool } from "./tools/document-parse.js";
|
|
12
|
+
import { DocumentSearchTool } from "./tools/document-search.js";
|
|
13
|
+
import { WorkerLLMCallTool } from "./tools/worker-llm-call.js";
|
|
14
|
+
import { WorkflowRunTool } from "./tools/workflow-run.js";
|
|
15
|
+
import { RuleCatalogTool } from "./tools/rule-catalog.js";
|
|
16
|
+
import { QCSampleTool } from "./tools/qc-sample.js";
|
|
17
|
+
import { DashboardRenderTool } from "./tools/dashboard-render.js";
|
|
18
|
+
import { EvolutionCycleTool } from "./tools/evolution-cycle.js";
|
|
19
|
+
import { TierDowngradeTool } from "./tools/tier-downgrade.js";
|
|
20
|
+
import { AgentTool } from "./tools/agent-tool.js";
|
|
21
|
+
import { Phase } from "./pipelines/index.js";
|
|
22
|
+
import { ProjectInitializer } from "./pipelines/initializer.js";
|
|
23
|
+
import { RuleExtractionPipeline } from "./pipelines/extraction.js";
|
|
24
|
+
import { SkillAuthoringPipeline } from "./pipelines/skill-authoring.js";
|
|
25
|
+
import { SkillTestingPipeline } from "./pipelines/skill-testing.js";
|
|
26
|
+
import { DistillationEngine as DistillationPipeline } from "./pipelines/distillation.js";
|
|
27
|
+
import { ProductionQCPipeline } from "./pipelines/production-qc.js";
|
|
28
|
+
|
|
29
|
+
/**
|
|
30
|
+
* The KC Agent conversation engine.
|
|
31
|
+
*
|
|
32
|
+
* Core loop: user message -> context assembly -> LLM API (streaming) ->
|
|
33
|
+
* tool execution (if any) -> repeat until no tool calls -> turn complete.
|
|
34
|
+
*
|
|
35
|
+
* Uses OpenAI-compatible API. Yields AgentEvent objects.
|
|
36
|
+
*/
|
|
37
|
+
export class AgentEngine {
|
|
38
|
+
/**
|
|
39
|
+
* @param {object} opts
|
|
40
|
+
* @param {import('./llm-client.js').LLMClient} opts.client
|
|
41
|
+
* @param {object} opts.config - Settings from loadSettings()
|
|
42
|
+
* @param {string} [opts.sessionId]
|
|
43
|
+
*/
|
|
44
|
+
constructor({ client, config, sessionId }) {
|
|
45
|
+
this.client = client;
|
|
46
|
+
this.config = config;
|
|
47
|
+
this.context = new ContextAssembler();
|
|
48
|
+
this.toolRegistry = new ToolRegistry();
|
|
49
|
+
|
|
50
|
+
// Workspace + structural components
|
|
51
|
+
this.workspace = new Workspace(config.kcWorkspaceRoot, sessionId);
|
|
52
|
+
this.history = new ConversationHistory(this.workspace.cwd);
|
|
53
|
+
this.versionManager = new VersionManager(this.workspace.cwd);
|
|
54
|
+
this.cornerCases = new CornerCaseRegistry(this.workspace.cwd);
|
|
55
|
+
this.confidence = new ConfidenceScorer(this.workspace.cwd, this.cornerCases);
|
|
56
|
+
|
|
57
|
+
// Register tools
|
|
58
|
+
this.toolRegistry.register(new SandboxExecTool(this.workspace, config.kcExecTimeout));
|
|
59
|
+
this.toolRegistry.register(new WorkspaceFileTool(this.workspace, this.versionManager));
|
|
60
|
+
this.toolRegistry.register(new DocumentParseTool(this.workspace, {
|
|
61
|
+
mineruApiUrl: config.mineruApiUrl,
|
|
62
|
+
mineruApiKey: config.mineruApiKey,
|
|
63
|
+
siliconflowApiKey: config.siliconflowApiKey,
|
|
64
|
+
siliconflowBaseUrl: config.siliconflowBaseUrl,
|
|
65
|
+
ocrModel: config.ocrModelTier1,
|
|
66
|
+
}));
|
|
67
|
+
this.toolRegistry.register(new DocumentSearchTool(this.workspace));
|
|
68
|
+
|
|
69
|
+
const workerLlm = new WorkerLLMCallTool(this.workspace, {
|
|
70
|
+
apiKey: config.siliconflowApiKey,
|
|
71
|
+
baseUrl: config.siliconflowBaseUrl,
|
|
72
|
+
});
|
|
73
|
+
this.toolRegistry.register(workerLlm);
|
|
74
|
+
this.toolRegistry.register(new WorkflowRunTool(this.workspace, this.versionManager, this.confidence));
|
|
75
|
+
this.toolRegistry.register(new TierDowngradeTool(this.workspace, workerLlm));
|
|
76
|
+
this.toolRegistry.register(new EvolutionCycleTool(this.workspace, this.cornerCases));
|
|
77
|
+
this.toolRegistry.register(new RuleCatalogTool(this.workspace));
|
|
78
|
+
this.toolRegistry.register(new QCSampleTool(this.workspace));
|
|
79
|
+
this.toolRegistry.register(new DashboardRenderTool(this.workspace));
|
|
80
|
+
this.toolRegistry.register(new AgentTool(this.workspace, (sid) => new AgentEngine({ client, config, sessionId: sid })));
|
|
81
|
+
|
|
82
|
+
// Pipeline system (meta-meta skills as code)
|
|
83
|
+
this.currentPhase = Phase.BOOTSTRAP;
|
|
84
|
+
this.pipelines = {
|
|
85
|
+
[Phase.BOOTSTRAP]: new ProjectInitializer(this.workspace),
|
|
86
|
+
[Phase.EXTRACTION]: new RuleExtractionPipeline(this.workspace),
|
|
87
|
+
[Phase.SKILL_AUTHORING]: new SkillAuthoringPipeline(this.workspace),
|
|
88
|
+
[Phase.SKILL_TESTING]: new SkillTestingPipeline(this.workspace),
|
|
89
|
+
[Phase.DISTILLATION]: new DistillationPipeline(this.workspace),
|
|
90
|
+
[Phase.PRODUCTION_QC]: new ProductionQCPipeline(this.workspace),
|
|
91
|
+
};
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
/**
|
|
95
|
+
* Register additional tools (called after construction for tools that need extra deps).
|
|
96
|
+
* @param {import('./tools/base.js').BaseTool} tool
|
|
97
|
+
*/
|
|
98
|
+
registerTool(tool) {
|
|
99
|
+
this.toolRegistry.register(tool);
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
/**
|
|
103
|
+
* Run one conversation turn. Yields AgentEvent objects.
|
|
104
|
+
* Loops: LLM call -> tool execution -> LLM call ... until no tool calls.
|
|
105
|
+
* @param {string} userMessage
|
|
106
|
+
* @yields {AgentEvent}
|
|
107
|
+
*/
|
|
108
|
+
async *runTurn(userMessage) {
|
|
109
|
+
this.history.addUser(userMessage);
|
|
110
|
+
|
|
111
|
+
// Pipeline state injection
|
|
112
|
+
const pipeline = this.pipelines[this.currentPhase];
|
|
113
|
+
const pipelineState = pipeline?.describeState?.() || null;
|
|
114
|
+
|
|
115
|
+
const systemPrompt = this.context.build({
|
|
116
|
+
pipelineState,
|
|
117
|
+
workspaceState: `Your workspace directory is: ${this.workspace.cwd}`,
|
|
118
|
+
});
|
|
119
|
+
const tools = this.toolRegistry.schemasOpenai();
|
|
120
|
+
|
|
121
|
+
while (true) {
|
|
122
|
+
const messages = [{ role: "system", content: systemPrompt }, ...this.history.messages];
|
|
123
|
+
|
|
124
|
+
try {
|
|
125
|
+
let collectedText = "";
|
|
126
|
+
/** @type {Map<number, {id: string, name: string, arguments: string}>} */
|
|
127
|
+
const toolCallsAcc = new Map();
|
|
128
|
+
|
|
129
|
+
const stream = this.client.streamChat({
|
|
130
|
+
model: this.config.kcModel,
|
|
131
|
+
messages,
|
|
132
|
+
tools: tools.length > 0 ? tools : undefined,
|
|
133
|
+
maxTokens: this.config.kcMaxTokens,
|
|
134
|
+
});
|
|
135
|
+
|
|
136
|
+
for await (const chunk of stream) {
|
|
137
|
+
const delta = chunk.choices?.[0]?.delta;
|
|
138
|
+
if (!delta) continue;
|
|
139
|
+
|
|
140
|
+
// Stream text content
|
|
141
|
+
if (delta.content) {
|
|
142
|
+
yield new AgentEvent({ type: "text_delta", text: delta.content });
|
|
143
|
+
collectedText += delta.content;
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
// Accumulate tool calls from deltas
|
|
147
|
+
if (delta.tool_calls) {
|
|
148
|
+
for (const tcDelta of delta.tool_calls) {
|
|
149
|
+
const idx = tcDelta.index;
|
|
150
|
+
if (!toolCallsAcc.has(idx)) {
|
|
151
|
+
toolCallsAcc.set(idx, { id: tcDelta.id || "", name: "", arguments: "" });
|
|
152
|
+
}
|
|
153
|
+
const acc = toolCallsAcc.get(idx);
|
|
154
|
+
if (tcDelta.id) acc.id = tcDelta.id;
|
|
155
|
+
if (tcDelta.function?.name) acc.name = tcDelta.function.name;
|
|
156
|
+
if (tcDelta.function?.arguments) acc.arguments += tcDelta.function.arguments;
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
// Build assistant message for history
|
|
162
|
+
const assistantMsg = { role: "assistant", content: collectedText || null };
|
|
163
|
+
if (toolCallsAcc.size > 0) {
|
|
164
|
+
assistantMsg.tool_calls = Array.from(toolCallsAcc.values()).map((tc) => ({
|
|
165
|
+
id: tc.id,
|
|
166
|
+
type: "function",
|
|
167
|
+
function: { name: tc.name, arguments: tc.arguments },
|
|
168
|
+
}));
|
|
169
|
+
}
|
|
170
|
+
this.history.addRaw(assistantMsg);
|
|
171
|
+
|
|
172
|
+
// No tool calls → turn complete
|
|
173
|
+
if (toolCallsAcc.size === 0) {
|
|
174
|
+
yield new AgentEvent({ type: "turn_complete" });
|
|
175
|
+
return;
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
// Tool execution loop
|
|
179
|
+
for (const tc of toolCallsAcc.values()) {
|
|
180
|
+
let inputData = {};
|
|
181
|
+
try {
|
|
182
|
+
inputData = tc.arguments ? JSON.parse(tc.arguments) : {};
|
|
183
|
+
} catch { /* ignore parse errors */ }
|
|
184
|
+
|
|
185
|
+
yield new AgentEvent({ type: "tool_start", name: tc.name, input: inputData });
|
|
186
|
+
const result = await this.toolRegistry.execute(tc.name, inputData);
|
|
187
|
+
yield new AgentEvent({
|
|
188
|
+
type: "tool_result",
|
|
189
|
+
name: tc.name,
|
|
190
|
+
output: result.content,
|
|
191
|
+
isError: result.isError,
|
|
192
|
+
});
|
|
193
|
+
|
|
194
|
+
// Add tool result message
|
|
195
|
+
this.history.addRaw({
|
|
196
|
+
role: "tool",
|
|
197
|
+
tool_call_id: tc.id,
|
|
198
|
+
content: result.content,
|
|
199
|
+
});
|
|
200
|
+
|
|
201
|
+
// Pipeline controller: update state after tool execution
|
|
202
|
+
if (pipeline?.onToolResult) {
|
|
203
|
+
const pEvent = pipeline.onToolResult(tc.name, inputData, result);
|
|
204
|
+
if (pEvent) {
|
|
205
|
+
if (pEvent.type === "phase_ready" && pEvent.nextPhase) {
|
|
206
|
+
this.currentPhase = pEvent.nextPhase;
|
|
207
|
+
}
|
|
208
|
+
yield new AgentEvent({
|
|
209
|
+
type: "pipeline_event",
|
|
210
|
+
data: pEvent,
|
|
211
|
+
});
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
// Loop continues — send tool results back to LLM
|
|
217
|
+
|
|
218
|
+
} catch (err) {
|
|
219
|
+
yield new AgentEvent({ type: "error", message: err.message });
|
|
220
|
+
return;
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
 * One event produced by the agent engine while it processes a turn.
 * Instances are plain field holders intended to be rendered directly by the
 * CLI, without a serialization step.
 */
export class AgentEvent {
  /**
   * Build an event. Only `type` is stored as given; every other field falls
   * back to a fixed default when it is `null` or `undefined`.
   *
   * @param {object} opts
   * @param {string} opts.type - Event type: text_delta, turn_complete, tool_start, tool_result, error, pipeline_event, phase_update, session_info, system_message, history_load
   * @param {string} [opts.text] - Text content (text_delta); defaults to null
   * @param {string} [opts.name] - Tool name (tool_start, tool_result); defaults to null
   * @param {object} [opts.input] - Tool input (tool_start); defaults to null
   * @param {string} [opts.output] - Tool output (tool_result); defaults to null
   * @param {boolean} [opts.isError] - Whether tool errored (tool_result); defaults to false
   * @param {string} [opts.message] - Message (error, system_message); defaults to null
   * @param {object} [opts.data] - Generic payload (pipeline_event); defaults to an empty object
   */
  constructor({ type, text, name, input, output, isError, message, data }) {
    // Single assignment keeps the field order stable: type, text, name,
    // input, output, isError, message, data.
    Object.assign(this, {
      type,
      text: text ?? null,
      name: name ?? null,
      input: input ?? null,
      output: output ?? null,
      isError: isError ?? false,
      message: message ?? null,
      data: data ?? {},
    });
  }
}
|