kc-beta 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. package/bin/kc-beta.js +16 -0
  2. package/package.json +32 -0
  3. package/src/agent/confidence-scorer.js +120 -0
  4. package/src/agent/context.js +124 -0
  5. package/src/agent/corner-case-registry.js +119 -0
  6. package/src/agent/engine.js +224 -0
  7. package/src/agent/events.js +27 -0
  8. package/src/agent/history.js +101 -0
  9. package/src/agent/llm-client.js +131 -0
  10. package/src/agent/pipelines/base.js +14 -0
  11. package/src/agent/pipelines/distillation.js +113 -0
  12. package/src/agent/pipelines/extraction.js +92 -0
  13. package/src/agent/pipelines/index.js +23 -0
  14. package/src/agent/pipelines/initializer.js +163 -0
  15. package/src/agent/pipelines/production-qc.js +99 -0
  16. package/src/agent/pipelines/skill-authoring.js +83 -0
  17. package/src/agent/pipelines/skill-testing.js +111 -0
  18. package/src/agent/tools/agent-tool.js +100 -0
  19. package/src/agent/tools/base.js +35 -0
  20. package/src/agent/tools/dashboard-render.js +146 -0
  21. package/src/agent/tools/document-parse.js +184 -0
  22. package/src/agent/tools/document-search.js +111 -0
  23. package/src/agent/tools/evolution-cycle.js +150 -0
  24. package/src/agent/tools/qc-sample.js +94 -0
  25. package/src/agent/tools/registry.js +55 -0
  26. package/src/agent/tools/rule-catalog.js +113 -0
  27. package/src/agent/tools/sandbox-exec.js +106 -0
  28. package/src/agent/tools/tier-downgrade.js +114 -0
  29. package/src/agent/tools/worker-llm-call.js +109 -0
  30. package/src/agent/tools/workflow-run.js +138 -0
  31. package/src/agent/tools/workspace-file.js +122 -0
  32. package/src/agent/version-manager.js +130 -0
  33. package/src/agent/workspace.js +82 -0
  34. package/src/cli/components.js +164 -0
  35. package/src/cli/index.js +329 -0
  36. package/src/cli/init.js +80 -0
  37. package/src/cli/onboard.js +182 -0
  38. package/src/cli/terminal.js +143 -0
  39. package/src/config.js +93 -0
  40. package/template/.env.template +31 -0
  41. package/template/CLAUDE.md +137 -0
  42. package/template/Input/.gitkeep +0 -0
  43. package/template/Output/.gitkeep +0 -0
  44. package/template/Rules/.gitkeep +0 -0
  45. package/template/Samples/.gitkeep +0 -0
  46. package/template/skills/en/meta/compliance-judgment/SKILL.md +114 -0
  47. package/template/skills/en/meta/compliance-judgment/references/output-format.md +151 -0
  48. package/template/skills/en/meta/confidence-system/SKILL.md +117 -0
  49. package/template/skills/en/meta/corner-case-management/SKILL.md +111 -0
  50. package/template/skills/en/meta/cross-document-verification/SKILL.md +131 -0
  51. package/template/skills/en/meta/cross-document-verification/references/contradiction-taxonomy.md +73 -0
  52. package/template/skills/en/meta/data-sensibility/SKILL.md +115 -0
  53. package/template/skills/en/meta/document-parsing/SKILL.md +108 -0
  54. package/template/skills/en/meta/document-parsing/references/parser-catalog.md +40 -0
  55. package/template/skills/en/meta/entity-extraction/SKILL.md +129 -0
  56. package/template/skills/en/meta/tree-processing/SKILL.md +103 -0
  57. package/template/skills/en/meta-meta/bootstrap-workspace/SKILL.md +70 -0
  58. package/template/skills/en/meta-meta/dashboard-reporting/SKILL.md +106 -0
  59. package/template/skills/en/meta-meta/dashboard-reporting/scripts/generate_dashboard.py +178 -0
  60. package/template/skills/en/meta-meta/evolution-loop/SKILL.md +210 -0
  61. package/template/skills/en/meta-meta/evolution-loop/references/convergence-guide.md +62 -0
  62. package/template/skills/en/meta-meta/quality-control/SKILL.md +138 -0
  63. package/template/skills/en/meta-meta/quality-control/references/qa-layers.md +92 -0
  64. package/template/skills/en/meta-meta/quality-control/references/sampling-strategies.md +76 -0
  65. package/template/skills/en/meta-meta/rule-extraction/SKILL.md +100 -0
  66. package/template/skills/en/meta-meta/rule-extraction/references/chunking-strategies.md +80 -0
  67. package/template/skills/en/meta-meta/rule-graph/SKILL.md +118 -0
  68. package/template/skills/en/meta-meta/skill-authoring/SKILL.md +108 -0
  69. package/template/skills/en/meta-meta/skill-authoring/references/skill-format-spec.md +78 -0
  70. package/template/skills/en/meta-meta/skill-to-workflow/SKILL.md +150 -0
  71. package/template/skills/en/meta-meta/skill-to-workflow/references/worker-llm-catalog.md +50 -0
  72. package/template/skills/en/meta-meta/task-decomposition/SKILL.md +129 -0
  73. package/template/skills/en/meta-meta/task-decomposition/references/decision-matrix.md +81 -0
  74. package/template/skills/en/meta-meta/version-control/SKILL.md +152 -0
  75. package/template/skills/en/meta-meta/version-control/references/trace-id-spec.md +79 -0
  76. package/template/skills/en/skill-creator/LICENSE.txt +202 -0
  77. package/template/skills/en/skill-creator/SKILL.md +479 -0
  78. package/template/skills/en/skill-creator/agents/analyzer.md +274 -0
  79. package/template/skills/en/skill-creator/agents/comparator.md +202 -0
  80. package/template/skills/en/skill-creator/agents/grader.md +223 -0
  81. package/template/skills/en/skill-creator/assets/eval_review.html +146 -0
  82. package/template/skills/en/skill-creator/eval-viewer/generate_review.py +471 -0
  83. package/template/skills/en/skill-creator/eval-viewer/viewer.html +1325 -0
  84. package/template/skills/en/skill-creator/references/schemas.md +430 -0
  85. package/template/skills/en/skill-creator/scripts/__init__.py +0 -0
  86. package/template/skills/en/skill-creator/scripts/aggregate_benchmark.py +401 -0
  87. package/template/skills/en/skill-creator/scripts/generate_report.py +326 -0
  88. package/template/skills/en/skill-creator/scripts/improve_description.py +248 -0
  89. package/template/skills/en/skill-creator/scripts/package_skill.py +136 -0
  90. package/template/skills/en/skill-creator/scripts/quick_validate.py +103 -0
  91. package/template/skills/en/skill-creator/scripts/run_eval.py +310 -0
  92. package/template/skills/en/skill-creator/scripts/run_loop.py +332 -0
  93. package/template/skills/en/skill-creator/scripts/utils.py +47 -0
  94. package/template/skills/zh/meta/compliance-judgment/SKILL.md +303 -0
  95. package/template/skills/zh/meta/compliance-judgment/references/output-format.md +151 -0
  96. package/template/skills/zh/meta/confidence-system/SKILL.md +228 -0
  97. package/template/skills/zh/meta/corner-case-management/SKILL.md +235 -0
  98. package/template/skills/zh/meta/cross-document-verification/SKILL.md +241 -0
  99. package/template/skills/zh/meta/cross-document-verification/references/contradiction-taxonomy.md +73 -0
  100. package/template/skills/zh/meta/data-sensibility/SKILL.md +235 -0
  101. package/template/skills/zh/meta/document-parsing/SKILL.md +168 -0
  102. package/template/skills/zh/meta/document-parsing/references/parser-catalog.md +40 -0
  103. package/template/skills/zh/meta/entity-extraction/SKILL.md +276 -0
  104. package/template/skills/zh/meta/tree-processing/SKILL.md +233 -0
  105. package/template/skills/zh/meta-meta/bootstrap-workspace/SKILL.md +147 -0
  106. package/template/skills/zh/meta-meta/dashboard-reporting/SKILL.md +281 -0
  107. package/template/skills/zh/meta-meta/dashboard-reporting/scripts/generate_dashboard.py +178 -0
  108. package/template/skills/zh/meta-meta/evolution-loop/SKILL.md +302 -0
  109. package/template/skills/zh/meta-meta/evolution-loop/references/convergence-guide.md +62 -0
  110. package/template/skills/zh/meta-meta/quality-control/SKILL.md +269 -0
  111. package/template/skills/zh/meta-meta/quality-control/references/qa-layers.md +92 -0
  112. package/template/skills/zh/meta-meta/quality-control/references/sampling-strategies.md +76 -0
  113. package/template/skills/zh/meta-meta/rule-extraction/SKILL.md +208 -0
  114. package/template/skills/zh/meta-meta/rule-extraction/references/chunking-strategies.md +80 -0
  115. package/template/skills/zh/meta-meta/rule-graph/SKILL.md +203 -0
  116. package/template/skills/zh/meta-meta/skill-authoring/SKILL.md +235 -0
  117. package/template/skills/zh/meta-meta/skill-authoring/references/skill-format-spec.md +78 -0
  118. package/template/skills/zh/meta-meta/skill-to-workflow/SKILL.md +275 -0
  119. package/template/skills/zh/meta-meta/skill-to-workflow/references/worker-llm-catalog.md +50 -0
  120. package/template/skills/zh/meta-meta/task-decomposition/SKILL.md +224 -0
  121. package/template/skills/zh/meta-meta/task-decomposition/references/decision-matrix.md +81 -0
  122. package/template/skills/zh/meta-meta/version-control/SKILL.md +284 -0
  123. package/template/skills/zh/meta-meta/version-control/references/trace-id-spec.md +79 -0
  124. package/template/skills/zh/skill-creator/LICENSE.txt +202 -0
  125. package/template/skills/zh/skill-creator/SKILL.md +479 -0
  126. package/template/skills/zh/skill-creator/agents/analyzer.md +274 -0
  127. package/template/skills/zh/skill-creator/agents/comparator.md +202 -0
  128. package/template/skills/zh/skill-creator/agents/grader.md +223 -0
  129. package/template/skills/zh/skill-creator/assets/eval_review.html +146 -0
  130. package/template/skills/zh/skill-creator/eval-viewer/generate_review.py +471 -0
  131. package/template/skills/zh/skill-creator/eval-viewer/viewer.html +1325 -0
  132. package/template/skills/zh/skill-creator/references/schemas.md +430 -0
  133. package/template/skills/zh/skill-creator/scripts/__init__.py +0 -0
  134. package/template/skills/zh/skill-creator/scripts/aggregate_benchmark.py +401 -0
  135. package/template/skills/zh/skill-creator/scripts/generate_report.py +326 -0
  136. package/template/skills/zh/skill-creator/scripts/improve_description.py +248 -0
  137. package/template/skills/zh/skill-creator/scripts/package_skill.py +136 -0
  138. package/template/skills/zh/skill-creator/scripts/quick_validate.py +103 -0
  139. package/template/skills/zh/skill-creator/scripts/run_eval.py +310 -0
  140. package/template/skills/zh/skill-creator/scripts/run_loop.py +332 -0
  141. package/template/skills/zh/skill-creator/scripts/utils.py +47 -0
@@ -0,0 +1,184 @@
1
+ import fs from "node:fs";
2
+ import path from "node:path";
3
+ import { BaseTool, ToolResult } from "./base.js";
4
+
5
/** Maximum number of characters of extracted text returned to the caller. */
const MAX_OUTPUT = 50_000;
/** Average characters per extracted page below which text is judged too thin. */
const MIN_CHARS_PER_PAGE = 50;

/**
 * Parse documents through a hard-coded escalation chain.
 *   Level 1: pdfjs-dist (free, local) — text extraction
 *   Level 2: MineRU API (if configured) — for scanned/complex documents
 *   Level 3: OCR models via SiliconFlow — fallback via vision models
 *
 * Levels 2 and 3 are currently stubs that always return null, so in practice
 * only pdfjs (or a direct read for plain-text files) produces output.
 */
export class DocumentParseTool extends BaseTool {
  /**
   * @param {object} workspace - Object exposing `resolvePath(relativePath)`.
   * @param {object} [options]
   * @param {string} [options.mineruApiUrl] - Enables level 2 when non-empty.
   * @param {string} [options.mineruApiKey]
   * @param {string} [options.siliconflowApiKey] - Enables level 3 together with `ocrModel`.
   * @param {string} [options.siliconflowBaseUrl]
   * @param {string} [options.ocrModel]
   */
  constructor(workspace, { mineruApiUrl, mineruApiKey, siliconflowApiKey, siliconflowBaseUrl, ocrModel } = {}) {
    super();
    this._workspace = workspace;
    this._mineruApiUrl = mineruApiUrl || "";
    this._mineruApiKey = mineruApiKey || "";
    this._sfApiKey = siliconflowApiKey || "";
    this._sfBaseUrl = siliconflowBaseUrl || "https://api.siliconflow.cn/v1";
    this._ocrModel = ocrModel || "";
  }

  get name() { return "document_parse"; }

  get description() {
    return (
      "Parse a document (PDF, DOCX, TXT) and extract its text content. " +
      "Internally uses an escalation chain: text extraction → API parser → OCR models. " +
      "Starts cheap, escalates if needed. Use force_method to skip the chain."
    );
  }

  get inputSchema() {
    return {
      type: "object",
      properties: {
        path: { type: "string", description: "Relative path to the document in the workspace" },
        pages: { type: "string", description: "Page range to extract, e.g. '1-5', '3', '10-20'. Omit for all pages." },
        force_method: {
          type: "string",
          enum: ["pdfjs", "mineru", "ocr"],
          description: "Force a specific parsing method, skipping the escalation chain.",
        },
      },
      required: ["path"],
    };
  }

  /**
   * Extract text from the document at `input.path`.
   *
   * @param {{path?: string, pages?: string|number, force_method?: string}} input
   * @returns {Promise<ToolResult>} Extracted text prefixed with the method used,
   *   or an error result when the path is invalid or nothing could be extracted.
   */
  async execute(input) {
    const args = input ?? {};
    const pathStr = args.path || "";
    const force = args.force_method;

    if (!pathStr) return new ToolResult("No path provided", true);

    let resolved;
    try {
      resolved = this._workspace.resolvePath(pathStr);
    } catch (e) {
      return new ToolResult(e.message, true);
    }

    if (!fs.existsSync(resolved) || !fs.statSync(resolved).isFile()) {
      return new ToolResult(`File not found: ${pathStr}`, true);
    }

    const pageRange = this._parsePageRange(args.pages);

    // Plain text files — read directly (the page range does not apply).
    const ext = path.extname(resolved).toLowerCase();
    if ([".txt", ".md", ".csv", ".json", ".env"].includes(ext)) {
      const text = fs.readFileSync(resolved, "utf-8");
      return new ToolResult(this._formatOutput(text, "text read", resolved));
    }

    if (force) return this._runMethod(force, resolved, pageRange);

    // Escalation chain. Each level keeps its own result so that a later level
    // returning null cannot discard usable (if low quality) earlier output.

    // Level 1: pdfjs-dist
    const pdfjsText = await this._tryPdfjs(resolved, pageRange);
    if (pdfjsText && this._qualityOk(pdfjsText)) {
      return new ToolResult(this._formatOutput(pdfjsText, "pdfjs", resolved));
    }

    // Level 2: MineRU API
    let mineruText = null;
    if (this._mineruApiUrl) {
      mineruText = await this._tryMineru(resolved, pageRange);
      if (mineruText && this._qualityOk(mineruText)) {
        return new ToolResult(this._formatOutput(mineruText, "mineru", resolved));
      }
    }

    // Level 3: OCR via SiliconFlow
    if (this._sfApiKey && this._ocrModel) {
      const ocrText = await this._tryOcr(resolved, pageRange);
      if (ocrText) return new ToolResult(this._formatOutput(ocrText, "ocr", resolved));
    }

    // Nothing passed the quality gate: hand back the best low-quality text we
    // have, labelled with the parser that actually produced it.
    if (mineruText) {
      return new ToolResult(this._formatOutput(mineruText, "mineru (low quality)", resolved));
    }
    if (pdfjsText) {
      return new ToolResult(this._formatOutput(pdfjsText, "pdfjs (low quality)", resolved));
    }

    return new ToolResult(
      `Could not extract text from ${pathStr}. Configure OCR models in .env for image-based documents.`,
      true,
    );
  }

  /**
   * Run exactly one parsing method, bypassing the escalation chain and the
   * quality gate.
   *
   * @param {string} method - One of "pdfjs", "mineru", "ocr".
   * @param {string} filePath - Absolute path of the document.
   * @param {[number, number]|null} pageRange - Zero-based inclusive range, or null for all pages.
   * @returns {Promise<ToolResult>}
   */
  async _runMethod(method, filePath, pageRange) {
    let result;
    if (method === "pdfjs") result = await this._tryPdfjs(filePath, pageRange);
    else if (method === "mineru") result = await this._tryMineru(filePath, pageRange);
    else if (method === "ocr") result = await this._tryOcr(filePath, pageRange);
    else return new ToolResult(`Unknown method: ${method}`, true);

    if (result) return new ToolResult(this._formatOutput(result, method, filePath));
    return new ToolResult(`Method '${method}' failed for this document`, true);
  }

  /**
   * Extract text with pdfjs-dist.
   *
   * @param {string} filePath - Absolute path of the document.
   * @param {[number, number]|null} pageRange - Zero-based inclusive range, or null for all pages.
   * @returns {Promise<string|null>} Page-delimited text ("" when no page has
   *   text), or null when pdfjs is unavailable or cannot read the file.
   */
  async _tryPdfjs(filePath, pageRange) {
    let loadingTask;
    try {
      const pdfjsLib = await import("pdfjs-dist/legacy/build/pdf.mjs");
      const data = new Uint8Array(fs.readFileSync(filePath));
      loadingTask = pdfjsLib.getDocument({ data, useSystemFonts: true });
      const doc = await loadingTask.promise;

      const lastIndex = doc.numPages - 1;
      const first = Math.max(0, pageRange ? pageRange[0] : 0);
      const last = Math.min(pageRange ? pageRange[1] : lastIndex, lastIndex);
      const pages = [];

      for (let i = first; i <= last; i++) {
        const page = await doc.getPage(i + 1); // pdfjs pages are 1-indexed
        const content = await page.getTextContent();
        const text = content.items.map((item) => item.str).join(" ").trim();
        if (text) pages.push(`--- Page ${i + 1} ---\n${text}`);
      }

      return pages.join("\n\n");
    } catch {
      // Any failure (missing dependency, not a PDF, corrupt file) means this
      // level cannot handle the document; null lets the caller escalate.
      return null;
    } finally {
      if (loadingTask) {
        try {
          await loadingTask.destroy();
        } catch {
          // Cleanup is best effort and must not mask the extraction outcome.
        }
      }
    }
  }

  /**
   * MineRU API parser. Not implemented yet: always returns null.
   * @returns {Promise<null>}
   */
  async _tryMineru(filePath, pageRange) {
    // TODO: Implement MineRU API call when available
    return null;
  }

  /**
   * OCR parser. Not available natively: always returns null.
   * @returns {Promise<null>}
   */
  async _tryOcr(filePath, pageRange) {
    // OCR requires sending page images to a vision model API.
    // Without a native image renderer, we delegate to the agent
    // to use sandbox_exec for custom OCR pipelines.
    // For now, return null to signal OCR is not available natively.
    return null;
  }

  /**
   * Heuristic quality gate for extracted text.
   *
   * @param {string} text
   * @returns {boolean} false when the text is empty, averages fewer than
   *   MIN_CHARS_PER_PAGE characters per "--- Page N ---" marker, or consists of
   *   more than 10% U+FFFD replacement characters.
   */
  _qualityOk(text) {
    if (!text || !text.trim()) return false;
    const pages = (text.match(/--- Page \d+ ---/g) || []).length || 1;
    if (text.length / pages < MIN_CHARS_PER_PAGE) return false;
    const replacements = (text.match(/\uFFFD/g) || []).length;
    return replacements / Math.max(text.length, 1) <= 0.1;
  }

  /**
   * Truncate text to MAX_OUTPUT characters and prefix it with the method used.
   *
   * @param {string} text
   * @param {string} method - Label written into the "[Parsed via …]" header.
   * @param {string} filePath - Currently unused.
   * @returns {string}
   */
  _formatOutput(text, method, filePath) {
    const body = text.length > MAX_OUTPUT ? text.slice(0, MAX_OUTPUT) + "\n[truncated]" : text;
    return `[Parsed via ${method}]\n\n${body}`;
  }

  /**
   * Convert a 1-based page specification ("3", "1-5") into a zero-based
   * inclusive `[start, end]` pair.
   *
   * `parseInt` never throws, so the input is validated explicitly. Anything
   * that is not a positive page number or range yields null (no restriction).
   * A reversed range such as "5-3" is normalised to ascending order.
   *
   * @param {string|number|null|undefined} pages
   * @returns {[number, number]|null}
   */
  _parsePageRange(pages) {
    if (pages === undefined || pages === null) return null;
    const match = /^(\d+)(?:\s*-\s*(\d+))?$/.exec(String(pages).trim());
    if (!match) return null;

    const first = Number.parseInt(match[1], 10);
    const last = match[2] === undefined ? first : Number.parseInt(match[2], 10);
    if (first < 1 || last < 1) return null;

    return first <= last ? [first - 1, last - 1] : [last - 1, first - 1];
  }
}
@@ -0,0 +1,111 @@
1
+ import fs from "node:fs";
2
+ import path from "node:path";
3
+ import { BaseTool, ToolResult } from "./base.js";
4
+
5
/** Default cap on the number of matches returned. */
const MAX_RESULTS = 20;
/** Characters of surrounding text included before and after each match. */
const CONTEXT_CHARS = 200;
/** File extensions whose content is searched. */
const TEXT_EXTENSIONS = new Set([".txt", ".md", ".json", ".py", ".csv", ".env", ".log", ".js"]);

/**
 * Full-text search across documents in the workspace.
 * Searches text files and parsed document outputs. Returns passages
 * with source coordinates (file, line number).
 */
export class DocumentSearchTool extends BaseTool {
  /**
   * @param {object} workspace - Object exposing `resolvePath(relativePath)` and `cwd`.
   */
  constructor(workspace) {
    super();
    this._workspace = workspace;
  }

  get name() { return "document_search"; }

  get description() {
    return (
      "Search for text across documents in the workspace. Returns matching " +
      "passages with file path and context. Supports plain text and regex queries."
    );
  }

  get inputSchema() {
    return {
      type: "object",
      properties: {
        query: { type: "string", description: "Search query (plain text or regex pattern)" },
        path: { type: "string", description: "Subdirectory to search in (default: entire workspace)" },
        max_results: { type: "integer", description: `Maximum results to return (default: ${MAX_RESULTS})` },
        regex: { type: "boolean", description: "Treat query as regex pattern (default: false)" },
      },
      required: ["query"],
    };
  }

  /**
   * Search the workspace (or a subdirectory) for `input.query`.
   * Matching is case-insensitive in both plain-text and regex mode.
   *
   * @param {{query?: string, path?: string, max_results?: number, regex?: boolean}} input
   * @returns {Promise<ToolResult>} A formatted list of matches, a "no matches"
   *   message, or an error result for a bad path or invalid regex.
   */
  async execute(input) {
    const args = input ?? {};
    const query = args.query || "";
    const searchPath = args.path || ".";
    const useRegex = args.regex || false;

    // Fall back to the default for missing, non-numeric, zero or negative
    // limits; a negative limit would otherwise suppress every result.
    const requested = Number(args.max_results);
    const maxResults = Number.isFinite(requested) && requested >= 1 ? Math.floor(requested) : MAX_RESULTS;

    if (!query) return new ToolResult("No query provided", true);

    let searchDir;
    try {
      searchDir = this._workspace.resolvePath(searchPath);
    } catch (e) {
      return new ToolResult(e.message, true);
    }

    if (!fs.existsSync(searchDir) || !fs.statSync(searchDir).isDirectory()) {
      return new ToolResult(`Not a directory: ${searchPath}`, true);
    }

    let pattern;
    try {
      // Plain-text queries are escaped so regex metacharacters match literally.
      const source = useRegex ? query : query.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
      pattern = new RegExp(source, "gi");
    } catch (e) {
      return new ToolResult(`Invalid regex: ${e.message}`, true);
    }

    const results = [];
    this._searchDir(searchDir, pattern, results, maxResults);

    if (results.length === 0) return new ToolResult(`No matches found for: ${query}`);

    const lines = [];
    for (const r of results) {
      lines.push(`--- ${r.file}:${r.line} ---`, r.context, "");
    }
    return new ToolResult(`Found ${results.length} match(es):\n\n${lines.join("\n")}`);
  }

  /**
   * Recursively collect matches under `dir` into `results`, visiting entries
   * in name order and stopping once `maxResults` is reached. Dot-directories,
   * `node_modules` and `__pycache__` are skipped; unreadable directories and
   * files are ignored.
   *
   * @param {string} dir - Absolute directory path.
   * @param {RegExp} pattern - Global regex; its `lastIndex` is reset per file.
   * @param {Array<{file: string, line: number, match: string, context: string}>} results
   *   Accumulator, mutated in place.
   * @param {number} maxResults
   */
  _searchDir(dir, pattern, results, maxResults) {
    let entries;
    try {
      entries = fs.readdirSync(dir, { withFileTypes: true });
    } catch {
      return;
    }

    for (const entry of entries.sort((a, b) => a.name.localeCompare(b.name))) {
      if (results.length >= maxResults) break;
      const fullPath = path.join(dir, entry.name);

      if (entry.isDirectory()) {
        if (entry.name.startsWith(".") || entry.name === "node_modules" || entry.name === "__pycache__") continue;
        this._searchDir(fullPath, pattern, results, maxResults);
        continue;
      }

      if (!entry.isFile() || !TEXT_EXTENSIONS.has(path.extname(entry.name).toLowerCase())) continue;

      let content;
      try {
        content = fs.readFileSync(fullPath, "utf-8");
      } catch {
        continue;
      }

      const relPath = path.relative(this._workspace.cwd, fullPath);
      // Matches arrive in ascending index order, so newlines are counted
      // incrementally instead of rescanning from the start for every match.
      let lineNum = 1;
      let counted = 0;
      let match;
      pattern.lastIndex = 0;

      while ((match = pattern.exec(content)) !== null) {
        if (match[0].length === 0) {
          // A zero-length match leaves lastIndex untouched; without stepping
          // forward the same position would be reported over and over.
          pattern.lastIndex += 1;
          continue;
        }

        for (let i = counted; i < match.index; i++) {
          if (content.charCodeAt(i) === 10) lineNum += 1;
        }
        counted = match.index;

        const start = Math.max(0, match.index - CONTEXT_CHARS);
        const end = Math.min(content.length, match.index + match[0].length + CONTEXT_CHARS);
        const context = content.slice(start, end).trim();

        results.push({ file: relPath, line: lineNum, match: match[0], context });
        if (results.length >= maxResults) break;
      }
    }
  }
}
@@ -0,0 +1,150 @@
1
+ import fs from "node:fs";
2
+ import path from "node:path";
3
+ import { BaseTool, ToolResult } from "./base.js";
4
+
5
/**
 * Run one structured iteration of the evolution loop.
 * Enforces: diagnose -> classify -> fix instructions -> log.
 * Classification is CODE: counts failure rate, applies threshold.
 * Routes corner cases to CornerCaseRegistry automatically.
 */
export class EvolutionCycleTool extends BaseTool {
  /**
   * @param {object} workspace - Object exposing `cwd` (workspace root directory).
   * @param {object} cornerCases - Registry exposing `add(cornerCase)`.
   */
  constructor(workspace, cornerCases) {
    super();
    this._workspace = workspace;
    this._cornerCases = cornerCases;
  }

  get name() { return "evolution_cycle"; }

  get description() {
    return (
      "Run one structured iteration of diagnose -> classify -> fix -> log. " +
      "Provide test results with failures. The tool classifies failures as " +
      "systemic (>10%) or corner case (<10%), routes corner cases to the " +
      "registry, and saves a structured evolution log entry."
    );
  }

  get inputSchema() {
    return {
      type: "object",
      properties: {
        rule_id: { type: "string", description: "Rule being evolved" },
        total_test_docs: { type: "integer", description: "Total number of documents tested" },
        failed_docs: {
          type: "array",
          items: {
            type: "object",
            properties: {
              doc_id: { type: "string" },
              diagnosis: { type: "string", enum: ["parsing", "extraction", "judgment", "scope"] },
              root_cause: { type: "string" },
            },
          },
          description: "List of failed documents with diagnosis",
        },
        accuracy_before: { type: "number", description: "Accuracy before this cycle" },
        fix_applied: { type: "string", description: "Description of the fix applied (or planned)" },
      },
      required: ["rule_id", "total_test_docs", "failed_docs", "accuracy_before"],
    };
  }

  /**
   * Classify the failures of one test run, register corner cases, and write a
   * JSON log entry under `<cwd>/logs/evolution/`.
   *
   * @param {object} input - See `inputSchema`.
   * @returns {Promise<ToolResult>} JSON summary (iteration, classification,
   *   failure rate, recommended action, repeated patterns, log path), or an
   *   error result when `rule_id` or a positive `total_test_docs` is missing.
   */
  async execute(input) {
    const args = input ?? {};
    const ruleId = args.rule_id || "";
    const total = Number(args.total_test_docs) || 0;
    const failures = Array.isArray(args.failed_docs) ? args.failed_docs : [];
    const accuracyBefore = args.accuracy_before || 0;
    const fixApplied = args.fix_applied || "";

    if (!ruleId || total <= 0) return new ToolResult("rule_id and total_test_docs required", true);

    const systemicThreshold = this._readSystemicThreshold();
    const failureRate = failures.length / total;
    const classification = failureRate >= systemicThreshold ? "systemic" : "corner_case";

    // Compare against earlier iterations before this one is written.
    const repeatedPatterns = this._checkRepeatedPatterns(ruleId, failures);

    // Route corner cases to registry
    const cornerCasesAdded = [];
    if (classification === "corner_case") {
      for (const failure of failures) {
        const cornerCase = {
          id: `CC_${ruleId}_${failure?.doc_id || "unknown"}`,
          ruleId,
          detectionPattern: failure?.root_cause || "unknown pattern",
          resolution: fixApplied || "pending fix",
          affectedDocuments: [failure?.doc_id || ""],
          discoveryDate: new Date().toISOString(),
          status: "active",
        };
        this._cornerCases.add(cornerCase);
        cornerCasesAdded.push(cornerCase.id);
      }
    }

    // Next iteration number: one past the highest existing entry, so a
    // deleted log file can never cause a later entry to be overwritten.
    const logDir = path.join(this._workspace.cwd, "logs", "evolution");
    fs.mkdirSync(logDir, { recursive: true });
    const prefix = `${this._logKey(ruleId)}_iter_`;
    const existing = fs.readdirSync(logDir).filter((name) => name.startsWith(prefix));
    const highest = existing.reduce((max, name) => {
      const n = Number.parseInt(name.slice(prefix.length), 10);
      return Number.isInteger(n) && n > max ? n : max;
    }, 0);
    const iteration = Math.max(existing.length, highest) + 1;

    const logEntry = {
      iteration,
      rule_id: ruleId,
      timestamp: new Date().toISOString(),
      accuracy_before: accuracyBefore,
      total_docs: total,
      failed_docs: failures.length,
      failure_rate: Math.round(failureRate * 1000) / 1000,
      classification,
      failures,
      fix_applied: fixApplied,
      corner_cases_added: cornerCasesAdded,
      repeated_patterns: repeatedPatterns,
    };

    const logPath = path.join(logDir, `${prefix}${String(iteration).padStart(3, "0")}.json`);
    fs.writeFileSync(logPath, JSON.stringify(logEntry, null, 2), "utf-8");

    // Report the threshold actually applied rather than a fixed 10%.
    const thresholdPct = Number((systemicThreshold * 100).toFixed(1));
    const response = {
      iteration,
      classification,
      failure_rate: `${(failureRate * 100).toFixed(1)}%`,
      action: classification === "systemic"
        ? `REWRITE component — systemic issue affecting >=${thresholdPct}% of documents`
        : `Recorded ${cornerCasesAdded.length} corner case(s) — do NOT patch main workflow`,
      repeated_patterns: repeatedPatterns,
      log_saved: path.relative(this._workspace.cwd, logPath),
    };

    if (repeatedPatterns.length > 0) {
      response.warning = "Repeated failure patterns detected. Consider escalating approach.";
    }

    return new ToolResult(JSON.stringify(response, null, 2));
  }

  /**
   * Read `SYSTEMIC_THRESHOLD` from `<cwd>/.env`.
   *
   * `parseFloat` returns NaN instead of throwing, and any comparison with NaN
   * is false, so an unparseable value would silently classify every run as a
   * corner case. Such lines are ignored and the default is kept.
   *
   * @returns {number} The last valid non-negative value found, or 0.10.
   */
  _readSystemicThreshold() {
    const key = "SYSTEMIC_THRESHOLD=";
    let threshold = 0.10;
    const envPath = path.join(this._workspace.cwd, ".env");
    if (!fs.existsSync(envPath)) return threshold;

    for (const line of fs.readFileSync(envPath, "utf-8").split("\n")) {
      if (!line.startsWith(key)) continue;
      const raw = line.slice(key.length).trim().replace(/^["']|["']$/g, "");
      const parsed = Number.parseFloat(raw);
      if (Number.isFinite(parsed) && parsed >= 0) threshold = parsed;
    }
    return threshold;
  }

  /**
   * File-name-safe form of a rule id: path separators and NUL are replaced so
   * the id cannot move the log file outside the log directory.
   *
   * @param {string} ruleId
   * @returns {string}
   */
  _logKey(ruleId) {
    return String(ruleId).replace(/[\\/\0]/g, "_");
  }

  /**
   * Find root causes in `failures` that also appear in earlier log entries
   * for the same rule (compared case-insensitively).
   *
   * @param {string} ruleId
   * @param {Array<{root_cause?: string}>} failures
   * @returns {string[]} Lower-cased root causes seen before; log files that
   *   cannot be read or parsed are skipped.
   */
  _checkRepeatedPatterns(ruleId, failures) {
    const logDir = path.join(this._workspace.cwd, "logs", "evolution");
    if (!fs.existsSync(logDir)) return [];

    const causesOf = (list) =>
      new Set(list.map((item) => (item?.root_cause || "").toLowerCase()).filter(Boolean));

    const currentCauses = causesOf(failures);
    const repeated = new Set();
    const prefix = `${this._logKey(ruleId)}_iter_`;

    for (const fileName of fs.readdirSync(logDir).filter((name) => name.startsWith(prefix)).sort()) {
      try {
        const data = JSON.parse(fs.readFileSync(path.join(logDir, fileName), "utf-8"));
        const previousCauses = causesOf(data.failures || []);
        for (const cause of currentCauses) {
          if (previousCauses.has(cause)) repeated.add(cause);
        }
      } catch {
        // Unreadable or malformed log entries are skipped.
      }
    }
    return [...repeated];
  }
}
@@ -0,0 +1,94 @@
1
+ import fs from "node:fs";
2
+ import path from "node:path";
3
+ import { BaseTool, ToolResult } from "./base.js";
4
+
5
/** Sampling rate applied to the medium-confidence band per MONITOR_FREQUENCY value. */
const FREQUENCY_RATES = { high: 0.5, mid: 0.3, low: 0.1 };

/**
 * Draw adaptive sample from production results for quality review.
 * Stratifies by confidence band: review ALL low, sample medium, spot-check high.
 */
export class QCSampleTool extends BaseTool {
  /**
   * @param {object} workspace - Object exposing `resolvePath(relativePath)` and `cwd`.
   */
  constructor(workspace) {
    super();
    this._workspace = workspace;
  }

  get name() { return "qc_sample"; }

  get description() {
    return (
      "Draw an adaptive sample from production results for quality review. " +
      "Stratifies by confidence band (low=review all, medium=sample, high=spot-check). " +
      "Returns list of document IDs to review."
    );
  }

  get inputSchema() {
    return {
      type: "object",
      properties: {
        results_path: { type: "string", description: "Path to results directory (default: output/results/)" },
      },
    };
  }

  /**
   * Read every `*.json` result file in the results directory, bucket it by
   * `confidence_band`, and select the files to review: all of the low band, a
   * MONITOR_FREQUENCY-dependent share of the medium band, and 10% of the high
   * band (at least one file from each non-empty sampled band).
   *
   * @param {{results_path?: string}} input
   * @returns {Promise<ToolResult>} JSON report with the band distribution and
   *   review list, or an error result when the directory cannot be resolved.
   */
  async execute(input) {
    const resultsPath = (input ?? {}).results_path || "output/results";
    let resultsDir;
    try {
      resultsDir = this._workspace.resolvePath(resultsPath);
    } catch (e) {
      return new ToolResult(e.message, true);
    }

    if (!fs.existsSync(resultsDir) || !fs.statSync(resultsDir).isDirectory()) {
      return new ToolResult(`Results directory not found: ${resultsPath}`, true);
    }

    const mediumRate = this._readMediumRate();

    const bands = { low: [], medium: [], high: [] };
    let skipped = 0;
    const files = fs.readdirSync(resultsDir).filter((f) => f.endsWith(".json")).sort();
    for (const file of files) {
      try {
        const data = JSON.parse(fs.readFileSync(path.join(resultsDir, file), "utf-8"));
        // Band names are compared case-insensitively so that e.g. "LOW" is not
        // demoted to the sampled medium bucket; unknown bands count as medium.
        const band = String(data.confidence_band || "medium").toLowerCase();
        const entry = { file, rule_id: data.rule_id || "", confidence: data.confidence || 0 };
        if (band === "low") bands.low.push(entry);
        else if (band === "high") bands.high.push(entry);
        else bands.medium.push(entry);
      } catch {
        // Unreadable or malformed result files are left out but counted.
        skipped += 1;
      }
    }

    const toReview = [...bands.low];
    if (bands.medium.length > 0) {
      const sampleSize = Math.max(1, Math.floor(bands.medium.length * mediumRate));
      toReview.push(...this._sample(bands.medium, sampleSize));
    }
    if (bands.high.length > 0) {
      const spotSize = Math.max(1, Math.floor(bands.high.length * 0.1));
      toReview.push(...this._sample(bands.high, spotSize));
    }

    const report = {
      total_results: bands.low.length + bands.medium.length + bands.high.length,
      distribution: { low: bands.low.length, medium: bands.medium.length, high: bands.high.length },
      sampling_rate_medium: mediumRate,
      to_review: toReview.length,
      review_list: toReview,
      skipped_files: skipped,
    };
    return new ToolResult(JSON.stringify(report, null, 2));
  }

  /**
   * Read `MONITOR_FREQUENCY` from `<cwd>/.env` and map it to a sampling rate.
   *
   * Only own keys of FREQUENCY_RATES are accepted: a plain property lookup
   * would also resolve inherited names such as "constructor" to a non-number.
   *
   * @returns {number} Rate for the last recognised value, or 0.3.
   */
  _readMediumRate() {
    const key = "MONITOR_FREQUENCY=";
    let rate = 0.3;
    const envPath = path.join(this._workspace.cwd, ".env");
    if (!fs.existsSync(envPath)) return rate;

    for (const line of fs.readFileSync(envPath, "utf-8").split("\n")) {
      if (!line.startsWith(key)) continue;
      const freq = line.split("=")[1].trim().replace(/^["']|["']$/g, "").toLowerCase();
      rate = Object.prototype.hasOwnProperty.call(FREQUENCY_RATES, freq) ? FREQUENCY_RATES[freq] : 0.3;
    }
    return rate;
  }

  /**
   * Pick up to `n` distinct elements uniformly at random without mutating `arr`.
   * Uses a Fisher–Yates shuffle; sorting with a random comparator is biased.
   *
   * @template T
   * @param {T[]} arr
   * @param {number} n
   * @returns {T[]}
   */
  _sample(arr, n) {
    const pool = [...arr];
    for (let i = pool.length - 1; i > 0; i--) {
      const j = Math.floor(Math.random() * (i + 1));
      [pool[i], pool[j]] = [pool[j], pool[i]];
    }
    return pool.slice(0, Math.min(n, pool.length));
  }
}
@@ -0,0 +1,55 @@
1
+ import { ToolResult } from "./base.js";
2
+
3
/**
 * Manages tool registration and dispatch.
 * Tools register themselves; the engine loop discovers them via schemasOpenai()
 * and dispatches to execute() when the LLM invokes a tool.
 */
export class ToolRegistry {
  constructor() {
    /** @type {Map<string, import('./base.js').BaseTool>} */
    this._tools = new Map();
  }

  /**
   * Register a tool instance under its `name`. Registering a second tool with
   * the same name replaces the first.
   *
   * @param {import('./base.js').BaseTool} tool
   * @throws {TypeError} When the tool has no non-empty string `name` or no
   *   `execute` function; such an object could never be dispatched to.
   */
  register(tool) {
    const valid =
      tool !== null &&
      typeof tool === "object" &&
      typeof tool.name === "string" &&
      tool.name !== "" &&
      typeof tool.execute === "function";
    if (!valid) {
      throw new TypeError("Tool must expose a non-empty string `name` and an `execute` function");
    }
    this._tools.set(tool.name, tool);
  }

  /**
   * Return tool schemas in OpenAI function-calling format, in registration order.
   * @returns {Array<object>}
   */
  schemasOpenai() {
    return [...this._tools.values()].map((tool) => ({
      type: "function",
      function: {
        name: tool.name,
        description: tool.description,
        parameters: tool.inputSchema,
      },
    }));
  }

  /**
   * Execute a tool by name.
   *
   * A missing input (`undefined` or `null`) is passed to the tool as an empty
   * object, so tools can read `input.<field>` without a guard. Errors thrown
   * by the tool itself propagate to the caller unchanged.
   *
   * @param {string} name
   * @param {object} input
   * @returns {Promise<ToolResult>} The tool's result, or an error result when
   *   no tool is registered under `name`.
   */
  async execute(name, input) {
    const tool = this._tools.get(name);
    if (!tool) {
      return new ToolResult(`Unknown tool: ${name}`, true);
    }
    return tool.execute(input ?? {});
  }

  /** @returns {number} Number of registered tools */
  get size() {
    return this._tools.size;
  }
}