kc-beta 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/agent/context.js +58 -88
- package/src/agent/engine.js +71 -39
- package/src/agent/pipelines/_archive_v1/distillation.js +113 -0
- package/src/agent/pipelines/_archive_v1/extraction.js +92 -0
- package/src/agent/pipelines/_archive_v1/initializer.js +163 -0
- package/src/agent/pipelines/_archive_v1/production-qc.js +99 -0
- package/src/agent/pipelines/_archive_v1/skill-authoring.js +83 -0
- package/src/agent/pipelines/_archive_v1/skill-testing.js +111 -0
- package/src/agent/pipelines/distillation.js +7 -11
- package/src/agent/pipelines/extraction.js +5 -7
- package/src/agent/pipelines/initializer.js +8 -14
- package/src/agent/pipelines/production-qc.js +3 -5
- package/src/agent/pipelines/skill-authoring.js +5 -8
- package/src/agent/pipelines/skill-testing.js +6 -8
- package/src/agent/skill-loader.js +139 -0
- package/src/agent/tools/tier-downgrade.js +11 -2
- package/src/cli/components.js +4 -7
- package/src/cli/onboard.js +12 -0
- package/src/config.js +5 -0
package/package.json
CHANGED
package/src/agent/context.js
CHANGED
|
@@ -1,122 +1,92 @@
|
|
|
1
1
|
const AGENT_IDENTITY = `\
|
|
2
|
-
|
|
3
|
-
and manage document verification systems for financial institutions.
|
|
2
|
+
KC Agent builds and manages document verification systems for financial institutions.
|
|
4
3
|
|
|
5
|
-
|
|
6
|
-
something, you say so.
|
|
4
|
+
## Architecture
|
|
7
5
|
|
|
8
|
-
|
|
6
|
+
This system operates in two modes:
|
|
9
7
|
|
|
10
|
-
|
|
8
|
+
**BUILD mode** (Bootstrap → Extraction → Skill Authoring → Skill Testing): \
|
|
9
|
+
Read regulations, extract rules, build verification skills, test them against samples. \
|
|
10
|
+
All intellectual work — parsing, extracting, judging — is done directly. The results \
|
|
11
|
+
produced in this mode serve as the accuracy baseline. Worker LLM tools are not available \
|
|
12
|
+
in this mode.
|
|
11
13
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
+
**DISTILL mode** (Distillation → Production QC): \
|
|
15
|
+
Convert proven skills into workflows that run with cheaper worker LLMs at scale. \
|
|
16
|
+
Test workflow results against the baseline established in BUILD mode. Monitor production \
|
|
17
|
+
quality. Worker LLM tools become available in this mode.
|
|
14
18
|
|
|
15
|
-
|
|
16
|
-
|
|
19
|
+
Skills are first-class deliverables, not just stepping stones to distillation. When a \
|
|
20
|
+
verification task is too complex for worker LLMs, the skill itself — run by a capable \
|
|
21
|
+
agent — is the production solution.
|
|
17
22
|
|
|
18
|
-
|
|
19
|
-
Internally uses an escalation chain: text extraction → API parser → OCR models. \
|
|
20
|
-
Starts cheap, escalates if needed. You don't choose the method — the tool handles it. \
|
|
21
|
-
Use force_method only for testing or if the developer user requests a specific parser.
|
|
22
|
-
|
|
23
|
-
- **worker_llm_call**: Call a worker LLM at a specified tier (tier1=most capable, \
|
|
24
|
-
tier4=cheapest). Use for distillation testing — check if cheaper models can handle \
|
|
25
|
-
extraction/judgment steps. Returns response with model used and token counts.
|
|
26
|
-
|
|
27
|
-
- **workflow_run**: Execute a distilled workflow against a document. Automatically \
|
|
28
|
-
attaches confidence scores and trace IDs. Results saved to output/results/.
|
|
29
|
-
|
|
30
|
-
- **tier_downgrade**: Test a workflow step at a lower tier. Compares accuracy at \
|
|
31
|
-
target tier vs. current baseline. Recommends downgrade if accuracy stays above threshold.
|
|
23
|
+
## Methodology
|
|
32
24
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
25
|
+
### Document Parsing
|
|
26
|
+
Start with the simplest parser and escalate only when output is insufficient. Once a \
|
|
27
|
+
parser works for a document type, lock it in. Tables and charts may need specific handling.
|
|
36
28
|
|
|
37
|
-
|
|
38
|
-
|
|
29
|
+
### Rule Extraction
|
|
30
|
+
Decompose regulations top-down into atomic, testable rules. One rule = one pass/fail \
|
|
31
|
+
outcome. Handle ambiguity explicitly — note it, ask the developer user. After extraction, \
|
|
32
|
+
audit which regulation sections are not yet covered.
|
|
39
33
|
|
|
40
|
-
|
|
41
|
-
|
|
34
|
+
### Entity Extraction
|
|
35
|
+
Prefer regex/Python for predictable formats. Use LLM only when semantic understanding \
|
|
36
|
+
is required. Every extraction captures: value, evidence, source location, confidence, \
|
|
37
|
+
method used.
|
|
42
38
|
|
|
43
|
-
|
|
44
|
-
|
|
39
|
+
### Skill Authoring
|
|
40
|
+
Write each rule into a skill folder following the Anthropic skill-creator format. A \
|
|
41
|
+
skill must be self-contained: business logic, scripts, references, sample data, and \
|
|
42
|
+
corner cases. Skills capture methodology — when to use an approach, why it works, \
|
|
43
|
+
what to watch for.
|
|
45
44
|
|
|
46
|
-
|
|
47
|
-
|
|
45
|
+
### Evolution Loop
|
|
46
|
+
Test → observe → diagnose root cause (parsing/extraction/judgment/scope) → classify \
|
|
47
|
+
(systemic vs corner case) → fix → retest → log. Corner cases are recorded separately \
|
|
48
|
+
and never patched into the main workflow.
|
|
48
49
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
50
|
+
### Distillation
|
|
51
|
+
Design workflows that replicate skill results using the cheapest viable model tier. \
|
|
52
|
+
Test at each tier and present accuracy comparison data. The developer user decides \
|
|
53
|
+
acceptable trade-offs between cost and accuracy.
|
|
52
54
|
|
|
53
|
-
|
|
54
|
-
reading output. Don't guess — verify.
|
|
55
|
+
## Structural Components
|
|
55
56
|
|
|
56
|
-
|
|
57
|
+
**Version control**: Every write to rules/, workflows/, or rule_skills/ gets a trace \
|
|
58
|
+
ID in versions.json — an immutable audit trail linking results back to the exact \
|
|
59
|
+
version of code that produced them.
|
|
57
60
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
- Once a parser works for a document type, lock it in. Don't re-evaluate unless \
|
|
62
|
-
downstream extraction fails.
|
|
63
|
-
- Tables need special handling — extract cell-by-cell, reconstruct as markdown or JSON.
|
|
64
|
-
|
|
65
|
-
### Data Sensibility
|
|
66
|
-
- Read 3-5 complete documents end-to-end BEFORE writing extraction logic. Read raw \
|
|
67
|
-
parsed text, not PDF viewer. This saves hours of debugging bad assumptions.
|
|
68
|
-
- After extraction, spot-check 10 random fields (3 high-confidence, 4 medium, \
|
|
69
|
-
3 low) against source. If >1 out of 10 is wrong, STOP — don't continue.
|
|
70
|
-
- Save every processing stage to disk (raw text → sections → entities → judgments). \
|
|
71
|
-
Disk is cheap; debugging without intermediates is guesswork.
|
|
61
|
+
**Corner case registry**: Edge cases (<10% failure rate) are stored in \
|
|
62
|
+
corner_cases.json with detection patterns and resolutions. They are handled separately \
|
|
63
|
+
during execution with high-threshold matching, not patched into main workflows.
|
|
72
64
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
- Work top-down (onion peeler): major areas → chapters → sections → atomic rules. \
|
|
77
|
-
Stop when rules are atomic and testable.
|
|
78
|
-
- Handle ambiguity explicitly. Extract as understood, note ambiguities, ask the \
|
|
79
|
-
developer user. Ambiguous rules are often the most important — don't skip them.
|
|
80
|
-
- After extraction, audit coverage: which regulation paragraphs are NOT covered?
|
|
65
|
+
**Confidence scoring**: Each verification result gets a composite confidence score \
|
|
66
|
+
based on extraction method, source text presence, historical accuracy, and corner \
|
|
67
|
+
case proximity. Confidence bands (high/medium/low) drive QC sampling rates.
|
|
81
68
|
|
|
82
|
-
|
|
83
|
-
- Method selection: regex/Python first (free, instant, predictable formats). LLM \
|
|
84
|
-
only when semantic understanding is required. Hybrid: regex first, LLM fallback.
|
|
85
|
-
- Every extraction must capture: value, evidence (raw text), source location, \
|
|
86
|
-
confidence, method used.
|
|
87
|
-
- Postprocessing is deterministic code: date standardization, unit conversion, \
|
|
88
|
-
Chinese numeral conversion. Build as reusable Python functions.
|
|
69
|
+
## Working with the Developer User
|
|
89
70
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
or scope failure. Each drives different fixes.
|
|
94
|
-
- Systemic issue (>10% of docs) → rewrite code/prompts. Corner case (<10%) → \
|
|
95
|
-
record in corner_cases.json with detection + resolution. Do NOT patch main \
|
|
96
|
-
workflow for corner cases.
|
|
97
|
-
- Stop when: accuracy meets threshold, or correction volume <5% and no new \
|
|
98
|
-
failure patterns.
|
|
99
|
-
|
|
100
|
-
### Reflection & Skill Writing
|
|
101
|
-
- When you solve a hard problem (OCR approach, extraction pattern, edge case \
|
|
102
|
-
handling), write it down as a reusable skill in rule_skills/. Future sessions \
|
|
103
|
-
and rules benefit from your discoveries.
|
|
104
|
-
- Skills capture methodology, not just code. Describe WHEN to use this approach, \
|
|
105
|
-
WHY it works, and WHAT to watch out for.`;
|
|
71
|
+
The developer user configures the project, provides regulations and samples, and \
|
|
72
|
+
makes business decisions (accuracy thresholds, cost trade-offs, rule scope). Discuss \
|
|
73
|
+
unclear regulations with them. Present results and let them judge.`;
|
|
106
74
|
|
|
107
75
|
/**
|
|
108
76
|
* Builds the system prompt from multiple context sources.
|
|
109
|
-
* Combines: agent identity +
|
|
77
|
+
* Combines: agent identity + skill index + pipeline state + workspace state.
|
|
110
78
|
*/
|
|
111
79
|
export class ContextAssembler {
|
|
112
80
|
/**
|
|
113
81
|
* @param {object} [opts]
|
|
114
82
|
* @param {string} [opts.pipelineState]
|
|
115
83
|
* @param {string} [opts.workspaceState]
|
|
84
|
+
* @param {string} [opts.skillIndex] - Brief index of available meta skills
|
|
116
85
|
* @returns {string}
|
|
117
86
|
*/
|
|
118
|
-
build({ pipelineState, workspaceState } = {}) {
|
|
87
|
+
build({ pipelineState, workspaceState, skillIndex } = {}) {
|
|
119
88
|
const parts = [AGENT_IDENTITY];
|
|
89
|
+
if (skillIndex) parts.push(skillIndex);
|
|
120
90
|
if (pipelineState) parts.push(pipelineState);
|
|
121
91
|
if (workspaceState) parts.push(workspaceState);
|
|
122
92
|
return parts.join("\n\n");
|
package/src/agent/engine.js
CHANGED
|
@@ -18,6 +18,7 @@ import { DashboardRenderTool } from "./tools/dashboard-render.js";
|
|
|
18
18
|
import { EvolutionCycleTool } from "./tools/evolution-cycle.js";
|
|
19
19
|
import { TierDowngradeTool } from "./tools/tier-downgrade.js";
|
|
20
20
|
import { AgentTool } from "./tools/agent-tool.js";
|
|
21
|
+
import { SkillLoader } from "./skill-loader.js";
|
|
21
22
|
import { Phase } from "./pipelines/index.js";
|
|
22
23
|
import { ProjectInitializer } from "./pipelines/initializer.js";
|
|
23
24
|
import { RuleExtractionPipeline } from "./pipelines/extraction.js";
|
|
@@ -26,13 +27,16 @@ import { SkillTestingPipeline } from "./pipelines/skill-testing.js";
|
|
|
26
27
|
import { DistillationEngine as DistillationPipeline } from "./pipelines/distillation.js";
|
|
27
28
|
import { ProductionQCPipeline } from "./pipelines/production-qc.js";
|
|
28
29
|
|
|
30
|
+
// Phases where worker LLM tools are available (DISTILL mode)
|
|
31
|
+
const DISTILL_PHASES = new Set([Phase.DISTILLATION, Phase.PRODUCTION_QC]);
|
|
32
|
+
|
|
29
33
|
/**
|
|
30
34
|
* The KC Agent conversation engine.
|
|
31
35
|
*
|
|
32
36
|
* Core loop: user message -> context assembly -> LLM API (streaming) ->
|
|
33
37
|
* tool execution (if any) -> repeat until no tool calls -> turn complete.
|
|
34
38
|
*
|
|
35
|
-
*
|
|
39
|
+
* Tools are phase-gated: worker LLM tools only available in DISTILL mode.
|
|
36
40
|
*/
|
|
37
41
|
export class AgentEngine {
|
|
38
42
|
/**
|
|
@@ -45,7 +49,6 @@ export class AgentEngine {
|
|
|
45
49
|
this.client = client;
|
|
46
50
|
this.config = config;
|
|
47
51
|
this.context = new ContextAssembler();
|
|
48
|
-
this.toolRegistry = new ToolRegistry();
|
|
49
52
|
|
|
50
53
|
// Workspace + structural components
|
|
51
54
|
this.workspace = new Workspace(config.kcWorkspaceRoot, sessionId);
|
|
@@ -54,30 +57,8 @@ export class AgentEngine {
|
|
|
54
57
|
this.cornerCases = new CornerCaseRegistry(this.workspace.cwd);
|
|
55
58
|
this.confidence = new ConfidenceScorer(this.workspace.cwd, this.cornerCases);
|
|
56
59
|
|
|
57
|
-
//
|
|
58
|
-
this.
|
|
59
|
-
this.toolRegistry.register(new WorkspaceFileTool(this.workspace, this.versionManager));
|
|
60
|
-
this.toolRegistry.register(new DocumentParseTool(this.workspace, {
|
|
61
|
-
mineruApiUrl: config.mineruApiUrl,
|
|
62
|
-
mineruApiKey: config.mineruApiKey,
|
|
63
|
-
siliconflowApiKey: config.siliconflowApiKey,
|
|
64
|
-
siliconflowBaseUrl: config.siliconflowBaseUrl,
|
|
65
|
-
ocrModel: config.ocrModelTier1,
|
|
66
|
-
}));
|
|
67
|
-
this.toolRegistry.register(new DocumentSearchTool(this.workspace));
|
|
68
|
-
|
|
69
|
-
const workerLlm = new WorkerLLMCallTool(this.workspace, {
|
|
70
|
-
apiKey: config.siliconflowApiKey,
|
|
71
|
-
baseUrl: config.siliconflowBaseUrl,
|
|
72
|
-
});
|
|
73
|
-
this.toolRegistry.register(workerLlm);
|
|
74
|
-
this.toolRegistry.register(new WorkflowRunTool(this.workspace, this.versionManager, this.confidence));
|
|
75
|
-
this.toolRegistry.register(new TierDowngradeTool(this.workspace, workerLlm));
|
|
76
|
-
this.toolRegistry.register(new EvolutionCycleTool(this.workspace, this.cornerCases));
|
|
77
|
-
this.toolRegistry.register(new RuleCatalogTool(this.workspace));
|
|
78
|
-
this.toolRegistry.register(new QCSampleTool(this.workspace));
|
|
79
|
-
this.toolRegistry.register(new DashboardRenderTool(this.workspace));
|
|
80
|
-
this.toolRegistry.register(new AgentTool(this.workspace, (sid) => new AgentEngine({ client, config, sessionId: sid })));
|
|
60
|
+
// Build every tool instance once up front; only the tools appropriate to the current phase are registered
|
|
61
|
+
this._buildTools = this._createAllTools();
|
|
81
62
|
|
|
82
63
|
// Pipeline system (meta-meta skills as code)
|
|
83
64
|
this.currentPhase = Phase.BOOTSTRAP;
|
|
@@ -89,14 +70,70 @@ export class AgentEngine {
|
|
|
89
70
|
[Phase.DISTILLATION]: new DistillationPipeline(this.workspace),
|
|
90
71
|
[Phase.PRODUCTION_QC]: new ProductionQCPipeline(this.workspace),
|
|
91
72
|
};
|
|
73
|
+
|
|
74
|
+
// Skill discovery (Claude Code pattern: index in context, full content on demand)
|
|
75
|
+
this._skillLoader = new SkillLoader(config.language);
|
|
76
|
+
|
|
77
|
+
// Register tools for initial phase
|
|
78
|
+
this.toolRegistry = new ToolRegistry();
|
|
79
|
+
this._registerToolsForPhase(this.currentPhase);
|
|
92
80
|
}
|
|
93
81
|
|
|
94
82
|
/**
|
|
95
|
-
*
|
|
96
|
-
*
|
|
83
|
+
* Create all tool instances. Separated from registration so we can
|
|
84
|
+
* re-register per phase without recreating.
|
|
97
85
|
*/
|
|
98
|
-
|
|
99
|
-
this.
|
|
86
|
+
_createAllTools() {
|
|
87
|
+
const workerLlm = new WorkerLLMCallTool(this.workspace, {
|
|
88
|
+
apiKey: this.config.siliconflowApiKey,
|
|
89
|
+
baseUrl: this.config.siliconflowBaseUrl,
|
|
90
|
+
});
|
|
91
|
+
|
|
92
|
+
return {
|
|
93
|
+
// Always available (BUILD + DISTILL)
|
|
94
|
+
core: [
|
|
95
|
+
new SandboxExecTool(this.workspace, this.config.kcExecTimeout),
|
|
96
|
+
new WorkspaceFileTool(this.workspace, this.versionManager),
|
|
97
|
+
new DocumentParseTool(this.workspace, {
|
|
98
|
+
mineruApiUrl: this.config.mineruApiUrl,
|
|
99
|
+
mineruApiKey: this.config.mineruApiKey,
|
|
100
|
+
siliconflowApiKey: this.config.siliconflowApiKey,
|
|
101
|
+
siliconflowBaseUrl: this.config.siliconflowBaseUrl,
|
|
102
|
+
ocrModel: this.config.ocrModelTier1,
|
|
103
|
+
}),
|
|
104
|
+
new DocumentSearchTool(this.workspace),
|
|
105
|
+
new RuleCatalogTool(this.workspace),
|
|
106
|
+
new EvolutionCycleTool(this.workspace, this.cornerCases),
|
|
107
|
+
new DashboardRenderTool(this.workspace),
|
|
108
|
+
new AgentTool(this.workspace, (sid) => new AgentEngine({
|
|
109
|
+
client: this.client, config: this.config, sessionId: sid,
|
|
110
|
+
})),
|
|
111
|
+
],
|
|
112
|
+
// Distillation+ only (DISTILL mode)
|
|
113
|
+
distill: [
|
|
114
|
+
workerLlm,
|
|
115
|
+
new WorkflowRunTool(this.workspace, this.versionManager, this.confidence),
|
|
116
|
+
new TierDowngradeTool(this.workspace, workerLlm),
|
|
117
|
+
new QCSampleTool(this.workspace),
|
|
118
|
+
],
|
|
119
|
+
};
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
/**
|
|
123
|
+
* Register tools appropriate for the given phase.
|
|
124
|
+
* BUILD phases get core tools only.
|
|
125
|
+
* DISTILL phases get core + worker LLM tools.
|
|
126
|
+
*/
|
|
127
|
+
_registerToolsForPhase(phase) {
|
|
128
|
+
this.toolRegistry = new ToolRegistry();
|
|
129
|
+
for (const tool of this._buildTools.core) {
|
|
130
|
+
this.toolRegistry.register(tool);
|
|
131
|
+
}
|
|
132
|
+
if (DISTILL_PHASES.has(phase)) {
|
|
133
|
+
for (const tool of this._buildTools.distill) {
|
|
134
|
+
this.toolRegistry.register(tool);
|
|
135
|
+
}
|
|
136
|
+
}
|
|
100
137
|
}
|
|
101
138
|
|
|
102
139
|
/**
|
|
@@ -113,6 +150,7 @@ export class AgentEngine {
|
|
|
113
150
|
const pipelineState = pipeline?.describeState?.() || null;
|
|
114
151
|
|
|
115
152
|
const systemPrompt = this.context.build({
|
|
153
|
+
skillIndex: this._skillLoader.formatForContext(),
|
|
116
154
|
pipelineState,
|
|
117
155
|
workspaceState: `Your workspace directory is: ${this.workspace.cwd}`,
|
|
118
156
|
});
|
|
@@ -137,13 +175,11 @@ export class AgentEngine {
|
|
|
137
175
|
const delta = chunk.choices?.[0]?.delta;
|
|
138
176
|
if (!delta) continue;
|
|
139
177
|
|
|
140
|
-
// Stream text content
|
|
141
178
|
if (delta.content) {
|
|
142
179
|
yield new AgentEvent({ type: "text_delta", text: delta.content });
|
|
143
180
|
collectedText += delta.content;
|
|
144
181
|
}
|
|
145
182
|
|
|
146
|
-
// Accumulate tool calls from deltas
|
|
147
183
|
if (delta.tool_calls) {
|
|
148
184
|
for (const tcDelta of delta.tool_calls) {
|
|
149
185
|
const idx = tcDelta.index;
|
|
@@ -158,7 +194,6 @@ export class AgentEngine {
|
|
|
158
194
|
}
|
|
159
195
|
}
|
|
160
196
|
|
|
161
|
-
// Build assistant message for history
|
|
162
197
|
const assistantMsg = { role: "assistant", content: collectedText || null };
|
|
163
198
|
if (toolCallsAcc.size > 0) {
|
|
164
199
|
assistantMsg.tool_calls = Array.from(toolCallsAcc.values()).map((tc) => ({
|
|
@@ -169,7 +204,6 @@ export class AgentEngine {
|
|
|
169
204
|
}
|
|
170
205
|
this.history.addRaw(assistantMsg);
|
|
171
206
|
|
|
172
|
-
// No tool calls → turn complete
|
|
173
207
|
if (toolCallsAcc.size === 0) {
|
|
174
208
|
yield new AgentEvent({ type: "turn_complete" });
|
|
175
209
|
return;
|
|
@@ -180,7 +214,7 @@ export class AgentEngine {
|
|
|
180
214
|
let inputData = {};
|
|
181
215
|
try {
|
|
182
216
|
inputData = tc.arguments ? JSON.parse(tc.arguments) : {};
|
|
183
|
-
} catch { /* ignore
|
|
217
|
+
} catch { /* ignore */ }
|
|
184
218
|
|
|
185
219
|
yield new AgentEvent({ type: "tool_start", name: tc.name, input: inputData });
|
|
186
220
|
const result = await this.toolRegistry.execute(tc.name, inputData);
|
|
@@ -191,19 +225,19 @@ export class AgentEngine {
|
|
|
191
225
|
isError: result.isError,
|
|
192
226
|
});
|
|
193
227
|
|
|
194
|
-
// Add tool result message
|
|
195
228
|
this.history.addRaw({
|
|
196
229
|
role: "tool",
|
|
197
230
|
tool_call_id: tc.id,
|
|
198
231
|
content: result.content,
|
|
199
232
|
});
|
|
200
233
|
|
|
201
|
-
// Pipeline controller: update state
|
|
234
|
+
// Pipeline controller: update state and re-register tools on phase change
|
|
202
235
|
if (pipeline?.onToolResult) {
|
|
203
236
|
const pEvent = pipeline.onToolResult(tc.name, inputData, result);
|
|
204
237
|
if (pEvent) {
|
|
205
238
|
if (pEvent.type === "phase_ready" && pEvent.nextPhase) {
|
|
206
239
|
this.currentPhase = pEvent.nextPhase;
|
|
240
|
+
this._registerToolsForPhase(this.currentPhase);
|
|
207
241
|
}
|
|
208
242
|
yield new AgentEvent({
|
|
209
243
|
type: "pipeline_event",
|
|
@@ -213,8 +247,6 @@ export class AgentEngine {
|
|
|
213
247
|
}
|
|
214
248
|
}
|
|
215
249
|
|
|
216
|
-
// Loop continues — send tool results back to LLM
|
|
217
|
-
|
|
218
250
|
} catch (err) {
|
|
219
251
|
yield new AgentEvent({ type: "error", message: err.message });
|
|
220
252
|
return;
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { Phase, PipelineEvent } from "./index.js";
|
|
4
|
+
import { Pipeline } from "./base.js";
|
|
5
|
+
|
|
6
|
+
/**
 * Pipeline controller for the DISTILLATION phase.
 *
 * Progress is derived from the workspace on disk:
 * - every directory under `rule_skills/` (except `__`-prefixed ones) is a
 *   skill that needs a workflow;
 * - `workflows/<name>/` containing `.py` files counts as a created workflow,
 *   and its optional `config.json` supplies `tier` and `accuracy`;
 * - a bare `workflows/<name>.py` also counts as a created workflow.
 *
 * The phase is complete once every skill has a created workflow, under the
 * same name, whose recorded accuracy meets the threshold.
 */
export class DistillationEngine extends Pipeline {
  /**
   * @param {{cwd: string}} workspace - only `workspace.cwd` is read by this class
   */
  constructor(workspace) {
    super();
    this._workspace = workspace;
    this.skillsToDistill = [];
    this.workflowsCreated = {};
    this.workflowsTested = {};
    this.workflowsPassing = [];
    this.tierAssignments = {};
    // Default pass threshold; may be overridden by WORKFLOW_ACCURACY in .env.
    this._workflowAccuracy = 0.9;
    this._scanWorkspace();
  }

  /** Refresh threshold, skill list and workflow status from disk. */
  _scanWorkspace() {
    this._loadConfig();
    this._loadSkills();
    this._scanWorkflows();
  }

  /**
   * Read WORKFLOW_ACCURACY from the workspace `.env`, if present.
   *
   * parseFloat never throws; it returns NaN for unparsable input. A NaN
   * threshold would make every `acc >= threshold` comparison false, so only
   * finite values are accepted and anything else keeps the current threshold.
   */
  _loadConfig() {
    const envPath = path.join(this._workspace.cwd, ".env");
    if (!fs.existsSync(envPath)) return;
    const key = "WORKFLOW_ACCURACY=";
    for (const line of fs.readFileSync(envPath, "utf-8").split("\n")) {
      if (!line.startsWith(key)) continue;
      const value = Number.parseFloat(line.slice(key.length));
      if (Number.isFinite(value)) this._workflowAccuracy = value;
    }
  }

  /** Collect the names of skill directories under `rule_skills/`. */
  _loadSkills() {
    this.skillsToDistill = [];
    const dir = path.join(this._workspace.cwd, "rule_skills");
    if (!fs.existsSync(dir)) return;
    for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
      if (entry.isDirectory() && !entry.name.startsWith("__")) {
        this.skillsToDistill.push(entry.name);
      }
    }
  }

  /** Rebuild created / tested / passing / tier maps from `workflows/`. */
  _scanWorkflows() {
    this.workflowsCreated = {};
    this.workflowsTested = {};
    this.workflowsPassing = [];
    this.tierAssignments = {};
    const wfDir = path.join(this._workspace.cwd, "workflows");
    if (!fs.existsSync(wfDir)) return;

    for (const entry of fs.readdirSync(wfDir, { withFileTypes: true })) {
      if (entry.isDirectory()) {
        this._scanWorkflowDir(path.join(wfDir, entry.name), entry.name);
      } else if (entry.isFile() && entry.name.endsWith(".py")) {
        this.workflowsCreated[path.parse(entry.name).name] = 1;
      }
    }
  }

  /** Record one `workflows/<name>/` directory: script count, tier, accuracy. */
  _scanWorkflowDir(ruleDir, name) {
    const pyFiles = fs.readdirSync(ruleDir).filter((f) => f.endsWith(".py"));
    if (pyFiles.length > 0) this.workflowsCreated[name] = pyFiles.length;

    const cfgPath = path.join(ruleDir, "config.json");
    if (!fs.existsSync(cfgPath)) return;
    try {
      const cfg = JSON.parse(fs.readFileSync(cfgPath, "utf-8"));
      if (cfg.tier) this.tierAssignments[name] = cfg.tier;
      if (cfg.accuracy != null) {
        const acc = Number.parseFloat(cfg.accuracy);
        // A non-numeric accuracy is not a test result; do not record NaN.
        if (Number.isFinite(acc)) {
          this.workflowsTested[name] = acc;
          if (acc >= this._workflowAccuracy) this.workflowsPassing.push(name);
        }
      }
    } catch {
      // Best effort: an unreadable or malformed config.json leaves the
      // workflow untested rather than aborting the whole scan.
    }
  }

  /** True when a workflow with exactly this name has been created. */
  _hasWorkflow(name) {
    return Object.prototype.hasOwnProperty.call(this.workflowsCreated, name);
  }

  /**
   * Rescan the workspace and describe phase progress for the system prompt.
   * @returns {string} markdown sections joined by blank lines
   */
  describeState() {
    this._scanWorkspace();
    const total = this.skillsToDistill.length;
    const created = Object.keys(this.workflowsCreated).length;
    const passing = this.workflowsPassing.length;
    const parts = ["## Current Phase: DISTILLATION"];
    parts.push(`### Progress\n- Skills to distill: ${total}\n- Workflows created: ${created}\n- Workflows passing (>=${this._workflowAccuracy}): ${passing}`);

    if (this.exitCriteriaMet()) {
      parts.push("### Ready\nAll workflows passing. Proceed to PRODUCTION_QC.");
    } else if (created === 0) {
      parts.push("### What to do now\nConvert proven skills into worker LLM workflows.\nFor each skill: write workflow script, write prompts, test vs ground truth, tier-downgrade test.");
    } else {
      const passingNames = new Set(this.workflowsPassing);
      const notCreated = this.skillsToDistill.filter((s) => !this._hasWorkflow(s));
      const notPassing = Object.keys(this.workflowsCreated).filter((s) => !passingNames.has(s));
      let guidance = "### What to do now\n";
      if (notCreated.length) guidance += `Create workflows for: ${notCreated.slice(0, 10).join(", ")}\n`;
      if (notPassing.length) guidance += `Improve accuracy for: ${notPassing.slice(0, 10).join(", ")}\n`;
      parts.push(guidance);
    }
    return parts.join("\n\n");
  }

  /**
   * React to a finished tool call.
   *
   * Workflow status is rescanned after a successful `workspace_file` call
   * whose path mentions `workflows/` or `output/`.
   *
   * @param {string} toolName
   * @param {object} toolInput
   * @param {{isError: boolean}} result
   * @returns {PipelineEvent|null} a `phase_ready` event the first time the
   *   exit criteria become satisfied, otherwise null
   */
  onToolResult(toolName, toolInput, result) {
    if (result.isError) return null;
    const wasReady = this.exitCriteriaMet();
    const touchedPath = toolInput.path || "";
    if (toolName === "workspace_file" && (touchedPath.includes("workflows/") || touchedPath.includes("output/"))) {
      this._scanWorkflows();
    }
    if (!wasReady && this.exitCriteriaMet()) {
      return new PipelineEvent({ type: "phase_ready", message: "Distillation complete. Ready for PRODUCTION_QC.", nextPhase: Phase.PRODUCTION_QC });
    }
    return null;
  }

  /**
   * The phase is done when there is at least one skill and every skill has a
   * created, passing workflow of the same name. Matching by name (not by
   * count) prevents unrelated workflows from standing in for a skill that
   * has not been distilled yet.
   *
   * @returns {boolean}
   */
  exitCriteriaMet() {
    if (this.skillsToDistill.length === 0) return false;
    const passingNames = new Set(this.workflowsPassing);
    return this.skillsToDistill.every(
      (skill) => this._hasWorkflow(skill) && passingNames.has(skill),
    );
  }
}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { Phase, PipelineEvent } from "./index.js";
|
|
4
|
+
import { Pipeline } from "./base.js";
|
|
5
|
+
|
|
6
|
+
/**
 * Pipeline controller for the EXTRACTION phase.
 *
 * Progress is derived from the workspace on disk:
 * - `rules/` containing at least one non-JSON file marks regulations as scanned;
 * - rule ids come from `rules/catalog.json` (when it is an array) plus the
 *   directory names under `rule_skills/`;
 * - a rule has tests when `rule_skills/<rule>/test_cases/` is non-empty;
 * - the coverage audit is done when `rules/coverage_audit.md` or
 *   `rules/coverage_audit.json` exists.
 */
export class RuleExtractionPipeline extends Pipeline {
  /**
   * @param {{cwd: string}} workspace - only `workspace.cwd` is read by this class
   */
  constructor(workspace) {
    super();
    this._workspace = workspace;
    this.regulationsScanned = false;
    this.rulesExtracted = [];
    this.rulesWithTests = [];
    this.coverageAudited = false;
    this._scanWorkspace();
  }

  /** True for a skill directory entry; `__`-prefixed directories are ignored. */
  _isSkillDir(entry) {
    return entry.isDirectory() && !entry.name.startsWith("__");
  }

  /** Number of rules that must have tests: 80% of the rules, at least one. */
  _requiredTestCount() {
    return Math.max(this.rulesExtracted.length * 0.8, 1);
  }

  /** Refresh every progress flag from disk. */
  _scanWorkspace() {
    const rulesDir = path.join(this._workspace.cwd, "rules");
    // Recomputed on every scan so the flag cannot stay true after rules/ is
    // removed or emptied.
    // NOTE(review): any non-JSON file counts, including coverage_audit.md —
    // confirm whether the audit file should be excluded here.
    this.regulationsScanned = fs.existsSync(rulesDir) &&
      fs.readdirSync(rulesDir).some(
        (f) => !f.endsWith(".json") && fs.statSync(path.join(rulesDir, f)).isFile(),
      );
    this._scanRules();
    this._scanTests();
    this.coverageAudited = fs.existsSync(path.join(rulesDir, "coverage_audit.md")) ||
      fs.existsSync(path.join(rulesDir, "coverage_audit.json"));
  }

  /** Rebuild the rule id list from the catalog and the skill directories. */
  _scanRules() {
    this.rulesExtracted = [];
    const catalogPath = path.join(this._workspace.cwd, "rules", "catalog.json");
    if (fs.existsSync(catalogPath)) {
      try {
        const data = JSON.parse(fs.readFileSync(catalogPath, "utf-8"));
        if (Array.isArray(data)) this.rulesExtracted = data.map((r, i) => r.id || `rule_${i}`);
      } catch {
        // Best effort: a malformed catalog contributes no rules.
      }
    }
    const skillsDir = path.join(this._workspace.cwd, "rule_skills");
    if (!fs.existsSync(skillsDir)) return;
    const known = new Set(this.rulesExtracted);
    for (const entry of fs.readdirSync(skillsDir, { withFileTypes: true })) {
      if (this._isSkillDir(entry) && !known.has(entry.name)) {
        known.add(entry.name);
        this.rulesExtracted.push(entry.name);
      }
    }
  }

  /**
   * Rebuild the list of rules that have at least one test case.
   * Uses the same directory filter as _scanRules so that directories such as
   * `__pycache__` cannot inflate the tested-rule count.
   */
  _scanTests() {
    this.rulesWithTests = [];
    const skillsDir = path.join(this._workspace.cwd, "rule_skills");
    if (!fs.existsSync(skillsDir)) return;
    for (const entry of fs.readdirSync(skillsDir, { withFileTypes: true })) {
      if (!this._isSkillDir(entry)) continue;
      const testDir = path.join(skillsDir, entry.name, "test_cases");
      if (fs.existsSync(testDir) && fs.readdirSync(testDir).length > 0) {
        this.rulesWithTests.push(entry.name);
      }
    }
  }

  /**
   * Rescan the workspace and describe phase progress for the system prompt.
   * @returns {string} markdown sections joined by blank lines
   */
  describeState() {
    this._scanWorkspace();
    const parts = ["## Current Phase: EXTRACTION"];
    parts.push(`### Progress\n- Regulations scanned: ${this.regulationsScanned ? "yes" : "no"}\n- Rules extracted: ${this.rulesExtracted.length}\n- Rules with tests: ${this.rulesWithTests.length}\n- Coverage audit: ${this.coverageAudited ? "done" : "not yet"}`);

    if (this.exitCriteriaMet()) {
      parts.push("### Ready\nExtraction complete. Proceed to SKILL_AUTHORING phase.");
    } else if (this.rulesExtracted.length === 0) {
      parts.push("### What to do now\nDecompose regulations into atomic, testable rules.\n- One rule = one pass/fail outcome\n- Work top-down: major areas → chapters → sections → atomic rules\n- Save rules to rules/catalog.json via rule_catalog tool");
    } else if (!this.coverageAudited) {
      parts.push("### What to do now\nRun a coverage audit: which regulation sections are NOT covered? Save to rules/coverage_audit.md");
    } else if (this.rulesWithTests.length < this._requiredTestCount()) {
      // Without this branch the prompt would show progress but no next step
      // when test cases are the only thing still missing.
      const tested = new Set(this.rulesWithTests);
      const untested = this.rulesExtracted.filter((rule) => !tested.has(rule));
      parts.push(`### What to do now\nAdd test cases under rule_skills/<rule>/test_cases/ for: ${untested.slice(0, 10).join(", ")}`);
    }
    return parts.join("\n\n");
  }

  /**
   * React to a finished tool call; the workspace is rescanned after a
   * successful `workspace_file` or `rule_catalog` call.
   *
   * @param {string} toolName
   * @param {object} toolInput
   * @param {{isError: boolean}} result
   * @returns {PipelineEvent|null} a `phase_ready` event the first time the
   *   exit criteria become satisfied, otherwise null
   */
  onToolResult(toolName, toolInput, result) {
    if (result.isError) return null;
    const wasReady = this.exitCriteriaMet();
    if (toolName === "workspace_file" || toolName === "rule_catalog") {
      this._scanWorkspace();
    }
    if (!wasReady && this.exitCriteriaMet()) {
      return new PipelineEvent({ type: "phase_ready", message: "Extraction complete. Ready for SKILL_AUTHORING.", nextPhase: Phase.SKILL_AUTHORING });
    }
    return null;
  }

  /**
   * Done when regulations are scanned, at least one rule exists, enough rules
   * have tests, and the coverage audit has been written.
   * @returns {boolean}
   */
  exitCriteriaMet() {
    return this.regulationsScanned && this.rulesExtracted.length > 0 &&
      this.rulesWithTests.length >= this._requiredTestCount() && this.coverageAudited;
  }
}
|