kc-beta 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/agent/context.js +8 -4
- package/src/agent/engine.js +65 -9
- package/src/agent/pipelines/initializer.js +53 -8
- package/src/agent/session-state.js +1 -0
- package/src/agent/skill-loader.js +13 -1
- package/src/agent/tools/document-parse.js +104 -21
- package/src/agent/tools/document-search.js +24 -8
- package/src/agent/tools/sandbox-exec.js +16 -5
- package/src/agent/tools/workspace-file.js +47 -20
- package/src/agent/workspace.js +24 -1
- package/src/cli/components.js +8 -1
- package/src/cli/config.js +100 -6
- package/src/cli/index.js +14 -1
- package/src/cli/onboard.js +70 -1
- package/src/config.js +43 -3
- package/src/model-tiers.json +153 -0
- package/src/providers.js +63 -66
- package/template/AGENT.md +20 -0
- package/template/skills/en/meta/compliance-judgment/SKILL.md +10 -42
- package/template/skills/en/meta/document-chunking/SKILL.md +32 -0
- package/template/skills/en/meta/document-parsing/SKILL.md +11 -18
- package/template/skills/en/meta/entity-extraction/SKILL.md +13 -28
- package/template/skills/en/meta/tree-processing/SKILL.md +19 -1
- package/template/skills/en/meta-meta/auto-model-selection/SKILL.md +53 -0
- package/template/skills/en/meta-meta/pdf-review-dashboard/SKILL.md +57 -0
- package/template/skills/en/meta-meta/pdf-review-dashboard/scripts/generate_review.js +262 -0
- package/template/skills/en/meta-meta/rule-extraction/SKILL.md +24 -1
- package/template/skills/en/meta-meta/skill-authoring/SKILL.md +6 -0
- package/template/skills/en/meta-meta/skill-to-workflow/SKILL.md +4 -0
- package/template/skills/zh/meta/compliance-judgment/SKILL.md +41 -262
- package/template/skills/zh/meta/document-chunking/SKILL.md +32 -0
- package/template/skills/zh/meta/document-parsing/SKILL.md +65 -132
- package/template/skills/zh/meta/entity-extraction/SKILL.md +68 -230
- package/template/skills/zh/meta/tree-processing/SKILL.md +82 -194
- package/template/skills/zh/meta-meta/auto-model-selection/SKILL.md +51 -0
- package/template/skills/zh/meta-meta/pdf-review-dashboard/SKILL.md +55 -0
- package/template/skills/zh/meta-meta/pdf-review-dashboard/scripts/generate_review.js +262 -0
- package/template/skills/zh/meta-meta/rule-extraction/SKILL.md +79 -164
- package/template/skills/zh/meta-meta/skill-authoring/SKILL.md +64 -185
- package/template/skills/zh/meta-meta/skill-to-workflow/SKILL.md +95 -216
package/package.json
CHANGED
package/src/agent/context.js
CHANGED
|
@@ -32,9 +32,11 @@ outcome. Handle ambiguity explicitly — note it, ask the developer user. After
|
|
|
32
32
|
audit which regulation sections are not yet covered.
|
|
33
33
|
|
|
34
34
|
### Entity Extraction
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
35
|
+
Choose the cheapest method that meets accuracy threshold. Regex is the smallest \
|
|
36
|
+
"model" — zero cost, instant, deterministic. Worker LLM handles semantic tasks \
|
|
37
|
+
regex cannot (contextual interpretation, misleading language, adequacy judgment). \
|
|
38
|
+
Try different methods, find the cost-accuracy balance. Every extraction captures: \
|
|
39
|
+
value, evidence, source location, confidence, method used.
|
|
38
40
|
|
|
39
41
|
### Skill Authoring
|
|
40
42
|
Write each rule into a skill folder following the Anthropic skill-creator format. A \
|
|
@@ -79,13 +81,15 @@ unclear regulations with them. Present results and let them judge.`;
|
|
|
79
81
|
export class ContextAssembler {
|
|
80
82
|
/**
|
|
81
83
|
* @param {object} [opts]
|
|
84
|
+
* @param {string} [opts.agentMd] - Content of workspace AGENT.md (per-project context)
|
|
82
85
|
* @param {string} [opts.pipelineState]
|
|
83
86
|
* @param {string} [opts.workspaceState]
|
|
84
87
|
* @param {string} [opts.skillIndex] - Brief index of available meta skills
|
|
85
88
|
* @returns {string}
|
|
86
89
|
*/
|
|
87
|
-
build({ pipelineState, workspaceState, skillIndex } = {}) {
|
|
90
|
+
build({ agentMd, pipelineState, workspaceState, skillIndex } = {}) {
|
|
88
91
|
const parts = [AGENT_IDENTITY];
|
|
92
|
+
if (agentMd) parts.push(agentMd);
|
|
89
93
|
if (skillIndex) parts.push(skillIndex);
|
|
90
94
|
if (pipelineState) parts.push(pipelineState);
|
|
91
95
|
if (workspaceState) parts.push(workspaceState);
|
package/src/agent/engine.js
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
1
3
|
import { AgentEvent } from "./events.js";
|
|
2
4
|
import { ContextAssembler } from "./context.js";
|
|
3
5
|
import { ConversationHistory } from "./history.js";
|
|
@@ -56,7 +58,7 @@ export class AgentEngine {
|
|
|
56
58
|
this.context = new ContextAssembler();
|
|
57
59
|
|
|
58
60
|
// Workspace + structural components
|
|
59
|
-
this.workspace = new Workspace(config.kcWorkspaceRoot, sessionId);
|
|
61
|
+
this.workspace = new Workspace(config.kcWorkspaceRoot, sessionId, config.projectDir);
|
|
60
62
|
this.history = new ConversationHistory(this.workspace.cwd);
|
|
61
63
|
this.versionManager = new VersionManager(this.workspace.cwd);
|
|
62
64
|
this.cornerCases = new CornerCaseRegistry(this.workspace.cwd);
|
|
@@ -102,12 +104,20 @@ export class AgentEngine {
|
|
|
102
104
|
* re-register per phase without recreating.
|
|
103
105
|
*/
|
|
104
106
|
_createAllTools() {
|
|
107
|
+
// Worker LLM uses separate config if set, otherwise falls back to conductor
|
|
108
|
+
const workerApiKey = this.config.effectiveWorkerApiKey();
|
|
109
|
+
const workerBaseUrl = this.config.effectiveWorkerBaseUrl();
|
|
110
|
+
const workerAuthType = this.config.effectiveWorkerAuthType();
|
|
111
|
+
|
|
105
112
|
const workerLlm = new WorkerLLMCallTool(this.workspace, {
|
|
106
|
-
apiKey:
|
|
107
|
-
baseUrl:
|
|
108
|
-
authType:
|
|
113
|
+
apiKey: workerApiKey,
|
|
114
|
+
baseUrl: workerBaseUrl,
|
|
115
|
+
authType: workerAuthType,
|
|
109
116
|
});
|
|
110
117
|
|
|
118
|
+
// OCR/VLM uses worker config (VLM is a type of worker LLM)
|
|
119
|
+
const vlmModel = this.config.vlmTier1 || "";
|
|
120
|
+
|
|
111
121
|
return {
|
|
112
122
|
// Always available (BUILD + DISTILL)
|
|
113
123
|
core: [
|
|
@@ -116,9 +126,9 @@ export class AgentEngine {
|
|
|
116
126
|
new DocumentParseTool(this.workspace, {
|
|
117
127
|
mineruApiUrl: this.config.mineruApiUrl,
|
|
118
128
|
mineruApiKey: this.config.mineruApiKey,
|
|
119
|
-
llmApiKey:
|
|
120
|
-
llmBaseUrl:
|
|
121
|
-
ocrModel:
|
|
129
|
+
llmApiKey: workerApiKey,
|
|
130
|
+
llmBaseUrl: workerBaseUrl,
|
|
131
|
+
ocrModel: vlmModel,
|
|
122
132
|
}),
|
|
123
133
|
new DocumentSearchTool(this.workspace),
|
|
124
134
|
new RuleCatalogTool(this.workspace),
|
|
@@ -156,15 +166,52 @@ export class AgentEngine {
|
|
|
156
166
|
}
|
|
157
167
|
}
|
|
158
168
|
|
|
169
|
+
/**
|
|
170
|
+
* Read AGENT.md from workspace (per-project context).
|
|
171
|
+
* Returns content string or empty string if not found.
|
|
172
|
+
*/
|
|
173
|
+
_readAgentMd() {
|
|
174
|
+
const agentMdPath = path.join(this.workspace.cwd, "AGENT.md");
|
|
175
|
+
try {
|
|
176
|
+
if (fs.existsSync(agentMdPath)) {
|
|
177
|
+
return fs.readFileSync(agentMdPath, "utf-8");
|
|
178
|
+
}
|
|
179
|
+
} catch { /* ignore */ }
|
|
180
|
+
return "";
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
/**
|
|
184
|
+
* Build the workspace/project directory state string for the system prompt.
|
|
185
|
+
*/
|
|
186
|
+
_buildWorkspaceState() {
|
|
187
|
+
const lines = [
|
|
188
|
+
`## Directory Layout`,
|
|
189
|
+
`**KC Workspace:** ${this.workspace.cwd}`,
|
|
190
|
+
` Use scope="workspace" (default). Write all working files here (rules, skills, workflows, results, logs).`,
|
|
191
|
+
];
|
|
192
|
+
if (this.workspace.projectDir) {
|
|
193
|
+
lines.push(
|
|
194
|
+
`**Project Directory:** ${this.workspace.projectDir}`,
|
|
195
|
+
` Use scope="project" to read/write files in the user's project folder.`,
|
|
196
|
+
` This is where the user's source regulations, samples, and reference documents are.`,
|
|
197
|
+
``,
|
|
198
|
+
`Read source documents from the project directory. Write KC outputs to the workspace.`,
|
|
199
|
+
`Write user-facing exports (reports, results) to the project directory when the user asks.`,
|
|
200
|
+
);
|
|
201
|
+
}
|
|
202
|
+
return lines.join("\n");
|
|
203
|
+
}
|
|
204
|
+
|
|
159
205
|
/**
|
|
160
206
|
* Get current context usage statistics.
|
|
161
207
|
* @returns {{ totalTokens: number, limit: number, percentage: number }}
|
|
162
208
|
*/
|
|
163
209
|
getContextStats() {
|
|
164
210
|
const systemPrompt = this.context.build({
|
|
211
|
+
agentMd: this._readAgentMd(),
|
|
165
212
|
skillIndex: this._skillLoader.formatForContext(),
|
|
166
213
|
pipelineState: this.pipelines[this.currentPhase]?.describeState?.() || null,
|
|
167
|
-
workspaceState:
|
|
214
|
+
workspaceState: this._buildWorkspaceState(),
|
|
168
215
|
});
|
|
169
216
|
const systemTokens = estimateTokens(systemPrompt);
|
|
170
217
|
const messageTokens = estimateMessagesTokens(this.history.messages);
|
|
@@ -270,6 +317,14 @@ export class AgentEngine {
|
|
|
270
317
|
engine._phaseSummaries = data.phaseSummaries || [];
|
|
271
318
|
engine._registerToolsForPhase(engine.currentPhase);
|
|
272
319
|
|
|
320
|
+
// Restore project directory from saved state
|
|
321
|
+
if (data.projectDir) {
|
|
322
|
+
if (fs.existsSync(data.projectDir)) {
|
|
323
|
+
engine.workspace.projectDir = data.projectDir;
|
|
324
|
+
}
|
|
325
|
+
// If dir no longer exists, projectDir stays as whatever was passed at launch
|
|
326
|
+
}
|
|
327
|
+
|
|
273
328
|
// Restore pipeline milestones
|
|
274
329
|
const milestones = data.pipelineMilestones || {};
|
|
275
330
|
for (const [phase, mData] of Object.entries(milestones)) {
|
|
@@ -309,9 +364,10 @@ export class AgentEngine {
|
|
|
309
364
|
const pipelineState = pipeline?.describeState?.() || null;
|
|
310
365
|
|
|
311
366
|
const systemPrompt = this.context.build({
|
|
367
|
+
agentMd: this._readAgentMd(),
|
|
312
368
|
skillIndex: this._skillLoader.formatForContext(),
|
|
313
369
|
pipelineState,
|
|
314
|
-
workspaceState:
|
|
370
|
+
workspaceState: this._buildWorkspaceState(),
|
|
315
371
|
});
|
|
316
372
|
const tools = this.toolRegistry.schemasOpenai();
|
|
317
373
|
|
|
@@ -1,9 +1,13 @@
|
|
|
1
1
|
import fs from "node:fs";
|
|
2
2
|
import path from "node:path";
|
|
3
3
|
import os from "node:os";
|
|
4
|
+
import { fileURLToPath } from "node:url";
|
|
4
5
|
import { Phase, PipelineEvent } from "./index.js";
|
|
5
6
|
import { Pipeline } from "./base.js";
|
|
6
7
|
|
|
8
|
+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
9
|
+
const AGENT_MD_TEMPLATE = path.resolve(__dirname, "../../../template/AGENT.md");
|
|
10
|
+
|
|
7
11
|
const REQUIRED_DIRS = ["rules", "samples", "input", "output", "logs", "workflows", "rule_skills"];
|
|
8
12
|
|
|
9
13
|
const DEFAULT_ENV = `# === KC Agent Project Configuration ===
|
|
@@ -74,6 +78,12 @@ export class ProjectInitializer extends Pipeline {
|
|
|
74
78
|
fs.writeFileSync(manifestPath, JSON.stringify({ version: "0.1.0", entries: [] }, null, 2), "utf-8");
|
|
75
79
|
}
|
|
76
80
|
|
|
81
|
+
// AGENT.md — per-project context (agent can modify)
|
|
82
|
+
const agentMdPath = path.join(this._workspace.cwd, "AGENT.md");
|
|
83
|
+
if (!fs.existsSync(agentMdPath) && fs.existsSync(AGENT_MD_TEMPLATE)) {
|
|
84
|
+
fs.copyFileSync(AGENT_MD_TEMPLATE, agentMdPath);
|
|
85
|
+
}
|
|
86
|
+
|
|
77
87
|
this.workspaceCreated = true;
|
|
78
88
|
this._checkRegulations();
|
|
79
89
|
this._checkSamples();
|
|
@@ -81,15 +91,41 @@ export class ProjectInitializer extends Pipeline {
|
|
|
81
91
|
}
|
|
82
92
|
|
|
83
93
|
_checkRegulations() {
|
|
94
|
+
// Check workspace rules/
|
|
84
95
|
const dir = path.join(this._workspace.cwd, "rules");
|
|
85
|
-
if (
|
|
86
|
-
|
|
96
|
+
if (fs.existsSync(dir) && fs.readdirSync(dir, { withFileTypes: true }).some((e) => e.isFile())) {
|
|
97
|
+
this.hasRegulations = true; return;
|
|
98
|
+
}
|
|
99
|
+
// Check project dir rules/ (case-insensitive)
|
|
100
|
+
if (this._workspace.projectDir) {
|
|
101
|
+
for (const name of ["rules", "Rules", "RULES", "regulations", "Regulations"]) {
|
|
102
|
+
const pdir = path.join(this._workspace.projectDir, name);
|
|
103
|
+
if (fs.existsSync(pdir) && fs.statSync(pdir).isDirectory() &&
|
|
104
|
+
fs.readdirSync(pdir, { withFileTypes: true }).some((e) => e.isFile())) {
|
|
105
|
+
this.hasRegulations = true; return;
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
this.hasRegulations = false;
|
|
87
110
|
}
|
|
88
111
|
|
|
89
112
|
_checkSamples() {
|
|
113
|
+
// Check workspace samples/
|
|
90
114
|
const dir = path.join(this._workspace.cwd, "samples");
|
|
91
|
-
if (
|
|
92
|
-
|
|
115
|
+
if (fs.existsSync(dir) && fs.readdirSync(dir, { withFileTypes: true }).some((e) => e.isFile())) {
|
|
116
|
+
this.hasSamples = true; return;
|
|
117
|
+
}
|
|
118
|
+
// Check project dir samples/ (case-insensitive)
|
|
119
|
+
if (this._workspace.projectDir) {
|
|
120
|
+
for (const name of ["samples", "Samples", "SAMPLES", "sample", "Sample"]) {
|
|
121
|
+
const pdir = path.join(this._workspace.projectDir, name);
|
|
122
|
+
if (fs.existsSync(pdir) && fs.statSync(pdir).isDirectory() &&
|
|
123
|
+
fs.readdirSync(pdir, { withFileTypes: true }).some((e) => e.isFile())) {
|
|
124
|
+
this.hasSamples = true; return;
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
this.hasSamples = false;
|
|
93
129
|
}
|
|
94
130
|
|
|
95
131
|
_checkConfig() {
|
|
@@ -115,10 +151,13 @@ export class ProjectInitializer extends Pipeline {
|
|
|
115
151
|
const completed = [], pending = [];
|
|
116
152
|
if (this.workspaceCreated) completed.push("Workspace structure created"); else pending.push("Workspace structure");
|
|
117
153
|
if (this.configReady) completed.push("API keys configured"); else pending.push("API keys (check .env)");
|
|
118
|
-
if (this.hasRegulations) completed.push("Regulation documents
|
|
119
|
-
if (this.hasSamples) completed.push("Sample documents
|
|
154
|
+
if (this.hasRegulations) completed.push("Regulation documents found"); else pending.push("Regulation documents (add to rules/ in workspace or project dir)");
|
|
155
|
+
if (this.hasSamples) completed.push("Sample documents found"); else pending.push("Sample documents (add to samples/ in workspace or project dir)");
|
|
120
156
|
|
|
121
157
|
const parts = ["## Phase: BOOTSTRAP\nSet up the workspace and understand the developer user's verification scenario. Bundled methodology skills are available in the workspace skills/ directory."];
|
|
158
|
+
if (this._workspace.projectDir) {
|
|
159
|
+
parts.push(`**Project directory:** ${this._workspace.projectDir}\nUse scope="project" to read files from the user's project folder.`);
|
|
160
|
+
}
|
|
122
161
|
if (completed.length) parts.push("### Done\n" + completed.map((c) => `- [x] ${c}`).join("\n"));
|
|
123
162
|
if (pending.length) parts.push("### Needed\n" + pending.map((p) => `- [ ] ${p}`).join("\n"));
|
|
124
163
|
|
|
@@ -135,14 +174,20 @@ export class ProjectInitializer extends Pipeline {
|
|
|
135
174
|
if (toolName === "workspace_file") {
|
|
136
175
|
const op = toolInput.operation || "";
|
|
137
176
|
const p = toolInput.path || "";
|
|
138
|
-
|
|
177
|
+
const scope = toolInput.scope || "workspace";
|
|
178
|
+
if (op === "write" && scope === "workspace") {
|
|
139
179
|
if (p.startsWith("rules/")) this.hasRegulations = true;
|
|
140
180
|
else if (p.startsWith("samples/")) this.hasSamples = true;
|
|
141
181
|
else if (p === ".env") this._checkConfig();
|
|
142
|
-
} else if (op === "list") {
|
|
182
|
+
} else if (op === "list" || op === "read") {
|
|
183
|
+
// Re-check after any list/read — project dir files may satisfy criteria
|
|
143
184
|
this._checkRegulations();
|
|
144
185
|
this._checkSamples();
|
|
145
186
|
}
|
|
187
|
+
} else if (toolName === "document_parse") {
|
|
188
|
+
// Parsing a document from project dir counts as having files
|
|
189
|
+
this._checkRegulations();
|
|
190
|
+
this._checkSamples();
|
|
146
191
|
}
|
|
147
192
|
|
|
148
193
|
if (!wasReady && this.exitCriteriaMet()) {
|
|
@@ -29,6 +29,7 @@ export class SessionState {
|
|
|
29
29
|
version: 1,
|
|
30
30
|
sessionId: engine.workspace.sessionId,
|
|
31
31
|
currentPhase: engine.currentPhase,
|
|
32
|
+
projectDir: engine.workspace.projectDir || null,
|
|
32
33
|
phaseSummaries: engine._phaseSummaries || [],
|
|
33
34
|
lastEventSeq: engine.eventLog?.currentSeq || 0,
|
|
34
35
|
createdAt: this._loadRaw()?.createdAt || new Date().toISOString(),
|
|
@@ -130,7 +130,19 @@ export class SkillLoader {
|
|
|
130
130
|
|
|
131
131
|
const frontmatter = match[1];
|
|
132
132
|
const name = frontmatter.match(/^name:\s*(.+)$/m)?.[1]?.trim() || "";
|
|
133
|
-
|
|
133
|
+
|
|
134
|
+
// Handle both single-line and multi-line (YAML >) descriptions
|
|
135
|
+
let description = "";
|
|
136
|
+
const descMatch = frontmatter.match(/^description:\s*(.+)$/m);
|
|
137
|
+
if (descMatch && descMatch[1].trim() === ">") {
|
|
138
|
+
// Multi-line: capture indented lines after "description: >"
|
|
139
|
+
const multiMatch = frontmatter.match(/^description:\s*>\s*\n((?:[ \t]+.+\n?)*)/m);
|
|
140
|
+
if (multiMatch) {
|
|
141
|
+
description = multiMatch[1].replace(/^[ \t]+/gm, "").replace(/\n/g, " ").trim();
|
|
142
|
+
}
|
|
143
|
+
} else if (descMatch) {
|
|
144
|
+
description = descMatch[1].trim();
|
|
145
|
+
}
|
|
134
146
|
return { name, description };
|
|
135
147
|
} catch {
|
|
136
148
|
return {};
|
|
@@ -12,13 +12,13 @@ const MIN_CHARS_PER_PAGE = 50;
|
|
|
12
12
|
* Level 3: OCR models via SiliconFlow — fallback via vision models
|
|
13
13
|
*/
|
|
14
14
|
export class DocumentParseTool extends BaseTool {
|
|
15
|
-
constructor(workspace, { mineruApiUrl, mineruApiKey, llmApiKey, llmBaseUrl,
|
|
15
|
+
constructor(workspace, { mineruApiUrl, mineruApiKey, llmApiKey, llmBaseUrl, ocrModel } = {}) {
|
|
16
16
|
super();
|
|
17
17
|
this._workspace = workspace;
|
|
18
18
|
this._mineruApiUrl = mineruApiUrl || "";
|
|
19
19
|
this._mineruApiKey = mineruApiKey || "";
|
|
20
|
-
this.
|
|
21
|
-
this.
|
|
20
|
+
this._vlmApiKey = llmApiKey || "";
|
|
21
|
+
this._vlmBaseUrl = (llmBaseUrl || "").replace(/\/+$/, "");
|
|
22
22
|
this._ocrModel = ocrModel || "";
|
|
23
23
|
}
|
|
24
24
|
|
|
@@ -36,13 +36,18 @@ export class DocumentParseTool extends BaseTool {
|
|
|
36
36
|
return {
|
|
37
37
|
type: "object",
|
|
38
38
|
properties: {
|
|
39
|
-
path: { type: "string", description: "Relative path to the document
|
|
39
|
+
path: { type: "string", description: "Relative path to the document" },
|
|
40
40
|
pages: { type: "string", description: "Page range to extract, e.g. '1-5', '3', '10-20'. Omit for all pages." },
|
|
41
41
|
force_method: {
|
|
42
42
|
type: "string",
|
|
43
|
-
enum: ["pdfjs", "mineru", "ocr"],
|
|
43
|
+
enum: ["pdfjs", "vlm", "mineru", "ocr"],
|
|
44
44
|
description: "Force a specific parsing method, skipping the escalation chain.",
|
|
45
45
|
},
|
|
46
|
+
scope: {
|
|
47
|
+
type: "string",
|
|
48
|
+
enum: ["workspace", "project"],
|
|
49
|
+
description: "Which directory to find the file in. 'workspace' (default) or 'project' (user's project folder).",
|
|
50
|
+
},
|
|
46
51
|
},
|
|
47
52
|
required: ["path"],
|
|
48
53
|
};
|
|
@@ -52,11 +57,19 @@ export class DocumentParseTool extends BaseTool {
|
|
|
52
57
|
const pathStr = input.path || "";
|
|
53
58
|
const pages = input.pages;
|
|
54
59
|
const force = input.force_method;
|
|
60
|
+
const scope = input.scope || "workspace";
|
|
55
61
|
|
|
56
62
|
if (!pathStr) return new ToolResult("No path provided", true);
|
|
63
|
+
if (scope === "project" && !this._workspace.projectDir) {
|
|
64
|
+
return new ToolResult("No project directory available", true);
|
|
65
|
+
}
|
|
57
66
|
|
|
58
67
|
let resolved;
|
|
59
|
-
try {
|
|
68
|
+
try {
|
|
69
|
+
resolved = scope === "project"
|
|
70
|
+
? this._workspace.resolveProjectPath(pathStr)
|
|
71
|
+
: this._workspace.resolvePath(pathStr);
|
|
72
|
+
}
|
|
60
73
|
catch (e) { return new ToolResult(e.message, true); }
|
|
61
74
|
|
|
62
75
|
if (!fs.existsSync(resolved) || !fs.statSync(resolved).isFile()) {
|
|
@@ -76,13 +89,21 @@ export class DocumentParseTool extends BaseTool {
|
|
|
76
89
|
if (force) return this._runMethod(force, resolved, pageRange);
|
|
77
90
|
|
|
78
91
|
// Escalation chain
|
|
79
|
-
// Level 1: pdfjs-dist
|
|
92
|
+
// Level 1: pdfjs-dist (free, local text extraction)
|
|
80
93
|
let result = await this._tryPdfjs(resolved, pageRange);
|
|
81
94
|
if (result && this._qualityOk(result)) {
|
|
82
95
|
return new ToolResult(this._formatOutput(result, "pdfjs", resolved));
|
|
83
96
|
}
|
|
84
97
|
|
|
85
|
-
// Level 2:
|
|
98
|
+
// Level 2: Provider VLM (vision model via API — more convenient than local OCR)
|
|
99
|
+
if (this._vlmApiKey && this._ocrModel) {
|
|
100
|
+
result = await this._tryVlm(resolved, pageRange);
|
|
101
|
+
if (result && this._qualityOk(result)) {
|
|
102
|
+
return new ToolResult(this._formatOutput(result, "vlm", resolved));
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
// Level 3: MineRU API (optional fallback)
|
|
86
107
|
if (this._mineruApiUrl) {
|
|
87
108
|
result = await this._tryMineru(resolved, pageRange);
|
|
88
109
|
if (result && this._qualityOk(result)) {
|
|
@@ -90,12 +111,6 @@ export class DocumentParseTool extends BaseTool {
|
|
|
90
111
|
}
|
|
91
112
|
}
|
|
92
113
|
|
|
93
|
-
// Level 3: OCR via SiliconFlow
|
|
94
|
-
if (this._sfApiKey && this._ocrModel) {
|
|
95
|
-
result = await this._tryOcr(resolved, pageRange);
|
|
96
|
-
if (result) return new ToolResult(this._formatOutput(result, "ocr", resolved));
|
|
97
|
-
}
|
|
98
|
-
|
|
99
114
|
if (result) return new ToolResult(this._formatOutput(result, "pdfjs (low quality)", resolved));
|
|
100
115
|
|
|
101
116
|
return new ToolResult(
|
|
@@ -108,7 +123,7 @@ export class DocumentParseTool extends BaseTool {
|
|
|
108
123
|
let result;
|
|
109
124
|
if (method === "pdfjs") result = await this._tryPdfjs(filePath, pageRange);
|
|
110
125
|
else if (method === "mineru") result = await this._tryMineru(filePath, pageRange);
|
|
111
|
-
else if (method === "ocr") result = await this.
|
|
126
|
+
else if (method === "ocr" || method === "vlm") result = await this._tryVlm(filePath, pageRange);
|
|
112
127
|
else return new ToolResult(`Unknown method: ${method}`, true);
|
|
113
128
|
|
|
114
129
|
if (result) return new ToolResult(this._formatOutput(result, method, filePath));
|
|
@@ -145,12 +160,80 @@ export class DocumentParseTool extends BaseTool {
|
|
|
145
160
|
return null;
|
|
146
161
|
}
|
|
147
162
|
|
|
148
|
-
async
|
|
149
|
-
//
|
|
150
|
-
//
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
163
|
+
async _tryVlm(filePath, pageRange) {
|
|
164
|
+
// Send page images to a VLM provider for OCR/interpretation.
|
|
165
|
+
// Renders PDF pages to PNG via pdfjs canvas, then sends base64 to VLM API.
|
|
166
|
+
if (!this._vlmApiKey || !this._ocrModel || !this._vlmBaseUrl) return null;
|
|
167
|
+
|
|
168
|
+
try {
|
|
169
|
+
const pdfjsLib = await import("pdfjs-dist/legacy/build/pdf.mjs");
|
|
170
|
+
const data = new Uint8Array(fs.readFileSync(filePath));
|
|
171
|
+
const doc = await pdfjsLib.getDocument({ data, useSystemFonts: true }).promise;
|
|
172
|
+
|
|
173
|
+
const start = pageRange ? pageRange[0] : 0;
|
|
174
|
+
const end = pageRange ? pageRange[1] : doc.numPages - 1;
|
|
175
|
+
const pages = [];
|
|
176
|
+
|
|
177
|
+
for (let i = Math.max(0, start); i <= Math.min(end, doc.numPages - 1); i++) {
|
|
178
|
+
const page = await doc.getPage(i + 1);
|
|
179
|
+
const viewport = page.getViewport({ scale: 2.0 }); // Higher res for OCR
|
|
180
|
+
|
|
181
|
+
// Use OffscreenCanvas or node-canvas if available, otherwise skip
|
|
182
|
+
let imageBase64;
|
|
183
|
+
try {
|
|
184
|
+
// In Node.js, pdfjs can render to a canvas-like object
|
|
185
|
+
// We'll use the simpler approach: convert page to image via the API
|
|
186
|
+
const { createCanvas } = await import("canvas").catch(() => ({ createCanvas: null }));
|
|
187
|
+
if (!createCanvas) {
|
|
188
|
+
// No canvas available — fall back to sending raw text content hint + page number
|
|
189
|
+
pages.push(`--- Page ${i + 1} (VLM) ---`);
|
|
190
|
+
continue;
|
|
191
|
+
}
|
|
192
|
+
const canvas = createCanvas(viewport.width, viewport.height);
|
|
193
|
+
const ctx = canvas.getContext("2d");
|
|
194
|
+
await page.render({ canvasContext: ctx, viewport }).promise;
|
|
195
|
+
imageBase64 = canvas.toBuffer("image/png").toString("base64");
|
|
196
|
+
} catch {
|
|
197
|
+
continue;
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
if (!imageBase64) continue;
|
|
201
|
+
|
|
202
|
+
// Call VLM API with the page image
|
|
203
|
+
const baseUrl = this._vlmBaseUrl.replace(/\/+$/, "");
|
|
204
|
+
const resp = await fetch(`${baseUrl}/chat/completions`, {
|
|
205
|
+
method: "POST",
|
|
206
|
+
headers: {
|
|
207
|
+
"Content-Type": "application/json",
|
|
208
|
+
"Authorization": `Bearer ${this._vlmApiKey}`,
|
|
209
|
+
},
|
|
210
|
+
body: JSON.stringify({
|
|
211
|
+
model: this._ocrModel,
|
|
212
|
+
messages: [
|
|
213
|
+
{ role: "system", content: "Extract all text from this document page. Preserve structure: headings, paragraphs, tables (as markdown), lists. Output clean text only." },
|
|
214
|
+
{ role: "user", content: [
|
|
215
|
+
{ type: "image_url", image_url: { url: `data:image/png;base64,${imageBase64}` } },
|
|
216
|
+
{ type: "text", text: "Extract all text from this page." },
|
|
217
|
+
]},
|
|
218
|
+
],
|
|
219
|
+
max_tokens: 4096,
|
|
220
|
+
}),
|
|
221
|
+
signal: AbortSignal.timeout(60000),
|
|
222
|
+
});
|
|
223
|
+
|
|
224
|
+
if (resp.ok) {
|
|
225
|
+
const result = await resp.json();
|
|
226
|
+
const text = result.choices?.[0]?.message?.content || "";
|
|
227
|
+
if (text.trim()) {
|
|
228
|
+
pages.push(`--- Page ${i + 1} ---\n${text.trim()}`);
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
return pages.length > 0 ? pages.join("\n\n") : null;
|
|
234
|
+
} catch {
|
|
235
|
+
return null;
|
|
236
|
+
}
|
|
154
237
|
}
|
|
155
238
|
|
|
156
239
|
_qualityOk(text) {
|
|
@@ -21,8 +21,10 @@ export class DocumentSearchTool extends BaseTool {
|
|
|
21
21
|
|
|
22
22
|
get description() {
|
|
23
23
|
return (
|
|
24
|
-
"Search for text across documents
|
|
25
|
-
"
|
|
24
|
+
"Search for text across documents. " +
|
|
25
|
+
"scope='workspace' (default) searches KC's workspace. " +
|
|
26
|
+
"scope='project' searches the user's project directory. " +
|
|
27
|
+
"Returns matching passages with file path and context. Supports plain text and regex queries."
|
|
26
28
|
);
|
|
27
29
|
}
|
|
28
30
|
|
|
@@ -31,9 +33,14 @@ export class DocumentSearchTool extends BaseTool {
|
|
|
31
33
|
type: "object",
|
|
32
34
|
properties: {
|
|
33
35
|
query: { type: "string", description: "Search query (plain text or regex pattern)" },
|
|
34
|
-
path: { type: "string", description: "Subdirectory to search in (default: entire
|
|
36
|
+
path: { type: "string", description: "Subdirectory to search in (default: entire scope root)" },
|
|
35
37
|
max_results: { type: "integer", description: `Maximum results to return (default: ${MAX_RESULTS})` },
|
|
36
38
|
regex: { type: "boolean", description: "Treat query as regex pattern (default: false)" },
|
|
39
|
+
scope: {
|
|
40
|
+
type: "string",
|
|
41
|
+
enum: ["workspace", "project"],
|
|
42
|
+
description: "Which directory to search. 'workspace' (default) or 'project'.",
|
|
43
|
+
},
|
|
37
44
|
},
|
|
38
45
|
required: ["query"],
|
|
39
46
|
};
|
|
@@ -44,11 +51,19 @@ export class DocumentSearchTool extends BaseTool {
|
|
|
44
51
|
const searchPath = input.path || ".";
|
|
45
52
|
const maxResults = input.max_results || MAX_RESULTS;
|
|
46
53
|
const useRegex = input.regex || false;
|
|
54
|
+
const scope = input.scope || "workspace";
|
|
47
55
|
|
|
48
56
|
if (!query) return new ToolResult("No query provided", true);
|
|
57
|
+
if (scope === "project" && !this._workspace.projectDir) {
|
|
58
|
+
return new ToolResult("No project directory available", true);
|
|
59
|
+
}
|
|
49
60
|
|
|
50
61
|
let searchDir;
|
|
51
|
-
try {
|
|
62
|
+
try {
|
|
63
|
+
searchDir = scope === "project"
|
|
64
|
+
? this._workspace.resolveProjectPath(searchPath)
|
|
65
|
+
: this._workspace.resolvePath(searchPath);
|
|
66
|
+
}
|
|
52
67
|
catch (e) { return new ToolResult(e.message, true); }
|
|
53
68
|
|
|
54
69
|
if (!fs.existsSync(searchDir) || !fs.statSync(searchDir).isDirectory()) {
|
|
@@ -62,8 +77,9 @@ export class DocumentSearchTool extends BaseTool {
|
|
|
62
77
|
return new ToolResult(`Invalid regex: ${e.message}`, true);
|
|
63
78
|
}
|
|
64
79
|
|
|
80
|
+
const baseDir = scope === "project" ? this._workspace.projectDir : this._workspace.cwd;
|
|
65
81
|
const results = [];
|
|
66
|
-
this._searchDir(searchDir, pattern, results, maxResults);
|
|
82
|
+
this._searchDir(searchDir, pattern, results, maxResults, baseDir);
|
|
67
83
|
|
|
68
84
|
if (results.length === 0) return new ToolResult(`No matches found for: ${query}`);
|
|
69
85
|
|
|
@@ -76,7 +92,7 @@ export class DocumentSearchTool extends BaseTool {
|
|
|
76
92
|
return new ToolResult(`Found ${results.length} match(es):\n\n${lines.join("\n")}`);
|
|
77
93
|
}
|
|
78
94
|
|
|
79
|
-
_searchDir(dir, pattern, results, maxResults) {
|
|
95
|
+
_searchDir(dir, pattern, results, maxResults, baseDir) {
|
|
80
96
|
let entries;
|
|
81
97
|
try { entries = fs.readdirSync(dir, { withFileTypes: true }); }
|
|
82
98
|
catch { return; }
|
|
@@ -87,7 +103,7 @@ export class DocumentSearchTool extends BaseTool {
|
|
|
87
103
|
|
|
88
104
|
if (entry.isDirectory()) {
|
|
89
105
|
if (entry.name.startsWith(".") || entry.name === "node_modules" || entry.name === "__pycache__") continue;
|
|
90
|
-
this._searchDir(fullPath, pattern, results, maxResults);
|
|
106
|
+
this._searchDir(fullPath, pattern, results, maxResults, baseDir);
|
|
91
107
|
} else if (entry.isFile() && TEXT_EXTENSIONS.has(path.extname(entry.name).toLowerCase())) {
|
|
92
108
|
let content;
|
|
93
109
|
try { content = fs.readFileSync(fullPath, "utf-8"); }
|
|
@@ -100,7 +116,7 @@ export class DocumentSearchTool extends BaseTool {
|
|
|
100
116
|
const end = Math.min(content.length, match.index + match[0].length + CONTEXT_CHARS);
|
|
101
117
|
const context = content.slice(start, end).trim();
|
|
102
118
|
const lineNum = content.slice(0, match.index).split("\n").length;
|
|
103
|
-
const relPath = path.relative(
|
|
119
|
+
const relPath = path.relative(baseDir, fullPath);
|
|
104
120
|
|
|
105
121
|
results.push({ file: relPath, line: lineNum, match: match[0], context });
|
|
106
122
|
if (results.length >= maxResults) break;
|
|
@@ -23,8 +23,9 @@ export class SandboxExecTool extends BaseTool {
|
|
|
23
23
|
|
|
24
24
|
get description() {
|
|
25
25
|
return (
|
|
26
|
-
"Execute a shell command
|
|
27
|
-
"
|
|
26
|
+
"Execute a shell command. " +
|
|
27
|
+
"cwd='workspace' (default) runs in KC's workspace. " +
|
|
28
|
+
"cwd='project' runs in the user's project directory. " +
|
|
28
29
|
"Pipes, redirects, and chained commands (&&) are supported."
|
|
29
30
|
);
|
|
30
31
|
}
|
|
@@ -37,6 +38,11 @@ export class SandboxExecTool extends BaseTool {
|
|
|
37
38
|
type: "string",
|
|
38
39
|
description: "The shell command to execute (e.g. 'python script.py', 'ls -la')",
|
|
39
40
|
},
|
|
41
|
+
cwd: {
|
|
42
|
+
type: "string",
|
|
43
|
+
enum: ["workspace", "project"],
|
|
44
|
+
description: "Working directory. 'workspace' (default) = KC's workspace. 'project' = user's project directory.",
|
|
45
|
+
},
|
|
40
46
|
},
|
|
41
47
|
required: ["command"],
|
|
42
48
|
};
|
|
@@ -44,12 +50,17 @@ export class SandboxExecTool extends BaseTool {
|
|
|
44
50
|
|
|
45
51
|
async execute(input) {
|
|
46
52
|
const command = input.command || "";
|
|
53
|
+
const cwdScope = input.cwd || "workspace";
|
|
47
54
|
if (!command.trim()) {
|
|
48
55
|
return new ToolResult("No command provided", true);
|
|
49
56
|
}
|
|
50
57
|
|
|
58
|
+
const effectiveCwd = (cwdScope === "project" && this._workspace.projectDir)
|
|
59
|
+
? this._workspace.projectDir
|
|
60
|
+
: this._workspace.cwd;
|
|
61
|
+
|
|
51
62
|
try {
|
|
52
|
-
const { output, code } = await this._run(command);
|
|
63
|
+
const { output, code } = await this._run(command, effectiveCwd);
|
|
53
64
|
let result = output;
|
|
54
65
|
if (result.length > MAX_OUTPUT) {
|
|
55
66
|
result = result.slice(0, MAX_OUTPUT) + "\n[truncated]";
|
|
@@ -70,11 +81,11 @@ export class SandboxExecTool extends BaseTool {
|
|
|
70
81
|
* @param {string} command
|
|
71
82
|
* @returns {Promise<{output: string, code: number}>}
|
|
72
83
|
*/
|
|
73
|
-
_run(command) {
|
|
84
|
+
_run(command, cwd) {
|
|
74
85
|
return new Promise((resolve, reject) => {
|
|
75
86
|
const controller = new AbortController();
|
|
76
87
|
const proc = spawn("sh", ["-c", command], {
|
|
77
|
-
cwd
|
|
88
|
+
cwd,
|
|
78
89
|
stdio: ["ignore", "pipe", "pipe"],
|
|
79
90
|
signal: controller.signal,
|
|
80
91
|
});
|