kc-beta 0.2.1 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. package/QUICKSTART.md +149 -0
  2. package/README.md +207 -0
  3. package/package.json +12 -2
  4. package/src/agent/context.js +8 -4
  5. package/src/agent/engine.js +154 -9
  6. package/src/agent/pipelines/initializer.js +53 -8
  7. package/src/agent/session-state.js +1 -0
  8. package/src/agent/skill-loader.js +13 -1
  9. package/src/agent/task-manager.js +186 -0
  10. package/src/agent/tools/document-parse.js +99 -21
  11. package/src/agent/tools/document-search.js +24 -8
  12. package/src/agent/tools/sandbox-exec.js +16 -5
  13. package/src/agent/tools/workspace-file.js +47 -20
  14. package/src/agent/workspace.js +24 -1
  15. package/src/cli/components.js +42 -1
  16. package/src/cli/config.js +100 -6
  17. package/src/cli/index.js +39 -2
  18. package/src/cli/onboard.js +70 -1
  19. package/src/config.js +43 -3
  20. package/src/model-tiers.json +153 -0
  21. package/src/providers.js +63 -66
  22. package/template/AGENT.md +20 -0
  23. package/template/skills/en/meta/compliance-judgment/SKILL.md +10 -42
  24. package/template/skills/en/meta/document-chunking/SKILL.md +32 -0
  25. package/template/skills/en/meta/document-parsing/SKILL.md +11 -18
  26. package/template/skills/en/meta/entity-extraction/SKILL.md +13 -28
  27. package/template/skills/en/meta/tree-processing/SKILL.md +19 -1
  28. package/template/skills/en/meta-meta/auto-model-selection/SKILL.md +53 -0
  29. package/template/skills/en/meta-meta/pdf-review-dashboard/SKILL.md +57 -0
  30. package/template/skills/en/meta-meta/pdf-review-dashboard/scripts/generate_review.js +262 -0
  31. package/template/skills/en/meta-meta/rule-extraction/SKILL.md +24 -1
  32. package/template/skills/en/meta-meta/skill-authoring/SKILL.md +6 -0
  33. package/template/skills/en/meta-meta/skill-to-workflow/SKILL.md +4 -0
  34. package/template/skills/zh/meta/compliance-judgment/SKILL.md +41 -262
  35. package/template/skills/zh/meta/document-chunking/SKILL.md +32 -0
  36. package/template/skills/zh/meta/document-parsing/SKILL.md +65 -132
  37. package/template/skills/zh/meta/entity-extraction/SKILL.md +68 -230
  38. package/template/skills/zh/meta/tree-processing/SKILL.md +82 -194
  39. package/template/skills/zh/meta-meta/auto-model-selection/SKILL.md +51 -0
  40. package/template/skills/zh/meta-meta/pdf-review-dashboard/SKILL.md +55 -0
  41. package/template/skills/zh/meta-meta/pdf-review-dashboard/scripts/generate_review.js +262 -0
  42. package/template/skills/zh/meta-meta/rule-extraction/SKILL.md +79 -164
  43. package/template/skills/zh/meta-meta/skill-authoring/SKILL.md +64 -185
  44. package/template/skills/zh/meta-meta/skill-to-workflow/SKILL.md +95 -216
@@ -1,9 +1,13 @@
1
1
  import fs from "node:fs";
2
2
  import path from "node:path";
3
3
  import os from "node:os";
4
+ import { fileURLToPath } from "node:url";
4
5
  import { Phase, PipelineEvent } from "./index.js";
5
6
  import { Pipeline } from "./base.js";
6
7
 
8
+ const __dirname = path.dirname(fileURLToPath(import.meta.url));
9
+ const AGENT_MD_TEMPLATE = path.resolve(__dirname, "../../../template/AGENT.md");
10
+
7
11
  const REQUIRED_DIRS = ["rules", "samples", "input", "output", "logs", "workflows", "rule_skills"];
8
12
 
9
13
  const DEFAULT_ENV = `# === KC Agent Project Configuration ===
@@ -74,6 +78,12 @@ export class ProjectInitializer extends Pipeline {
74
78
  fs.writeFileSync(manifestPath, JSON.stringify({ version: "0.1.0", entries: [] }, null, 2), "utf-8");
75
79
  }
76
80
 
81
+ // AGENT.md — per-project context (agent can modify)
82
+ const agentMdPath = path.join(this._workspace.cwd, "AGENT.md");
83
+ if (!fs.existsSync(agentMdPath) && fs.existsSync(AGENT_MD_TEMPLATE)) {
84
+ fs.copyFileSync(AGENT_MD_TEMPLATE, agentMdPath);
85
+ }
86
+
77
87
  this.workspaceCreated = true;
78
88
  this._checkRegulations();
79
89
  this._checkSamples();
@@ -81,15 +91,41 @@ export class ProjectInitializer extends Pipeline {
81
91
  }
82
92
 
83
93
  _checkRegulations() {
94
+ // Check workspace rules/
84
95
  const dir = path.join(this._workspace.cwd, "rules");
85
- if (!fs.existsSync(dir)) { this.hasRegulations = false; return; }
86
- this.hasRegulations = fs.readdirSync(dir, { withFileTypes: true }).some((e) => e.isFile());
96
+ if (fs.existsSync(dir) && fs.readdirSync(dir, { withFileTypes: true }).some((e) => e.isFile())) {
97
+ this.hasRegulations = true; return;
98
+ }
99
+ // Check project dir rules/ (case-insensitive)
100
+ if (this._workspace.projectDir) {
101
+ for (const name of ["rules", "Rules", "RULES", "regulations", "Regulations"]) {
102
+ const pdir = path.join(this._workspace.projectDir, name);
103
+ if (fs.existsSync(pdir) && fs.statSync(pdir).isDirectory() &&
104
+ fs.readdirSync(pdir, { withFileTypes: true }).some((e) => e.isFile())) {
105
+ this.hasRegulations = true; return;
106
+ }
107
+ }
108
+ }
109
+ this.hasRegulations = false;
87
110
  }
88
111
 
89
112
  _checkSamples() {
113
+ // Check workspace samples/
90
114
  const dir = path.join(this._workspace.cwd, "samples");
91
- if (!fs.existsSync(dir)) { this.hasSamples = false; return; }
92
- this.hasSamples = fs.readdirSync(dir, { withFileTypes: true }).some((e) => e.isFile());
115
+ if (fs.existsSync(dir) && fs.readdirSync(dir, { withFileTypes: true }).some((e) => e.isFile())) {
116
+ this.hasSamples = true; return;
117
+ }
118
+ // Check project dir samples/ (case-insensitive)
119
+ if (this._workspace.projectDir) {
120
+ for (const name of ["samples", "Samples", "SAMPLES", "sample", "Sample"]) {
121
+ const pdir = path.join(this._workspace.projectDir, name);
122
+ if (fs.existsSync(pdir) && fs.statSync(pdir).isDirectory() &&
123
+ fs.readdirSync(pdir, { withFileTypes: true }).some((e) => e.isFile())) {
124
+ this.hasSamples = true; return;
125
+ }
126
+ }
127
+ }
128
+ this.hasSamples = false;
93
129
  }
94
130
 
95
131
  _checkConfig() {
@@ -115,10 +151,13 @@ export class ProjectInitializer extends Pipeline {
115
151
  const completed = [], pending = [];
116
152
  if (this.workspaceCreated) completed.push("Workspace structure created"); else pending.push("Workspace structure");
117
153
  if (this.configReady) completed.push("API keys configured"); else pending.push("API keys (check .env)");
118
- if (this.hasRegulations) completed.push("Regulation documents in rules/"); else pending.push("Regulation documents in rules/");
119
- if (this.hasSamples) completed.push("Sample documents in samples/"); else pending.push("Sample documents in samples/");
154
+ if (this.hasRegulations) completed.push("Regulation documents found"); else pending.push("Regulation documents (add to rules/ in workspace or project dir)");
155
+ if (this.hasSamples) completed.push("Sample documents found"); else pending.push("Sample documents (add to samples/ in workspace or project dir)");
120
156
 
121
157
  const parts = ["## Phase: BOOTSTRAP\nSet up the workspace and understand the developer user's verification scenario. Bundled methodology skills are available in the workspace skills/ directory."];
158
+ if (this._workspace.projectDir) {
159
+ parts.push(`**Project directory:** ${this._workspace.projectDir}\nUse scope="project" to read files from the user's project folder.`);
160
+ }
122
161
  if (completed.length) parts.push("### Done\n" + completed.map((c) => `- [x] ${c}`).join("\n"));
123
162
  if (pending.length) parts.push("### Needed\n" + pending.map((p) => `- [ ] ${p}`).join("\n"));
124
163
 
@@ -135,14 +174,20 @@ export class ProjectInitializer extends Pipeline {
135
174
  if (toolName === "workspace_file") {
136
175
  const op = toolInput.operation || "";
137
176
  const p = toolInput.path || "";
138
- if (op === "write") {
177
+ const scope = toolInput.scope || "workspace";
178
+ if (op === "write" && scope === "workspace") {
139
179
  if (p.startsWith("rules/")) this.hasRegulations = true;
140
180
  else if (p.startsWith("samples/")) this.hasSamples = true;
141
181
  else if (p === ".env") this._checkConfig();
142
- } else if (op === "list") {
182
+ } else if (op === "list" || op === "read") {
183
+ // Re-check after any list/read — project dir files may satisfy criteria
143
184
  this._checkRegulations();
144
185
  this._checkSamples();
145
186
  }
187
+ } else if (toolName === "document_parse") {
188
+ // Parsing a document from project dir counts as having files
189
+ this._checkRegulations();
190
+ this._checkSamples();
146
191
  }
147
192
 
148
193
  if (!wasReady && this.exitCriteriaMet()) {
@@ -29,6 +29,7 @@ export class SessionState {
29
29
  version: 1,
30
30
  sessionId: engine.workspace.sessionId,
31
31
  currentPhase: engine.currentPhase,
32
+ projectDir: engine.workspace.projectDir || null,
32
33
  phaseSummaries: engine._phaseSummaries || [],
33
34
  lastEventSeq: engine.eventLog?.currentSeq || 0,
34
35
  createdAt: this._loadRaw()?.createdAt || new Date().toISOString(),
@@ -130,7 +130,19 @@ export class SkillLoader {
130
130
 
131
131
  const frontmatter = match[1];
132
132
  const name = frontmatter.match(/^name:\s*(.+)$/m)?.[1]?.trim() || "";
133
- const description = frontmatter.match(/^description:\s*(.+)$/m)?.[1]?.trim() || "";
133
+
134
+ // Handle both single-line and multi-line (YAML >) descriptions
135
+ let description = "";
136
+ const descMatch = frontmatter.match(/^description:\s*(.+)$/m);
137
+ if (descMatch && descMatch[1].trim() === ">") {
138
+ // Multi-line: capture indented lines after "description: >"
139
+ const multiMatch = frontmatter.match(/^description:\s*>\s*\n((?:[ \t]+.+\n?)*)/m);
140
+ if (multiMatch) {
141
+ description = multiMatch[1].replace(/^[ \t]+/gm, "").replace(/\n/g, " ").trim();
142
+ }
143
+ } else if (descMatch) {
144
+ description = descMatch[1].trim();
145
+ }
134
146
  return { name, description };
135
147
  } catch {
136
148
  return {};
@@ -0,0 +1,186 @@
1
+ import fs from "node:fs";
2
+ import path from "node:path";
3
+
4
/**
 * Manages a per-session task list for ralph-loop style autonomous execution.
 * Tasks are generated from KC's rule catalog — each rule becomes a task.
 * The list is persisted as a JSON array in `<workspacePath>/tasks.json` and is
 * rewritten synchronously after every mutation.
 */
export class TaskManager {
  /**
   * @param {string} workspacePath - Session workspace directory
   */
  constructor(workspacePath) {
    this._path = path.join(workspacePath, "tasks.json");
    this._tasks = [];
    this._load();
  }

  // --- Task CRUD ---

  /**
   * Add a task to the list and persist it. A task whose `id` already exists
   * is ignored (no write happens).
   * @param {{ id: string, title: string, phase: string, ruleId?: string }} task
   */
  addTask({ id, title, phase, ruleId }) {
    if (this._insert({ id, title, phase, ruleId })) this.save();
  }

  /**
   * Update a task's status and optional summary, then persist.
   * Unknown ids are ignored. Moving to "completed" or "failed" stamps
   * `completedAt` with the current time.
   * @param {string} id
   * @param {{ status?: string, summary?: string }} updates
   */
  updateTask(id, { status, summary } = {}) {
    const task = this._tasks.find((t) => t.id === id);
    if (!task) return;
    if (status) {
      task.status = status;
      if (status === "completed" || status === "failed") {
        task.completedAt = new Date().toISOString();
      }
    }
    if (summary !== undefined) task.summary = summary;
    this.save();
  }

  /**
   * Get the next pending task (first in insertion order).
   * @returns {object|null}
   */
  getNextPending() {
    return this._tasks.find((t) => t.status === "pending") || null;
  }

  /**
   * Get all tasks.
   * @returns {Array} Shallow copy of the list; the task objects are shared.
   */
  getAllTasks() {
    return [...this._tasks];
  }

  /**
   * Check if there are any tasks at all.
   * @returns {boolean}
   */
  get hasTasks() {
    return this._tasks.length > 0;
  }

  // --- Bulk creation from rule catalog ---

  /**
   * Create one task per rule for a given phase.
   * Reads rules from the provided array (typically from rules/catalog.json).
   * Rules without an `id`/`rule_id` are skipped: they cannot be tracked and
   * would otherwise all collapse into a single "undefined-<phase>" task.
   * The task file is written once for the whole batch, not once per rule.
   * @param {Array<{id?: string, rule_id?: string, title?: string, description?: string}>} rules
   * @param {string} phase - The phase these tasks belong to
   */
  createRuleTasks(rules, phase) {
    if (!Array.isArray(rules)) return;

    let added = false;
    for (const rule of rules) {
      const ruleId = rule?.id || rule?.rule_id;
      if (!ruleId) continue;
      const title = rule.title || rule.description || ruleId;
      const inserted = this._insert({
        id: `${ruleId}-${phase}`,
        title: String(title),
        phase,
        ruleId,
      });
      added = added || inserted;
    }
    if (added) this.save();
  }

  // --- Progress ---

  /**
   * Count tasks by status in a single pass. Tasks with an unrecognised status
   * contribute to `total` only.
   * @returns {{ total: number, completed: number, inProgress: number, pending: number, failed: number }}
   */
  get progress() {
    const counts = { total: this._tasks.length, completed: 0, inProgress: 0, pending: 0, failed: 0 };
    for (const { status } of this._tasks) {
      if (status === "completed") counts.completed += 1;
      else if (status === "in_progress") counts.inProgress += 1;
      else if (status === "failed") counts.failed += 1;
      else if (status === "pending") counts.pending += 1;
    }
    return counts;
  }

  /**
   * Format task list for injection into system prompt context.
   * Compact checklist — not conversation history.
   * @returns {string} Empty string when there are no tasks.
   */
  describeForContext() {
    if (this._tasks.length === 0) return "";

    const { total, completed } = this.progress;
    const current = this._tasks.find((t) => t.status === "in_progress");
    const currentPhase = current?.phase || this._tasks.find((t) => t.status === "pending")?.phase || "";

    // Same `ruleId || id` fallback as the checklist lines below, so a task
    // created without a ruleId is never rendered as "null".
    const currentLabel = current ? ` | Current: ${current.ruleId || current.id} — ${current.title}` : "";
    const phaseLabel = currentPhase ? ` | Phase: ${currentPhase}` : "";

    const lines = [
      "## Task Progress",
      `${completed}/${total} completed${phaseLabel}${currentLabel}`,
      "",
    ];

    for (const t of this._tasks) {
      const mark = t.status === "completed" ? "[x]"
        : t.status === "in_progress" ? "[>]"
        : t.status === "failed" ? "[!]"
        : "[ ]";
      const arrow = t.status === "in_progress" ? " <-- current" : "";
      lines.push(`- ${mark} ${t.ruleId || t.id}: ${t.title}${arrow}`);
    }

    return lines.join("\n");
  }

  /**
   * Format for /tasks slash command (more detailed than context injection).
   * @returns {string}
   */
  formatForDisplay() {
    if (this._tasks.length === 0) return "No tasks. Tasks are created when rules are extracted.";

    const { total, completed, pending, failed } = this.progress;
    const lines = [
      `Tasks: ${completed}/${total} completed${failed ? `, ${failed} failed` : ""}, ${pending} pending`,
      "",
    ];

    for (const t of this._tasks) {
      const icon = t.status === "completed" ? "✓"
        : t.status === "in_progress" ? "▸"
        : t.status === "failed" ? "✗"
        : "·";
      lines.push(`  ${icon} ${t.ruleId || t.id}  ${t.title} (${t.status})`);
    }

    return lines.join("\n");
  }

  // --- Persistence ---

  /**
   * Write the full task list to tasks.json (pretty-printed, UTF-8).
   */
  save() {
    fs.writeFileSync(this._path, JSON.stringify(this._tasks, null, 2), "utf-8");
  }

  /**
   * Append a new pending task in memory without persisting.
   * @param {{ id: string, title: string, phase: string, ruleId?: string }} task
   * @returns {boolean} true if the task was added, false if the id already exists
   */
  _insert({ id, title, phase, ruleId }) {
    if (this._tasks.some((t) => t.id === id)) return false;
    this._tasks.push({
      id,
      title,
      phase,
      ruleId: ruleId || null,
      status: "pending",
      summary: null,
      createdAt: new Date().toISOString(),
      completedAt: null,
    });
    return true;
  }

  /**
   * Load tasks from disk. Best-effort: a missing, unreadable or malformed
   * file leaves the list empty rather than failing session start-up.
   */
  _load() {
    let parsed;
    try {
      parsed = JSON.parse(fs.readFileSync(this._path, "utf-8"));
    } catch {
      // Missing file or invalid JSON — start with an empty list.
      this._tasks = [];
      return;
    }
    // Valid JSON that is not an array (e.g. `{}`) would break every
    // find/filter call later, so accept only arrays of objects.
    this._tasks = Array.isArray(parsed)
      ? parsed.filter((t) => t !== null && typeof t === "object")
      : [];
  }
}
@@ -12,13 +12,13 @@ const MIN_CHARS_PER_PAGE = 50;
12
12
  * Level 3: OCR models via SiliconFlow — fallback via vision models
13
13
  */
14
14
  export class DocumentParseTool extends BaseTool {
15
- constructor(workspace, { mineruApiUrl, mineruApiKey, llmApiKey, llmBaseUrl, siliconflowApiKey, siliconflowBaseUrl, ocrModel } = {}) {
15
+ constructor(workspace, { mineruApiUrl, mineruApiKey, llmApiKey, llmBaseUrl, ocrModel } = {}) {
16
16
  super();
17
17
  this._workspace = workspace;
18
18
  this._mineruApiUrl = mineruApiUrl || "";
19
19
  this._mineruApiKey = mineruApiKey || "";
20
- this._sfApiKey = llmApiKey || siliconflowApiKey || "";
21
- this._sfBaseUrl = llmBaseUrl || siliconflowBaseUrl || "https://api.siliconflow.cn/v1";
20
+ this._vlmApiKey = llmApiKey || "";
21
+ this._vlmBaseUrl = (llmBaseUrl || "").replace(/\/+$/, "");
22
22
  this._ocrModel = ocrModel || "";
23
23
  }
24
24
 
@@ -36,13 +36,18 @@ export class DocumentParseTool extends BaseTool {
36
36
  return {
37
37
  type: "object",
38
38
  properties: {
39
- path: { type: "string", description: "Relative path to the document in the workspace" },
39
+ path: { type: "string", description: "Relative path to the document" },
40
40
  pages: { type: "string", description: "Page range to extract, e.g. '1-5', '3', '10-20'. Omit for all pages." },
41
41
  force_method: {
42
42
  type: "string",
43
- enum: ["pdfjs", "mineru", "ocr"],
43
+ enum: ["pdfjs", "vlm", "mineru", "ocr"],
44
44
  description: "Force a specific parsing method, skipping the escalation chain.",
45
45
  },
46
+ scope: {
47
+ type: "string",
48
+ enum: ["workspace", "project"],
49
+ description: "Which directory to find the file in. 'workspace' (default) or 'project' (user's project folder).",
50
+ },
46
51
  },
47
52
  required: ["path"],
48
53
  };
@@ -52,11 +57,19 @@ export class DocumentParseTool extends BaseTool {
52
57
  const pathStr = input.path || "";
53
58
  const pages = input.pages;
54
59
  const force = input.force_method;
60
+ const scope = input.scope || "workspace";
55
61
 
56
62
  if (!pathStr) return new ToolResult("No path provided", true);
63
+ if (scope === "project" && !this._workspace.projectDir) {
64
+ return new ToolResult("No project directory available", true);
65
+ }
57
66
 
58
67
  let resolved;
59
- try { resolved = this._workspace.resolvePath(pathStr); }
68
+ try {
69
+ resolved = scope === "project"
70
+ ? this._workspace.resolveProjectPath(pathStr)
71
+ : this._workspace.resolvePath(pathStr);
72
+ }
60
73
  catch (e) { return new ToolResult(e.message, true); }
61
74
 
62
75
  if (!fs.existsSync(resolved) || !fs.statSync(resolved).isFile()) {
@@ -76,13 +89,21 @@ export class DocumentParseTool extends BaseTool {
76
89
  if (force) return this._runMethod(force, resolved, pageRange);
77
90
 
78
91
  // Escalation chain
79
- // Level 1: pdfjs-dist
92
+ // Level 1: pdfjs-dist (free, local text extraction)
80
93
  let result = await this._tryPdfjs(resolved, pageRange);
81
94
  if (result && this._qualityOk(result)) {
82
95
  return new ToolResult(this._formatOutput(result, "pdfjs", resolved));
83
96
  }
84
97
 
85
- // Level 2: MineRU API
98
+ // Level 2: Provider VLM (vision model via API — more convenient than local OCR)
99
+ if (this._vlmApiKey && this._ocrModel) {
100
+ result = await this._tryVlm(resolved, pageRange);
101
+ if (result && this._qualityOk(result)) {
102
+ return new ToolResult(this._formatOutput(result, "vlm", resolved));
103
+ }
104
+ }
105
+
106
+ // Level 3: MineRU API (optional fallback)
86
107
  if (this._mineruApiUrl) {
87
108
  result = await this._tryMineru(resolved, pageRange);
88
109
  if (result && this._qualityOk(result)) {
@@ -90,12 +111,6 @@ export class DocumentParseTool extends BaseTool {
90
111
  }
91
112
  }
92
113
 
93
- // Level 3: OCR via SiliconFlow
94
- if (this._sfApiKey && this._ocrModel) {
95
- result = await this._tryOcr(resolved, pageRange);
96
- if (result) return new ToolResult(this._formatOutput(result, "ocr", resolved));
97
- }
98
-
99
114
  if (result) return new ToolResult(this._formatOutput(result, "pdfjs (low quality)", resolved));
100
115
 
101
116
  return new ToolResult(
@@ -108,7 +123,7 @@ export class DocumentParseTool extends BaseTool {
108
123
  let result;
109
124
  if (method === "pdfjs") result = await this._tryPdfjs(filePath, pageRange);
110
125
  else if (method === "mineru") result = await this._tryMineru(filePath, pageRange);
111
- else if (method === "ocr") result = await this._tryOcr(filePath, pageRange);
126
+ else if (method === "ocr" || method === "vlm") result = await this._tryVlm(filePath, pageRange);
112
127
  else return new ToolResult(`Unknown method: ${method}`, true);
113
128
 
114
129
  if (result) return new ToolResult(this._formatOutput(result, method, filePath));
@@ -145,12 +160,75 @@ export class DocumentParseTool extends BaseTool {
145
160
  return null;
146
161
  }
147
162
 
148
- async _tryOcr(filePath, pageRange) {
149
- // OCR requires sending page images to a vision model API.
150
- // Without a native image renderer, we delegate to the agent
151
- // to use sandbox_exec for custom OCR pipelines.
152
- // For now, return null to signal OCR is not available natively.
153
- return null;
163
+ async _tryVlm(filePath, pageRange) {
164
+ // Send page images to a VLM provider for OCR/interpretation.
165
+ // Renders PDF pages to PNG via pdfjs canvas, then sends base64 to VLM API.
166
+ if (!this._vlmApiKey || !this._ocrModel || !this._vlmBaseUrl) return null;
167
+
168
+ try {
169
+ const pdfjsLib = await import("pdfjs-dist/legacy/build/pdf.mjs");
170
+ const data = new Uint8Array(fs.readFileSync(filePath));
171
+ const doc = await pdfjsLib.getDocument({ data, useSystemFonts: true }).promise;
172
+
173
+ const start = pageRange ? pageRange[0] : 0;
174
+ const end = pageRange ? pageRange[1] : doc.numPages - 1;
175
+ const pages = [];
176
+
177
+ for (let i = Math.max(0, start); i <= Math.min(end, doc.numPages - 1); i++) {
178
+ const page = await doc.getPage(i + 1);
179
+ const viewport = page.getViewport({ scale: 2.0 }); // Higher res for OCR
180
+
181
+ // Render to PNG via node-canvas if available; otherwise skip VLM and let
182
+ // the escalation chain fall through to MineRU.
183
+ let imageBase64;
184
+ try {
185
+ const { createCanvas } = await import("canvas").catch(() => ({ createCanvas: null }));
186
+ if (!createCanvas) return null;
187
+ const canvas = createCanvas(viewport.width, viewport.height);
188
+ const ctx = canvas.getContext("2d");
189
+ await page.render({ canvasContext: ctx, viewport }).promise;
190
+ imageBase64 = canvas.toBuffer("image/png").toString("base64");
191
+ } catch {
192
+ continue;
193
+ }
194
+
195
+ if (!imageBase64) continue;
196
+
197
+ // Call VLM API with the page image
198
+ const baseUrl = this._vlmBaseUrl.replace(/\/+$/, "");
199
+ const resp = await fetch(`${baseUrl}/chat/completions`, {
200
+ method: "POST",
201
+ headers: {
202
+ "Content-Type": "application/json",
203
+ "Authorization": `Bearer ${this._vlmApiKey}`,
204
+ },
205
+ body: JSON.stringify({
206
+ model: this._ocrModel,
207
+ messages: [
208
+ { role: "system", content: "Extract all text from this document page. Preserve structure: headings, paragraphs, tables (as markdown), lists. Output clean text only." },
209
+ { role: "user", content: [
210
+ { type: "image_url", image_url: { url: `data:image/png;base64,${imageBase64}` } },
211
+ { type: "text", text: "Extract all text from this page." },
212
+ ]},
213
+ ],
214
+ max_tokens: 4096,
215
+ }),
216
+ signal: AbortSignal.timeout(60000),
217
+ });
218
+
219
+ if (resp.ok) {
220
+ const result = await resp.json();
221
+ const text = result.choices?.[0]?.message?.content || "";
222
+ if (text.trim()) {
223
+ pages.push(`--- Page ${i + 1} ---\n${text.trim()}`);
224
+ }
225
+ }
226
+ }
227
+
228
+ return pages.length > 0 ? pages.join("\n\n") : null;
229
+ } catch {
230
+ return null;
231
+ }
154
232
  }
155
233
 
156
234
  _qualityOk(text) {
@@ -21,8 +21,10 @@ export class DocumentSearchTool extends BaseTool {
21
21
 
22
22
  get description() {
23
23
  return (
24
- "Search for text across documents in the workspace. Returns matching " +
25
- "passages with file path and context. Supports plain text and regex queries."
24
+ "Search for text across documents. " +
25
+ "scope='workspace' (default) searches KC's workspace. " +
26
+ "scope='project' searches the user's project directory. " +
27
+ "Returns matching passages with file path and context. Supports plain text and regex queries."
26
28
  );
27
29
  }
28
30
 
@@ -31,9 +33,14 @@ export class DocumentSearchTool extends BaseTool {
31
33
  type: "object",
32
34
  properties: {
33
35
  query: { type: "string", description: "Search query (plain text or regex pattern)" },
34
- path: { type: "string", description: "Subdirectory to search in (default: entire workspace)" },
36
+ path: { type: "string", description: "Subdirectory to search in (default: entire scope root)" },
35
37
  max_results: { type: "integer", description: `Maximum results to return (default: ${MAX_RESULTS})` },
36
38
  regex: { type: "boolean", description: "Treat query as regex pattern (default: false)" },
39
+ scope: {
40
+ type: "string",
41
+ enum: ["workspace", "project"],
42
+ description: "Which directory to search. 'workspace' (default) or 'project'.",
43
+ },
37
44
  },
38
45
  required: ["query"],
39
46
  };
@@ -44,11 +51,19 @@ export class DocumentSearchTool extends BaseTool {
44
51
  const searchPath = input.path || ".";
45
52
  const maxResults = input.max_results || MAX_RESULTS;
46
53
  const useRegex = input.regex || false;
54
+ const scope = input.scope || "workspace";
47
55
 
48
56
  if (!query) return new ToolResult("No query provided", true);
57
+ if (scope === "project" && !this._workspace.projectDir) {
58
+ return new ToolResult("No project directory available", true);
59
+ }
49
60
 
50
61
  let searchDir;
51
- try { searchDir = this._workspace.resolvePath(searchPath); }
62
+ try {
63
+ searchDir = scope === "project"
64
+ ? this._workspace.resolveProjectPath(searchPath)
65
+ : this._workspace.resolvePath(searchPath);
66
+ }
52
67
  catch (e) { return new ToolResult(e.message, true); }
53
68
 
54
69
  if (!fs.existsSync(searchDir) || !fs.statSync(searchDir).isDirectory()) {
@@ -62,8 +77,9 @@ export class DocumentSearchTool extends BaseTool {
62
77
  return new ToolResult(`Invalid regex: ${e.message}`, true);
63
78
  }
64
79
 
80
+ const baseDir = scope === "project" ? this._workspace.projectDir : this._workspace.cwd;
65
81
  const results = [];
66
- this._searchDir(searchDir, pattern, results, maxResults);
82
+ this._searchDir(searchDir, pattern, results, maxResults, baseDir);
67
83
 
68
84
  if (results.length === 0) return new ToolResult(`No matches found for: ${query}`);
69
85
 
@@ -76,7 +92,7 @@ export class DocumentSearchTool extends BaseTool {
76
92
  return new ToolResult(`Found ${results.length} match(es):\n\n${lines.join("\n")}`);
77
93
  }
78
94
 
79
- _searchDir(dir, pattern, results, maxResults) {
95
+ _searchDir(dir, pattern, results, maxResults, baseDir) {
80
96
  let entries;
81
97
  try { entries = fs.readdirSync(dir, { withFileTypes: true }); }
82
98
  catch { return; }
@@ -87,7 +103,7 @@ export class DocumentSearchTool extends BaseTool {
87
103
 
88
104
  if (entry.isDirectory()) {
89
105
  if (entry.name.startsWith(".") || entry.name === "node_modules" || entry.name === "__pycache__") continue;
90
- this._searchDir(fullPath, pattern, results, maxResults);
106
+ this._searchDir(fullPath, pattern, results, maxResults, baseDir);
91
107
  } else if (entry.isFile() && TEXT_EXTENSIONS.has(path.extname(entry.name).toLowerCase())) {
92
108
  let content;
93
109
  try { content = fs.readFileSync(fullPath, "utf-8"); }
@@ -100,7 +116,7 @@ export class DocumentSearchTool extends BaseTool {
100
116
  const end = Math.min(content.length, match.index + match[0].length + CONTEXT_CHARS);
101
117
  const context = content.slice(start, end).trim();
102
118
  const lineNum = content.slice(0, match.index).split("\n").length;
103
- const relPath = path.relative(this._workspace.cwd, fullPath);
119
+ const relPath = path.relative(baseDir, fullPath);
104
120
 
105
121
  results.push({ file: relPath, line: lineNum, match: match[0], context });
106
122
  if (results.length >= maxResults) break;
@@ -23,8 +23,9 @@ export class SandboxExecTool extends BaseTool {
23
23
 
24
24
  get description() {
25
25
  return (
26
- "Execute a shell command in the workspace directory. " +
27
- "Use for running scripts, installing packages, listing files, etc. " +
26
+ "Execute a shell command. " +
27
+ "cwd='workspace' (default) runs in KC's workspace. " +
28
+ "cwd='project' runs in the user's project directory. " +
28
29
  "Pipes, redirects, and chained commands (&&) are supported."
29
30
  );
30
31
  }
@@ -37,6 +38,11 @@ export class SandboxExecTool extends BaseTool {
37
38
  type: "string",
38
39
  description: "The shell command to execute (e.g. 'python script.py', 'ls -la')",
39
40
  },
41
+ cwd: {
42
+ type: "string",
43
+ enum: ["workspace", "project"],
44
+ description: "Working directory. 'workspace' (default) = KC's workspace. 'project' = user's project directory.",
45
+ },
40
46
  },
41
47
  required: ["command"],
42
48
  };
@@ -44,12 +50,17 @@ export class SandboxExecTool extends BaseTool {
44
50
 
45
51
  async execute(input) {
46
52
  const command = input.command || "";
53
+ const cwdScope = input.cwd || "workspace";
47
54
  if (!command.trim()) {
48
55
  return new ToolResult("No command provided", true);
49
56
  }
50
57
 
58
+ const effectiveCwd = (cwdScope === "project" && this._workspace.projectDir)
59
+ ? this._workspace.projectDir
60
+ : this._workspace.cwd;
61
+
51
62
  try {
52
- const { output, code } = await this._run(command);
63
+ const { output, code } = await this._run(command, effectiveCwd);
53
64
  let result = output;
54
65
  if (result.length > MAX_OUTPUT) {
55
66
  result = result.slice(0, MAX_OUTPUT) + "\n[truncated]";
@@ -70,11 +81,11 @@ export class SandboxExecTool extends BaseTool {
70
81
  * @param {string} command
71
82
  * @returns {Promise<{output: string, code: number}>}
72
83
  */
73
- _run(command) {
84
+ _run(command, cwd) {
74
85
  return new Promise((resolve, reject) => {
75
86
  const controller = new AbortController();
76
87
  const proc = spawn("sh", ["-c", command], {
77
- cwd: this._workspace.cwd,
88
+ cwd,
78
89
  stdio: ["ignore", "pipe", "pipe"],
79
90
  signal: controller.signal,
80
91
  });