@ontos-ai/knowhere-claw 0.2.4 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ import type { OpenClawPluginApi } from "openclaw/plugin-sdk/core";
2
+ import type { StringRecord } from "./types";
3
+ export declare function registerAgentHooks(api: OpenClawPluginApi, rawConfig: StringRecord): void;
@@ -0,0 +1,46 @@
1
+ import { isRecord } from "./types.js";
2
+ //#region src/agent-hooks.ts
3
+ const KNOWHERE_PROMPT_CONTEXT = [
4
+ "## 🔧 Knowhere Plugin Guidance",
5
+ "",
6
+ "### File Ingestion",
7
+ "When the user sends a file or attachment, or asks to parse/ingest a document,",
8
+ "**always** use `knowhere_ingest_document` to process it.",
9
+ "- If a `[media attached: ...]` marker is present, the file is already on disk — use the `filePath` parameter.",
10
+ "- If the file is in the cloud (e.g. Feishu Drive), first obtain the download URL via the appropriate channel tool, then use the `url` parameter.",
11
+ "- Refer to your **knowhere_memory** skill for the complete step-by-step workflow.",
12
+ "",
13
+ "### Knowledge Retrieval",
14
+ "When answering questions about documents or the knowledge base:",
15
+ "- ✅ Use `knowhere_get_map`, `knowhere_get_structure`, `knowhere_read_chunks`, `knowhere_kg_query`",
16
+ "- ❌ Do NOT use `exec` or shell commands to read files inside `~/.knowhere/`"
17
+ ].join("\n");
18
+ const KNOWHERE_DIR_PATTERN = ".knowhere";
19
+ const BLOCK_REASON = "Do not use exec to read .knowhere/ directly. Use knowhere retrieval tools instead: knowhere_get_map, knowhere_get_structure, knowhere_read_chunks, knowhere_kg_query.";
20
+ function resolveAgentHooksConfig(raw) {
21
+ const section = isRecord(raw.agentHooks) ? raw.agentHooks : {};
22
+ return { enabled: typeof section.enabled === "boolean" ? section.enabled : true };
23
+ }
24
+ function registerAgentHooks(api, rawConfig) {
25
+ if (!resolveAgentHooksConfig(rawConfig).enabled) {
26
+ api.logger.info("knowhere: agent hooks disabled via config");
27
+ return;
28
+ }
29
+ api.on("before_prompt_build", async () => {
30
+ return { prependContext: KNOWHERE_PROMPT_CONTEXT };
31
+ });
32
+ api.on("before_tool_call", async (event) => {
33
+ if (event.toolName !== "exec") return;
34
+ const params = isRecord(event.params) ? event.params : {};
35
+ if ((typeof params.command === "string" ? params.command : typeof params.cmd === "string" ? params.cmd : "").includes(KNOWHERE_DIR_PATTERN)) {
36
+ api.logger.info(`knowhere: blocked exec touching ${KNOWHERE_DIR_PATTERN}`);
37
+ return {
38
+ block: true,
39
+ blockReason: BLOCK_REASON
40
+ };
41
+ }
42
+ });
43
+ api.logger.info("knowhere: agent hooks registered (prompt context + tool governance)");
44
+ }
45
+ //#endregion
46
+ export { registerAgentHooks };
package/dist/client.js CHANGED
@@ -1,8 +1,8 @@
1
1
  import { isRecord } from "./types.js";
2
2
  import { formatErrorMessage } from "./error-message.js";
3
+ import { openAsBlob } from "node:fs";
3
4
  import path from "node:path";
4
5
  import { createHash } from "node:crypto";
5
- import { openAsBlob } from "node:fs";
6
6
  import { Knowhere } from "@knowhere-ai/sdk";
7
7
  //#region src/client.ts
8
8
  const RETRYABLE_STATUS_CODES = new Set([
package/dist/config.d.ts CHANGED
@@ -2,6 +2,14 @@ import type { OpenClawPluginApi } from "openclaw/plugin-sdk/core";
2
2
  import type { JsonSchemaObject, ResolvedKnowhereConfig, StringRecord, KnowledgeGraphConfig } from "./types";
3
3
  export declare const DEFAULT_BASE_URL = "https://api.knowhereto.ai";
4
4
  export declare const knowherePluginConfigSchema: JsonSchemaObject;
5
+ /**
6
+ * Return the effective plugin config object, merging the persisted
7
+ * resolved-config when the live pluginConfig is missing explicit fields
8
+ * (i.e. in agent subprocesses). Both `resolveKnowhereConfig` and
9
+ * `resolveKnowledgeGraphConfig` should read from this merged result so
10
+ * that subprocess instances inherit the gateway's full configuration.
11
+ */
12
+ export declare function resolveEffectivePluginConfig(api: OpenClawPluginApi): StringRecord;
5
13
  export declare function resolveKnowhereConfig(api: OpenClawPluginApi): ResolvedKnowhereConfig;
6
14
  export declare const API_KEY_URL = "https://knowhereto.ai/api-keys";
7
15
  export declare const PURCHASE_CREDITS_URL = "https://knowhereto.ai/usage?buy=true";
package/dist/config.js CHANGED
@@ -1,4 +1,5 @@
1
1
  import { isRecord } from "./types.js";
2
+ import { readFileSync } from "node:fs";
2
3
  import fs from "node:fs/promises";
3
4
  import path from "node:path";
4
5
  //#region src/config.ts
@@ -147,36 +148,83 @@ function readScopeMode(raw) {
147
148
  if (value === "session" || value === "agent" || value === "global") return value;
148
149
  return KNOWHERE_PLUGIN_DEFAULTS.scopeMode;
149
150
  }
151
+ const RESOLVED_CONFIG_STATE_FILE = "resolved-config.json";
152
+ function readPersistedResolvedConfigSync(stateDir) {
153
+ const filePath = path.join(stateDir, RESOLVED_CONFIG_STATE_FILE);
154
+ try {
155
+ const raw = readFileSync(filePath, "utf-8");
156
+ const parsed = JSON.parse(raw);
157
+ if (isRecord(parsed)) return parsed;
158
+ return null;
159
+ } catch {
160
+ return null;
161
+ }
162
+ }
163
+ async function persistResolvedConfig(stateDir, config) {
164
+ await fs.mkdir(stateDir, { recursive: true });
165
+ const filePath = path.join(stateDir, RESOLVED_CONFIG_STATE_FILE);
166
+ await fs.writeFile(filePath, JSON.stringify(config, null, 2), "utf-8");
167
+ }
168
+ function hasExplicitPluginConfig(raw) {
169
+ return Boolean(readString(raw, "scopeMode") || readString(raw, "storageDir"));
170
+ }
171
+ /**
172
+ * Return the effective plugin config object, merging the persisted
173
+ * resolved-config when the live pluginConfig is missing explicit fields
174
+ * (i.e. in agent subprocesses). Both `resolveKnowhereConfig` and
175
+ * `resolveKnowledgeGraphConfig` should read from this merged result so
176
+ * that subprocess instances inherit the gateway's full configuration.
177
+ */
178
+ function resolveEffectivePluginConfig(api) {
179
+ const raw = isRecord(api.pluginConfig) ? api.pluginConfig : {};
180
+ const stateDir = api.runtime.state.resolveStateDir();
181
+ if (!hasExplicitPluginConfig(raw)) {
182
+ const persisted = readPersistedResolvedConfigSync(stateDir);
183
+ if (persisted) return {
184
+ ...persisted,
185
+ ...raw
186
+ };
187
+ }
188
+ return raw;
189
+ }
150
190
  function resolveKnowhereConfig(api) {
151
191
  const raw = isRecord(api.pluginConfig) ? api.pluginConfig : {};
152
192
  const stateDir = api.runtime.state.resolveStateDir();
153
- const storageDirRaw = readString(raw, "storageDir");
154
- return {
193
+ const hasExplicit = hasExplicitPluginConfig(raw);
194
+ const effective = resolveEffectivePluginConfig(api);
195
+ const storageDirRaw = readString(effective, "storageDir");
196
+ const config = {
155
197
  apiKey: readString(raw, "apiKey") || process.env.KNOWHERE_API_KEY || "",
156
198
  baseUrl: readString(raw, "baseUrl") || process.env.KNOWHERE_BASE_URL || "https://api.knowhereto.ai",
157
199
  storageDir: storageDirRaw ? api.resolvePath(storageDirRaw) : path.join(stateDir, "plugins", api.id),
158
- scopeMode: readScopeMode(raw),
159
- pollIntervalMs: readNumber(raw, "pollIntervalMs", KNOWHERE_PLUGIN_DEFAULTS.pollIntervalMs, {
200
+ scopeMode: readScopeMode(effective),
201
+ pollIntervalMs: readNumber(effective, "pollIntervalMs", KNOWHERE_PLUGIN_DEFAULTS.pollIntervalMs, {
160
202
  min: 1e3,
161
203
  max: 6e4,
162
204
  integer: true
163
205
  }),
164
- pollTimeoutMs: readNumber(raw, "pollTimeoutMs", KNOWHERE_PLUGIN_DEFAULTS.pollTimeoutMs, {
206
+ pollTimeoutMs: readNumber(effective, "pollTimeoutMs", KNOWHERE_PLUGIN_DEFAULTS.pollTimeoutMs, {
165
207
  min: 1e4,
166
208
  max: 72e5,
167
209
  integer: true
168
210
  }),
169
- requestTimeoutMs: readNumber(raw, "requestTimeoutMs", KNOWHERE_PLUGIN_DEFAULTS.requestTimeoutMs, {
211
+ requestTimeoutMs: readNumber(effective, "requestTimeoutMs", KNOWHERE_PLUGIN_DEFAULTS.requestTimeoutMs, {
170
212
  min: 1e3,
171
213
  max: 3e5,
172
214
  integer: true
173
215
  }),
174
- uploadTimeoutMs: readNumber(raw, "uploadTimeoutMs", KNOWHERE_PLUGIN_DEFAULTS.uploadTimeoutMs, {
216
+ uploadTimeoutMs: readNumber(effective, "uploadTimeoutMs", KNOWHERE_PLUGIN_DEFAULTS.uploadTimeoutMs, {
175
217
  min: 1e3,
176
218
  max: 72e5,
177
219
  integer: true
178
220
  })
179
221
  };
222
+ if (hasExplicit) persistResolvedConfig(stateDir, {
223
+ scopeMode: config.scopeMode,
224
+ storageDir: config.storageDir,
225
+ knowledgeGraph: raw.knowledgeGraph
226
+ }).catch(() => void 0);
227
+ return config;
180
228
  }
181
229
  const API_KEY_URL = "https://knowhereto.ai/api-keys";
182
230
  const PURCHASE_CREDITS_URL = "https://knowhereto.ai/usage?buy=true";
@@ -257,4 +305,4 @@ function resolveKnowledgeGraphConfig(raw) {
257
305
  };
258
306
  }
259
307
  //#endregion
260
- export { assertKnowhereApiKey, formatPaymentRequiredMessage, isPaymentRequiredError, knowherePluginConfigSchema, persistApiKey, readPersistedApiKey, resolveKnowhereConfig, resolveKnowledgeGraphConfig };
308
+ export { assertKnowhereApiKey, formatPaymentRequiredMessage, isPaymentRequiredError, knowherePluginConfigSchema, persistApiKey, readPersistedApiKey, resolveEffectivePluginConfig, resolveKnowhereConfig, resolveKnowledgeGraphConfig };
@@ -62,7 +62,7 @@ export interface ChunkStats {
62
62
  * Main function to build knowledge graph
63
63
  * Equivalent to Python: build_knowledge_graph
64
64
  */
65
- export declare function buildKnowledgeGraph(chunks: ChunkData[], connections: Connection[], chunkStats: ChunkStats, jiebaInitialized: boolean, logger?: PluginLogger, kbId?: string): KnowledgeGraph;
65
+ export declare function buildKnowledgeGraph(chunks: ChunkData[], connections: Connection[], chunkStats: ChunkStats, _jiebaInitialized: boolean, logger?: PluginLogger, kbId?: string): KnowledgeGraph;
66
66
  /**
67
67
  * Incremental update: match new chunks against existing chunks
68
68
  * Equivalent to Python: _incremental_connections
@@ -1,12 +1,5 @@
1
- import * as nodejieba from "nodejieba";
2
1
  //#region src/graph-builder.ts
3
2
  /**
4
- * Graph Builder Module
5
- *
6
- * TypeScript implementation of knowhere-api/apps/worker/app/services/connect_builder/graph_builder.py
7
- * Builds file-level knowledge graphs from chunk connections with TF-IDF and importance scoring.
8
- */
9
- /**
10
3
  * Extract file key from a chunk.
11
4
  * Prefers the explicit fileKey field; falls back to path-based extraction for backward compatibility.
12
5
  */
@@ -27,29 +20,18 @@ function extractLabel(path) {
27
20
  * Extract tokens from text using jieba
28
21
  * Equivalent to Python: _extract_tokens_from_content
29
22
  */
30
- function extractTokensFromContent(content, jiebaInitialized) {
31
- const cleanContent = content.replace(/<[^>]*>/g, " ");
32
- if (!jiebaInitialized) return cleanContent.split(/\s+/).filter((w) => w.length > 1);
33
- try {
34
- return nodejieba.cut(cleanContent).filter((token) => {
35
- if (token.length <= 1) return false;
36
- if (/^\d+$/.test(token)) return false;
37
- if (/^[^\w\u4e00-\u9fa5]+$/.test(token)) return false;
38
- return true;
39
- });
40
- } catch {
41
- return cleanContent.split(/\s+/).filter((w) => w.length > 1);
42
- }
23
+ function extractTokensFromContent(content) {
24
+ return content.replace(/<[^>]*>/g, " ").split(/\s+/).filter((w) => w.length > 1);
43
25
  }
44
26
  /**
45
27
  * Compute TF-IDF top keywords for a file
46
28
  * Equivalent to Python: _compute_tfidf_top_keywords
47
29
  */
48
- function computeTfidfTopKeywords(fileChunks, allChunks, topK, jiebaInitialized) {
30
+ function computeTfidfTopKeywords(fileChunks, allChunks, topK) {
49
31
  if (fileChunks.length === 0) return [];
50
32
  const fileTokens = [];
51
33
  for (const chunk of fileChunks) {
52
- const tokens = extractTokensFromContent(chunk.content, jiebaInitialized);
34
+ const tokens = extractTokensFromContent(chunk.content);
53
35
  fileTokens.push(...tokens);
54
36
  }
55
37
  if (fileTokens.length === 0) return [];
@@ -60,7 +42,7 @@ function computeTfidfTopKeywords(fileChunks, allChunks, topK, jiebaInitialized)
60
42
  }
61
43
  const docFreq = /* @__PURE__ */ new Map();
62
44
  for (const chunk of allChunks) {
63
- const tokens = new Set(extractTokensFromContent(chunk.content, jiebaInitialized).map((t) => t.toLowerCase()));
45
+ const tokens = new Set(extractTokensFromContent(chunk.content).map((t) => t.toLowerCase()));
64
46
  for (const token of tokens) docFreq.set(token, (docFreq.get(token) || 0) + 1);
65
47
  }
66
48
  const totalDocs = allChunks.length;
@@ -161,7 +143,7 @@ function aggregateFileLevelEdges(connections, chunkById, topN = 5) {
161
143
  * Main function to build knowledge graph
162
144
  * Equivalent to Python: build_knowledge_graph
163
145
  */
164
- function buildKnowledgeGraph(chunks, connections, chunkStats, jiebaInitialized, logger, kbId) {
146
+ function buildKnowledgeGraph(chunks, connections, chunkStats, _jiebaInitialized, logger, kbId) {
165
147
  logger?.info(`Building knowledge graph from ${chunks.length} chunks and ${connections.length} connections`);
166
148
  const chunkById = /* @__PURE__ */ new Map();
167
149
  for (const chunk of chunks) chunkById.set(chunk.chunk_id, chunk);
@@ -178,7 +160,7 @@ function buildKnowledgeGraph(chunks, connections, chunkStats, jiebaInitialized,
178
160
  const type = chunk.metadata.type || "text";
179
161
  typeCount[type] = (typeCount[type] || 0) + 1;
180
162
  }
181
- const topKeywords = computeTfidfTopKeywords(fileChunks, chunks, 10, jiebaInitialized);
163
+ const topKeywords = computeTfidfTopKeywords(fileChunks, chunks, 10);
182
164
  const importance = computeFileImportance(fileKey, fileChunks, chunks, chunkStats);
183
165
  let topSummary = "";
184
166
  for (const chunk of fileChunks) if (chunk.metadata.summary && typeof chunk.metadata.summary === "string") {
package/dist/index.js CHANGED
@@ -1,4 +1,5 @@
1
- import { knowherePluginConfigSchema, readPersistedApiKey, resolveKnowhereConfig, resolveKnowledgeGraphConfig } from "./config.js";
1
+ import { registerAgentHooks } from "./agent-hooks.js";
2
+ import { knowherePluginConfigSchema, readPersistedApiKey, resolveEffectivePluginConfig, resolveKnowhereConfig, resolveKnowledgeGraphConfig } from "./config.js";
2
3
  import { KnowhereStore } from "./store.js";
3
4
  import { createKnowhereToolFactory } from "./tools.js";
4
5
  import { KnowledgeGraphService } from "./kg-service.js";
@@ -10,7 +11,9 @@ const plugin = {
10
11
  configSchema: knowherePluginConfigSchema,
11
12
  register(api) {
12
13
  const config = resolveKnowhereConfig(api);
13
- const kgConfig = resolveKnowledgeGraphConfig(api.pluginConfig && typeof api.pluginConfig === "object" ? api.pluginConfig : {});
14
+ const effectiveRaw = resolveEffectivePluginConfig(api);
15
+ const kgConfig = resolveKnowledgeGraphConfig(effectiveRaw);
16
+ registerAgentHooks(api, effectiveRaw);
14
17
  const store = new KnowhereStore({
15
18
  rootDir: config.storageDir,
16
19
  scopeMode: config.scopeMode,
@@ -43,7 +46,11 @@ const plugin = {
43
46
  "knowhere_import_completed_job",
44
47
  "knowhere_set_api_key",
45
48
  "knowhere_kg_list",
46
- "knowhere_kg_query"
49
+ "knowhere_kg_query",
50
+ "knowhere_get_map",
51
+ "knowhere_get_structure",
52
+ "knowhere_read_chunks",
53
+ "knowhere_discover_files"
47
54
  ] });
48
55
  }
49
56
  };
@@ -9,11 +9,9 @@ export declare class KnowledgeGraphService {
9
9
  private readonly logger;
10
10
  private degradationMode;
11
11
  private buildQueues;
12
- private jiebaInitialized;
13
12
  constructor(params: KnowledgeGraphServiceParams);
14
13
  initialize(): Promise<void>;
15
14
  private checkPythonEnvironment;
16
- private initializeJieba;
17
15
  extractKeywords(text: string, topK?: number): Promise<string[]>;
18
16
  resolveKbId(context: ToolRuntimeContext): string | null;
19
17
  getKbPath(kbId: string): string;
@@ -2,10 +2,9 @@ import { resolveStoredKnowhereResultRoot } from "./parser.js";
2
2
  import { buildConnections, init_connect_builder } from "./connect-builder.js";
3
3
  import { buildKnowledgeGraph } from "./graph-builder.js";
4
4
  import path from "node:path";
5
+ import os from "node:os";
5
6
  import { spawn } from "node:child_process";
6
7
  import fs from "fs-extra";
7
- import os from "node:os";
8
- import * as nodejieba from "nodejieba";
9
8
  //#region src/kg-service.ts
10
9
  init_connect_builder();
11
10
  const DEFAULT_CONNECT_CONFIG = {
@@ -26,18 +25,11 @@ const DEFAULT_KG_CONFIG = {
26
25
  function formatUnknownError(error) {
27
26
  return error instanceof Error ? error.message : String(error);
28
27
  }
29
- function extractKeywordText(item) {
30
- if (typeof item === "string") return item;
31
- if (typeof item === "number" || typeof item === "boolean" || typeof item === "bigint") return String(item);
32
- if (typeof item === "object" && item !== null && "word" in item && typeof item.word === "string") return item.word;
33
- return null;
34
- }
35
28
  var KnowledgeGraphService = class {
36
29
  config;
37
30
  logger;
38
31
  degradationMode = "full";
39
32
  buildQueues = /* @__PURE__ */ new Map();
40
- jiebaInitialized = false;
41
33
  constructor(params) {
42
34
  this.config = {
43
35
  ...DEFAULT_KG_CONFIG,
@@ -58,18 +50,12 @@ var KnowledgeGraphService = class {
58
50
  }
59
51
  try {
60
52
  await this.checkPythonEnvironment();
61
- await this.initializeJieba();
62
53
  this.degradationMode = "full";
63
54
  this.logger.info("Knowledge graph service initialized in full mode");
64
55
  } catch (error) {
65
56
  this.logger.warn(`Knowledge graph initialization failed: ${error instanceof Error ? error.message : String(error)}`);
66
- if (error.code === "PYTHON_MISSING") {
67
- this.degradationMode = "disabled";
68
- this.logger.warn("Python not found, knowledge graph disabled");
69
- } else if (error.code === "NODEJIEBA_MISSING") {
70
- this.degradationMode = "basic";
71
- this.logger.warn("Nodejieba missing, using basic tokenization");
72
- } else this.degradationMode = "disabled";
57
+ this.degradationMode = "disabled";
58
+ this.logger.warn("Python not found, knowledge graph disabled");
73
59
  }
74
60
  }
75
61
  async checkPythonEnvironment() {
@@ -92,30 +78,8 @@ var KnowledgeGraphService = class {
92
78
  });
93
79
  });
94
80
  }
95
- async initializeJieba() {
96
- try {
97
- nodejieba.load();
98
- this.jiebaInitialized = true;
99
- this.logger.info("Nodejieba initialized successfully");
100
- } catch {
101
- const err = /* @__PURE__ */ new Error("Failed to initialize nodejieba");
102
- err.code = "NODEJIEBA_MISSING";
103
- throw err;
104
- }
105
- }
106
81
  async extractKeywords(text, topK = 20) {
107
82
  if (this.degradationMode === "disabled") return [];
108
- if (this.degradationMode === "full" && this.jiebaInitialized) try {
109
- const rawKeywords = nodejieba.extract(text, topK);
110
- return (Array.isArray(rawKeywords) ? rawKeywords : []).map((item) => extractKeywordText(item)).filter((keyword) => keyword !== null).filter((kw) => {
111
- if (kw.length <= 1) return false;
112
- if (/^\d+$/.test(kw)) return false;
113
- return true;
114
- }).slice(0, topK);
115
- } catch (error) {
116
- this.logger.warn(`Jieba extraction failed, falling back to basic: ${formatUnknownError(error)}`);
117
- this.degradationMode = "basic";
118
- }
119
83
  return text.split(/\s+/).filter((w) => w.length > 1).slice(0, topK);
120
84
  }
121
85
  resolveKbId(context) {
@@ -146,8 +110,6 @@ var KnowledgeGraphService = class {
146
110
  await fs.ensureDir(docDir);
147
111
  const sourceResultRoot = await resolveStoredKnowhereResultRoot(params.sourcePath);
148
112
  await fs.copy(sourceResultRoot, docDir, { overwrite: true });
149
- const keywordsPath = path.join(docDir, "keywords.json");
150
- await fs.writeJSON(keywordsPath, params.keywords, { spaces: 2 });
151
113
  const metadataPath = path.join(docDir, "metadata.json");
152
114
  await fs.writeJSON(metadataPath, params.metadata, { spaces: 2 });
153
115
  this.logger.info(`Document saved to knowledge base: kb=${params.kbId} doc=${params.docId}`);
@@ -176,7 +138,7 @@ var KnowledgeGraphService = class {
176
138
  const docPath = path.join(kbPath, doc);
177
139
  if ((await fs.stat(docPath)).isDirectory() && doc !== "knowledge_graph.json" && doc !== "chunk_stats.json" && doc !== "kb_metadata.json") docDirs.push(doc);
178
140
  }
179
- if (docDirs.length < 2) {
141
+ if (docDirs.length < 1) {
180
142
  this.logger.info(`Not enough documents for graph building (need >=2, have ${docDirs.length}), skipping`);
181
143
  return;
182
144
  }
@@ -203,7 +165,7 @@ var KnowledgeGraphService = class {
203
165
  const chunkStatsPath = path.join(kbPath, "chunk_stats.json");
204
166
  let chunkStats = {};
205
167
  if (await fs.pathExists(chunkStatsPath)) chunkStats = await fs.readJSON(chunkStatsPath);
206
- const knowledgeGraph = buildKnowledgeGraph(allChunks, connections, chunkStats, this.jiebaInitialized, this.logger, kbId);
168
+ const knowledgeGraph = buildKnowledgeGraph(allChunks, connections, chunkStats, false, this.logger, kbId);
207
169
  const graphFile = path.join(kbPath, "knowledge_graph.json");
208
170
  await fs.writeJSON(graphFile, knowledgeGraph, { spaces: 2 });
209
171
  this.logger.info(`Knowledge graph saved to ${graphFile}`);
package/dist/tools.js CHANGED
@@ -4,9 +4,12 @@ import { resolveStoredKnowhereArtifactPath } from "./parser.js";
4
4
  import { sanitizeStringArray } from "./text.js";
5
5
  import { formatErrorMessage } from "./error-message.js";
6
6
  import { KnowhereClient } from "./client.js";
7
+ import { deliverChannelMessage } from "./channel-delivery.js";
7
8
  import { sendTrackerProgress } from "./tracker-progress.js";
8
9
  import fs from "node:fs/promises";
9
10
  import path from "node:path";
11
+ import crypto from "node:crypto";
12
+ import os from "node:os";
10
13
  //#region src/tools.ts
11
14
  const TERMINAL_JOB_STATUSES = new Set([
12
15
  "cancelled",
@@ -784,6 +787,24 @@ function createImportCompletedJobTool(params) {
784
787
  downloadedResult: importResult.downloadedResult
785
788
  }, { overwrite });
786
789
  params.api.logger.info(`knowhere: knowhere_import_completed_job stored imported document scope=${scope.label} jobId=${importResult.jobResult.job_id} docId=${document.id}`);
790
+ try {
791
+ const importKbId = params.kgService.resolveKbId(params.ctx);
792
+ if (importKbId && params.kgService.isEnabled()) {
793
+ params.api.logger.info(`knowhere: triggering KG build after import kbId=${importKbId}`);
794
+ buildKnowledgeGraphAsync({
795
+ kgService: params.kgService,
796
+ kbId: importKbId,
797
+ docId: document.id,
798
+ documentPayload: { downloadedResult: importResult.downloadedResult },
799
+ scope,
800
+ store: params.store,
801
+ ctx: params.ctx,
802
+ api: params.api
803
+ }).catch((e) => params.api.logger.error(`knowhere: KG build after import failed: ${formatErrorMessage(e)}`));
804
+ }
805
+ } catch (kgError) {
806
+ params.api.logger.warn(`knowhere: import KG trigger error: ${formatErrorMessage(kgError)}`);
807
+ }
787
808
  return textResult([
788
809
  "Import complete.",
789
810
  ...buildStoredDocumentSummaryLines({
@@ -914,6 +935,673 @@ function createKgQueryTool(params) {
914
935
  }
915
936
  };
916
937
  }
938
+ const T2_KNOWHERE_HOME = path.join(os.homedir(), ".knowhere");
939
+ async function t2FindDocDir(kbDir, docName) {
940
+ const exactPath = path.join(kbDir, docName);
941
+ try {
942
+ await fs.access(exactPath);
943
+ return exactPath;
944
+ } catch {}
945
+ let entries;
946
+ try {
947
+ entries = await fs.readdir(kbDir, { withFileTypes: true });
948
+ } catch {
949
+ return null;
950
+ }
951
+ for (const e of entries) if (e.isDirectory() && String(e.name).includes(docName)) return path.join(kbDir, String(e.name));
952
+ return null;
953
+ }
954
+ async function t2LoadChunks(docDir) {
955
+ for (const fname of ["chunks_slim.json", "chunks.json"]) try {
956
+ const raw = await fs.readFile(path.join(docDir, fname), "utf-8");
957
+ const data = JSON.parse(raw);
958
+ let chunks;
959
+ if (Array.isArray(data)) chunks = data;
960
+ else if (isRecord(data) && Array.isArray(data.chunks)) chunks = data.chunks;
961
+ else continue;
962
+ if (fname === "chunks.json") return chunks.map((c) => ({
963
+ type: c.type || "text",
964
+ path: c.path || "",
965
+ content: c.content || "",
966
+ summary: c.metadata?.summary || c.summary || ""
967
+ }));
968
+ return chunks;
969
+ } catch {
970
+ continue;
971
+ }
972
+ return [];
973
+ }
974
+ function t2NormalizePath(s) {
975
+ return s.replace(/[\uFF01-\uFF5E]/g, (ch) => String.fromCharCode(ch.charCodeAt(0) - 65248)).replace(/[\s\u3000\u00A0]+/g, "").toLowerCase();
976
+ }
977
+ const PLACEHOLDER_RE = /(?:IMAGE|TABLE)_([a-f0-9-]+)_(?:IMAGE|TABLE)/g;
978
+ async function t2LoadRawChunks(docDir) {
979
+ try {
980
+ const raw = await fs.readFile(path.join(docDir, "chunks.json"), "utf-8");
981
+ const data = JSON.parse(raw);
982
+ if (Array.isArray(data)) return data;
983
+ if (isRecord(data) && Array.isArray(data.chunks)) return data.chunks;
984
+ return [];
985
+ } catch {
986
+ return [];
987
+ }
988
+ }
989
+ /**
990
+ * Runtime-only enrichment of chunks returned to the AI:
991
+ * 1. Replace IMAGE_uuid_IMAGE placeholders with [📷 path] in text chunks
992
+ * 2. Replace TABLE_uuid_TABLE placeholders with actual HTML content in text chunks
993
+ * 3. Remove standalone table chunks that were inlined via placeholders
994
+ * 4. Strip self-referencing placeholders from image/table chunk content & summary
995
+ *
996
+ * Does NOT modify chunks.json on disk.
997
+ */
998
+ async function t2EnrichChunks(chunks, docDir) {
999
+ const rawChunks = await t2LoadRawChunks(docDir);
1000
+ const idToRaw = /* @__PURE__ */ new Map();
1001
+ for (const rc of rawChunks) if (rc.chunk_id) idToRaw.set(rc.chunk_id, rc);
1002
+ const manifestPaths = /* @__PURE__ */ new Map();
1003
+ try {
1004
+ const manifest = JSON.parse(await fs.readFile(path.join(docDir, "manifest.json"), "utf-8"));
1005
+ if (isRecord(manifest) && isRecord(manifest.files)) {
1006
+ const files = manifest.files;
1007
+ for (const entry of Array.isArray(files.images) ? files.images : []) if (typeof entry.id === "string" && typeof entry.file_path === "string") manifestPaths.set(entry.id, {
1008
+ type: "image",
1009
+ filePath: entry.file_path
1010
+ });
1011
+ for (const entry of Array.isArray(files.tables) ? files.tables : []) if (typeof entry.id === "string" && typeof entry.file_path === "string") manifestPaths.set(entry.id, {
1012
+ type: "table",
1013
+ filePath: entry.file_path
1014
+ });
1015
+ }
1016
+ } catch {}
1017
+ const inlinedTablePaths = /* @__PURE__ */ new Set();
1018
+ const inlinedImagePaths = /* @__PURE__ */ new Set();
1019
+ for (const chunk of chunks) {
1020
+ if (!chunk.content) continue;
1021
+ PLACEHOLDER_RE.lastIndex = 0;
1022
+ if (PLACEHOLDER_RE.test(chunk.content)) {
1023
+ PLACEHOLDER_RE.lastIndex = 0;
1024
+ chunk.content = await replacePlaceholders(chunk.content, idToRaw, docDir, chunk.type === "text" ? inlinedTablePaths : void 0, chunk.type === "text" ? inlinedImagePaths : void 0, manifestPaths);
1025
+ PLACEHOLDER_RE.lastIndex = 0;
1026
+ if (chunk.type !== "text" && chunk.path && PLACEHOLDER_RE.test(chunk.content)) {
1027
+ if (chunk.type === "table") try {
1028
+ const html = await fs.readFile(path.join(docDir, chunk.path), "utf-8");
1029
+ chunk.content = chunk.content.replace(PLACEHOLDER_RE, `\n${html.slice(0, 8e3)}\n`);
1030
+ } catch {
1031
+ chunk.content = chunk.content.replace(PLACEHOLDER_RE, `[📊 ${chunk.path}]`);
1032
+ }
1033
+ else if (chunk.type === "image") chunk.content = chunk.content.replace(PLACEHOLDER_RE, `[📷 ${chunk.path}]`);
1034
+ }
1035
+ }
1036
+ if (chunk.summary) {
1037
+ PLACEHOLDER_RE.lastIndex = 0;
1038
+ if (PLACEHOLDER_RE.test(chunk.summary)) {
1039
+ PLACEHOLDER_RE.lastIndex = 0;
1040
+ chunk.summary = await replacePlaceholders(chunk.summary, idToRaw, docDir, void 0, void 0, manifestPaths);
1041
+ }
1042
+ }
1043
+ }
1044
+ chunks = chunks.filter((c) => {
1045
+ if (c.type === "table" && inlinedTablePaths.has(c.path)) return false;
1046
+ if (c.type === "image" && inlinedImagePaths.has(c.path)) return false;
1047
+ return true;
1048
+ });
1049
+ return chunks;
1050
+ }
1051
+ async function replacePlaceholders(text, idToRaw, docDir, inlinedTablePaths, inlinedImagePaths, manifestPaths) {
1052
+ const matches = [];
1053
+ const re = /(?:(IMAGE|TABLE))_([a-f0-9-]+)_(?:IMAGE|TABLE)/g;
1054
+ let m;
1055
+ while ((m = re.exec(text)) !== null) matches.push({
1056
+ full: m[0],
1057
+ type: m[1],
1058
+ id: m[2],
1059
+ start: m.index,
1060
+ end: m.index + m[0].length
1061
+ });
1062
+ if (matches.length === 0) return text;
1063
+ const replacements = [];
1064
+ for (const match of matches) {
1065
+ const raw = idToRaw.get(match.id);
1066
+ let resolvedPath = raw?.path;
1067
+ if (!resolvedPath && manifestPaths) {
1068
+ const mEntry = manifestPaths.get(match.id);
1069
+ if (mEntry) resolvedPath = mEntry.filePath;
1070
+ }
1071
+ if (!resolvedPath) {
1072
+ replacements.push(match.full);
1073
+ continue;
1074
+ }
1075
+ if (match.type === "IMAGE") {
1076
+ replacements.push(`[📷 ${resolvedPath}]`);
1077
+ inlinedImagePaths?.add(resolvedPath);
1078
+ } else {
1079
+ const htmlPath = path.join(docDir, resolvedPath);
1080
+ try {
1081
+ const html = await fs.readFile(htmlPath, "utf-8");
1082
+ replacements.push(`\n${html.slice(0, 8e3)}\n`);
1083
+ inlinedTablePaths?.add(resolvedPath);
1084
+ } catch {
1085
+ const tableContent = raw?.content || "";
1086
+ if (tableContent && tableContent.includes("<")) {
1087
+ replacements.push(`\n${tableContent}\n`);
1088
+ inlinedTablePaths?.add(resolvedPath);
1089
+ } else replacements.push(`[📊 ${resolvedPath}]`);
1090
+ }
1091
+ }
1092
+ }
1093
+ let result = text;
1094
+ for (let i = matches.length - 1; i >= 0; i--) result = result.slice(0, matches[i].start) + replacements[i] + result.slice(matches[i].end);
1095
+ return result;
1096
+ }
1097
+ function t2ComputeTfIdfKeywords(rawChunks, topK = 10) {
1098
+ const df = {};
1099
+ const tf = {};
1100
+ const totalDocs = rawChunks.length || 1;
1101
+ for (const c of rawChunks) {
1102
+ const tokens = Array.isArray(c.metadata?.tokens) ? c.metadata.tokens : [];
1103
+ const keywords = Array.isArray(c.metadata?.keywords) ? c.metadata.keywords : [];
1104
+ const allTerms = [...tokens, ...keywords];
1105
+ const seen = /* @__PURE__ */ new Set();
1106
+ for (const t of allTerms) {
1107
+ if (!t || t.length <= 1) continue;
1108
+ if (/^\d+[.,%]*$/.test(t)) continue;
1109
+ const lower = t.toLowerCase();
1110
+ tf[lower] = (tf[lower] || 0) + 1;
1111
+ if (!seen.has(lower)) {
1112
+ df[lower] = (df[lower] || 0) + 1;
1113
+ seen.add(lower);
1114
+ }
1115
+ }
1116
+ }
1117
+ const scored = Object.entries(tf).map(([term, freq]) => {
1118
+ return {
1119
+ term,
1120
+ score: freq * (Math.log(totalDocs / (df[term] || 1)) + 1)
1121
+ };
1122
+ });
1123
+ scored.sort((a, b) => b.score - a.score);
1124
+ return scored.slice(0, topK).map((s) => s.term);
1125
+ }
1126
+ function t2KeywordsNeedRepair(keywords) {
1127
+ if (!Array.isArray(keywords) || keywords.length === 0) return true;
1128
+ let bad = 0;
1129
+ for (const kw of keywords) if (!kw || typeof kw === "string" && (kw.length <= 1 || /^\d+[.,%]*$/.test(kw) || /^[a-z]{1,2}$/i.test(kw))) bad++;
1130
+ return bad >= keywords.length * .5;
1131
+ }
1132
+ function t2JsonResult(data) {
1133
+ return {
1134
+ content: [{
1135
+ type: "text",
1136
+ text: JSON.stringify(data, null, 2)
1137
+ }],
1138
+ details: {}
1139
+ };
1140
+ }
1141
/**
 * List the document directories under a knowledge-base root.
 * A subdirectory counts as a document only when it contains a readable
 * `chunks.json`. Returns an empty list when the root itself cannot be read.
 */
async function t2ListDocDirs(kbRoot) {
  let entries;
  try {
    entries = await fs.readdir(kbRoot, { withFileTypes: true });
  } catch {
    return [];
  }
  const docs = [];
  for (const entry of entries) {
    if (!entry.isDirectory()) continue;
    const name = String(entry.name);
    try {
      await fs.access(path.join(kbRoot, name, "chunks.json"));
    } catch {
      continue;
    }
    docs.push(name);
  }
  return docs;
}
1160
/**
 * Copy a source file into the staging directory under a deterministic name
 * derived from the sha1 of the source *path* (first 12 hex chars), keeping
 * the original extension. Creates the staging directory on demand and
 * returns the staged file's absolute path.
 */
async function t2StageFile(srcPath, stagingDir) {
  await fs.mkdir(stagingDir, { recursive: true });
  const digest = crypto.createHash("sha1").update(srcPath).digest("hex");
  const stagedName = `knowhere-asset-${digest.slice(0, 12)}${path.extname(srcPath)}`;
  const stagedPath = path.join(stagingDir, stagedName);
  await fs.copyFile(srcPath, stagedPath);
  return stagedPath;
}
1168
// Resolve multimedia assets (images / tables) referenced by the chunks a read
// returned. Tables have their HTML inlined (truncated); images are staged to a
// local directory and delivered to the user's channel via
// deliverChannelMessage. Failures are logged at debug level and reflected in
// each descriptor's `mode` rather than thrown, so one bad asset never aborts
// the whole read. Returns the list of asset descriptors.
async function t2ResolveAssets(params) {
  const assets = [];
  // Staging area for outbound media — assumes the channel sender can read it.
  const stagingDir = path.join(os.homedir(), ".openclaw", "knowhere-assets");
  // De-duplicate: the same file may be referenced by several chunks.
  const processedPaths = /* @__PURE__ */ new Set();
  let channelRoute;
  try {
    // Best effort; delivery below is still attempted with an undefined route.
    channelRoute = await params.store.resolveChannelRoute({ sessionKey: params.ctx.sessionKey });
  } catch {}
  const resolveOne = async (opts) => {
    const absolutePath = path.join(params.docDir, opts.relativePath);
    if (processedPaths.has(absolutePath)) return;
    processedPaths.add(absolutePath);
    if (opts.type === "table") try {
      // Tables: inline the HTML (capped at 8000 chars) into the tool result.
      const html = await fs.readFile(absolutePath, "utf-8");
      assets.push({
        chunk_id: opts.chunkId,
        type: "table",
        relative_path: opts.relativePath,
        summary: opts.summary.slice(0, 200),
        mode: "table_inline",
        html_content: html.slice(0, 8e3)
      });
    } catch {
      params.api.logger.debug?.(`knowhere: t2ResolveAssets table read failed: ${absolutePath}`);
    }
    else try {
      // Images: verify existence, stage a copy, then push it to the channel.
      await fs.access(absolutePath);
      const stagedPath = await t2StageFile(absolutePath, stagingDir);
      const result = await deliverChannelMessage({
        api: params.api,
        operationLabel: "t2-asset-image",
        sessionKey: params.ctx.sessionKey,
        channelRoute,
        text: `📷 ${opts.summary.slice(0, 100)}`,
        mediaUrl: stagedPath,
        mediaLocalRoots: [stagingDir]
      });
      // Record whether delivery actually happened so the caller can tell the
      // agent which images the user has already seen.
      assets.push({
        chunk_id: opts.chunkId,
        type: "image",
        relative_path: opts.relativePath,
        summary: opts.summary.slice(0, 200),
        mode: result.delivered ? "image_sent" : "image_failed"
      });
    } catch (err) {
      params.api.logger.debug?.(`knowhere: t2ResolveAssets image delivery failed: ${absolutePath} — ${err instanceof Error ? err.message : String(err)}`);
    }
  };
  for (const chunk of params.returnedChunks) if ((chunk.type === "image" || chunk.type === "table") && chunk.path) {
    // Skip table chunks whose content carries no TABLE_<uuid>_TABLE marker —
    // NOTE(review): presumably those were already inlined upstream; confirm.
    if (chunk.type === "table" && chunk.content && !/TABLE_[a-f0-9-]+_TABLE/.test(chunk.content)) continue;
    await resolveOne({
      chunkId: chunk.path,
      type: chunk.type,
      relativePath: chunk.path,
      summary: chunk.summary || chunk.content?.slice(0, 200) || ""
    });
  }
  return assets;
}
1227
// Tool factory: knowhere_get_map — global overview of all knowledge bases
// under T2_KNOWHERE_HOME. While scanning, it opportunistically repairs each
// file's degraded keyword list (TF-IDF recompute from raw chunks) and
// persists the repaired graph back to disk on a best-effort basis.
function createGetMapTool(_params) {
  return {
    name: "knowhere_get_map",
    label: "Knowhere Get Map",
    description: "获取知识库全局概览。查询知识时必须先调此工具,了解有哪些文档、关键词、重要性和跨文件关联。然后用 knowhere_get_structure 查看具体文档的章节目录。重要:knowhere_read_chunks 已内置图片/表格自动投递功能,读取内容时会自动发送多媒体到用户频道。除非用户明确要求下载原始文件,否则请勿主动发送 .docx/.pdf 等文档。",
    parameters: {
      type: "object",
      additionalProperties: false,
      properties: { kbId: {
        type: "string",
        description: "Optional: specific KB ID. Leave empty to scan all."
      } }
    },
    execute: async (_toolCallId, rawParams) => {
      // kbId is optional; the empty string means "scan every KB directory".
      const kbId = readString((isRecord(rawParams) ? rawParams : {}).kbId) || "";
      try {
        await fs.access(T2_KNOWHERE_HOME);
      } catch {
        return textResult(`未找到知识库目录 ${T2_KNOWHERE_HOME}`);
      }
      const entries = await fs.readdir(T2_KNOWHERE_HOME, { withFileTypes: true });
      const kbs = [];
      for (const e of entries) {
        if (!e.isDirectory()) continue;
        if (kbId && e.name !== kbId) continue;
        const kbRoot = path.join(T2_KNOWHERE_HOME, e.name);
        const kgPath = path.join(kbRoot, "knowledge_graph.json");
        try {
          const g = JSON.parse(await fs.readFile(kgPath, "utf-8"));
          let kgDirty = false;
          const files = g.files || {};
          // Repair pass: recompute keywords (and refresh type counts and
          // chunk count) for files whose stored keyword list looks degraded.
          for (const [docName, info] of Object.entries(files)) if (t2KeywordsNeedRepair(info.top_keywords)) {
            const rawChunks = await t2LoadRawChunks(path.join(kbRoot, docName));
            if (rawChunks.length > 0) {
              const repaired = t2ComputeTfIdfKeywords(rawChunks);
              if (repaired.length > 0) {
                info.top_keywords = repaired;
                const types = {};
                for (const c of rawChunks) {
                  const t = c.type || "text";
                  types[t] = (types[t] || 0) + 1;
                }
                info.types = types;
                info.chunks_count = rawChunks.length;
                kgDirty = true;
              }
            }
          }
          if (kgDirty) {
            g.updated_at = (/* @__PURE__ */ new Date()).toISOString();
            try {
              // Best-effort persistence; a write failure must not fail the scan.
              await fs.writeFile(kgPath, JSON.stringify(g, null, 2), "utf-8");
            } catch {}
          }
          kbs.push({
            kb_id: e.name,
            version: g.version || "1.0",
            updated_at: g.updated_at || "",
            stats: g.stats || {},
            files: g.files || {},
            edges: g.edges || []
          });
        } catch {
          // knowledge_graph.json missing or unreadable: report the KB as
          // "pending" with only its document directory names.
          const docs = await t2ListDocDirs(kbRoot);
          if (docs.length > 0) kbs.push({
            kb_id: e.name,
            version: "pending",
            files: Object.fromEntries(docs.map((d) => [d, {}])),
            edges: []
          });
        }
      }
      if (kbs.length === 0) return textResult("未找到知识库。");
      return t2JsonResult({
        status: "ok",
        knowledge_bases: kbs
      });
    }
  };
}
1307
/**
 * Tool factory: knowhere_get_structure — returns a document's section
 * hierarchy (hierarchy.json) or, when that file is absent or unreadable,
 * falls back to a de-duplicated, sorted list of chunk paths.
 */
function createGetStructureTool(_params) {
  const execute = async (_toolCallId, rawParams) => {
    const record = isRecord(rawParams) ? rawParams : {};
    const kbId = readString(record.kbId);
    const docName = readString(record.docName);
    if (!kbId || !docName) throw new Error("kbId and docName are required.");
    const docDir = await t2FindDocDir(path.join(T2_KNOWHERE_HOME, kbId), docName);
    if (!docDir) return textResult(`文档 '${docName}' 在 kb=${kbId} 中不存在`);
    try {
      const raw = await fs.readFile(path.join(docDir, "hierarchy.json"), "utf-8");
      return t2JsonResult({
        status: "ok",
        kb_id: kbId,
        doc_name: path.basename(docDir),
        hierarchy: JSON.parse(raw)
      });
    } catch {
      // No usable hierarchy.json — answer with chunk paths instead.
      const chunks = await t2LoadChunks(docDir);
      const paths = [...new Set(chunks.map((c) => c.path).filter(Boolean))].sort();
      return t2JsonResult({
        status: "ok",
        kb_id: kbId,
        doc_name: path.basename(docDir),
        hierarchy: null,
        chunk_paths: paths,
        hint: "无 hierarchy.json,已返回 chunk 路径列表"
      });
    }
  };
  return {
    name: "knowhere_get_structure",
    label: "Knowhere Get Structure",
    description: "获取文档章节目录。先调 knowhere_get_map 确定 kbId 和文档名后,用此工具查看章节结构,然后用 knowhere_read_chunks 读取内容。重要:knowhere_read_chunks 已内置图片/表格自动投递功能,除非用户要求下载文件,否则无需额外发送原始文档。",
    parameters: {
      type: "object",
      additionalProperties: false,
      properties: {
        kbId: {
          type: "string",
          description: "Knowledge base ID (from knowhere_get_map result)"
        },
        docName: {
          type: "string",
          description: "Document name (supports fuzzy match)"
        }
      },
      required: ["kbId", "docName"]
    },
    execute
  };
}
1357
// Tool factory: knowhere_read_chunks — reads a document's chunk content,
// optionally filtered by section path. Side effects: bumps the document's
// hit counter in knowledge_graph.json (best effort) and auto-delivers
// image/table assets to the user's channel via t2ResolveAssets.
function createReadChunksTool(_params) {
  return {
    name: "knowhere_read_chunks",
    label: "Knowhere Read Chunks",
    description: "读取文档内容。先调 knowhere_get_structure 确定章节后,用此工具读取具体内容。可通过 sectionPath 过滤特定章节,减少 token 消耗。此工具已内置图片/表格自动投递:读取时会自动将多媒体内容发送到用户频道。除非用户明确要求下载原始文件,否则无需再用 message 发送文档。",
    parameters: {
      type: "object",
      additionalProperties: false,
      properties: {
        kbId: {
          type: "string",
          description: "Knowledge base ID"
        },
        docName: {
          type: "string",
          description: "Document name"
        },
        sectionPath: {
          type: "string",
          description: "Optional: section path prefix to filter (e.g. '一、工程概况')"
        },
        maxChunks: {
          type: "number",
          description: "Max chunks to return (default 50)"
        }
      },
      required: ["kbId", "docName"]
    },
    execute: async (_toolCallId, rawParams) => {
      const paramsRecord = isRecord(rawParams) ? rawParams : {};
      const kbId = readString(paramsRecord.kbId);
      const docName = readString(paramsRecord.docName);
      const sectionPath = readString(paramsRecord.sectionPath);
      const maxChunks = readNumber(paramsRecord.maxChunks, 50);
      if (!kbId || !docName) throw new Error("kbId and docName are required.");
      const docDir = await t2FindDocDir(path.join(T2_KNOWHERE_HOME, kbId), docName);
      if (!docDir) return textResult(`文档 '${docName}' 不存在`);
      let chunks = await t2LoadChunks(docDir);
      const totalAll = chunks.length;
      if (sectionPath) {
        // Filter by raw substring first, then fall back to normalized paths
        // (so minor punctuation/width differences still match).
        let filtered = chunks.filter((c) => c.path.includes(sectionPath));
        if (filtered.length === 0) {
          const normQuery = t2NormalizePath(sectionPath);
          filtered = chunks.filter((c) => t2NormalizePath(c.path).includes(normQuery));
        }
        if (filtered.length === 0) {
          // Nothing matched: offer up to 30 leaf section names to pick from.
          const sections = [...new Set(chunks.filter((c) => c.type === "text" && c.path).map((c) => {
            const parts = c.path.split("/");
            return parts.length > 1 ? parts.slice(-1)[0] : c.path;
          }))].slice(0, 30);
          return t2JsonResult({
            status: "no_match",
            message: `sectionPath '${sectionPath}' 未匹配到任何切片。请检查路径或从以下章节中选择:`,
            available_sections: sections
          });
        }
        chunks = filtered;
      } else if (totalAll > 20) {
        // No filter on a large document: refuse and list sections instead,
        // steering the agent toward a targeted sectionPath read.
        const sections = [...new Set(chunks.filter((c) => c.type === "text" && c.path).map((c) => {
          const parts = c.path.split("/");
          return parts.length > 1 ? parts.slice(-1)[0] : c.path;
        }))].slice(0, 30);
        return t2JsonResult({
          status: "too_many",
          message: `该文档共有 ${totalAll} 个切片,请先用 knowhere_get_structure 查看目录,然后用 sectionPath 参数指定章节。`,
          available_sections: sections
        });
      }
      const total = chunks.length;
      const limit = maxChunks || 50;
      chunks = chunks.slice(0, limit);
      try {
        // Best-effort usage tracking: bump the document's hit counter in the
        // knowledge graph; any failure here is silently ignored.
        const kgPath = path.join(T2_KNOWHERE_HOME, kbId, "knowledge_graph.json");
        const g = JSON.parse(await fs.readFile(kgPath, "utf-8"));
        const dn = path.basename(docDir);
        if (g.files?.[dn]) {
          g.files[dn].hit_count = (g.files[dn].hit_count || 0) + 1;
          g.files[dn].last_hit = (/* @__PURE__ */ new Date()).toISOString();
          g.updated_at = (/* @__PURE__ */ new Date()).toISOString();
          await fs.writeFile(kgPath, JSON.stringify(g, null, 2), "utf-8");
        }
      } catch {}
      chunks = await t2EnrichChunks(chunks, docDir);
      let resolvedAssets = [];
      try {
        // Auto-deliver referenced images/tables; failures degrade gracefully.
        resolvedAssets = await t2ResolveAssets({
          api: _params.api,
          store: _params.store,
          ctx: _params.ctx,
          docDir,
          returnedChunks: chunks
        });
      } catch (err) {
        _params.api.logger.debug?.(`knowhere: read_chunks asset resolution failed: ${err instanceof Error ? err.message : String(err)}`);
      }
      const result = {
        status: "ok",
        kb_id: kbId,
        doc_name: path.basename(docDir),
        section_path: sectionPath || null,
        total_chunks: total,
        returned: chunks.length,
        truncated: total > limit,
        chunks
      };
      if (resolvedAssets.length > 0) {
        // Summarize delivered assets and instruct the agent not to re-send
        // media the user has already received.
        result.resolved_assets = resolvedAssets;
        const sent = resolvedAssets.filter((a) => a.mode === "image_sent").length;
        const tables = resolvedAssets.filter((a) => a.mode === "table_inline").length;
        const notes = [];
        if (sent > 0) notes.push(`${sent} 张图片已自动发送到用户频道`);
        if (tables > 0) notes.push(`${tables} 个表格已内联为 HTML`);
        result.asset_summary = notes.join(";");
        result._agent_note = "上述多媒体资源(图片/表格/视频)已自动投递到用户频道,用户已经可以看到。请直接用文字回答用户的问题,不要再重复发送这些图片或表格。也不要主动发送原始文档文件(.docx/.pdf),除非用户明确要求下载原始文件。如果用户要求你分析、描述或对比图片内容,请使用 knowhere_view_image 工具传入 content 中的 [📷 path] 路径来获取图片数据进行视觉分析。";
      }
      return t2JsonResult(result);
    }
  };
}
1476
/**
 * Tool factory: knowhere_view_image — loads an image from a knowledge-base
 * document directory and returns it as a base64 data URL plus a short text
 * instruction, so the model can analyze the pixels directly.
 *
 * Security: `imagePath` is model-supplied untrusted input. It is resolved
 * against the document directory and rejected if it escapes that directory
 * (path traversal such as "../../etc/passwd"); the original joined it
 * unchecked, allowing arbitrary readable files to be exfiltrated as base64.
 */
function createViewImageTool(_params) {
  // Extension → MIME type for the data URL; unknown extensions fall back to JPEG.
  const MEDIA_TYPES = {
    ".png": "image/png",
    ".gif": "image/gif",
    ".webp": "image/webp"
  };
  return {
    name: "knowhere_view_image",
    label: "Knowhere View Image",
    description: "分析知识库图片的像素内容。当 knowhere_read_chunks 返回的文本中包含 [📷 path] 标记时,可用此工具传入该 path 来获取图片的 base64 数据进行视觉分析。需要提供 kbId、docName 和 imagePath(即 [📷 ...] 中的相对路径)。",
    parameters: {
      type: "object",
      additionalProperties: false,
      properties: {
        kbId: {
          type: "string",
          description: "Knowledge base ID"
        },
        docName: {
          type: "string",
          description: "Document name"
        },
        imagePath: {
          type: "string",
          description: "Image relative path from [📷 ...] marker, e.g. 'images/image-9 助力手推车.jpeg'"
        }
      },
      required: [
        "kbId",
        "docName",
        "imagePath"
      ]
    },
    execute: async (_toolCallId, rawParams) => {
      const paramsRecord = isRecord(rawParams) ? rawParams : {};
      const kbId = readString(paramsRecord.kbId);
      const docName = readString(paramsRecord.docName);
      const imagePath = readString(paramsRecord.imagePath);
      if (!kbId || !docName || !imagePath) throw new Error("kbId, docName, and imagePath are required.");
      const docDir = await t2FindDocDir(path.join(T2_KNOWHERE_HOME, kbId), docName);
      if (!docDir) return textResult(`文档 '${docName}' 不存在`);
      // Containment check: the resolved path must stay inside the document
      // directory. Traversal attempts get the same "not found" reply so the
      // error surface does not reveal filesystem layout.
      const docRoot = path.resolve(docDir);
      const absolutePath = path.resolve(docRoot, imagePath);
      if (absolutePath !== docRoot && !absolutePath.startsWith(docRoot + path.sep)) {
        return textResult(`图片文件不存在: ${imagePath}`);
      }
      try {
        await fs.access(absolutePath);
      } catch {
        return textResult(`图片文件不存在: ${imagePath}`);
      }
      const base64Data = (await fs.readFile(absolutePath)).toString("base64");
      const mediaType = MEDIA_TYPES[path.extname(imagePath).toLowerCase()] || "image/jpeg";
      return {
        content: [{
          type: "text",
          text: `你正在查看图片: ${imagePath}。图片数据已附在下方,请直接用你的视觉能力分析图片内容,`
        }, {
          type: "image_url",
          image_url: { url: `data:${mediaType};base64,${base64Data}` }
        }],
        details: {}
      };
    }
  };
}
1537
/**
 * Tool factory: knowhere_discover_files — keyword search across all
 * knowledge-base documents. Returns file names with hit counts only (never
 * content); one hit is counted per (chunk, term) pair whose content+summary
 * text contains the term. Results are sorted by descending hit count.
 */
function createDiscoverFilesTool(_params) {
  // Count (chunk, term) matches within one document's chunks.
  const countHits = (chunks, terms) => {
    let hits = 0;
    for (const chunk of chunks) {
      const haystack = `${chunk.content} ${chunk.summary}`;
      for (const term of terms) {
        if (haystack.includes(term)) hits += 1;
      }
    }
    return hits;
  };
  const execute = async (_toolCallId, rawParams) => {
    const record = isRecord(rawParams) ? rawParams : {};
    const query = readString(record.query);
    const kbId = readString(record.kbId);
    if (!query) throw new Error("query is required.");
    // Split on ASCII and CJK separators; drop single-character fragments.
    const terms = query.split(/[\s,;,;。!?、\-/]+/).filter((t) => t.length > 1);
    if (terms.length === 0) return textResult("查询词为空");
    try {
      await fs.access(T2_KNOWHERE_HOME);
    } catch {
      return textResult("未找到知识库。");
    }
    const results = [];
    const kbEntries = await fs.readdir(T2_KNOWHERE_HOME, { withFileTypes: true });
    for (const kbEntry of kbEntries) {
      if (!kbEntry.isDirectory()) continue;
      if (kbId && kbEntry.name !== kbId) continue;
      const kbName = String(kbEntry.name);
      let docEntries;
      try {
        docEntries = await fs.readdir(path.join(T2_KNOWHERE_HOME, kbName), { withFileTypes: true });
      } catch {
        continue;
      }
      for (const docEntry of docEntries) {
        if (!docEntry.isDirectory()) continue;
        const docName = String(docEntry.name);
        const chunks = await t2LoadChunks(path.join(T2_KNOWHERE_HOME, kbName, docName));
        const hitCount = countHits(chunks, terms);
        if (hitCount > 0) results.push({
          kb_id: kbName,
          doc_name: docName,
          hit_count: hitCount
        });
      }
    }
    results.sort((a, b) => b.hit_count - a.hit_count);
    return t2JsonResult({
      status: "ok",
      query,
      terms,
      discovered_files: results
    });
  };
  return {
    name: "knowhere_discover_files",
    label: "Knowhere Discover Files",
    description: "在所有知识库文档中搜索关键词,返回命中文件和次数。用于和 knowhere_get_map 做并集,避免遗漏相关文件。只返回文件名,不返回内容。注意:后续用 knowhere_read_chunks 读取时会自动投递图片/表格到用户频道,除非用户要求下载文件,否则无需额外发送原始文档。",
    parameters: {
      type: "object",
      additionalProperties: false,
      properties: {
        query: {
          type: "string",
          description: "Search keywords"
        },
        kbId: {
          type: "string",
          description: "Optional: limit to specific KB"
        }
      },
      required: ["query"]
    },
    execute
  };
}
917
1605
  function createKnowhereToolFactory(params) {
918
1606
  return (ctx) => [
919
1607
  createIngestTool({
@@ -939,6 +1627,7 @@ function createKnowhereToolFactory(params) {
939
1627
  api: params.api,
940
1628
  config: params.config,
941
1629
  store: params.store,
1630
+ kgService: params.kgService,
942
1631
  ctx
943
1632
  }),
944
1633
  createSetApiKeyTool({
@@ -954,7 +1643,16 @@ function createKnowhereToolFactory(params) {
954
1643
  api: params.api,
955
1644
  kgService: params.kgService,
956
1645
  ctx
957
- })
1646
+ }),
1647
+ createGetMapTool({ api: params.api }),
1648
+ createGetStructureTool({ api: params.api }),
1649
+ createReadChunksTool({
1650
+ api: params.api,
1651
+ store: params.store,
1652
+ ctx
1653
+ }),
1654
+ createViewImageTool({ api: params.api }),
1655
+ createDiscoverFilesTool({ api: params.api })
958
1656
  ];
959
1657
  }
960
1658
  //#endregion
@@ -3,7 +3,7 @@
3
3
  "name": "Knowhere",
4
4
  "description": "Parse documents with Knowhere and expose the stored result as tool-queryable document state for OpenClaw agents.",
5
5
  "skills": ["./skills"],
6
- "version": "0.2.4",
6
+ "version": "0.2.6",
7
7
  "uiHints": {
8
8
  "apiKey": {
9
9
  "label": "Knowhere API Key",
@@ -152,6 +152,16 @@
152
152
  }
153
153
  }
154
154
  }
155
+ },
156
+ "agentHooks": {
157
+ "type": "object",
158
+ "additionalProperties": false,
159
+ "properties": {
160
+ "enabled": {
161
+ "type": "boolean",
162
+ "default": true
163
+ }
164
+ }
155
165
  }
156
166
  }
157
167
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ontos-ai/knowhere-claw",
3
- "version": "0.2.4",
3
+ "version": "0.2.6",
4
4
  "description": "OpenClaw plugin for Knowhere-powered document ingestion and automatic grounding.",
5
5
  "files": [
6
6
  "dist/",
@@ -40,8 +40,7 @@
40
40
  "dependencies": {
41
41
  "@knowhere-ai/sdk": "^0.1.1",
42
42
  "fflate": "^0.8.2",
43
- "fs-extra": "^11.2.0",
44
- "nodejieba": "^2.6.0"
43
+ "fs-extra": "^11.2.0"
45
44
  },
46
45
  "devDependencies": {
47
46
  "@changesets/changelog-github": "^0.6.0",
@@ -19,28 +19,46 @@ Activate this skill when:
19
19
 
20
20
  ## Part 1: Ingesting New Documents
21
21
 
22
- When a file is uploaded or attached (e.g. via Telegram), the agent should parse it into the knowledge base.
22
+ When a user sends, uploads, or mentions a file, **always** use `knowhere_ingest_document` to parse it into the knowledge base. Two delivery modes exist depending on how the file arrives:
23
23
 
24
- ### Attachment markers
24
+ ### Mode A: Local File (Telegram, Discord, Signal, …)
25
25
 
26
- When a prompt contains a marker like:
26
+ When the channel downloads the file to disk, a marker appears in the prompt:
27
27
 
28
28
  ```text
29
29
  [media attached: /absolute/path/to/file.pdf (application/pdf) | handbook.pdf]
30
30
  ```
31
31
 
32
- Use the exact absolute path as `filePath` and the visible filename as `fileName`.
32
+ Use the exact absolute path from the marker:
33
33
 
34
- ### Ingestion workflow
34
+ ```
35
+ knowhere_ingest_document(filePath: "/absolute/path/to/file.pdf", fileName: "handbook.pdf")
36
+ ```
37
+
38
+ ### Mode B: Cloud File (Feishu, …)
39
+
40
+ When the file stays in a cloud service (no `[media attached:]` marker):
41
+
42
+ 1. Use the channel's file tool to locate the file and obtain a download URL
43
+ - Feishu: use `feishu_drive` with `action: "list"` to find the file, then get its download URL
44
+ - Other cloud channels: use the equivalent tool to get a direct download URL
45
+ 2. Pass the URL to knowhere:
46
+
47
+ ```
48
+ knowhere_ingest_document(url: "https://download-url-from-channel-tool/...")
49
+ ```
50
+
51
+ ### After Ingestion
52
+
53
+ The plugin handles everything automatically:
54
+
55
+ - Uploads/fetches the file for parsing
56
+ - Polls until parsing completes
57
+ - Downloads and extracts the result package
58
+ - Copies parsed data to `~/.knowhere/{kbId}/`
59
+ - Builds/updates `knowledge_graph.json`
35
60
 
36
- 1. Call `knowhere_ingest_document` with the file path
37
- 2. The plugin handles everything automatically:
38
- - Uploads the file to Knowhere API for parsing
39
- - Polls until parsing completes
40
- - Downloads and extracts the result package
41
- - **Automatically** copies parsed data to `~/.knowhere/{kbId}/`
42
- - **Automatically** builds/updates `knowledge_graph.json`
43
- 3. After ingest completes, the new document is immediately searchable via the retrieval workflow below
61
+ After ingest completes, the new document is immediately searchable via the retrieval workflow below.
44
62
 
45
63
  Supported formats: PDF, DOCX, XLSX, PPTX, TXT, MD, images (JPG, PNG)
46
64