@ontos-ai/knowhere-claw 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/client.js CHANGED
@@ -1,8 +1,8 @@
1
1
  import { isRecord } from "./types.js";
2
2
  import { formatErrorMessage } from "./error-message.js";
3
+ import { openAsBlob } from "node:fs";
3
4
  import path from "node:path";
4
5
  import { createHash } from "node:crypto";
5
- import { openAsBlob } from "node:fs";
6
6
  import { Knowhere } from "@knowhere-ai/sdk";
7
7
  //#region src/client.ts
8
8
  const RETRYABLE_STATUS_CODES = new Set([
package/dist/config.d.ts CHANGED
@@ -2,6 +2,14 @@ import type { OpenClawPluginApi } from "openclaw/plugin-sdk/core";
2
2
  import type { JsonSchemaObject, ResolvedKnowhereConfig, StringRecord, KnowledgeGraphConfig } from "./types";
3
3
  export declare const DEFAULT_BASE_URL = "https://api.knowhereto.ai";
4
4
  export declare const knowherePluginConfigSchema: JsonSchemaObject;
5
+ /**
6
+ * Return the effective plugin config object, merging the persisted
7
+ * resolved-config when the live pluginConfig is missing explicit fields
8
+ * (i.e. in agent subprocesses). Both `resolveKnowhereConfig` and
9
+ * `resolveKnowledgeGraphConfig` should read from this merged result so
10
+ * that subprocess instances inherit the gateway's full configuration.
11
+ */
12
+ export declare function resolveEffectivePluginConfig(api: OpenClawPluginApi): StringRecord;
5
13
  export declare function resolveKnowhereConfig(api: OpenClawPluginApi): ResolvedKnowhereConfig;
6
14
  export declare const API_KEY_URL = "https://knowhereto.ai/api-keys";
7
15
  export declare const PURCHASE_CREDITS_URL = "https://knowhereto.ai/usage?buy=true";
package/dist/config.js CHANGED
@@ -1,4 +1,5 @@
1
1
  import { isRecord } from "./types.js";
2
+ import { readFileSync } from "node:fs";
2
3
  import fs from "node:fs/promises";
3
4
  import path from "node:path";
4
5
  //#region src/config.ts
@@ -147,36 +148,83 @@ function readScopeMode(raw) {
147
148
  if (value === "session" || value === "agent" || value === "global") return value;
148
149
  return KNOWHERE_PLUGIN_DEFAULTS.scopeMode;
149
150
  }
151
+ const RESOLVED_CONFIG_STATE_FILE = "resolved-config.json";
152
/**
 * Synchronously load the resolved-config snapshot stored inside `stateDir`.
 *
 * @param stateDir Directory that contains the snapshot file.
 * @returns The parsed snapshot when the file can be read, parses as JSON and
 *   passes `isRecord`; `null` in every other case (including read or parse
 *   failures, which are deliberately swallowed).
 */
function readPersistedResolvedConfigSync(stateDir) {
  try {
    const contents = readFileSync(path.join(stateDir, RESOLVED_CONFIG_STATE_FILE), "utf-8");
    const snapshot = JSON.parse(contents);
    return isRecord(snapshot) ? snapshot : null;
  } catch {
    return null;
  }
}
163
/**
 * Persist `config` as pretty-printed JSON to the resolved-config file in
 * `stateDir`, creating the directory when it does not exist.
 *
 * The JSON is written to a uniquely named temporary file in the same
 * directory and then renamed over the target. A process that reads the
 * snapshot while it is being rewritten therefore sees either the previous
 * complete file or the new complete file, never a truncated one.
 *
 * @param stateDir Directory that holds the snapshot file.
 * @param config JSON-serialisable value to store.
 * @returns A promise that resolves once the snapshot has been replaced.
 * @throws Rejects with the underlying file-system error; the temporary file
 *   is removed on a best-effort basis before the error is rethrown.
 */
async function persistResolvedConfig(stateDir, config) {
  await fs.mkdir(stateDir, { recursive: true });
  const filePath = path.join(stateDir, RESOLVED_CONFIG_STATE_FILE);
  // pid + random suffix keeps concurrent writers from sharing a temp file.
  const tempPath = `${filePath}.${process.pid}.${Math.random().toString(36).slice(2)}.tmp`;
  try {
    await fs.writeFile(tempPath, JSON.stringify(config, null, 2), "utf-8");
    await fs.rename(tempPath, filePath);
  } catch (error) {
    // Cleanup is best-effort; the original failure is what the caller needs.
    await fs.rm(tempPath, { force: true }).catch(() => void 0);
    throw error;
  }
}
168
/**
 * Tell whether the plugin config carries an explicit `scopeMode` or
 * `storageDir` value, as read through `readString`.
 *
 * @param raw Plugin config record to inspect.
 * @returns `true` when at least one of the two keys yields a truthy string.
 */
function hasExplicitPluginConfig(raw) {
  for (const key of ["scopeMode", "storageDir"]) {
    if (readString(raw, key)) return true;
  }
  return false;
}
171
/**
 * Compute the plugin config object that the resolvers should read from.
 *
 * When the live `api.pluginConfig` has no explicit `scopeMode`/`storageDir`
 * and a persisted resolved-config snapshot exists in the state directory,
 * the snapshot is used as a base and the live values are layered on top.
 * Otherwise the live config (or an empty object when it is not a record) is
 * returned unchanged.
 *
 * @param api Plugin API exposing `pluginConfig` and `runtime.state.resolveStateDir()`.
 * @returns The effective plugin config record.
 */
function resolveEffectivePluginConfig(api) {
  const liveConfig = isRecord(api.pluginConfig) ? api.pluginConfig : {};
  // Resolved up front, before the explicit-config check, exactly as before.
  const stateDir = api.runtime.state.resolveStateDir();
  if (hasExplicitPluginConfig(liveConfig)) return liveConfig;
  const snapshot = readPersistedResolvedConfigSync(stateDir);
  if (!snapshot) return liveConfig;
  // Live values win over persisted ones.
  return {
    ...snapshot,
    ...liveConfig
  };
}
150
190
  function resolveKnowhereConfig(api) {
151
191
  const raw = isRecord(api.pluginConfig) ? api.pluginConfig : {};
152
192
  const stateDir = api.runtime.state.resolveStateDir();
153
- const storageDirRaw = readString(raw, "storageDir");
154
- return {
193
+ const hasExplicit = hasExplicitPluginConfig(raw);
194
+ const effective = resolveEffectivePluginConfig(api);
195
+ const storageDirRaw = readString(effective, "storageDir");
196
+ const config = {
155
197
  apiKey: readString(raw, "apiKey") || process.env.KNOWHERE_API_KEY || "",
156
198
  baseUrl: readString(raw, "baseUrl") || process.env.KNOWHERE_BASE_URL || "https://api.knowhereto.ai",
157
199
  storageDir: storageDirRaw ? api.resolvePath(storageDirRaw) : path.join(stateDir, "plugins", api.id),
158
- scopeMode: readScopeMode(raw),
159
- pollIntervalMs: readNumber(raw, "pollIntervalMs", KNOWHERE_PLUGIN_DEFAULTS.pollIntervalMs, {
200
+ scopeMode: readScopeMode(effective),
201
+ pollIntervalMs: readNumber(effective, "pollIntervalMs", KNOWHERE_PLUGIN_DEFAULTS.pollIntervalMs, {
160
202
  min: 1e3,
161
203
  max: 6e4,
162
204
  integer: true
163
205
  }),
164
- pollTimeoutMs: readNumber(raw, "pollTimeoutMs", KNOWHERE_PLUGIN_DEFAULTS.pollTimeoutMs, {
206
+ pollTimeoutMs: readNumber(effective, "pollTimeoutMs", KNOWHERE_PLUGIN_DEFAULTS.pollTimeoutMs, {
165
207
  min: 1e4,
166
208
  max: 72e5,
167
209
  integer: true
168
210
  }),
169
- requestTimeoutMs: readNumber(raw, "requestTimeoutMs", KNOWHERE_PLUGIN_DEFAULTS.requestTimeoutMs, {
211
+ requestTimeoutMs: readNumber(effective, "requestTimeoutMs", KNOWHERE_PLUGIN_DEFAULTS.requestTimeoutMs, {
170
212
  min: 1e3,
171
213
  max: 3e5,
172
214
  integer: true
173
215
  }),
174
- uploadTimeoutMs: readNumber(raw, "uploadTimeoutMs", KNOWHERE_PLUGIN_DEFAULTS.uploadTimeoutMs, {
216
+ uploadTimeoutMs: readNumber(effective, "uploadTimeoutMs", KNOWHERE_PLUGIN_DEFAULTS.uploadTimeoutMs, {
175
217
  min: 1e3,
176
218
  max: 72e5,
177
219
  integer: true
178
220
  })
179
221
  };
222
+ if (hasExplicit) persistResolvedConfig(stateDir, {
223
+ scopeMode: config.scopeMode,
224
+ storageDir: config.storageDir,
225
+ knowledgeGraph: raw.knowledgeGraph
226
+ }).catch(() => void 0);
227
+ return config;
180
228
  }
181
229
  const API_KEY_URL = "https://knowhereto.ai/api-keys";
182
230
  const PURCHASE_CREDITS_URL = "https://knowhereto.ai/usage?buy=true";
@@ -257,4 +305,4 @@ function resolveKnowledgeGraphConfig(raw) {
257
305
  };
258
306
  }
259
307
  //#endregion
260
- export { assertKnowhereApiKey, formatPaymentRequiredMessage, isPaymentRequiredError, knowherePluginConfigSchema, persistApiKey, readPersistedApiKey, resolveKnowhereConfig, resolveKnowledgeGraphConfig };
308
+ export { assertKnowhereApiKey, formatPaymentRequiredMessage, isPaymentRequiredError, knowherePluginConfigSchema, persistApiKey, readPersistedApiKey, resolveEffectivePluginConfig, resolveKnowhereConfig, resolveKnowledgeGraphConfig };
@@ -62,7 +62,7 @@ export interface ChunkStats {
62
62
  * Main function to build knowledge graph
63
63
  * Equivalent to Python: build_knowledge_graph
64
64
  */
65
- export declare function buildKnowledgeGraph(chunks: ChunkData[], connections: Connection[], chunkStats: ChunkStats, jiebaInitialized: boolean, logger?: PluginLogger, kbId?: string): KnowledgeGraph;
65
+ export declare function buildKnowledgeGraph(chunks: ChunkData[], connections: Connection[], chunkStats: ChunkStats, _jiebaInitialized: boolean, logger?: PluginLogger, kbId?: string): KnowledgeGraph;
66
66
  /**
67
67
  * Incremental update: match new chunks against existing chunks
68
68
  * Equivalent to Python: _incremental_connections
@@ -1,12 +1,5 @@
1
- import * as nodejieba from "nodejieba";
2
1
  //#region src/graph-builder.ts
3
2
  /**
4
- * Graph Builder Module
5
- *
6
- * TypeScript implementation of knowhere-api/apps/worker/app/services/connect_builder/graph_builder.py
7
- * Builds file-level knowledge graphs from chunk connections with TF-IDF and importance scoring.
8
- */
9
- /**
10
3
  * Extract file key from a chunk.
11
4
  * Prefers the explicit fileKey field; falls back to path-based extraction for backward compatibility.
12
5
  */
@@ -27,29 +20,18 @@ function extractLabel(path) {
27
20
  * Extract tokens from text using jieba
28
21
  * Equivalent to Python: _extract_tokens_from_content
29
22
  */
30
- function extractTokensFromContent(content, jiebaInitialized) {
31
- const cleanContent = content.replace(/<[^>]*>/g, " ");
32
- if (!jiebaInitialized) return cleanContent.split(/\s+/).filter((w) => w.length > 1);
33
- try {
34
- return nodejieba.cut(cleanContent).filter((token) => {
35
- if (token.length <= 1) return false;
36
- if (/^\d+$/.test(token)) return false;
37
- if (/^[^\w\u4e00-\u9fa5]+$/.test(token)) return false;
38
- return true;
39
- });
40
- } catch {
41
- return cleanContent.split(/\s+/).filter((w) => w.length > 1);
42
- }
23
function extractTokensFromContent(content) {
  // Replace every tag-like `<...>` span with a space so words on either side
  // of markup do not get glued together.
  const withoutTags = content.replace(/<[^>]*>/g, " ");
  // Whitespace tokenisation; single-character pieces (and the empty strings
  // produced by leading/trailing whitespace) are dropped.
  const tokens = [];
  for (const piece of withoutTags.split(/\s+/)) {
    if (piece.length > 1) tokens.push(piece);
  }
  return tokens;
}
44
26
  /**
45
27
  * Compute TF-IDF top keywords for a file
46
28
  * Equivalent to Python: _compute_tfidf_top_keywords
47
29
  */
48
- function computeTfidfTopKeywords(fileChunks, allChunks, topK, jiebaInitialized) {
30
+ function computeTfidfTopKeywords(fileChunks, allChunks, topK) {
49
31
  if (fileChunks.length === 0) return [];
50
32
  const fileTokens = [];
51
33
  for (const chunk of fileChunks) {
52
- const tokens = extractTokensFromContent(chunk.content, jiebaInitialized);
34
+ const tokens = extractTokensFromContent(chunk.content);
53
35
  fileTokens.push(...tokens);
54
36
  }
55
37
  if (fileTokens.length === 0) return [];
@@ -60,7 +42,7 @@ function computeTfidfTopKeywords(fileChunks, allChunks, topK, jiebaInitialized)
60
42
  }
61
43
  const docFreq = /* @__PURE__ */ new Map();
62
44
  for (const chunk of allChunks) {
63
- const tokens = new Set(extractTokensFromContent(chunk.content, jiebaInitialized).map((t) => t.toLowerCase()));
45
+ const tokens = new Set(extractTokensFromContent(chunk.content).map((t) => t.toLowerCase()));
64
46
  for (const token of tokens) docFreq.set(token, (docFreq.get(token) || 0) + 1);
65
47
  }
66
48
  const totalDocs = allChunks.length;
@@ -161,7 +143,7 @@ function aggregateFileLevelEdges(connections, chunkById, topN = 5) {
161
143
  * Main function to build knowledge graph
162
144
  * Equivalent to Python: build_knowledge_graph
163
145
  */
164
- function buildKnowledgeGraph(chunks, connections, chunkStats, jiebaInitialized, logger, kbId) {
146
+ function buildKnowledgeGraph(chunks, connections, chunkStats, _jiebaInitialized, logger, kbId) {
165
147
  logger?.info(`Building knowledge graph from ${chunks.length} chunks and ${connections.length} connections`);
166
148
  const chunkById = /* @__PURE__ */ new Map();
167
149
  for (const chunk of chunks) chunkById.set(chunk.chunk_id, chunk);
@@ -178,7 +160,7 @@ function buildKnowledgeGraph(chunks, connections, chunkStats, jiebaInitialized,
178
160
  const type = chunk.metadata.type || "text";
179
161
  typeCount[type] = (typeCount[type] || 0) + 1;
180
162
  }
181
- const topKeywords = computeTfidfTopKeywords(fileChunks, chunks, 10, jiebaInitialized);
163
+ const topKeywords = computeTfidfTopKeywords(fileChunks, chunks, 10);
182
164
  const importance = computeFileImportance(fileKey, fileChunks, chunks, chunkStats);
183
165
  let topSummary = "";
184
166
  for (const chunk of fileChunks) if (chunk.metadata.summary && typeof chunk.metadata.summary === "string") {
package/dist/index.js CHANGED
@@ -1,4 +1,4 @@
1
- import { knowherePluginConfigSchema, readPersistedApiKey, resolveKnowhereConfig, resolveKnowledgeGraphConfig } from "./config.js";
1
+ import { knowherePluginConfigSchema, readPersistedApiKey, resolveEffectivePluginConfig, resolveKnowhereConfig, resolveKnowledgeGraphConfig } from "./config.js";
2
2
  import { KnowhereStore } from "./store.js";
3
3
  import { createKnowhereToolFactory } from "./tools.js";
4
4
  import { KnowledgeGraphService } from "./kg-service.js";
@@ -10,7 +10,7 @@ const plugin = {
10
10
  configSchema: knowherePluginConfigSchema,
11
11
  register(api) {
12
12
  const config = resolveKnowhereConfig(api);
13
- const kgConfig = resolveKnowledgeGraphConfig(api.pluginConfig && typeof api.pluginConfig === "object" ? api.pluginConfig : {});
13
+ const kgConfig = resolveKnowledgeGraphConfig(resolveEffectivePluginConfig(api));
14
14
  const store = new KnowhereStore({
15
15
  rootDir: config.storageDir,
16
16
  scopeMode: config.scopeMode,
@@ -9,11 +9,9 @@ export declare class KnowledgeGraphService {
9
9
  private readonly logger;
10
10
  private degradationMode;
11
11
  private buildQueues;
12
- private jiebaInitialized;
13
12
  constructor(params: KnowledgeGraphServiceParams);
14
13
  initialize(): Promise<void>;
15
14
  private checkPythonEnvironment;
16
- private initializeJieba;
17
15
  extractKeywords(text: string, topK?: number): Promise<string[]>;
18
16
  resolveKbId(context: ToolRuntimeContext): string | null;
19
17
  getKbPath(kbId: string): string;
@@ -2,10 +2,9 @@ import { resolveStoredKnowhereResultRoot } from "./parser.js";
2
2
  import { buildConnections, init_connect_builder } from "./connect-builder.js";
3
3
  import { buildKnowledgeGraph } from "./graph-builder.js";
4
4
  import path from "node:path";
5
+ import os from "node:os";
5
6
  import { spawn } from "node:child_process";
6
7
  import fs from "fs-extra";
7
- import os from "node:os";
8
- import * as nodejieba from "nodejieba";
9
8
  //#region src/kg-service.ts
10
9
  init_connect_builder();
11
10
  const DEFAULT_CONNECT_CONFIG = {
@@ -26,18 +25,11 @@ const DEFAULT_KG_CONFIG = {
26
25
  function formatUnknownError(error) {
27
26
  return error instanceof Error ? error.message : String(error);
28
27
  }
29
- function extractKeywordText(item) {
30
- if (typeof item === "string") return item;
31
- if (typeof item === "number" || typeof item === "boolean" || typeof item === "bigint") return String(item);
32
- if (typeof item === "object" && item !== null && "word" in item && typeof item.word === "string") return item.word;
33
- return null;
34
- }
35
28
  var KnowledgeGraphService = class {
36
29
  config;
37
30
  logger;
38
31
  degradationMode = "full";
39
32
  buildQueues = /* @__PURE__ */ new Map();
40
- jiebaInitialized = false;
41
33
  constructor(params) {
42
34
  this.config = {
43
35
  ...DEFAULT_KG_CONFIG,
@@ -58,18 +50,12 @@ var KnowledgeGraphService = class {
58
50
  }
59
51
  try {
60
52
  await this.checkPythonEnvironment();
61
- await this.initializeJieba();
62
53
  this.degradationMode = "full";
63
54
  this.logger.info("Knowledge graph service initialized in full mode");
64
55
  } catch (error) {
65
56
  this.logger.warn(`Knowledge graph initialization failed: ${error instanceof Error ? error.message : String(error)}`);
66
- if (error.code === "PYTHON_MISSING") {
67
- this.degradationMode = "disabled";
68
- this.logger.warn("Python not found, knowledge graph disabled");
69
- } else if (error.code === "NODEJIEBA_MISSING") {
70
- this.degradationMode = "basic";
71
- this.logger.warn("Nodejieba missing, using basic tokenization");
72
- } else this.degradationMode = "disabled";
57
+ this.degradationMode = "disabled";
58
+ this.logger.warn("Python not found, knowledge graph disabled");
73
59
  }
74
60
  }
75
61
  async checkPythonEnvironment() {
@@ -92,30 +78,8 @@ var KnowledgeGraphService = class {
92
78
  });
93
79
  });
94
80
  }
95
- async initializeJieba() {
96
- try {
97
- nodejieba.load();
98
- this.jiebaInitialized = true;
99
- this.logger.info("Nodejieba initialized successfully");
100
- } catch {
101
- const err = /* @__PURE__ */ new Error("Failed to initialize nodejieba");
102
- err.code = "NODEJIEBA_MISSING";
103
- throw err;
104
- }
105
- }
106
81
  async extractKeywords(text, topK = 20) {
107
82
  if (this.degradationMode === "disabled") return [];
108
- if (this.degradationMode === "full" && this.jiebaInitialized) try {
109
- const rawKeywords = nodejieba.extract(text, topK);
110
- return (Array.isArray(rawKeywords) ? rawKeywords : []).map((item) => extractKeywordText(item)).filter((keyword) => keyword !== null).filter((kw) => {
111
- if (kw.length <= 1) return false;
112
- if (/^\d+$/.test(kw)) return false;
113
- return true;
114
- }).slice(0, topK);
115
- } catch (error) {
116
- this.logger.warn(`Jieba extraction failed, falling back to basic: ${formatUnknownError(error)}`);
117
- this.degradationMode = "basic";
118
- }
119
83
  return text.split(/\s+/).filter((w) => w.length > 1).slice(0, topK);
120
84
  }
121
85
  resolveKbId(context) {
@@ -146,8 +110,6 @@ var KnowledgeGraphService = class {
146
110
  await fs.ensureDir(docDir);
147
111
  const sourceResultRoot = await resolveStoredKnowhereResultRoot(params.sourcePath);
148
112
  await fs.copy(sourceResultRoot, docDir, { overwrite: true });
149
- const keywordsPath = path.join(docDir, "keywords.json");
150
- await fs.writeJSON(keywordsPath, params.keywords, { spaces: 2 });
151
113
  const metadataPath = path.join(docDir, "metadata.json");
152
114
  await fs.writeJSON(metadataPath, params.metadata, { spaces: 2 });
153
115
  this.logger.info(`Document saved to knowledge base: kb=${params.kbId} doc=${params.docId}`);
@@ -176,7 +138,7 @@ var KnowledgeGraphService = class {
176
138
  const docPath = path.join(kbPath, doc);
177
139
  if ((await fs.stat(docPath)).isDirectory() && doc !== "knowledge_graph.json" && doc !== "chunk_stats.json" && doc !== "kb_metadata.json") docDirs.push(doc);
178
140
  }
179
- if (docDirs.length < 2) {
141
+ if (docDirs.length < 1) {
180
142
  this.logger.info(`Not enough documents for graph building (need >=2, have ${docDirs.length}), skipping`);
181
143
  return;
182
144
  }
@@ -203,7 +165,7 @@ var KnowledgeGraphService = class {
203
165
  const chunkStatsPath = path.join(kbPath, "chunk_stats.json");
204
166
  let chunkStats = {};
205
167
  if (await fs.pathExists(chunkStatsPath)) chunkStats = await fs.readJSON(chunkStatsPath);
206
- const knowledgeGraph = buildKnowledgeGraph(allChunks, connections, chunkStats, this.jiebaInitialized, this.logger, kbId);
168
+ const knowledgeGraph = buildKnowledgeGraph(allChunks, connections, chunkStats, false, this.logger, kbId);
207
169
  const graphFile = path.join(kbPath, "knowledge_graph.json");
208
170
  await fs.writeJSON(graphFile, knowledgeGraph, { spaces: 2 });
209
171
  this.logger.info(`Knowledge graph saved to ${graphFile}`);
package/dist/tools.js CHANGED
@@ -7,6 +7,7 @@ import { KnowhereClient } from "./client.js";
7
7
  import { sendTrackerProgress } from "./tracker-progress.js";
8
8
  import fs from "node:fs/promises";
9
9
  import path from "node:path";
10
+ import os from "node:os";
10
11
  //#region src/tools.ts
11
12
  const TERMINAL_JOB_STATUSES = new Set([
12
13
  "cancelled",
@@ -784,6 +785,24 @@ function createImportCompletedJobTool(params) {
784
785
  downloadedResult: importResult.downloadedResult
785
786
  }, { overwrite });
786
787
  params.api.logger.info(`knowhere: knowhere_import_completed_job stored imported document scope=${scope.label} jobId=${importResult.jobResult.job_id} docId=${document.id}`);
788
+ try {
789
+ const importKbId = params.kgService.resolveKbId(params.ctx);
790
+ if (importKbId && params.kgService.isEnabled()) {
791
+ params.api.logger.info(`knowhere: triggering KG build after import kbId=${importKbId}`);
792
+ buildKnowledgeGraphAsync({
793
+ kgService: params.kgService,
794
+ kbId: importKbId,
795
+ docId: document.id,
796
+ documentPayload: { downloadedResult: importResult.downloadedResult },
797
+ scope,
798
+ store: params.store,
799
+ ctx: params.ctx,
800
+ api: params.api
801
+ }).catch((e) => params.api.logger.error(`knowhere: KG build after import failed: ${formatErrorMessage(e)}`));
802
+ }
803
+ } catch (kgError) {
804
+ params.api.logger.warn(`knowhere: import KG trigger error: ${formatErrorMessage(kgError)}`);
805
+ }
787
806
  return textResult([
788
807
  "Import complete.",
789
808
  ...buildStoredDocumentSummaryLines({
@@ -914,6 +933,380 @@ function createKgQueryTool(params) {
914
933
  }
915
934
  };
916
935
  }
936
+ const T2_KNOWHERE_HOME = path.join(os.homedir(), ".knowhere");
937
/**
 * Locate a document directory inside a knowledge-base directory.
 *
 * Resolution order:
 *   1. `kbDir/docName` when that path is accessible;
 *   2. otherwise the first sub-directory of `kbDir` whose name contains
 *      `docName` as a substring.
 *
 * `docName` originates from tool parameters, so names that would resolve
 * outside `kbDir` (for example `../other`) are rejected instead of being
 * followed.
 *
 * @param kbDir Knowledge-base directory to search.
 * @param docName Exact or partial document directory name.
 * @returns The matching path, or `null` when nothing matches, `kbDir` cannot
 *   be listed, or `docName` escapes `kbDir`.
 */
async function t2FindDocDir(kbDir, docName) {
  const exactPath = path.join(kbDir, docName);
  const relative = path.relative(path.resolve(kbDir), path.resolve(exactPath));
  // `..foo` is a legal directory name, so only a real parent hop counts.
  const escapesKbDir = relative === ".." || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative);
  if (escapesKbDir) return null;
  try {
    await fs.access(exactPath);
    return exactPath;
  } catch {
    // No exact match; fall through to the substring search.
  }
  let entries;
  try {
    entries = await fs.readdir(kbDir, { withFileTypes: true });
  } catch {
    return null;
  }
  for (const entry of entries) {
    const name = String(entry.name);
    if (entry.isDirectory() && name.includes(docName)) return path.join(kbDir, name);
  }
  return null;
}
952
/**
 * Load the chunk list of a document directory.
 *
 * `chunks_slim.json` is tried first and its entries are returned untouched;
 * `chunks.json` is the fallback and its entries are reduced to
 * `{ type, path, content, summary }`. Either file may hold a bare array or an
 * object with a `chunks` array. A file that is missing, unparsable or of
 * another shape is skipped.
 *
 * @param docDir Document directory to read from.
 * @returns The chunk list, or an empty array when neither file is usable.
 */
async function t2LoadChunks(docDir) {
  const candidates = [
    { file: "chunks_slim.json", projectFields: false },
    { file: "chunks.json", projectFields: true }
  ];
  for (const { file, projectFields } of candidates) {
    try {
      const parsed = JSON.parse(await fs.readFile(path.join(docDir, file), "utf-8"));
      let list = null;
      if (Array.isArray(parsed)) list = parsed;
      else if (isRecord(parsed) && Array.isArray(parsed.chunks)) list = parsed.chunks;
      if (list === null) continue;
      if (!projectFields) return list;
      return list.map((chunk) => ({
        type: chunk.type || "text",
        path: chunk.path || "",
        content: chunk.content || "",
        summary: chunk.metadata?.summary || chunk.summary || ""
      }));
    } catch {
      // Unreadable or malformed candidate: move on to the next file.
    }
  }
  return [];
}
972
/**
 * Read the unmodified chunk entries from `chunks.json` in a document directory.
 *
 * @param docDir Document directory to read from.
 * @returns The top-level array, or the `chunks` array of a top-level object;
 *   an empty array when the file is missing, unparsable or of another shape.
 */
async function t2LoadRawChunks(docDir) {
  let parsed;
  try {
    parsed = JSON.parse(await fs.readFile(path.join(docDir, "chunks.json"), "utf-8"));
    if (Array.isArray(parsed)) return parsed;
    return isRecord(parsed) && Array.isArray(parsed.chunks) ? parsed.chunks : [];
  } catch {
    return [];
  }
}
983
/**
 * Rank the terms found in `metadata.tokens` and `metadata.keywords` of the
 * given chunks with a TF-IDF style score and return the best ones.
 *
 * Score per lower-cased term: `tf * (ln(totalChunks / df) + 1)`, where `tf`
 * is the number of occurrences across all chunks and `df` the number of
 * chunks containing the term. Terms of length <= 1 and purely numeric terms
 * (optionally followed by `.`, `,` or `%`) are ignored.
 *
 * Counters live in `Map`s rather than plain objects so that terms such as
 * `constructor` cannot collide with `Object.prototype` members, and entries
 * that are not strings (or chunks that are not objects) are skipped instead
 * of throwing.
 *
 * @param rawChunks Chunk entries as stored in `chunks.json`.
 * @param topK Maximum number of keywords to return (default 10).
 * @returns Lower-cased terms ordered by descending score; ties keep
 *   first-seen order.
 */
function t2ComputeTfIdfKeywords(rawChunks, topK = 10) {
  const numericTerm = /^\d+[.,%]*$/;
  const termFrequency = new Map();
  const documentFrequency = new Map();
  // Avoid a zero numerator inside the logarithm when there are no chunks.
  const totalDocs = rawChunks.length || 1;
  for (const chunk of rawChunks) {
    const metadata = chunk?.metadata;
    const tokens = Array.isArray(metadata?.tokens) ? metadata.tokens : [];
    const keywords = Array.isArray(metadata?.keywords) ? metadata.keywords : [];
    const seenInChunk = new Set();
    for (const term of [...tokens, ...keywords]) {
      if (typeof term !== "string" || term.length <= 1) continue;
      if (numericTerm.test(term)) continue;
      const lower = term.toLowerCase();
      termFrequency.set(lower, (termFrequency.get(lower) ?? 0) + 1);
      if (!seenInChunk.has(lower)) {
        // Document frequency counts each chunk at most once per term.
        documentFrequency.set(lower, (documentFrequency.get(lower) ?? 0) + 1);
        seenInChunk.add(lower);
      }
    }
  }
  const scored = Array.from(termFrequency, ([term, freq]) => ({
    term,
    score: freq * (Math.log(totalDocs / (documentFrequency.get(term) ?? 1)) + 1)
  }));
  scored.sort((a, b) => b.score - a.score);
  return scored.slice(0, topK).map((entry) => entry.term);
}
1012
/**
 * Decide whether a stored keyword list looks unusable and should be
 * recomputed.
 *
 * A keyword counts as bad when it is falsy, or when it is a string that is at
 * most one character long, purely numeric (optionally followed by `.`, `,`
 * or `%`), or made of one or two ASCII letters.
 *
 * @param keywords Value read from a graph entry's `top_keywords`.
 * @returns `true` when `keywords` is not a non-empty array, or when at least
 *   half of its entries are bad.
 */
function t2KeywordsNeedRepair(keywords) {
  if (!Array.isArray(keywords) || keywords.length === 0) return true;
  const isBad = (keyword) => {
    if (!keyword) return true;
    if (typeof keyword !== "string") return false;
    return keyword.length <= 1 || /^\d+[.,%]*$/.test(keyword) || /^[a-z]{1,2}$/i.test(keyword);
  };
  let badCount = 0;
  for (const keyword of keywords) {
    if (isBad(keyword)) badCount += 1;
  }
  return badCount >= keywords.length * .5;
}
1018
/**
 * Wrap a value in the tool-result envelope used by the knowledge-base tools:
 * one text content item holding the value as 2-space indented JSON, plus an
 * empty `details` object.
 *
 * @param data Value to serialise with `JSON.stringify`.
 * @returns `{ content: [{ type: "text", text }], details: {} }`.
 */
function t2JsonResult(data) {
  const text = JSON.stringify(data, null, 2);
  const content = [{ type: "text", text }];
  return { content, details: {} };
}
1027
/**
 * List the document directories of a knowledge base, i.e. the direct
 * sub-directories of `kbRoot` that contain an accessible `chunks.json`.
 *
 * @param kbRoot Knowledge-base directory to scan.
 * @returns Directory names in the order `readdir` reported them; an empty
 *   array when `kbRoot` cannot be listed.
 */
async function t2ListDocDirs(kbRoot) {
  let entries;
  try {
    entries = await fs.readdir(kbRoot, { withFileTypes: true });
  } catch {
    return [];
  }
  const docs = [];
  for (const entry of entries) {
    if (!entry.isDirectory()) continue;
    const name = String(entry.name);
    const hasChunks = await fs.access(path.join(kbRoot, name, "chunks.json")).then(() => true, () => false);
    if (hasChunks) docs.push(name);
  }
  return docs;
}
1046
/**
 * Build the `knowhere_get_map` tool.
 *
 * The tool scans every directory under `T2_KNOWHERE_HOME` (or only the one
 * named by the optional `kbId` parameter) and reports, per knowledge base,
 * the contents of its `knowledge_graph.json`. File entries whose
 * `top_keywords` fail `t2KeywordsNeedRepair` are recomputed from the
 * document's `chunks.json`, and the repaired graph is written back on a
 * best-effort basis. A knowledge base whose graph cannot be read or processed
 * is reported with version "pending" and one empty entry per document
 * directory.
 *
 * @param _params Unused.
 * @returns The tool definition (name, label, description, parameters, execute).
 */
function createGetMapTool(_params) {
  // Rebuild keywords, type counts and chunk count of one graph entry.
  // Returns true when the entry was modified.
  const repairFileEntry = async (kbRoot, docName, info) => {
    if (!t2KeywordsNeedRepair(info.top_keywords)) return false;
    const rawChunks = await t2LoadRawChunks(path.join(kbRoot, docName));
    if (rawChunks.length === 0) return false;
    const repaired = t2ComputeTfIdfKeywords(rawChunks);
    if (repaired.length === 0) return false;
    info.top_keywords = repaired;
    const types = {};
    for (const chunk of rawChunks) {
      const chunkType = chunk.type || "text";
      types[chunkType] = (types[chunkType] || 0) + 1;
    }
    info.types = types;
    info.chunks_count = rawChunks.length;
    return true;
  };
  // Read one knowledge graph, repair it when needed, and summarise it.
  // Any failure here propagates so the caller can fall back to "pending".
  const summarizeGraph = async (kbRoot, kbName) => {
    const kgPath = path.join(kbRoot, "knowledge_graph.json");
    const graph = JSON.parse(await fs.readFile(kgPath, "utf-8"));
    let dirty = false;
    for (const [docName, info] of Object.entries(graph.files || {})) {
      if (await repairFileEntry(kbRoot, docName, info)) dirty = true;
    }
    if (dirty) {
      graph.updated_at = (/* @__PURE__ */ new Date()).toISOString();
      try {
        await fs.writeFile(kgPath, JSON.stringify(graph, null, 2), "utf-8");
      } catch {
        // Persisting the repair is optional; the in-memory result is still returned.
      }
    }
    return {
      kb_id: kbName,
      version: graph.version || "1.0",
      updated_at: graph.updated_at || "",
      stats: graph.stats || {},
      files: graph.files || {},
      edges: graph.edges || []
    };
  };
  return {
    name: "knowhere_get_map",
    label: "Knowhere Get Map",
    description: "获取知识库全局概览。查询知识时必须先调此工具,了解有哪些文档、关键词、重要性和跨文件关联。然后用 knowhere_get_structure 查看具体文档的章节目录。",
    parameters: {
      type: "object",
      additionalProperties: false,
      properties: { kbId: {
        type: "string",
        description: "Optional: specific KB ID. Leave empty to scan all."
      } }
    },
    execute: async (_toolCallId, rawParams) => {
      const paramsRecord = isRecord(rawParams) ? rawParams : {};
      const kbId = readString(paramsRecord.kbId) || "";
      try {
        await fs.access(T2_KNOWHERE_HOME);
      } catch {
        return textResult(`未找到知识库目录 ${T2_KNOWHERE_HOME}`);
      }
      const kbs = [];
      for (const entry of await fs.readdir(T2_KNOWHERE_HOME, { withFileTypes: true })) {
        if (!entry.isDirectory()) continue;
        if (kbId && entry.name !== kbId) continue;
        const kbRoot = path.join(T2_KNOWHERE_HOME, entry.name);
        try {
          kbs.push(await summarizeGraph(kbRoot, entry.name));
        } catch {
          const docs = await t2ListDocDirs(kbRoot);
          if (docs.length > 0) kbs.push({
            kb_id: entry.name,
            version: "pending",
            files: Object.fromEntries(docs.map((doc) => [doc, {}])),
            edges: []
          });
        }
      }
      if (kbs.length === 0) return textResult("未找到知识库。");
      return t2JsonResult({
        status: "ok",
        knowledge_bases: kbs
      });
    }
  };
}
1126
/**
 * Build the `knowhere_get_structure` tool.
 *
 * The tool resolves a document directory under `T2_KNOWHERE_HOME/<kbId>` via
 * `t2FindDocDir` and returns the parsed `hierarchy.json`. When that file
 * cannot be read or parsed, it falls back to the sorted, de-duplicated list
 * of chunk paths from `t2LoadChunks`.
 *
 * `kbId` and `docName` come from tool-call parameters, so both are confined
 * to the knowledge-base home: `kbId` must be a single path segment, and a
 * resolved document directory outside the knowledge base is treated as not
 * found.
 *
 * @param _params Unused.
 * @returns The tool definition (name, label, description, parameters, execute).
 */
function createGetStructureTool(_params) {
  // True when `child` resolves to a location outside `parent`.
  const escapes = (parent, child) => {
    const relative = path.relative(path.resolve(parent), path.resolve(child));
    return relative === ".." || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative);
  };
  return {
    name: "knowhere_get_structure",
    label: "Knowhere Get Structure",
    description: "获取文档章节目录。先调 knowhere_get_map 确定 kbId 和文档名后,用此工具查看章节结构,然后用 knowhere_read_chunks 读取内容。",
    parameters: {
      type: "object",
      additionalProperties: false,
      properties: {
        kbId: {
          type: "string",
          description: "Knowledge base ID (from knowhere_get_map result)"
        },
        docName: {
          type: "string",
          description: "Document name (supports fuzzy match)"
        }
      },
      required: ["kbId", "docName"]
    },
    execute: async (_toolCallId, rawParams) => {
      const paramsRecord = isRecord(rawParams) ? rawParams : {};
      const kbId = readString(paramsRecord.kbId);
      const docName = readString(paramsRecord.docName);
      if (!kbId || !docName) throw new Error("kbId and docName are required.");
      // A KB ID is a directory name directly under the home directory.
      if (kbId === "." || kbId === ".." || path.basename(kbId) !== kbId) throw new Error("kbId must be a knowledge base ID, not a path.");
      const kbDir = path.join(T2_KNOWHERE_HOME, kbId);
      const docDir = await t2FindDocDir(kbDir, docName);
      if (!docDir || escapes(kbDir, docDir)) return textResult(`文档 '${docName}' 在 kb=${kbId} 中不存在`);
      try {
        const h = JSON.parse(await fs.readFile(path.join(docDir, "hierarchy.json"), "utf-8"));
        return t2JsonResult({
          status: "ok",
          kb_id: kbId,
          doc_name: path.basename(docDir),
          hierarchy: h
        });
      } catch {
        const chunks = await t2LoadChunks(docDir);
        // `c?.path` tolerates null entries in a hand-edited or partial chunk file.
        const paths = [...new Set(chunks.map((c) => c?.path).filter(Boolean))].sort();
        return t2JsonResult({
          status: "ok",
          kb_id: kbId,
          doc_name: path.basename(docDir),
          hierarchy: null,
          chunk_paths: paths,
          hint: "无 hierarchy.json,已返回 chunk 路径列表"
        });
      }
    }
  };
}
1176
/**
 * Build the `knowhere_read_chunks` tool.
 *
 * The tool loads a document's chunks via `t2LoadChunks`, optionally keeps
 * only chunks whose `path` contains `sectionPath`, returns at most
 * `maxChunks` of them (default 50), and records the access by bumping
 * `hit_count` / `last_hit` for the document in the knowledge base's
 * `knowledge_graph.json` on a best-effort basis.
 *
 * Hardening compared with a naive implementation:
 * - `kbId` must be a single path segment and the resolved document directory
 *   must stay inside the knowledge base, because both values come from
 *   tool-call parameters;
 * - chunks without a string `path` are excluded by the section filter
 *   instead of raising a TypeError;
 * - `maxChunks` below 1 or non-finite falls back to 50, so a negative value
 *   can no longer make `slice` drop trailing chunks.
 *
 * @param _params Unused.
 * @returns The tool definition (name, label, description, parameters, execute).
 */
function createReadChunksTool(_params) {
  const DEFAULT_MAX_CHUNKS = 50;
  // True when `child` resolves to a location outside `parent`.
  const escapes = (parent, child) => {
    const relative = path.relative(path.resolve(parent), path.resolve(child));
    return relative === ".." || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative);
  };
  return {
    name: "knowhere_read_chunks",
    label: "Knowhere Read Chunks",
    description: "读取文档内容。先调 knowhere_get_structure 确定章节后,用此工具读取具体内容。可通过 sectionPath 过滤特定章节,减少 token 消耗。",
    parameters: {
      type: "object",
      additionalProperties: false,
      properties: {
        kbId: {
          type: "string",
          description: "Knowledge base ID"
        },
        docName: {
          type: "string",
          description: "Document name"
        },
        sectionPath: {
          type: "string",
          description: "Optional: section path prefix to filter (e.g. '一、工程概况')"
        },
        maxChunks: {
          type: "number",
          description: "Max chunks to return (default 50)"
        }
      },
      required: ["kbId", "docName"]
    },
    execute: async (_toolCallId, rawParams) => {
      const paramsRecord = isRecord(rawParams) ? rawParams : {};
      const kbId = readString(paramsRecord.kbId);
      const docName = readString(paramsRecord.docName);
      const sectionPath = readString(paramsRecord.sectionPath);
      const maxChunks = readNumber(paramsRecord.maxChunks, DEFAULT_MAX_CHUNKS);
      if (!kbId || !docName) throw new Error("kbId and docName are required.");
      // A KB ID is a directory name directly under the home directory.
      if (kbId === "." || kbId === ".." || path.basename(kbId) !== kbId) throw new Error("kbId must be a knowledge base ID, not a path.");
      const kbDir = path.join(T2_KNOWHERE_HOME, kbId);
      const docDir = await t2FindDocDir(kbDir, docName);
      if (!docDir || escapes(kbDir, docDir)) return textResult(`文档 '${docName}' 不存在`);
      let chunks = await t2LoadChunks(docDir);
      if (sectionPath) chunks = chunks.filter((c) => typeof c?.path === "string" && c.path.includes(sectionPath));
      const total = chunks.length;
      const limit = Number.isFinite(maxChunks) && maxChunks >= 1 ? Math.floor(maxChunks) : DEFAULT_MAX_CHUNKS;
      chunks = chunks.slice(0, limit);
      try {
        const kgPath = path.join(kbDir, "knowledge_graph.json");
        const g = JSON.parse(await fs.readFile(kgPath, "utf-8"));
        const dn = path.basename(docDir);
        if (g.files?.[dn]) {
          const now = (/* @__PURE__ */ new Date()).toISOString();
          g.files[dn].hit_count = (g.files[dn].hit_count || 0) + 1;
          g.files[dn].last_hit = now;
          g.updated_at = now;
          await fs.writeFile(kgPath, JSON.stringify(g, null, 2), "utf-8");
        }
      } catch {
        // Hit statistics are optional bookkeeping; never fail the read over them.
      }
      return t2JsonResult({
        status: "ok",
        kb_id: kbId,
        doc_name: path.basename(docDir),
        section_path: sectionPath || null,
        total_chunks: total,
        returned: chunks.length,
        truncated: total > limit,
        chunks
      });
    }
  };
}
1242
/**
 * Build the `knowhere_discover_files` tool.
 *
 * The tool splits `query` into terms (on whitespace and the listed
 * punctuation, keeping terms longer than one character), then walks every
 * document directory of every knowledge base under `T2_KNOWHERE_HOME` (or
 * only the one named by `kbId`). For each document it counts, per chunk and
 * per term, whether the chunk's content or summary contains the term
 * (case-sensitive substring match) and reports documents with at least one
 * hit, ordered by descending hit count.
 *
 * Missing `content` / `summary` fields are treated as empty text and
 * non-object chunk entries are skipped. Otherwise an absent field would be
 * searched as the literal text "undefined", and a null entry would abort the
 * whole search.
 *
 * @param _params Unused.
 * @returns The tool definition (name, label, description, parameters, execute).
 */
function createDiscoverFilesTool(_params) {
  // Number of (chunk, term) pairs for which the chunk text contains the term.
  const countHits = (chunks, terms) => {
    let hits = 0;
    for (const c of chunks) {
      if (c === null || typeof c !== "object") continue;
      const text = `${c.content ?? ""} ${c.summary ?? ""}`;
      for (const t of terms) if (text.includes(t)) hits++;
    }
    return hits;
  };
  return {
    name: "knowhere_discover_files",
    label: "Knowhere Discover Files",
    description: "在所有知识库文档中搜索关键词,返回命中文件和次数。用于和 knowhere_get_map 做并集,避免遗漏相关文件。只返回文件名,不返回内容。",
    parameters: {
      type: "object",
      additionalProperties: false,
      properties: {
        query: {
          type: "string",
          description: "Search keywords"
        },
        kbId: {
          type: "string",
          description: "Optional: limit to specific KB"
        }
      },
      required: ["query"]
    },
    execute: async (_toolCallId, rawParams) => {
      const paramsRecord = isRecord(rawParams) ? rawParams : {};
      const query = readString(paramsRecord.query);
      const kbId = readString(paramsRecord.kbId);
      if (!query) throw new Error("query is required.");
      const terms = query.split(/[\s,;,;。!?、\-/]+/).filter((t) => t.length > 1);
      if (terms.length === 0) return textResult("查询词为空");
      try {
        await fs.access(T2_KNOWHERE_HOME);
      } catch {
        return textResult("未找到知识库。");
      }
      const results = [];
      const kbEntries = await fs.readdir(T2_KNOWHERE_HOME, { withFileTypes: true });
      for (const kbE of kbEntries) {
        if (!kbE.isDirectory()) continue;
        // kbId is only compared against real directory names, never joined into a path.
        if (kbId && kbE.name !== kbId) continue;
        const kbName = String(kbE.name);
        let docEntries;
        try {
          docEntries = await fs.readdir(path.join(T2_KNOWHERE_HOME, kbName), { withFileTypes: true });
        } catch {
          continue;
        }
        for (const docE of docEntries) {
          if (!docE.isDirectory()) continue;
          const docName = String(docE.name);
          const chunks = await t2LoadChunks(path.join(T2_KNOWHERE_HOME, kbName, docName));
          const hits = countHits(chunks, terms);
          if (hits > 0) results.push({
            kb_id: kbName,
            doc_name: docName,
            hit_count: hits
          });
        }
      }
      results.sort((a, b) => b.hit_count - a.hit_count);
      return t2JsonResult({
        status: "ok",
        query,
        terms,
        discovered_files: results
      });
    }
  };
}
917
1310
  function createKnowhereToolFactory(params) {
918
1311
  return (ctx) => [
919
1312
  createIngestTool({
@@ -939,6 +1332,7 @@ function createKnowhereToolFactory(params) {
939
1332
  api: params.api,
940
1333
  config: params.config,
941
1334
  store: params.store,
1335
+ kgService: params.kgService,
942
1336
  ctx
943
1337
  }),
944
1338
  createSetApiKeyTool({
@@ -954,7 +1348,11 @@ function createKnowhereToolFactory(params) {
954
1348
  api: params.api,
955
1349
  kgService: params.kgService,
956
1350
  ctx
957
- })
1351
+ }),
1352
+ createGetMapTool({ api: params.api }),
1353
+ createGetStructureTool({ api: params.api }),
1354
+ createReadChunksTool({ api: params.api }),
1355
+ createDiscoverFilesTool({ api: params.api })
958
1356
  ];
959
1357
  }
960
1358
  //#endregion
@@ -3,7 +3,7 @@
3
3
  "name": "Knowhere",
4
4
  "description": "Parse documents with Knowhere and expose the stored result as tool-queryable document state for OpenClaw agents.",
5
5
  "skills": ["./skills"],
6
- "version": "0.2.4",
6
+ "version": "0.2.5",
7
7
  "uiHints": {
8
8
  "apiKey": {
9
9
  "label": "Knowhere API Key",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ontos-ai/knowhere-claw",
3
- "version": "0.2.4",
3
+ "version": "0.2.5",
4
4
  "description": "OpenClaw plugin for Knowhere-powered document ingestion and automatic grounding.",
5
5
  "files": [
6
6
  "dist/",
@@ -40,8 +40,7 @@
40
40
  "dependencies": {
41
41
  "@knowhere-ai/sdk": "^0.1.1",
42
42
  "fflate": "^0.8.2",
43
- "fs-extra": "^11.2.0",
44
- "nodejieba": "^2.6.0"
43
+ "fs-extra": "^11.2.0"
45
44
  },
46
45
  "devDependencies": {
47
46
  "@changesets/changelog-github": "^0.6.0",