@ontos-ai/knowhere-claw 0.2.3 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,7 +1,7 @@
1
1
  # Knowhere for OpenClaw
2
2
 
3
3
  Knowhere is an OpenClaw plugin that parses documents and URLs with Knowhere,
4
- stores the extracted result package in OpenClaw state, and gives agents a
4
+ stores extracted Knowhere files in OpenClaw state, and gives agents a
5
5
  browse-first toolset for grounded document work.
6
6
 
7
7
  Quick mental model:
@@ -22,7 +22,7 @@ the machine running that Gateway, then restart that Gateway.
22
22
  ## What You Get
23
23
 
24
24
  - Ingest local files or document URLs with Knowhere
25
- - Store parsed result packages inside OpenClaw-managed state
25
+ - Store parsed documents inside OpenClaw-managed state
26
26
  - Preview document structure, search chunks, and inspect raw result files
27
27
  - Reuse stored documents across `session`, `agent`, or `global` scope
28
28
  - Ship bundled `knowhere` and `knowhere_memory` skills so agents prefer this
@@ -73,7 +73,7 @@ Config notes:
73
73
  - `pollIntervalMs`, `pollTimeoutMs`, `requestTimeoutMs`, `uploadTimeoutMs`:
74
74
  optional tuning for job polling, API calls, and large uploads.
75
75
  - An explicit `storageDir` such as
76
- `/home/<user>/.openclaw/plugin-state/knowhere` makes stored result packages
76
+ `/home/<user>/.openclaw/plugin-state/knowhere` makes stored documents
77
77
  easier to inspect, back up, or clean up.
78
78
 
79
79
  ## How OpenClaw Uses It
@@ -110,8 +110,8 @@ actually call the plugin tools.
110
110
  Within each scope, the plugin keeps:
111
111
 
112
112
  - an `index.json` cache of stored document summaries
113
- - per-document metadata and browse indexes
114
- - the extracted Knowhere result package under `result/`
113
+ - a `metadata/` directory with one JSON record per stored document
114
+ - the extracted Knowhere result files directly inside each document directory
115
115
 
116
116
  ## Common Workflow
117
117
 
package/dist/client.js CHANGED
@@ -1,8 +1,8 @@
1
1
  import { isRecord } from "./types.js";
2
2
  import { formatErrorMessage } from "./error-message.js";
3
+ import { openAsBlob } from "node:fs";
3
4
  import path from "node:path";
4
5
  import { createHash } from "node:crypto";
5
- import { openAsBlob } from "node:fs";
6
6
  import { Knowhere } from "@knowhere-ai/sdk";
7
7
  //#region src/client.ts
8
8
  const RETRYABLE_STATUS_CODES = new Set([
package/dist/config.d.ts CHANGED
@@ -2,6 +2,14 @@ import type { OpenClawPluginApi } from "openclaw/plugin-sdk/core";
2
2
  import type { JsonSchemaObject, ResolvedKnowhereConfig, StringRecord, KnowledgeGraphConfig } from "./types";
3
3
  export declare const DEFAULT_BASE_URL = "https://api.knowhereto.ai";
4
4
  export declare const knowherePluginConfigSchema: JsonSchemaObject;
5
+ /**
6
+ * Return the effective plugin config object, merging the persisted
7
+ * resolved-config when the live pluginConfig sets neither `scopeMode` nor `storageDir`
8
+ * (i.e. in agent subprocesses). Both `resolveKnowhereConfig` and
9
+ * `resolveKnowledgeGraphConfig` should read from this merged result so
10
+ * that subprocess instances inherit the gateway's full configuration.
11
+ */
12
+ export declare function resolveEffectivePluginConfig(api: OpenClawPluginApi): StringRecord;
5
13
  export declare function resolveKnowhereConfig(api: OpenClawPluginApi): ResolvedKnowhereConfig;
6
14
  export declare const API_KEY_URL = "https://knowhereto.ai/api-keys";
7
15
  export declare const PURCHASE_CREDITS_URL = "https://knowhereto.ai/usage?buy=true";
package/dist/config.js CHANGED
@@ -1,4 +1,5 @@
1
1
  import { isRecord } from "./types.js";
2
+ import { readFileSync } from "node:fs";
2
3
  import fs from "node:fs/promises";
3
4
  import path from "node:path";
4
5
  //#region src/config.ts
@@ -147,36 +148,83 @@ function readScopeMode(raw) {
147
148
  if (value === "session" || value === "agent" || value === "global") return value;
148
149
  return KNOWHERE_PLUGIN_DEFAULTS.scopeMode;
149
150
  }
151
+ const RESOLVED_CONFIG_STATE_FILE = "resolved-config.json";
152
+ function readPersistedResolvedConfigSync(stateDir) {
153
+ const filePath = path.join(stateDir, RESOLVED_CONFIG_STATE_FILE);
154
+ try {
155
+ const raw = readFileSync(filePath, "utf-8");
156
+ const parsed = JSON.parse(raw);
157
+ if (isRecord(parsed)) return parsed;
158
+ return null;
159
+ } catch {
160
+ return null;
161
+ }
162
+ }
163
+ async function persistResolvedConfig(stateDir, config) {
164
+ await fs.mkdir(stateDir, { recursive: true });
165
+ const filePath = path.join(stateDir, RESOLVED_CONFIG_STATE_FILE);
166
+ await fs.writeFile(filePath, JSON.stringify(config, null, 2), "utf-8");
167
+ }
168
+ function hasExplicitPluginConfig(raw) {
169
+ return Boolean(readString(raw, "scopeMode") || readString(raw, "storageDir"));
170
+ }
171
+ /**
172
+ * Return the effective plugin config object, merging the persisted
173
+ * resolved-config when the live pluginConfig sets neither `scopeMode` nor `storageDir`
174
+ * (i.e. in agent subprocesses). Both `resolveKnowhereConfig` and
175
+ * `resolveKnowledgeGraphConfig` should read from this merged result so
176
+ * that subprocess instances inherit the gateway's full configuration.
177
+ */
178
+ function resolveEffectivePluginConfig(api) {
179
+ const raw = isRecord(api.pluginConfig) ? api.pluginConfig : {};
180
+ const stateDir = api.runtime.state.resolveStateDir();
181
+ if (!hasExplicitPluginConfig(raw)) {
182
+ const persisted = readPersistedResolvedConfigSync(stateDir);
183
+ if (persisted) return {
184
+ ...persisted,
185
+ ...raw
186
+ };
187
+ }
188
+ return raw;
189
+ }
150
190
  function resolveKnowhereConfig(api) {
151
191
  const raw = isRecord(api.pluginConfig) ? api.pluginConfig : {};
152
192
  const stateDir = api.runtime.state.resolveStateDir();
153
- const storageDirRaw = readString(raw, "storageDir");
154
- return {
193
+ const hasExplicit = hasExplicitPluginConfig(raw);
194
+ const effective = resolveEffectivePluginConfig(api);
195
+ const storageDirRaw = readString(effective, "storageDir");
196
+ const config = {
155
197
  apiKey: readString(raw, "apiKey") || process.env.KNOWHERE_API_KEY || "",
156
198
  baseUrl: readString(raw, "baseUrl") || process.env.KNOWHERE_BASE_URL || "https://api.knowhereto.ai",
157
199
  storageDir: storageDirRaw ? api.resolvePath(storageDirRaw) : path.join(stateDir, "plugins", api.id),
158
- scopeMode: readScopeMode(raw),
159
- pollIntervalMs: readNumber(raw, "pollIntervalMs", KNOWHERE_PLUGIN_DEFAULTS.pollIntervalMs, {
200
+ scopeMode: readScopeMode(effective),
201
+ pollIntervalMs: readNumber(effective, "pollIntervalMs", KNOWHERE_PLUGIN_DEFAULTS.pollIntervalMs, {
160
202
  min: 1e3,
161
203
  max: 6e4,
162
204
  integer: true
163
205
  }),
164
- pollTimeoutMs: readNumber(raw, "pollTimeoutMs", KNOWHERE_PLUGIN_DEFAULTS.pollTimeoutMs, {
206
+ pollTimeoutMs: readNumber(effective, "pollTimeoutMs", KNOWHERE_PLUGIN_DEFAULTS.pollTimeoutMs, {
165
207
  min: 1e4,
166
208
  max: 72e5,
167
209
  integer: true
168
210
  }),
169
- requestTimeoutMs: readNumber(raw, "requestTimeoutMs", KNOWHERE_PLUGIN_DEFAULTS.requestTimeoutMs, {
211
+ requestTimeoutMs: readNumber(effective, "requestTimeoutMs", KNOWHERE_PLUGIN_DEFAULTS.requestTimeoutMs, {
170
212
  min: 1e3,
171
213
  max: 3e5,
172
214
  integer: true
173
215
  }),
174
- uploadTimeoutMs: readNumber(raw, "uploadTimeoutMs", KNOWHERE_PLUGIN_DEFAULTS.uploadTimeoutMs, {
216
+ uploadTimeoutMs: readNumber(effective, "uploadTimeoutMs", KNOWHERE_PLUGIN_DEFAULTS.uploadTimeoutMs, {
175
217
  min: 1e3,
176
218
  max: 72e5,
177
219
  integer: true
178
220
  })
179
221
  };
222
+ if (hasExplicit) persistResolvedConfig(stateDir, {
223
+ scopeMode: config.scopeMode,
224
+ storageDir: config.storageDir,
225
+ knowledgeGraph: raw.knowledgeGraph
226
+ }).catch(() => void 0);
227
+ return config;
180
228
  }
181
229
  const API_KEY_URL = "https://knowhereto.ai/api-keys";
182
230
  const PURCHASE_CREDITS_URL = "https://knowhereto.ai/usage?buy=true";
@@ -257,4 +305,4 @@ function resolveKnowledgeGraphConfig(raw) {
257
305
  };
258
306
  }
259
307
  //#endregion
260
- export { assertKnowhereApiKey, formatPaymentRequiredMessage, isPaymentRequiredError, knowherePluginConfigSchema, persistApiKey, readPersistedApiKey, resolveKnowhereConfig, resolveKnowledgeGraphConfig };
308
+ export { assertKnowhereApiKey, formatPaymentRequiredMessage, isPaymentRequiredError, knowherePluginConfigSchema, persistApiKey, readPersistedApiKey, resolveEffectivePluginConfig, resolveKnowhereConfig, resolveKnowledgeGraphConfig };
@@ -12,6 +12,8 @@ export interface ChunkData {
12
12
  chunk_id: string;
13
13
  path: string;
14
14
  content: string;
15
+ /** Document directory name (e.g. "report.pdf"). Injected by kg-service at load time. */
16
+ fileKey: string;
15
17
  metadata: {
16
18
  keywords?: string[];
17
19
  tokens?: string[];
@@ -8,15 +8,14 @@ function normalizeKeyword(keyword) {
8
8
  return keyword.toLowerCase().trim();
9
9
  }
10
10
  /**
11
- * Extract file key from a chunk path
12
- * Equivalent to Python: _extract_file_key
13
- *
14
- * Example: "Default_Root/report.docx/Chapter 1" -> "report.docx"
11
+ * Extract file key from a chunk.
12
+ * Prefers the explicit fileKey field; falls back to path-based extraction for backward compatibility.
15
13
  */
16
- function extractFileKey(path) {
17
- const parts = path.split("/");
14
+ function getFileKey(chunk) {
15
+ if (chunk.fileKey) return chunk.fileKey;
16
+ const parts = chunk.path.replace(/-->/g, "/").split("/");
18
17
  if (parts.length >= 2) return parts[1];
19
- return path;
18
+ return chunk.path;
20
19
  }
21
20
  /**
22
21
  * Build inverted keyword index: keyword -> [chunk_ids]
@@ -97,7 +96,7 @@ function buildConnections(chunks, config, logger) {
97
96
  for (const chunk of chunks) chunkById.set(chunk.chunk_id, chunk);
98
97
  const chunksByFile = /* @__PURE__ */ new Map();
99
98
  for (const chunk of chunks) {
100
- const fileKey = extractFileKey(chunk.path);
99
+ const fileKey = getFileKey(chunk);
101
100
  if (!chunksByFile.has(fileKey)) chunksByFile.set(fileKey, /* @__PURE__ */ new Set());
102
101
  chunksByFile.get(fileKey).add(chunk.chunk_id);
103
102
  }
@@ -120,14 +119,14 @@ function buildConnections(chunks, config, logger) {
120
119
  const sourceChunk = chunkById.get(sourceId);
121
120
  if (!sourceChunk) continue;
122
121
  const sourceKeywords = getKeywords(sourceChunk);
123
- const sourceFileKey = extractFileKey(sourceChunk.path);
122
+ const sourceFileKey = getFileKey(sourceChunk);
124
123
  for (const targetId of targetIds) {
125
124
  const targetChunk = chunkById.get(targetId);
126
125
  if (!targetChunk) continue;
127
126
  const pairKey = sourceId < targetId ? `${sourceId}::${targetId}` : `${targetId}::${sourceId}`;
128
127
  if (seenPairs.has(pairKey)) continue;
129
128
  seenPairs.add(pairKey);
130
- const targetFileKey = extractFileKey(targetChunk.path);
129
+ const targetFileKey = getFileKey(targetChunk);
131
130
  if (config.crossFileOnly && sourceFileKey === targetFileKey) continue;
132
131
  const contentRatio = sequenceMatcherRatio(sourceChunk.content.slice(0, 500), targetChunk.content.slice(0, 500));
133
132
  if (contentRatio >= config.maxContentOverlap) {
@@ -32,12 +32,15 @@ interface FileMetadata {
32
32
  top_keywords: string[];
33
33
  top_summary: string;
34
34
  importance: number;
35
+ created_at: string;
35
36
  }
36
37
  /**
37
38
  * Complete knowledge graph structure
38
39
  */
39
40
  export interface KnowledgeGraph {
40
41
  version: string;
42
+ updated_at: string;
43
+ kb_id: string;
41
44
  stats: {
42
45
  total_files: number;
43
46
  total_chunks: number;
@@ -59,7 +62,7 @@ export interface ChunkStats {
59
62
  * Main function to build knowledge graph
60
63
  * Equivalent to Python: build_knowledge_graph
61
64
  */
62
- export declare function buildKnowledgeGraph(chunks: ChunkData[], connections: Connection[], chunkStats: ChunkStats, jiebaInitialized: boolean, logger?: PluginLogger): KnowledgeGraph;
65
+ export declare function buildKnowledgeGraph(chunks: ChunkData[], connections: Connection[], chunkStats: ChunkStats, _jiebaInitialized: boolean, logger?: PluginLogger, kbId?: string): KnowledgeGraph;
63
66
  /**
64
67
  * Incremental update: match new chunks against existing chunks
65
68
  * Equivalent to Python: _incremental_connections
@@ -1,18 +1,13 @@
1
- import * as nodejieba from "nodejieba";
2
1
  //#region src/graph-builder.ts
3
2
  /**
4
- * Graph Builder Module
5
- *
6
- * TypeScript implementation of knowhere-api/apps/worker/app/services/connect_builder/graph_builder.py
7
- * Builds file-level knowledge graphs from chunk connections with TF-IDF and importance scoring.
8
- */
9
- /**
10
- * Extract file key from chunk path
3
+ * Extract file key from a chunk.
4
+ * Prefers the explicit fileKey field; falls back to path-based extraction for backward compatibility.
11
5
  */
12
- function extractFileKey(path) {
13
- const parts = path.split("/");
6
+ function getFileKey(chunk) {
7
+ if (chunk.fileKey) return chunk.fileKey;
8
+ const parts = chunk.path.replace(/-->/g, "/").split("/");
14
9
  if (parts.length >= 2) return parts[1];
15
- return path;
10
+ return chunk.path;
16
11
  }
17
12
  /**
18
13
  * Extract label from chunk path (last segment)
@@ -25,29 +20,18 @@ function extractLabel(path) {
25
20
  * Extract tokens from text by stripping tags and splitting on whitespace (jieba is no longer used)
26
21
  * Equivalent to Python: _extract_tokens_from_content
27
22
  */
28
- function extractTokensFromContent(content, jiebaInitialized) {
29
- const cleanContent = content.replace(/<[^>]*>/g, " ");
30
- if (!jiebaInitialized) return cleanContent.split(/\s+/).filter((w) => w.length > 1);
31
- try {
32
- return nodejieba.cut(cleanContent).filter((token) => {
33
- if (token.length <= 1) return false;
34
- if (/^\d+$/.test(token)) return false;
35
- if (/^[^\w\u4e00-\u9fa5]+$/.test(token)) return false;
36
- return true;
37
- });
38
- } catch {
39
- return cleanContent.split(/\s+/).filter((w) => w.length > 1);
40
- }
23
+ function extractTokensFromContent(content) {
24
+ return content.replace(/<[^>]*>/g, " ").split(/\s+/).filter((w) => w.length > 1);
41
25
  }
42
26
  /**
43
27
  * Compute TF-IDF top keywords for a file
44
28
  * Equivalent to Python: _compute_tfidf_top_keywords
45
29
  */
46
- function computeTfidfTopKeywords(fileChunks, allChunks, topK, jiebaInitialized) {
30
+ function computeTfidfTopKeywords(fileChunks, allChunks, topK) {
47
31
  if (fileChunks.length === 0) return [];
48
32
  const fileTokens = [];
49
33
  for (const chunk of fileChunks) {
50
- const tokens = extractTokensFromContent(chunk.content, jiebaInitialized);
34
+ const tokens = extractTokensFromContent(chunk.content);
51
35
  fileTokens.push(...tokens);
52
36
  }
53
37
  if (fileTokens.length === 0) return [];
@@ -58,7 +42,7 @@ function computeTfidfTopKeywords(fileChunks, allChunks, topK, jiebaInitialized)
58
42
  }
59
43
  const docFreq = /* @__PURE__ */ new Map();
60
44
  for (const chunk of allChunks) {
61
- const tokens = new Set(extractTokensFromContent(chunk.content, jiebaInitialized).map((t) => t.toLowerCase()));
45
+ const tokens = new Set(extractTokensFromContent(chunk.content).map((t) => t.toLowerCase()));
62
46
  for (const token of tokens) docFreq.set(token, (docFreq.get(token) || 0) + 1);
63
47
  }
64
48
  const totalDocs = allChunks.length;
@@ -107,7 +91,7 @@ function computeFileImportance(fileKey, fileChunks, allChunks, chunkStats, decay
107
91
  function getAllChunkCountsByFile(chunks) {
108
92
  const countsByFile = /* @__PURE__ */ new Map();
109
93
  for (const chunk of chunks) {
110
- const fileKey = extractFileKey(chunk.path);
94
+ const fileKey = getFileKey(chunk);
111
95
  countsByFile.set(fileKey, (countsByFile.get(fileKey) || 0) + 1);
112
96
  }
113
97
  return Array.from(countsByFile.values());
@@ -122,8 +106,8 @@ function aggregateFileLevelEdges(connections, chunkById, topN = 5) {
122
106
  const sourceChunk = chunkById.get(conn.source);
123
107
  const targetChunk = chunkById.get(conn.target);
124
108
  if (!sourceChunk || !targetChunk) continue;
125
- const sourceFile = extractFileKey(sourceChunk.path);
126
- const targetFile = extractFileKey(targetChunk.path);
109
+ const sourceFile = getFileKey(sourceChunk);
110
+ const targetFile = getFileKey(targetChunk);
127
111
  if (sourceFile === targetFile) continue;
128
112
  const pairKey = sourceFile < targetFile ? `${sourceFile}::${targetFile}` : `${targetFile}::${sourceFile}`;
129
113
  if (!filePairs.has(pairKey)) filePairs.set(pairKey, []);
@@ -159,13 +143,13 @@ function aggregateFileLevelEdges(connections, chunkById, topN = 5) {
159
143
  * Main function to build knowledge graph
160
144
  * Equivalent to Python: build_knowledge_graph
161
145
  */
162
- function buildKnowledgeGraph(chunks, connections, chunkStats, jiebaInitialized, logger) {
146
+ function buildKnowledgeGraph(chunks, connections, chunkStats, _jiebaInitialized, logger, kbId) {
163
147
  logger?.info(`Building knowledge graph from ${chunks.length} chunks and ${connections.length} connections`);
164
148
  const chunkById = /* @__PURE__ */ new Map();
165
149
  for (const chunk of chunks) chunkById.set(chunk.chunk_id, chunk);
166
150
  const chunksByFile = /* @__PURE__ */ new Map();
167
151
  for (const chunk of chunks) {
168
- const fileKey = extractFileKey(chunk.path);
152
+ const fileKey = getFileKey(chunk);
169
153
  if (!chunksByFile.has(fileKey)) chunksByFile.set(fileKey, []);
170
154
  chunksByFile.get(fileKey).push(chunk);
171
155
  }
@@ -176,7 +160,7 @@ function buildKnowledgeGraph(chunks, connections, chunkStats, jiebaInitialized,
176
160
  const type = chunk.metadata.type || "text";
177
161
  typeCount[type] = (typeCount[type] || 0) + 1;
178
162
  }
179
- const topKeywords = computeTfidfTopKeywords(fileChunks, chunks, 10, jiebaInitialized);
163
+ const topKeywords = computeTfidfTopKeywords(fileChunks, chunks, 10);
180
164
  const importance = computeFileImportance(fileKey, fileChunks, chunks, chunkStats);
181
165
  let topSummary = "";
182
166
  for (const chunk of fileChunks) if (chunk.metadata.summary && typeof chunk.metadata.summary === "string") {
@@ -188,13 +172,16 @@ function buildKnowledgeGraph(chunks, connections, chunkStats, jiebaInitialized,
188
172
  types: typeCount,
189
173
  top_keywords: topKeywords,
190
174
  top_summary: topSummary,
191
- importance
175
+ importance,
176
+ created_at: (/* @__PURE__ */ new Date()).toISOString()
192
177
  };
193
178
  }
194
179
  const fileEdges = aggregateFileLevelEdges(connections, chunkById, 5);
195
180
  logger?.info(`Created graph with ${Object.keys(filesMetadata).length} files and ${fileEdges.length} edges`);
196
181
  return {
197
182
  version: "2.0",
183
+ updated_at: (/* @__PURE__ */ new Date()).toISOString(),
184
+ kb_id: kbId || "",
198
185
  stats: {
199
186
  total_files: Object.keys(filesMetadata).length,
200
187
  total_chunks: chunks.length,
package/dist/index.js CHANGED
@@ -1,4 +1,4 @@
1
- import { knowherePluginConfigSchema, readPersistedApiKey, resolveKnowhereConfig, resolveKnowledgeGraphConfig } from "./config.js";
1
+ import { knowherePluginConfigSchema, readPersistedApiKey, resolveEffectivePluginConfig, resolveKnowhereConfig, resolveKnowledgeGraphConfig } from "./config.js";
2
2
  import { KnowhereStore } from "./store.js";
3
3
  import { createKnowhereToolFactory } from "./tools.js";
4
4
  import { KnowledgeGraphService } from "./kg-service.js";
@@ -6,11 +6,11 @@ import { KnowledgeGraphService } from "./kg-service.js";
6
6
  const plugin = {
7
7
  id: "knowhere-claw",
8
8
  name: "Knowhere",
9
- description: "Knowhere document ingestion and direct stored-result access for OpenClaw.",
9
+ description: "Knowhere document ingestion, job management, and knowledge graph tools for OpenClaw.",
10
10
  configSchema: knowherePluginConfigSchema,
11
11
  register(api) {
12
12
  const config = resolveKnowhereConfig(api);
13
- const kgConfig = resolveKnowledgeGraphConfig(api.pluginConfig && typeof api.pluginConfig === "object" ? api.pluginConfig : {});
13
+ const kgConfig = resolveKnowledgeGraphConfig(resolveEffectivePluginConfig(api));
14
14
  const store = new KnowhereStore({
15
15
  rootDir: config.storageDir,
16
16
  scopeMode: config.scopeMode,
@@ -41,12 +41,6 @@ const plugin = {
41
41
  "knowhere_list_jobs",
42
42
  "knowhere_get_job_status",
43
43
  "knowhere_import_completed_job",
44
- "knowhere_grep",
45
- "knowhere_read_result_file",
46
- "knowhere_preview_document",
47
- "knowhere_list_documents",
48
- "knowhere_remove_document",
49
- "knowhere_clear_scope",
50
44
  "knowhere_set_api_key",
51
45
  "knowhere_kg_list",
52
46
  "knowhere_kg_query"
@@ -9,11 +9,9 @@ export declare class KnowledgeGraphService {
9
9
  private readonly logger;
10
10
  private degradationMode;
11
11
  private buildQueues;
12
- private jiebaInitialized;
13
12
  constructor(params: KnowledgeGraphServiceParams);
14
13
  initialize(): Promise<void>;
15
14
  private checkPythonEnvironment;
16
- private initializeJieba;
17
15
  extractKeywords(text: string, topK?: number): Promise<string[]>;
18
16
  resolveKbId(context: ToolRuntimeContext): string | null;
19
17
  getKbPath(kbId: string): string;
@@ -1,10 +1,10 @@
1
+ import { resolveStoredKnowhereResultRoot } from "./parser.js";
1
2
  import { buildConnections, init_connect_builder } from "./connect-builder.js";
2
3
  import { buildKnowledgeGraph } from "./graph-builder.js";
3
4
  import path from "node:path";
5
+ import os from "node:os";
4
6
  import { spawn } from "node:child_process";
5
7
  import fs from "fs-extra";
6
- import os from "node:os";
7
- import * as nodejieba from "nodejieba";
8
8
  //#region src/kg-service.ts
9
9
  init_connect_builder();
10
10
  const DEFAULT_CONNECT_CONFIG = {
@@ -25,18 +25,11 @@ const DEFAULT_KG_CONFIG = {
25
25
  function formatUnknownError(error) {
26
26
  return error instanceof Error ? error.message : String(error);
27
27
  }
28
- function extractKeywordText(item) {
29
- if (typeof item === "string") return item;
30
- if (typeof item === "number" || typeof item === "boolean" || typeof item === "bigint") return String(item);
31
- if (typeof item === "object" && item !== null && "word" in item && typeof item.word === "string") return item.word;
32
- return null;
33
- }
34
28
  var KnowledgeGraphService = class {
35
29
  config;
36
30
  logger;
37
31
  degradationMode = "full";
38
32
  buildQueues = /* @__PURE__ */ new Map();
39
- jiebaInitialized = false;
40
33
  constructor(params) {
41
34
  this.config = {
42
35
  ...DEFAULT_KG_CONFIG,
@@ -57,18 +50,12 @@ var KnowledgeGraphService = class {
57
50
  }
58
51
  try {
59
52
  await this.checkPythonEnvironment();
60
- await this.initializeJieba();
61
53
  this.degradationMode = "full";
62
54
  this.logger.info("Knowledge graph service initialized in full mode");
63
55
  } catch (error) {
64
56
  this.logger.warn(`Knowledge graph initialization failed: ${error instanceof Error ? error.message : String(error)}`);
65
- if (error.code === "PYTHON_MISSING") {
66
- this.degradationMode = "disabled";
67
- this.logger.warn("Python not found, knowledge graph disabled");
68
- } else if (error.code === "NODEJIEBA_MISSING") {
69
- this.degradationMode = "basic";
70
- this.logger.warn("Nodejieba missing, using basic tokenization");
71
- } else this.degradationMode = "disabled";
57
+ this.degradationMode = "disabled";
58
+ this.logger.warn("Python not found, knowledge graph disabled");
72
59
  }
73
60
  }
74
61
  async checkPythonEnvironment() {
@@ -91,30 +78,8 @@ var KnowledgeGraphService = class {
91
78
  });
92
79
  });
93
80
  }
94
- async initializeJieba() {
95
- try {
96
- nodejieba.load();
97
- this.jiebaInitialized = true;
98
- this.logger.info("Nodejieba initialized successfully");
99
- } catch {
100
- const err = /* @__PURE__ */ new Error("Failed to initialize nodejieba");
101
- err.code = "NODEJIEBA_MISSING";
102
- throw err;
103
- }
104
- }
105
81
  async extractKeywords(text, topK = 20) {
106
82
  if (this.degradationMode === "disabled") return [];
107
- if (this.degradationMode === "full" && this.jiebaInitialized) try {
108
- const rawKeywords = nodejieba.extract(text, topK);
109
- return (Array.isArray(rawKeywords) ? rawKeywords : []).map((item) => extractKeywordText(item)).filter((keyword) => keyword !== null).filter((kw) => {
110
- if (kw.length <= 1) return false;
111
- if (/^\d+$/.test(kw)) return false;
112
- return true;
113
- }).slice(0, topK);
114
- } catch (error) {
115
- this.logger.warn(`Jieba extraction failed, falling back to basic: ${formatUnknownError(error)}`);
116
- this.degradationMode = "basic";
117
- }
118
83
  return text.split(/\s+/).filter((w) => w.length > 1).slice(0, topK);
119
84
  }
120
85
  resolveKbId(context) {
@@ -143,9 +108,8 @@ var KnowledgeGraphService = class {
143
108
  const kbPath = await this.ensureKbDirectory(params.kbId);
144
109
  const docDir = path.join(kbPath, params.docId);
145
110
  await fs.ensureDir(docDir);
146
- await fs.copy(params.sourcePath, docDir, { overwrite: true });
147
- const keywordsPath = path.join(docDir, "keywords.json");
148
- await fs.writeJSON(keywordsPath, params.keywords, { spaces: 2 });
111
+ const sourceResultRoot = await resolveStoredKnowhereResultRoot(params.sourcePath);
112
+ await fs.copy(sourceResultRoot, docDir, { overwrite: true });
149
113
  const metadataPath = path.join(docDir, "metadata.json");
150
114
  await fs.writeJSON(metadataPath, params.metadata, { spaces: 2 });
151
115
  this.logger.info(`Document saved to knowledge base: kb=${params.kbId} doc=${params.docId}`);
@@ -174,7 +138,7 @@ var KnowledgeGraphService = class {
174
138
  const docPath = path.join(kbPath, doc);
175
139
  if ((await fs.stat(docPath)).isDirectory() && doc !== "knowledge_graph.json" && doc !== "chunk_stats.json" && doc !== "kb_metadata.json") docDirs.push(doc);
176
140
  }
177
- if (docDirs.length < 2) {
141
+ if (docDirs.length < 1) {
178
142
  this.logger.info(`Not enough documents for graph building (need >=2, have ${docDirs.length}), skipping`);
179
143
  return;
180
144
  }
@@ -185,7 +149,10 @@ var KnowledgeGraphService = class {
185
149
  const chunksPath = path.join(kbPath, docDir, "chunks.json");
186
150
  if (await fs.pathExists(chunksPath)) {
187
151
  const chunksData = await fs.readJSON(chunksPath);
188
- if (chunksData.chunks && Array.isArray(chunksData.chunks)) allChunks.push(...chunksData.chunks);
152
+ if (chunksData.chunks && Array.isArray(chunksData.chunks)) allChunks.push(...chunksData.chunks.map((c) => ({
153
+ ...c,
154
+ fileKey: docDir
155
+ })));
189
156
  }
190
157
  }
191
158
  if (allChunks.length === 0) {
@@ -198,7 +165,7 @@ var KnowledgeGraphService = class {
198
165
  const chunkStatsPath = path.join(kbPath, "chunk_stats.json");
199
166
  let chunkStats = {};
200
167
  if (await fs.pathExists(chunkStatsPath)) chunkStats = await fs.readJSON(chunkStatsPath);
201
- const knowledgeGraph = buildKnowledgeGraph(allChunks, connections, chunkStats, this.jiebaInitialized, this.logger);
168
+ const knowledgeGraph = buildKnowledgeGraph(allChunks, connections, chunkStats, false, this.logger, kbId);
202
169
  const graphFile = path.join(kbPath, "knowledge_graph.json");
203
170
  await fs.writeJSON(graphFile, knowledgeGraph, { spaces: 2 });
204
171
  this.logger.info(`Knowledge graph saved to ${graphFile}`);
package/dist/parser.d.ts CHANGED
@@ -1,16 +1,12 @@
1
- import type { KnowhereDownloadedResult, KnowhereManifest, KnowhereParseResult, KnowhereStatistics, StoredBrowseIndex, StoredChunk } from "./types";
1
+ import type { KnowhereDownloadedResult, KnowhereManifest, KnowhereStatistics } from "./types";
2
2
  type KnowhereStoredResultSummary = {
3
3
  manifest: KnowhereManifest;
4
4
  chunkCount: number;
5
5
  statistics: KnowhereStatistics;
6
6
  };
7
- export declare const STORED_BROWSE_INDEX_VERSION = 2;
8
7
  export declare function resolveResultEntryPath(rootDir: string, entryPath: string): string;
9
- export declare function buildStoredPathPrefixes(storedPath: string): string[];
10
- export declare function isStoredBrowseIndex(value: unknown): value is StoredBrowseIndex;
11
- export declare function buildStoredBrowseIndex(resultDir: string, manifest: KnowhereManifest, chunks: StoredChunk[]): Promise<StoredBrowseIndex>;
12
8
  export declare function extractKnowhereResultArchive(downloadedResult: KnowhereDownloadedResult, targetDir: string): Promise<void>;
13
- export declare function readStoredKnowhereResultSummary(resultDir: string): Promise<KnowhereStoredResultSummary>;
14
- export declare function readStoredKnowhereResultContent(resultDir: string): Promise<Omit<KnowhereParseResult, "browseIndex">>;
15
- export declare function readStoredKnowhereParseResult(resultDir: string): Promise<KnowhereParseResult>;
9
+ export declare function resolveStoredKnowhereResultRoot(documentDir: string): Promise<string>;
10
+ export declare function resolveStoredKnowhereArtifactPath(documentDir: string, entryPath: string): Promise<string>;
11
+ export declare function readStoredKnowhereResultSummary(documentDir: string): Promise<KnowhereStoredResultSummary>;
16
12
  export {};