@ontos-ai/knowhere-claw 0.2.3 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -5
- package/dist/client.js +1 -1
- package/dist/config.d.ts +8 -0
- package/dist/config.js +56 -8
- package/dist/connect-builder.d.ts +2 -0
- package/dist/connect-builder.js +9 -10
- package/dist/graph-builder.d.ts +4 -1
- package/dist/graph-builder.js +21 -34
- package/dist/index.js +3 -9
- package/dist/kg-service.d.ts +0 -2
- package/dist/kg-service.js +12 -45
- package/dist/parser.d.ts +4 -8
- package/dist/parser.js +25 -243
- package/dist/store.d.ts +4 -14
- package/dist/store.js +21 -106
- package/dist/text.js +1 -13
- package/dist/tools.js +413 -848
- package/dist/types.d.ts +1 -58
- package/openclaw.plugin.json +71 -1
- package/package.json +2 -3
- package/skills/knowhere_memory/SKILL.md +80 -98
- package/skills/knowhere/SKILL.md +0 -285
- /package/dist/__tests__/{read-result-file-tool.test.d.ts → storage-layout.test.d.ts} +0 -0
package/README.md
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# Knowhere for OpenClaw
|
|
2
2
|
|
|
3
3
|
Knowhere is an OpenClaw plugin that parses documents and URLs with Knowhere,
|
|
4
|
-
stores
|
|
4
|
+
stores extracted Knowhere files in OpenClaw state, and gives agents a
|
|
5
5
|
browse-first toolset for grounded document work.
|
|
6
6
|
|
|
7
7
|
Quick mental model:
|
|
@@ -22,7 +22,7 @@ the machine running that Gateway, then restart that Gateway.
|
|
|
22
22
|
## What You Get
|
|
23
23
|
|
|
24
24
|
- Ingest local files or document URLs with Knowhere
|
|
25
|
-
- Store parsed
|
|
25
|
+
- Store parsed documents inside OpenClaw-managed state
|
|
26
26
|
- Preview document structure, search chunks, and inspect raw result files
|
|
27
27
|
- Reuse stored documents across `session`, `agent`, or `global` scope
|
|
28
28
|
- Ship bundled `knowhere` and `knowhere_memory` skills so agents prefer this
|
|
@@ -73,7 +73,7 @@ Config notes:
|
|
|
73
73
|
- `pollIntervalMs`, `pollTimeoutMs`, `requestTimeoutMs`, `uploadTimeoutMs`:
|
|
74
74
|
optional tuning for job polling, API calls, and large uploads.
|
|
75
75
|
- An explicit `storageDir` such as
|
|
76
|
-
`/home/<user>/.openclaw/plugin-state/knowhere` makes stored
|
|
76
|
+
`/home/<user>/.openclaw/plugin-state/knowhere` makes stored documents
|
|
77
77
|
easier to inspect, back up, or clean up.
|
|
78
78
|
|
|
79
79
|
## How OpenClaw Uses It
|
|
@@ -110,8 +110,8 @@ actually call the plugin tools.
|
|
|
110
110
|
Within each scope, the plugin keeps:
|
|
111
111
|
|
|
112
112
|
- an `index.json` cache of stored document summaries
|
|
113
|
-
-
|
|
114
|
-
- the extracted Knowhere result
|
|
113
|
+
- a `metadata/` directory with one JSON record per stored document
|
|
114
|
+
- the extracted Knowhere result files directly inside each document directory
|
|
115
115
|
|
|
116
116
|
## Common Workflow
|
|
117
117
|
|
package/dist/client.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import { isRecord } from "./types.js";
|
|
2
2
|
import { formatErrorMessage } from "./error-message.js";
|
|
3
|
+
import { openAsBlob } from "node:fs";
|
|
3
4
|
import path from "node:path";
|
|
4
5
|
import { createHash } from "node:crypto";
|
|
5
|
-
import { openAsBlob } from "node:fs";
|
|
6
6
|
import { Knowhere } from "@knowhere-ai/sdk";
|
|
7
7
|
//#region src/client.ts
|
|
8
8
|
const RETRYABLE_STATUS_CODES = new Set([
|
package/dist/config.d.ts
CHANGED
|
@@ -2,6 +2,14 @@ import type { OpenClawPluginApi } from "openclaw/plugin-sdk/core";
|
|
|
2
2
|
import type { JsonSchemaObject, ResolvedKnowhereConfig, StringRecord, KnowledgeGraphConfig } from "./types";
|
|
3
3
|
export declare const DEFAULT_BASE_URL = "https://api.knowhereto.ai";
|
|
4
4
|
export declare const knowherePluginConfigSchema: JsonSchemaObject;
|
|
5
|
+
/**
|
|
6
|
+
* Return the effective plugin config object, merging the persisted
|
|
7
|
+
* resolved-config when the live pluginConfig is missing explicit fields
|
|
8
|
+
* (i.e. in agent subprocesses). Both `resolveKnowhereConfig` and
|
|
9
|
+
* `resolveKnowledgeGraphConfig` should read from this merged result so
|
|
10
|
+
* that subprocess instances inherit the gateway's full configuration.
|
|
11
|
+
*/
|
|
12
|
+
export declare function resolveEffectivePluginConfig(api: OpenClawPluginApi): StringRecord;
|
|
5
13
|
export declare function resolveKnowhereConfig(api: OpenClawPluginApi): ResolvedKnowhereConfig;
|
|
6
14
|
export declare const API_KEY_URL = "https://knowhereto.ai/api-keys";
|
|
7
15
|
export declare const PURCHASE_CREDITS_URL = "https://knowhereto.ai/usage?buy=true";
|
package/dist/config.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { isRecord } from "./types.js";
|
|
2
|
+
import { readFileSync } from "node:fs";
|
|
2
3
|
import fs from "node:fs/promises";
|
|
3
4
|
import path from "node:path";
|
|
4
5
|
//#region src/config.ts
|
|
@@ -147,36 +148,83 @@ function readScopeMode(raw) {
|
|
|
147
148
|
if (value === "session" || value === "agent" || value === "global") return value;
|
|
148
149
|
return KNOWHERE_PLUGIN_DEFAULTS.scopeMode;
|
|
149
150
|
}
|
|
151
|
+
const RESOLVED_CONFIG_STATE_FILE = "resolved-config.json";
|
|
152
|
+
function readPersistedResolvedConfigSync(stateDir) {
|
|
153
|
+
const filePath = path.join(stateDir, RESOLVED_CONFIG_STATE_FILE);
|
|
154
|
+
try {
|
|
155
|
+
const raw = readFileSync(filePath, "utf-8");
|
|
156
|
+
const parsed = JSON.parse(raw);
|
|
157
|
+
if (isRecord(parsed)) return parsed;
|
|
158
|
+
return null;
|
|
159
|
+
} catch {
|
|
160
|
+
return null;
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
async function persistResolvedConfig(stateDir, config) {
|
|
164
|
+
await fs.mkdir(stateDir, { recursive: true });
|
|
165
|
+
const filePath = path.join(stateDir, RESOLVED_CONFIG_STATE_FILE);
|
|
166
|
+
await fs.writeFile(filePath, JSON.stringify(config, null, 2), "utf-8");
|
|
167
|
+
}
|
|
168
|
+
function hasExplicitPluginConfig(raw) {
|
|
169
|
+
return Boolean(readString(raw, "scopeMode") || readString(raw, "storageDir"));
|
|
170
|
+
}
|
|
171
|
+
/**
|
|
172
|
+
* Return the effective plugin config object, merging the persisted
|
|
173
|
+
* resolved-config when the live pluginConfig is missing explicit fields
|
|
174
|
+
* (i.e. in agent subprocesses). Both `resolveKnowhereConfig` and
|
|
175
|
+
* `resolveKnowledgeGraphConfig` should read from this merged result so
|
|
176
|
+
* that subprocess instances inherit the gateway's full configuration.
|
|
177
|
+
*/
|
|
178
|
+
function resolveEffectivePluginConfig(api) {
|
|
179
|
+
const raw = isRecord(api.pluginConfig) ? api.pluginConfig : {};
|
|
180
|
+
const stateDir = api.runtime.state.resolveStateDir();
|
|
181
|
+
if (!hasExplicitPluginConfig(raw)) {
|
|
182
|
+
const persisted = readPersistedResolvedConfigSync(stateDir);
|
|
183
|
+
if (persisted) return {
|
|
184
|
+
...persisted,
|
|
185
|
+
...raw
|
|
186
|
+
};
|
|
187
|
+
}
|
|
188
|
+
return raw;
|
|
189
|
+
}
|
|
150
190
|
function resolveKnowhereConfig(api) {
|
|
151
191
|
const raw = isRecord(api.pluginConfig) ? api.pluginConfig : {};
|
|
152
192
|
const stateDir = api.runtime.state.resolveStateDir();
|
|
153
|
-
const
|
|
154
|
-
|
|
193
|
+
const hasExplicit = hasExplicitPluginConfig(raw);
|
|
194
|
+
const effective = resolveEffectivePluginConfig(api);
|
|
195
|
+
const storageDirRaw = readString(effective, "storageDir");
|
|
196
|
+
const config = {
|
|
155
197
|
apiKey: readString(raw, "apiKey") || process.env.KNOWHERE_API_KEY || "",
|
|
156
198
|
baseUrl: readString(raw, "baseUrl") || process.env.KNOWHERE_BASE_URL || "https://api.knowhereto.ai",
|
|
157
199
|
storageDir: storageDirRaw ? api.resolvePath(storageDirRaw) : path.join(stateDir, "plugins", api.id),
|
|
158
|
-
scopeMode: readScopeMode(
|
|
159
|
-
pollIntervalMs: readNumber(
|
|
200
|
+
scopeMode: readScopeMode(effective),
|
|
201
|
+
pollIntervalMs: readNumber(effective, "pollIntervalMs", KNOWHERE_PLUGIN_DEFAULTS.pollIntervalMs, {
|
|
160
202
|
min: 1e3,
|
|
161
203
|
max: 6e4,
|
|
162
204
|
integer: true
|
|
163
205
|
}),
|
|
164
|
-
pollTimeoutMs: readNumber(
|
|
206
|
+
pollTimeoutMs: readNumber(effective, "pollTimeoutMs", KNOWHERE_PLUGIN_DEFAULTS.pollTimeoutMs, {
|
|
165
207
|
min: 1e4,
|
|
166
208
|
max: 72e5,
|
|
167
209
|
integer: true
|
|
168
210
|
}),
|
|
169
|
-
requestTimeoutMs: readNumber(
|
|
211
|
+
requestTimeoutMs: readNumber(effective, "requestTimeoutMs", KNOWHERE_PLUGIN_DEFAULTS.requestTimeoutMs, {
|
|
170
212
|
min: 1e3,
|
|
171
213
|
max: 3e5,
|
|
172
214
|
integer: true
|
|
173
215
|
}),
|
|
174
|
-
uploadTimeoutMs: readNumber(
|
|
216
|
+
uploadTimeoutMs: readNumber(effective, "uploadTimeoutMs", KNOWHERE_PLUGIN_DEFAULTS.uploadTimeoutMs, {
|
|
175
217
|
min: 1e3,
|
|
176
218
|
max: 72e5,
|
|
177
219
|
integer: true
|
|
178
220
|
})
|
|
179
221
|
};
|
|
222
|
+
if (hasExplicit) persistResolvedConfig(stateDir, {
|
|
223
|
+
scopeMode: config.scopeMode,
|
|
224
|
+
storageDir: config.storageDir,
|
|
225
|
+
knowledgeGraph: raw.knowledgeGraph
|
|
226
|
+
}).catch(() => void 0);
|
|
227
|
+
return config;
|
|
180
228
|
}
|
|
181
229
|
const API_KEY_URL = "https://knowhereto.ai/api-keys";
|
|
182
230
|
const PURCHASE_CREDITS_URL = "https://knowhereto.ai/usage?buy=true";
|
|
@@ -257,4 +305,4 @@ function resolveKnowledgeGraphConfig(raw) {
|
|
|
257
305
|
};
|
|
258
306
|
}
|
|
259
307
|
//#endregion
|
|
260
|
-
export { assertKnowhereApiKey, formatPaymentRequiredMessage, isPaymentRequiredError, knowherePluginConfigSchema, persistApiKey, readPersistedApiKey, resolveKnowhereConfig, resolveKnowledgeGraphConfig };
|
|
308
|
+
export { assertKnowhereApiKey, formatPaymentRequiredMessage, isPaymentRequiredError, knowherePluginConfigSchema, persistApiKey, readPersistedApiKey, resolveEffectivePluginConfig, resolveKnowhereConfig, resolveKnowledgeGraphConfig };
|
package/dist/connect-builder.js
CHANGED
|
@@ -8,15 +8,14 @@ function normalizeKeyword(keyword) {
|
|
|
8
8
|
return keyword.toLowerCase().trim();
|
|
9
9
|
}
|
|
10
10
|
/**
|
|
11
|
-
* Extract file key from a chunk
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
* Example: "Default_Root/report.docx/Chapter 1" -> "report.docx"
|
|
11
|
+
* Extract file key from a chunk.
|
|
12
|
+
* Prefers the explicit fileKey field; falls back to path-based extraction for backward compatibility.
|
|
15
13
|
*/
|
|
16
|
-
function
|
|
17
|
-
|
|
14
|
+
function getFileKey(chunk) {
|
|
15
|
+
if (chunk.fileKey) return chunk.fileKey;
|
|
16
|
+
const parts = chunk.path.replace(/-->/g, "/").split("/");
|
|
18
17
|
if (parts.length >= 2) return parts[1];
|
|
19
|
-
return path;
|
|
18
|
+
return chunk.path;
|
|
20
19
|
}
|
|
21
20
|
/**
|
|
22
21
|
* Build inverted keyword index: keyword -> [chunk_ids]
|
|
@@ -97,7 +96,7 @@ function buildConnections(chunks, config, logger) {
|
|
|
97
96
|
for (const chunk of chunks) chunkById.set(chunk.chunk_id, chunk);
|
|
98
97
|
const chunksByFile = /* @__PURE__ */ new Map();
|
|
99
98
|
for (const chunk of chunks) {
|
|
100
|
-
const fileKey =
|
|
99
|
+
const fileKey = getFileKey(chunk);
|
|
101
100
|
if (!chunksByFile.has(fileKey)) chunksByFile.set(fileKey, /* @__PURE__ */ new Set());
|
|
102
101
|
chunksByFile.get(fileKey).add(chunk.chunk_id);
|
|
103
102
|
}
|
|
@@ -120,14 +119,14 @@ function buildConnections(chunks, config, logger) {
|
|
|
120
119
|
const sourceChunk = chunkById.get(sourceId);
|
|
121
120
|
if (!sourceChunk) continue;
|
|
122
121
|
const sourceKeywords = getKeywords(sourceChunk);
|
|
123
|
-
const sourceFileKey =
|
|
122
|
+
const sourceFileKey = getFileKey(sourceChunk);
|
|
124
123
|
for (const targetId of targetIds) {
|
|
125
124
|
const targetChunk = chunkById.get(targetId);
|
|
126
125
|
if (!targetChunk) continue;
|
|
127
126
|
const pairKey = sourceId < targetId ? `${sourceId}::${targetId}` : `${targetId}::${sourceId}`;
|
|
128
127
|
if (seenPairs.has(pairKey)) continue;
|
|
129
128
|
seenPairs.add(pairKey);
|
|
130
|
-
const targetFileKey =
|
|
129
|
+
const targetFileKey = getFileKey(targetChunk);
|
|
131
130
|
if (config.crossFileOnly && sourceFileKey === targetFileKey) continue;
|
|
132
131
|
const contentRatio = sequenceMatcherRatio(sourceChunk.content.slice(0, 500), targetChunk.content.slice(0, 500));
|
|
133
132
|
if (contentRatio >= config.maxContentOverlap) {
|
package/dist/graph-builder.d.ts
CHANGED
|
@@ -32,12 +32,15 @@ interface FileMetadata {
|
|
|
32
32
|
top_keywords: string[];
|
|
33
33
|
top_summary: string;
|
|
34
34
|
importance: number;
|
|
35
|
+
created_at: string;
|
|
35
36
|
}
|
|
36
37
|
/**
|
|
37
38
|
* Complete knowledge graph structure
|
|
38
39
|
*/
|
|
39
40
|
export interface KnowledgeGraph {
|
|
40
41
|
version: string;
|
|
42
|
+
updated_at: string;
|
|
43
|
+
kb_id: string;
|
|
41
44
|
stats: {
|
|
42
45
|
total_files: number;
|
|
43
46
|
total_chunks: number;
|
|
@@ -59,7 +62,7 @@ export interface ChunkStats {
|
|
|
59
62
|
* Main function to build knowledge graph
|
|
60
63
|
* Equivalent to Python: build_knowledge_graph
|
|
61
64
|
*/
|
|
62
|
-
export declare function buildKnowledgeGraph(chunks: ChunkData[], connections: Connection[], chunkStats: ChunkStats,
|
|
65
|
+
export declare function buildKnowledgeGraph(chunks: ChunkData[], connections: Connection[], chunkStats: ChunkStats, _jiebaInitialized: boolean, logger?: PluginLogger, kbId?: string): KnowledgeGraph;
|
|
63
66
|
/**
|
|
64
67
|
* Incremental update: match new chunks against existing chunks
|
|
65
68
|
* Equivalent to Python: _incremental_connections
|
package/dist/graph-builder.js
CHANGED
|
@@ -1,18 +1,13 @@
|
|
|
1
|
-
import * as nodejieba from "nodejieba";
|
|
2
1
|
//#region src/graph-builder.ts
|
|
3
2
|
/**
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
* TypeScript implementation of knowhere-api/apps/worker/app/services/connect_builder/graph_builder.py
|
|
7
|
-
* Builds file-level knowledge graphs from chunk connections with TF-IDF and importance scoring.
|
|
8
|
-
*/
|
|
9
|
-
/**
|
|
10
|
-
* Extract file key from chunk path
|
|
3
|
+
* Extract file key from a chunk.
|
|
4
|
+
* Prefers the explicit fileKey field; falls back to path-based extraction for backward compatibility.
|
|
11
5
|
*/
|
|
12
|
-
function
|
|
13
|
-
|
|
6
|
+
function getFileKey(chunk) {
|
|
7
|
+
if (chunk.fileKey) return chunk.fileKey;
|
|
8
|
+
const parts = chunk.path.replace(/-->/g, "/").split("/");
|
|
14
9
|
if (parts.length >= 2) return parts[1];
|
|
15
|
-
return path;
|
|
10
|
+
return chunk.path;
|
|
16
11
|
}
|
|
17
12
|
/**
|
|
18
13
|
* Extract label from chunk path (last segment)
|
|
@@ -25,29 +20,18 @@ function extractLabel(path) {
|
|
|
25
20
|
* Extract tokens from text using jieba
|
|
26
21
|
* Equivalent to Python: _extract_tokens_from_content
|
|
27
22
|
*/
|
|
28
|
-
function extractTokensFromContent(content
|
|
29
|
-
|
|
30
|
-
if (!jiebaInitialized) return cleanContent.split(/\s+/).filter((w) => w.length > 1);
|
|
31
|
-
try {
|
|
32
|
-
return nodejieba.cut(cleanContent).filter((token) => {
|
|
33
|
-
if (token.length <= 1) return false;
|
|
34
|
-
if (/^\d+$/.test(token)) return false;
|
|
35
|
-
if (/^[^\w\u4e00-\u9fa5]+$/.test(token)) return false;
|
|
36
|
-
return true;
|
|
37
|
-
});
|
|
38
|
-
} catch {
|
|
39
|
-
return cleanContent.split(/\s+/).filter((w) => w.length > 1);
|
|
40
|
-
}
|
|
23
|
+
function extractTokensFromContent(content) {
|
|
24
|
+
return content.replace(/<[^>]*>/g, " ").split(/\s+/).filter((w) => w.length > 1);
|
|
41
25
|
}
|
|
42
26
|
/**
|
|
43
27
|
* Compute TF-IDF top keywords for a file
|
|
44
28
|
* Equivalent to Python: _compute_tfidf_top_keywords
|
|
45
29
|
*/
|
|
46
|
-
function computeTfidfTopKeywords(fileChunks, allChunks, topK
|
|
30
|
+
function computeTfidfTopKeywords(fileChunks, allChunks, topK) {
|
|
47
31
|
if (fileChunks.length === 0) return [];
|
|
48
32
|
const fileTokens = [];
|
|
49
33
|
for (const chunk of fileChunks) {
|
|
50
|
-
const tokens = extractTokensFromContent(chunk.content
|
|
34
|
+
const tokens = extractTokensFromContent(chunk.content);
|
|
51
35
|
fileTokens.push(...tokens);
|
|
52
36
|
}
|
|
53
37
|
if (fileTokens.length === 0) return [];
|
|
@@ -58,7 +42,7 @@ function computeTfidfTopKeywords(fileChunks, allChunks, topK, jiebaInitialized)
|
|
|
58
42
|
}
|
|
59
43
|
const docFreq = /* @__PURE__ */ new Map();
|
|
60
44
|
for (const chunk of allChunks) {
|
|
61
|
-
const tokens = new Set(extractTokensFromContent(chunk.content
|
|
45
|
+
const tokens = new Set(extractTokensFromContent(chunk.content).map((t) => t.toLowerCase()));
|
|
62
46
|
for (const token of tokens) docFreq.set(token, (docFreq.get(token) || 0) + 1);
|
|
63
47
|
}
|
|
64
48
|
const totalDocs = allChunks.length;
|
|
@@ -107,7 +91,7 @@ function computeFileImportance(fileKey, fileChunks, allChunks, chunkStats, decay
|
|
|
107
91
|
function getAllChunkCountsByFile(chunks) {
|
|
108
92
|
const countsByFile = /* @__PURE__ */ new Map();
|
|
109
93
|
for (const chunk of chunks) {
|
|
110
|
-
const fileKey =
|
|
94
|
+
const fileKey = getFileKey(chunk);
|
|
111
95
|
countsByFile.set(fileKey, (countsByFile.get(fileKey) || 0) + 1);
|
|
112
96
|
}
|
|
113
97
|
return Array.from(countsByFile.values());
|
|
@@ -122,8 +106,8 @@ function aggregateFileLevelEdges(connections, chunkById, topN = 5) {
|
|
|
122
106
|
const sourceChunk = chunkById.get(conn.source);
|
|
123
107
|
const targetChunk = chunkById.get(conn.target);
|
|
124
108
|
if (!sourceChunk || !targetChunk) continue;
|
|
125
|
-
const sourceFile =
|
|
126
|
-
const targetFile =
|
|
109
|
+
const sourceFile = getFileKey(sourceChunk);
|
|
110
|
+
const targetFile = getFileKey(targetChunk);
|
|
127
111
|
if (sourceFile === targetFile) continue;
|
|
128
112
|
const pairKey = sourceFile < targetFile ? `${sourceFile}::${targetFile}` : `${targetFile}::${sourceFile}`;
|
|
129
113
|
if (!filePairs.has(pairKey)) filePairs.set(pairKey, []);
|
|
@@ -159,13 +143,13 @@ function aggregateFileLevelEdges(connections, chunkById, topN = 5) {
|
|
|
159
143
|
* Main function to build knowledge graph
|
|
160
144
|
* Equivalent to Python: build_knowledge_graph
|
|
161
145
|
*/
|
|
162
|
-
function buildKnowledgeGraph(chunks, connections, chunkStats,
|
|
146
|
+
function buildKnowledgeGraph(chunks, connections, chunkStats, _jiebaInitialized, logger, kbId) {
|
|
163
147
|
logger?.info(`Building knowledge graph from ${chunks.length} chunks and ${connections.length} connections`);
|
|
164
148
|
const chunkById = /* @__PURE__ */ new Map();
|
|
165
149
|
for (const chunk of chunks) chunkById.set(chunk.chunk_id, chunk);
|
|
166
150
|
const chunksByFile = /* @__PURE__ */ new Map();
|
|
167
151
|
for (const chunk of chunks) {
|
|
168
|
-
const fileKey =
|
|
152
|
+
const fileKey = getFileKey(chunk);
|
|
169
153
|
if (!chunksByFile.has(fileKey)) chunksByFile.set(fileKey, []);
|
|
170
154
|
chunksByFile.get(fileKey).push(chunk);
|
|
171
155
|
}
|
|
@@ -176,7 +160,7 @@ function buildKnowledgeGraph(chunks, connections, chunkStats, jiebaInitialized,
|
|
|
176
160
|
const type = chunk.metadata.type || "text";
|
|
177
161
|
typeCount[type] = (typeCount[type] || 0) + 1;
|
|
178
162
|
}
|
|
179
|
-
const topKeywords = computeTfidfTopKeywords(fileChunks, chunks, 10
|
|
163
|
+
const topKeywords = computeTfidfTopKeywords(fileChunks, chunks, 10);
|
|
180
164
|
const importance = computeFileImportance(fileKey, fileChunks, chunks, chunkStats);
|
|
181
165
|
let topSummary = "";
|
|
182
166
|
for (const chunk of fileChunks) if (chunk.metadata.summary && typeof chunk.metadata.summary === "string") {
|
|
@@ -188,13 +172,16 @@ function buildKnowledgeGraph(chunks, connections, chunkStats, jiebaInitialized,
|
|
|
188
172
|
types: typeCount,
|
|
189
173
|
top_keywords: topKeywords,
|
|
190
174
|
top_summary: topSummary,
|
|
191
|
-
importance
|
|
175
|
+
importance,
|
|
176
|
+
created_at: (/* @__PURE__ */ new Date()).toISOString()
|
|
192
177
|
};
|
|
193
178
|
}
|
|
194
179
|
const fileEdges = aggregateFileLevelEdges(connections, chunkById, 5);
|
|
195
180
|
logger?.info(`Created graph with ${Object.keys(filesMetadata).length} files and ${fileEdges.length} edges`);
|
|
196
181
|
return {
|
|
197
182
|
version: "2.0",
|
|
183
|
+
updated_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
184
|
+
kb_id: kbId || "",
|
|
198
185
|
stats: {
|
|
199
186
|
total_files: Object.keys(filesMetadata).length,
|
|
200
187
|
total_chunks: chunks.length,
|
package/dist/index.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { knowherePluginConfigSchema, readPersistedApiKey, resolveKnowhereConfig, resolveKnowledgeGraphConfig } from "./config.js";
|
|
1
|
+
import { knowherePluginConfigSchema, readPersistedApiKey, resolveEffectivePluginConfig, resolveKnowhereConfig, resolveKnowledgeGraphConfig } from "./config.js";
|
|
2
2
|
import { KnowhereStore } from "./store.js";
|
|
3
3
|
import { createKnowhereToolFactory } from "./tools.js";
|
|
4
4
|
import { KnowledgeGraphService } from "./kg-service.js";
|
|
@@ -6,11 +6,11 @@ import { KnowledgeGraphService } from "./kg-service.js";
|
|
|
6
6
|
const plugin = {
|
|
7
7
|
id: "knowhere-claw",
|
|
8
8
|
name: "Knowhere",
|
|
9
|
-
description: "Knowhere document ingestion and
|
|
9
|
+
description: "Knowhere document ingestion, job management, and knowledge graph tools for OpenClaw.",
|
|
10
10
|
configSchema: knowherePluginConfigSchema,
|
|
11
11
|
register(api) {
|
|
12
12
|
const config = resolveKnowhereConfig(api);
|
|
13
|
-
const kgConfig = resolveKnowledgeGraphConfig(api
|
|
13
|
+
const kgConfig = resolveKnowledgeGraphConfig(resolveEffectivePluginConfig(api));
|
|
14
14
|
const store = new KnowhereStore({
|
|
15
15
|
rootDir: config.storageDir,
|
|
16
16
|
scopeMode: config.scopeMode,
|
|
@@ -41,12 +41,6 @@ const plugin = {
|
|
|
41
41
|
"knowhere_list_jobs",
|
|
42
42
|
"knowhere_get_job_status",
|
|
43
43
|
"knowhere_import_completed_job",
|
|
44
|
-
"knowhere_grep",
|
|
45
|
-
"knowhere_read_result_file",
|
|
46
|
-
"knowhere_preview_document",
|
|
47
|
-
"knowhere_list_documents",
|
|
48
|
-
"knowhere_remove_document",
|
|
49
|
-
"knowhere_clear_scope",
|
|
50
44
|
"knowhere_set_api_key",
|
|
51
45
|
"knowhere_kg_list",
|
|
52
46
|
"knowhere_kg_query"
|
package/dist/kg-service.d.ts
CHANGED
|
@@ -9,11 +9,9 @@ export declare class KnowledgeGraphService {
|
|
|
9
9
|
private readonly logger;
|
|
10
10
|
private degradationMode;
|
|
11
11
|
private buildQueues;
|
|
12
|
-
private jiebaInitialized;
|
|
13
12
|
constructor(params: KnowledgeGraphServiceParams);
|
|
14
13
|
initialize(): Promise<void>;
|
|
15
14
|
private checkPythonEnvironment;
|
|
16
|
-
private initializeJieba;
|
|
17
15
|
extractKeywords(text: string, topK?: number): Promise<string[]>;
|
|
18
16
|
resolveKbId(context: ToolRuntimeContext): string | null;
|
|
19
17
|
getKbPath(kbId: string): string;
|
package/dist/kg-service.js
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
+
import { resolveStoredKnowhereResultRoot } from "./parser.js";
|
|
1
2
|
import { buildConnections, init_connect_builder } from "./connect-builder.js";
|
|
2
3
|
import { buildKnowledgeGraph } from "./graph-builder.js";
|
|
3
4
|
import path from "node:path";
|
|
5
|
+
import os from "node:os";
|
|
4
6
|
import { spawn } from "node:child_process";
|
|
5
7
|
import fs from "fs-extra";
|
|
6
|
-
import os from "node:os";
|
|
7
|
-
import * as nodejieba from "nodejieba";
|
|
8
8
|
//#region src/kg-service.ts
|
|
9
9
|
init_connect_builder();
|
|
10
10
|
const DEFAULT_CONNECT_CONFIG = {
|
|
@@ -25,18 +25,11 @@ const DEFAULT_KG_CONFIG = {
|
|
|
25
25
|
function formatUnknownError(error) {
|
|
26
26
|
return error instanceof Error ? error.message : String(error);
|
|
27
27
|
}
|
|
28
|
-
function extractKeywordText(item) {
|
|
29
|
-
if (typeof item === "string") return item;
|
|
30
|
-
if (typeof item === "number" || typeof item === "boolean" || typeof item === "bigint") return String(item);
|
|
31
|
-
if (typeof item === "object" && item !== null && "word" in item && typeof item.word === "string") return item.word;
|
|
32
|
-
return null;
|
|
33
|
-
}
|
|
34
28
|
var KnowledgeGraphService = class {
|
|
35
29
|
config;
|
|
36
30
|
logger;
|
|
37
31
|
degradationMode = "full";
|
|
38
32
|
buildQueues = /* @__PURE__ */ new Map();
|
|
39
|
-
jiebaInitialized = false;
|
|
40
33
|
constructor(params) {
|
|
41
34
|
this.config = {
|
|
42
35
|
...DEFAULT_KG_CONFIG,
|
|
@@ -57,18 +50,12 @@ var KnowledgeGraphService = class {
|
|
|
57
50
|
}
|
|
58
51
|
try {
|
|
59
52
|
await this.checkPythonEnvironment();
|
|
60
|
-
await this.initializeJieba();
|
|
61
53
|
this.degradationMode = "full";
|
|
62
54
|
this.logger.info("Knowledge graph service initialized in full mode");
|
|
63
55
|
} catch (error) {
|
|
64
56
|
this.logger.warn(`Knowledge graph initialization failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
this.logger.warn("Python not found, knowledge graph disabled");
|
|
68
|
-
} else if (error.code === "NODEJIEBA_MISSING") {
|
|
69
|
-
this.degradationMode = "basic";
|
|
70
|
-
this.logger.warn("Nodejieba missing, using basic tokenization");
|
|
71
|
-
} else this.degradationMode = "disabled";
|
|
57
|
+
this.degradationMode = "disabled";
|
|
58
|
+
this.logger.warn("Python not found, knowledge graph disabled");
|
|
72
59
|
}
|
|
73
60
|
}
|
|
74
61
|
async checkPythonEnvironment() {
|
|
@@ -91,30 +78,8 @@ var KnowledgeGraphService = class {
|
|
|
91
78
|
});
|
|
92
79
|
});
|
|
93
80
|
}
|
|
94
|
-
async initializeJieba() {
|
|
95
|
-
try {
|
|
96
|
-
nodejieba.load();
|
|
97
|
-
this.jiebaInitialized = true;
|
|
98
|
-
this.logger.info("Nodejieba initialized successfully");
|
|
99
|
-
} catch {
|
|
100
|
-
const err = /* @__PURE__ */ new Error("Failed to initialize nodejieba");
|
|
101
|
-
err.code = "NODEJIEBA_MISSING";
|
|
102
|
-
throw err;
|
|
103
|
-
}
|
|
104
|
-
}
|
|
105
81
|
async extractKeywords(text, topK = 20) {
|
|
106
82
|
if (this.degradationMode === "disabled") return [];
|
|
107
|
-
if (this.degradationMode === "full" && this.jiebaInitialized) try {
|
|
108
|
-
const rawKeywords = nodejieba.extract(text, topK);
|
|
109
|
-
return (Array.isArray(rawKeywords) ? rawKeywords : []).map((item) => extractKeywordText(item)).filter((keyword) => keyword !== null).filter((kw) => {
|
|
110
|
-
if (kw.length <= 1) return false;
|
|
111
|
-
if (/^\d+$/.test(kw)) return false;
|
|
112
|
-
return true;
|
|
113
|
-
}).slice(0, topK);
|
|
114
|
-
} catch (error) {
|
|
115
|
-
this.logger.warn(`Jieba extraction failed, falling back to basic: ${formatUnknownError(error)}`);
|
|
116
|
-
this.degradationMode = "basic";
|
|
117
|
-
}
|
|
118
83
|
return text.split(/\s+/).filter((w) => w.length > 1).slice(0, topK);
|
|
119
84
|
}
|
|
120
85
|
resolveKbId(context) {
|
|
@@ -143,9 +108,8 @@ var KnowledgeGraphService = class {
|
|
|
143
108
|
const kbPath = await this.ensureKbDirectory(params.kbId);
|
|
144
109
|
const docDir = path.join(kbPath, params.docId);
|
|
145
110
|
await fs.ensureDir(docDir);
|
|
146
|
-
await
|
|
147
|
-
|
|
148
|
-
await fs.writeJSON(keywordsPath, params.keywords, { spaces: 2 });
|
|
111
|
+
const sourceResultRoot = await resolveStoredKnowhereResultRoot(params.sourcePath);
|
|
112
|
+
await fs.copy(sourceResultRoot, docDir, { overwrite: true });
|
|
149
113
|
const metadataPath = path.join(docDir, "metadata.json");
|
|
150
114
|
await fs.writeJSON(metadataPath, params.metadata, { spaces: 2 });
|
|
151
115
|
this.logger.info(`Document saved to knowledge base: kb=${params.kbId} doc=${params.docId}`);
|
|
@@ -174,7 +138,7 @@ var KnowledgeGraphService = class {
|
|
|
174
138
|
const docPath = path.join(kbPath, doc);
|
|
175
139
|
if ((await fs.stat(docPath)).isDirectory() && doc !== "knowledge_graph.json" && doc !== "chunk_stats.json" && doc !== "kb_metadata.json") docDirs.push(doc);
|
|
176
140
|
}
|
|
177
|
-
if (docDirs.length <
|
|
141
|
+
if (docDirs.length < 1) {
|
|
178
142
|
this.logger.info(`Not enough documents for graph building (need >=2, have ${docDirs.length}), skipping`);
|
|
179
143
|
return;
|
|
180
144
|
}
|
|
@@ -185,7 +149,10 @@ var KnowledgeGraphService = class {
|
|
|
185
149
|
const chunksPath = path.join(kbPath, docDir, "chunks.json");
|
|
186
150
|
if (await fs.pathExists(chunksPath)) {
|
|
187
151
|
const chunksData = await fs.readJSON(chunksPath);
|
|
188
|
-
if (chunksData.chunks && Array.isArray(chunksData.chunks)) allChunks.push(...chunksData.chunks)
|
|
152
|
+
if (chunksData.chunks && Array.isArray(chunksData.chunks)) allChunks.push(...chunksData.chunks.map((c) => ({
|
|
153
|
+
...c,
|
|
154
|
+
fileKey: docDir
|
|
155
|
+
})));
|
|
189
156
|
}
|
|
190
157
|
}
|
|
191
158
|
if (allChunks.length === 0) {
|
|
@@ -198,7 +165,7 @@ var KnowledgeGraphService = class {
|
|
|
198
165
|
const chunkStatsPath = path.join(kbPath, "chunk_stats.json");
|
|
199
166
|
let chunkStats = {};
|
|
200
167
|
if (await fs.pathExists(chunkStatsPath)) chunkStats = await fs.readJSON(chunkStatsPath);
|
|
201
|
-
const knowledgeGraph = buildKnowledgeGraph(allChunks, connections, chunkStats,
|
|
168
|
+
const knowledgeGraph = buildKnowledgeGraph(allChunks, connections, chunkStats, false, this.logger, kbId);
|
|
202
169
|
const graphFile = path.join(kbPath, "knowledge_graph.json");
|
|
203
170
|
await fs.writeJSON(graphFile, knowledgeGraph, { spaces: 2 });
|
|
204
171
|
this.logger.info(`Knowledge graph saved to ${graphFile}`);
|
package/dist/parser.d.ts
CHANGED
|
@@ -1,16 +1,12 @@
|
|
|
1
|
-
import type { KnowhereDownloadedResult, KnowhereManifest,
|
|
1
|
+
import type { KnowhereDownloadedResult, KnowhereManifest, KnowhereStatistics } from "./types";
|
|
2
2
|
type KnowhereStoredResultSummary = {
|
|
3
3
|
manifest: KnowhereManifest;
|
|
4
4
|
chunkCount: number;
|
|
5
5
|
statistics: KnowhereStatistics;
|
|
6
6
|
};
|
|
7
|
-
export declare const STORED_BROWSE_INDEX_VERSION = 2;
|
|
8
7
|
export declare function resolveResultEntryPath(rootDir: string, entryPath: string): string;
|
|
9
|
-
export declare function buildStoredPathPrefixes(storedPath: string): string[];
|
|
10
|
-
export declare function isStoredBrowseIndex(value: unknown): value is StoredBrowseIndex;
|
|
11
|
-
export declare function buildStoredBrowseIndex(resultDir: string, manifest: KnowhereManifest, chunks: StoredChunk[]): Promise<StoredBrowseIndex>;
|
|
12
8
|
export declare function extractKnowhereResultArchive(downloadedResult: KnowhereDownloadedResult, targetDir: string): Promise<void>;
|
|
13
|
-
export declare function
|
|
14
|
-
export declare function
|
|
15
|
-
export declare function
|
|
9
|
+
export declare function resolveStoredKnowhereResultRoot(documentDir: string): Promise<string>;
|
|
10
|
+
export declare function resolveStoredKnowhereArtifactPath(documentDir: string, entryPath: string): Promise<string>;
|
|
11
|
+
export declare function readStoredKnowhereResultSummary(documentDir: string): Promise<KnowhereStoredResultSummary>;
|
|
16
12
|
export {};
|