@ontos-ai/knowhere-claw 0.2.4 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-hooks.d.ts +3 -0
- package/dist/agent-hooks.js +46 -0
- package/dist/client.js +1 -1
- package/dist/config.d.ts +8 -0
- package/dist/config.js +56 -8
- package/dist/graph-builder.d.ts +1 -1
- package/dist/graph-builder.js +7 -25
- package/dist/index.js +10 -3
- package/dist/kg-service.d.ts +0 -2
- package/dist/kg-service.js +5 -43
- package/dist/tools.js +699 -1
- package/openclaw.plugin.json +11 -1
- package/package.json +2 -3
- package/skills/knowhere_memory/SKILL.md +31 -13
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import { isRecord } from "./types.js";
|
|
2
|
+
//#region src/agent-hooks.ts
|
|
3
|
+
// Guidance lines for the "File Ingestion" section of the injected prompt context.
const KNOWHERE_INGESTION_GUIDANCE = [
  "### File Ingestion",
  "When the user sends a file or attachment, or asks to parse/ingest a document,",
  "**always** use `knowhere_ingest_document` to process it.",
  "- If a `[media attached: ...]` marker is present, the file is already on disk — use the `filePath` parameter.",
  "- If the file is in the cloud (e.g. Feishu Drive), first obtain the download URL via the appropriate channel tool, then use the `url` parameter.",
  "- Refer to your **knowhere_memory** skill for the complete step-by-step workflow."
];
// Guidance lines for the "Knowledge Retrieval" section of the injected prompt context.
const KNOWHERE_RETRIEVAL_GUIDANCE = [
  "### Knowledge Retrieval",
  "When answering questions about documents or the knowledge base:",
  "- ✅ Use `knowhere_get_map`, `knowhere_get_structure`, `knowhere_read_chunks`, `knowhere_kg_query`",
  "- ❌ Do NOT use `exec` or shell commands to read files inside `~/.knowhere/`"
];
// Markdown text handed back from the `before_prompt_build` hook: a heading
// followed by the two guidance sections, separated by blank lines.
const KNOWHERE_PROMPT_CONTEXT = [
  "## 🔧 Knowhere Plugin Guidance",
  "",
  ...KNOWHERE_INGESTION_GUIDANCE,
  "",
  ...KNOWHERE_RETRIEVAL_GUIDANCE
].join("\n");
|
|
18
|
+
// Substring looked for in `exec` command text to detect access to the Knowhere data directory.
const KNOWHERE_DIR_PATTERN = ".knowhere";
|
|
19
|
+
// Explanation returned as `blockReason` when an `exec` call is blocked; points the agent at the retrieval tools.
const BLOCK_REASON = "Do not use exec to read .knowhere/ directly. Use knowhere retrieval tools instead: knowhere_get_map, knowhere_get_structure, knowhere_read_chunks, knowhere_kg_query.";
|
|
20
|
+
/**
 * Extract the agent-hooks settings from the raw plugin config.
 *
 * Only `agentHooks.enabled` is read; it is honoured when it is a boolean and
 * defaults to `true` in every other case (missing section, wrong type).
 *
 * @param raw Raw plugin config object.
 * @returns `{ enabled }` with the resolved flag.
 */
function resolveAgentHooksConfig(raw) {
  const hooksSection = isRecord(raw.agentHooks) ? raw.agentHooks : {};
  let enabled = true;
  if (typeof hooksSection.enabled === "boolean") enabled = hooksSection.enabled;
  return { enabled };
}
|
|
24
|
+
/**
 * Register the Knowhere agent hooks on the plugin API.
 *
 * When hooks are enabled this installs two handlers:
 *  - `before_prompt_build`: prepends the static Knowhere guidance text.
 *  - `before_tool_call`: blocks `exec` calls whose command text mentions the
 *    `.knowhere` directory.
 *
 * @param api Plugin API; `api.on` and `api.logger.info` are used.
 * @param rawConfig Raw plugin config, consulted for `agentHooks.enabled`.
 */
function registerAgentHooks(api, rawConfig) {
  const { enabled } = resolveAgentHooksConfig(rawConfig);
  if (!enabled) {
    api.logger.info("knowhere: agent hooks disabled via config");
    return;
  }

  // Prompt context: the same guidance is prepended on every prompt build.
  api.on("before_prompt_build", async () => ({ prependContext: KNOWHERE_PROMPT_CONTEXT }));

  // Tool governance: only `exec` calls are inspected.
  api.on("before_tool_call", async (event) => {
    if (event.toolName !== "exec") return;
    const toolParams = isRecord(event.params) ? event.params : {};
    // The command text is read from `command`, falling back to `cmd`.
    let commandText = "";
    if (typeof toolParams.command === "string") commandText = toolParams.command;
    else if (typeof toolParams.cmd === "string") commandText = toolParams.cmd;
    if (!commandText.includes(KNOWHERE_DIR_PATTERN)) return;
    api.logger.info(`knowhere: blocked exec touching ${KNOWHERE_DIR_PATTERN}`);
    return {
      block: true,
      blockReason: BLOCK_REASON
    };
  });

  api.logger.info("knowhere: agent hooks registered (prompt context + tool governance)");
}
|
|
45
|
+
//#endregion
|
|
46
|
+
export { registerAgentHooks };
|
package/dist/client.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import { isRecord } from "./types.js";
|
|
2
2
|
import { formatErrorMessage } from "./error-message.js";
|
|
3
|
+
import { openAsBlob } from "node:fs";
|
|
3
4
|
import path from "node:path";
|
|
4
5
|
import { createHash } from "node:crypto";
|
|
5
|
-
import { openAsBlob } from "node:fs";
|
|
6
6
|
import { Knowhere } from "@knowhere-ai/sdk";
|
|
7
7
|
//#region src/client.ts
|
|
8
8
|
const RETRYABLE_STATUS_CODES = new Set([
|
package/dist/config.d.ts
CHANGED
|
@@ -2,6 +2,14 @@ import type { OpenClawPluginApi } from "openclaw/plugin-sdk/core";
|
|
|
2
2
|
import type { JsonSchemaObject, ResolvedKnowhereConfig, StringRecord, KnowledgeGraphConfig } from "./types";
|
|
3
3
|
export declare const DEFAULT_BASE_URL = "https://api.knowhereto.ai";
|
|
4
4
|
export declare const knowherePluginConfigSchema: JsonSchemaObject;
|
|
5
|
+
/**
|
|
6
|
+
* Return the effective plugin config object, merging the persisted
|
|
7
|
+
* resolved-config when the live pluginConfig is missing explicit fields
|
|
8
|
+
* (i.e. in agent subprocesses). Both `resolveKnowhereConfig` and
|
|
9
|
+
* `resolveKnowledgeGraphConfig` should read from this merged result so
|
|
10
|
+
* that subprocess instances inherit the gateway's full configuration.
|
|
11
|
+
*/
|
|
12
|
+
export declare function resolveEffectivePluginConfig(api: OpenClawPluginApi): StringRecord;
|
|
5
13
|
export declare function resolveKnowhereConfig(api: OpenClawPluginApi): ResolvedKnowhereConfig;
|
|
6
14
|
export declare const API_KEY_URL = "https://knowhereto.ai/api-keys";
|
|
7
15
|
export declare const PURCHASE_CREDITS_URL = "https://knowhereto.ai/usage?buy=true";
|
package/dist/config.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { isRecord } from "./types.js";
|
|
2
|
+
import { readFileSync } from "node:fs";
|
|
2
3
|
import fs from "node:fs/promises";
|
|
3
4
|
import path from "node:path";
|
|
4
5
|
//#region src/config.ts
|
|
@@ -147,36 +148,83 @@ function readScopeMode(raw) {
|
|
|
147
148
|
if (value === "session" || value === "agent" || value === "global") return value;
|
|
148
149
|
return KNOWHERE_PLUGIN_DEFAULTS.scopeMode;
|
|
149
150
|
}
|
|
151
|
+
// File name, inside the state directory, under which the resolved config is persisted.
const RESOLVED_CONFIG_STATE_FILE = "resolved-config.json";
/**
 * Synchronously load the persisted resolved config from `stateDir`.
 *
 * @param stateDir Directory expected to contain `resolved-config.json`.
 * @returns The parsed object when it is a record; `null` when the file cannot
 *   be read, is not valid JSON, or does not hold a record.
 */
function readPersistedResolvedConfigSync(stateDir) {
  try {
    const text = readFileSync(path.join(stateDir, RESOLVED_CONFIG_STATE_FILE), "utf-8");
    const candidate = JSON.parse(text);
    return isRecord(candidate) ? candidate : null;
  } catch {
    // Missing/unreadable file or malformed JSON: treated as "nothing persisted".
    return null;
  }
}
|
|
163
|
+
/**
 * Persist the resolved config as pretty-printed JSON in `stateDir`.
 *
 * The file is read synchronously by other plugin instances, so it is written
 * to a temporary sibling first and then renamed into place. A reader therefore
 * sees either the previous complete file or the new complete file, never a
 * truncated one.
 *
 * @param stateDir Directory to write into; created recursively if missing.
 * @param config JSON-serialisable config object to store.
 * @returns Promise that resolves once the file is in place; rejects with the
 *   underlying fs error otherwise.
 */
async function persistResolvedConfig(stateDir, config) {
  await fs.mkdir(stateDir, { recursive: true });
  const filePath = path.join(stateDir, RESOLVED_CONFIG_STATE_FILE);
  // pid + timestamp keep concurrent writers from sharing a temp file.
  const tempPath = `${filePath}.${process.pid}.${Date.now()}.tmp`;
  try {
    await fs.writeFile(tempPath, JSON.stringify(config, null, 2), "utf-8");
    await fs.rename(tempPath, filePath);
  } catch (error) {
    // Best-effort cleanup of the temp file; the original failure is what callers see.
    await fs.rm(tempPath, { force: true }).catch(() => void 0);
    throw error;
  }
}
|
|
168
|
+
/**
 * Tell whether the raw plugin config carries explicit settings.
 *
 * @param raw Raw plugin config object.
 * @returns `true` when `readString` yields a truthy value for `scopeMode` or
 *   for `storageDir`; `false` otherwise.
 */
function hasExplicitPluginConfig(raw) {
  if (readString(raw, "scopeMode")) return true;
  return Boolean(readString(raw, "storageDir"));
}
|
|
171
|
+
/**
 * Compute the plugin config object that the resolvers should read from.
 *
 * When the live `api.pluginConfig` has no explicit `scopeMode`/`storageDir`
 * (the situation described for agent subprocesses), the persisted
 * resolved-config is loaded from the state directory and used as a base, with
 * any live fields layered on top. Otherwise the live config is returned as-is.
 *
 * @param api Plugin API; `pluginConfig` and `runtime.state.resolveStateDir()` are used.
 * @returns The merged config, or the live config when nothing was merged.
 */
function resolveEffectivePluginConfig(api) {
  const liveConfig = isRecord(api.pluginConfig) ? api.pluginConfig : {};
  const stateDir = api.runtime.state.resolveStateDir();
  if (hasExplicitPluginConfig(liveConfig)) return liveConfig;
  const persisted = readPersistedResolvedConfigSync(stateDir);
  // Live values win over persisted ones on key collisions.
  return persisted ? { ...persisted, ...liveConfig } : liveConfig;
}
|
|
150
190
|
function resolveKnowhereConfig(api) {
|
|
151
191
|
const raw = isRecord(api.pluginConfig) ? api.pluginConfig : {};
|
|
152
192
|
const stateDir = api.runtime.state.resolveStateDir();
|
|
153
|
-
const
|
|
154
|
-
|
|
193
|
+
const hasExplicit = hasExplicitPluginConfig(raw);
|
|
194
|
+
const effective = resolveEffectivePluginConfig(api);
|
|
195
|
+
const storageDirRaw = readString(effective, "storageDir");
|
|
196
|
+
const config = {
|
|
155
197
|
apiKey: readString(raw, "apiKey") || process.env.KNOWHERE_API_KEY || "",
|
|
156
198
|
baseUrl: readString(raw, "baseUrl") || process.env.KNOWHERE_BASE_URL || "https://api.knowhereto.ai",
|
|
157
199
|
storageDir: storageDirRaw ? api.resolvePath(storageDirRaw) : path.join(stateDir, "plugins", api.id),
|
|
158
|
-
scopeMode: readScopeMode(
|
|
159
|
-
pollIntervalMs: readNumber(
|
|
200
|
+
scopeMode: readScopeMode(effective),
|
|
201
|
+
pollIntervalMs: readNumber(effective, "pollIntervalMs", KNOWHERE_PLUGIN_DEFAULTS.pollIntervalMs, {
|
|
160
202
|
min: 1e3,
|
|
161
203
|
max: 6e4,
|
|
162
204
|
integer: true
|
|
163
205
|
}),
|
|
164
|
-
pollTimeoutMs: readNumber(
|
|
206
|
+
pollTimeoutMs: readNumber(effective, "pollTimeoutMs", KNOWHERE_PLUGIN_DEFAULTS.pollTimeoutMs, {
|
|
165
207
|
min: 1e4,
|
|
166
208
|
max: 72e5,
|
|
167
209
|
integer: true
|
|
168
210
|
}),
|
|
169
|
-
requestTimeoutMs: readNumber(
|
|
211
|
+
requestTimeoutMs: readNumber(effective, "requestTimeoutMs", KNOWHERE_PLUGIN_DEFAULTS.requestTimeoutMs, {
|
|
170
212
|
min: 1e3,
|
|
171
213
|
max: 3e5,
|
|
172
214
|
integer: true
|
|
173
215
|
}),
|
|
174
|
-
uploadTimeoutMs: readNumber(
|
|
216
|
+
uploadTimeoutMs: readNumber(effective, "uploadTimeoutMs", KNOWHERE_PLUGIN_DEFAULTS.uploadTimeoutMs, {
|
|
175
217
|
min: 1e3,
|
|
176
218
|
max: 72e5,
|
|
177
219
|
integer: true
|
|
178
220
|
})
|
|
179
221
|
};
|
|
222
|
+
if (hasExplicit) persistResolvedConfig(stateDir, {
|
|
223
|
+
scopeMode: config.scopeMode,
|
|
224
|
+
storageDir: config.storageDir,
|
|
225
|
+
knowledgeGraph: raw.knowledgeGraph
|
|
226
|
+
}).catch(() => void 0);
|
|
227
|
+
return config;
|
|
180
228
|
}
|
|
181
229
|
const API_KEY_URL = "https://knowhereto.ai/api-keys";
|
|
182
230
|
const PURCHASE_CREDITS_URL = "https://knowhereto.ai/usage?buy=true";
|
|
@@ -257,4 +305,4 @@ function resolveKnowledgeGraphConfig(raw) {
|
|
|
257
305
|
};
|
|
258
306
|
}
|
|
259
307
|
//#endregion
|
|
260
|
-
export { assertKnowhereApiKey, formatPaymentRequiredMessage, isPaymentRequiredError, knowherePluginConfigSchema, persistApiKey, readPersistedApiKey, resolveKnowhereConfig, resolveKnowledgeGraphConfig };
|
|
308
|
+
export { assertKnowhereApiKey, formatPaymentRequiredMessage, isPaymentRequiredError, knowherePluginConfigSchema, persistApiKey, readPersistedApiKey, resolveEffectivePluginConfig, resolveKnowhereConfig, resolveKnowledgeGraphConfig };
|
package/dist/graph-builder.d.ts
CHANGED
|
@@ -62,7 +62,7 @@ export interface ChunkStats {
|
|
|
62
62
|
* Main function to build knowledge graph
|
|
63
63
|
* Equivalent to Python: build_knowledge_graph
|
|
64
64
|
*/
|
|
65
|
-
export declare function buildKnowledgeGraph(chunks: ChunkData[], connections: Connection[], chunkStats: ChunkStats,
|
|
65
|
+
export declare function buildKnowledgeGraph(chunks: ChunkData[], connections: Connection[], chunkStats: ChunkStats, _jiebaInitialized: boolean, logger?: PluginLogger, kbId?: string): KnowledgeGraph;
|
|
66
66
|
/**
|
|
67
67
|
* Incremental update: match new chunks against existing chunks
|
|
68
68
|
* Equivalent to Python: _incremental_connections
|
package/dist/graph-builder.js
CHANGED
|
@@ -1,12 +1,5 @@
|
|
|
1
|
-
import * as nodejieba from "nodejieba";
|
|
2
1
|
//#region src/graph-builder.ts
|
|
3
2
|
/**
|
|
4
|
-
* Graph Builder Module
|
|
5
|
-
*
|
|
6
|
-
* TypeScript implementation of knowhere-api/apps/worker/app/services/connect_builder/graph_builder.py
|
|
7
|
-
* Builds file-level knowledge graphs from chunk connections with TF-IDF and importance scoring.
|
|
8
|
-
*/
|
|
9
|
-
/**
|
|
10
3
|
* Extract file key from a chunk.
|
|
11
4
|
* Prefers the explicit fileKey field; falls back to path-based extraction for backward compatibility.
|
|
12
5
|
*/
|
|
@@ -27,29 +20,18 @@ function extractLabel(path) {
|
|
|
27
20
|
* Extract tokens from text using jieba
|
|
28
21
|
* Equivalent to Python: _extract_tokens_from_content
|
|
29
22
|
*/
|
|
30
|
-
function extractTokensFromContent(content
|
|
31
|
-
|
|
32
|
-
if (!jiebaInitialized) return cleanContent.split(/\s+/).filter((w) => w.length > 1);
|
|
33
|
-
try {
|
|
34
|
-
return nodejieba.cut(cleanContent).filter((token) => {
|
|
35
|
-
if (token.length <= 1) return false;
|
|
36
|
-
if (/^\d+$/.test(token)) return false;
|
|
37
|
-
if (/^[^\w\u4e00-\u9fa5]+$/.test(token)) return false;
|
|
38
|
-
return true;
|
|
39
|
-
});
|
|
40
|
-
} catch {
|
|
41
|
-
return cleanContent.split(/\s+/).filter((w) => w.length > 1);
|
|
42
|
-
}
|
|
23
|
+
function extractTokensFromContent(content) {
|
|
24
|
+
return content.replace(/<[^>]*>/g, " ").split(/\s+/).filter((w) => w.length > 1);
|
|
43
25
|
}
|
|
44
26
|
/**
|
|
45
27
|
* Compute TF-IDF top keywords for a file
|
|
46
28
|
* Equivalent to Python: _compute_tfidf_top_keywords
|
|
47
29
|
*/
|
|
48
|
-
function computeTfidfTopKeywords(fileChunks, allChunks, topK
|
|
30
|
+
function computeTfidfTopKeywords(fileChunks, allChunks, topK) {
|
|
49
31
|
if (fileChunks.length === 0) return [];
|
|
50
32
|
const fileTokens = [];
|
|
51
33
|
for (const chunk of fileChunks) {
|
|
52
|
-
const tokens = extractTokensFromContent(chunk.content
|
|
34
|
+
const tokens = extractTokensFromContent(chunk.content);
|
|
53
35
|
fileTokens.push(...tokens);
|
|
54
36
|
}
|
|
55
37
|
if (fileTokens.length === 0) return [];
|
|
@@ -60,7 +42,7 @@ function computeTfidfTopKeywords(fileChunks, allChunks, topK, jiebaInitialized)
|
|
|
60
42
|
}
|
|
61
43
|
const docFreq = /* @__PURE__ */ new Map();
|
|
62
44
|
for (const chunk of allChunks) {
|
|
63
|
-
const tokens = new Set(extractTokensFromContent(chunk.content
|
|
45
|
+
const tokens = new Set(extractTokensFromContent(chunk.content).map((t) => t.toLowerCase()));
|
|
64
46
|
for (const token of tokens) docFreq.set(token, (docFreq.get(token) || 0) + 1);
|
|
65
47
|
}
|
|
66
48
|
const totalDocs = allChunks.length;
|
|
@@ -161,7 +143,7 @@ function aggregateFileLevelEdges(connections, chunkById, topN = 5) {
|
|
|
161
143
|
* Main function to build knowledge graph
|
|
162
144
|
* Equivalent to Python: build_knowledge_graph
|
|
163
145
|
*/
|
|
164
|
-
function buildKnowledgeGraph(chunks, connections, chunkStats,
|
|
146
|
+
function buildKnowledgeGraph(chunks, connections, chunkStats, _jiebaInitialized, logger, kbId) {
|
|
165
147
|
logger?.info(`Building knowledge graph from ${chunks.length} chunks and ${connections.length} connections`);
|
|
166
148
|
const chunkById = /* @__PURE__ */ new Map();
|
|
167
149
|
for (const chunk of chunks) chunkById.set(chunk.chunk_id, chunk);
|
|
@@ -178,7 +160,7 @@ function buildKnowledgeGraph(chunks, connections, chunkStats, jiebaInitialized,
|
|
|
178
160
|
const type = chunk.metadata.type || "text";
|
|
179
161
|
typeCount[type] = (typeCount[type] || 0) + 1;
|
|
180
162
|
}
|
|
181
|
-
const topKeywords = computeTfidfTopKeywords(fileChunks, chunks, 10
|
|
163
|
+
const topKeywords = computeTfidfTopKeywords(fileChunks, chunks, 10);
|
|
182
164
|
const importance = computeFileImportance(fileKey, fileChunks, chunks, chunkStats);
|
|
183
165
|
let topSummary = "";
|
|
184
166
|
for (const chunk of fileChunks) if (chunk.metadata.summary && typeof chunk.metadata.summary === "string") {
|
package/dist/index.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { registerAgentHooks } from "./agent-hooks.js";
|
|
2
|
+
import { knowherePluginConfigSchema, readPersistedApiKey, resolveEffectivePluginConfig, resolveKnowhereConfig, resolveKnowledgeGraphConfig } from "./config.js";
|
|
2
3
|
import { KnowhereStore } from "./store.js";
|
|
3
4
|
import { createKnowhereToolFactory } from "./tools.js";
|
|
4
5
|
import { KnowledgeGraphService } from "./kg-service.js";
|
|
@@ -10,7 +11,9 @@ const plugin = {
|
|
|
10
11
|
configSchema: knowherePluginConfigSchema,
|
|
11
12
|
register(api) {
|
|
12
13
|
const config = resolveKnowhereConfig(api);
|
|
13
|
-
const
|
|
14
|
+
const effectiveRaw = resolveEffectivePluginConfig(api);
|
|
15
|
+
const kgConfig = resolveKnowledgeGraphConfig(effectiveRaw);
|
|
16
|
+
registerAgentHooks(api, effectiveRaw);
|
|
14
17
|
const store = new KnowhereStore({
|
|
15
18
|
rootDir: config.storageDir,
|
|
16
19
|
scopeMode: config.scopeMode,
|
|
@@ -43,7 +46,11 @@ const plugin = {
|
|
|
43
46
|
"knowhere_import_completed_job",
|
|
44
47
|
"knowhere_set_api_key",
|
|
45
48
|
"knowhere_kg_list",
|
|
46
|
-
"knowhere_kg_query"
|
|
49
|
+
"knowhere_kg_query",
|
|
50
|
+
"knowhere_get_map",
|
|
51
|
+
"knowhere_get_structure",
|
|
52
|
+
"knowhere_read_chunks",
|
|
53
|
+
"knowhere_discover_files"
|
|
47
54
|
] });
|
|
48
55
|
}
|
|
49
56
|
};
|
package/dist/kg-service.d.ts
CHANGED
|
@@ -9,11 +9,9 @@ export declare class KnowledgeGraphService {
|
|
|
9
9
|
private readonly logger;
|
|
10
10
|
private degradationMode;
|
|
11
11
|
private buildQueues;
|
|
12
|
-
private jiebaInitialized;
|
|
13
12
|
constructor(params: KnowledgeGraphServiceParams);
|
|
14
13
|
initialize(): Promise<void>;
|
|
15
14
|
private checkPythonEnvironment;
|
|
16
|
-
private initializeJieba;
|
|
17
15
|
extractKeywords(text: string, topK?: number): Promise<string[]>;
|
|
18
16
|
resolveKbId(context: ToolRuntimeContext): string | null;
|
|
19
17
|
getKbPath(kbId: string): string;
|
package/dist/kg-service.js
CHANGED
|
@@ -2,10 +2,9 @@ import { resolveStoredKnowhereResultRoot } from "./parser.js";
|
|
|
2
2
|
import { buildConnections, init_connect_builder } from "./connect-builder.js";
|
|
3
3
|
import { buildKnowledgeGraph } from "./graph-builder.js";
|
|
4
4
|
import path from "node:path";
|
|
5
|
+
import os from "node:os";
|
|
5
6
|
import { spawn } from "node:child_process";
|
|
6
7
|
import fs from "fs-extra";
|
|
7
|
-
import os from "node:os";
|
|
8
|
-
import * as nodejieba from "nodejieba";
|
|
9
8
|
//#region src/kg-service.ts
|
|
10
9
|
init_connect_builder();
|
|
11
10
|
const DEFAULT_CONNECT_CONFIG = {
|
|
@@ -26,18 +25,11 @@ const DEFAULT_KG_CONFIG = {
|
|
|
26
25
|
function formatUnknownError(error) {
|
|
27
26
|
return error instanceof Error ? error.message : String(error);
|
|
28
27
|
}
|
|
29
|
-
function extractKeywordText(item) {
|
|
30
|
-
if (typeof item === "string") return item;
|
|
31
|
-
if (typeof item === "number" || typeof item === "boolean" || typeof item === "bigint") return String(item);
|
|
32
|
-
if (typeof item === "object" && item !== null && "word" in item && typeof item.word === "string") return item.word;
|
|
33
|
-
return null;
|
|
34
|
-
}
|
|
35
28
|
var KnowledgeGraphService = class {
|
|
36
29
|
config;
|
|
37
30
|
logger;
|
|
38
31
|
degradationMode = "full";
|
|
39
32
|
buildQueues = /* @__PURE__ */ new Map();
|
|
40
|
-
jiebaInitialized = false;
|
|
41
33
|
constructor(params) {
|
|
42
34
|
this.config = {
|
|
43
35
|
...DEFAULT_KG_CONFIG,
|
|
@@ -58,18 +50,12 @@ var KnowledgeGraphService = class {
|
|
|
58
50
|
}
|
|
59
51
|
try {
|
|
60
52
|
await this.checkPythonEnvironment();
|
|
61
|
-
await this.initializeJieba();
|
|
62
53
|
this.degradationMode = "full";
|
|
63
54
|
this.logger.info("Knowledge graph service initialized in full mode");
|
|
64
55
|
} catch (error) {
|
|
65
56
|
this.logger.warn(`Knowledge graph initialization failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
this.logger.warn("Python not found, knowledge graph disabled");
|
|
69
|
-
} else if (error.code === "NODEJIEBA_MISSING") {
|
|
70
|
-
this.degradationMode = "basic";
|
|
71
|
-
this.logger.warn("Nodejieba missing, using basic tokenization");
|
|
72
|
-
} else this.degradationMode = "disabled";
|
|
57
|
+
this.degradationMode = "disabled";
|
|
58
|
+
this.logger.warn("Python not found, knowledge graph disabled");
|
|
73
59
|
}
|
|
74
60
|
}
|
|
75
61
|
async checkPythonEnvironment() {
|
|
@@ -92,30 +78,8 @@ var KnowledgeGraphService = class {
|
|
|
92
78
|
});
|
|
93
79
|
});
|
|
94
80
|
}
|
|
95
|
-
async initializeJieba() {
|
|
96
|
-
try {
|
|
97
|
-
nodejieba.load();
|
|
98
|
-
this.jiebaInitialized = true;
|
|
99
|
-
this.logger.info("Nodejieba initialized successfully");
|
|
100
|
-
} catch {
|
|
101
|
-
const err = /* @__PURE__ */ new Error("Failed to initialize nodejieba");
|
|
102
|
-
err.code = "NODEJIEBA_MISSING";
|
|
103
|
-
throw err;
|
|
104
|
-
}
|
|
105
|
-
}
|
|
106
81
|
async extractKeywords(text, topK = 20) {
|
|
107
82
|
if (this.degradationMode === "disabled") return [];
|
|
108
|
-
if (this.degradationMode === "full" && this.jiebaInitialized) try {
|
|
109
|
-
const rawKeywords = nodejieba.extract(text, topK);
|
|
110
|
-
return (Array.isArray(rawKeywords) ? rawKeywords : []).map((item) => extractKeywordText(item)).filter((keyword) => keyword !== null).filter((kw) => {
|
|
111
|
-
if (kw.length <= 1) return false;
|
|
112
|
-
if (/^\d+$/.test(kw)) return false;
|
|
113
|
-
return true;
|
|
114
|
-
}).slice(0, topK);
|
|
115
|
-
} catch (error) {
|
|
116
|
-
this.logger.warn(`Jieba extraction failed, falling back to basic: ${formatUnknownError(error)}`);
|
|
117
|
-
this.degradationMode = "basic";
|
|
118
|
-
}
|
|
119
83
|
return text.split(/\s+/).filter((w) => w.length > 1).slice(0, topK);
|
|
120
84
|
}
|
|
121
85
|
resolveKbId(context) {
|
|
@@ -146,8 +110,6 @@ var KnowledgeGraphService = class {
|
|
|
146
110
|
await fs.ensureDir(docDir);
|
|
147
111
|
const sourceResultRoot = await resolveStoredKnowhereResultRoot(params.sourcePath);
|
|
148
112
|
await fs.copy(sourceResultRoot, docDir, { overwrite: true });
|
|
149
|
-
const keywordsPath = path.join(docDir, "keywords.json");
|
|
150
|
-
await fs.writeJSON(keywordsPath, params.keywords, { spaces: 2 });
|
|
151
113
|
const metadataPath = path.join(docDir, "metadata.json");
|
|
152
114
|
await fs.writeJSON(metadataPath, params.metadata, { spaces: 2 });
|
|
153
115
|
this.logger.info(`Document saved to knowledge base: kb=${params.kbId} doc=${params.docId}`);
|
|
@@ -176,7 +138,7 @@ var KnowledgeGraphService = class {
|
|
|
176
138
|
const docPath = path.join(kbPath, doc);
|
|
177
139
|
if ((await fs.stat(docPath)).isDirectory() && doc !== "knowledge_graph.json" && doc !== "chunk_stats.json" && doc !== "kb_metadata.json") docDirs.push(doc);
|
|
178
140
|
}
|
|
179
|
-
if (docDirs.length <
|
|
141
|
+
if (docDirs.length < 1) {
|
|
180
142
|
this.logger.info(`Not enough documents for graph building (need >=2, have ${docDirs.length}), skipping`);
|
|
181
143
|
return;
|
|
182
144
|
}
|
|
@@ -203,7 +165,7 @@ var KnowledgeGraphService = class {
|
|
|
203
165
|
const chunkStatsPath = path.join(kbPath, "chunk_stats.json");
|
|
204
166
|
let chunkStats = {};
|
|
205
167
|
if (await fs.pathExists(chunkStatsPath)) chunkStats = await fs.readJSON(chunkStatsPath);
|
|
206
|
-
const knowledgeGraph = buildKnowledgeGraph(allChunks, connections, chunkStats,
|
|
168
|
+
const knowledgeGraph = buildKnowledgeGraph(allChunks, connections, chunkStats, false, this.logger, kbId);
|
|
207
169
|
const graphFile = path.join(kbPath, "knowledge_graph.json");
|
|
208
170
|
await fs.writeJSON(graphFile, knowledgeGraph, { spaces: 2 });
|
|
209
171
|
this.logger.info(`Knowledge graph saved to ${graphFile}`);
|
package/dist/tools.js
CHANGED
|
@@ -4,9 +4,12 @@ import { resolveStoredKnowhereArtifactPath } from "./parser.js";
|
|
|
4
4
|
import { sanitizeStringArray } from "./text.js";
|
|
5
5
|
import { formatErrorMessage } from "./error-message.js";
|
|
6
6
|
import { KnowhereClient } from "./client.js";
|
|
7
|
+
import { deliverChannelMessage } from "./channel-delivery.js";
|
|
7
8
|
import { sendTrackerProgress } from "./tracker-progress.js";
|
|
8
9
|
import fs from "node:fs/promises";
|
|
9
10
|
import path from "node:path";
|
|
11
|
+
import crypto from "node:crypto";
|
|
12
|
+
import os from "node:os";
|
|
10
13
|
//#region src/tools.ts
|
|
11
14
|
const TERMINAL_JOB_STATUSES = new Set([
|
|
12
15
|
"cancelled",
|
|
@@ -784,6 +787,24 @@ function createImportCompletedJobTool(params) {
|
|
|
784
787
|
downloadedResult: importResult.downloadedResult
|
|
785
788
|
}, { overwrite });
|
|
786
789
|
params.api.logger.info(`knowhere: knowhere_import_completed_job stored imported document scope=${scope.label} jobId=${importResult.jobResult.job_id} docId=${document.id}`);
|
|
790
|
+
try {
|
|
791
|
+
const importKbId = params.kgService.resolveKbId(params.ctx);
|
|
792
|
+
if (importKbId && params.kgService.isEnabled()) {
|
|
793
|
+
params.api.logger.info(`knowhere: triggering KG build after import kbId=${importKbId}`);
|
|
794
|
+
buildKnowledgeGraphAsync({
|
|
795
|
+
kgService: params.kgService,
|
|
796
|
+
kbId: importKbId,
|
|
797
|
+
docId: document.id,
|
|
798
|
+
documentPayload: { downloadedResult: importResult.downloadedResult },
|
|
799
|
+
scope,
|
|
800
|
+
store: params.store,
|
|
801
|
+
ctx: params.ctx,
|
|
802
|
+
api: params.api
|
|
803
|
+
}).catch((e) => params.api.logger.error(`knowhere: KG build after import failed: ${formatErrorMessage(e)}`));
|
|
804
|
+
}
|
|
805
|
+
} catch (kgError) {
|
|
806
|
+
params.api.logger.warn(`knowhere: import KG trigger error: ${formatErrorMessage(kgError)}`);
|
|
807
|
+
}
|
|
787
808
|
return textResult([
|
|
788
809
|
"Import complete.",
|
|
789
810
|
...buildStoredDocumentSummaryLines({
|
|
@@ -914,6 +935,673 @@ function createKgQueryTool(params) {
|
|
|
914
935
|
}
|
|
915
936
|
};
|
|
916
937
|
}
|
|
938
|
+
// Absolute path of the `.knowhere` directory under the current user's home directory.
const T2_KNOWHERE_HOME = path.join(os.homedir(), ".knowhere");
|
|
939
|
+
/**
 * Locate a document's directory inside a knowledge-base directory.
 *
 * An exact child named `docName` is preferred. Failing that, the first
 * sub-directory of `kbDir` whose name contains `docName` is returned.
 *
 * @param kbDir Knowledge-base directory to search in.
 * @param docName Exact or partial document directory name.
 * @returns Path of the matching entry, or `null` when `docName` is empty,
 *   nothing matches, or `kbDir` cannot be listed.
 */
async function t2FindDocDir(kbDir, docName) {
  // An empty name would resolve to kbDir itself and "match" every directory.
  if (typeof docName !== "string" || docName.length === 0) return null;

  const exactPath = path.join(kbDir, docName);
  // Accept the exact hit only when it stays strictly inside kbDir, so names
  // such as "." or ".." cannot select kbDir itself or something outside it.
  const relative = path.relative(kbDir, exactPath);
  const staysInside = relative !== "" && relative !== ".." && !relative.startsWith(`..${path.sep}`) && !path.isAbsolute(relative);
  if (staysInside) {
    try {
      await fs.access(exactPath);
      return exactPath;
    } catch {
      // No exact entry: fall through to the partial-name search.
    }
  }

  let entries;
  try {
    entries = await fs.readdir(kbDir, { withFileTypes: true });
  } catch {
    return null;
  }
  for (const entry of entries) {
    const entryName = String(entry.name);
    if (entry.isDirectory() && entryName.includes(docName)) return path.join(kbDir, entryName);
  }
  return null;
}
|
|
954
|
+
/**
 * Load a document's chunks, preferring `chunks_slim.json` over `chunks.json`.
 *
 * Each file may hold either a bare array or an object with a `chunks` array.
 * Entries from `chunks_slim.json` are returned untouched; entries from
 * `chunks.json` are reduced to `{ type, path, content, summary }`.
 * A file that is missing, malformed or of an unexpected shape is skipped.
 *
 * @param docDir Document directory containing the chunk files.
 * @returns The chunk list, or `[]` when neither file yields one.
 */
async function t2LoadChunks(docDir) {
  const candidates = ["chunks_slim.json", "chunks.json"];
  for (let i = 0; i < candidates.length; i++) {
    const fileName = candidates[i];
    try {
      const parsed = JSON.parse(await fs.readFile(path.join(docDir, fileName), "utf-8"));
      const list = Array.isArray(parsed) ? parsed : isRecord(parsed) && Array.isArray(parsed.chunks) ? parsed.chunks : null;
      if (list === null) continue;
      if (fileName !== "chunks.json") return list;
      // Full chunks are projected onto the slim shape, with defaults for missing fields.
      return list.map((item) => ({
        type: item.type || "text",
        path: item.path || "",
        content: item.content || "",
        summary: item.metadata?.summary || item.summary || ""
      }));
    } catch {
      // Unreadable or malformed file: try the next candidate.
    }
  }
  return [];
}
|
|
974
|
+
/**
 * Normalise a path-like string for loose comparison.
 *
 * Full-width ASCII variants (U+FF01–U+FF5E) are mapped to their half-width
 * counterparts, all whitespace (including U+3000 and U+00A0) is removed, and
 * the result is lower-cased.
 *
 * @param s Text to normalise.
 * @returns The normalised string.
 */
function t2NormalizePath(s) {
  // Distance between a full-width form and its ASCII counterpart (0xFEE0).
  const fullWidthOffset = 65248;
  const halfWidth = s.replace(/[\uFF01-\uFF5E]/g, (ch) => String.fromCharCode(ch.charCodeAt(0) - fullWidthOffset));
  const compact = halfWidth.replace(/[\s\u3000\u00A0]+/g, "");
  return compact.toLowerCase();
}
|
|
977
|
+
// Matches placeholders of the form IMAGE_<id>_IMAGE / TABLE_<id>_TABLE, where <id> is
// lowercase hex digits and dashes (capture group 1). The regex is global, so `lastIndex`
// persists across `test()` calls and must be reset before each reuse.
const PLACEHOLDER_RE = /(?:IMAGE|TABLE)_([a-f0-9-]+)_(?:IMAGE|TABLE)/g;
|
|
978
|
+
/**
 * Load the unprocessed entries of `chunks.json` from a document directory.
 *
 * Accepts either a bare array or an object with a `chunks` array.
 *
 * @param docDir Document directory containing `chunks.json`.
 * @returns The raw chunk array, or `[]` when the file is missing, malformed
 *   or has an unexpected shape.
 */
async function t2LoadRawChunks(docDir) {
  try {
    const parsed = JSON.parse(await fs.readFile(path.join(docDir, "chunks.json"), "utf-8"));
    if (Array.isArray(parsed)) return parsed;
    return isRecord(parsed) && Array.isArray(parsed.chunks) ? parsed.chunks : [];
  } catch {
    // Missing file or invalid JSON: behave as if there were no chunks.
    return [];
  }
}
|
|
989
|
+
/**
 * Runtime-only enrichment of chunks returned to the AI:
 * 1. Replace IMAGE_uuid_IMAGE placeholders with [📷 path] in chunk content
 * 2. Replace TABLE_uuid_TABLE placeholders with the table's HTML (first 8000 chars)
 * 3. Remove standalone table/image chunks that were inlined into a text chunk
 * 4. Strip leftover self-referencing placeholders from image/table chunk content,
 *    and resolve placeholders in summaries
 *
 * Does NOT modify chunks.json on disk. The chunk objects passed in ARE mutated
 * in place (`content` / `summary`); the returned array is a filtered copy.
 *
 * @param chunks Chunk objects with `type`, `path`, `content`, `summary`.
 * @param docDir Document directory holding chunks.json, manifest.json and assets.
 * @returns The chunks that remain after inlined table/image chunks are dropped.
 */
async function t2EnrichChunks(chunks, docDir) {
  // Index raw chunks by id so placeholders can be resolved to asset paths.
  const rawChunks = await t2LoadRawChunks(docDir);
  const idToRaw = /* @__PURE__ */ new Map();
  for (const rc of rawChunks) if (rc.chunk_id) idToRaw.set(rc.chunk_id, rc);

  // Secondary lookup: asset paths listed in manifest.json (used when a raw chunk has no path).
  const manifestPaths = /* @__PURE__ */ new Map();
  try {
    const manifest = JSON.parse(await fs.readFile(path.join(docDir, "manifest.json"), "utf-8"));
    if (isRecord(manifest) && isRecord(manifest.files)) {
      const files = manifest.files;
      for (const entry of Array.isArray(files.images) ? files.images : []) if (typeof entry.id === "string" && typeof entry.file_path === "string") manifestPaths.set(entry.id, {
        type: "image",
        filePath: entry.file_path
      });
      for (const entry of Array.isArray(files.tables) ? files.tables : []) if (typeof entry.id === "string" && typeof entry.file_path === "string") manifestPaths.set(entry.id, {
        type: "table",
        filePath: entry.file_path
      });
    }
  } catch {
    // manifest.json is optional; without it only chunks.json paths are used.
  }

  const inlinedTablePaths = /* @__PURE__ */ new Set();
  const inlinedImagePaths = /* @__PURE__ */ new Set();
  for (const chunk of chunks) {
    if (!chunk.content) continue;
    // PLACEHOLDER_RE is global: reset lastIndex before every test().
    PLACEHOLDER_RE.lastIndex = 0;
    if (PLACEHOLDER_RE.test(chunk.content)) {
      PLACEHOLDER_RE.lastIndex = 0;
      // Only text chunks record what they inlined, so only those assets get de-duplicated below.
      chunk.content = await replacePlaceholders(chunk.content, idToRaw, docDir, chunk.type === "text" ? inlinedTablePaths : void 0, chunk.type === "text" ? inlinedImagePaths : void 0, manifestPaths);
      PLACEHOLDER_RE.lastIndex = 0;
      // Placeholders that could not be resolved in a table/image chunk refer to the chunk itself.
      if (chunk.type !== "text" && chunk.path && PLACEHOLDER_RE.test(chunk.content)) {
        if (chunk.type === "table") {
          let tableReplacement;
          try {
            const html = await fs.readFile(path.join(docDir, chunk.path), "utf-8");
            tableReplacement = `\n${html.slice(0, 8e3)}\n`;
          } catch {
            tableReplacement = `[📊 ${chunk.path}]`;
          }
          // Replacer function: "$1", "$&" etc. inside the HTML/path must stay literal.
          chunk.content = chunk.content.replace(PLACEHOLDER_RE, () => tableReplacement);
        } else if (chunk.type === "image") {
          const imageReplacement = `[📷 ${chunk.path}]`;
          chunk.content = chunk.content.replace(PLACEHOLDER_RE, () => imageReplacement);
        }
      }
    }
    if (chunk.summary) {
      PLACEHOLDER_RE.lastIndex = 0;
      if (PLACEHOLDER_RE.test(chunk.summary)) {
        PLACEHOLDER_RE.lastIndex = 0;
        chunk.summary = await replacePlaceholders(chunk.summary, idToRaw, docDir, void 0, void 0, manifestPaths);
      }
    }
  }

  // Drop standalone table/image chunks whose asset was inlined into a text chunk.
  chunks = chunks.filter((c) => {
    if (c.type === "table" && inlinedTablePaths.has(c.path)) return false;
    if (c.type === "image" && inlinedImagePaths.has(c.path)) return false;
    return true;
  });
  return chunks;
}
|
|
1051
|
+
/**
 * Resolve IMAGE_/TABLE_ placeholders inside a piece of text.
 *
 * For each placeholder the asset path is taken from the raw chunk with that id,
 * or from the manifest lookup when the raw chunk has no path. Images become
 * `[📷 path]`. Tables become the first 8000 characters of the table file,
 * falling back to the raw chunk's own HTML-looking content, then to `[📊 path]`.
 * Placeholders with no known path are left untouched.
 *
 * @param text Text that may contain placeholders.
 * @param idToRaw Map from chunk id to raw chunk.
 * @param docDir Document directory that asset paths are relative to.
 * @param inlinedTablePaths Optional set that collects paths of inlined tables.
 * @param inlinedImagePaths Optional set that collects paths of referenced images.
 * @param manifestPaths Optional map from id to `{ type, filePath }`.
 * @returns The text with placeholders substituted.
 */
async function replacePlaceholders(text, idToRaw, docDir, inlinedTablePaths, inlinedImagePaths, manifestPaths) {
  const placeholderPattern = /(?:(IMAGE|TABLE))_([a-f0-9-]+)_(?:IMAGE|TABLE)/g;
  const hits = [];
  for (let hit = placeholderPattern.exec(text); hit !== null; hit = placeholderPattern.exec(text)) hits.push(hit);
  if (hits.length === 0) return text;

  // Build the output left to right: untouched text, then the substitution for each hit.
  const pieces = [];
  let cursor = 0;
  for (const hit of hits) {
    const [placeholder, kind, id] = hit;
    pieces.push(text.slice(cursor, hit.index));
    cursor = hit.index + placeholder.length;

    const rawChunk = idToRaw.get(id);
    let assetPath = rawChunk?.path;
    if (!assetPath && manifestPaths) {
      const manifestEntry = manifestPaths.get(id);
      if (manifestEntry) assetPath = manifestEntry.filePath;
    }
    if (!assetPath) {
      // Unknown asset: keep the placeholder as it was.
      pieces.push(placeholder);
      continue;
    }
    if (kind === "IMAGE") {
      pieces.push(`[📷 ${assetPath}]`);
      inlinedImagePaths?.add(assetPath);
      continue;
    }

    const tableFile = path.join(docDir, assetPath);
    try {
      const tableHtml = await fs.readFile(tableFile, "utf-8");
      pieces.push(`\n${tableHtml.slice(0, 8e3)}\n`);
      inlinedTablePaths?.add(assetPath);
    } catch {
      // Table file unavailable: use the raw chunk's content if it looks like markup.
      const fallbackContent = rawChunk?.content || "";
      if (fallbackContent && fallbackContent.includes("<")) {
        pieces.push(`\n${fallbackContent}\n`);
        inlinedTablePaths?.add(assetPath);
      } else pieces.push(`[📊 ${assetPath}]`);
    }
  }
  pieces.push(text.slice(cursor));
  return pieces.join("");
}
|
|
1097
|
+
function t2ComputeTfIdfKeywords(rawChunks, topK = 10) {
|
|
1098
|
+
const df = {};
|
|
1099
|
+
const tf = {};
|
|
1100
|
+
const totalDocs = rawChunks.length || 1;
|
|
1101
|
+
for (const c of rawChunks) {
|
|
1102
|
+
const tokens = Array.isArray(c.metadata?.tokens) ? c.metadata.tokens : [];
|
|
1103
|
+
const keywords = Array.isArray(c.metadata?.keywords) ? c.metadata.keywords : [];
|
|
1104
|
+
const allTerms = [...tokens, ...keywords];
|
|
1105
|
+
const seen = /* @__PURE__ */ new Set();
|
|
1106
|
+
for (const t of allTerms) {
|
|
1107
|
+
if (!t || t.length <= 1) continue;
|
|
1108
|
+
if (/^\d+[.,%]*$/.test(t)) continue;
|
|
1109
|
+
const lower = t.toLowerCase();
|
|
1110
|
+
tf[lower] = (tf[lower] || 0) + 1;
|
|
1111
|
+
if (!seen.has(lower)) {
|
|
1112
|
+
df[lower] = (df[lower] || 0) + 1;
|
|
1113
|
+
seen.add(lower);
|
|
1114
|
+
}
|
|
1115
|
+
}
|
|
1116
|
+
}
|
|
1117
|
+
const scored = Object.entries(tf).map(([term, freq]) => {
|
|
1118
|
+
return {
|
|
1119
|
+
term,
|
|
1120
|
+
score: freq * (Math.log(totalDocs / (df[term] || 1)) + 1)
|
|
1121
|
+
};
|
|
1122
|
+
});
|
|
1123
|
+
scored.sort((a, b) => b.score - a.score);
|
|
1124
|
+
return scored.slice(0, topK).map((s) => s.term);
|
|
1125
|
+
}
|
|
1126
|
+
function t2KeywordsNeedRepair(keywords) {
|
|
1127
|
+
if (!Array.isArray(keywords) || keywords.length === 0) return true;
|
|
1128
|
+
let bad = 0;
|
|
1129
|
+
for (const kw of keywords) if (!kw || typeof kw === "string" && (kw.length <= 1 || /^\d+[.,%]*$/.test(kw) || /^[a-z]{1,2}$/i.test(kw))) bad++;
|
|
1130
|
+
return bad >= keywords.length * .5;
|
|
1131
|
+
}
|
|
1132
|
+
function t2JsonResult(data) {
|
|
1133
|
+
return {
|
|
1134
|
+
content: [{
|
|
1135
|
+
type: "text",
|
|
1136
|
+
text: JSON.stringify(data, null, 2)
|
|
1137
|
+
}],
|
|
1138
|
+
details: {}
|
|
1139
|
+
};
|
|
1140
|
+
}
|
|
1141
|
+
async function t2ListDocDirs(kbRoot) {
|
|
1142
|
+
let entries;
|
|
1143
|
+
try {
|
|
1144
|
+
entries = await fs.readdir(kbRoot, { withFileTypes: true });
|
|
1145
|
+
} catch {
|
|
1146
|
+
return [];
|
|
1147
|
+
}
|
|
1148
|
+
const docs = [];
|
|
1149
|
+
for (const e of entries) {
|
|
1150
|
+
if (!e.isDirectory()) continue;
|
|
1151
|
+
try {
|
|
1152
|
+
await fs.access(path.join(kbRoot, String(e.name), "chunks.json"));
|
|
1153
|
+
docs.push(String(e.name));
|
|
1154
|
+
} catch {
|
|
1155
|
+
continue;
|
|
1156
|
+
}
|
|
1157
|
+
}
|
|
1158
|
+
return docs;
|
|
1159
|
+
}
|
|
1160
|
+
async function t2StageFile(srcPath, stagingDir) {
|
|
1161
|
+
await fs.mkdir(stagingDir, { recursive: true });
|
|
1162
|
+
const ext = path.extname(srcPath);
|
|
1163
|
+
const hash = crypto.createHash("sha1").update(srcPath).digest("hex").slice(0, 12);
|
|
1164
|
+
const staged = path.join(stagingDir, `knowhere-asset-${hash}${ext}`);
|
|
1165
|
+
await fs.copyFile(srcPath, staged);
|
|
1166
|
+
return staged;
|
|
1167
|
+
}
|
|
1168
|
+
/**
 * Resolves the image and table assets referenced by the chunks about to be
 * returned to the agent.
 *
 * - Tables: the file at `<docDir>/<chunk.path>` is read and its first 8000
 *   characters are attached inline (`mode: "table_inline"`). Table chunks whose
 *   content is non-empty and no longer contains a `TABLE_<id>_TABLE`
 *   placeholder are skipped.
 * - Images: the file is copied into `~/.openclaw/knowhere-assets` and sent via
 *   `deliverChannelMessage`; the entry's mode is `"image_sent"` or
 *   `"image_failed"` depending on the delivery result.
 *
 * Each absolute path is processed at most once. Failures are logged at debug
 * level and produce no entry.
 *
 * @param params `{ api, store, ctx, docDir, returnedChunks }`.
 * @returns Descriptors of the assets that were resolved.
 */
async function t2ResolveAssets(params) {
	const collected = [];
	const stagingDir = path.join(os.homedir(), ".openclaw", "knowhere-assets");
	const visited = new Set();
	let channelRoute;
	try {
		channelRoute = await params.store.resolveChannelRoute({ sessionKey: params.ctx.sessionKey });
	} catch {
		// Best effort: delivery proceeds with an undefined route.
	}

	// Reads a table file and records it as inline HTML.
	const inlineTable = async (opts, absolutePath) => {
		try {
			const html = await fs.readFile(absolutePath, "utf-8");
			collected.push({
				chunk_id: opts.chunkId,
				type: "table",
				relative_path: opts.relativePath,
				summary: opts.summary.slice(0, 200),
				mode: "table_inline",
				html_content: html.slice(0, 8e3)
			});
		} catch {
			params.api.logger.debug?.(`knowhere: t2ResolveAssets table read failed: ${absolutePath}`);
		}
	};

	// Stages an image file and pushes it to the user's channel.
	const sendImage = async (opts, absolutePath) => {
		try {
			await fs.access(absolutePath);
			const stagedPath = await t2StageFile(absolutePath, stagingDir);
			const delivery = await deliverChannelMessage({
				api: params.api,
				operationLabel: "t2-asset-image",
				sessionKey: params.ctx.sessionKey,
				channelRoute,
				text: `📷 ${opts.summary.slice(0, 100)}`,
				mediaUrl: stagedPath,
				mediaLocalRoots: [stagingDir]
			});
			collected.push({
				chunk_id: opts.chunkId,
				type: "image",
				relative_path: opts.relativePath,
				summary: opts.summary.slice(0, 200),
				mode: delivery.delivered ? "image_sent" : "image_failed"
			});
		} catch (err) {
			params.api.logger.debug?.(`knowhere: t2ResolveAssets image delivery failed: ${absolutePath} — ${err instanceof Error ? err.message : String(err)}`);
		}
	};

	// Dispatches one asset, skipping paths that were already handled.
	const resolveOne = async (opts) => {
		const absolutePath = path.join(params.docDir, opts.relativePath);
		if (visited.has(absolutePath)) return;
		visited.add(absolutePath);
		if (opts.type === "table") await inlineTable(opts, absolutePath);
		else await sendImage(opts, absolutePath);
	};

	for (const chunk of params.returnedChunks) {
		const isAsset = chunk.type === "image" || chunk.type === "table";
		if (!isAsset || !chunk.path) continue;
		// A table whose placeholder is already gone from its content needs no further resolution.
		if (chunk.type === "table" && chunk.content && !/TABLE_[a-f0-9-]+_TABLE/.test(chunk.content)) continue;
		await resolveOne({
			chunkId: chunk.path,
			type: chunk.type,
			relativePath: chunk.path,
			summary: chunk.summary || chunk.content?.slice(0, 200) || ""
		});
	}
	return collected;
}
|
|
1227
|
+
/**
 * Builds the `knowhere_get_map` tool, which returns an overview of the
 * knowledge bases stored under `T2_KNOWHERE_HOME`.
 *
 * For every knowledge-base directory (optionally restricted to `kbId`):
 * - when `knowledge_graph.json` can be read, its version, stats, files and
 *   edges are reported; file entries whose `top_keywords` look broken are
 *   recomputed from the raw chunks and the graph file is rewritten
 *   (write failures are ignored);
 * - otherwise the KB is reported as `version: "pending"` with the document
 *   directories found on disk, if any.
 *
 * @param _params Tool construction parameters (not used by this tool).
 * @returns The tool definition.
 */
function createGetMapTool(_params) {
	// Tallies chunks by their `type` field, treating a missing type as "text".
	const countChunkTypes = (rawChunks) => {
		const types = {};
		for (const c of rawChunks) {
			const t = c.type || "text";
			types[t] = (types[t] || 0) + 1;
		}
		return types;
	};

	// Recomputes keywords/types/chunk counts for file entries that need it; reports whether anything changed.
	const repairFileEntries = async (kbRoot, files) => {
		let changed = false;
		for (const [docName, info] of Object.entries(files)) {
			if (!t2KeywordsNeedRepair(info.top_keywords)) continue;
			const rawChunks = await t2LoadRawChunks(path.join(kbRoot, docName));
			if (!(rawChunks.length > 0)) continue;
			const repaired = t2ComputeTfIdfKeywords(rawChunks);
			if (!(repaired.length > 0)) continue;
			info.top_keywords = repaired;
			info.types = countChunkTypes(rawChunks);
			info.chunks_count = rawChunks.length;
			changed = true;
		}
		return changed;
	};

	// Reads (and, if needed, repairs) a KB's knowledge graph; throws when the graph is unusable.
	const summarizeGraph = async (kbName, kbRoot) => {
		const kgPath = path.join(kbRoot, "knowledge_graph.json");
		const g = JSON.parse(await fs.readFile(kgPath, "utf-8"));
		const dirty = await repairFileEntries(kbRoot, g.files || {});
		if (dirty) {
			g.updated_at = new Date().toISOString();
			try {
				await fs.writeFile(kgPath, JSON.stringify(g, null, 2), "utf-8");
			} catch {
				// Persisting the repair is best effort; the in-memory result is still returned.
			}
		}
		return {
			kb_id: kbName,
			version: g.version || "1.0",
			updated_at: g.updated_at || "",
			stats: g.stats || {},
			files: g.files || {},
			edges: g.edges || []
		};
	};

	const execute = async (_toolCallId, rawParams) => {
		const kbId = readString((isRecord(rawParams) ? rawParams : {}).kbId) || "";
		try {
			await fs.access(T2_KNOWHERE_HOME);
		} catch {
			return textResult(`未找到知识库目录 ${T2_KNOWHERE_HOME}`);
		}
		const entries = await fs.readdir(T2_KNOWHERE_HOME, { withFileTypes: true });
		const kbs = [];
		for (const e of entries) {
			if (!e.isDirectory()) continue;
			if (kbId && e.name !== kbId) continue;
			const kbRoot = path.join(T2_KNOWHERE_HOME, e.name);
			try {
				kbs.push(await summarizeGraph(e.name, kbRoot));
			} catch {
				// No usable graph: fall back to listing the document directories on disk.
				const docs = await t2ListDocDirs(kbRoot);
				if (docs.length > 0) kbs.push({
					kb_id: e.name,
					version: "pending",
					files: Object.fromEntries(docs.map((d) => [d, {}])),
					edges: []
				});
			}
		}
		if (kbs.length === 0) return textResult("未找到知识库。");
		return t2JsonResult({
			status: "ok",
			knowledge_bases: kbs
		});
	};

	return {
		name: "knowhere_get_map",
		label: "Knowhere Get Map",
		description: "获取知识库全局概览。查询知识时必须先调此工具,了解有哪些文档、关键词、重要性和跨文件关联。然后用 knowhere_get_structure 查看具体文档的章节目录。重要:knowhere_read_chunks 已内置图片/表格自动投递功能,读取内容时会自动发送多媒体到用户频道。除非用户明确要求下载原始文件,否则请勿主动发送 .docx/.pdf 等文档。",
		parameters: {
			type: "object",
			additionalProperties: false,
			properties: { kbId: {
				type: "string",
				description: "Optional: specific KB ID. Leave empty to scan all."
			} }
		},
		execute
	};
}
|
|
1307
|
+
/**
 * Builds the `knowhere_get_structure` tool, which returns the section
 * hierarchy of one document.
 *
 * The document directory is located with `t2FindDocDir`. When its
 * `hierarchy.json` can be read and parsed, that hierarchy is returned;
 * otherwise the sorted, de-duplicated list of chunk paths is returned with
 * `hierarchy: null` and a hint.
 *
 * @param _params Tool construction parameters (not used by this tool).
 * @returns The tool definition.
 */
function createGetStructureTool(_params) {
	const parameters = {
		type: "object",
		additionalProperties: false,
		properties: {
			kbId: {
				type: "string",
				description: "Knowledge base ID (from knowhere_get_map result)"
			},
			docName: {
				type: "string",
				description: "Document name (supports fuzzy match)"
			}
		},
		required: ["kbId", "docName"]
	};

	const execute = async (_toolCallId, rawParams) => {
		const args = isRecord(rawParams) ? rawParams : {};
		const kbId = readString(args.kbId);
		const docName = readString(args.docName);
		if (!kbId || !docName) throw new Error("kbId and docName are required.");
		const docDir = await t2FindDocDir(path.join(T2_KNOWHERE_HOME, kbId), docName);
		if (!docDir) return textResult(`文档 '${docName}' 在 kb=${kbId} 中不存在`);
		// Fields shared by both response shapes.
		const identity = () => ({
			status: "ok",
			kb_id: kbId,
			doc_name: path.basename(docDir)
		});
		try {
			const hierarchyText = await fs.readFile(path.join(docDir, "hierarchy.json"), "utf-8");
			const hierarchy = JSON.parse(hierarchyText);
			return t2JsonResult({
				...identity(),
				hierarchy
			});
		} catch {
			// No usable hierarchy.json: expose the distinct chunk paths instead.
			const chunks = await t2LoadChunks(docDir);
			const distinctPaths = new Set(chunks.map((c) => c.path).filter(Boolean));
			const chunkPaths = Array.from(distinctPaths).sort();
			return t2JsonResult({
				...identity(),
				hierarchy: null,
				chunk_paths: chunkPaths,
				hint: "无 hierarchy.json,已返回 chunk 路径列表"
			});
		}
	};

	return {
		name: "knowhere_get_structure",
		label: "Knowhere Get Structure",
		description: "获取文档章节目录。先调 knowhere_get_map 确定 kbId 和文档名后,用此工具查看章节结构,然后用 knowhere_read_chunks 读取内容。重要:knowhere_read_chunks 已内置图片/表格自动投递功能,除非用户要求下载文件,否则无需额外发送原始文档。",
		parameters,
		execute
	};
}
|
|
1357
|
+
/**
 * Builds the `knowhere_read_chunks` tool, which returns the chunks of one
 * document, optionally restricted to a section.
 *
 * Behaviour of `execute`:
 * 1. Locates the document directory and loads its chunks.
 * 2. With `sectionPath`: keeps chunks whose path contains it (retrying with
 *    `t2NormalizePath` on both sides); if nothing matches, returns a
 *    `no_match` result listing up to 30 available sections.
 *    Without `sectionPath`: documents with more than 20 chunks yield a
 *    `too_many` result asking the caller to pick a section.
 * 3. Truncates to `maxChunks` (default 50).
 * 4. Bumps the document's hit counters in `knowledge_graph.json` (best effort).
 * 5. Enriches the chunks and resolves their image/table assets, which may
 *    deliver images to the user's channel.
 *
 * `maxChunks` is sanitized: values that are not finite or are below 1 fall
 * back to the default. Previously a negative value was passed straight to
 * `Array.prototype.slice`, which dropped chunks from the END of the list and
 * made the `truncated` flag meaningless.
 *
 * @param _params `{ api, store, ctx }` used for logging and asset delivery.
 * @returns The tool definition.
 */
function createReadChunksTool(_params) {
	const DEFAULT_MAX_CHUNKS = 50;

	// Distinct last path segments of text chunks, capped at 30, for "pick a section" hints.
	const listSections = (chunks) => [...new Set(chunks.filter((c) => c.type === "text" && c.path).map((c) => {
		const parts = c.path.split("/");
		return parts.length > 1 ? parts.slice(-1)[0] : c.path;
	}))].slice(0, 30);

	// Increments the document's hit counter in the knowledge graph; any failure is ignored.
	const recordHit = async (kbId, docDir) => {
		try {
			const kgPath = path.join(T2_KNOWHERE_HOME, kbId, "knowledge_graph.json");
			const g = JSON.parse(await fs.readFile(kgPath, "utf-8"));
			const dn = path.basename(docDir);
			if (g.files?.[dn]) {
				g.files[dn].hit_count = (g.files[dn].hit_count || 0) + 1;
				g.files[dn].last_hit = new Date().toISOString();
				g.updated_at = new Date().toISOString();
				await fs.writeFile(kgPath, JSON.stringify(g, null, 2), "utf-8");
			}
		} catch {
			// Hit statistics are optional; never fail a read because of them.
		}
	};

	return {
		name: "knowhere_read_chunks",
		label: "Knowhere Read Chunks",
		description: "读取文档内容。先调 knowhere_get_structure 确定章节后,用此工具读取具体内容。可通过 sectionPath 过滤特定章节,减少 token 消耗。此工具已内置图片/表格自动投递:读取时会自动将多媒体内容发送到用户频道。除非用户明确要求下载原始文件,否则无需再用 message 发送文档。",
		parameters: {
			type: "object",
			additionalProperties: false,
			properties: {
				kbId: {
					type: "string",
					description: "Knowledge base ID"
				},
				docName: {
					type: "string",
					description: "Document name"
				},
				sectionPath: {
					type: "string",
					description: "Optional: section path prefix to filter (e.g. '一、工程概况')"
				},
				maxChunks: {
					type: "number",
					description: "Max chunks to return (default 50)"
				}
			},
			required: ["kbId", "docName"]
		},
		execute: async (_toolCallId, rawParams) => {
			const paramsRecord = isRecord(rawParams) ? rawParams : {};
			const kbId = readString(paramsRecord.kbId);
			const docName = readString(paramsRecord.docName);
			const sectionPath = readString(paramsRecord.sectionPath);
			const maxChunks = readNumber(paramsRecord.maxChunks, DEFAULT_MAX_CHUNKS);
			if (!kbId || !docName) throw new Error("kbId and docName are required.");
			const docDir = await t2FindDocDir(path.join(T2_KNOWHERE_HOME, kbId), docName);
			if (!docDir) return textResult(`文档 '${docName}' 不存在`);
			let chunks = await t2LoadChunks(docDir);
			const totalAll = chunks.length;
			if (sectionPath) {
				let filtered = chunks.filter((c) => c.path.includes(sectionPath));
				if (filtered.length === 0) {
					// Second attempt: compare normalized forms of both the query and the chunk paths.
					const normQuery = t2NormalizePath(sectionPath);
					filtered = chunks.filter((c) => t2NormalizePath(c.path).includes(normQuery));
				}
				if (filtered.length === 0) return t2JsonResult({
					status: "no_match",
					message: `sectionPath '${sectionPath}' 未匹配到任何切片。请检查路径或从以下章节中选择:`,
					available_sections: listSections(chunks)
				});
				chunks = filtered;
			} else if (totalAll > 20) return t2JsonResult({
				status: "too_many",
				message: `该文档共有 ${totalAll} 个切片,请先用 knowhere_get_structure 查看目录,然后用 sectionPath 参数指定章节。`,
				available_sections: listSections(chunks)
			});
			const total = chunks.length;
			// Only a finite value >= 1 is a usable limit; anything else falls back to the default.
			const limit = Number.isFinite(maxChunks) && maxChunks >= 1 ? Math.floor(maxChunks) : DEFAULT_MAX_CHUNKS;
			chunks = chunks.slice(0, limit);
			await recordHit(kbId, docDir);
			chunks = await t2EnrichChunks(chunks, docDir);
			let resolvedAssets = [];
			try {
				resolvedAssets = await t2ResolveAssets({
					api: _params.api,
					store: _params.store,
					ctx: _params.ctx,
					docDir,
					returnedChunks: chunks
				});
			} catch (err) {
				_params.api.logger.debug?.(`knowhere: read_chunks asset resolution failed: ${err instanceof Error ? err.message : String(err)}`);
			}
			const result = {
				status: "ok",
				kb_id: kbId,
				doc_name: path.basename(docDir),
				section_path: sectionPath || null,
				total_chunks: total,
				returned: chunks.length,
				truncated: total > limit,
				chunks
			};
			if (resolvedAssets.length > 0) {
				result.resolved_assets = resolvedAssets;
				const sent = resolvedAssets.filter((a) => a.mode === "image_sent").length;
				const tables = resolvedAssets.filter((a) => a.mode === "table_inline").length;
				const notes = [];
				if (sent > 0) notes.push(`${sent} 张图片已自动发送到用户频道`);
				if (tables > 0) notes.push(`${tables} 个表格已内联为 HTML`);
				result.asset_summary = notes.join(";");
				result._agent_note = "上述多媒体资源(图片/表格/视频)已自动投递到用户频道,用户已经可以看到。请直接用文字回答用户的问题,不要再重复发送这些图片或表格。也不要主动发送原始文档文件(.docx/.pdf),除非用户明确要求下载原始文件。如果用户要求你分析、描述或对比图片内容,请使用 knowhere_view_image 工具传入 content 中的 [📷 path] 路径来获取图片数据进行视觉分析。";
			}
			return t2JsonResult(result);
		}
	};
}
|
|
1476
|
+
/**
 * Builds the `knowhere_view_image` tool, which returns an image stored inside
 * a document directory as a base64 `data:` URL for visual analysis.
 *
 * Security: `imagePath` comes from the agent (and ultimately from `[📷 ...]`
 * markers embedded in document content), so it is treated as untrusted. The
 * resolved file must lie inside the document directory; paths that escape it
 * (for example via `../`) are rejected with the same "not found" response as a
 * missing file, so that no file outside the knowledge base can be read.
 * NOTE(review): symbolic links inside the document directory are not resolved
 * by this check.
 *
 * @param _params Tool construction parameters (not used by this tool).
 * @returns The tool definition.
 */
function createViewImageTool(_params) {
	// Extension → media type; anything else is reported as JPEG (unchanged default).
	const MEDIA_TYPES = new Map([
		[".png", "image/png"],
		[".gif", "image/gif"],
		[".webp", "image/webp"]
	]);

	// True when `candidate` is a path strictly inside `rootDir`.
	const isInsideDir = (rootDir, candidate) => {
		const rel = path.relative(rootDir, candidate);
		if (rel === "" || path.isAbsolute(rel)) return false;
		return rel !== ".." && !rel.startsWith(`..${path.sep}`);
	};

	return {
		name: "knowhere_view_image",
		label: "Knowhere View Image",
		description: "分析知识库图片的像素内容。当 knowhere_read_chunks 返回的文本中包含 [📷 path] 标记时,可用此工具传入该 path 来获取图片的 base64 数据进行视觉分析。需要提供 kbId、docName 和 imagePath(即 [📷 ...] 中的相对路径)。",
		parameters: {
			type: "object",
			additionalProperties: false,
			properties: {
				kbId: {
					type: "string",
					description: "Knowledge base ID"
				},
				docName: {
					type: "string",
					description: "Document name"
				},
				imagePath: {
					type: "string",
					description: "Image relative path from [📷 ...] marker, e.g. 'images/image-9 助力手推车.jpeg'"
				}
			},
			required: [
				"kbId",
				"docName",
				"imagePath"
			]
		},
		execute: async (_toolCallId, rawParams) => {
			const paramsRecord = isRecord(rawParams) ? rawParams : {};
			const kbId = readString(paramsRecord.kbId);
			const docName = readString(paramsRecord.docName);
			const imagePath = readString(paramsRecord.imagePath);
			if (!kbId || !docName || !imagePath) throw new Error("kbId, docName, and imagePath are required.");
			const docDir = await t2FindDocDir(path.join(T2_KNOWHERE_HOME, kbId), docName);
			if (!docDir) return textResult(`文档 '${docName}' 不存在`);
			// join() keeps the original treatment of leading slashes; resolve() then collapses any "..".
			const absolutePath = path.resolve(path.join(docDir, imagePath));
			if (!isInsideDir(path.resolve(docDir), absolutePath)) return textResult(`图片文件不存在: ${imagePath}`);
			try {
				await fs.access(absolutePath);
			} catch {
				return textResult(`图片文件不存在: ${imagePath}`);
			}
			const base64Data = (await fs.readFile(absolutePath)).toString("base64");
			const ext = path.extname(imagePath).toLowerCase();
			const mediaType = MEDIA_TYPES.get(ext) ?? "image/jpeg";
			return {
				content: [{
					type: "text",
					text: `你正在查看图片: ${imagePath}。图片数据已附在下方,请直接用你的视觉能力分析图片内容,`
				}, {
					type: "image_url",
					image_url: { url: `data:${mediaType};base64,${base64Data}` }
				}],
				details: {}
			};
		}
	};
}
|
|
1537
|
+
/**
 * Builds the `knowhere_discover_files` tool, which searches every document of
 * every knowledge base (or of the one named by `kbId`) for the query terms.
 *
 * The query is split on whitespace and common punctuation; terms of length 1
 * or less are discarded. A document's hit count is the number of
 * (chunk, term) pairs for which the chunk's content-plus-summary text contains
 * the term (case-sensitive substring match). Only documents with at least one
 * hit are reported, ordered by descending hit count.
 *
 * @param _params Tool construction parameters (not used by this tool).
 * @returns The tool definition.
 */
function createDiscoverFilesTool(_params) {
	// Counts, over all chunks, how many of the terms occur in each chunk's text.
	const countHits = (chunks, terms) => {
		let hits = 0;
		for (const c of chunks) {
			const haystack = `${c.content} ${c.summary}`;
			hits += terms.filter((t) => haystack.includes(t)).length;
		}
		return hits;
	};

	const execute = async (_toolCallId, rawParams) => {
		const args = isRecord(rawParams) ? rawParams : {};
		const query = readString(args.query);
		const kbId = readString(args.kbId);
		if (!query) throw new Error("query is required.");
		const terms = query.split(/[\s,;,;。!?、\-/]+/).filter((t) => t.length > 1);
		if (terms.length === 0) return textResult("查询词为空");
		try {
			await fs.access(T2_KNOWHERE_HOME);
		} catch {
			return textResult("未找到知识库。");
		}
		const discovered = [];
		const kbEntries = await fs.readdir(T2_KNOWHERE_HOME, { withFileTypes: true });
		for (const kbE of kbEntries) {
			if (!kbE.isDirectory()) continue;
			if (kbId && kbE.name !== kbId) continue;
			let docEntries;
			try {
				docEntries = await fs.readdir(path.join(T2_KNOWHERE_HOME, String(kbE.name)), { withFileTypes: true });
			} catch {
				// Unreadable knowledge base: skip it and keep scanning the others.
				continue;
			}
			for (const docE of docEntries) {
				if (!docE.isDirectory()) continue;
				const docDir = path.join(T2_KNOWHERE_HOME, String(kbE.name), String(docE.name));
				const hitCount = countHits(await t2LoadChunks(docDir), terms);
				if (hitCount > 0) discovered.push({
					kb_id: String(kbE.name),
					doc_name: String(docE.name),
					hit_count: hitCount
				});
			}
		}
		discovered.sort((a, b) => b.hit_count - a.hit_count);
		return t2JsonResult({
			status: "ok",
			query,
			terms,
			discovered_files: discovered
		});
	};

	return {
		name: "knowhere_discover_files",
		label: "Knowhere Discover Files",
		description: "在所有知识库文档中搜索关键词,返回命中文件和次数。用于和 knowhere_get_map 做并集,避免遗漏相关文件。只返回文件名,不返回内容。注意:后续用 knowhere_read_chunks 读取时会自动投递图片/表格到用户频道,除非用户要求下载文件,否则无需额外发送原始文档。",
		parameters: {
			type: "object",
			additionalProperties: false,
			properties: {
				query: {
					type: "string",
					description: "Search keywords"
				},
				kbId: {
					type: "string",
					description: "Optional: limit to specific KB"
				}
			},
			required: ["query"]
		},
		execute
	};
}
|
|
917
1605
|
function createKnowhereToolFactory(params) {
|
|
918
1606
|
return (ctx) => [
|
|
919
1607
|
createIngestTool({
|
|
@@ -939,6 +1627,7 @@ function createKnowhereToolFactory(params) {
|
|
|
939
1627
|
api: params.api,
|
|
940
1628
|
config: params.config,
|
|
941
1629
|
store: params.store,
|
|
1630
|
+
kgService: params.kgService,
|
|
942
1631
|
ctx
|
|
943
1632
|
}),
|
|
944
1633
|
createSetApiKeyTool({
|
|
@@ -954,7 +1643,16 @@ function createKnowhereToolFactory(params) {
|
|
|
954
1643
|
api: params.api,
|
|
955
1644
|
kgService: params.kgService,
|
|
956
1645
|
ctx
|
|
957
|
-
})
|
|
1646
|
+
}),
|
|
1647
|
+
createGetMapTool({ api: params.api }),
|
|
1648
|
+
createGetStructureTool({ api: params.api }),
|
|
1649
|
+
createReadChunksTool({
|
|
1650
|
+
api: params.api,
|
|
1651
|
+
store: params.store,
|
|
1652
|
+
ctx
|
|
1653
|
+
}),
|
|
1654
|
+
createViewImageTool({ api: params.api }),
|
|
1655
|
+
createDiscoverFilesTool({ api: params.api })
|
|
958
1656
|
];
|
|
959
1657
|
}
|
|
960
1658
|
//#endregion
|
package/openclaw.plugin.json
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
"name": "Knowhere",
|
|
4
4
|
"description": "Parse documents with Knowhere and expose the stored result as tool-queryable document state for OpenClaw agents.",
|
|
5
5
|
"skills": ["./skills"],
|
|
6
|
-
"version": "0.2.
|
|
6
|
+
"version": "0.2.6",
|
|
7
7
|
"uiHints": {
|
|
8
8
|
"apiKey": {
|
|
9
9
|
"label": "Knowhere API Key",
|
|
@@ -152,6 +152,16 @@
|
|
|
152
152
|
}
|
|
153
153
|
}
|
|
154
154
|
}
|
|
155
|
+
},
|
|
156
|
+
"agentHooks": {
|
|
157
|
+
"type": "object",
|
|
158
|
+
"additionalProperties": false,
|
|
159
|
+
"properties": {
|
|
160
|
+
"enabled": {
|
|
161
|
+
"type": "boolean",
|
|
162
|
+
"default": true
|
|
163
|
+
}
|
|
164
|
+
}
|
|
155
165
|
}
|
|
156
166
|
}
|
|
157
167
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ontos-ai/knowhere-claw",
|
|
3
|
-
"version": "0.2.
|
|
3
|
+
"version": "0.2.6",
|
|
4
4
|
"description": "OpenClaw plugin for Knowhere-powered document ingestion and automatic grounding.",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/",
|
|
@@ -40,8 +40,7 @@
|
|
|
40
40
|
"dependencies": {
|
|
41
41
|
"@knowhere-ai/sdk": "^0.1.1",
|
|
42
42
|
"fflate": "^0.8.2",
|
|
43
|
-
"fs-extra": "^11.2.0"
|
|
44
|
-
"nodejieba": "^2.6.0"
|
|
43
|
+
"fs-extra": "^11.2.0"
|
|
45
44
|
},
|
|
46
45
|
"devDependencies": {
|
|
47
46
|
"@changesets/changelog-github": "^0.6.0",
|
|
@@ -19,28 +19,46 @@ Activate this skill when:
|
|
|
19
19
|
|
|
20
20
|
## Part 1: Ingesting New Documents
|
|
21
21
|
|
|
22
|
-
When a
|
|
22
|
+
When a user sends, uploads, or mentions a file, **always** use `knowhere_ingest_document` to parse it into the knowledge base. Two delivery modes exist depending on how the file arrives:
|
|
23
23
|
|
|
24
|
-
###
|
|
24
|
+
### Mode A: Local File (Telegram, Discord, Signal, …)
|
|
25
25
|
|
|
26
|
-
When
|
|
26
|
+
When the channel downloads the file to disk, a marker appears in the prompt:
|
|
27
27
|
|
|
28
28
|
```text
|
|
29
29
|
[media attached: /absolute/path/to/file.pdf (application/pdf) | handbook.pdf]
|
|
30
30
|
```
|
|
31
31
|
|
|
32
|
-
Use the exact absolute path
|
|
32
|
+
Use the exact absolute path from the marker:
|
|
33
33
|
|
|
34
|
-
|
|
34
|
+
```
|
|
35
|
+
knowhere_ingest_document(filePath: "/absolute/path/to/file.pdf", fileName: "handbook.pdf")
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### Mode B: Cloud File (Feishu, …)
|
|
39
|
+
|
|
40
|
+
When the file stays in a cloud service (no `[media attached:]` marker):
|
|
41
|
+
|
|
42
|
+
1. Use the channel's file tool to locate the file and obtain a download URL
|
|
43
|
+
- Feishu: use `feishu_drive` with `action: "list"` to find the file, then get its download URL
|
|
44
|
+
- Other cloud channels: use the equivalent tool to get a direct download URL
|
|
45
|
+
2. Pass the URL to knowhere:
|
|
46
|
+
|
|
47
|
+
```
|
|
48
|
+
knowhere_ingest_document(url: "https://download-url-from-channel-tool/...")
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### After Ingestion
|
|
52
|
+
|
|
53
|
+
The plugin handles everything automatically:
|
|
54
|
+
|
|
55
|
+
- Uploads/fetches the file for parsing
|
|
56
|
+
- Polls until parsing completes
|
|
57
|
+
- Downloads and extracts the result package
|
|
58
|
+
- Copies parsed data to `~/.knowhere/{kbId}/`
|
|
59
|
+
- Builds/updates `knowledge_graph.json`
|
|
35
60
|
|
|
36
|
-
|
|
37
|
-
2. The plugin handles everything automatically:
|
|
38
|
-
- Uploads the file to Knowhere API for parsing
|
|
39
|
-
- Polls until parsing completes
|
|
40
|
-
- Downloads and extracts the result package
|
|
41
|
-
- **Automatically** copies parsed data to `~/.knowhere/{kbId}/`
|
|
42
|
-
- **Automatically** builds/updates `knowledge_graph.json`
|
|
43
|
-
3. After ingest completes, the new document is immediately searchable via the retrieval workflow below
|
|
61
|
+
After ingest completes, the new document is immediately searchable via the retrieval workflow below.
|
|
44
62
|
|
|
45
63
|
Supported formats: PDF, DOCX, XLSX, PPTX, TXT, MD, images (JPG, PNG)
|
|
46
64
|
|