@ontos-ai/knowhere-claw 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/tools.js +123 -34
- package/openclaw.plugin.json +1 -1
- package/package.json +1 -1
- package/skills/knowhere/SKILL.md +14 -9
package/README.md
CHANGED
|
@@ -116,7 +116,7 @@ Within each scope, the plugin keeps:
|
|
|
116
116
|
## Common Workflow
|
|
117
117
|
|
|
118
118
|
1. Provide a file path or URL to the agent.
|
|
119
|
-
2. The agent ingests it into Knowhere and
|
|
119
|
+
2. The agent ingests it into Knowhere. By default this starts parsing asynchronously and returns a job ID; when the current turn needs the parsed result immediately, the agent can call `knowhere_ingest_document` with `blockUntilComplete: true`.
|
|
120
120
|
3. Follow-up questions reuse stored results from the current scope.
|
|
121
121
|
4. When needed, the agent can preview structure, search chunks, read raw result
|
|
122
122
|
files, or clear stored documents.
|
package/dist/tools.js
CHANGED
|
@@ -9,6 +9,13 @@ import fs from "node:fs/promises";
|
|
|
9
9
|
import path from "node:path";
|
|
10
10
|
import { resolvePreferredOpenClawTmpDir } from "openclaw/plugin-sdk/core";
|
|
11
11
|
//#region src/tools.ts
|
|
12
|
+
const TERMINAL_JOB_STATUSES = new Set([
|
|
13
|
+
"cancelled",
|
|
14
|
+
"canceled",
|
|
15
|
+
"done",
|
|
16
|
+
"error",
|
|
17
|
+
"failed"
|
|
18
|
+
]);
|
|
12
19
|
async function buildKnowledgeGraphAsync(params) {
|
|
13
20
|
const docDir = path.join(params.scope.documentsDir, params.docId);
|
|
14
21
|
const metadataPath = path.join(docDir, "metadata.json");
|
|
@@ -95,6 +102,67 @@ function buildStoredDocumentSummaryLines(params) {
|
|
|
95
102
|
if (params.includeUpdatedAt) lines.push(`Updated: ${params.document.updatedAt}`);
|
|
96
103
|
return lines;
|
|
97
104
|
}
|
|
105
|
+
function isTerminalJobStatus(status, hasError) {
|
|
106
|
+
return TERMINAL_JOB_STATUSES.has(status.trim().toLowerCase()) || hasError;
|
|
107
|
+
}
|
|
108
|
+
function startKnowledgeGraphBuild(params) {
|
|
109
|
+
if (!params.kgService.isEnabled()) return;
|
|
110
|
+
const kbId = params.kgService.resolveKbId(params.ctx);
|
|
111
|
+
if (!kbId) return;
|
|
112
|
+
params.api.logger.info(`knowhere: initiating knowledge graph build kbId=${kbId} docId=${params.document.id}`);
|
|
113
|
+
buildKnowledgeGraphAsync({
|
|
114
|
+
kgService: params.kgService,
|
|
115
|
+
kbId,
|
|
116
|
+
docId: params.document.id,
|
|
117
|
+
documentPayload: params.ingestResult,
|
|
118
|
+
scope: params.scope,
|
|
119
|
+
ctx: params.ctx,
|
|
120
|
+
api: params.api,
|
|
121
|
+
channelRoute: params.channelRoute,
|
|
122
|
+
sessionKey: params.sessionKey
|
|
123
|
+
}).catch((kgError) => {
|
|
124
|
+
params.api.logger.error(`knowhere: knowledge graph build failed kbId=${kbId} docId=${params.document.id}: ${formatErrorMessage(kgError)}`);
|
|
125
|
+
});
|
|
126
|
+
}
|
|
127
|
+
async function persistIngestedDocument(params) {
|
|
128
|
+
const storedDocument = await params.store.saveDownloadedDocument(params.scope, {
|
|
129
|
+
sourceType: params.sourceType,
|
|
130
|
+
source: params.source,
|
|
131
|
+
fileName: params.fileName,
|
|
132
|
+
docId: params.docId,
|
|
133
|
+
title: params.title,
|
|
134
|
+
dataId: params.dataId,
|
|
135
|
+
tags: params.tags,
|
|
136
|
+
job: params.ingestResult.job,
|
|
137
|
+
jobResult: params.ingestResult.jobResult,
|
|
138
|
+
downloadedResult: params.ingestResult.downloadedResult
|
|
139
|
+
}, { overwrite: params.overwrite });
|
|
140
|
+
params.api.logger.info(`knowhere: knowhere_ingest_document stored document scope=${params.scope.label} jobId=${params.ingestResult.job.job_id} docId=${storedDocument.id}`);
|
|
141
|
+
startKnowledgeGraphBuild({
|
|
142
|
+
api: params.api,
|
|
143
|
+
channelRoute: params.channelRoute,
|
|
144
|
+
ctx: params.ctx,
|
|
145
|
+
document: storedDocument,
|
|
146
|
+
ingestResult: params.ingestResult,
|
|
147
|
+
kgService: params.kgService,
|
|
148
|
+
scope: params.scope,
|
|
149
|
+
sessionKey: params.sessionKey
|
|
150
|
+
});
|
|
151
|
+
return storedDocument;
|
|
152
|
+
}
|
|
153
|
+
function formatCompletedIngestResult(params) {
|
|
154
|
+
return [
|
|
155
|
+
"Ingest complete.",
|
|
156
|
+
...buildStoredDocumentSummaryLines({
|
|
157
|
+
document: params.document,
|
|
158
|
+
scopeLabel: params.scopeLabel,
|
|
159
|
+
includeJobId: true,
|
|
160
|
+
includeSource: true
|
|
161
|
+
}),
|
|
162
|
+
`Source type: ${params.sourceType}`,
|
|
163
|
+
"Next: use knowhere_preview_document for a structural overview or knowhere_grep to search the parsed content."
|
|
164
|
+
].join("\n");
|
|
165
|
+
}
|
|
98
166
|
function readString(value) {
|
|
99
167
|
return typeof value === "string" && value.trim() ? value.trim() : void 0;
|
|
100
168
|
}
|
|
@@ -531,7 +599,7 @@ function createIngestTool(params) {
|
|
|
531
599
|
return {
|
|
532
600
|
name: "knowhere_ingest_document",
|
|
533
601
|
label: "Knowhere Ingest",
|
|
534
|
-
description: "Parse a local file or remote URL with Knowhere and store the result in the current scope. Before calling this for a document that might already be stored in the current scope, use knowhere_list_documents and reuse the existing stored document when Source, File, or Title clearly match unless the user explicitly asks for a fresh parse or overwrite. When the user provides a URL to a document (PDF link, web page, etc.), pass it as the url parameter — Knowhere fetches it directly, no local download needed.
|
|
602
|
+
description: "Parse a local file or remote URL with Knowhere and store the result in the current scope. Before calling this for a document that might already be stored in the current scope, use knowhere_list_documents and reuse the existing stored document when Source, File, or Title clearly match unless the user explicitly asks for a fresh parse or overwrite. When the user provides a URL to a document (PDF link, web page, etc.), pass it as the url parameter — Knowhere fetches it directly, no local download needed. Knowhere must be the only parser for supported files. If Knowhere returns an error, surface that exact error to the user and do not fall back to other parsing methods or fabricate a preview. By default blockUntilComplete is false, so this tool is fire-and-forget and returns a job ID while parsing continues in the background. Set blockUntilComplete to true only when the current turn explicitly needs the parsed result before continuing. Use lang to control the language of any user-facing background status update (`en` by default, `ch` for Chinese). Provide either filePath or url, not both.",
|
|
535
603
|
parameters: {
|
|
536
604
|
type: "object",
|
|
537
605
|
additionalProperties: false,
|
|
@@ -573,6 +641,10 @@ function createIngestTool(params) {
|
|
|
573
641
|
type: "boolean",
|
|
574
642
|
description: "Replace an existing stored document with the same docId."
|
|
575
643
|
},
|
|
644
|
+
blockUntilComplete: {
|
|
645
|
+
type: "boolean",
|
|
646
|
+
description: "When true, wait for Knowhere to finish parsing, store the result, and return a ready-to-use stored-document summary. Defaults to false, which returns immediately with a job ID and continues parsing in the background."
|
|
647
|
+
},
|
|
576
648
|
lang: {
|
|
577
649
|
type: "string",
|
|
578
650
|
description: "Language for any user-facing background status update sent after parsing completes or fails. Supports en and ch; unsupported values fall back to en."
|
|
@@ -628,13 +700,14 @@ function createIngestTool(params) {
|
|
|
628
700
|
filePath: resolvedFilePath,
|
|
629
701
|
url: urlParam
|
|
630
702
|
});
|
|
703
|
+
const blockUntilComplete = readBoolean(paramsRecord.blockUntilComplete, false);
|
|
631
704
|
const tags = sanitizeStringArray(paramsRecord.tags);
|
|
632
705
|
const overwrite = readBoolean(paramsRecord.overwrite, false);
|
|
633
706
|
const trackerLanguage = readIngestTrackerLanguage(paramsRecord.lang);
|
|
634
707
|
const sessionKey = params.ctx.sessionKey;
|
|
635
708
|
const sourceType = urlParam ? "url" : "file";
|
|
636
709
|
const channelRoute = await params.store.resolveChannelRoute({ sessionKey });
|
|
637
|
-
params.api.logger.info(`knowhere: knowhere_ingest_document starting
|
|
710
|
+
params.api.logger.info(`knowhere: knowhere_ingest_document starting ingest scope=${scope.label} sourceType=${sourceType} label=${JSON.stringify(progressLabel)} mode=${blockUntilComplete ? "blocking" : "background"} overwrite=${overwrite} docId=${docId ?? "auto"} dataId=${dataId ?? "none"} lang=${trackerLanguage} routeState=${channelRoute ? "resolved" : "missing"} routeAccountId=${channelRoute?.accountId ?? "none"}`);
|
|
638
711
|
let resolveJobCreated;
|
|
639
712
|
const jobCreatedPromise = new Promise((resolve) => {
|
|
640
713
|
resolveJobCreated = resolve;
|
|
@@ -653,40 +726,52 @@ function createIngestTool(params) {
|
|
|
653
726
|
resolveJobCreated(job);
|
|
654
727
|
}
|
|
655
728
|
});
|
|
729
|
+
if (blockUntilComplete) {
|
|
730
|
+
const ingestResult = await ingestPromise.catch(rethrowWithPaymentHint);
|
|
731
|
+
params.api.logger.info(`knowhere: knowhere_ingest_document download completed scope=${scope.label} jobId=${ingestResult.job.job_id}; storing extracted result`);
|
|
732
|
+
return textResult(formatCompletedIngestResult({
|
|
733
|
+
document: await persistIngestedDocument({
|
|
734
|
+
api: params.api,
|
|
735
|
+
channelRoute,
|
|
736
|
+
ctx: params.ctx,
|
|
737
|
+
dataId,
|
|
738
|
+
docId,
|
|
739
|
+
fileName,
|
|
740
|
+
ingestResult,
|
|
741
|
+
kgService: params.kgService,
|
|
742
|
+
overwrite,
|
|
743
|
+
scope,
|
|
744
|
+
sessionKey,
|
|
745
|
+
source: urlParam || resolvedFilePath || "",
|
|
746
|
+
sourceType,
|
|
747
|
+
store: params.store,
|
|
748
|
+
tags,
|
|
749
|
+
title
|
|
750
|
+
}),
|
|
751
|
+
scopeLabel: scope.label,
|
|
752
|
+
sourceType
|
|
753
|
+
}));
|
|
754
|
+
}
|
|
656
755
|
ingestPromise.then(async (ingestResult) => {
|
|
657
756
|
params.api.logger.info(`knowhere: knowhere_ingest_document download completed scope=${scope.label} jobId=${ingestResult.job.job_id}; storing extracted result`);
|
|
658
|
-
const storedDocument = await
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
docId,
|
|
663
|
-
title,
|
|
757
|
+
const storedDocument = await persistIngestedDocument({
|
|
758
|
+
api: params.api,
|
|
759
|
+
channelRoute,
|
|
760
|
+
ctx: params.ctx,
|
|
664
761
|
dataId,
|
|
762
|
+
docId,
|
|
763
|
+
fileName,
|
|
764
|
+
ingestResult,
|
|
765
|
+
kgService: params.kgService,
|
|
766
|
+
overwrite,
|
|
767
|
+
scope,
|
|
768
|
+
sessionKey,
|
|
769
|
+
source: urlParam || resolvedFilePath || "",
|
|
770
|
+
sourceType,
|
|
771
|
+
store: params.store,
|
|
665
772
|
tags,
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
downloadedResult: ingestResult.downloadedResult
|
|
669
|
-
}, { overwrite });
|
|
670
|
-
params.api.logger.info(`knowhere: knowhere_ingest_document stored document scope=${scope.label} jobId=${ingestResult.job.job_id} docId=${storedDocument.id} label=${JSON.stringify(progressLabel)}`);
|
|
671
|
-
if (params.kgService.isEnabled()) {
|
|
672
|
-
const kbId = params.kgService.resolveKbId(params.ctx);
|
|
673
|
-
if (kbId) {
|
|
674
|
-
params.api.logger.info(`knowhere: initiating knowledge graph build kbId=${kbId} docId=${storedDocument.id}`);
|
|
675
|
-
buildKnowledgeGraphAsync({
|
|
676
|
-
kgService: params.kgService,
|
|
677
|
-
kbId,
|
|
678
|
-
docId: storedDocument.id,
|
|
679
|
-
documentPayload: ingestResult,
|
|
680
|
-
scope,
|
|
681
|
-
ctx: params.ctx,
|
|
682
|
-
api: params.api,
|
|
683
|
-
channelRoute,
|
|
684
|
-
sessionKey
|
|
685
|
-
}).catch((kgError) => {
|
|
686
|
-
params.api.logger.error(`knowhere: knowledge graph build failed kbId=${kbId} docId=${storedDocument.id}: ${formatErrorMessage(kgError)}`);
|
|
687
|
-
});
|
|
688
|
-
}
|
|
689
|
-
}
|
|
773
|
+
title
|
|
774
|
+
});
|
|
690
775
|
await notifyBackgroundIngestOutcome({
|
|
691
776
|
api: params.api,
|
|
692
777
|
context: params.ctx,
|
|
@@ -737,7 +822,7 @@ function createIngestTool(params) {
|
|
|
737
822
|
`Job ID: ${createdJob.job_id}`,
|
|
738
823
|
`File: ${progressLabel}`,
|
|
739
824
|
`Scope: ${scope.label}`,
|
|
740
|
-
"
|
|
825
|
+
"This call does not include parsed content yet."
|
|
741
826
|
].join("\n"));
|
|
742
827
|
}
|
|
743
828
|
};
|
|
@@ -746,7 +831,7 @@ function createJobStatusTool(params) {
|
|
|
746
831
|
return {
|
|
747
832
|
name: "knowhere_get_job_status",
|
|
748
833
|
label: "Knowhere Job Status",
|
|
749
|
-
description: "Check the status of a Knowhere parsing job by job ID. Returns job status, progress, duration, credits spent, and whether the result is already stored locally. Use this to monitor a running job or inspect a past job before importing it with knowhere_import_completed_job.",
|
|
834
|
+
description: "Check the status of a Knowhere parsing job by job ID. Returns job status, progress, duration, credits spent, and whether the result is already stored locally. Use this to monitor a running job or inspect a past job before importing it with knowhere_import_completed_job. Do not assume a running job is stuck just because progress is unchanged or slow. Only treat the job as failed or stuck when Knowhere returns an explicit failure status or error code.",
|
|
750
835
|
parameters: {
|
|
751
836
|
type: "object",
|
|
752
837
|
additionalProperties: false,
|
|
@@ -798,6 +883,10 @@ function createJobStatusTool(params) {
|
|
|
798
883
|
lines.push(`Result URL: ${job.result_url}`);
|
|
799
884
|
if (job.result_url_expires_at) lines.push(`Result URL expires: ${job.result_url_expires_at}`);
|
|
800
885
|
}
|
|
886
|
+
const hasExplicitError = Boolean(job.error?.code || job.error?.message);
|
|
887
|
+
if (job.status.trim().toLowerCase() === "done") lines.push("Interpretation: completed.");
|
|
888
|
+
else if (isTerminalJobStatus(job.status, hasExplicitError)) lines.push("Interpretation: Knowhere reported an explicit failure. Surface this error to the user and do not fall back to other parsing methods.");
|
|
889
|
+
else lines.push("Interpretation: still running. Do not describe this job as stuck or failed unless a later Knowhere API response returns an explicit failure status or error code.");
|
|
801
890
|
if (matchingDocuments.length === 0) lines.push("Stored docs in scope: none");
|
|
802
891
|
else {
|
|
803
892
|
lines.push("Stored docs in scope:");
|
package/openclaw.plugin.json
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
"name": "Knowhere",
|
|
4
4
|
"description": "Parse documents with Knowhere and expose the stored result as tool-queryable document state for OpenClaw agents.",
|
|
5
5
|
"skills": ["./skills"],
|
|
6
|
-
"version": "0.2.2",
|
|
6
|
+
"version": "0.2.3",
|
|
7
7
|
"uiHints": {
|
|
8
8
|
"apiKey": {
|
|
9
9
|
"label": "Knowhere API Key",
|
package/package.json
CHANGED
package/skills/knowhere/SKILL.md
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: knowhere
|
|
3
|
-
description: First-class document reader. Use Knowhere tools whenever the task involves reading, understanding, searching, or reasoning over document files (PDF, DOCX, XLSX, PPTX, images, TXT, MD) or URLs — even if the user does not mention "Knowhere" by name. Prefer Knowhere over generic file reads for all supported formats. Covers ingestion, structured browsing, chunk search, result file inspection, job management, and scope cleanup.
|
|
3
|
+
description: First-class document reader. Use Knowhere tools whenever the task involves reading, understanding, searching, or reasoning over document files (PDF, DOCX, XLSX, PPTX, images, TXT, MD and more) or URLs — even if the user does not mention "Knowhere" by name. Prefer Knowhere over generic file reads for all supported formats. Covers ingestion, structured browsing, chunk search, result file inspection, job management, and scope cleanup.
|
|
4
4
|
user-invocable: false
|
|
5
5
|
---
|
|
6
6
|
|
|
7
7
|
# Knowhere Skill
|
|
8
8
|
|
|
9
|
-
Knowhere is the first-class way to read, understand, and answer questions about document files. When a task involves reading or reasoning over a supported file (`.pdf`, `.docx`, `.xlsx`, `.pptx`, `.txt`, `.md`, `.jpg`, `.jpeg`, `.png`), always prefer Knowhere tools over generic file-reading approaches. Knowhere parses documents into structured chunks with hierarchy, summaries, tables, and images — far richer than raw text extraction.
|
|
9
|
+
Knowhere is the first-class way to read, understand, and answer questions about document files. When a task involves reading or reasoning over a supported file (`.pdf`, `.docx`, `.xlsx`, `.pptx`, `.txt`, `.md`, `.jpg`, `.jpeg`, `.png` and others), always prefer Knowhere tools over generic file-reading approaches. Knowhere parses documents into structured chunks with hierarchy, summaries, tables, and images — far richer than raw text extraction.
|
|
10
10
|
|
|
11
11
|
Use the `knowhere_*` tools for explicit document ingestion and browse-first stored-result workflows. Before starting a new ingest, prefer checking whether the current scope already has the same document stored.
|
|
12
12
|
|
|
@@ -21,7 +21,9 @@ Reach for Knowhere tools first whenever:
|
|
|
21
21
|
|
|
22
22
|
Do not attempt to read supported document files (especially PDFs, DOCX, XLSX, PPTX) with generic file-read tools or shell commands. These formats are binary or semi-structured and will produce garbled or incomplete output. Knowhere handles them properly.
|
|
23
23
|
|
|
24
|
-
|
|
24
|
+
If Knowhere returns a parsing error, status error, or explicit failure status, report that exact error to the user and stop. Do not fall back to other parsing methods, do not guess from partial binary reads, and do not fabricate a preview or summary.
|
|
25
|
+
|
|
26
|
+
For plain text files (`.txt`, `.md`), Knowhere still adds value through chunking, hierarchy extraction, and search. Direct reads are acceptable only for quick workspace sanity checks that do not replace a requested parse, preview, or document-grounded answer.
|
|
25
27
|
|
|
26
28
|
## Terminology
|
|
27
29
|
|
|
@@ -132,17 +134,20 @@ After ingesting a document, use the returned document or job identifiers for fol
|
|
|
132
134
|
## Recommended workflow
|
|
133
135
|
|
|
134
136
|
1. If the document may already exist in the current scope, call `knowhere_list_documents` first and compare `Source`, `File`, and `Title` to find an existing match.
|
|
135
|
-
2. Ingest or import the document only if it is not already in the store, or if the user explicitly wants a fresh parse.
|
|
136
|
-
3.
|
|
137
|
-
4.
|
|
138
|
-
5.
|
|
139
|
-
6. Call `
|
|
140
|
-
7.
|
|
137
|
+
2. Ingest or import the document only if it is not already in the store, or if the user explicitly wants a fresh parse. `knowhere_ingest_document` defaults to fire-and-forget (`blockUntilComplete: false`) and returns a job ID immediately while parsing continues in the background.
|
|
138
|
+
3. Set `blockUntilComplete: true` on `knowhere_ingest_document` when the current turn explicitly needs the parsed result before continuing, such as "wait until it is parsed" or "show me a preview now".
|
|
139
|
+
4. If a job was already started asynchronously and the current turn now depends on the parsed result, use `knowhere_get_job_status` until Knowhere reports `done` or an explicit failure. Do not infer "stuck" from unchanged progress alone.
|
|
140
|
+
5. Call `knowhere_list_documents` again if you need to confirm the right `docId`.
|
|
141
|
+
6. Call `knowhere_preview_document` to get a structural overview (table of contents with summaries).
|
|
142
|
+
7. When you know what to search for, call `knowhere_grep` with `conditions: [{ pattern: "your query" }]` — this searches all text fields (content, summary, keywords, path) in one call. Add more conditions to narrow results (e.g. filter by `chunk.type` or `chunk.path`).
|
|
143
|
+
8. Call `knowhere_grep` with a path condition to narrow results to a specific branch when browsing by structure.
|
|
144
|
+
9. Call `knowhere_read_result_file` for `hierarchy.json`, `kb.csv`, table HTML, or image assets when the answer depends on parser rows, rich table structure, or visual content.
|
|
141
145
|
|
|
142
146
|
## Reasoning rules
|
|
143
147
|
|
|
144
148
|
- Prefer `knowhere_grep` for all text search. It supports composable AND conditions, regex, and normalizes HTML/LaTeX/unicode before matching. Use `knowhere_preview_document` when you need a quick overview and structural browsing by path.
|
|
145
149
|
- Use `knowhere_preview_document` before broad reads when the document is large or the relevant branch is unclear.
|
|
150
|
+
- Use Knowhere as the only parser for document read. If Knowhere fails, surface the real error to the user instead of switching to another parsing approach.
|
|
146
151
|
- Keep `path` in your reasoning and in your answer when possible. It restores section position and improves grounding.
|
|
147
152
|
- Use `chunkId` and `path` internally for your own reasoning and tool calls, but do not expose them to the user. When citing sources, use human-readable section names derived from the path (e.g., "第7章 维护、保养" instead of `Default_Root/f339a970...-->7 维护、保养`). Never show raw `docId`, `chunkId`, or internal file paths in user-facing replies.
|
|
148
153
|
- For image or table questions, inspect matching `image` or `table` chunks and the related manifest asset entries before answering. Use `knowhere_read_result_file` with the chunk's `assetFilePath` to prepare image assets for delivery, then use the returned `message` tool handoff when the user wants to see the image. Do not call `read` on the staged image path because it may live outside the agent sandbox.
|