@ontos-ai/knowhere-claw 0.1.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/README.md +163 -0
  2. package/dist/_virtual/_rolldown/runtime.js +37 -0
  3. package/dist/client.d.ts +33 -0
  4. package/dist/client.js +395 -0
  5. package/dist/config.d.ts +6 -0
  6. package/dist/config.js +132 -0
  7. package/dist/error-message.d.ts +1 -0
  8. package/dist/error-message.js +48 -0
  9. package/dist/hooks.d.ts +8 -0
  10. package/dist/hooks.js +415 -0
  11. package/dist/index.d.ts +9 -0
  12. package/dist/index.js +43 -0
  13. package/dist/node_modules/.pnpm/@knowhere-ai_sdk@0.1.1/node_modules/@knowhere-ai/sdk/dist/index.js +717 -0
  14. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/adapters/adapters.js +83 -0
  15. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/adapters/fetch.js +170 -0
  16. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/adapters/xhr.js +106 -0
  17. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/axios.js +57 -0
  18. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/cancel/CancelToken.js +90 -0
  19. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/cancel/CanceledError.js +20 -0
  20. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/cancel/isCancel.js +6 -0
  21. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/core/Axios.js +174 -0
  22. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/core/AxiosError.js +70 -0
  23. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/core/AxiosHeaders.js +204 -0
  24. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/core/InterceptorManager.js +60 -0
  25. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/core/buildFullPath.js +20 -0
  26. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/core/dispatchRequest.js +52 -0
  27. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/core/mergeConfig.js +81 -0
  28. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/core/settle.js +18 -0
  29. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/core/transformData.js +25 -0
  30. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/defaults/index.js +107 -0
  31. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/defaults/transitional.js +9 -0
  32. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/env/data.js +4 -0
  33. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/AxiosURLSearchParams.js +50 -0
  34. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/HttpStatusCode.js +77 -0
  35. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/bind.js +15 -0
  36. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/buildURL.js +40 -0
  37. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/combineURLs.js +14 -0
  38. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/composeSignals.js +39 -0
  39. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/cookies.js +31 -0
  40. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/formDataToJSON.js +67 -0
  41. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/isAbsoluteURL.js +14 -0
  42. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/isAxiosError.js +14 -0
  43. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/isURLSameOrigin.js +8 -0
  44. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/parseHeaders.js +53 -0
  45. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/parseProtocol.js +7 -0
  46. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/progressEventReducer.js +38 -0
  47. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/resolveConfig.js +36 -0
  48. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/speedometer.js +36 -0
  49. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/spread.js +29 -0
  50. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/throttle.js +38 -0
  51. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/toFormData.js +151 -0
  52. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/toURLEncodedForm.js +18 -0
  53. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/trackStream.js +69 -0
  54. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/validator.js +76 -0
  55. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/platform/browser/classes/Blob.js +4 -0
  56. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/platform/browser/classes/FormData.js +4 -0
  57. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/platform/browser/classes/URLSearchParams.js +5 -0
  58. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/platform/browser/index.js +22 -0
  59. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/platform/common/utils.js +46 -0
  60. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/platform/index.js +9 -0
  61. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/utils.js +698 -0
  62. package/dist/node_modules/.pnpm/fflate@0.8.2/node_modules/fflate/esm/browser.js +426 -0
  63. package/dist/node_modules/.pnpm/jszip@3.10.1/node_modules/jszip/dist/jszip.min.js +3110 -0
  64. package/dist/parser.d.ts +16 -0
  65. package/dist/parser.js +323 -0
  66. package/dist/session.d.ts +11 -0
  67. package/dist/session.js +78 -0
  68. package/dist/store.d.ts +62 -0
  69. package/dist/store.js +482 -0
  70. package/dist/text.d.ts +10 -0
  71. package/dist/text.js +34 -0
  72. package/dist/tools.d.ts +9 -0
  73. package/dist/tools.js +1177 -0
  74. package/dist/tracker-progress.d.ts +8 -0
  75. package/dist/tracker-progress.js +197 -0
  76. package/dist/types.d.ts +247 -0
  77. package/dist/types.js +9 -0
  78. package/openclaw.plugin.json +107 -0
  79. package/package.json +61 -0
  80. package/skills/knowhere/SKILL.md +243 -0
@@ -0,0 +1,243 @@
1
+ ---
2
+ name: knowhere
3
+ description: Use Knowhere plugin tools to ingest local files or URLs, browse stored document structure with path and chunk tools, inspect raw result files, inspect jobs, and manage the current Knowhere scope. Use this whenever the user asks to parse, inspect, summarize, quote, import, remove, or clear Knowhere-managed documents or attachments.
4
+ user-invocable: false
5
+ ---
6
+
7
+ # Knowhere Skill
8
+
9
+ Use the `knowhere_*` tools for explicit document ingestion and browse-first stored-result workflows.
10
+
11
+ ## Terminology
12
+
13
+ Use these terms consistently:
14
+
15
+ - `stored document`: one locally stored document identified by `docId`
16
+ - `scope`: the current local storage scope
17
+ - `path`: a structural path derived from Knowhere chunk paths
18
+ - `chunk`: one stored parsed unit identified by `chunkId`
19
+ - `asset path`: the relative path to a stored image or table asset under `result/`
20
+
21
+ ## Response Style
22
+
23
+ Keep tool-driven replies short and labeled.
24
+
25
+ - Reuse labels such as `Scope`, `Source`, `File`, `Chunks`, `Job ID`, and `Next` when relaying tool results.
26
+ - Prefer one short status line plus the key fields the user needs for the next step.
27
+
28
+ ## When to use Knowhere
29
+
30
+ Use Knowhere when the user wants to:
31
+
32
+ - ingest a local file or URL into the current scope
33
+ - inspect, summarize, or quote previously ingested documents
34
+ - inspect ingest jobs or import a completed Knowhere job
35
+ - preview, list, remove, or clear stored documents
36
+ - understand what fields exist inside the stored result package
37
+
38
+ Do not assume an uploaded attachment was already ingested. If the user asks you to use an attached file and no existing Knowhere result already covers it, call `knowhere_ingest_document` yourself.
39
+
40
+ ## Attachment markers
41
+
42
+ When a prompt contains a marker like:
43
+
44
+ ```text
45
+ [media attached: /absolute/path/to/file.pdf (application/pdf) | handbook.pdf]
46
+ ```
47
+
48
+ Use:
49
+
50
+ - the exact absolute path as `filePath`
51
+ - the visible filename as `fileName`
52
+
53
+ This preserves the original filename when the local path is a temporary UUID-based attachment path.
54
+
55
+ ## Stored result model
56
+
57
+ The canonical Knowhere delivery format behind `result_url` is a ZIP package.
58
+ This plugin stores that package in extracted form on disk: lightweight local
59
+ metadata in `metadata.json`, a plugin-local browse index in `browse-index.json`,
60
+ and the unzipped Knowhere result files under `result/`.
61
+
62
+ The important stored files are:
63
+
64
+ - `metadata.json`: plugin-local sidecar. It maps the local `docId`, source labels, tags, timestamps, and job metadata to the extracted Knowhere package under `result/`.
65
+ - `browse-index.json`: plugin-local browse helper. It tracks structural paths, chunk order, and result-file inventory for fast path and file browsing.
66
+ - `manifest.json`: package inventory. Read this first. It tells you what optional files and assets exist and carries `job_id`, `data_id`, `source_file_name`, checksum, and statistics.
67
+ - `chunks.json`: primary retrieval dataset. It contains stable chunk IDs, chunk types, `path`, `content`, summaries, and metadata.
68
+ - `hierarchy.json`: document tree rebuilt from parser paths. Use it when structural context matters and the raw path tree is not enough.
69
+ - `full.md`: convenience whole-document markdown view. Good for broad skimming, but do not treat it as the only grounding source.
70
+ - `images/` and `tables/`: raw extracted assets referenced by chunk metadata and manifest entries. The plugin stores these untouched.
71
+ - `kb.csv`: raw parser export when present. Use it when you need parser rows, joins, or custom downstream transforms.
72
+
73
+ Each stored chunk exposes these core fields through the chunk tools:
74
+
75
+ - `chunkId`: stable chunk identifier
76
+ - `type`: `text`, `image`, or `table`
77
+ - `path`: best-effort hierarchy/path label
78
+ - `summary`: chunk summary
79
+ - `content`: original stored text payload
80
+ - `contentLength`: character count of the content field (available in summary mode to help budget reads)
81
+ - `tokens`: parser token hint when present
82
+ - `keywords`: search hints from the parser
83
+ - `relationships`: best-effort related chunk metadata
84
+ - `assetFilePath`: relative path to the stored asset for image/table chunks
85
+
86
+ Useful joins:
87
+
88
+ - `manifest.files.images[].id == chunks[].chunkId` for image chunks
89
+ - `manifest.files.tables[].id == chunks[].chunkId` for table chunks
90
+ - `chunks[].assetFilePath` points to the stored asset path under `result/`
91
+
92
+ ## String truncation
93
+
94
+ Chunk and file tools truncate string fields to `maxStringChars` (default 4000).
95
+ When `truncatedStrings: true` appears, retry with a higher value (e.g. 12000, up to 20000) to get the full content.
96
+
97
+ ## Tool selection
98
+
99
+ - `knowhere_ingest_document` for new local files or URLs
100
+ - `knowhere_list_documents` to find candidate document IDs in the current scope
101
+ - `knowhere_read_result_file` for `manifest.json`, `hierarchy.json`, `kb.csv`, table HTML files, or other text-like files under `result/`
102
+ - `knowhere_preview_document` for a quick overview: markdown preview plus the structural path tree (like a book index)
103
+ - `knowhere_grep` to search chunks with composable AND conditions across fields. This is the recommended default for text search — just pass `conditions: [{ pattern: "your query" }]` with no target to search all text fields at once. Use targeted conditions only when you need to narrow by specific fields like `chunk.type` or `chunk.path`.
104
+ - `knowhere_list_jobs`, `knowhere_get_job_status`, and `knowhere_import_completed_job` for async Knowhere jobs
105
+ - `knowhere_remove_document` and `knowhere_clear_scope` for cleanup
106
+
107
+ After ingesting a document, use the returned document or job identifiers for follow-up operations instead of guessing names.
108
+
109
+ ## Recommended workflow
110
+
111
+ 1. Ingest or import the document if it is not already in the store.
112
+ 2. Call `knowhere_list_documents` if you need to confirm the right `docId`.
113
+ 3. Call `knowhere_preview_document` to get a structural overview (table of contents with summaries).
114
+ 4. When you know what to search for, call `knowhere_grep` with `conditions: [{ pattern: "your query" }]` — this searches all text fields (content, summary, keywords, path) in one call. Add more conditions to narrow results (e.g. filter by `chunk.type` or `chunk.path`).
115
+ 5. Call `knowhere_grep` with a path condition to narrow results to a specific branch when browsing by structure.
116
+ 6. Call `knowhere_read_result_file` for `hierarchy.json`, `kb.csv`, or table HTML when the answer depends on parser rows or rich table structure.
117
+
118
+ ## Reasoning rules
119
+
120
+ - Prefer `knowhere_grep` for all text search. It supports composable AND conditions and regex, and it normalizes HTML/LaTeX/unicode before matching. Use `knowhere_preview_document` when you need a quick overview or want to browse the structure by path.
121
+ - Use `knowhere_preview_document` before broad reads when the document is large or the relevant branch is unclear.
122
+ - Keep `path` in your reasoning and in your answer when possible. It restores section position and improves grounding.
123
+ - Cite `chunkId` and `path` when answering from retrieved chunks.
124
+ - For image or table questions, inspect matching `image` or `table` chunks and the related manifest asset entries before answering.
125
+ - Do not rely on `full.md` alone if the question depends on exact section boundaries, tables, or images.
126
+ - If the task needs raw `kb.csv`, raw HTML tables, or another stored text file under `result/`, read it directly with `knowhere_read_result_file`.
127
+ - When a tool response contains `truncatedStrings: true`, retry with `maxStringChars: 12000` (or up to 20000) before answering from incomplete content.
128
+
129
+ ## Tool usage examples
130
+
131
+ Use a real stored `docId` returned by `knowhere_ingest_document`, `knowhere_import_completed_job`, or `knowhere_list_documents`.
132
+
133
+ Ingest a local file:
134
+
135
+ ```json
136
+ {
137
+ "filePath": "/tmp/uploads/handbook.pdf",
138
+ "fileName": "handbook.pdf"
139
+ }
140
+ ```
141
+
142
+ Ingest a URL:
143
+
144
+ ```json
145
+ {
146
+ "url": "https://example.com/report-2026.pdf",
147
+ "title": "Q1 Report"
148
+ }
149
+ ```
150
+
151
+ Check job status:
152
+
153
+ ```json
154
+ {
155
+ "jobId": "job_f6f12930906a"
156
+ }
157
+ ```
158
+
159
+ Import a completed job:
160
+
161
+ ```json
162
+ {
163
+ "jobId": "job_f6f12930906a"
164
+ }
165
+ ```
166
+
167
+ Read manifest JSON:
168
+
169
+ ```json
170
+ {
171
+ "docId": "handbook-1234",
172
+ "filePath": "manifest.json",
173
+ "mode": "json"
174
+ }
175
+ ```
176
+
177
+ Preview a document:
178
+
179
+ ```json
180
+ {
181
+ "docId": "handbook-1234"
182
+ }
183
+ ```
184
+
185
+ Grep for text across all chunk fields (recommended default):
186
+
187
+ ```json
188
+ {
189
+ "docId": "paper-pdf-a370ef58",
190
+ "conditions": [{ "pattern": "npm audit" }]
191
+ }
192
+ ```
193
+
194
+ Grep with multiple AND conditions (e.g. table chunks mentioning "precision"):
195
+
196
+ ```json
197
+ {
198
+ "docId": "paper-pdf-a370ef58",
199
+ "conditions": [{ "target": "chunk.type", "pattern": "table" }, { "pattern": "precision" }]
200
+ }
201
+ ```
202
+
203
+ Grep with path filter and sibling context:
204
+
205
+ ```json
206
+ {
207
+ "docId": "paper-pdf-a370ef58",
208
+ "conditions": [{ "target": "chunk.path", "pattern": "EVALUATION" }],
209
+ "includeContext": true
210
+ }
211
+ ```
212
+
213
+ Grep with regex:
214
+
215
+ ```json
216
+ {
217
+ "docId": "paper-pdf-a370ef58",
218
+ "conditions": [{ "pattern": "\\bF1[- ]score\\b", "regex": true }]
219
+ }
220
+ ```
221
+
222
+ Read a table HTML file:
223
+
224
+ ```json
225
+ {
226
+ "docId": "handbook-1234",
227
+ "filePath": "tables/table-1.html",
228
+ "mode": "text"
229
+ }
230
+ ```
231
+
232
+ Example workflow for a question like `What does the handbook say about hotel limits?`:
233
+
234
+ 1. Call `knowhere_list_documents` to find the right `docId` if you do not already have it.
235
+ 2. Call `knowhere_read_result_file` with `filePath: "manifest.json"` to confirm the package layout.
236
+ 3. Call `knowhere_preview_document` with `mode: "paths"` and look for a relevant branch such as `Travel` or `Expenses`.
237
+ 4. Call `knowhere_grep` with `conditions: [{ "pattern": "hotel" }]` to find matching chunks, or add a path condition to narrow to the branch.
238
+ 5. If the answer depends on a table, read the linked `assetFilePath` with `knowhere_read_result_file`.
239
+ 6. Answer from the matched chunks and cite `chunkId` and `path`.
240
+
241
+ Do not rely on prompt-loaded excerpts as the main workflow. Prefer reading the stored result directly from disk.
242
+
243
+ If Knowhere returns an API error, quote the error directly in your reply.