@ontos-ai/knowhere-mcp 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Knowhere Team
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,202 @@
1
+ # Knowhere MCP
2
+
3
+ `@ontos-ai/knowhere-mcp` is a thin Model Context Protocol wrapper around
4
+ `@ontos-ai/knowhere-sdk`.
5
+
6
+ The SDK owns the parse, cache, outline, read, grep, and search behavior. This
7
+ package only exposes that SDK interface as MCP tools.
8
+
9
+ ## Install
10
+
11
+ ```bash
12
+ npm install @ontos-ai/knowhere-mcp
13
+ ```
14
+
15
+ ## Run
16
+
17
+ ```bash
18
+ npx -y @ontos-ai/knowhere-mcp login
19
+ npx -y @ontos-ai/knowhere-mcp
20
+ ```
21
+
22
+ The server uses stdio transport and stores expanded Knowhere result files under
23
+ the SDK local knowledge cache by default. `knowhere-mcp login` opens the
24
+ Knowhere dashboard in your browser and stores a local MCP login at
25
+ `~/.knowhere-node-sdk/mcp/auth.json`.
26
+
27
+ During login, the dashboard asks you to choose a Permission:
28
+
29
+ - Read only: query Knowhere and read existing parsed documents. Parse and
30
+ delete tools are not exposed to the MCP host.
31
+ - Full access: query, read, parse URLs/files, cache completed parse jobs, and
32
+ archive documents.
33
+
34
+ Useful auth commands:
35
+
36
+ ```bash
37
+ npx -y @ontos-ai/knowhere-mcp login
38
+ npx -y @ontos-ai/knowhere-mcp status
39
+ npx -y @ontos-ai/knowhere-mcp logout
40
+ ```
41
+
42
+ `knowhere-mcp status` shows the stored Permission for the current login.
43
+
44
+ Set `KNOWHERE_DASHBOARD_URL` when logging in through a non-default dashboard.
45
+ Set `KNOWHERE_BASE_URL` only when using a non-default Knowhere API endpoint.
46
+ `KNOWHERE_API_KEY` is still supported as a manual fallback and takes precedence
47
+ over the local dashboard login. API-key authentication runs with full access.
48
+
49
+ ## Connect From MCP Hosts
50
+
51
+ The package is a local stdio MCP server. Use `npx -y @ontos-ai/knowhere-mcp`
52
+ as the server command in hosts that manage MCP processes for you. Run the login
53
+ command once before connecting a host:
54
+
55
+ ```bash
56
+ npx -y @ontos-ai/knowhere-mcp login
57
+ ```
58
+
59
+ The host config does not need `KNOWHERE_API_KEY` when dashboard login is used.
60
+ Do not commit real API keys to shared project config files if you choose the
61
+ manual API-key fallback.
62
+
63
+ ### Codex
64
+
65
+ Codex stores MCP servers in `~/.codex/config.toml` by default. Trusted projects
66
+ can also use project-scoped `.codex/config.toml`.
67
+
68
+ Add the server with the Codex CLI:
69
+
70
+ ```bash
71
+ codex mcp add knowhere -- npx -y @ontos-ai/knowhere-mcp
72
+ ```
73
+
74
+ Or edit `config.toml` directly:
75
+
76
+ ```toml
77
+ [mcp_servers.knowhere]
78
+ command = "npx"
79
+ args = ["-y", "@ontos-ai/knowhere-mcp"]
80
+ startup_timeout_sec = 20
81
+ tool_timeout_sec = 120
82
+ ```
83
+
84
+ For project-scoped config with a non-default API endpoint, forward only the
85
+ endpoint variable:
86
+
87
+ ```toml
88
+ [mcp_servers.knowhere]
89
+ command = "npx"
90
+ args = ["-y", "@ontos-ai/knowhere-mcp"]
91
+ env_vars = ["KNOWHERE_BASE_URL"]
92
+ ```
93
+
94
+ Restart Codex or run `/mcp` in the Codex TUI to inspect connected MCP servers.
95
+
96
+ ### Claude Code
97
+
98
+ Add the server with the Claude Code CLI:
99
+
100
+ ```bash
101
+ claude mcp add \
102
+ --transport stdio \
103
+ knowhere \
104
+ -- npx -y @ontos-ai/knowhere-mcp
105
+ ```
106
+
107
+ Use `/mcp` inside Claude Code to verify the server and `claude mcp list` to see
108
+ configured servers.
109
+
110
+ For a project-shared `.mcp.json`, forward environment variables rather than
111
+ committing secrets. With dashboard login, the config needs no credentials:
112
+
113
+ ```json
114
+ {
115
+ "mcpServers": {
116
+ "knowhere": {
117
+ "type": "stdio",
118
+ "command": "npx",
119
+ "args": ["-y", "@ontos-ai/knowhere-mcp"]
120
+ }
121
+ }
122
+ }
123
+ ```
124
+
125
+ ### Claude Desktop
126
+
127
+ Open Claude Desktop settings, go to the developer settings, and edit
128
+ `claude_desktop_config.json`.
129
+
130
+ Config file locations:
131
+
132
+ - macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`
133
+ - Windows: `%APPDATA%\Claude\claude_desktop_config.json`
134
+
135
+ Add the Knowhere server:
136
+
137
+ ```json
138
+ {
139
+ "mcpServers": {
140
+ "knowhere": {
141
+ "type": "stdio",
142
+ "command": "npx",
143
+ "args": ["-y", "@ontos-ai/knowhere-mcp"]
144
+ }
145
+ }
146
+ }
147
+ ```
148
+
149
+ Save the file and fully restart Claude Desktop. If the server does not appear,
150
+ check Claude's MCP logs and verify that `node`, `npm`, and `npx` are available
151
+ from the desktop app's environment.
152
+
153
+ ### Other Stdio MCP Hosts
154
+
155
+ Use the same process configuration:
156
+
157
+ ```json
158
+ {
159
+ "command": "npx",
160
+ "args": ["-y", "@ontos-ai/knowhere-mcp"]
161
+ }
162
+ ```
163
+
164
+ Host documentation:
165
+
166
+ - [Codex MCP configuration](https://developers.openai.com/codex/mcp)
167
+ - [Claude Code MCP configuration](https://docs.anthropic.com/en/docs/claude-code/mcp)
168
+ - [Claude Desktop local MCP servers](https://modelcontextprotocol.io/docs/develop/connect-local-servers)
169
+
170
+ ## Tools
171
+
172
+ When logged in with Read only permission, the MCP server exposes only
173
+ `knowhere_search`, `knowhere_list_documents`,
174
+ `knowhere_get_document_outline`, `knowhere_read_chunks`,
175
+ `knowhere_grep_chunks`, `knowhere_async_get_job_status`, and
176
+ `knowhere_async_cache_job_result`. With Full access, every tool listed below is exposed.
177
+
178
+ - `knowhere_parse_url`: blocking parse for a remote URL; waits for completion
179
+ and caches the result locally.
180
+ - `knowhere_parse_file`: blocking parse for a file path available to the MCP
181
+ process; waits for completion and caches the result locally.
182
+ - `knowhere_async_parse_url`: start parsing a remote URL and return the job
183
+ immediately.
184
+ - `knowhere_async_parse_file`: start parsing a local file path, upload it if
185
+ needed, and return the job immediately.
186
+ - `knowhere_async_get_job_status`: check a parse job status; completed jobs
187
+ started by async parse tools are cached locally automatically.
188
+ - `knowhere_async_cache_job_result`: manually cache a completed parse job result
189
+ locally, mainly for recovery or jobs started outside the async parse tools.
190
+ - `knowhere_list_documents`: list locally cached parse results.
191
+ - `knowhere_delete_document`: archive, or soft-delete, a published Knowhere
192
+ document through the Knowhere API.
193
+ - `knowhere_get_document_outline`: inspect a cached document outline.
194
+ - `knowhere_read_chunks`: read exact chunks from a cached result.
195
+ - `knowhere_grep_chunks`: run local literal or regex grep over cached chunks.
196
+ - `knowhere_search`: search published documents through the Knowhere API
197
+ retrieval query.
198
+
199
+ ## Package Boundary
200
+
201
+ Use `@ontos-ai/knowhere-sdk` directly when building an app. Install this MCP
202
+ package when an agent host needs a local MCP server.
@@ -0,0 +1,319 @@
1
+ // src/index.ts
2
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
3
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
4
+ import {
5
+ Knowhere,
6
+ VERSION,
7
+ ValidationError
8
+ } from "@ontos-ai/knowhere-sdk";
9
+ import * as z from "zod/v4";
10
/**
 * Optional, nested parsing options shared by the parse tools.
 * Every field is optional, and the object as a whole may be omitted.
 * The fields are flattened by `toFlatParsingParams` before being passed on.
 */
var parsingParamsSchema = (() => {
  // Fresh schema instances per field, so no two fields share one object.
  const optionalFlag = () => z.boolean().optional();
  const optionalText = () => z.string().optional();

  return z
    .object({
      model: z.enum(["base", "advanced"]).optional(),
      ocrEnabled: optionalFlag(),
      kbDir: optionalText(),
      docType: z.enum(["auto", "pdf", "docx", "txt", "md"]).optional(),
      smartTitleParse: optionalFlag(),
      summaryImage: optionalFlag(),
      summaryTable: optionalFlag(),
      summaryTxt: optionalFlag(),
      addFragDesc: optionalText(),
    })
    .optional();
})();

/**
 * Output shape used by every registered tool: a single `result` key holding
 * a record with string keys and unknown values.
 */
var objectOutputSchema = {
  result: z.record(z.string(), z.unknown()),
};
24
/**
 * Builds an MCP server whose tools delegate to the Knowhere SDK.
 *
 * - Uses `options.client` when given; otherwise constructs a `Knowhere`
 *   client from `options.authTokenProvider` and `options.baseURL`.
 * - When `options.cacheDirectory` is defined, the knowledge interface is
 *   obtained through `withCacheDirectory`.
 * - Unless `options.recoverPendingJobsOnStart` is exactly `false`,
 *   `recoverPendingAsyncParseJobs()` is awaited before any tool is registered.
 * - The parse and delete tools are registered only when the permission is
 *   `"full_access"`, which is the default when `options.permission` is unset.
 *
 * @param options Optional server configuration.
 * @returns The configured (not yet connected) `McpServer`.
 */
async function createKnowhereMcpServer(options) {
  const settings = options ?? {};

  const client =
    settings.client ??
    new Knowhere({
      authTokenProvider: settings.authTokenProvider,
      baseURL: settings.baseURL,
    });

  let knowledge = client.knowledge;
  if (settings.cacheDirectory !== undefined) {
    knowledge = knowledge.withCacheDirectory(settings.cacheDirectory);
  }

  if (settings.recoverPendingJobsOnStart !== false) {
    await knowledge.recoverPendingAsyncParseJobs();
  }

  const server = new McpServer({
    name: "knowhere-local-knowledge",
    version: VERSION,
  });

  const canWrite = (settings.permission ?? "full_access") === "full_access";

  // Schema factories: each call yields a new schema instance.
  const optionalText = () => z.string().optional();
  const optionalFlag = () => z.boolean().optional();
  const optionalPositiveInt = () => z.number().int().positive().optional();
  const optionalChunkType = () => z.enum(["text", "image", "table"]).optional();

  const urlParseShape = () => ({
    url: z.string().url(),
    namespace: optionalText(),
    localDocumentId: optionalText(),
    dataId: optionalText(),
    parsingParams: parsingParamsSchema,
  });
  const fileParseShape = () => ({
    file: z.string().describe("Local file path available to this MCP server process."),
    fileName: optionalText(),
    namespace: optionalText(),
    localDocumentId: optionalText(),
    dataId: optionalText(),
    parsingParams: parsingParamsSchema,
  });

  const URL_FIELDS = ["url", "namespace", "localDocumentId", "dataId"];
  const FILE_FIELDS = ["file", "fileName", "namespace", "localDocumentId", "dataId"];

  // Copies the listed fields (including ones that are undefined) in order.
  const pickFields = (input, fields) => {
    const picked = {};
    for (const field of fields) {
      picked[field] = input[field];
    }
    return picked;
  };

  // Target fields first, then the flattened parsing options.
  const toParseRequest = (input, fields) => ({
    ...pickFields(input, fields),
    ...toFlatParsingParams(input.parsingParams),
  });

  // Registers one tool; every tool shares the same output schema and wraps
  // its value with createToolResult.
  const addTool = (name, description, inputSchema, run) => {
    server.registerTool(
      name,
      { description, inputSchema, outputSchema: objectOutputSchema },
      async (input) => createToolResult(await run(input))
    );
  };

  if (canWrite) {
    addTool(
      "knowhere_parse_url",
      "Blocking parse: submit a remote URL to Knowhere, wait for completion, then cache the parse result locally for outline/read/grep/search tools.",
      urlParseShape(),
      (input) => knowledge.parse(toParseRequest(input, URL_FIELDS))
    );
    addTool(
      "knowhere_parse_file",
      "Blocking parse: submit a local file path available to this MCP process, wait for completion, then cache the parse result locally.",
      fileParseShape(),
      (input) => knowledge.parse(toParseRequest(input, FILE_FIELDS))
    );
    addTool(
      "knowhere_async_parse_url",
      "Start parsing a remote URL through Knowhere and return immediately with the parse job. Poll with knowhere_async_get_job_status; completed tracked jobs are cached locally automatically.",
      urlParseShape(),
      (input) => knowledge.startParse(toParseRequest(input, URL_FIELDS))
    );
    addTool(
      "knowhere_async_parse_file",
      "Start parsing a local file path available to this MCP process, upload it if needed, and return immediately with the parse job. Poll with knowhere_async_get_job_status; completed tracked jobs are cached locally automatically.",
      fileParseShape(),
      (input) => knowledge.startParse(toParseRequest(input, FILE_FIELDS))
    );
  }

  addTool(
    "knowhere_async_get_job_status",
    "Fetch the current status for a Knowhere parse job. If the job was started by an async parse tool and is done, this also caches the result locally for outline/read/grep/search.",
    { jobId: z.string() },
    (input) => knowledge.getJobStatus(input.jobId)
  );

  addTool(
    "knowhere_async_cache_job_result",
    "Manually load a completed Knowhere parse job result and cache it locally. Usually not needed for jobs started by async parse tools because knowhere_async_get_job_status auto-caches them when done.",
    {
      jobId: z.string(),
      localDocumentId: optionalText(),
      verifyChecksum: optionalFlag(),
    },
    (input) =>
      knowledge.cacheJobResult(
        pickFields(input, ["jobId", "localDocumentId", "verifyChecksum"])
      )
  );

  addTool(
    "knowhere_list_documents",
    "List parse results cached locally by this SDK-backed MCP server.",
    {},
    async () => ({ documents: await knowledge.listDocuments() })
  );

  if (canWrite) {
    addTool(
      "knowhere_delete_document",
      "Archive, or soft-delete, a published Knowhere document through the Knowhere API. Provide documentId directly, or localDocumentId for a cached parse result that has a server documentId.",
      {
        documentId: optionalText(),
        localDocumentId: optionalText(),
      },
      (input) => archiveDocument({ client, knowledge, params: input })
    );
  }

  addTool(
    "knowhere_get_document_outline",
    "Return the local outline for a cached parsed document.",
    { localDocumentId: z.string() },
    (input) => knowledge.getDocumentOutline(input.localDocumentId)
  );

  addTool(
    "knowhere_read_chunks",
    "Read exact chunks from a cached local parse result.",
    {
      localDocumentId: z.string(),
      sectionPath: optionalText(),
      startChunk: optionalPositiveInt(),
      endChunk: optionalPositiveInt(),
      chunkId: optionalText(),
      chunkType: optionalChunkType(),
      limit: optionalPositiveInt(),
    },
    (input) => knowledge.readChunks(input)
  );

  addTool(
    "knowhere_grep_chunks",
    "Run grep-style literal or regex matching against cached local chunks.",
    {
      localDocumentId: z.string(),
      pattern: z.string(),
      isRegex: optionalFlag(),
      isCaseSensitive: optionalFlag(),
      maxResults: optionalPositiveInt(),
      chunkType: optionalChunkType(),
      sectionPathPrefix: optionalText(),
      contextChars: z.number().int().nonnegative().optional(),
    },
    (input) => knowledge.grepChunks(input)
  );

  addTool(
    "knowhere_search",
    "Search published Knowhere documents with the Knowhere API retrieval query. localDocumentIds only map returned server document IDs back to local cache IDs when available.",
    {
      query: z.string(),
      namespace: optionalText(),
      topK: optionalPositiveInt(),
      localDocumentIds: z.array(z.string()).optional(),
      useAgentic: optionalFlag(),
    },
    (input) => knowledge.search(input)
  );

  return server;
}
253
/**
 * Creates the Knowhere MCP server and connects it to a stdio transport.
 *
 * @param options Forwarded unchanged to `createKnowhereMcpServer`.
 * @returns A promise that resolves once `server.connect` has completed.
 */
async function runKnowhereMcpServer(options) {
  const mcpServer = await createKnowhereMcpServer(options);
  await mcpServer.connect(new StdioServerTransport());
}
258
/**
 * Wraps a tool's return value in the result object returned by tool handlers.
 *
 * The value is placed under a `result` key. That wrapper is returned both as
 * `structuredContent` and as a pretty-printed (2-space) JSON text item.
 *
 * @param result Value produced by the tool handler.
 * @returns An object with `content` (one text item) and `structuredContent`.
 */
function createToolResult(result) {
  const payload = { result };
  const text = JSON.stringify(payload, null, 2);
  return {
    content: [{ type: "text", text }],
    structuredContent: payload
  };
}
265
/**
 * Converts the nested `parsingParams` tool input into flat option fields.
 *
 * The only renamed field is `ocrEnabled`, which becomes `ocr`; all other
 * fields keep their names. Fields that were not supplied are still present
 * in the result with the value `undefined`.
 *
 * @param parsingParams Parsed `parsingParams` input, or a falsy value.
 * @returns An empty object for falsy input, otherwise the flat options.
 */
function toFlatParsingParams(parsingParams) {
  if (!parsingParams) {
    return {};
  }
  const {
    model,
    ocrEnabled,
    docType,
    smartTitleParse,
    summaryImage,
    summaryTable,
    summaryTxt,
    addFragDesc,
    kbDir
  } = parsingParams;
  return {
    model,
    ocr: ocrEnabled,
    docType,
    smartTitleParse,
    summaryImage,
    summaryTable,
    summaryTxt,
    addFragDesc,
    kbDir
  };
}
281
/**
 * Archives a document through `client.documents.archive`.
 *
 * The server document id is first resolved with `resolveArchiveTarget`, using
 * either the `documentId` or the `localDocumentId` in `params.params`.
 *
 * @param params Object holding `client`, `knowledge`, and the tool input as `params`.
 * @returns The value returned by the archive call as `document`, plus the
 *   resolved `localDocumentId` (which may be undefined).
 */
async function archiveDocument(params) {
  const { client, knowledge, params: toolInput } = params;
  const target = await resolveArchiveTarget(knowledge, toolInput);
  const document = await client.documents.archive(target.documentId);
  return { document, localDocumentId: target.localDocumentId };
}
289
/**
 * Determines which server document an archive request refers to.
 *
 * A truthy `documentId` is used as-is, and any `localDocumentId` is passed
 * through unchanged. Otherwise the cached document with the given
 * `localDocumentId` is looked up and its `documentId` is used.
 *
 * @param knowledge Knowledge interface used to look up cached documents.
 * @param params Tool input with optional `documentId` and `localDocumentId`.
 * @returns An object with `documentId` and `localDocumentId`.
 * @throws ValidationError when neither id is provided.
 * @throws Error when the cached document is missing or has no server `documentId`.
 */
async function resolveArchiveTarget(knowledge, params) {
  const { documentId, localDocumentId } = params;

  // An explicit server id wins; no cache lookup is needed.
  if (documentId) {
    return { documentId, localDocumentId };
  }
  if (!localDocumentId) {
    throw new ValidationError("documentId or localDocumentId is required");
  }

  const cached = await findLocalDocument(knowledge, localDocumentId);
  if (!cached) {
    throw new Error(`Local Knowhere document not found: ${localDocumentId}`);
  }
  if (!cached.documentId) {
    throw new Error(`Local Knowhere document has no server documentId: ${localDocumentId}`);
  }

  return {
    documentId: cached.documentId,
    localDocumentId: cached.localDocumentId
  };
}
311
/**
 * Finds the first entry of `knowledge.listDocuments()` with a matching id.
 *
 * @param knowledge Object exposing an async `listDocuments()` method.
 * @param localDocumentId Local id to match with strict equality.
 * @returns The first matching document, or `undefined` when none matches.
 */
async function findLocalDocument(knowledge, localDocumentId) {
  const cachedDocuments = await knowledge.listDocuments();
  for (const candidate of cachedDocuments) {
    if (candidate.localDocumentId === localDocumentId) {
      return candidate;
    }
  }
  return undefined;
}
315
+
316
+ export {
317
+ createKnowhereMcpServer,
318
+ runKnowhereMcpServer
319
+ };
@@ -0,0 +1,17 @@
1
+ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
2
+ import { Knowhere, AuthTokenProvider } from '@ontos-ai/knowhere-sdk';
3
+
4
/**
 * Access level for the MCP server. The parse and delete tools are registered
 * only for `'full_access'`.
 */
type Permission = 'read_only' | 'full_access';

/** Options accepted by `createKnowhereMcpServer` and `runKnowhereMcpServer`. */
interface KnowhereMcpServerOptions {
    /** SDK client to use. When omitted, one is built from `authTokenProvider` and `baseURL`. */
    client?: Knowhere;
    /** Passed to the `Knowhere` constructor when no `client` is given. */
    authTokenProvider?: AuthTokenProvider;
    /** Passed to the `Knowhere` constructor when no `client` is given. */
    baseURL?: string;
    /** When defined, the knowledge interface is obtained via `withCacheDirectory`. */
    cacheDirectory?: string;
    /** Defaults to `'full_access'` when omitted. */
    permission?: Permission;
    /** Pending async parse jobs are recovered on start unless this is `false`. */
    recoverPendingJobsOnStart?: boolean;
}
/** Builds the MCP server and registers the Knowhere tools on it, without connecting a transport. */
declare function createKnowhereMcpServer(options?: KnowhereMcpServerOptions): Promise<McpServer>;
/** Builds the MCP server and connects it to a stdio transport. */
declare function runKnowhereMcpServer(options?: KnowhereMcpServerOptions): Promise<void>;

// `Permission` is exported so consumers can name the type of the `permission` option.
export { type KnowhereMcpServerOptions, type Permission, createKnowhereMcpServer, runKnowhereMcpServer };
@@ -0,0 +1,17 @@
1
+ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
2
+ import { Knowhere, AuthTokenProvider } from '@ontos-ai/knowhere-sdk';
3
+
4
/**
 * Access level for the MCP server. The parse and delete tools are registered
 * only for `'full_access'`.
 */
type Permission = 'read_only' | 'full_access';

/** Options accepted by `createKnowhereMcpServer` and `runKnowhereMcpServer`. */
interface KnowhereMcpServerOptions {
    /** SDK client to use. When omitted, one is built from `authTokenProvider` and `baseURL`. */
    client?: Knowhere;
    /** Passed to the `Knowhere` constructor when no `client` is given. */
    authTokenProvider?: AuthTokenProvider;
    /** Passed to the `Knowhere` constructor when no `client` is given. */
    baseURL?: string;
    /** When defined, the knowledge interface is obtained via `withCacheDirectory`. */
    cacheDirectory?: string;
    /** Defaults to `'full_access'` when omitted. */
    permission?: Permission;
    /** Pending async parse jobs are recovered on start unless this is `false`. */
    recoverPendingJobsOnStart?: boolean;
}
/** Builds the MCP server and registers the Knowhere tools on it, without connecting a transport. */
declare function createKnowhereMcpServer(options?: KnowhereMcpServerOptions): Promise<McpServer>;
/** Builds the MCP server and connects it to a stdio transport. */
declare function runKnowhereMcpServer(options?: KnowhereMcpServerOptions): Promise<void>;

// `Permission` is exported so consumers can name the type of the `permission` option.
export { type KnowhereMcpServerOptions, type Permission, createKnowhereMcpServer, runKnowhereMcpServer };