@ontos-ai/knowhere-sdk 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -147,6 +147,10 @@ For granular control over the job lifecycle:
147
147
  const job = await client.jobs.create({
148
148
  sourceType: 'file',
149
149
  fileName: 'document.pdf',
150
+ documentMetadata: {
151
+ createdByClient: 'cli',
152
+ sourceFileName: 'document.pdf',
153
+ },
150
154
  parsingParams: { model: 'advanced', ocrEnabled: true },
151
155
  });
152
156
 
@@ -168,19 +172,23 @@ const result = await client.jobs.load(jobResult);
168
172
  ### Retrieval and Document Lifecycle
169
173
 
170
174
  Published documents are queryable through the retrieval API after a job
171
- finishes. `client.jobs.create(...)` does not return a usable `documentId`;
172
- persist `jobResult.documentId` after publication if you need to update or
173
- archive the same document later.
175
+ finishes. `client.jobs.create(...)` may return a planned `documentId`; persist
176
+ `jobResult.documentId` after publication as the canonical value if you need to
177
+ update or archive the same document later.
174
178
 
175
179
  ```typescript
176
180
  const job = await client.jobs.create({
177
181
  sourceType: 'url',
178
182
  sourceUrl: 'https://example.com/manual.pdf',
179
183
  namespace: 'support-center',
184
+ documentMetadata: {
185
+ createdByClient: 'notebook',
186
+ title: 'Support manual',
187
+ },
180
188
  });
181
189
 
182
190
  const jobResult = await client.jobs.wait(job.jobId);
183
- const documentId = jobResult.documentId;
191
+ const documentId = jobResult.documentId ?? job.documentId;
184
192
 
185
193
  if (!documentId) {
186
194
  throw new Error('Expected documentId after successful publication.');
@@ -196,11 +204,11 @@ const response = await client.retrieval.query({
196
204
  useAgentic: true,
197
205
  });
198
206
 
199
- console.log(response.answerText); // LLM-generated answer
200
- console.log(response.referencedChunks); // cited evidence chunks
201
- console.log(response.evidenceText); // rendered evidence context, when returned
202
- console.log(response.stopReason); // agentic termination reason, when returned
203
- console.log(response.failureReason); // no-answer reason, when returned
207
+ console.log(response.answerText); // LLM-generated answer
208
+ console.log(response.referencedChunks); // cited evidence chunks
209
+ console.log(response.evidenceText); // rendered evidence context, when returned
210
+ console.log(response.stopReason); // agentic termination reason, when returned
211
+ console.log(response.failureReason); // no-answer reason, when returned
204
212
 
205
213
  for (const result of response.results) {
206
214
  console.log(result.content);
@@ -265,6 +273,72 @@ if (chunks.chunks[0]) {
265
273
  console.log(archived.status);
266
274
  ```
267
275
 
276
+ ### Local Knowledge Tools
277
+
278
+ The SDK can also keep parsed results in a local cache and run exact inspection
279
+ tools over that cached copy. This is the implementation used by the separate
280
+ `@ontos-ai/knowhere-mcp` package.
281
+
282
+ ```typescript
283
+ const parsed = await client.knowledge.parse({
284
+ file: './manual.pdf',
285
+ localDocumentId: 'manual-v1',
286
+ });
287
+
288
+ const outline = await client.knowledge.getDocumentOutline(parsed.document.localDocumentId);
289
+
290
+ const read = await client.knowledge.readChunks({
291
+ localDocumentId: parsed.document.localDocumentId,
292
+ sectionPath: outline.sections[0]?.sectionPath,
293
+ limit: 5,
294
+ });
295
+
296
+ const grep = await client.knowledge.grepChunks({
297
+ localDocumentId: parsed.document.localDocumentId,
298
+ pattern: 'warranty',
299
+ maxResults: 10,
300
+ });
301
+
302
+ const serverSearch = await client.knowledge.search({
303
+ query: 'battery warranty',
304
+ localDocumentIds: [parsed.document.localDocumentId],
305
+ topK: 5,
306
+ });
307
+
308
+ console.log(read.chunks);
309
+ console.log(grep.matches);
310
+ console.log(serverSearch.references);
311
+ ```
312
+
313
+ Local grep and reads use the cached parse result, not server-side chunk scans.
314
+ Search uses the Knowhere API retrieval query; local document IDs only help map
315
+ returned server document IDs back to local cache IDs when available.
316
+ The MCP package is a wrapper over this SDK interface; install it only when an
317
+ agent host needs an MCP server. See the MCP package README for Codex, Claude
318
+ Code, Claude Desktop, and generic stdio MCP host configuration examples.
319
+
320
+ For longer parses, use the non-blocking SDK flow and cache the result after the
321
+ job completes:
322
+
323
+ ```typescript
324
+ const started = await client.knowledge.startParse({
325
+ file: './manual.pdf',
326
+ localDocumentId: 'manual-v1',
327
+ });
328
+
329
+ const status = await client.knowledge.getJobStatus(started.job.jobId);
330
+
331
+ if (status.job.isDone && status.cache.document) {
332
+ console.log(status.cache.document.localDocumentId);
333
+ }
334
+ ```
335
+
336
+ When the job was started through `client.knowledge.startParse(...)`,
337
+ `getJobStatus(...)` automatically caches the completed result locally the first
338
+ time it observes `status.job.isDone`. Use `cacheJobResult(...)` only to recover a
339
+ completed job that was not started through the local knowledge helper, or to
340
+ retry a cache step explicitly.
341
+
268
342
  Follow-up queries can exclude documents or sections for one request:
269
343
 
270
344
  ```typescript
package/dist/index.d.mts CHANGED
@@ -3,12 +3,15 @@ import { Agent as Agent$1 } from 'https';
3
3
  import { ReadStream } from 'fs';
4
4
  import { AxiosRequestConfig, AxiosInstance } from 'axios';
5
5
 
6
+ type AuthTokenProvider = () => string | Promise<string>;
6
7
  /**
7
8
  * Configuration options for the Knowhere client
8
9
  */
9
10
  interface KnowhereOptions {
10
11
  /** API authentication key (defaults to KNOWHERE_API_KEY env var) */
11
12
  apiKey?: string;
13
+ /** Dynamic bearer token provider for short-lived non-API-key auth flows */
14
+ authTokenProvider?: AuthTokenProvider;
12
15
  /** API base URL (defaults to https://api.knowhereto.ai) */
13
16
  baseURL?: string;
14
17
  /** Request timeout in milliseconds (default: 60000) */
@@ -43,6 +46,8 @@ interface Job {
43
46
  dataId?: string;
44
47
  /** Retrieval namespace for the canonical document */
45
48
  namespace?: string;
49
+ /** Planned stable document identifier for this job, when returned by the API */
50
+ documentId?: string;
46
51
  /** Job creation timestamp */
47
52
  createdAt: Date;
48
53
  /** Presigned URL for file upload (if sourceType is 'file') */
@@ -151,6 +156,10 @@ interface WebhookConfig {
151
156
  /** Webhook URL to notify on job completion */
152
157
  url: string;
153
158
  }
159
+ /**
160
+ * Client-provided display metadata copied onto the published document.
161
+ */
162
+ type DocumentMetadata = Record<string, unknown>;
154
163
  /**
155
164
  * Job creation parameters
156
165
  */
@@ -167,6 +176,8 @@ interface CreateJobParams {
167
176
  namespace?: string;
168
177
  /** Existing document identifier when updating a published document */
169
178
  documentId?: string;
179
+ /** Display metadata to copy onto the published document */
180
+ documentMetadata?: DocumentMetadata;
170
181
  /** Parsing configuration */
171
182
  parsingParams?: ParsingParams;
172
183
  /** Webhook configuration */
@@ -233,6 +244,8 @@ interface ParseParams {
233
244
  namespace?: string;
234
245
  /** Existing document identifier when updating a published document */
235
246
  documentId?: string;
247
+ /** Display metadata to copy onto the published document */
248
+ documentMetadata?: DocumentMetadata;
236
249
  /** Additional fragment description */
237
250
  addFragDesc?: string;
238
251
  /** Knowledge base directory */
@@ -520,7 +533,8 @@ interface ParseResult {
520
533
 
521
534
  interface HttpClientOptions {
522
535
  baseURL: string;
523
- apiKey: string;
536
+ apiKey?: string;
537
+ authTokenProvider?: AuthTokenProvider;
524
538
  timeout?: number;
525
539
  uploadTimeout?: number;
526
540
  maxRetries?: number;
@@ -537,8 +551,10 @@ declare class HttpClient {
537
551
  private uploadTimeout;
538
552
  private httpAgent?;
539
553
  private httpsAgent?;
554
+ private authTokenProvider?;
540
555
  constructor(options: HttpClientOptions);
541
556
  private setupInterceptors;
557
+ private attachDynamicAuthorization;
542
558
  private handleError;
543
559
  private getErrorObject;
544
560
  private normalizeErrorData;
@@ -774,6 +790,8 @@ interface Document {
774
790
  currentJobResultId?: string;
775
791
  /** Original source file name */
776
792
  sourceFileName?: string;
793
+ /** Client-provided display metadata copied from the publishing job */
794
+ documentMetadata?: Record<string, unknown>;
777
795
  /** Document creation timestamp */
778
796
  createdAt?: Date;
779
797
  /** Last update timestamp */
@@ -919,6 +937,234 @@ declare class Documents extends BaseResource {
919
937
  private createChunkGetRequestConfig;
920
938
  }
921
939
 
940
+ type KnowledgeChunkType = DocumentChunkType;
941
+ interface KnowledgeParseParams extends ParseParams {
942
+ /** Optional stable local identifier for this parsed result copy. */
943
+ localDocumentId?: string;
944
+ }
945
+ interface LocalKnowledgeDocument {
946
+ /** Stable local identifier used by local outline/read/grep methods. */
947
+ localDocumentId: string;
948
+ /** Server parse job identifier. */
949
+ jobId: string;
950
+ /** Server canonical document identifier when publication returned one. */
951
+ documentId?: string;
952
+ /** Server retrieval namespace when available. */
953
+ namespace?: string;
954
+ /** Original source file name from the parse manifest. */
955
+ sourceFileName: string;
956
+ /** Number of chunks in the locally cached parse result. */
957
+ chunkCount: number;
958
+ /** Chunk counts grouped by type. */
959
+ typeCounts: Record<KnowledgeChunkType, number>;
960
+ /** Local cache directory containing expanded Knowhere result files and assets. */
961
+ resultDirectoryPath: string;
962
+ /** Cache creation time. */
963
+ createdAt: Date;
964
+ /** Last cache write time. */
965
+ updatedAt: Date;
966
+ }
967
+ interface LocalKnowledgeParseResponse {
968
+ document: LocalKnowledgeDocument;
969
+ result: ParseResult;
970
+ }
971
+ interface KnowledgeAsyncParseParams extends ParseParams {
972
+ /** Optional stable local identifier to use when this job result is cached later. */
973
+ localDocumentId?: string;
974
+ }
975
+ interface KnowledgeAsyncParseResponse {
976
+ job: Job;
977
+ localDocumentId?: string;
978
+ }
979
+ type KnowledgeAsyncCacheStatus = 'pending' | 'cached' | 'already_cached' | 'untracked' | 'not_available' | 'failed';
980
+ interface KnowledgeAsyncCacheResult {
981
+ status: KnowledgeAsyncCacheStatus;
982
+ localDocumentId?: string;
983
+ document?: LocalKnowledgeDocument;
984
+ error?: string;
985
+ }
986
+ interface KnowledgeAsyncJobStatusResponse {
987
+ job: JobResult;
988
+ cache: KnowledgeAsyncCacheResult;
989
+ }
990
+ interface KnowledgeStartupRecoveryResponse {
991
+ checkedJobs: number;
992
+ results: KnowledgeAsyncJobStatusResponse[];
993
+ }
994
+ interface KnowledgeCacheJobResultParams {
995
+ jobId: string;
996
+ localDocumentId?: string;
997
+ verifyChecksum?: boolean;
998
+ }
999
+ interface KnowledgeSection {
1000
+ sectionPath: string;
1001
+ sectionTitle: string;
1002
+ sectionLevel: number;
1003
+ summary?: string;
1004
+ startChunk?: number;
1005
+ endChunk?: number;
1006
+ chunkCount: number;
1007
+ typeCounts: Record<KnowledgeChunkType, number>;
1008
+ children: KnowledgeSection[];
1009
+ }
1010
+ interface KnowledgeOutline {
1011
+ document: LocalKnowledgeDocument;
1012
+ totalChunks: number;
1013
+ typeCounts: Record<KnowledgeChunkType, number>;
1014
+ sections: KnowledgeSection[];
1015
+ sectionTree: KnowledgeSection[];
1016
+ }
1017
+ interface KnowledgeReadParams {
1018
+ localDocumentId: string;
1019
+ sectionPath?: string;
1020
+ startChunk?: number;
1021
+ endChunk?: number;
1022
+ chunkId?: string;
1023
+ chunkType?: KnowledgeChunkType;
1024
+ limit?: number;
1025
+ }
1026
+ interface KnowledgeReadChunk {
1027
+ position: number;
1028
+ chunkId: string;
1029
+ chunkType: KnowledgeChunkType;
1030
+ content: string;
1031
+ sectionPath: string;
1032
+ sourceChunkPath: string;
1033
+ filePath?: string;
1034
+ metadata: Record<string, unknown>;
1035
+ }
1036
+ interface KnowledgeReadResponse {
1037
+ document: LocalKnowledgeDocument;
1038
+ chunks: KnowledgeReadChunk[];
1039
+ nextChunk?: number;
1040
+ }
1041
+ interface KnowledgeGrepParams {
1042
+ localDocumentId: string;
1043
+ pattern: string;
1044
+ isRegex?: boolean;
1045
+ isCaseSensitive?: boolean;
1046
+ maxResults?: number;
1047
+ chunkType?: KnowledgeChunkType;
1048
+ sectionPathPrefix?: string;
1049
+ contextChars?: number;
1050
+ }
1051
+ interface KnowledgeGrepMatch {
1052
+ position: number;
1053
+ chunkId: string;
1054
+ chunkType: KnowledgeChunkType;
1055
+ sectionPath: string;
1056
+ sourceChunkPath: string;
1057
+ filePath?: string;
1058
+ startOffset: number;
1059
+ endOffset: number;
1060
+ snippet: string;
1061
+ }
1062
+ interface KnowledgeGrepResponse {
1063
+ document: LocalKnowledgeDocument;
1064
+ matches: KnowledgeGrepMatch[];
1065
+ scannedChunks: number;
1066
+ truncated: boolean;
1067
+ }
1068
+ interface KnowledgeSearchParams {
1069
+ query: string;
1070
+ namespace?: string;
1071
+ topK?: number;
1072
+ localDocumentIds?: string[];
1073
+ useAgentic?: boolean;
1074
+ }
1075
+ interface KnowledgeSearchReference {
1076
+ localDocumentId?: string;
1077
+ documentId?: string;
1078
+ chunkId?: string;
1079
+ sectionPath?: string;
1080
+ chunkType?: string;
1081
+ score?: number | null;
1082
+ }
1083
+ interface KnowledgeSearchResponse {
1084
+ namespace?: string;
1085
+ query: string;
1086
+ evidenceText?: string | null;
1087
+ references: KnowledgeSearchReference[];
1088
+ results: KnowledgeSearchResult[];
1089
+ rawResponse: unknown;
1090
+ }
1091
+ interface KnowledgeSearchResult {
1092
+ localDocumentId?: string;
1093
+ documentId?: string;
1094
+ chunkId?: string;
1095
+ chunkType?: string;
1096
+ content: string;
1097
+ score: number | null;
1098
+ sectionPath?: string;
1099
+ sourceFileName?: string;
1100
+ }
1101
+
1102
+ declare class Knowledge {
1103
+ private readonly client;
1104
+ private readonly store;
1105
+ constructor(client: Knowhere, options?: {
1106
+ cacheDirectory?: string;
1107
+ });
1108
+ withCacheDirectory(cacheDirectory: string): Knowledge;
1109
+ parse(params: KnowledgeParseParams): Promise<LocalKnowledgeParseResponse>;
1110
+ startParse(params: KnowledgeAsyncParseParams): Promise<KnowledgeAsyncParseResponse>;
1111
+ getJobStatus(jobId: string): Promise<KnowledgeAsyncJobStatusResponse>;
1112
+ recoverPendingAsyncParseJobs(): Promise<KnowledgeStartupRecoveryResponse>;
1113
+ cacheJobResult(params: KnowledgeCacheJobResultParams): Promise<LocalKnowledgeParseResponse>;
1114
+ private resolveAsyncCache;
1115
+ listDocuments(): Promise<LocalKnowledgeDocument[]>;
1116
+ getDocumentOutline(localDocumentId: string): Promise<KnowledgeOutline>;
1117
+ readChunks(params: KnowledgeReadParams): Promise<KnowledgeReadResponse>;
1118
+ grepChunks(params: KnowledgeGrepParams): Promise<KnowledgeGrepResponse>;
1119
+ search(params: KnowledgeSearchParams): Promise<KnowledgeSearchResponse>;
1120
+ private resolveSearchDocuments;
1121
+ }
1122
+
1123
+ interface StoredAsyncParseJob {
1124
+ jobId: string;
1125
+ localDocumentId?: string;
1126
+ cacheStatus: KnowledgeAsyncCacheStatus;
1127
+ createdAt: string;
1128
+ updatedAt: string;
1129
+ }
1130
+ interface LocalKnowledgeAsyncParseJob {
1131
+ jobId: string;
1132
+ localDocumentId?: string;
1133
+ cacheStatus: KnowledgeAsyncCacheStatus;
1134
+ createdAt: Date;
1135
+ updatedAt: Date;
1136
+ }
1137
+ declare class LocalKnowledgeStore {
1138
+ private readonly cacheDirectory;
1139
+ private readonly indexPath;
1140
+ private readonly resultCache;
1141
+ constructor(cacheDirectory?: string);
1142
+ saveResult(result: ParseResult, options?: {
1143
+ localDocumentId?: string;
1144
+ }): Promise<LocalKnowledgeDocument>;
1145
+ saveAsyncParseJob(params: {
1146
+ jobId: string;
1147
+ localDocumentId?: string;
1148
+ }): Promise<void>;
1149
+ getAsyncParseJob(jobId: string): Promise<StoredAsyncParseJob | undefined>;
1150
+ listRecoverableAsyncParseJobs(): Promise<LocalKnowledgeAsyncParseJob[]>;
1151
+ updateAsyncParseJobCacheStatus(params: {
1152
+ jobId: string;
1153
+ cacheStatus: KnowledgeAsyncCacheStatus;
1154
+ localDocumentId?: string;
1155
+ }): Promise<void>;
1156
+ listDocuments(): Promise<LocalKnowledgeDocument[]>;
1157
+ getDocument(localDocumentId: string): Promise<LocalKnowledgeDocument | undefined>;
1158
+ loadResult(localDocumentId: string): Promise<{
1159
+ document: LocalKnowledgeDocument;
1160
+ result: ParseResult;
1161
+ }>;
1162
+ private getResultDirectoryPath;
1163
+ private loadStoredResult;
1164
+ private readIndex;
1165
+ private writeIndex;
1166
+ }
1167
+
922
1168
  /**
923
1169
  * Main Knowhere SDK client
924
1170
  */
@@ -929,6 +1175,8 @@ declare class Knowhere {
929
1175
  readonly retrieval: Retrieval;
930
1176
  /** Documents resource for canonical document lifecycle operations */
931
1177
  readonly documents: Documents;
1178
+ /** Client-side local knowledge tools over parsed Knowhere results */
1179
+ readonly knowledge: Knowledge;
932
1180
  private httpClient;
933
1181
  /**
934
1182
  * Create a new Knowhere client
@@ -955,6 +1203,12 @@ declare class Knowhere {
955
1203
  * ```
956
1204
  */
957
1205
  parse(params: ParseParams): Promise<ParseResult>;
1206
+ /**
1207
+ * Start a parse job and return immediately after the URL job is created or
1208
+ * the local file is uploaded. Use jobs.get()/jobs.wait() and jobs.load()
1209
+ * to inspect completion and load results later.
1210
+ */
1211
+ startParse(params: ParseParams): Promise<Job>;
958
1212
  }
959
1213
 
960
1214
  declare const VERSION = "0.1.0";
@@ -1088,4 +1342,4 @@ declare class JobFailedError extends KnowhereError {
1088
1342
  constructor(message: string, code: string, jobResult: JobResult);
1089
1343
  }
1090
1344
 
1091
- export { APIError, AuthenticationError, BadRequestError, type BaseChunk, ChecksumError, type Chunk, ConflictError, type CreateJobParams, type DocType, type Document, type DocumentChunk, type DocumentChunkGetParams, type DocumentChunkListParams, type DocumentChunkListResponse, type DocumentChunkPagination, type DocumentChunkResponse, type DocumentChunkType, type DocumentListResponse, Documents, type FileIndex, GatewayTimeoutError, type ImageChunk, InternalServerError, InvalidStateError, type Job, type JobError, JobFailedError, type JobResult, type JobStatus, Jobs, Knowhere, KnowhereError, type KnowhereOptions, type LoadOptions, type Manifest, NetworkError, NotFoundError, type ParseParams, type ParseResult, type ParsingModel, type ParsingParams, PaymentRequiredError, PermissionDeniedError, type PollProgress, PollingTimeoutError, RateLimitError, Retrieval, type RetrievalChannel, type RetrievalFilterMode, type RetrievalQueryParams, type RetrievalQueryResponse, type RetrievalReferencedChunk, type RetrievalResult, type RetrievalSectionExclusion, type RetrievalSource, ServiceUnavailableError, type Statistics, type TableChunk, type TextChunk, TimeoutError, type UploadParams, type UploadProgress, VERSION, ValidationError, type WaitOptions, type WebhookConfig, Knowhere as default };
1345
+ export { APIError, type AuthTokenProvider, AuthenticationError, BadRequestError, type BaseChunk, ChecksumError, type Chunk, ConflictError, type CreateJobParams, type DocType, type Document, type DocumentChunk, type DocumentChunkGetParams, type DocumentChunkListParams, type DocumentChunkListResponse, type DocumentChunkPagination, type DocumentChunkResponse, type DocumentChunkType, type DocumentListResponse, type DocumentMetadata, Documents, type FileIndex, GatewayTimeoutError, type ImageChunk, InternalServerError, InvalidStateError, type Job, type JobError, JobFailedError, type JobResult, type JobStatus, Jobs, Knowhere, KnowhereError, type KnowhereOptions, Knowledge, type KnowledgeAsyncJobStatusResponse, type KnowledgeAsyncParseParams, type KnowledgeAsyncParseResponse, type KnowledgeCacheJobResultParams, type KnowledgeChunkType, type KnowledgeGrepMatch, type KnowledgeGrepParams, type KnowledgeGrepResponse, type KnowledgeOutline, type KnowledgeParseParams, type KnowledgeReadChunk, type KnowledgeReadParams, type KnowledgeReadResponse, type KnowledgeSearchParams, type KnowledgeSearchReference, type KnowledgeSearchResponse, type KnowledgeSearchResult, type KnowledgeSection, type KnowledgeStartupRecoveryResponse, type LoadOptions, type LocalKnowledgeDocument, type LocalKnowledgeParseResponse, LocalKnowledgeStore, type Manifest, NetworkError, NotFoundError, type ParseParams, type ParseResult, type ParsingModel, type ParsingParams, PaymentRequiredError, PermissionDeniedError, type PollProgress, PollingTimeoutError, RateLimitError, Retrieval, type RetrievalChannel, type RetrievalFilterMode, type RetrievalQueryParams, type RetrievalQueryResponse, type RetrievalReferencedChunk, type RetrievalResult, type RetrievalSectionExclusion, type RetrievalSource, ServiceUnavailableError, type Statistics, type TableChunk, type TextChunk, TimeoutError, type UploadParams, type UploadProgress, VERSION, ValidationError, type WaitOptions, type WebhookConfig, Knowhere as default };