@ontos-ai/knowhere-sdk 0.5.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -196,8 +196,11 @@ const response = await client.retrieval.query({
196
196
  useAgentic: true,
197
197
  });
198
198
 
199
- console.log(response.answerText); // LLM-generated answer
200
- console.log(response.referencedChunks); // cited evidence chunks
199
+ console.log(response.answerText); // LLM-generated answer
200
+ console.log(response.referencedChunks); // cited evidence chunks
201
+ console.log(response.evidenceText); // rendered evidence context, when returned
202
+ console.log(response.stopReason); // agentic termination reason, when returned
203
+ console.log(response.failureReason); // no-answer reason, when returned
201
204
 
202
205
  for (const result of response.results) {
203
206
  console.log(result.content);
@@ -218,6 +221,20 @@ result.source.sourceFileName;
218
221
  result.source.sectionPath;
219
222
  ```
220
223
 
224
+ Agentic references expose the current retrieval citation fields:
225
+
226
+ ```typescript
227
+ const reference = response.referencedChunks[0];
228
+
229
+ reference.chunkId;
230
+ reference.documentId;
231
+ reference.chunkType;
232
+ reference.sectionPath;
233
+ reference.filePath;
234
+ reference.jobId;
235
+ reference.assetUrl;
236
+ ```
237
+
221
238
  Use `documentId` to update or archive a document:
222
239
 
223
240
  ```typescript
@@ -248,6 +265,72 @@ if (chunks.chunks[0]) {
248
265
  console.log(archived.status);
249
266
  ```
250
267
 
268
+ ### Local Knowledge Tools
269
+
270
+ The SDK can also keep parsed results in a local cache and run exact inspection
271
+ tools over that cached copy. This is the implementation used by the separate
272
+ `@ontos-ai/knowhere-mcp` package.
273
+
274
+ ```typescript
275
+ const parsed = await client.knowledge.parse({
276
+ file: './manual.pdf',
277
+ localDocumentId: 'manual-v1',
278
+ });
279
+
280
+ const outline = await client.knowledge.getDocumentOutline(parsed.document.localDocumentId);
281
+
282
+ const read = await client.knowledge.readChunks({
283
+ localDocumentId: parsed.document.localDocumentId,
284
+ sectionPath: outline.sections[0]?.sectionPath,
285
+ limit: 5,
286
+ });
287
+
288
+ const grep = await client.knowledge.grepChunks({
289
+ localDocumentId: parsed.document.localDocumentId,
290
+ pattern: 'warranty',
291
+ maxResults: 10,
292
+ });
293
+
294
+ const serverSearch = await client.knowledge.search({
295
+ query: 'battery warranty',
296
+ localDocumentIds: [parsed.document.localDocumentId],
297
+ topK: 5,
298
+ });
299
+
300
+ console.log(read.chunks);
301
+ console.log(grep.matches);
302
+ console.log(serverSearch.references);
303
+ ```
304
+
305
+ Local grep and reads use the cached parse result, not server-side chunk scans.
306
+ Search uses the Knowhere API retrieval query; local document IDs only help map
307
+ returned server document IDs back to local cache IDs when available.
308
+ The MCP package is a wrapper over this SDK interface; install it only when an
309
+ agent host needs an MCP server. See the MCP package README for Codex, Claude
310
+ Code, Claude Desktop, and generic stdio MCP host configuration examples.
311
+
312
+ For longer parses, use the non-blocking SDK flow and cache the result after the
313
+ job completes:
314
+
315
+ ```typescript
316
+ const started = await client.knowledge.startParse({
317
+ file: './manual.pdf',
318
+ localDocumentId: 'manual-v1',
319
+ });
320
+
321
+ const status = await client.knowledge.getJobStatus(started.job.jobId);
322
+
323
+ if (status.job.isDone && status.cache.document) {
324
+ console.log(status.cache.document.localDocumentId);
325
+ }
326
+ ```
327
+
328
+ When the job was started through `client.knowledge.startParse(...)`,
329
+ `getJobStatus(...)` automatically caches the completed result locally the first
330
+ time it observes `status.job.isDone`. Use `cacheJobResult(...)` only to recover a
331
+ completed job that was not started through the local knowledge helper, or to
332
+ retry a cache step explicitly.
333
+
251
334
  Follow-up queries can exclude documents or sections for one request:
252
335
 
253
336
  ```typescript
package/dist/index.d.mts CHANGED
@@ -3,12 +3,15 @@ import { Agent as Agent$1 } from 'https';
3
3
  import { ReadStream } from 'fs';
4
4
  import { AxiosRequestConfig, AxiosInstance } from 'axios';
5
5
 
6
+ type AuthTokenProvider = () => string | Promise<string>;
6
7
  /**
7
8
  * Configuration options for the Knowhere client
8
9
  */
9
10
  interface KnowhereOptions {
10
11
  /** API authentication key (defaults to KNOWHERE_API_KEY env var) */
11
12
  apiKey?: string;
13
+ /** Dynamic bearer token provider for short-lived non-API-key auth flows */
14
+ authTokenProvider?: AuthTokenProvider;
12
15
  /** API base URL (defaults to https://api.knowhereto.ai) */
13
16
  baseURL?: string;
14
17
  /** Request timeout in milliseconds (default: 60000) */
@@ -520,7 +523,8 @@ interface ParseResult {
520
523
 
521
524
  interface HttpClientOptions {
522
525
  baseURL: string;
523
- apiKey: string;
526
+ apiKey?: string;
527
+ authTokenProvider?: AuthTokenProvider;
524
528
  timeout?: number;
525
529
  uploadTimeout?: number;
526
530
  maxRetries?: number;
@@ -537,8 +541,10 @@ declare class HttpClient {
537
541
  private uploadTimeout;
538
542
  private httpAgent?;
539
543
  private httpsAgent?;
544
+ private authTokenProvider?;
540
545
  constructor(options: HttpClientOptions);
541
546
  private setupInterceptors;
547
+ private attachDynamicAuthorization;
542
548
  private handleError;
543
549
  private getErrorObject;
544
550
  private normalizeErrorData;
@@ -679,11 +685,11 @@ interface RetrievalQueryParams {
679
685
  */
680
686
  interface RetrievalSource {
681
687
  /** Stable document identifier */
682
- documentId?: string;
688
+ documentId?: string | null;
683
689
  /** Original source file name */
684
- sourceFileName?: string;
690
+ sourceFileName?: string | null;
685
691
  /** Human-readable section path */
686
- sectionPath?: string;
692
+ sectionPath?: string | null;
687
693
  }
688
694
  /**
689
695
  * Canonical chunk result returned by retrieval query.
@@ -693,15 +699,39 @@ interface RetrievalResult {
693
699
  content: string;
694
700
  /** Chunk type, for example text, image, or table */
695
701
  chunkType: string;
696
- /** Retrieval score returned by the API */
697
- score: number;
702
+ /** Retrieval score returned by the API. Null when no score is available (agentic navigation-only results). */
703
+ score: number | null;
698
704
  /** Presigned asset URL for media chunks when available */
699
705
  assetUrl?: string;
700
706
  /** Source reference for this result */
701
707
  source: RetrievalSource;
702
708
  }
709
+ /**
710
+ * Cited evidence chunk returned by agentic retrieval.
711
+ */
712
+ interface RetrievalReferencedChunk {
713
+ /** Parser-provided chunk identifier */
714
+ chunkId: string;
715
+ /** Stable document identifier */
716
+ documentId: string;
717
+ /** Chunk type, for example text, image, or table */
718
+ chunkType: string;
719
+ /** Human-readable section path */
720
+ sectionPath: string;
721
+ /** Generated artifact file path for media chunks */
722
+ filePath?: string | null;
723
+ /** Published job identifier for the referenced chunk */
724
+ jobId?: string | null;
725
+ /** Presigned asset URL for media chunks when available */
726
+ assetUrl?: string | null;
727
+ }
703
728
  /**
704
729
  * Response from POST /v1/retrieval/query.
730
+ *
731
+ * Three PRIMARY output fields for downstream agent consumption:
732
+ * - `evidenceText`: hierarchical evidence tree for LLM context
733
+ * - `decisionTrace`: per-step navigation decisions (includes stop/failure)
734
+ * - `referencedChunks`: structured chunk citations for follow-up queries
705
735
  */
706
736
  interface RetrievalQueryResponse {
707
737
  /** Namespace searched by the API */
@@ -709,11 +739,19 @@ interface RetrievalQueryResponse {
709
739
  /** Echoed query text */
710
740
  query: string;
711
741
  /** Retrieval router path used by the API for this query */
712
- routerUsed?: string;
713
- /** LLM-generated natural-language answer (agentic mode only) */
714
- answerText?: string | null;
715
- /** Cited evidence chunks with asset URLs (agentic mode only) */
716
- referencedChunks?: Array<Record<string, unknown>> | null;
742
+ routerUsed: string;
743
+ /** LLM-generated natural-language answer, or null when no answer was produced */
744
+ answerText: string | null;
745
+ /** Cited evidence chunks with asset URLs when available */
746
+ referencedChunks: RetrievalReferencedChunk[];
747
+ /** Tree-structured evidence text rendered by the agentic navigator */
748
+ evidenceText?: string | null;
749
+ /** Reason why the agentic run stopped (e.g. answer_done, not_found) */
750
+ stopReason?: string | null;
751
+ /** Semantic failure reason when the agentic evidence is insufficient */
752
+ failureReason?: string | null;
753
+ /** Per-step navigation decisions from agentic retrieval, including terminal stop/failure */
754
+ decisionTrace?: Record<string, unknown>[];
717
755
  /** Ranked retrieval results */
718
756
  results: RetrievalResult[];
719
757
  }
@@ -887,6 +925,234 @@ declare class Documents extends BaseResource {
887
925
  private createChunkGetRequestConfig;
888
926
  }
889
927
 
928
+ type KnowledgeChunkType = DocumentChunkType;
929
+ interface KnowledgeParseParams extends ParseParams {
930
+ /** Optional stable local identifier for this parsed result copy. */
931
+ localDocumentId?: string;
932
+ }
933
+ interface LocalKnowledgeDocument {
934
+ /** Stable local identifier used by local outline/read/grep methods. */
935
+ localDocumentId: string;
936
+ /** Server parse job identifier. */
937
+ jobId: string;
938
+ /** Server canonical document identifier when publication returned one. */
939
+ documentId?: string;
940
+ /** Server retrieval namespace when available. */
941
+ namespace?: string;
942
+ /** Original source file name from the parse manifest. */
943
+ sourceFileName: string;
944
+ /** Number of chunks in the locally cached parse result. */
945
+ chunkCount: number;
946
+ /** Chunk counts grouped by type. */
947
+ typeCounts: Record<KnowledgeChunkType, number>;
948
+ /** Local cache directory containing expanded Knowhere result files and assets. */
949
+ resultDirectoryPath: string;
950
+ /** Cache creation time. */
951
+ createdAt: Date;
952
+ /** Last cache write time. */
953
+ updatedAt: Date;
954
+ }
955
+ interface LocalKnowledgeParseResponse {
956
+ document: LocalKnowledgeDocument;
957
+ result: ParseResult;
958
+ }
959
+ interface KnowledgeAsyncParseParams extends ParseParams {
960
+ /** Optional stable local identifier to use when this job result is cached later. */
961
+ localDocumentId?: string;
962
+ }
963
+ interface KnowledgeAsyncParseResponse {
964
+ job: Job;
965
+ localDocumentId?: string;
966
+ }
967
+ type KnowledgeAsyncCacheStatus = 'pending' | 'cached' | 'already_cached' | 'untracked' | 'not_available' | 'failed';
968
+ interface KnowledgeAsyncCacheResult {
969
+ status: KnowledgeAsyncCacheStatus;
970
+ localDocumentId?: string;
971
+ document?: LocalKnowledgeDocument;
972
+ error?: string;
973
+ }
974
+ interface KnowledgeAsyncJobStatusResponse {
975
+ job: JobResult;
976
+ cache: KnowledgeAsyncCacheResult;
977
+ }
978
+ interface KnowledgeStartupRecoveryResponse {
979
+ checkedJobs: number;
980
+ results: KnowledgeAsyncJobStatusResponse[];
981
+ }
982
+ interface KnowledgeCacheJobResultParams {
983
+ jobId: string;
984
+ localDocumentId?: string;
985
+ verifyChecksum?: boolean;
986
+ }
987
+ interface KnowledgeSection {
988
+ sectionPath: string;
989
+ sectionTitle: string;
990
+ sectionLevel: number;
991
+ summary?: string;
992
+ startChunk?: number;
993
+ endChunk?: number;
994
+ chunkCount: number;
995
+ typeCounts: Record<KnowledgeChunkType, number>;
996
+ children: KnowledgeSection[];
997
+ }
998
+ interface KnowledgeOutline {
999
+ document: LocalKnowledgeDocument;
1000
+ totalChunks: number;
1001
+ typeCounts: Record<KnowledgeChunkType, number>;
1002
+ sections: KnowledgeSection[];
1003
+ sectionTree: KnowledgeSection[];
1004
+ }
1005
+ interface KnowledgeReadParams {
1006
+ localDocumentId: string;
1007
+ sectionPath?: string;
1008
+ startChunk?: number;
1009
+ endChunk?: number;
1010
+ chunkId?: string;
1011
+ chunkType?: KnowledgeChunkType;
1012
+ limit?: number;
1013
+ }
1014
+ interface KnowledgeReadChunk {
1015
+ position: number;
1016
+ chunkId: string;
1017
+ chunkType: KnowledgeChunkType;
1018
+ content: string;
1019
+ sectionPath: string;
1020
+ sourceChunkPath: string;
1021
+ filePath?: string;
1022
+ metadata: Record<string, unknown>;
1023
+ }
1024
+ interface KnowledgeReadResponse {
1025
+ document: LocalKnowledgeDocument;
1026
+ chunks: KnowledgeReadChunk[];
1027
+ nextChunk?: number;
1028
+ }
1029
+ interface KnowledgeGrepParams {
1030
+ localDocumentId: string;
1031
+ pattern: string;
1032
+ isRegex?: boolean;
1033
+ isCaseSensitive?: boolean;
1034
+ maxResults?: number;
1035
+ chunkType?: KnowledgeChunkType;
1036
+ sectionPathPrefix?: string;
1037
+ contextChars?: number;
1038
+ }
1039
+ interface KnowledgeGrepMatch {
1040
+ position: number;
1041
+ chunkId: string;
1042
+ chunkType: KnowledgeChunkType;
1043
+ sectionPath: string;
1044
+ sourceChunkPath: string;
1045
+ filePath?: string;
1046
+ startOffset: number;
1047
+ endOffset: number;
1048
+ snippet: string;
1049
+ }
1050
+ interface KnowledgeGrepResponse {
1051
+ document: LocalKnowledgeDocument;
1052
+ matches: KnowledgeGrepMatch[];
1053
+ scannedChunks: number;
1054
+ truncated: boolean;
1055
+ }
1056
+ interface KnowledgeSearchParams {
1057
+ query: string;
1058
+ namespace?: string;
1059
+ topK?: number;
1060
+ localDocumentIds?: string[];
1061
+ useAgentic?: boolean;
1062
+ }
1063
+ interface KnowledgeSearchReference {
1064
+ localDocumentId?: string;
1065
+ documentId?: string;
1066
+ chunkId?: string;
1067
+ sectionPath?: string;
1068
+ chunkType?: string;
1069
+ score?: number | null;
1070
+ }
1071
+ interface KnowledgeSearchResponse {
1072
+ namespace?: string;
1073
+ query: string;
1074
+ evidenceText?: string | null;
1075
+ references: KnowledgeSearchReference[];
1076
+ results: KnowledgeSearchResult[];
1077
+ rawResponse: unknown;
1078
+ }
1079
+ interface KnowledgeSearchResult {
1080
+ localDocumentId?: string;
1081
+ documentId?: string;
1082
+ chunkId?: string;
1083
+ chunkType?: string;
1084
+ content: string;
1085
+ score: number | null;
1086
+ sectionPath?: string;
1087
+ sourceFileName?: string;
1088
+ }
1089
+
1090
+ declare class Knowledge {
1091
+ private readonly client;
1092
+ private readonly store;
1093
+ constructor(client: Knowhere, options?: {
1094
+ cacheDirectory?: string;
1095
+ });
1096
+ withCacheDirectory(cacheDirectory: string): Knowledge;
1097
+ parse(params: KnowledgeParseParams): Promise<LocalKnowledgeParseResponse>;
1098
+ startParse(params: KnowledgeAsyncParseParams): Promise<KnowledgeAsyncParseResponse>;
1099
+ getJobStatus(jobId: string): Promise<KnowledgeAsyncJobStatusResponse>;
1100
+ recoverPendingAsyncParseJobs(): Promise<KnowledgeStartupRecoveryResponse>;
1101
+ cacheJobResult(params: KnowledgeCacheJobResultParams): Promise<LocalKnowledgeParseResponse>;
1102
+ private resolveAsyncCache;
1103
+ listDocuments(): Promise<LocalKnowledgeDocument[]>;
1104
+ getDocumentOutline(localDocumentId: string): Promise<KnowledgeOutline>;
1105
+ readChunks(params: KnowledgeReadParams): Promise<KnowledgeReadResponse>;
1106
+ grepChunks(params: KnowledgeGrepParams): Promise<KnowledgeGrepResponse>;
1107
+ search(params: KnowledgeSearchParams): Promise<KnowledgeSearchResponse>;
1108
+ private resolveSearchDocuments;
1109
+ }
1110
+
1111
+ interface StoredAsyncParseJob {
1112
+ jobId: string;
1113
+ localDocumentId?: string;
1114
+ cacheStatus: KnowledgeAsyncCacheStatus;
1115
+ createdAt: string;
1116
+ updatedAt: string;
1117
+ }
1118
+ interface LocalKnowledgeAsyncParseJob {
1119
+ jobId: string;
1120
+ localDocumentId?: string;
1121
+ cacheStatus: KnowledgeAsyncCacheStatus;
1122
+ createdAt: Date;
1123
+ updatedAt: Date;
1124
+ }
1125
+ declare class LocalKnowledgeStore {
1126
+ private readonly cacheDirectory;
1127
+ private readonly indexPath;
1128
+ private readonly resultCache;
1129
+ constructor(cacheDirectory?: string);
1130
+ saveResult(result: ParseResult, options?: {
1131
+ localDocumentId?: string;
1132
+ }): Promise<LocalKnowledgeDocument>;
1133
+ saveAsyncParseJob(params: {
1134
+ jobId: string;
1135
+ localDocumentId?: string;
1136
+ }): Promise<void>;
1137
+ getAsyncParseJob(jobId: string): Promise<StoredAsyncParseJob | undefined>;
1138
+ listRecoverableAsyncParseJobs(): Promise<LocalKnowledgeAsyncParseJob[]>;
1139
+ updateAsyncParseJobCacheStatus(params: {
1140
+ jobId: string;
1141
+ cacheStatus: KnowledgeAsyncCacheStatus;
1142
+ localDocumentId?: string;
1143
+ }): Promise<void>;
1144
+ listDocuments(): Promise<LocalKnowledgeDocument[]>;
1145
+ getDocument(localDocumentId: string): Promise<LocalKnowledgeDocument | undefined>;
1146
+ loadResult(localDocumentId: string): Promise<{
1147
+ document: LocalKnowledgeDocument;
1148
+ result: ParseResult;
1149
+ }>;
1150
+ private getResultDirectoryPath;
1151
+ private loadStoredResult;
1152
+ private readIndex;
1153
+ private writeIndex;
1154
+ }
1155
+
890
1156
  /**
891
1157
  * Main Knowhere SDK client
892
1158
  */
@@ -897,6 +1163,8 @@ declare class Knowhere {
897
1163
  readonly retrieval: Retrieval;
898
1164
  /** Documents resource for canonical document lifecycle operations */
899
1165
  readonly documents: Documents;
1166
+ /** Client-side local knowledge tools over parsed Knowhere results */
1167
+ readonly knowledge: Knowledge;
900
1168
  private httpClient;
901
1169
  /**
902
1170
  * Create a new Knowhere client
@@ -923,6 +1191,12 @@ declare class Knowhere {
923
1191
  * ```
924
1192
  */
925
1193
  parse(params: ParseParams): Promise<ParseResult>;
1194
+ /**
1195
+ * Start a parse job and return immediately after the URL job is created or
1196
+ * the local file is uploaded. Use jobs.get()/jobs.wait() and jobs.load()
1197
+ * to inspect completion and load results later.
1198
+ */
1199
+ startParse(params: ParseParams): Promise<Job>;
926
1200
  }
927
1201
 
928
1202
  declare const VERSION = "0.1.0";
@@ -1056,4 +1330,4 @@ declare class JobFailedError extends KnowhereError {
1056
1330
  constructor(message: string, code: string, jobResult: JobResult);
1057
1331
  }
1058
1332
 
1059
- export { APIError, AuthenticationError, BadRequestError, type BaseChunk, ChecksumError, type Chunk, ConflictError, type CreateJobParams, type DocType, type Document, type DocumentChunk, type DocumentChunkGetParams, type DocumentChunkListParams, type DocumentChunkListResponse, type DocumentChunkPagination, type DocumentChunkResponse, type DocumentChunkType, type DocumentListResponse, Documents, type FileIndex, GatewayTimeoutError, type ImageChunk, InternalServerError, InvalidStateError, type Job, type JobError, JobFailedError, type JobResult, type JobStatus, Jobs, Knowhere, KnowhereError, type KnowhereOptions, type LoadOptions, type Manifest, NetworkError, NotFoundError, type ParseParams, type ParseResult, type ParsingModel, type ParsingParams, PaymentRequiredError, PermissionDeniedError, type PollProgress, PollingTimeoutError, RateLimitError, Retrieval, type RetrievalChannel, type RetrievalFilterMode, type RetrievalQueryParams, type RetrievalQueryResponse, type RetrievalResult, type RetrievalSectionExclusion, type RetrievalSource, ServiceUnavailableError, type Statistics, type TableChunk, type TextChunk, TimeoutError, type UploadParams, type UploadProgress, VERSION, ValidationError, type WaitOptions, type WebhookConfig, Knowhere as default };
1333
+ export { APIError, type AuthTokenProvider, AuthenticationError, BadRequestError, type BaseChunk, ChecksumError, type Chunk, ConflictError, type CreateJobParams, type DocType, type Document, type DocumentChunk, type DocumentChunkGetParams, type DocumentChunkListParams, type DocumentChunkListResponse, type DocumentChunkPagination, type DocumentChunkResponse, type DocumentChunkType, type DocumentListResponse, Documents, type FileIndex, GatewayTimeoutError, type ImageChunk, InternalServerError, InvalidStateError, type Job, type JobError, JobFailedError, type JobResult, type JobStatus, Jobs, Knowhere, KnowhereError, type KnowhereOptions, Knowledge, type KnowledgeAsyncJobStatusResponse, type KnowledgeAsyncParseParams, type KnowledgeAsyncParseResponse, type KnowledgeCacheJobResultParams, type KnowledgeChunkType, type KnowledgeGrepMatch, type KnowledgeGrepParams, type KnowledgeGrepResponse, type KnowledgeOutline, type KnowledgeParseParams, type KnowledgeReadChunk, type KnowledgeReadParams, type KnowledgeReadResponse, type KnowledgeSearchParams, type KnowledgeSearchReference, type KnowledgeSearchResponse, type KnowledgeSearchResult, type KnowledgeSection, type KnowledgeStartupRecoveryResponse, type LoadOptions, type LocalKnowledgeDocument, type LocalKnowledgeParseResponse, LocalKnowledgeStore, type Manifest, NetworkError, NotFoundError, type ParseParams, type ParseResult, type ParsingModel, type ParsingParams, PaymentRequiredError, PermissionDeniedError, type PollProgress, PollingTimeoutError, RateLimitError, Retrieval, type RetrievalChannel, type RetrievalFilterMode, type RetrievalQueryParams, type RetrievalQueryResponse, type RetrievalReferencedChunk, type RetrievalResult, type RetrievalSectionExclusion, type RetrievalSource, ServiceUnavailableError, type Statistics, type TableChunk, type TextChunk, TimeoutError, type UploadParams, type UploadProgress, VERSION, ValidationError, type WaitOptions, type WebhookConfig, Knowhere as default };