@ontos-ai/knowhere-sdk 0.3.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -3
- package/dist/index.d.mts +193 -39
- package/dist/index.d.ts +193 -39
- package/dist/index.js +69 -48
- package/dist/index.mjs +69 -48
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -47,11 +47,11 @@ console.log(`Found ${result.textChunks.length} text chunks`);
|
|
|
47
47
|
console.log(`Found ${result.imageChunks.length} images`);
|
|
48
48
|
console.log(`Found ${result.tableChunks.length} tables`);
|
|
49
49
|
|
|
50
|
-
// Work with chunks
|
|
50
|
+
// Work with chunks — worker metadata is in chunk.metadata
|
|
51
51
|
result.textChunks.forEach((chunk) => {
|
|
52
52
|
console.log(chunk.content);
|
|
53
|
-
console.log(chunk.keywords);
|
|
54
|
-
console.log(chunk.summary);
|
|
53
|
+
console.log(chunk.metadata.keywords);
|
|
54
|
+
console.log(chunk.metadata.summary);
|
|
55
55
|
});
|
|
56
56
|
|
|
57
57
|
// Save results to disk
|
|
@@ -188,12 +188,17 @@ if (!documentId) {
|
|
|
188
188
|
|
|
189
189
|
console.log(documentId);
|
|
190
190
|
|
|
191
|
+
// Agentic mode (LLM navigation + answer synthesis)
|
|
191
192
|
const response = await client.retrieval.query({
|
|
192
193
|
namespace: 'support-center',
|
|
193
194
|
query: 'How do I reset Bluetooth pairing?',
|
|
194
195
|
topK: 5,
|
|
196
|
+
useAgentic: true,
|
|
195
197
|
});
|
|
196
198
|
|
|
199
|
+
console.log(response.answerText); // LLM-generated answer
|
|
200
|
+
console.log(response.referencedChunks); // cited evidence chunks
|
|
201
|
+
|
|
197
202
|
for (const result of response.results) {
|
|
198
203
|
console.log(result.content);
|
|
199
204
|
console.log(result.score);
|
|
@@ -224,10 +229,22 @@ const updateJob = await client.jobs.create({
|
|
|
224
229
|
|
|
225
230
|
const documents = await client.documents.list({ namespace: 'support-center' });
|
|
226
231
|
const document = await client.documents.get(documentId);
|
|
232
|
+
const chunks = await client.documents.listChunks(documentId, {
|
|
233
|
+
page: 1,
|
|
234
|
+
pageSize: 50,
|
|
235
|
+
chunkType: 'text',
|
|
236
|
+
});
|
|
227
237
|
const archived = await client.documents.archive(documentId);
|
|
228
238
|
|
|
229
239
|
console.log(documents.documents.length);
|
|
230
240
|
console.log(document.status);
|
|
241
|
+
console.log(chunks.pagination.total);
|
|
242
|
+
if (chunks.chunks[0]) {
|
|
243
|
+
const chunk = await client.documents.getChunk(documentId, chunks.chunks[0].id, {
|
|
244
|
+
includeAssetUrls: true,
|
|
245
|
+
});
|
|
246
|
+
console.log(chunk.chunk.content);
|
|
247
|
+
}
|
|
231
248
|
console.log(archived.status);
|
|
232
249
|
```
|
|
233
250
|
|
package/dist/index.d.mts
CHANGED
|
@@ -332,7 +332,6 @@ interface Manifest {
|
|
|
332
332
|
dataId?: string;
|
|
333
333
|
/** Original source file name */
|
|
334
334
|
sourceFileName: string;
|
|
335
|
-
/** Processing completion date */
|
|
336
335
|
/** Processing completion date (optional: only present if emitted by the worker) */
|
|
337
336
|
processingDate?: Date;
|
|
338
337
|
/** Worker-side processing metadata emitted by manifest v2 */
|
|
@@ -341,6 +340,13 @@ interface Manifest {
|
|
|
341
340
|
statistics: Statistics;
|
|
342
341
|
/** Legacy file index from earlier ZIP manifests */
|
|
343
342
|
files?: FileIndex;
|
|
343
|
+
/**
|
|
344
|
+
* Document hierarchy emitted by the current worker.
|
|
345
|
+
*
|
|
346
|
+
* The key remains all-caps at runtime because ``keysToCamel()`` only
|
|
347
|
+
* transforms snake_case keys.
|
|
348
|
+
*/
|
|
349
|
+
HIERARCHY?: Record<string, unknown>;
|
|
344
350
|
}
|
|
345
351
|
/**
|
|
346
352
|
* Chunk relationship entry (metadata.connect_to per schema v2.1)
|
|
@@ -357,6 +363,58 @@ interface ConnectTo {
|
|
|
357
363
|
/** Shared keywords (related only) */
|
|
358
364
|
keywords?: string[];
|
|
359
365
|
}
|
|
366
|
+
/**
|
|
367
|
+
* A single image or table resource entry in ``doc_nav.json``.
|
|
368
|
+
*/
|
|
369
|
+
interface DocNavResourceItem {
|
|
370
|
+
path: string;
|
|
371
|
+
summary?: string;
|
|
372
|
+
}
|
|
373
|
+
/**
|
|
374
|
+
* Image and table resource summaries from ``doc_nav.json``.
|
|
375
|
+
*/
|
|
376
|
+
interface DocNavResources {
|
|
377
|
+
images: DocNavResourceItem[];
|
|
378
|
+
tables: DocNavResourceItem[];
|
|
379
|
+
}
|
|
380
|
+
/**
|
|
381
|
+
* A document section in the ``doc_nav.json`` navigation tree.
|
|
382
|
+
*/
|
|
383
|
+
interface DocNavSection {
|
|
384
|
+
title: string;
|
|
385
|
+
path: string;
|
|
386
|
+
level: number;
|
|
387
|
+
summary?: string;
|
|
388
|
+
chunkCount: number;
|
|
389
|
+
children: DocNavSection[];
|
|
390
|
+
}
|
|
391
|
+
/**
|
|
392
|
+
* Top-level document navigation structure from ``doc_nav.json``.
|
|
393
|
+
*/
|
|
394
|
+
interface DocNav {
|
|
395
|
+
sections: DocNavSection[];
|
|
396
|
+
resources?: DocNavResources;
|
|
397
|
+
}
|
|
398
|
+
/**
|
|
399
|
+
* Known worker metadata fields for a chunk.
|
|
400
|
+
*
|
|
401
|
+
* All fields are optional. Unknown fields added by future worker
|
|
402
|
+
* versions are accessible through the index signature.
|
|
403
|
+
*/
|
|
404
|
+
interface ChunkMetadata {
|
|
405
|
+
length?: number;
|
|
406
|
+
pageNums?: number[];
|
|
407
|
+
tokens?: string[];
|
|
408
|
+
keywords?: string[];
|
|
409
|
+
summary?: string;
|
|
410
|
+
connectTo?: ConnectTo[];
|
|
411
|
+
filePath?: string;
|
|
412
|
+
originalName?: string;
|
|
413
|
+
tableType?: string;
|
|
414
|
+
documentTopSummary?: string;
|
|
415
|
+
/** Allow forward-compatible access to unknown fields. */
|
|
416
|
+
[key: string]: unknown;
|
|
417
|
+
}
|
|
360
418
|
/**
|
|
361
419
|
* Base chunk properties
|
|
362
420
|
*/
|
|
@@ -369,50 +427,30 @@ interface BaseChunk {
|
|
|
369
427
|
content: string;
|
|
370
428
|
/** Relative path in ZIP */
|
|
371
429
|
path: string;
|
|
372
|
-
/**
|
|
373
|
-
|
|
430
|
+
/** Worker metadata for this chunk */
|
|
431
|
+
metadata: ChunkMetadata;
|
|
374
432
|
}
|
|
375
433
|
/**
|
|
376
|
-
* Minimal chunk representation emitted in chunks_slim.json
|
|
434
|
+
* Minimal chunk representation emitted in chunks_slim.json (legacy).
|
|
377
435
|
*/
|
|
378
436
|
interface SlimChunk {
|
|
379
437
|
type: 'text' | 'image' | 'table';
|
|
380
438
|
path: string;
|
|
381
439
|
content: string;
|
|
382
|
-
summary?: string;
|
|
383
440
|
}
|
|
384
441
|
/**
|
|
385
442
|
* Text chunk
|
|
386
443
|
*/
|
|
387
444
|
interface TextChunk extends BaseChunk {
|
|
388
445
|
type: 'text';
|
|
389
|
-
/** Content length */
|
|
390
|
-
length: number;
|
|
391
|
-
/** Extracted tokens from the current backend payload */
|
|
392
|
-
tokens?: string[];
|
|
393
|
-
/** Extracted keywords */
|
|
394
|
-
keywords?: string[];
|
|
395
|
-
/** Generated summary */
|
|
396
|
-
summary?: string;
|
|
397
|
-
/** Chunk relationships (schema v2.1: metadata.connect_to) */
|
|
398
|
-
connectTo?: ConnectTo[];
|
|
399
|
-
/**
|
|
400
|
-
* @deprecated Use connectTo instead. Retained for backward compatibility.
|
|
401
|
-
* Previously populated from metadata.relationships which is no longer emitted by the API.
|
|
402
|
-
*/
|
|
403
|
-
relationships?: string[];
|
|
404
446
|
}
|
|
405
447
|
/**
|
|
406
448
|
* Image chunk
|
|
407
449
|
*/
|
|
408
450
|
interface ImageChunk extends BaseChunk {
|
|
409
451
|
type: 'image';
|
|
410
|
-
/** Content length */
|
|
411
|
-
length: number;
|
|
412
452
|
/** Relative file path in ZIP */
|
|
413
453
|
filePath: string;
|
|
414
|
-
/** Generated summary */
|
|
415
|
-
summary?: string;
|
|
416
454
|
/** Image data buffer */
|
|
417
455
|
data: Buffer;
|
|
418
456
|
/** Image format (derived from file extension) */
|
|
@@ -425,14 +463,8 @@ interface ImageChunk extends BaseChunk {
|
|
|
425
463
|
*/
|
|
426
464
|
interface TableChunk extends BaseChunk {
|
|
427
465
|
type: 'table';
|
|
428
|
-
/** Content length */
|
|
429
|
-
length: number;
|
|
430
466
|
/** Relative file path in ZIP */
|
|
431
467
|
filePath: string;
|
|
432
|
-
/** Table type */
|
|
433
|
-
tableType?: string;
|
|
434
|
-
/** Generated summary */
|
|
435
|
-
summary?: string;
|
|
436
468
|
/** HTML representation */
|
|
437
469
|
html: string;
|
|
438
470
|
/** Save table HTML to disk */
|
|
@@ -450,20 +482,22 @@ interface ParseResult {
|
|
|
450
482
|
manifest: Manifest;
|
|
451
483
|
/** All chunks */
|
|
452
484
|
chunks: Chunk[];
|
|
453
|
-
/**
|
|
454
|
-
|
|
485
|
+
/** Document navigation tree from doc_nav.json (current worker output) */
|
|
486
|
+
docNav?: DocNav;
|
|
455
487
|
/** Full document as Markdown (if available) */
|
|
456
488
|
fullMarkdown?: string;
|
|
457
|
-
/**
|
|
489
|
+
/** Raw ZIP buffer */
|
|
490
|
+
rawZip: Buffer;
|
|
491
|
+
/** @deprecated Current worker no longer emits chunks_slim.json */
|
|
492
|
+
chunksSlim?: SlimChunk[];
|
|
493
|
+
/** @deprecated Current worker no longer emits hierarchy.json */
|
|
458
494
|
hierarchy?: unknown;
|
|
459
|
-
/** Table-of-contents hierarchy hints (if available) */
|
|
495
|
+
/** @deprecated Table-of-contents hierarchy hints (if available) */
|
|
460
496
|
tocHierarchies?: unknown;
|
|
461
|
-
/** Knowledge-base CSV export (if available) */
|
|
497
|
+
/** @deprecated Knowledge-base CSV export (if available) */
|
|
462
498
|
kbCsv?: string;
|
|
463
|
-
/** Pre-rendered hierarchy HTML view (if available) */
|
|
499
|
+
/** @deprecated Pre-rendered hierarchy HTML view (if available) */
|
|
464
500
|
hierarchyViewHtml?: string;
|
|
465
|
-
/** Raw ZIP buffer */
|
|
466
|
-
rawZip: Buffer;
|
|
467
501
|
/** Text chunks only */
|
|
468
502
|
readonly textChunks: TextChunk[];
|
|
469
503
|
/** Image chunks only */
|
|
@@ -611,6 +645,14 @@ interface RetrievalQueryParams {
|
|
|
611
645
|
namespace?: string;
|
|
612
646
|
/** Maximum number of results to return */
|
|
613
647
|
topK?: number;
|
|
648
|
+
/**
|
|
649
|
+
* Force retrieval mode.
|
|
650
|
+
*
|
|
651
|
+
* - ``true`` — agentic (LLM navigation + answer synthesis)
|
|
652
|
+
* - ``false`` — legacy 3-channel RRF only
|
|
653
|
+
* - ``undefined`` / omitted — server default
|
|
654
|
+
*/
|
|
655
|
+
useAgentic?: boolean;
|
|
614
656
|
/** Chunk type filter: 1=all, 2=text, 3=image, 4=table, 5=text+image, 6=text+table */
|
|
615
657
|
dataType?: 1 | 2 | 3 | 4 | 5 | 6;
|
|
616
658
|
/** Path keywords for include/exclude filtering */
|
|
@@ -668,6 +710,10 @@ interface RetrievalQueryResponse {
|
|
|
668
710
|
query: string;
|
|
669
711
|
/** Retrieval router path used by the API for this query */
|
|
670
712
|
routerUsed?: string;
|
|
713
|
+
/** LLM-generated natural-language answer (agentic mode only) */
|
|
714
|
+
answerText?: string | null;
|
|
715
|
+
/** Cited evidence chunks with asset URLs (agentic mode only) */
|
|
716
|
+
referencedChunks?: Array<Record<string, unknown>> | null;
|
|
671
717
|
/** Ranked retrieval results */
|
|
672
718
|
results: RetrievalResult[];
|
|
673
719
|
}
|
|
@@ -712,6 +758,104 @@ interface DocumentListResponse {
|
|
|
712
758
|
/** Documents visible in the namespace */
|
|
713
759
|
documents: Document[];
|
|
714
760
|
}
|
|
761
|
+
/**
|
|
762
|
+
* Document chunk types supported by document chunk endpoints.
|
|
763
|
+
*/
|
|
764
|
+
type DocumentChunkType = 'text' | 'image' | 'table';
|
|
765
|
+
/**
|
|
766
|
+
* Pagination metadata returned by chunk list endpoints.
|
|
767
|
+
*/
|
|
768
|
+
interface DocumentChunkPagination {
|
|
769
|
+
/** Current page number */
|
|
770
|
+
page: number;
|
|
771
|
+
/** Number of items requested per page */
|
|
772
|
+
pageSize: number;
|
|
773
|
+
/** Total matching chunks */
|
|
774
|
+
total: number;
|
|
775
|
+
/** Total number of pages */
|
|
776
|
+
totalPages: number;
|
|
777
|
+
}
|
|
778
|
+
/**
|
|
779
|
+
* Query parameters for GET /v1/documents/{document_id}/chunks.
|
|
780
|
+
*/
|
|
781
|
+
interface DocumentChunkListParams {
|
|
782
|
+
/** Page number (default: 1) */
|
|
783
|
+
page?: number;
|
|
784
|
+
/** Items per page (default: 50, maximum: 200) */
|
|
785
|
+
pageSize?: number;
|
|
786
|
+
/** Optional chunk type filter */
|
|
787
|
+
chunkType?: DocumentChunkType;
|
|
788
|
+
/** Generate asset URLs for media chunks (default: false) */
|
|
789
|
+
includeAssetUrls?: boolean;
|
|
790
|
+
}
|
|
791
|
+
/**
|
|
792
|
+
* Query parameters for GET /v1/documents/{document_id}/chunks/{document_chunk_id}.
|
|
793
|
+
*/
|
|
794
|
+
interface DocumentChunkGetParams {
|
|
795
|
+
/** Generate asset URLs for media chunks (default: false) */
|
|
796
|
+
includeAssetUrls?: boolean;
|
|
797
|
+
}
|
|
798
|
+
/**
|
|
799
|
+
* One current-revision document chunk.
|
|
800
|
+
*/
|
|
801
|
+
interface DocumentChunk {
|
|
802
|
+
/** Stable document chunk row identifier */
|
|
803
|
+
id: string;
|
|
804
|
+
/** Parser-provided chunk identifier */
|
|
805
|
+
chunkId: string;
|
|
806
|
+
/** Chunk content type */
|
|
807
|
+
chunkType: DocumentChunkType;
|
|
808
|
+
/** Chunk text or generated summary content */
|
|
809
|
+
content?: string | null;
|
|
810
|
+
/** Parent section identifier */
|
|
811
|
+
sectionId?: string | null;
|
|
812
|
+
/** Parent section path */
|
|
813
|
+
sectionPath?: string | null;
|
|
814
|
+
/** Source path from the parser output */
|
|
815
|
+
sourceChunkPath?: string | null;
|
|
816
|
+
/** Generated artifact file path for media chunks */
|
|
817
|
+
filePath?: string | null;
|
|
818
|
+
/** Sort order within the document revision */
|
|
819
|
+
sortOrder: number;
|
|
820
|
+
/** Chunk metadata returned by the API */
|
|
821
|
+
metadata: Record<string, unknown>;
|
|
822
|
+
/** Generated asset URL when requested and available */
|
|
823
|
+
assetUrl?: string | null;
|
|
824
|
+
/** Chunk creation timestamp */
|
|
825
|
+
createdAt?: Date;
|
|
826
|
+
}
|
|
827
|
+
/**
|
|
828
|
+
* Response from GET /v1/documents/{document_id}/chunks.
|
|
829
|
+
*/
|
|
830
|
+
interface DocumentChunkListResponse {
|
|
831
|
+
/** Stable document identifier */
|
|
832
|
+
documentId: string;
|
|
833
|
+
/** Retrieval namespace */
|
|
834
|
+
namespace: string;
|
|
835
|
+
/** Current published job result identifier */
|
|
836
|
+
jobResultId?: string | null;
|
|
837
|
+
/** Current published job identifier */
|
|
838
|
+
jobId?: string | null;
|
|
839
|
+
/** Current-revision chunks */
|
|
840
|
+
chunks: DocumentChunk[];
|
|
841
|
+
/** Pagination metadata */
|
|
842
|
+
pagination: DocumentChunkPagination;
|
|
843
|
+
}
|
|
844
|
+
/**
|
|
845
|
+
* Response from GET /v1/documents/{document_id}/chunks/{document_chunk_id}.
|
|
846
|
+
*/
|
|
847
|
+
interface DocumentChunkResponse {
|
|
848
|
+
/** Stable document identifier */
|
|
849
|
+
documentId: string;
|
|
850
|
+
/** Retrieval namespace */
|
|
851
|
+
namespace: string;
|
|
852
|
+
/** Current published job result identifier */
|
|
853
|
+
jobResultId?: string | null;
|
|
854
|
+
/** Current published job identifier */
|
|
855
|
+
jobId?: string | null;
|
|
856
|
+
/** Requested current-revision chunk */
|
|
857
|
+
chunk: DocumentChunk;
|
|
858
|
+
}
|
|
715
859
|
|
|
716
860
|
/**
|
|
717
861
|
* Resource for canonical document lifecycle operations.
|
|
@@ -727,10 +871,20 @@ declare class Documents extends BaseResource {
|
|
|
727
871
|
* Get one canonical document by ID.
|
|
728
872
|
*/
|
|
729
873
|
get(documentId: string): Promise<Document>;
|
|
874
|
+
/**
|
|
875
|
+
* List current-revision chunks for one canonical document.
|
|
876
|
+
*/
|
|
877
|
+
listChunks(documentId: string, params?: DocumentChunkListParams): Promise<DocumentChunkListResponse>;
|
|
878
|
+
/**
|
|
879
|
+
* Get one current-revision chunk for one canonical document.
|
|
880
|
+
*/
|
|
881
|
+
getChunk(documentId: string, documentChunkId: string, params?: DocumentChunkGetParams): Promise<DocumentChunkResponse>;
|
|
730
882
|
/**
|
|
731
883
|
* Archive one canonical document by ID.
|
|
732
884
|
*/
|
|
733
885
|
archive(documentId: string): Promise<Document>;
|
|
886
|
+
private createChunkListRequestConfig;
|
|
887
|
+
private createChunkGetRequestConfig;
|
|
734
888
|
}
|
|
735
889
|
|
|
736
890
|
/**
|
|
@@ -902,4 +1056,4 @@ declare class JobFailedError extends KnowhereError {
|
|
|
902
1056
|
constructor(message: string, code: string, jobResult: JobResult);
|
|
903
1057
|
}
|
|
904
1058
|
|
|
905
|
-
export { APIError, AuthenticationError, BadRequestError, type BaseChunk, ChecksumError, type Chunk, ConflictError, type CreateJobParams, type DocType, type Document, type DocumentListResponse, Documents, type FileIndex, GatewayTimeoutError, type ImageChunk, InternalServerError, InvalidStateError, type Job, type JobError, JobFailedError, type JobResult, type JobStatus, Jobs, Knowhere, KnowhereError, type KnowhereOptions, type LoadOptions, type Manifest, NetworkError, NotFoundError, type ParseParams, type ParseResult, type ParsingModel, type ParsingParams, PaymentRequiredError, PermissionDeniedError, type PollProgress, PollingTimeoutError, RateLimitError, Retrieval, type RetrievalChannel, type RetrievalFilterMode, type RetrievalQueryParams, type RetrievalQueryResponse, type RetrievalResult, type RetrievalSectionExclusion, type RetrievalSource, ServiceUnavailableError, type Statistics, type TableChunk, type TextChunk, TimeoutError, type UploadParams, type UploadProgress, VERSION, ValidationError, type WaitOptions, type WebhookConfig, Knowhere as default };
|
|
1059
|
+
export { APIError, AuthenticationError, BadRequestError, type BaseChunk, ChecksumError, type Chunk, ConflictError, type CreateJobParams, type DocType, type Document, type DocumentChunk, type DocumentChunkGetParams, type DocumentChunkListParams, type DocumentChunkListResponse, type DocumentChunkPagination, type DocumentChunkResponse, type DocumentChunkType, type DocumentListResponse, Documents, type FileIndex, GatewayTimeoutError, type ImageChunk, InternalServerError, InvalidStateError, type Job, type JobError, JobFailedError, type JobResult, type JobStatus, Jobs, Knowhere, KnowhereError, type KnowhereOptions, type LoadOptions, type Manifest, NetworkError, NotFoundError, type ParseParams, type ParseResult, type ParsingModel, type ParsingParams, PaymentRequiredError, PermissionDeniedError, type PollProgress, PollingTimeoutError, RateLimitError, Retrieval, type RetrievalChannel, type RetrievalFilterMode, type RetrievalQueryParams, type RetrievalQueryResponse, type RetrievalResult, type RetrievalSectionExclusion, type RetrievalSource, ServiceUnavailableError, type Statistics, type TableChunk, type TextChunk, TimeoutError, type UploadParams, type UploadProgress, VERSION, ValidationError, type WaitOptions, type WebhookConfig, Knowhere as default };
|
package/dist/index.d.ts
CHANGED
|
@@ -332,7 +332,6 @@ interface Manifest {
|
|
|
332
332
|
dataId?: string;
|
|
333
333
|
/** Original source file name */
|
|
334
334
|
sourceFileName: string;
|
|
335
|
-
/** Processing completion date */
|
|
336
335
|
/** Processing completion date (optional: only present if emitted by the worker) */
|
|
337
336
|
processingDate?: Date;
|
|
338
337
|
/** Worker-side processing metadata emitted by manifest v2 */
|
|
@@ -341,6 +340,13 @@ interface Manifest {
|
|
|
341
340
|
statistics: Statistics;
|
|
342
341
|
/** Legacy file index from earlier ZIP manifests */
|
|
343
342
|
files?: FileIndex;
|
|
343
|
+
/**
|
|
344
|
+
* Document hierarchy emitted by the current worker.
|
|
345
|
+
*
|
|
346
|
+
* The key remains all-caps at runtime because ``keysToCamel()`` only
|
|
347
|
+
* transforms snake_case keys.
|
|
348
|
+
*/
|
|
349
|
+
HIERARCHY?: Record<string, unknown>;
|
|
344
350
|
}
|
|
345
351
|
/**
|
|
346
352
|
* Chunk relationship entry (metadata.connect_to per schema v2.1)
|
|
@@ -357,6 +363,58 @@ interface ConnectTo {
|
|
|
357
363
|
/** Shared keywords (related only) */
|
|
358
364
|
keywords?: string[];
|
|
359
365
|
}
|
|
366
|
+
/**
|
|
367
|
+
* A single image or table resource entry in ``doc_nav.json``.
|
|
368
|
+
*/
|
|
369
|
+
interface DocNavResourceItem {
|
|
370
|
+
path: string;
|
|
371
|
+
summary?: string;
|
|
372
|
+
}
|
|
373
|
+
/**
|
|
374
|
+
* Image and table resource summaries from ``doc_nav.json``.
|
|
375
|
+
*/
|
|
376
|
+
interface DocNavResources {
|
|
377
|
+
images: DocNavResourceItem[];
|
|
378
|
+
tables: DocNavResourceItem[];
|
|
379
|
+
}
|
|
380
|
+
/**
|
|
381
|
+
* A document section in the ``doc_nav.json`` navigation tree.
|
|
382
|
+
*/
|
|
383
|
+
interface DocNavSection {
|
|
384
|
+
title: string;
|
|
385
|
+
path: string;
|
|
386
|
+
level: number;
|
|
387
|
+
summary?: string;
|
|
388
|
+
chunkCount: number;
|
|
389
|
+
children: DocNavSection[];
|
|
390
|
+
}
|
|
391
|
+
/**
|
|
392
|
+
* Top-level document navigation structure from ``doc_nav.json``.
|
|
393
|
+
*/
|
|
394
|
+
interface DocNav {
|
|
395
|
+
sections: DocNavSection[];
|
|
396
|
+
resources?: DocNavResources;
|
|
397
|
+
}
|
|
398
|
+
/**
|
|
399
|
+
* Known worker metadata fields for a chunk.
|
|
400
|
+
*
|
|
401
|
+
* All fields are optional. Unknown fields added by future worker
|
|
402
|
+
* versions are accessible through the index signature.
|
|
403
|
+
*/
|
|
404
|
+
interface ChunkMetadata {
|
|
405
|
+
length?: number;
|
|
406
|
+
pageNums?: number[];
|
|
407
|
+
tokens?: string[];
|
|
408
|
+
keywords?: string[];
|
|
409
|
+
summary?: string;
|
|
410
|
+
connectTo?: ConnectTo[];
|
|
411
|
+
filePath?: string;
|
|
412
|
+
originalName?: string;
|
|
413
|
+
tableType?: string;
|
|
414
|
+
documentTopSummary?: string;
|
|
415
|
+
/** Allow forward-compatible access to unknown fields. */
|
|
416
|
+
[key: string]: unknown;
|
|
417
|
+
}
|
|
360
418
|
/**
|
|
361
419
|
* Base chunk properties
|
|
362
420
|
*/
|
|
@@ -369,50 +427,30 @@ interface BaseChunk {
|
|
|
369
427
|
content: string;
|
|
370
428
|
/** Relative path in ZIP */
|
|
371
429
|
path: string;
|
|
372
|
-
/**
|
|
373
|
-
|
|
430
|
+
/** Worker metadata for this chunk */
|
|
431
|
+
metadata: ChunkMetadata;
|
|
374
432
|
}
|
|
375
433
|
/**
|
|
376
|
-
* Minimal chunk representation emitted in chunks_slim.json
|
|
434
|
+
* Minimal chunk representation emitted in chunks_slim.json (legacy).
|
|
377
435
|
*/
|
|
378
436
|
interface SlimChunk {
|
|
379
437
|
type: 'text' | 'image' | 'table';
|
|
380
438
|
path: string;
|
|
381
439
|
content: string;
|
|
382
|
-
summary?: string;
|
|
383
440
|
}
|
|
384
441
|
/**
|
|
385
442
|
* Text chunk
|
|
386
443
|
*/
|
|
387
444
|
interface TextChunk extends BaseChunk {
|
|
388
445
|
type: 'text';
|
|
389
|
-
/** Content length */
|
|
390
|
-
length: number;
|
|
391
|
-
/** Extracted tokens from the current backend payload */
|
|
392
|
-
tokens?: string[];
|
|
393
|
-
/** Extracted keywords */
|
|
394
|
-
keywords?: string[];
|
|
395
|
-
/** Generated summary */
|
|
396
|
-
summary?: string;
|
|
397
|
-
/** Chunk relationships (schema v2.1: metadata.connect_to) */
|
|
398
|
-
connectTo?: ConnectTo[];
|
|
399
|
-
/**
|
|
400
|
-
* @deprecated Use connectTo instead. Retained for backward compatibility.
|
|
401
|
-
* Previously populated from metadata.relationships which is no longer emitted by the API.
|
|
402
|
-
*/
|
|
403
|
-
relationships?: string[];
|
|
404
446
|
}
|
|
405
447
|
/**
|
|
406
448
|
* Image chunk
|
|
407
449
|
*/
|
|
408
450
|
interface ImageChunk extends BaseChunk {
|
|
409
451
|
type: 'image';
|
|
410
|
-
/** Content length */
|
|
411
|
-
length: number;
|
|
412
452
|
/** Relative file path in ZIP */
|
|
413
453
|
filePath: string;
|
|
414
|
-
/** Generated summary */
|
|
415
|
-
summary?: string;
|
|
416
454
|
/** Image data buffer */
|
|
417
455
|
data: Buffer;
|
|
418
456
|
/** Image format (derived from file extension) */
|
|
@@ -425,14 +463,8 @@ interface ImageChunk extends BaseChunk {
|
|
|
425
463
|
*/
|
|
426
464
|
interface TableChunk extends BaseChunk {
|
|
427
465
|
type: 'table';
|
|
428
|
-
/** Content length */
|
|
429
|
-
length: number;
|
|
430
466
|
/** Relative file path in ZIP */
|
|
431
467
|
filePath: string;
|
|
432
|
-
/** Table type */
|
|
433
|
-
tableType?: string;
|
|
434
|
-
/** Generated summary */
|
|
435
|
-
summary?: string;
|
|
436
468
|
/** HTML representation */
|
|
437
469
|
html: string;
|
|
438
470
|
/** Save table HTML to disk */
|
|
@@ -450,20 +482,22 @@ interface ParseResult {
|
|
|
450
482
|
manifest: Manifest;
|
|
451
483
|
/** All chunks */
|
|
452
484
|
chunks: Chunk[];
|
|
453
|
-
/**
|
|
454
|
-
|
|
485
|
+
/** Document navigation tree from doc_nav.json (current worker output) */
|
|
486
|
+
docNav?: DocNav;
|
|
455
487
|
/** Full document as Markdown (if available) */
|
|
456
488
|
fullMarkdown?: string;
|
|
457
|
-
/**
|
|
489
|
+
/** Raw ZIP buffer */
|
|
490
|
+
rawZip: Buffer;
|
|
491
|
+
/** @deprecated Current worker no longer emits chunks_slim.json */
|
|
492
|
+
chunksSlim?: SlimChunk[];
|
|
493
|
+
/** @deprecated Current worker no longer emits hierarchy.json */
|
|
458
494
|
hierarchy?: unknown;
|
|
459
|
-
/** Table-of-contents hierarchy hints (if available) */
|
|
495
|
+
/** @deprecated Table-of-contents hierarchy hints (if available) */
|
|
460
496
|
tocHierarchies?: unknown;
|
|
461
|
-
/** Knowledge-base CSV export (if available) */
|
|
497
|
+
/** @deprecated Knowledge-base CSV export (if available) */
|
|
462
498
|
kbCsv?: string;
|
|
463
|
-
/** Pre-rendered hierarchy HTML view (if available) */
|
|
499
|
+
/** @deprecated Pre-rendered hierarchy HTML view (if available) */
|
|
464
500
|
hierarchyViewHtml?: string;
|
|
465
|
-
/** Raw ZIP buffer */
|
|
466
|
-
rawZip: Buffer;
|
|
467
501
|
/** Text chunks only */
|
|
468
502
|
readonly textChunks: TextChunk[];
|
|
469
503
|
/** Image chunks only */
|
|
@@ -611,6 +645,14 @@ interface RetrievalQueryParams {
|
|
|
611
645
|
namespace?: string;
|
|
612
646
|
/** Maximum number of results to return */
|
|
613
647
|
topK?: number;
|
|
648
|
+
/**
|
|
649
|
+
* Force retrieval mode.
|
|
650
|
+
*
|
|
651
|
+
* - ``true`` — agentic (LLM navigation + answer synthesis)
|
|
652
|
+
* - ``false`` — legacy 3-channel RRF only
|
|
653
|
+
* - ``undefined`` / omitted — server default
|
|
654
|
+
*/
|
|
655
|
+
useAgentic?: boolean;
|
|
614
656
|
/** Chunk type filter: 1=all, 2=text, 3=image, 4=table, 5=text+image, 6=text+table */
|
|
615
657
|
dataType?: 1 | 2 | 3 | 4 | 5 | 6;
|
|
616
658
|
/** Path keywords for include/exclude filtering */
|
|
@@ -668,6 +710,10 @@ interface RetrievalQueryResponse {
|
|
|
668
710
|
query: string;
|
|
669
711
|
/** Retrieval router path used by the API for this query */
|
|
670
712
|
routerUsed?: string;
|
|
713
|
+
/** LLM-generated natural-language answer (agentic mode only) */
|
|
714
|
+
answerText?: string | null;
|
|
715
|
+
/** Cited evidence chunks with asset URLs (agentic mode only) */
|
|
716
|
+
referencedChunks?: Array<Record<string, unknown>> | null;
|
|
671
717
|
/** Ranked retrieval results */
|
|
672
718
|
results: RetrievalResult[];
|
|
673
719
|
}
|
|
@@ -712,6 +758,104 @@ interface DocumentListResponse {
|
|
|
712
758
|
/** Documents visible in the namespace */
|
|
713
759
|
documents: Document[];
|
|
714
760
|
}
|
|
761
|
+
/**
|
|
762
|
+
* Document chunk types supported by document chunk endpoints.
|
|
763
|
+
*/
|
|
764
|
+
type DocumentChunkType = 'text' | 'image' | 'table';
|
|
765
|
+
/**
|
|
766
|
+
* Pagination metadata returned by chunk list endpoints.
|
|
767
|
+
*/
|
|
768
|
+
interface DocumentChunkPagination {
|
|
769
|
+
/** Current page number */
|
|
770
|
+
page: number;
|
|
771
|
+
/** Number of items requested per page */
|
|
772
|
+
pageSize: number;
|
|
773
|
+
/** Total matching chunks */
|
|
774
|
+
total: number;
|
|
775
|
+
/** Total number of pages */
|
|
776
|
+
totalPages: number;
|
|
777
|
+
}
|
|
778
|
+
/**
|
|
779
|
+
* Query parameters for GET /v1/documents/{document_id}/chunks.
|
|
780
|
+
*/
|
|
781
|
+
interface DocumentChunkListParams {
|
|
782
|
+
/** Page number (default: 1) */
|
|
783
|
+
page?: number;
|
|
784
|
+
/** Items per page (default: 50, maximum: 200) */
|
|
785
|
+
pageSize?: number;
|
|
786
|
+
/** Optional chunk type filter */
|
|
787
|
+
chunkType?: DocumentChunkType;
|
|
788
|
+
/** Generate asset URLs for media chunks (default: false) */
|
|
789
|
+
includeAssetUrls?: boolean;
|
|
790
|
+
}
|
|
791
|
+
/**
|
|
792
|
+
* Query parameters for GET /v1/documents/{document_id}/chunks/{document_chunk_id}.
|
|
793
|
+
*/
|
|
794
|
+
interface DocumentChunkGetParams {
|
|
795
|
+
/** Generate asset URLs for media chunks (default: false) */
|
|
796
|
+
includeAssetUrls?: boolean;
|
|
797
|
+
}
|
|
798
|
+
/**
|
|
799
|
+
* One current-revision document chunk.
|
|
800
|
+
*/
|
|
801
|
+
interface DocumentChunk {
|
|
802
|
+
/** Stable document chunk row identifier */
|
|
803
|
+
id: string;
|
|
804
|
+
/** Parser-provided chunk identifier */
|
|
805
|
+
chunkId: string;
|
|
806
|
+
/** Chunk content type */
|
|
807
|
+
chunkType: DocumentChunkType;
|
|
808
|
+
/** Chunk text or generated summary content */
|
|
809
|
+
content?: string | null;
|
|
810
|
+
/** Parent section identifier */
|
|
811
|
+
sectionId?: string | null;
|
|
812
|
+
/** Parent section path */
|
|
813
|
+
sectionPath?: string | null;
|
|
814
|
+
/** Source path from the parser output */
|
|
815
|
+
sourceChunkPath?: string | null;
|
|
816
|
+
/** Generated artifact file path for media chunks */
|
|
817
|
+
filePath?: string | null;
|
|
818
|
+
/** Sort order within the document revision */
|
|
819
|
+
sortOrder: number;
|
|
820
|
+
/** Chunk metadata returned by the API */
|
|
821
|
+
metadata: Record<string, unknown>;
|
|
822
|
+
/** Generated asset URL when requested and available */
|
|
823
|
+
assetUrl?: string | null;
|
|
824
|
+
/** Chunk creation timestamp */
|
|
825
|
+
createdAt?: Date;
|
|
826
|
+
}
|
|
827
|
+
/**
|
|
828
|
+
* Response from GET /v1/documents/{document_id}/chunks.
|
|
829
|
+
*/
|
|
830
|
+
interface DocumentChunkListResponse {
|
|
831
|
+
/** Stable document identifier */
|
|
832
|
+
documentId: string;
|
|
833
|
+
/** Retrieval namespace */
|
|
834
|
+
namespace: string;
|
|
835
|
+
/** Current published job result identifier */
|
|
836
|
+
jobResultId?: string | null;
|
|
837
|
+
/** Current published job identifier */
|
|
838
|
+
jobId?: string | null;
|
|
839
|
+
/** Current-revision chunks */
|
|
840
|
+
chunks: DocumentChunk[];
|
|
841
|
+
/** Pagination metadata */
|
|
842
|
+
pagination: DocumentChunkPagination;
|
|
843
|
+
}
|
|
844
|
+
/**
|
|
845
|
+
* Response from GET /v1/documents/{document_id}/chunks/{document_chunk_id}.
|
|
846
|
+
*/
|
|
847
|
+
interface DocumentChunkResponse {
|
|
848
|
+
/** Stable document identifier */
|
|
849
|
+
documentId: string;
|
|
850
|
+
/** Retrieval namespace */
|
|
851
|
+
namespace: string;
|
|
852
|
+
/** Current published job result identifier */
|
|
853
|
+
jobResultId?: string | null;
|
|
854
|
+
/** Current published job identifier */
|
|
855
|
+
jobId?: string | null;
|
|
856
|
+
/** Requested current-revision chunk */
|
|
857
|
+
chunk: DocumentChunk;
|
|
858
|
+
}
|
|
715
859
|
|
|
716
860
|
/**
|
|
717
861
|
* Resource for canonical document lifecycle operations.
|
|
@@ -727,10 +871,20 @@ declare class Documents extends BaseResource {
|
|
|
727
871
|
* Get one canonical document by ID.
|
|
728
872
|
*/
|
|
729
873
|
get(documentId: string): Promise<Document>;
|
|
874
|
+
/**
|
|
875
|
+
* List current-revision chunks for one canonical document.
|
|
876
|
+
*/
|
|
877
|
+
listChunks(documentId: string, params?: DocumentChunkListParams): Promise<DocumentChunkListResponse>;
|
|
878
|
+
/**
|
|
879
|
+
* Get one current-revision chunk for one canonical document.
|
|
880
|
+
*/
|
|
881
|
+
getChunk(documentId: string, documentChunkId: string, params?: DocumentChunkGetParams): Promise<DocumentChunkResponse>;
|
|
730
882
|
/**
|
|
731
883
|
* Archive one canonical document by ID.
|
|
732
884
|
*/
|
|
733
885
|
archive(documentId: string): Promise<Document>;
|
|
886
|
+
private createChunkListRequestConfig;
|
|
887
|
+
private createChunkGetRequestConfig;
|
|
734
888
|
}
|
|
735
889
|
|
|
736
890
|
/**
|
|
@@ -902,4 +1056,4 @@ declare class JobFailedError extends KnowhereError {
|
|
|
902
1056
|
constructor(message: string, code: string, jobResult: JobResult);
|
|
903
1057
|
}
|
|
904
1058
|
|
|
905
|
-
export { APIError, AuthenticationError, BadRequestError, type BaseChunk, ChecksumError, type Chunk, ConflictError, type CreateJobParams, type DocType, type Document, type DocumentListResponse, Documents, type FileIndex, GatewayTimeoutError, type ImageChunk, InternalServerError, InvalidStateError, type Job, type JobError, JobFailedError, type JobResult, type JobStatus, Jobs, Knowhere, KnowhereError, type KnowhereOptions, type LoadOptions, type Manifest, NetworkError, NotFoundError, type ParseParams, type ParseResult, type ParsingModel, type ParsingParams, PaymentRequiredError, PermissionDeniedError, type PollProgress, PollingTimeoutError, RateLimitError, Retrieval, type RetrievalChannel, type RetrievalFilterMode, type RetrievalQueryParams, type RetrievalQueryResponse, type RetrievalResult, type RetrievalSectionExclusion, type RetrievalSource, ServiceUnavailableError, type Statistics, type TableChunk, type TextChunk, TimeoutError, type UploadParams, type UploadProgress, VERSION, ValidationError, type WaitOptions, type WebhookConfig, Knowhere as default };
|
|
1059
|
+
export { APIError, AuthenticationError, BadRequestError, type BaseChunk, ChecksumError, type Chunk, ConflictError, type CreateJobParams, type DocType, type Document, type DocumentChunk, type DocumentChunkGetParams, type DocumentChunkListParams, type DocumentChunkListResponse, type DocumentChunkPagination, type DocumentChunkResponse, type DocumentChunkType, type DocumentListResponse, Documents, type FileIndex, GatewayTimeoutError, type ImageChunk, InternalServerError, InvalidStateError, type Job, type JobError, JobFailedError, type JobResult, type JobStatus, Jobs, Knowhere, KnowhereError, type KnowhereOptions, type LoadOptions, type Manifest, NetworkError, NotFoundError, type ParseParams, type ParseResult, type ParsingModel, type ParsingParams, PaymentRequiredError, PermissionDeniedError, type PollProgress, PollingTimeoutError, RateLimitError, Retrieval, type RetrievalChannel, type RetrievalFilterMode, type RetrievalQueryParams, type RetrievalQueryResponse, type RetrievalResult, type RetrievalSectionExclusion, type RetrievalSource, ServiceUnavailableError, type Statistics, type TableChunk, type TextChunk, TimeoutError, type UploadParams, type UploadProgress, VERSION, ValidationError, type WaitOptions, type WebhookConfig, Knowhere as default };
|
package/dist/index.js
CHANGED
|
@@ -844,6 +844,13 @@ async function parseResult(httpClient, resultUrl, options) {
|
|
|
844
844
|
if (fullMdFile) {
|
|
845
845
|
fullMarkdown = await fullMdFile.async("string");
|
|
846
846
|
}
|
|
847
|
+
let docNav;
|
|
848
|
+
const docNavFile = zip.file("doc_nav.json");
|
|
849
|
+
if (docNavFile) {
|
|
850
|
+
const docNavContent = await docNavFile.async("string");
|
|
851
|
+
const rawDocNav = JSON.parse(docNavContent);
|
|
852
|
+
docNav = keysToCamel(rawDocNav);
|
|
853
|
+
}
|
|
847
854
|
let hierarchy;
|
|
848
855
|
const hierarchyFile = zip.file("hierarchy.json");
|
|
849
856
|
if (hierarchyFile) {
|
|
@@ -877,13 +884,15 @@ async function parseResult(httpClient, resultUrl, options) {
|
|
|
877
884
|
const result = {
|
|
878
885
|
manifest,
|
|
879
886
|
chunks,
|
|
880
|
-
|
|
887
|
+
docNav,
|
|
881
888
|
fullMarkdown,
|
|
889
|
+
rawZip: zipBuffer,
|
|
890
|
+
// Legacy
|
|
891
|
+
chunksSlim,
|
|
882
892
|
hierarchy,
|
|
883
893
|
tocHierarchies,
|
|
884
894
|
kbCsv,
|
|
885
895
|
hierarchyViewHtml,
|
|
886
|
-
rawZip: zipBuffer,
|
|
887
896
|
get textChunks() {
|
|
888
897
|
return chunks.filter((c) => c.type === "text");
|
|
889
898
|
},
|
|
@@ -905,6 +914,9 @@ async function parseResult(httpClient, resultUrl, options) {
|
|
|
905
914
|
async save(directory) {
|
|
906
915
|
await import_fs2.promises.mkdir(directory, { recursive: true });
|
|
907
916
|
await import_fs2.promises.writeFile((0, import_path.join)(directory, "manifest.json"), JSON.stringify(manifest, null, 2));
|
|
917
|
+
if (docNav) {
|
|
918
|
+
await import_fs2.promises.writeFile((0, import_path.join)(directory, "doc_nav.json"), JSON.stringify(docNav, null, 2));
|
|
919
|
+
}
|
|
908
920
|
await import_fs2.promises.writeFile((0, import_path.join)(directory, "chunks.json"), JSON.stringify(chunks, null, 2));
|
|
909
921
|
if (chunksSlim) {
|
|
910
922
|
await import_fs2.promises.writeFile(
|
|
@@ -960,56 +972,24 @@ function extractSlimChunks(payload) {
|
|
|
960
972
|
}
|
|
961
973
|
return [];
|
|
962
974
|
}
|
|
963
|
-
function getChunkMetadata(chunkData) {
|
|
964
|
-
if (!chunkData.metadata) {
|
|
965
|
-
return {};
|
|
966
|
-
}
|
|
967
|
-
return chunkData.metadata;
|
|
968
|
-
}
|
|
969
975
|
function getChunkFilePath(chunkData) {
|
|
970
|
-
const metadata =
|
|
971
|
-
return chunkData.filePath ?? metadata
|
|
976
|
+
const metadata = chunkData.metadata;
|
|
977
|
+
return chunkData.filePath ?? metadata?.filePath ?? chunkData.path;
|
|
972
978
|
}
|
|
973
|
-
function
|
|
974
|
-
if (!Array.isArray(pageNums)) {
|
|
975
|
-
return void 0;
|
|
976
|
-
}
|
|
977
|
-
const normalized = pageNums.filter((pageNum) => typeof pageNum === "number");
|
|
978
|
-
return normalized.length > 0 ? normalized : void 0;
|
|
979
|
-
}
|
|
980
|
-
function normalizeTokens(tokens) {
|
|
981
|
-
if (!Array.isArray(tokens)) {
|
|
982
|
-
return void 0;
|
|
983
|
-
}
|
|
984
|
-
if (!tokens.every((token) => typeof token === "string")) {
|
|
985
|
-
return void 0;
|
|
986
|
-
}
|
|
987
|
-
return tokens;
|
|
988
|
-
}
|
|
989
|
-
function normalizeTextChunk(chunkData) {
|
|
990
|
-
const metadata = getChunkMetadata(chunkData);
|
|
991
|
-
const connectTo = metadata.connectTo ?? chunkData.connectTo;
|
|
992
|
-
const relationships = metadata.relationships ?? chunkData.relationships;
|
|
979
|
+
function buildTextChunk(chunkData) {
|
|
993
980
|
return {
|
|
994
981
|
chunkId: chunkData.chunkId ?? "",
|
|
995
982
|
type: "text",
|
|
996
983
|
content: chunkData.content ?? "",
|
|
997
984
|
path: chunkData.path ?? "",
|
|
998
|
-
|
|
999
|
-
length: metadata.length ?? chunkData.length ?? 0,
|
|
1000
|
-
tokens: normalizeTokens(metadata.tokens ?? chunkData.tokens),
|
|
1001
|
-
keywords: metadata.keywords ?? chunkData.keywords,
|
|
1002
|
-
summary: metadata.summary ?? chunkData.summary,
|
|
1003
|
-
...connectTo !== void 0 && { connectTo },
|
|
1004
|
-
...relationships !== void 0 && { relationships }
|
|
985
|
+
metadata: chunkData.metadata ?? {}
|
|
1005
986
|
};
|
|
1006
987
|
}
|
|
1007
988
|
async function processChunk(zip, chunkData) {
|
|
1008
989
|
if (chunkData.type === "text") {
|
|
1009
|
-
return
|
|
990
|
+
return buildTextChunk(chunkData);
|
|
1010
991
|
}
|
|
1011
992
|
if (chunkData.type === "image") {
|
|
1012
|
-
const metadata = getChunkMetadata(chunkData);
|
|
1013
993
|
const filePath = getChunkFilePath(chunkData);
|
|
1014
994
|
if (!filePath) {
|
|
1015
995
|
throw new KnowhereError(`Image chunk missing file path: ${chunkData.chunkId ?? "unknown"}`);
|
|
@@ -1025,11 +1005,9 @@ async function processChunk(zip, chunkData) {
|
|
|
1025
1005
|
type: "image",
|
|
1026
1006
|
content: chunkData.content ?? "",
|
|
1027
1007
|
path: chunkData.path ?? "",
|
|
1028
|
-
pageNums: normalizePageNums(metadata.pageNums ?? chunkData.pageNums),
|
|
1029
|
-
length: metadata.length ?? chunkData.length ?? 0,
|
|
1030
1008
|
filePath,
|
|
1031
|
-
summary: metadata.summary ?? chunkData.summary,
|
|
1032
1009
|
data: imageBuffer,
|
|
1010
|
+
metadata: chunkData.metadata ?? {},
|
|
1033
1011
|
get format() {
|
|
1034
1012
|
return getFileExtension(this.filePath);
|
|
1035
1013
|
},
|
|
@@ -1044,7 +1022,6 @@ async function processChunk(zip, chunkData) {
|
|
|
1044
1022
|
return enrichedChunk;
|
|
1045
1023
|
}
|
|
1046
1024
|
if (chunkData.type === "table") {
|
|
1047
|
-
const metadata = getChunkMetadata(chunkData);
|
|
1048
1025
|
const filePath = getChunkFilePath(chunkData);
|
|
1049
1026
|
if (!filePath) {
|
|
1050
1027
|
throw new KnowhereError(`Table chunk missing file path: ${chunkData.chunkId ?? "unknown"}`);
|
|
@@ -1060,12 +1037,9 @@ async function processChunk(zip, chunkData) {
|
|
|
1060
1037
|
type: "table",
|
|
1061
1038
|
content: chunkData.content ?? "",
|
|
1062
1039
|
path: chunkData.path ?? "",
|
|
1063
|
-
pageNums: normalizePageNums(metadata.pageNums ?? chunkData.pageNums),
|
|
1064
|
-
length: metadata.length ?? chunkData.length ?? 0,
|
|
1065
1040
|
filePath,
|
|
1066
|
-
tableType: metadata.tableType ?? chunkData.tableType,
|
|
1067
|
-
summary: metadata.summary ?? chunkData.summary,
|
|
1068
1041
|
html,
|
|
1042
|
+
metadata: chunkData.metadata ?? {},
|
|
1069
1043
|
async save(directory) {
|
|
1070
1044
|
const outputPath = (0, import_path.join)(directory, sanitizePath(this.filePath));
|
|
1071
1045
|
const outputDir = (0, import_path.dirname)(outputPath);
|
|
@@ -1076,7 +1050,7 @@ async function processChunk(zip, chunkData) {
|
|
|
1076
1050
|
};
|
|
1077
1051
|
return enrichedChunk;
|
|
1078
1052
|
}
|
|
1079
|
-
return
|
|
1053
|
+
return buildTextChunk(chunkData);
|
|
1080
1054
|
}
|
|
1081
1055
|
|
|
1082
1056
|
// src/resources/jobs.ts
|
|
@@ -1219,12 +1193,59 @@ var Documents = class extends BaseResource {
|
|
|
1219
1193
|
async get(documentId) {
|
|
1220
1194
|
return this.httpClient.get(`/v1/documents/${documentId}`);
|
|
1221
1195
|
}
|
|
1196
|
+
/**
|
|
1197
|
+
* List current-revision chunks for one canonical document.
|
|
1198
|
+
*/
|
|
1199
|
+
async listChunks(documentId, params) {
|
|
1200
|
+
return this.httpClient.get(
|
|
1201
|
+
`/v1/documents/${documentId}/chunks`,
|
|
1202
|
+
this.createChunkListRequestConfig(params)
|
|
1203
|
+
);
|
|
1204
|
+
}
|
|
1205
|
+
/**
|
|
1206
|
+
* Get one current-revision chunk for one canonical document.
|
|
1207
|
+
*/
|
|
1208
|
+
async getChunk(documentId, documentChunkId, params) {
|
|
1209
|
+
return this.httpClient.get(
|
|
1210
|
+
`/v1/documents/${documentId}/chunks/${documentChunkId}`,
|
|
1211
|
+
this.createChunkGetRequestConfig(params)
|
|
1212
|
+
);
|
|
1213
|
+
}
|
|
1222
1214
|
/**
|
|
1223
1215
|
* Archive one canonical document by ID.
|
|
1224
1216
|
*/
|
|
1225
1217
|
async archive(documentId) {
|
|
1226
1218
|
return this.httpClient.post(`/v1/documents/${documentId}/archive`);
|
|
1227
1219
|
}
|
|
1220
|
+
createChunkListRequestConfig(params) {
|
|
1221
|
+
if (!params) {
|
|
1222
|
+
return void 0;
|
|
1223
|
+
}
|
|
1224
|
+
const queryParams = {};
|
|
1225
|
+
if (params.page !== void 0) {
|
|
1226
|
+
queryParams.page = params.page;
|
|
1227
|
+
}
|
|
1228
|
+
if (params.pageSize !== void 0) {
|
|
1229
|
+
queryParams.page_size = params.pageSize;
|
|
1230
|
+
}
|
|
1231
|
+
if (params.chunkType !== void 0) {
|
|
1232
|
+
queryParams.chunk_type = params.chunkType;
|
|
1233
|
+
}
|
|
1234
|
+
if (params.includeAssetUrls === true) {
|
|
1235
|
+
queryParams.include_asset_urls = true;
|
|
1236
|
+
}
|
|
1237
|
+
return Object.keys(queryParams).length > 0 ? { params: queryParams } : void 0;
|
|
1238
|
+
}
|
|
1239
|
+
createChunkGetRequestConfig(params) {
|
|
1240
|
+
if (params?.includeAssetUrls !== true) {
|
|
1241
|
+
return void 0;
|
|
1242
|
+
}
|
|
1243
|
+
return {
|
|
1244
|
+
params: {
|
|
1245
|
+
include_asset_urls: true
|
|
1246
|
+
}
|
|
1247
|
+
};
|
|
1248
|
+
}
|
|
1228
1249
|
};
|
|
1229
1250
|
|
|
1230
1251
|
// src/client.ts
|
package/dist/index.mjs
CHANGED
|
@@ -784,6 +784,13 @@ async function parseResult(httpClient, resultUrl, options) {
|
|
|
784
784
|
if (fullMdFile) {
|
|
785
785
|
fullMarkdown = await fullMdFile.async("string");
|
|
786
786
|
}
|
|
787
|
+
let docNav;
|
|
788
|
+
const docNavFile = zip.file("doc_nav.json");
|
|
789
|
+
if (docNavFile) {
|
|
790
|
+
const docNavContent = await docNavFile.async("string");
|
|
791
|
+
const rawDocNav = JSON.parse(docNavContent);
|
|
792
|
+
docNav = keysToCamel(rawDocNav);
|
|
793
|
+
}
|
|
787
794
|
let hierarchy;
|
|
788
795
|
const hierarchyFile = zip.file("hierarchy.json");
|
|
789
796
|
if (hierarchyFile) {
|
|
@@ -817,13 +824,15 @@ async function parseResult(httpClient, resultUrl, options) {
|
|
|
817
824
|
const result = {
|
|
818
825
|
manifest,
|
|
819
826
|
chunks,
|
|
820
|
-
|
|
827
|
+
docNav,
|
|
821
828
|
fullMarkdown,
|
|
829
|
+
rawZip: zipBuffer,
|
|
830
|
+
// Legacy
|
|
831
|
+
chunksSlim,
|
|
822
832
|
hierarchy,
|
|
823
833
|
tocHierarchies,
|
|
824
834
|
kbCsv,
|
|
825
835
|
hierarchyViewHtml,
|
|
826
|
-
rawZip: zipBuffer,
|
|
827
836
|
get textChunks() {
|
|
828
837
|
return chunks.filter((c) => c.type === "text");
|
|
829
838
|
},
|
|
@@ -845,6 +854,9 @@ async function parseResult(httpClient, resultUrl, options) {
|
|
|
845
854
|
async save(directory) {
|
|
846
855
|
await fs2.mkdir(directory, { recursive: true });
|
|
847
856
|
await fs2.writeFile(join(directory, "manifest.json"), JSON.stringify(manifest, null, 2));
|
|
857
|
+
if (docNav) {
|
|
858
|
+
await fs2.writeFile(join(directory, "doc_nav.json"), JSON.stringify(docNav, null, 2));
|
|
859
|
+
}
|
|
848
860
|
await fs2.writeFile(join(directory, "chunks.json"), JSON.stringify(chunks, null, 2));
|
|
849
861
|
if (chunksSlim) {
|
|
850
862
|
await fs2.writeFile(
|
|
@@ -900,56 +912,24 @@ function extractSlimChunks(payload) {
|
|
|
900
912
|
}
|
|
901
913
|
return [];
|
|
902
914
|
}
|
|
903
|
-
function getChunkMetadata(chunkData) {
|
|
904
|
-
if (!chunkData.metadata) {
|
|
905
|
-
return {};
|
|
906
|
-
}
|
|
907
|
-
return chunkData.metadata;
|
|
908
|
-
}
|
|
909
915
|
function getChunkFilePath(chunkData) {
|
|
910
|
-
const metadata =
|
|
911
|
-
return chunkData.filePath ?? metadata
|
|
916
|
+
const metadata = chunkData.metadata;
|
|
917
|
+
return chunkData.filePath ?? metadata?.filePath ?? chunkData.path;
|
|
912
918
|
}
|
|
913
|
-
function
|
|
914
|
-
if (!Array.isArray(pageNums)) {
|
|
915
|
-
return void 0;
|
|
916
|
-
}
|
|
917
|
-
const normalized = pageNums.filter((pageNum) => typeof pageNum === "number");
|
|
918
|
-
return normalized.length > 0 ? normalized : void 0;
|
|
919
|
-
}
|
|
920
|
-
function normalizeTokens(tokens) {
|
|
921
|
-
if (!Array.isArray(tokens)) {
|
|
922
|
-
return void 0;
|
|
923
|
-
}
|
|
924
|
-
if (!tokens.every((token) => typeof token === "string")) {
|
|
925
|
-
return void 0;
|
|
926
|
-
}
|
|
927
|
-
return tokens;
|
|
928
|
-
}
|
|
929
|
-
function normalizeTextChunk(chunkData) {
|
|
930
|
-
const metadata = getChunkMetadata(chunkData);
|
|
931
|
-
const connectTo = metadata.connectTo ?? chunkData.connectTo;
|
|
932
|
-
const relationships = metadata.relationships ?? chunkData.relationships;
|
|
919
|
+
function buildTextChunk(chunkData) {
|
|
933
920
|
return {
|
|
934
921
|
chunkId: chunkData.chunkId ?? "",
|
|
935
922
|
type: "text",
|
|
936
923
|
content: chunkData.content ?? "",
|
|
937
924
|
path: chunkData.path ?? "",
|
|
938
|
-
|
|
939
|
-
length: metadata.length ?? chunkData.length ?? 0,
|
|
940
|
-
tokens: normalizeTokens(metadata.tokens ?? chunkData.tokens),
|
|
941
|
-
keywords: metadata.keywords ?? chunkData.keywords,
|
|
942
|
-
summary: metadata.summary ?? chunkData.summary,
|
|
943
|
-
...connectTo !== void 0 && { connectTo },
|
|
944
|
-
...relationships !== void 0 && { relationships }
|
|
925
|
+
metadata: chunkData.metadata ?? {}
|
|
945
926
|
};
|
|
946
927
|
}
|
|
947
928
|
async function processChunk(zip, chunkData) {
|
|
948
929
|
if (chunkData.type === "text") {
|
|
949
|
-
return
|
|
930
|
+
return buildTextChunk(chunkData);
|
|
950
931
|
}
|
|
951
932
|
if (chunkData.type === "image") {
|
|
952
|
-
const metadata = getChunkMetadata(chunkData);
|
|
953
933
|
const filePath = getChunkFilePath(chunkData);
|
|
954
934
|
if (!filePath) {
|
|
955
935
|
throw new KnowhereError(`Image chunk missing file path: ${chunkData.chunkId ?? "unknown"}`);
|
|
@@ -965,11 +945,9 @@ async function processChunk(zip, chunkData) {
|
|
|
965
945
|
type: "image",
|
|
966
946
|
content: chunkData.content ?? "",
|
|
967
947
|
path: chunkData.path ?? "",
|
|
968
|
-
pageNums: normalizePageNums(metadata.pageNums ?? chunkData.pageNums),
|
|
969
|
-
length: metadata.length ?? chunkData.length ?? 0,
|
|
970
948
|
filePath,
|
|
971
|
-
summary: metadata.summary ?? chunkData.summary,
|
|
972
949
|
data: imageBuffer,
|
|
950
|
+
metadata: chunkData.metadata ?? {},
|
|
973
951
|
get format() {
|
|
974
952
|
return getFileExtension(this.filePath);
|
|
975
953
|
},
|
|
@@ -984,7 +962,6 @@ async function processChunk(zip, chunkData) {
|
|
|
984
962
|
return enrichedChunk;
|
|
985
963
|
}
|
|
986
964
|
if (chunkData.type === "table") {
|
|
987
|
-
const metadata = getChunkMetadata(chunkData);
|
|
988
965
|
const filePath = getChunkFilePath(chunkData);
|
|
989
966
|
if (!filePath) {
|
|
990
967
|
throw new KnowhereError(`Table chunk missing file path: ${chunkData.chunkId ?? "unknown"}`);
|
|
@@ -1000,12 +977,9 @@ async function processChunk(zip, chunkData) {
|
|
|
1000
977
|
type: "table",
|
|
1001
978
|
content: chunkData.content ?? "",
|
|
1002
979
|
path: chunkData.path ?? "",
|
|
1003
|
-
pageNums: normalizePageNums(metadata.pageNums ?? chunkData.pageNums),
|
|
1004
|
-
length: metadata.length ?? chunkData.length ?? 0,
|
|
1005
980
|
filePath,
|
|
1006
|
-
tableType: metadata.tableType ?? chunkData.tableType,
|
|
1007
|
-
summary: metadata.summary ?? chunkData.summary,
|
|
1008
981
|
html,
|
|
982
|
+
metadata: chunkData.metadata ?? {},
|
|
1009
983
|
async save(directory) {
|
|
1010
984
|
const outputPath = join(directory, sanitizePath(this.filePath));
|
|
1011
985
|
const outputDir = dirname(outputPath);
|
|
@@ -1016,7 +990,7 @@ async function processChunk(zip, chunkData) {
|
|
|
1016
990
|
};
|
|
1017
991
|
return enrichedChunk;
|
|
1018
992
|
}
|
|
1019
|
-
return
|
|
993
|
+
return buildTextChunk(chunkData);
|
|
1020
994
|
}
|
|
1021
995
|
|
|
1022
996
|
// src/resources/jobs.ts
|
|
@@ -1159,12 +1133,59 @@ var Documents = class extends BaseResource {
|
|
|
1159
1133
|
async get(documentId) {
|
|
1160
1134
|
return this.httpClient.get(`/v1/documents/${documentId}`);
|
|
1161
1135
|
}
|
|
1136
|
+
/**
|
|
1137
|
+
* List current-revision chunks for one canonical document.
|
|
1138
|
+
*/
|
|
1139
|
+
async listChunks(documentId, params) {
|
|
1140
|
+
return this.httpClient.get(
|
|
1141
|
+
`/v1/documents/${documentId}/chunks`,
|
|
1142
|
+
this.createChunkListRequestConfig(params)
|
|
1143
|
+
);
|
|
1144
|
+
}
|
|
1145
|
+
/**
|
|
1146
|
+
* Get one current-revision chunk for one canonical document.
|
|
1147
|
+
*/
|
|
1148
|
+
async getChunk(documentId, documentChunkId, params) {
|
|
1149
|
+
return this.httpClient.get(
|
|
1150
|
+
`/v1/documents/${documentId}/chunks/${documentChunkId}`,
|
|
1151
|
+
this.createChunkGetRequestConfig(params)
|
|
1152
|
+
);
|
|
1153
|
+
}
|
|
1162
1154
|
/**
|
|
1163
1155
|
* Archive one canonical document by ID.
|
|
1164
1156
|
*/
|
|
1165
1157
|
async archive(documentId) {
|
|
1166
1158
|
return this.httpClient.post(`/v1/documents/${documentId}/archive`);
|
|
1167
1159
|
}
|
|
1160
|
+
createChunkListRequestConfig(params) {
|
|
1161
|
+
if (!params) {
|
|
1162
|
+
return void 0;
|
|
1163
|
+
}
|
|
1164
|
+
const queryParams = {};
|
|
1165
|
+
if (params.page !== void 0) {
|
|
1166
|
+
queryParams.page = params.page;
|
|
1167
|
+
}
|
|
1168
|
+
if (params.pageSize !== void 0) {
|
|
1169
|
+
queryParams.page_size = params.pageSize;
|
|
1170
|
+
}
|
|
1171
|
+
if (params.chunkType !== void 0) {
|
|
1172
|
+
queryParams.chunk_type = params.chunkType;
|
|
1173
|
+
}
|
|
1174
|
+
if (params.includeAssetUrls === true) {
|
|
1175
|
+
queryParams.include_asset_urls = true;
|
|
1176
|
+
}
|
|
1177
|
+
return Object.keys(queryParams).length > 0 ? { params: queryParams } : void 0;
|
|
1178
|
+
}
|
|
1179
|
+
createChunkGetRequestConfig(params) {
|
|
1180
|
+
if (params?.includeAssetUrls !== true) {
|
|
1181
|
+
return void 0;
|
|
1182
|
+
}
|
|
1183
|
+
return {
|
|
1184
|
+
params: {
|
|
1185
|
+
include_asset_urls: true
|
|
1186
|
+
}
|
|
1187
|
+
};
|
|
1188
|
+
}
|
|
1168
1189
|
};
|
|
1169
1190
|
|
|
1170
1191
|
// src/client.ts
|