@ontos-ai/knowhere-sdk 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +77 -0
- package/dist/index.d.mts +245 -8
- package/dist/index.d.ts +245 -8
- package/dist/index.js +146 -6
- package/dist/index.mjs +144 -6
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -163,6 +163,83 @@ const jobResult = await client.jobs.wait(job.jobId, {
|
|
|
163
163
|
const result = await client.jobs.load(jobResult);
|
|
164
164
|
```
|
|
165
165
|
|
|
166
|
+
### Retrieval and Document Lifecycle
|
|
167
|
+
|
|
168
|
+
Published documents are queryable through the retrieval API after a job
|
|
169
|
+
finishes. `client.jobs.create(...)` does not return a usable `documentId`;
|
|
170
|
+
persist `jobResult.documentId` after publication if you need to update or
|
|
171
|
+
archive the same document later.
|
|
172
|
+
|
|
173
|
+
```typescript
|
|
174
|
+
const job = await client.jobs.create({
|
|
175
|
+
sourceType: 'url',
|
|
176
|
+
sourceUrl: 'https://example.com/manual.pdf',
|
|
177
|
+
namespace: 'support-center',
|
|
178
|
+
});
|
|
179
|
+
|
|
180
|
+
const jobResult = await client.jobs.wait(job.jobId);
|
|
181
|
+
const documentId = jobResult.documentId;
|
|
182
|
+
|
|
183
|
+
if (!documentId) {
|
|
184
|
+
throw new Error('Expected documentId after successful publication.');
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
console.log(documentId);
|
|
188
|
+
|
|
189
|
+
const response = await client.retrieval.query({
|
|
190
|
+
namespace: 'support-center',
|
|
191
|
+
query: 'How do I reset Bluetooth pairing?',
|
|
192
|
+
topK: 5,
|
|
193
|
+
});
|
|
194
|
+
|
|
195
|
+
for (const result of response.results) {
|
|
196
|
+
console.log(result.content);
|
|
197
|
+
console.log(result.score);
|
|
198
|
+
console.log(result.source.sourceFileName, result.source.sectionPath);
|
|
199
|
+
}
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
Retrieval results use one canonical source object:
|
|
203
|
+
|
|
204
|
+
```typescript
|
|
205
|
+
result.content;
|
|
206
|
+
result.chunkType;
|
|
207
|
+
result.score;
|
|
208
|
+
result.assetUrl;
|
|
209
|
+
result.source.documentId;
|
|
210
|
+
result.source.sourceFileName;
|
|
211
|
+
result.source.sectionPath;
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
Use `documentId` to update or archive a document:
|
|
215
|
+
|
|
216
|
+
```typescript
|
|
217
|
+
const updateJob = await client.jobs.create({
|
|
218
|
+
sourceType: 'url',
|
|
219
|
+
sourceUrl: 'https://example.com/manual-v2.pdf',
|
|
220
|
+
documentId,
|
|
221
|
+
});
|
|
222
|
+
|
|
223
|
+
const documents = await client.documents.list({ namespace: 'support-center' });
|
|
224
|
+
const document = await client.documents.get(documentId);
|
|
225
|
+
const archived = await client.documents.archive(documentId);
|
|
226
|
+
|
|
227
|
+
console.log(documents.documents.length);
|
|
228
|
+
console.log(document.status);
|
|
229
|
+
console.log(archived.status);
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
Follow-up queries can exclude documents or sections for one request:
|
|
233
|
+
|
|
234
|
+
```typescript
|
|
235
|
+
const followUp = await client.retrieval.query({
|
|
236
|
+
namespace: 'support-center',
|
|
237
|
+
query: 'battery charging',
|
|
238
|
+
excludeDocumentIds: ['doc_old'],
|
|
239
|
+
excludeSections: [{ documentId: 'doc_123', sectionPath: 'Appendix / Legal' }],
|
|
240
|
+
});
|
|
241
|
+
```
|
|
242
|
+
|
|
166
243
|
### Error Handling
|
|
167
244
|
|
|
168
245
|
```typescript
|
package/dist/index.d.mts
CHANGED
|
@@ -41,6 +41,8 @@ interface Job {
|
|
|
41
41
|
sourceType: string;
|
|
42
42
|
/** Optional custom data identifier */
|
|
43
43
|
dataId?: string;
|
|
44
|
+
/** Retrieval namespace for the canonical document */
|
|
45
|
+
namespace?: string;
|
|
44
46
|
/** Job creation timestamp */
|
|
45
47
|
createdAt: Date;
|
|
46
48
|
/** Presigned URL for file upload (if sourceType is 'file') */
|
|
@@ -75,6 +77,10 @@ interface JobResult {
|
|
|
75
77
|
sourceType: string;
|
|
76
78
|
/** Optional custom data identifier */
|
|
77
79
|
dataId?: string;
|
|
80
|
+
/** Retrieval namespace for the canonical document */
|
|
81
|
+
namespace?: string;
|
|
82
|
+
/** Stable document identifier for retrieval/document lifecycle APIs */
|
|
83
|
+
documentId?: string;
|
|
78
84
|
/** Job creation timestamp */
|
|
79
85
|
createdAt: Date;
|
|
80
86
|
/** Processing progress information */
|
|
@@ -157,6 +163,10 @@ interface CreateJobParams {
|
|
|
157
163
|
fileName?: string;
|
|
158
164
|
/** Optional custom data identifier */
|
|
159
165
|
dataId?: string;
|
|
166
|
+
/** Retrieval namespace for the canonical document */
|
|
167
|
+
namespace?: string;
|
|
168
|
+
/** Existing document identifier when updating a published document */
|
|
169
|
+
documentId?: string;
|
|
160
170
|
/** Parsing configuration */
|
|
161
171
|
parsingParams?: ParsingParams;
|
|
162
172
|
/** Webhook configuration */
|
|
@@ -216,9 +226,13 @@ interface ParseParams {
|
|
|
216
226
|
/** Generate table summaries */
|
|
217
227
|
summaryTable?: boolean;
|
|
218
228
|
/** Generate text summaries */
|
|
219
|
-
|
|
229
|
+
summaryTxt?: boolean;
|
|
220
230
|
/** Custom data identifier */
|
|
221
231
|
dataId?: string;
|
|
232
|
+
/** Retrieval namespace for the canonical document */
|
|
233
|
+
namespace?: string;
|
|
234
|
+
/** Existing document identifier when updating a published document */
|
|
235
|
+
documentId?: string;
|
|
222
236
|
/** Additional fragment description */
|
|
223
237
|
addFragDesc?: string;
|
|
224
238
|
/** Knowledge base directory */
|
|
@@ -282,6 +296,30 @@ interface Statistics {
|
|
|
282
296
|
interface FileIndex {
|
|
283
297
|
[chunkId: string]: string;
|
|
284
298
|
}
|
|
299
|
+
/**
|
|
300
|
+
* Processing cost details emitted by manifest v2
|
|
301
|
+
*/
|
|
302
|
+
interface ProcessingCost {
|
|
303
|
+
microDollars?: number;
|
|
304
|
+
credits?: number;
|
|
305
|
+
}
|
|
306
|
+
/**
|
|
307
|
+
* Processing timing details emitted by manifest v2
|
|
308
|
+
*/
|
|
309
|
+
interface ProcessingTiming {
|
|
310
|
+
startedAt?: Date;
|
|
311
|
+
completedAt?: Date;
|
|
312
|
+
durationMs?: number;
|
|
313
|
+
}
|
|
314
|
+
/**
|
|
315
|
+
* Processing metadata emitted by manifest v2
|
|
316
|
+
*/
|
|
317
|
+
interface ProcessingMetadata {
|
|
318
|
+
pageCount?: number;
|
|
319
|
+
billingStatus?: string;
|
|
320
|
+
cost?: ProcessingCost;
|
|
321
|
+
timing?: ProcessingTiming;
|
|
322
|
+
}
|
|
285
323
|
/**
|
|
286
324
|
* Manifest containing metadata about the parse result
|
|
287
325
|
*/
|
|
@@ -295,11 +333,29 @@ interface Manifest {
|
|
|
295
333
|
/** Original source file name */
|
|
296
334
|
sourceFileName: string;
|
|
297
335
|
/** Processing completion date */
|
|
298
|
-
|
|
336
|
+
/** Processing completion date (optional: only present if emitted by the worker) */
|
|
337
|
+
processingDate?: Date;
|
|
338
|
+
/** Worker-side processing metadata emitted by manifest v2 */
|
|
339
|
+
processing?: ProcessingMetadata;
|
|
299
340
|
/** Statistics */
|
|
300
341
|
statistics: Statistics;
|
|
301
|
-
/**
|
|
302
|
-
files
|
|
342
|
+
/** Legacy file index from earlier ZIP manifests */
|
|
343
|
+
files?: FileIndex;
|
|
344
|
+
}
|
|
345
|
+
/**
|
|
346
|
+
* Chunk relationship entry (metadata.connect_to per schema v2.1)
|
|
347
|
+
*/
|
|
348
|
+
interface ConnectTo {
|
|
349
|
+
/** Target chunk_id */
|
|
350
|
+
target: string;
|
|
351
|
+
/** Relationship type */
|
|
352
|
+
relation: 'embeds' | 'related';
|
|
353
|
+
/** Placeholder ref in content, e.g. '[images/a.png]' (embeds only) */
|
|
354
|
+
ref?: string;
|
|
355
|
+
/** Semantic similarity score (related only) */
|
|
356
|
+
score?: number;
|
|
357
|
+
/** Shared keywords (related only) */
|
|
358
|
+
keywords?: string[];
|
|
303
359
|
}
|
|
304
360
|
/**
|
|
305
361
|
* Base chunk properties
|
|
@@ -313,6 +369,17 @@ interface BaseChunk {
|
|
|
313
369
|
content: string;
|
|
314
370
|
/** Relative path in ZIP */
|
|
315
371
|
path: string;
|
|
372
|
+
/** Page numbers spanned by this chunk when provided by the backend */
|
|
373
|
+
pageNums?: number[];
|
|
374
|
+
}
|
|
375
|
+
/**
|
|
376
|
+
* Minimal chunk representation emitted in chunks_slim.json
|
|
377
|
+
*/
|
|
378
|
+
interface SlimChunk {
|
|
379
|
+
type: 'text' | 'image' | 'table';
|
|
380
|
+
path: string;
|
|
381
|
+
content: string;
|
|
382
|
+
summary?: string;
|
|
316
383
|
}
|
|
317
384
|
/**
|
|
318
385
|
* Text chunk
|
|
@@ -321,13 +388,18 @@ interface TextChunk extends BaseChunk {
|
|
|
321
388
|
type: 'text';
|
|
322
389
|
/** Content length */
|
|
323
390
|
length: number;
|
|
324
|
-
/**
|
|
325
|
-
tokens?:
|
|
391
|
+
/** Extracted tokens from the current backend payload */
|
|
392
|
+
tokens?: string[];
|
|
326
393
|
/** Extracted keywords */
|
|
327
394
|
keywords?: string[];
|
|
328
395
|
/** Generated summary */
|
|
329
396
|
summary?: string;
|
|
330
|
-
/**
|
|
397
|
+
/** Chunk relationships (schema v2.1: metadata.connect_to) */
|
|
398
|
+
connectTo?: ConnectTo[];
|
|
399
|
+
/**
|
|
400
|
+
* @deprecated Use connectTo instead. Retained for backward compatibility.
|
|
401
|
+
* Previously populated from metadata.relationships which is no longer emitted by the API.
|
|
402
|
+
*/
|
|
331
403
|
relationships?: string[];
|
|
332
404
|
}
|
|
333
405
|
/**
|
|
@@ -378,10 +450,18 @@ interface ParseResult {
|
|
|
378
450
|
manifest: Manifest;
|
|
379
451
|
/** All chunks */
|
|
380
452
|
chunks: Chunk[];
|
|
453
|
+
/** Minimal chunk projection from chunks_slim.json (if available) */
|
|
454
|
+
chunksSlim?: SlimChunk[];
|
|
381
455
|
/** Full document as Markdown (if available) */
|
|
382
456
|
fullMarkdown?: string;
|
|
383
457
|
/** Document hierarchy (if available) */
|
|
384
458
|
hierarchy?: unknown;
|
|
459
|
+
/** Table-of-contents hierarchy hints (if available) */
|
|
460
|
+
tocHierarchies?: unknown;
|
|
461
|
+
/** Knowledge-base CSV export (if available) */
|
|
462
|
+
kbCsv?: string;
|
|
463
|
+
/** Pre-rendered hierarchy HTML view (if available) */
|
|
464
|
+
hierarchyViewHtml?: string;
|
|
385
465
|
/** Raw ZIP buffer */
|
|
386
466
|
rawZip: Buffer;
|
|
387
467
|
/** Text chunks only */
|
|
@@ -392,6 +472,10 @@ interface ParseResult {
|
|
|
392
472
|
readonly tableChunks: TableChunk[];
|
|
393
473
|
/** Job ID */
|
|
394
474
|
readonly jobId: string;
|
|
475
|
+
/** Effective retrieval namespace when loaded from a job result */
|
|
476
|
+
namespace?: string;
|
|
477
|
+
/** Canonical document identifier when loaded from a job result */
|
|
478
|
+
documentId?: string;
|
|
395
479
|
/** Statistics */
|
|
396
480
|
readonly statistics: Statistics;
|
|
397
481
|
/** Find a specific chunk by ID */
|
|
@@ -500,12 +584,165 @@ declare class Jobs extends BaseResource {
|
|
|
500
584
|
private resolveLoadJobResult;
|
|
501
585
|
}
|
|
502
586
|
|
|
587
|
+
/**
|
|
588
|
+
* Section exclusion for follow-up retrieval queries.
|
|
589
|
+
*/
|
|
590
|
+
interface RetrievalSectionExclusion {
|
|
591
|
+
/** Document containing the section to exclude */
|
|
592
|
+
documentId: string;
|
|
593
|
+
/** Human-readable section path to exclude */
|
|
594
|
+
sectionPath: string;
|
|
595
|
+
}
|
|
596
|
+
/**
|
|
597
|
+
* Supported retrieval channel names.
|
|
598
|
+
*/
|
|
599
|
+
type RetrievalChannel = 'path' | 'content' | 'term';
|
|
600
|
+
/**
|
|
601
|
+
* Path filtering mode for retrieval queries.
|
|
602
|
+
*/
|
|
603
|
+
type RetrievalFilterMode = 'delete' | 'keep';
|
|
604
|
+
/**
|
|
605
|
+
* Retrieval query parameters.
|
|
606
|
+
*/
|
|
607
|
+
interface RetrievalQueryParams {
|
|
608
|
+
/** Search query text */
|
|
609
|
+
query: string;
|
|
610
|
+
/** Retrieval namespace. Defaults to the server's default namespace when omitted. */
|
|
611
|
+
namespace?: string;
|
|
612
|
+
/** Maximum number of results to return */
|
|
613
|
+
topK?: number;
|
|
614
|
+
/** Chunk type filter: 1=all, 2=text, 3=image, 4=table, 5=text+image, 6=text+table */
|
|
615
|
+
dataType?: 1 | 2 | 3 | 4 | 5 | 6;
|
|
616
|
+
/** Path keywords for include/exclude filtering */
|
|
617
|
+
signalPaths?: string[];
|
|
618
|
+
/** Signal path filter mode */
|
|
619
|
+
filterMode?: RetrievalFilterMode;
|
|
620
|
+
/** Retrieval channels to run. Defaults to all channels when omitted. */
|
|
621
|
+
channels?: RetrievalChannel[];
|
|
622
|
+
/** Per-channel weight overrides for reciprocal-rank fusion */
|
|
623
|
+
channelWeights?: Partial<Record<RetrievalChannel, number>>;
|
|
624
|
+
/** Enable LLM reranking after channel fusion */
|
|
625
|
+
rerank?: boolean;
|
|
626
|
+
/** Minimum retrieval score threshold after fusion */
|
|
627
|
+
threshold?: number;
|
|
628
|
+
/** Override the internal per-channel recall count */
|
|
629
|
+
internalRecallK?: number;
|
|
630
|
+
/** Documents to exclude for this request only */
|
|
631
|
+
excludeDocumentIds?: string[];
|
|
632
|
+
/** Document sections to exclude for this request only */
|
|
633
|
+
excludeSections?: RetrievalSectionExclusion[];
|
|
634
|
+
}
|
|
635
|
+
/**
|
|
636
|
+
* Caller-facing source reference attached to a retrieval result.
|
|
637
|
+
*/
|
|
638
|
+
interface RetrievalSource {
|
|
639
|
+
/** Stable document identifier */
|
|
640
|
+
documentId?: string;
|
|
641
|
+
/** Original source file name */
|
|
642
|
+
sourceFileName?: string;
|
|
643
|
+
/** Human-readable section path */
|
|
644
|
+
sectionPath?: string;
|
|
645
|
+
}
|
|
646
|
+
/**
|
|
647
|
+
* Canonical chunk result returned by retrieval query.
|
|
648
|
+
*/
|
|
649
|
+
interface RetrievalResult {
|
|
650
|
+
/** Knowledge content to use directly in the caller's answer */
|
|
651
|
+
content: string;
|
|
652
|
+
/** Chunk type, for example text, image, or table */
|
|
653
|
+
chunkType: string;
|
|
654
|
+
/** Retrieval score returned by the API */
|
|
655
|
+
score: number;
|
|
656
|
+
/** Presigned asset URL for media chunks when available */
|
|
657
|
+
assetUrl?: string;
|
|
658
|
+
/** Source reference for this result */
|
|
659
|
+
source: RetrievalSource;
|
|
660
|
+
}
|
|
661
|
+
/**
|
|
662
|
+
* Response from POST /v1/retrieval/query.
|
|
663
|
+
*/
|
|
664
|
+
interface RetrievalQueryResponse {
|
|
665
|
+
/** Namespace searched by the API */
|
|
666
|
+
namespace: string;
|
|
667
|
+
/** Echoed query text */
|
|
668
|
+
query: string;
|
|
669
|
+
/** Retrieval router path used by the API for this query */
|
|
670
|
+
routerUsed?: string;
|
|
671
|
+
/** Ranked retrieval results */
|
|
672
|
+
results: RetrievalResult[];
|
|
673
|
+
}
|
|
674
|
+
|
|
675
|
+
/**
|
|
676
|
+
* Resource for querying published retrieval documents.
|
|
677
|
+
*/
|
|
678
|
+
declare class Retrieval extends BaseResource {
|
|
679
|
+
/**
|
|
680
|
+
* Query published documents.
|
|
681
|
+
*/
|
|
682
|
+
query(params: RetrievalQueryParams): Promise<RetrievalQueryResponse>;
|
|
683
|
+
}
|
|
684
|
+
|
|
685
|
+
/**
|
|
686
|
+
* Canonical document state returned by document lifecycle endpoints.
|
|
687
|
+
*/
|
|
688
|
+
interface Document {
|
|
689
|
+
/** Stable document identifier */
|
|
690
|
+
documentId: string;
|
|
691
|
+
/** Retrieval namespace */
|
|
692
|
+
namespace: string;
|
|
693
|
+
/** Current lifecycle status */
|
|
694
|
+
status: string;
|
|
695
|
+
/** Current published job result identifier */
|
|
696
|
+
currentJobResultId?: string;
|
|
697
|
+
/** Original source file name */
|
|
698
|
+
sourceFileName?: string;
|
|
699
|
+
/** Document creation timestamp */
|
|
700
|
+
createdAt?: Date;
|
|
701
|
+
/** Last update timestamp */
|
|
702
|
+
updatedAt?: Date;
|
|
703
|
+
/** Archive timestamp, when archived */
|
|
704
|
+
archivedAt?: Date;
|
|
705
|
+
}
|
|
706
|
+
/**
|
|
707
|
+
* Response from GET /v1/documents.
|
|
708
|
+
*/
|
|
709
|
+
interface DocumentListResponse {
|
|
710
|
+
/** Namespace listed by the API */
|
|
711
|
+
namespace: string;
|
|
712
|
+
/** Documents visible in the namespace */
|
|
713
|
+
documents: Document[];
|
|
714
|
+
}
|
|
715
|
+
|
|
716
|
+
/**
|
|
717
|
+
* Resource for canonical document lifecycle operations.
|
|
718
|
+
*/
|
|
719
|
+
declare class Documents extends BaseResource {
|
|
720
|
+
/**
|
|
721
|
+
* List canonical documents in a namespace.
|
|
722
|
+
*/
|
|
723
|
+
list(params?: {
|
|
724
|
+
namespace?: string;
|
|
725
|
+
}): Promise<DocumentListResponse>;
|
|
726
|
+
/**
|
|
727
|
+
* Get one canonical document by ID.
|
|
728
|
+
*/
|
|
729
|
+
get(documentId: string): Promise<Document>;
|
|
730
|
+
/**
|
|
731
|
+
* Archive one canonical document by ID.
|
|
732
|
+
*/
|
|
733
|
+
archive(documentId: string): Promise<Document>;
|
|
734
|
+
}
|
|
735
|
+
|
|
503
736
|
/**
|
|
504
737
|
* Main Knowhere SDK client
|
|
505
738
|
*/
|
|
506
739
|
declare class Knowhere {
|
|
507
740
|
/** Jobs resource for low-level API */
|
|
508
741
|
readonly jobs: Jobs;
|
|
742
|
+
/** Retrieval resource for querying published documents */
|
|
743
|
+
readonly retrieval: Retrieval;
|
|
744
|
+
/** Documents resource for canonical document lifecycle operations */
|
|
745
|
+
readonly documents: Documents;
|
|
509
746
|
private httpClient;
|
|
510
747
|
/**
|
|
511
748
|
* Create a new Knowhere client
|
|
@@ -665,4 +902,4 @@ declare class JobFailedError extends KnowhereError {
|
|
|
665
902
|
constructor(message: string, code: string, jobResult: JobResult);
|
|
666
903
|
}
|
|
667
904
|
|
|
668
|
-
export { APIError, AuthenticationError, BadRequestError, type BaseChunk, ChecksumError, type Chunk, ConflictError, type CreateJobParams, type DocType, type FileIndex, GatewayTimeoutError, type ImageChunk, InternalServerError, InvalidStateError, type Job, type JobError, JobFailedError, type JobResult, type JobStatus, Jobs, Knowhere, KnowhereError, type KnowhereOptions, type LoadOptions, type Manifest, NetworkError, NotFoundError, type ParseParams, type ParseResult, type ParsingModel, type ParsingParams, PaymentRequiredError, PermissionDeniedError, type PollProgress, PollingTimeoutError, RateLimitError, ServiceUnavailableError, type Statistics, type TableChunk, type TextChunk, TimeoutError, type UploadParams, type UploadProgress, VERSION, ValidationError, type WaitOptions, type WebhookConfig, Knowhere as default };
|
|
905
|
+
export { APIError, AuthenticationError, BadRequestError, type BaseChunk, ChecksumError, type Chunk, ConflictError, type CreateJobParams, type DocType, type Document, type DocumentListResponse, Documents, type FileIndex, GatewayTimeoutError, type ImageChunk, InternalServerError, InvalidStateError, type Job, type JobError, JobFailedError, type JobResult, type JobStatus, Jobs, Knowhere, KnowhereError, type KnowhereOptions, type LoadOptions, type Manifest, NetworkError, NotFoundError, type ParseParams, type ParseResult, type ParsingModel, type ParsingParams, PaymentRequiredError, PermissionDeniedError, type PollProgress, PollingTimeoutError, RateLimitError, Retrieval, type RetrievalChannel, type RetrievalFilterMode, type RetrievalQueryParams, type RetrievalQueryResponse, type RetrievalResult, type RetrievalSectionExclusion, type RetrievalSource, ServiceUnavailableError, type Statistics, type TableChunk, type TextChunk, TimeoutError, type UploadParams, type UploadProgress, VERSION, ValidationError, type WaitOptions, type WebhookConfig, Knowhere as default };
|
package/dist/index.d.ts
CHANGED
|
@@ -41,6 +41,8 @@ interface Job {
|
|
|
41
41
|
sourceType: string;
|
|
42
42
|
/** Optional custom data identifier */
|
|
43
43
|
dataId?: string;
|
|
44
|
+
/** Retrieval namespace for the canonical document */
|
|
45
|
+
namespace?: string;
|
|
44
46
|
/** Job creation timestamp */
|
|
45
47
|
createdAt: Date;
|
|
46
48
|
/** Presigned URL for file upload (if sourceType is 'file') */
|
|
@@ -75,6 +77,10 @@ interface JobResult {
|
|
|
75
77
|
sourceType: string;
|
|
76
78
|
/** Optional custom data identifier */
|
|
77
79
|
dataId?: string;
|
|
80
|
+
/** Retrieval namespace for the canonical document */
|
|
81
|
+
namespace?: string;
|
|
82
|
+
/** Stable document identifier for retrieval/document lifecycle APIs */
|
|
83
|
+
documentId?: string;
|
|
78
84
|
/** Job creation timestamp */
|
|
79
85
|
createdAt: Date;
|
|
80
86
|
/** Processing progress information */
|
|
@@ -157,6 +163,10 @@ interface CreateJobParams {
|
|
|
157
163
|
fileName?: string;
|
|
158
164
|
/** Optional custom data identifier */
|
|
159
165
|
dataId?: string;
|
|
166
|
+
/** Retrieval namespace for the canonical document */
|
|
167
|
+
namespace?: string;
|
|
168
|
+
/** Existing document identifier when updating a published document */
|
|
169
|
+
documentId?: string;
|
|
160
170
|
/** Parsing configuration */
|
|
161
171
|
parsingParams?: ParsingParams;
|
|
162
172
|
/** Webhook configuration */
|
|
@@ -216,9 +226,13 @@ interface ParseParams {
|
|
|
216
226
|
/** Generate table summaries */
|
|
217
227
|
summaryTable?: boolean;
|
|
218
228
|
/** Generate text summaries */
|
|
219
|
-
|
|
229
|
+
summaryTxt?: boolean;
|
|
220
230
|
/** Custom data identifier */
|
|
221
231
|
dataId?: string;
|
|
232
|
+
/** Retrieval namespace for the canonical document */
|
|
233
|
+
namespace?: string;
|
|
234
|
+
/** Existing document identifier when updating a published document */
|
|
235
|
+
documentId?: string;
|
|
222
236
|
/** Additional fragment description */
|
|
223
237
|
addFragDesc?: string;
|
|
224
238
|
/** Knowledge base directory */
|
|
@@ -282,6 +296,30 @@ interface Statistics {
|
|
|
282
296
|
interface FileIndex {
|
|
283
297
|
[chunkId: string]: string;
|
|
284
298
|
}
|
|
299
|
+
/**
|
|
300
|
+
* Processing cost details emitted by manifest v2
|
|
301
|
+
*/
|
|
302
|
+
interface ProcessingCost {
|
|
303
|
+
microDollars?: number;
|
|
304
|
+
credits?: number;
|
|
305
|
+
}
|
|
306
|
+
/**
|
|
307
|
+
* Processing timing details emitted by manifest v2
|
|
308
|
+
*/
|
|
309
|
+
interface ProcessingTiming {
|
|
310
|
+
startedAt?: Date;
|
|
311
|
+
completedAt?: Date;
|
|
312
|
+
durationMs?: number;
|
|
313
|
+
}
|
|
314
|
+
/**
|
|
315
|
+
* Processing metadata emitted by manifest v2
|
|
316
|
+
*/
|
|
317
|
+
interface ProcessingMetadata {
|
|
318
|
+
pageCount?: number;
|
|
319
|
+
billingStatus?: string;
|
|
320
|
+
cost?: ProcessingCost;
|
|
321
|
+
timing?: ProcessingTiming;
|
|
322
|
+
}
|
|
285
323
|
/**
|
|
286
324
|
* Manifest containing metadata about the parse result
|
|
287
325
|
*/
|
|
@@ -295,11 +333,29 @@ interface Manifest {
|
|
|
295
333
|
/** Original source file name */
|
|
296
334
|
sourceFileName: string;
|
|
297
335
|
/** Processing completion date */
|
|
298
|
-
|
|
336
|
+
/** Processing completion date (optional: only present if emitted by the worker) */
|
|
337
|
+
processingDate?: Date;
|
|
338
|
+
/** Worker-side processing metadata emitted by manifest v2 */
|
|
339
|
+
processing?: ProcessingMetadata;
|
|
299
340
|
/** Statistics */
|
|
300
341
|
statistics: Statistics;
|
|
301
|
-
/**
|
|
302
|
-
files
|
|
342
|
+
/** Legacy file index from earlier ZIP manifests */
|
|
343
|
+
files?: FileIndex;
|
|
344
|
+
}
|
|
345
|
+
/**
|
|
346
|
+
* Chunk relationship entry (metadata.connect_to per schema v2.1)
|
|
347
|
+
*/
|
|
348
|
+
interface ConnectTo {
|
|
349
|
+
/** Target chunk_id */
|
|
350
|
+
target: string;
|
|
351
|
+
/** Relationship type */
|
|
352
|
+
relation: 'embeds' | 'related';
|
|
353
|
+
/** Placeholder ref in content, e.g. '[images/a.png]' (embeds only) */
|
|
354
|
+
ref?: string;
|
|
355
|
+
/** Semantic similarity score (related only) */
|
|
356
|
+
score?: number;
|
|
357
|
+
/** Shared keywords (related only) */
|
|
358
|
+
keywords?: string[];
|
|
303
359
|
}
|
|
304
360
|
/**
|
|
305
361
|
* Base chunk properties
|
|
@@ -313,6 +369,17 @@ interface BaseChunk {
|
|
|
313
369
|
content: string;
|
|
314
370
|
/** Relative path in ZIP */
|
|
315
371
|
path: string;
|
|
372
|
+
/** Page numbers spanned by this chunk when provided by the backend */
|
|
373
|
+
pageNums?: number[];
|
|
374
|
+
}
|
|
375
|
+
/**
|
|
376
|
+
* Minimal chunk representation emitted in chunks_slim.json
|
|
377
|
+
*/
|
|
378
|
+
interface SlimChunk {
|
|
379
|
+
type: 'text' | 'image' | 'table';
|
|
380
|
+
path: string;
|
|
381
|
+
content: string;
|
|
382
|
+
summary?: string;
|
|
316
383
|
}
|
|
317
384
|
/**
|
|
318
385
|
* Text chunk
|
|
@@ -321,13 +388,18 @@ interface TextChunk extends BaseChunk {
|
|
|
321
388
|
type: 'text';
|
|
322
389
|
/** Content length */
|
|
323
390
|
length: number;
|
|
324
|
-
/**
|
|
325
|
-
tokens?:
|
|
391
|
+
/** Extracted tokens from the current backend payload */
|
|
392
|
+
tokens?: string[];
|
|
326
393
|
/** Extracted keywords */
|
|
327
394
|
keywords?: string[];
|
|
328
395
|
/** Generated summary */
|
|
329
396
|
summary?: string;
|
|
330
|
-
/**
|
|
397
|
+
/** Chunk relationships (schema v2.1: metadata.connect_to) */
|
|
398
|
+
connectTo?: ConnectTo[];
|
|
399
|
+
/**
|
|
400
|
+
* @deprecated Use connectTo instead. Retained for backward compatibility.
|
|
401
|
+
* Previously populated from metadata.relationships which is no longer emitted by the API.
|
|
402
|
+
*/
|
|
331
403
|
relationships?: string[];
|
|
332
404
|
}
|
|
333
405
|
/**
|
|
@@ -378,10 +450,18 @@ interface ParseResult {
|
|
|
378
450
|
manifest: Manifest;
|
|
379
451
|
/** All chunks */
|
|
380
452
|
chunks: Chunk[];
|
|
453
|
+
/** Minimal chunk projection from chunks_slim.json (if available) */
|
|
454
|
+
chunksSlim?: SlimChunk[];
|
|
381
455
|
/** Full document as Markdown (if available) */
|
|
382
456
|
fullMarkdown?: string;
|
|
383
457
|
/** Document hierarchy (if available) */
|
|
384
458
|
hierarchy?: unknown;
|
|
459
|
+
/** Table-of-contents hierarchy hints (if available) */
|
|
460
|
+
tocHierarchies?: unknown;
|
|
461
|
+
/** Knowledge-base CSV export (if available) */
|
|
462
|
+
kbCsv?: string;
|
|
463
|
+
/** Pre-rendered hierarchy HTML view (if available) */
|
|
464
|
+
hierarchyViewHtml?: string;
|
|
385
465
|
/** Raw ZIP buffer */
|
|
386
466
|
rawZip: Buffer;
|
|
387
467
|
/** Text chunks only */
|
|
@@ -392,6 +472,10 @@ interface ParseResult {
|
|
|
392
472
|
readonly tableChunks: TableChunk[];
|
|
393
473
|
/** Job ID */
|
|
394
474
|
readonly jobId: string;
|
|
475
|
+
/** Effective retrieval namespace when loaded from a job result */
|
|
476
|
+
namespace?: string;
|
|
477
|
+
/** Canonical document identifier when loaded from a job result */
|
|
478
|
+
documentId?: string;
|
|
395
479
|
/** Statistics */
|
|
396
480
|
readonly statistics: Statistics;
|
|
397
481
|
/** Find a specific chunk by ID */
|
|
@@ -500,12 +584,165 @@ declare class Jobs extends BaseResource {
|
|
|
500
584
|
private resolveLoadJobResult;
|
|
501
585
|
}
|
|
502
586
|
|
|
587
|
+
/**
|
|
588
|
+
* Section exclusion for follow-up retrieval queries.
|
|
589
|
+
*/
|
|
590
|
+
interface RetrievalSectionExclusion {
|
|
591
|
+
/** Document containing the section to exclude */
|
|
592
|
+
documentId: string;
|
|
593
|
+
/** Human-readable section path to exclude */
|
|
594
|
+
sectionPath: string;
|
|
595
|
+
}
|
|
596
|
+
/**
|
|
597
|
+
* Supported retrieval channel names.
|
|
598
|
+
*/
|
|
599
|
+
type RetrievalChannel = 'path' | 'content' | 'term';
|
|
600
|
+
/**
|
|
601
|
+
* Path filtering mode for retrieval queries.
|
|
602
|
+
*/
|
|
603
|
+
type RetrievalFilterMode = 'delete' | 'keep';
|
|
604
|
+
/**
|
|
605
|
+
* Retrieval query parameters.
|
|
606
|
+
*/
|
|
607
|
+
interface RetrievalQueryParams {
|
|
608
|
+
/** Search query text */
|
|
609
|
+
query: string;
|
|
610
|
+
/** Retrieval namespace. Defaults to the server's default namespace when omitted. */
|
|
611
|
+
namespace?: string;
|
|
612
|
+
/** Maximum number of results to return */
|
|
613
|
+
topK?: number;
|
|
614
|
+
/** Chunk type filter: 1=all, 2=text, 3=image, 4=table, 5=text+image, 6=text+table */
|
|
615
|
+
dataType?: 1 | 2 | 3 | 4 | 5 | 6;
|
|
616
|
+
/** Path keywords for include/exclude filtering */
|
|
617
|
+
signalPaths?: string[];
|
|
618
|
+
/** Signal path filter mode */
|
|
619
|
+
filterMode?: RetrievalFilterMode;
|
|
620
|
+
/** Retrieval channels to run. Defaults to all channels when omitted. */
|
|
621
|
+
channels?: RetrievalChannel[];
|
|
622
|
+
/** Per-channel weight overrides for reciprocal-rank fusion */
|
|
623
|
+
channelWeights?: Partial<Record<RetrievalChannel, number>>;
|
|
624
|
+
/** Enable LLM reranking after channel fusion */
|
|
625
|
+
rerank?: boolean;
|
|
626
|
+
/** Minimum retrieval score threshold after fusion */
|
|
627
|
+
threshold?: number;
|
|
628
|
+
/** Override the internal per-channel recall count */
|
|
629
|
+
internalRecallK?: number;
|
|
630
|
+
/** Documents to exclude for this request only */
|
|
631
|
+
excludeDocumentIds?: string[];
|
|
632
|
+
/** Document sections to exclude for this request only */
|
|
633
|
+
excludeSections?: RetrievalSectionExclusion[];
|
|
634
|
+
}
|
|
635
|
+
/**
|
|
636
|
+
* Caller-facing source reference attached to a retrieval result.
|
|
637
|
+
*/
|
|
638
|
+
interface RetrievalSource {
|
|
639
|
+
/** Stable document identifier */
|
|
640
|
+
documentId?: string;
|
|
641
|
+
/** Original source file name */
|
|
642
|
+
sourceFileName?: string;
|
|
643
|
+
/** Human-readable section path */
|
|
644
|
+
sectionPath?: string;
|
|
645
|
+
}
|
|
646
|
+
/**
|
|
647
|
+
* Canonical chunk result returned by retrieval query.
|
|
648
|
+
*/
|
|
649
|
+
interface RetrievalResult {
|
|
650
|
+
/** Knowledge content to use directly in the caller's answer */
|
|
651
|
+
content: string;
|
|
652
|
+
/** Chunk type, for example text, image, or table */
|
|
653
|
+
chunkType: string;
|
|
654
|
+
/** Retrieval score returned by the API */
|
|
655
|
+
score: number;
|
|
656
|
+
/** Presigned asset URL for media chunks when available */
|
|
657
|
+
assetUrl?: string;
|
|
658
|
+
/** Source reference for this result */
|
|
659
|
+
source: RetrievalSource;
|
|
660
|
+
}
|
|
661
|
+
/**
|
|
662
|
+
* Response from POST /v1/retrieval/query.
|
|
663
|
+
*/
|
|
664
|
+
interface RetrievalQueryResponse {
|
|
665
|
+
/** Namespace searched by the API */
|
|
666
|
+
namespace: string;
|
|
667
|
+
/** Echoed query text */
|
|
668
|
+
query: string;
|
|
669
|
+
/** Retrieval router path used by the API for this query */
|
|
670
|
+
routerUsed?: string;
|
|
671
|
+
/** Ranked retrieval results */
|
|
672
|
+
results: RetrievalResult[];
|
|
673
|
+
}
|
|
674
|
+
|
|
675
|
+
/**
|
|
676
|
+
* Resource for querying published retrieval documents.
|
|
677
|
+
*/
|
|
678
|
+
declare class Retrieval extends BaseResource {
|
|
679
|
+
/**
|
|
680
|
+
* Query published documents.
|
|
681
|
+
*/
|
|
682
|
+
query(params: RetrievalQueryParams): Promise<RetrievalQueryResponse>;
|
|
683
|
+
}
|
|
684
|
+
|
|
685
|
+
/**
|
|
686
|
+
* Canonical document state returned by document lifecycle endpoints.
|
|
687
|
+
*/
|
|
688
|
+
interface Document {
|
|
689
|
+
/** Stable document identifier */
|
|
690
|
+
documentId: string;
|
|
691
|
+
/** Retrieval namespace */
|
|
692
|
+
namespace: string;
|
|
693
|
+
/** Current lifecycle status */
|
|
694
|
+
status: string;
|
|
695
|
+
/** Current published job result identifier */
|
|
696
|
+
currentJobResultId?: string;
|
|
697
|
+
/** Original source file name */
|
|
698
|
+
sourceFileName?: string;
|
|
699
|
+
/** Document creation timestamp */
|
|
700
|
+
createdAt?: Date;
|
|
701
|
+
/** Last update timestamp */
|
|
702
|
+
updatedAt?: Date;
|
|
703
|
+
/** Archive timestamp, when archived */
|
|
704
|
+
archivedAt?: Date;
|
|
705
|
+
}
|
|
706
|
+
/**
|
|
707
|
+
* Response from GET /v1/documents.
|
|
708
|
+
*/
|
|
709
|
+
interface DocumentListResponse {
|
|
710
|
+
/** Namespace listed by the API */
|
|
711
|
+
namespace: string;
|
|
712
|
+
/** Documents visible in the namespace */
|
|
713
|
+
documents: Document[];
|
|
714
|
+
}
|
|
715
|
+
|
|
716
|
+
/**
|
|
717
|
+
* Resource for canonical document lifecycle operations.
|
|
718
|
+
*/
|
|
719
|
+
declare class Documents extends BaseResource {
|
|
720
|
+
/**
|
|
721
|
+
* List canonical documents in a namespace.
|
|
722
|
+
*/
|
|
723
|
+
list(params?: {
|
|
724
|
+
namespace?: string;
|
|
725
|
+
}): Promise<DocumentListResponse>;
|
|
726
|
+
/**
|
|
727
|
+
* Get one canonical document by ID.
|
|
728
|
+
*/
|
|
729
|
+
get(documentId: string): Promise<Document>;
|
|
730
|
+
/**
|
|
731
|
+
* Archive one canonical document by ID.
|
|
732
|
+
*/
|
|
733
|
+
archive(documentId: string): Promise<Document>;
|
|
734
|
+
}
|
|
735
|
+
|
|
503
736
|
/**
|
|
504
737
|
* Main Knowhere SDK client
|
|
505
738
|
*/
|
|
506
739
|
declare class Knowhere {
|
|
507
740
|
/** Jobs resource for low-level API */
|
|
508
741
|
readonly jobs: Jobs;
|
|
742
|
+
/** Retrieval resource for querying published documents */
|
|
743
|
+
readonly retrieval: Retrieval;
|
|
744
|
+
/** Documents resource for canonical document lifecycle operations */
|
|
745
|
+
readonly documents: Documents;
|
|
509
746
|
private httpClient;
|
|
510
747
|
/**
|
|
511
748
|
* Create a new Knowhere client
|
|
@@ -665,4 +902,4 @@ declare class JobFailedError extends KnowhereError {
|
|
|
665
902
|
constructor(message: string, code: string, jobResult: JobResult);
|
|
666
903
|
}
|
|
667
904
|
|
|
668
|
-
export { APIError, AuthenticationError, BadRequestError, type BaseChunk, ChecksumError, type Chunk, ConflictError, type CreateJobParams, type DocType, type FileIndex, GatewayTimeoutError, type ImageChunk, InternalServerError, InvalidStateError, type Job, type JobError, JobFailedError, type JobResult, type JobStatus, Jobs, Knowhere, KnowhereError, type KnowhereOptions, type LoadOptions, type Manifest, NetworkError, NotFoundError, type ParseParams, type ParseResult, type ParsingModel, type ParsingParams, PaymentRequiredError, PermissionDeniedError, type PollProgress, PollingTimeoutError, RateLimitError, ServiceUnavailableError, type Statistics, type TableChunk, type TextChunk, TimeoutError, type UploadParams, type UploadProgress, VERSION, ValidationError, type WaitOptions, type WebhookConfig, Knowhere as default };
|
|
905
|
+
export { APIError, AuthenticationError, BadRequestError, type BaseChunk, ChecksumError, type Chunk, ConflictError, type CreateJobParams, type DocType, type Document, type DocumentListResponse, Documents, type FileIndex, GatewayTimeoutError, type ImageChunk, InternalServerError, InvalidStateError, type Job, type JobError, JobFailedError, type JobResult, type JobStatus, Jobs, Knowhere, KnowhereError, type KnowhereOptions, type LoadOptions, type Manifest, NetworkError, NotFoundError, type ParseParams, type ParseResult, type ParsingModel, type ParsingParams, PaymentRequiredError, PermissionDeniedError, type PollProgress, PollingTimeoutError, RateLimitError, Retrieval, type RetrievalChannel, type RetrievalFilterMode, type RetrievalQueryParams, type RetrievalQueryResponse, type RetrievalResult, type RetrievalSectionExclusion, type RetrievalSource, ServiceUnavailableError, type Statistics, type TableChunk, type TextChunk, TimeoutError, type UploadParams, type UploadProgress, VERSION, ValidationError, type WaitOptions, type WebhookConfig, Knowhere as default };
|
package/dist/index.js
CHANGED
|
@@ -35,6 +35,7 @@ __export(index_exports, {
|
|
|
35
35
|
BadRequestError: () => BadRequestError,
|
|
36
36
|
ChecksumError: () => ChecksumError,
|
|
37
37
|
ConflictError: () => ConflictError,
|
|
38
|
+
Documents: () => Documents,
|
|
38
39
|
GatewayTimeoutError: () => GatewayTimeoutError,
|
|
39
40
|
InternalServerError: () => InternalServerError,
|
|
40
41
|
InvalidStateError: () => InvalidStateError,
|
|
@@ -48,6 +49,7 @@ __export(index_exports, {
|
|
|
48
49
|
PermissionDeniedError: () => PermissionDeniedError,
|
|
49
50
|
PollingTimeoutError: () => PollingTimeoutError,
|
|
50
51
|
RateLimitError: () => RateLimitError,
|
|
52
|
+
Retrieval: () => Retrieval,
|
|
51
53
|
ServiceUnavailableError: () => ServiceUnavailableError,
|
|
52
54
|
TimeoutError: () => TimeoutError,
|
|
53
55
|
VERSION: () => VERSION,
|
|
@@ -344,6 +346,15 @@ function enrichJobResult(jobResult) {
|
|
|
344
346
|
}
|
|
345
347
|
return jobResult;
|
|
346
348
|
}
|
|
349
|
+
function enrichParseResult(parseResult2, scope) {
|
|
350
|
+
if (scope.namespace !== void 0) {
|
|
351
|
+
parseResult2.namespace = scope.namespace;
|
|
352
|
+
}
|
|
353
|
+
if (scope.documentId !== void 0) {
|
|
354
|
+
parseResult2.documentId = scope.documentId;
|
|
355
|
+
}
|
|
356
|
+
return parseResult2;
|
|
357
|
+
}
|
|
347
358
|
function sanitizePath(path2) {
|
|
348
359
|
let sanitized = path2.replace(/^\/+/, "");
|
|
349
360
|
sanitized = sanitized.replace(/\.\.(\/|\\)/g, "");
|
|
@@ -839,11 +850,39 @@ async function parseResult(httpClient, resultUrl, options) {
|
|
|
839
850
|
const hierarchyContent = await hierarchyFile.async("string");
|
|
840
851
|
hierarchy = JSON.parse(hierarchyContent);
|
|
841
852
|
}
|
|
853
|
+
let chunksSlim;
|
|
854
|
+
const chunksSlimFile = zip.file("chunks_slim.json");
|
|
855
|
+
if (chunksSlimFile) {
|
|
856
|
+
const chunksSlimContent = await chunksSlimFile.async("string");
|
|
857
|
+
let chunksSlimData = JSON.parse(chunksSlimContent);
|
|
858
|
+
chunksSlimData = keysToCamel(chunksSlimData);
|
|
859
|
+
chunksSlim = extractSlimChunks(chunksSlimData);
|
|
860
|
+
}
|
|
861
|
+
let tocHierarchies;
|
|
862
|
+
const tocHierarchiesFile = zip.file("toc_hierarchies.json");
|
|
863
|
+
if (tocHierarchiesFile) {
|
|
864
|
+
const tocHierarchiesContent = await tocHierarchiesFile.async("string");
|
|
865
|
+
tocHierarchies = keysToCamel(JSON.parse(tocHierarchiesContent));
|
|
866
|
+
}
|
|
867
|
+
let kbCsv;
|
|
868
|
+
const kbCsvFile = zip.file("kb.csv");
|
|
869
|
+
if (kbCsvFile) {
|
|
870
|
+
kbCsv = await kbCsvFile.async("string");
|
|
871
|
+
}
|
|
872
|
+
let hierarchyViewHtml;
|
|
873
|
+
const hierarchyViewFile = zip.file("hierarchy_view.html");
|
|
874
|
+
if (hierarchyViewFile) {
|
|
875
|
+
hierarchyViewHtml = await hierarchyViewFile.async("string");
|
|
876
|
+
}
|
|
842
877
|
const result = {
|
|
843
878
|
manifest,
|
|
844
879
|
chunks,
|
|
880
|
+
chunksSlim,
|
|
845
881
|
fullMarkdown,
|
|
846
882
|
hierarchy,
|
|
883
|
+
tocHierarchies,
|
|
884
|
+
kbCsv,
|
|
885
|
+
hierarchyViewHtml,
|
|
847
886
|
rawZip: zipBuffer,
|
|
848
887
|
get textChunks() {
|
|
849
888
|
return chunks.filter((c) => c.type === "text");
|
|
@@ -867,12 +906,30 @@ async function parseResult(httpClient, resultUrl, options) {
|
|
|
867
906
|
await import_fs2.promises.mkdir(directory, { recursive: true });
|
|
868
907
|
await import_fs2.promises.writeFile((0, import_path.join)(directory, "manifest.json"), JSON.stringify(manifest, null, 2));
|
|
869
908
|
await import_fs2.promises.writeFile((0, import_path.join)(directory, "chunks.json"), JSON.stringify(chunks, null, 2));
|
|
909
|
+
if (chunksSlim) {
|
|
910
|
+
await import_fs2.promises.writeFile(
|
|
911
|
+
(0, import_path.join)(directory, "chunks_slim.json"),
|
|
912
|
+
JSON.stringify({ chunks: chunksSlim }, null, 2)
|
|
913
|
+
);
|
|
914
|
+
}
|
|
870
915
|
if (fullMarkdown) {
|
|
871
916
|
await import_fs2.promises.writeFile((0, import_path.join)(directory, "full.md"), fullMarkdown);
|
|
872
917
|
}
|
|
873
918
|
if (hierarchy) {
|
|
874
919
|
await import_fs2.promises.writeFile((0, import_path.join)(directory, "hierarchy.json"), JSON.stringify(hierarchy, null, 2));
|
|
875
920
|
}
|
|
921
|
+
if (tocHierarchies) {
|
|
922
|
+
await import_fs2.promises.writeFile(
|
|
923
|
+
(0, import_path.join)(directory, "toc_hierarchies.json"),
|
|
924
|
+
JSON.stringify(tocHierarchies, null, 2)
|
|
925
|
+
);
|
|
926
|
+
}
|
|
927
|
+
if (kbCsv) {
|
|
928
|
+
await import_fs2.promises.writeFile((0, import_path.join)(directory, "kb.csv"), kbCsv);
|
|
929
|
+
}
|
|
930
|
+
if (hierarchyViewHtml) {
|
|
931
|
+
await import_fs2.promises.writeFile((0, import_path.join)(directory, "hierarchy_view.html"), hierarchyViewHtml);
|
|
932
|
+
}
|
|
876
933
|
for (const imageChunk of this.imageChunks) {
|
|
877
934
|
await imageChunk.save(directory);
|
|
878
935
|
}
|
|
@@ -894,6 +951,15 @@ function extractChunks(payload) {
|
|
|
894
951
|
}
|
|
895
952
|
return [];
|
|
896
953
|
}
|
|
954
|
+
function extractSlimChunks(payload) {
|
|
955
|
+
if (Array.isArray(payload)) {
|
|
956
|
+
return payload;
|
|
957
|
+
}
|
|
958
|
+
if (Array.isArray(payload.chunks)) {
|
|
959
|
+
return payload.chunks;
|
|
960
|
+
}
|
|
961
|
+
return [];
|
|
962
|
+
}
|
|
897
963
|
function getChunkMetadata(chunkData) {
|
|
898
964
|
if (!chunkData.metadata) {
|
|
899
965
|
return {};
|
|
@@ -904,18 +970,38 @@ function getChunkFilePath(chunkData) {
|
|
|
904
970
|
const metadata = getChunkMetadata(chunkData);
|
|
905
971
|
return chunkData.filePath ?? metadata.filePath ?? chunkData.path;
|
|
906
972
|
}
|
|
973
|
+
function normalizePageNums(pageNums) {
|
|
974
|
+
if (!Array.isArray(pageNums)) {
|
|
975
|
+
return void 0;
|
|
976
|
+
}
|
|
977
|
+
const normalized = pageNums.filter((pageNum) => typeof pageNum === "number");
|
|
978
|
+
return normalized.length > 0 ? normalized : void 0;
|
|
979
|
+
}
|
|
980
|
+
function normalizeTokens(tokens) {
|
|
981
|
+
if (!Array.isArray(tokens)) {
|
|
982
|
+
return void 0;
|
|
983
|
+
}
|
|
984
|
+
if (!tokens.every((token) => typeof token === "string")) {
|
|
985
|
+
return void 0;
|
|
986
|
+
}
|
|
987
|
+
return tokens;
|
|
988
|
+
}
|
|
907
989
|
function normalizeTextChunk(chunkData) {
|
|
908
990
|
const metadata = getChunkMetadata(chunkData);
|
|
991
|
+
const connectTo = metadata.connectTo ?? chunkData.connectTo;
|
|
992
|
+
const relationships = metadata.relationships ?? chunkData.relationships;
|
|
909
993
|
return {
|
|
910
994
|
chunkId: chunkData.chunkId ?? "",
|
|
911
995
|
type: "text",
|
|
912
996
|
content: chunkData.content ?? "",
|
|
913
997
|
path: chunkData.path ?? "",
|
|
998
|
+
pageNums: normalizePageNums(metadata.pageNums ?? chunkData.pageNums),
|
|
914
999
|
length: metadata.length ?? chunkData.length ?? 0,
|
|
915
|
-
tokens: metadata.tokens ?? chunkData.tokens,
|
|
1000
|
+
tokens: normalizeTokens(metadata.tokens ?? chunkData.tokens),
|
|
916
1001
|
keywords: metadata.keywords ?? chunkData.keywords,
|
|
917
1002
|
summary: metadata.summary ?? chunkData.summary,
|
|
918
|
-
|
|
1003
|
+
...connectTo !== void 0 && { connectTo },
|
|
1004
|
+
...relationships !== void 0 && { relationships }
|
|
919
1005
|
};
|
|
920
1006
|
}
|
|
921
1007
|
async function processChunk(zip, chunkData) {
|
|
@@ -939,6 +1025,7 @@ async function processChunk(zip, chunkData) {
|
|
|
939
1025
|
type: "image",
|
|
940
1026
|
content: chunkData.content ?? "",
|
|
941
1027
|
path: chunkData.path ?? "",
|
|
1028
|
+
pageNums: normalizePageNums(metadata.pageNums ?? chunkData.pageNums),
|
|
942
1029
|
length: metadata.length ?? chunkData.length ?? 0,
|
|
943
1030
|
filePath,
|
|
944
1031
|
summary: metadata.summary ?? chunkData.summary,
|
|
@@ -973,6 +1060,7 @@ async function processChunk(zip, chunkData) {
|
|
|
973
1060
|
type: "table",
|
|
974
1061
|
content: chunkData.content ?? "",
|
|
975
1062
|
path: chunkData.path ?? "",
|
|
1063
|
+
pageNums: normalizePageNums(metadata.pageNums ?? chunkData.pageNums),
|
|
976
1064
|
length: metadata.length ?? chunkData.length ?? 0,
|
|
977
1065
|
filePath,
|
|
978
1066
|
tableType: metadata.tableType ?? chunkData.tableType,
|
|
@@ -998,7 +1086,11 @@ var Jobs = class extends BaseResource {
|
|
|
998
1086
|
* Create a new parsing job
|
|
999
1087
|
*/
|
|
1000
1088
|
async create(params) {
|
|
1001
|
-
const job = await this.httpClient.post(
|
|
1089
|
+
const job = await this.httpClient.post(
|
|
1090
|
+
"/v1/jobs",
|
|
1091
|
+
params
|
|
1092
|
+
);
|
|
1093
|
+
delete job.documentId;
|
|
1002
1094
|
if (job.uploadUrl) {
|
|
1003
1095
|
this.pendingUploadJobs.set(job.jobId, job);
|
|
1004
1096
|
}
|
|
@@ -1046,7 +1138,8 @@ var Jobs = class extends BaseResource {
|
|
|
1046
1138
|
if (!jobResult.resultUrl) {
|
|
1047
1139
|
throw new NotFoundError("Result URL not available");
|
|
1048
1140
|
}
|
|
1049
|
-
|
|
1141
|
+
const result = await parseResult(this.httpClient, jobResult.resultUrl, options);
|
|
1142
|
+
return enrichParseResult(result, jobResult);
|
|
1050
1143
|
}
|
|
1051
1144
|
isHttpUrl(value) {
|
|
1052
1145
|
return /^https?:\/\//i.test(value);
|
|
@@ -1097,6 +1190,43 @@ var Jobs = class extends BaseResource {
|
|
|
1097
1190
|
}
|
|
1098
1191
|
};
|
|
1099
1192
|
|
|
1193
|
+
// src/resources/retrieval.ts
|
|
1194
|
+
var Retrieval = class extends BaseResource {
|
|
1195
|
+
/**
|
|
1196
|
+
* Query published documents.
|
|
1197
|
+
*/
|
|
1198
|
+
async query(params) {
|
|
1199
|
+
return this.httpClient.post("/v1/retrieval/query", params);
|
|
1200
|
+
}
|
|
1201
|
+
};
|
|
1202
|
+
|
|
1203
|
+
// src/resources/documents.ts
|
|
1204
|
+
var Documents = class extends BaseResource {
|
|
1205
|
+
/**
|
|
1206
|
+
* List canonical documents in a namespace.
|
|
1207
|
+
*/
|
|
1208
|
+
async list(params) {
|
|
1209
|
+
const requestConfig = params?.namespace ? {
|
|
1210
|
+
params: {
|
|
1211
|
+
namespace: params.namespace
|
|
1212
|
+
}
|
|
1213
|
+
} : void 0;
|
|
1214
|
+
return this.httpClient.get("/v1/documents", requestConfig);
|
|
1215
|
+
}
|
|
1216
|
+
/**
|
|
1217
|
+
* Get one canonical document by ID.
|
|
1218
|
+
*/
|
|
1219
|
+
async get(documentId) {
|
|
1220
|
+
return this.httpClient.get(`/v1/documents/${documentId}`);
|
|
1221
|
+
}
|
|
1222
|
+
/**
|
|
1223
|
+
* Archive one canonical document by ID.
|
|
1224
|
+
*/
|
|
1225
|
+
async archive(documentId) {
|
|
1226
|
+
return this.httpClient.post(`/v1/documents/${documentId}/archive`);
|
|
1227
|
+
}
|
|
1228
|
+
};
|
|
1229
|
+
|
|
1100
1230
|
// src/client.ts
|
|
1101
1231
|
function inferFileName(file, explicitFileName) {
|
|
1102
1232
|
if (explicitFileName) {
|
|
@@ -1116,6 +1246,10 @@ function isReadStream2(file) {
|
|
|
1116
1246
|
var Knowhere = class {
|
|
1117
1247
|
/** Jobs resource for low-level API */
|
|
1118
1248
|
jobs;
|
|
1249
|
+
/** Retrieval resource for querying published documents */
|
|
1250
|
+
retrieval;
|
|
1251
|
+
/** Documents resource for canonical document lifecycle operations */
|
|
1252
|
+
documents;
|
|
1119
1253
|
httpClient;
|
|
1120
1254
|
/**
|
|
1121
1255
|
* Create a new Knowhere client
|
|
@@ -1139,6 +1273,8 @@ var Knowhere = class {
|
|
|
1139
1273
|
httpsAgent: options.httpsAgent
|
|
1140
1274
|
});
|
|
1141
1275
|
this.jobs = new Jobs(this.httpClient);
|
|
1276
|
+
this.retrieval = new Retrieval(this.httpClient);
|
|
1277
|
+
this.documents = new Documents(this.httpClient);
|
|
1142
1278
|
}
|
|
1143
1279
|
/**
|
|
1144
1280
|
* High-level API: Parse a document and return structured results
|
|
@@ -1181,7 +1317,7 @@ var Knowhere = class {
|
|
|
1181
1317
|
smartTitleParse: params.smartTitleParse,
|
|
1182
1318
|
summaryImage: params.summaryImage,
|
|
1183
1319
|
summaryTable: params.summaryTable,
|
|
1184
|
-
summaryTxt: params.
|
|
1320
|
+
summaryTxt: params.summaryTxt,
|
|
1185
1321
|
addFragDesc: params.addFragDesc,
|
|
1186
1322
|
kbDir: params.kbDir
|
|
1187
1323
|
};
|
|
@@ -1196,6 +1332,8 @@ var Knowhere = class {
|
|
|
1196
1332
|
sourceUrl: params.url,
|
|
1197
1333
|
fileName: resolvedFileName,
|
|
1198
1334
|
dataId: params.dataId,
|
|
1335
|
+
namespace: params.namespace,
|
|
1336
|
+
documentId: params.documentId,
|
|
1199
1337
|
parsingParams: Object.keys(parsingParams).length > 0 ? parsingParams : void 0,
|
|
1200
1338
|
webhook
|
|
1201
1339
|
});
|
|
@@ -1215,7 +1353,7 @@ var Knowhere = class {
|
|
|
1215
1353
|
const result = await this.jobs.load(jobResult, {
|
|
1216
1354
|
verifyChecksum: params.verifyChecksum
|
|
1217
1355
|
});
|
|
1218
|
-
return result;
|
|
1356
|
+
return enrichParseResult(result, jobResult);
|
|
1219
1357
|
}
|
|
1220
1358
|
};
|
|
1221
1359
|
// Annotate the CommonJS export names for ESM import in node:
|
|
@@ -1225,6 +1363,7 @@ var Knowhere = class {
|
|
|
1225
1363
|
BadRequestError,
|
|
1226
1364
|
ChecksumError,
|
|
1227
1365
|
ConflictError,
|
|
1366
|
+
Documents,
|
|
1228
1367
|
GatewayTimeoutError,
|
|
1229
1368
|
InternalServerError,
|
|
1230
1369
|
InvalidStateError,
|
|
@@ -1238,6 +1377,7 @@ var Knowhere = class {
|
|
|
1238
1377
|
PermissionDeniedError,
|
|
1239
1378
|
PollingTimeoutError,
|
|
1240
1379
|
RateLimitError,
|
|
1380
|
+
Retrieval,
|
|
1241
1381
|
ServiceUnavailableError,
|
|
1242
1382
|
TimeoutError,
|
|
1243
1383
|
VERSION,
|
package/dist/index.mjs
CHANGED
|
@@ -286,6 +286,15 @@ function enrichJobResult(jobResult) {
|
|
|
286
286
|
}
|
|
287
287
|
return jobResult;
|
|
288
288
|
}
|
|
289
|
+
function enrichParseResult(parseResult2, scope) {
|
|
290
|
+
if (scope.namespace !== void 0) {
|
|
291
|
+
parseResult2.namespace = scope.namespace;
|
|
292
|
+
}
|
|
293
|
+
if (scope.documentId !== void 0) {
|
|
294
|
+
parseResult2.documentId = scope.documentId;
|
|
295
|
+
}
|
|
296
|
+
return parseResult2;
|
|
297
|
+
}
|
|
289
298
|
function sanitizePath(path2) {
|
|
290
299
|
let sanitized = path2.replace(/^\/+/, "");
|
|
291
300
|
sanitized = sanitized.replace(/\.\.(\/|\\)/g, "");
|
|
@@ -781,11 +790,39 @@ async function parseResult(httpClient, resultUrl, options) {
|
|
|
781
790
|
const hierarchyContent = await hierarchyFile.async("string");
|
|
782
791
|
hierarchy = JSON.parse(hierarchyContent);
|
|
783
792
|
}
|
|
793
|
+
let chunksSlim;
|
|
794
|
+
const chunksSlimFile = zip.file("chunks_slim.json");
|
|
795
|
+
if (chunksSlimFile) {
|
|
796
|
+
const chunksSlimContent = await chunksSlimFile.async("string");
|
|
797
|
+
let chunksSlimData = JSON.parse(chunksSlimContent);
|
|
798
|
+
chunksSlimData = keysToCamel(chunksSlimData);
|
|
799
|
+
chunksSlim = extractSlimChunks(chunksSlimData);
|
|
800
|
+
}
|
|
801
|
+
let tocHierarchies;
|
|
802
|
+
const tocHierarchiesFile = zip.file("toc_hierarchies.json");
|
|
803
|
+
if (tocHierarchiesFile) {
|
|
804
|
+
const tocHierarchiesContent = await tocHierarchiesFile.async("string");
|
|
805
|
+
tocHierarchies = keysToCamel(JSON.parse(tocHierarchiesContent));
|
|
806
|
+
}
|
|
807
|
+
let kbCsv;
|
|
808
|
+
const kbCsvFile = zip.file("kb.csv");
|
|
809
|
+
if (kbCsvFile) {
|
|
810
|
+
kbCsv = await kbCsvFile.async("string");
|
|
811
|
+
}
|
|
812
|
+
let hierarchyViewHtml;
|
|
813
|
+
const hierarchyViewFile = zip.file("hierarchy_view.html");
|
|
814
|
+
if (hierarchyViewFile) {
|
|
815
|
+
hierarchyViewHtml = await hierarchyViewFile.async("string");
|
|
816
|
+
}
|
|
784
817
|
const result = {
|
|
785
818
|
manifest,
|
|
786
819
|
chunks,
|
|
820
|
+
chunksSlim,
|
|
787
821
|
fullMarkdown,
|
|
788
822
|
hierarchy,
|
|
823
|
+
tocHierarchies,
|
|
824
|
+
kbCsv,
|
|
825
|
+
hierarchyViewHtml,
|
|
789
826
|
rawZip: zipBuffer,
|
|
790
827
|
get textChunks() {
|
|
791
828
|
return chunks.filter((c) => c.type === "text");
|
|
@@ -809,12 +846,30 @@ async function parseResult(httpClient, resultUrl, options) {
|
|
|
809
846
|
await fs2.mkdir(directory, { recursive: true });
|
|
810
847
|
await fs2.writeFile(join(directory, "manifest.json"), JSON.stringify(manifest, null, 2));
|
|
811
848
|
await fs2.writeFile(join(directory, "chunks.json"), JSON.stringify(chunks, null, 2));
|
|
849
|
+
if (chunksSlim) {
|
|
850
|
+
await fs2.writeFile(
|
|
851
|
+
join(directory, "chunks_slim.json"),
|
|
852
|
+
JSON.stringify({ chunks: chunksSlim }, null, 2)
|
|
853
|
+
);
|
|
854
|
+
}
|
|
812
855
|
if (fullMarkdown) {
|
|
813
856
|
await fs2.writeFile(join(directory, "full.md"), fullMarkdown);
|
|
814
857
|
}
|
|
815
858
|
if (hierarchy) {
|
|
816
859
|
await fs2.writeFile(join(directory, "hierarchy.json"), JSON.stringify(hierarchy, null, 2));
|
|
817
860
|
}
|
|
861
|
+
if (tocHierarchies) {
|
|
862
|
+
await fs2.writeFile(
|
|
863
|
+
join(directory, "toc_hierarchies.json"),
|
|
864
|
+
JSON.stringify(tocHierarchies, null, 2)
|
|
865
|
+
);
|
|
866
|
+
}
|
|
867
|
+
if (kbCsv) {
|
|
868
|
+
await fs2.writeFile(join(directory, "kb.csv"), kbCsv);
|
|
869
|
+
}
|
|
870
|
+
if (hierarchyViewHtml) {
|
|
871
|
+
await fs2.writeFile(join(directory, "hierarchy_view.html"), hierarchyViewHtml);
|
|
872
|
+
}
|
|
818
873
|
for (const imageChunk of this.imageChunks) {
|
|
819
874
|
await imageChunk.save(directory);
|
|
820
875
|
}
|
|
@@ -836,6 +891,15 @@ function extractChunks(payload) {
|
|
|
836
891
|
}
|
|
837
892
|
return [];
|
|
838
893
|
}
|
|
894
|
+
function extractSlimChunks(payload) {
|
|
895
|
+
if (Array.isArray(payload)) {
|
|
896
|
+
return payload;
|
|
897
|
+
}
|
|
898
|
+
if (Array.isArray(payload.chunks)) {
|
|
899
|
+
return payload.chunks;
|
|
900
|
+
}
|
|
901
|
+
return [];
|
|
902
|
+
}
|
|
839
903
|
function getChunkMetadata(chunkData) {
|
|
840
904
|
if (!chunkData.metadata) {
|
|
841
905
|
return {};
|
|
@@ -846,18 +910,38 @@ function getChunkFilePath(chunkData) {
|
|
|
846
910
|
const metadata = getChunkMetadata(chunkData);
|
|
847
911
|
return chunkData.filePath ?? metadata.filePath ?? chunkData.path;
|
|
848
912
|
}
|
|
913
|
+
function normalizePageNums(pageNums) {
|
|
914
|
+
if (!Array.isArray(pageNums)) {
|
|
915
|
+
return void 0;
|
|
916
|
+
}
|
|
917
|
+
const normalized = pageNums.filter((pageNum) => typeof pageNum === "number");
|
|
918
|
+
return normalized.length > 0 ? normalized : void 0;
|
|
919
|
+
}
|
|
920
|
+
function normalizeTokens(tokens) {
|
|
921
|
+
if (!Array.isArray(tokens)) {
|
|
922
|
+
return void 0;
|
|
923
|
+
}
|
|
924
|
+
if (!tokens.every((token) => typeof token === "string")) {
|
|
925
|
+
return void 0;
|
|
926
|
+
}
|
|
927
|
+
return tokens;
|
|
928
|
+
}
|
|
849
929
|
function normalizeTextChunk(chunkData) {
|
|
850
930
|
const metadata = getChunkMetadata(chunkData);
|
|
931
|
+
const connectTo = metadata.connectTo ?? chunkData.connectTo;
|
|
932
|
+
const relationships = metadata.relationships ?? chunkData.relationships;
|
|
851
933
|
return {
|
|
852
934
|
chunkId: chunkData.chunkId ?? "",
|
|
853
935
|
type: "text",
|
|
854
936
|
content: chunkData.content ?? "",
|
|
855
937
|
path: chunkData.path ?? "",
|
|
938
|
+
pageNums: normalizePageNums(metadata.pageNums ?? chunkData.pageNums),
|
|
856
939
|
length: metadata.length ?? chunkData.length ?? 0,
|
|
857
|
-
tokens: metadata.tokens ?? chunkData.tokens,
|
|
940
|
+
tokens: normalizeTokens(metadata.tokens ?? chunkData.tokens),
|
|
858
941
|
keywords: metadata.keywords ?? chunkData.keywords,
|
|
859
942
|
summary: metadata.summary ?? chunkData.summary,
|
|
860
|
-
|
|
943
|
+
...connectTo !== void 0 && { connectTo },
|
|
944
|
+
...relationships !== void 0 && { relationships }
|
|
861
945
|
};
|
|
862
946
|
}
|
|
863
947
|
async function processChunk(zip, chunkData) {
|
|
@@ -881,6 +965,7 @@ async function processChunk(zip, chunkData) {
|
|
|
881
965
|
type: "image",
|
|
882
966
|
content: chunkData.content ?? "",
|
|
883
967
|
path: chunkData.path ?? "",
|
|
968
|
+
pageNums: normalizePageNums(metadata.pageNums ?? chunkData.pageNums),
|
|
884
969
|
length: metadata.length ?? chunkData.length ?? 0,
|
|
885
970
|
filePath,
|
|
886
971
|
summary: metadata.summary ?? chunkData.summary,
|
|
@@ -915,6 +1000,7 @@ async function processChunk(zip, chunkData) {
|
|
|
915
1000
|
type: "table",
|
|
916
1001
|
content: chunkData.content ?? "",
|
|
917
1002
|
path: chunkData.path ?? "",
|
|
1003
|
+
pageNums: normalizePageNums(metadata.pageNums ?? chunkData.pageNums),
|
|
918
1004
|
length: metadata.length ?? chunkData.length ?? 0,
|
|
919
1005
|
filePath,
|
|
920
1006
|
tableType: metadata.tableType ?? chunkData.tableType,
|
|
@@ -940,7 +1026,11 @@ var Jobs = class extends BaseResource {
|
|
|
940
1026
|
* Create a new parsing job
|
|
941
1027
|
*/
|
|
942
1028
|
async create(params) {
|
|
943
|
-
const job = await this.httpClient.post(
|
|
1029
|
+
const job = await this.httpClient.post(
|
|
1030
|
+
"/v1/jobs",
|
|
1031
|
+
params
|
|
1032
|
+
);
|
|
1033
|
+
delete job.documentId;
|
|
944
1034
|
if (job.uploadUrl) {
|
|
945
1035
|
this.pendingUploadJobs.set(job.jobId, job);
|
|
946
1036
|
}
|
|
@@ -988,7 +1078,8 @@ var Jobs = class extends BaseResource {
|
|
|
988
1078
|
if (!jobResult.resultUrl) {
|
|
989
1079
|
throw new NotFoundError("Result URL not available");
|
|
990
1080
|
}
|
|
991
|
-
|
|
1081
|
+
const result = await parseResult(this.httpClient, jobResult.resultUrl, options);
|
|
1082
|
+
return enrichParseResult(result, jobResult);
|
|
992
1083
|
}
|
|
993
1084
|
isHttpUrl(value) {
|
|
994
1085
|
return /^https?:\/\//i.test(value);
|
|
@@ -1039,6 +1130,43 @@ var Jobs = class extends BaseResource {
|
|
|
1039
1130
|
}
|
|
1040
1131
|
};
|
|
1041
1132
|
|
|
1133
|
+
// src/resources/retrieval.ts
|
|
1134
|
+
var Retrieval = class extends BaseResource {
|
|
1135
|
+
/**
|
|
1136
|
+
* Query published documents.
|
|
1137
|
+
*/
|
|
1138
|
+
async query(params) {
|
|
1139
|
+
return this.httpClient.post("/v1/retrieval/query", params);
|
|
1140
|
+
}
|
|
1141
|
+
};
|
|
1142
|
+
|
|
1143
|
+
// src/resources/documents.ts
|
|
1144
|
+
var Documents = class extends BaseResource {
|
|
1145
|
+
/**
|
|
1146
|
+
* List canonical documents in a namespace.
|
|
1147
|
+
*/
|
|
1148
|
+
async list(params) {
|
|
1149
|
+
const requestConfig = params?.namespace ? {
|
|
1150
|
+
params: {
|
|
1151
|
+
namespace: params.namespace
|
|
1152
|
+
}
|
|
1153
|
+
} : void 0;
|
|
1154
|
+
return this.httpClient.get("/v1/documents", requestConfig);
|
|
1155
|
+
}
|
|
1156
|
+
/**
|
|
1157
|
+
* Get one canonical document by ID.
|
|
1158
|
+
*/
|
|
1159
|
+
async get(documentId) {
|
|
1160
|
+
return this.httpClient.get(`/v1/documents/${documentId}`);
|
|
1161
|
+
}
|
|
1162
|
+
/**
|
|
1163
|
+
* Archive one canonical document by ID.
|
|
1164
|
+
*/
|
|
1165
|
+
async archive(documentId) {
|
|
1166
|
+
return this.httpClient.post(`/v1/documents/${documentId}/archive`);
|
|
1167
|
+
}
|
|
1168
|
+
};
|
|
1169
|
+
|
|
1042
1170
|
// src/client.ts
|
|
1043
1171
|
function inferFileName(file, explicitFileName) {
|
|
1044
1172
|
if (explicitFileName) {
|
|
@@ -1058,6 +1186,10 @@ function isReadStream2(file) {
|
|
|
1058
1186
|
var Knowhere = class {
|
|
1059
1187
|
/** Jobs resource for low-level API */
|
|
1060
1188
|
jobs;
|
|
1189
|
+
/** Retrieval resource for querying published documents */
|
|
1190
|
+
retrieval;
|
|
1191
|
+
/** Documents resource for canonical document lifecycle operations */
|
|
1192
|
+
documents;
|
|
1061
1193
|
httpClient;
|
|
1062
1194
|
/**
|
|
1063
1195
|
* Create a new Knowhere client
|
|
@@ -1081,6 +1213,8 @@ var Knowhere = class {
|
|
|
1081
1213
|
httpsAgent: options.httpsAgent
|
|
1082
1214
|
});
|
|
1083
1215
|
this.jobs = new Jobs(this.httpClient);
|
|
1216
|
+
this.retrieval = new Retrieval(this.httpClient);
|
|
1217
|
+
this.documents = new Documents(this.httpClient);
|
|
1084
1218
|
}
|
|
1085
1219
|
/**
|
|
1086
1220
|
* High-level API: Parse a document and return structured results
|
|
@@ -1123,7 +1257,7 @@ var Knowhere = class {
|
|
|
1123
1257
|
smartTitleParse: params.smartTitleParse,
|
|
1124
1258
|
summaryImage: params.summaryImage,
|
|
1125
1259
|
summaryTable: params.summaryTable,
|
|
1126
|
-
summaryTxt: params.
|
|
1260
|
+
summaryTxt: params.summaryTxt,
|
|
1127
1261
|
addFragDesc: params.addFragDesc,
|
|
1128
1262
|
kbDir: params.kbDir
|
|
1129
1263
|
};
|
|
@@ -1138,6 +1272,8 @@ var Knowhere = class {
|
|
|
1138
1272
|
sourceUrl: params.url,
|
|
1139
1273
|
fileName: resolvedFileName,
|
|
1140
1274
|
dataId: params.dataId,
|
|
1275
|
+
namespace: params.namespace,
|
|
1276
|
+
documentId: params.documentId,
|
|
1141
1277
|
parsingParams: Object.keys(parsingParams).length > 0 ? parsingParams : void 0,
|
|
1142
1278
|
webhook
|
|
1143
1279
|
});
|
|
@@ -1157,7 +1293,7 @@ var Knowhere = class {
|
|
|
1157
1293
|
const result = await this.jobs.load(jobResult, {
|
|
1158
1294
|
verifyChecksum: params.verifyChecksum
|
|
1159
1295
|
});
|
|
1160
|
-
return result;
|
|
1296
|
+
return enrichParseResult(result, jobResult);
|
|
1161
1297
|
}
|
|
1162
1298
|
};
|
|
1163
1299
|
export {
|
|
@@ -1166,6 +1302,7 @@ export {
|
|
|
1166
1302
|
BadRequestError,
|
|
1167
1303
|
ChecksumError,
|
|
1168
1304
|
ConflictError,
|
|
1305
|
+
Documents,
|
|
1169
1306
|
GatewayTimeoutError,
|
|
1170
1307
|
InternalServerError,
|
|
1171
1308
|
InvalidStateError,
|
|
@@ -1179,6 +1316,7 @@ export {
|
|
|
1179
1316
|
PermissionDeniedError,
|
|
1180
1317
|
PollingTimeoutError,
|
|
1181
1318
|
RateLimitError,
|
|
1319
|
+
Retrieval,
|
|
1182
1320
|
ServiceUnavailableError,
|
|
1183
1321
|
TimeoutError,
|
|
1184
1322
|
VERSION,
|