@llamaindex/llama-cloud 1.7.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +80 -0
- package/README.md +10 -8
- package/client.d.mts +9 -8
- package/client.d.mts.map +1 -1
- package/client.d.ts +9 -8
- package/client.d.ts.map +1 -1
- package/client.js +16 -12
- package/client.js.map +1 -1
- package/client.mjs +16 -12
- package/client.mjs.map +1 -1
- package/core/pagination.d.mts +0 -23
- package/core/pagination.d.mts.map +1 -1
- package/core/pagination.d.ts +0 -23
- package/core/pagination.d.ts.map +1 -1
- package/core/pagination.js +1 -32
- package/core/pagination.js.map +1 -1
- package/core/pagination.mjs +0 -30
- package/core/pagination.mjs.map +1 -1
- package/package.json +12 -1
- package/resources/beta/batch/batch.d.mts +55 -30
- package/resources/beta/batch/batch.d.mts.map +1 -1
- package/resources/beta/batch/batch.d.ts +55 -30
- package/resources/beta/batch/batch.d.ts.map +1 -1
- package/resources/beta/batch/batch.js +14 -11
- package/resources/beta/batch/batch.js.map +1 -1
- package/resources/beta/batch/batch.mjs +14 -11
- package/resources/beta/batch/batch.mjs.map +1 -1
- package/resources/beta/batch/job-items.d.mts +36 -13
- package/resources/beta/batch/job-items.d.mts.map +1 -1
- package/resources/beta/batch/job-items.d.ts +36 -13
- package/resources/beta/batch/job-items.d.ts.map +1 -1
- package/resources/beta/batch/job-items.js +6 -8
- package/resources/beta/batch/job-items.js.map +1 -1
- package/resources/beta/batch/job-items.mjs +6 -8
- package/resources/beta/batch/job-items.mjs.map +1 -1
- package/resources/beta/sheets.d.mts +16 -0
- package/resources/beta/sheets.d.mts.map +1 -1
- package/resources/beta/sheets.d.ts +16 -0
- package/resources/beta/sheets.d.ts.map +1 -1
- package/resources/beta/split.d.mts +60 -16
- package/resources/beta/split.d.mts.map +1 -1
- package/resources/beta/split.d.ts +60 -16
- package/resources/beta/split.d.ts.map +1 -1
- package/resources/beta/split.js.map +1 -1
- package/resources/beta/split.mjs.map +1 -1
- package/resources/classifier/jobs.d.mts +20 -3
- package/resources/classifier/jobs.d.mts.map +1 -1
- package/resources/classifier/jobs.d.ts +20 -3
- package/resources/classifier/jobs.d.ts.map +1 -1
- package/resources/classifier/jobs.js +8 -0
- package/resources/classifier/jobs.js.map +1 -1
- package/resources/classifier/jobs.mjs +8 -0
- package/resources/classifier/jobs.mjs.map +1 -1
- package/resources/classify.d.mts +373 -0
- package/resources/classify.d.mts.map +1 -0
- package/resources/classify.d.ts +373 -0
- package/resources/classify.d.ts.map +1 -0
- package/resources/classify.js +54 -0
- package/resources/classify.js.map +1 -0
- package/resources/classify.mjs +50 -0
- package/resources/classify.mjs.map +1 -0
- package/resources/extract.d.mts +1588 -0
- package/resources/extract.d.mts.map +1 -0
- package/resources/extract.d.ts +1588 -0
- package/resources/extract.d.ts.map +1 -0
- package/resources/extract.js +217 -0
- package/resources/extract.js.map +1 -0
- package/resources/extract.mjs +213 -0
- package/resources/extract.mjs.map +1 -0
- package/resources/files.d.mts +52 -38
- package/resources/files.d.mts.map +1 -1
- package/resources/files.d.ts +52 -38
- package/resources/files.d.ts.map +1 -1
- package/resources/files.js +10 -9
- package/resources/files.js.map +1 -1
- package/resources/files.mjs +10 -9
- package/resources/files.mjs.map +1 -1
- package/resources/index.d.mts +2 -1
- package/resources/index.d.mts.map +1 -1
- package/resources/index.d.ts +2 -1
- package/resources/index.d.ts.map +1 -1
- package/resources/index.js +5 -3
- package/resources/index.js.map +1 -1
- package/resources/index.mjs +2 -1
- package/resources/index.mjs.map +1 -1
- package/resources/parsing.d.mts +324 -138
- package/resources/parsing.d.mts.map +1 -1
- package/resources/parsing.d.ts +324 -138
- package/resources/parsing.d.ts.map +1 -1
- package/resources/parsing.js +30 -4
- package/resources/parsing.js.map +1 -1
- package/resources/parsing.mjs +30 -4
- package/resources/parsing.mjs.map +1 -1
- package/resources/pipelines/pipelines.d.mts +59 -13
- package/resources/pipelines/pipelines.d.mts.map +1 -1
- package/resources/pipelines/pipelines.d.ts +59 -13
- package/resources/pipelines/pipelines.d.ts.map +1 -1
- package/resources/pipelines/pipelines.js +24 -9
- package/resources/pipelines/pipelines.js.map +1 -1
- package/resources/pipelines/pipelines.mjs +24 -9
- package/resources/pipelines/pipelines.mjs.map +1 -1
- package/resources/pipelines/sync.d.mts +5 -3
- package/resources/pipelines/sync.d.mts.map +1 -1
- package/resources/pipelines/sync.d.ts +5 -3
- package/resources/pipelines/sync.d.ts.map +1 -1
- package/resources/pipelines/sync.js +5 -3
- package/resources/pipelines/sync.js.map +1 -1
- package/resources/pipelines/sync.mjs +5 -3
- package/resources/pipelines/sync.mjs.map +1 -1
- package/src/client.ts +86 -22
- package/src/core/pagination.ts +0 -71
- package/src/resources/beta/batch/batch.ts +75 -30
- package/src/resources/beta/batch/job-items.ts +56 -13
- package/src/resources/beta/sheets.ts +20 -0
- package/src/resources/beta/split.ts +70 -17
- package/src/resources/classifier/jobs.ts +20 -3
- package/src/resources/classify.ts +486 -0
- package/src/resources/extract.ts +2045 -0
- package/src/resources/files.ts +52 -38
- package/src/resources/index.ts +35 -1
- package/src/resources/parsing.ts +367 -136
- package/src/resources/pipelines/pipelines.ts +80 -14
- package/src/resources/pipelines/sync.ts +5 -3
- package/src/version.ts +1 -1
- package/version.d.mts +1 -1
- package/version.d.ts +1 -1
- package/version.js +1 -1
- package/version.mjs +1 -1
- package/resources/extraction/extraction-agents/extraction-agents.d.mts +0 -126
- package/resources/extraction/extraction-agents/extraction-agents.d.mts.map +0 -1
- package/resources/extraction/extraction-agents/extraction-agents.d.ts +0 -126
- package/resources/extraction/extraction-agents/extraction-agents.d.ts.map +0 -1
- package/resources/extraction/extraction-agents/extraction-agents.js +0 -56
- package/resources/extraction/extraction-agents/extraction-agents.js.map +0 -1
- package/resources/extraction/extraction-agents/extraction-agents.mjs +0 -51
- package/resources/extraction/extraction-agents/extraction-agents.mjs.map +0 -1
- package/resources/extraction/extraction-agents/index.d.mts +0 -3
- package/resources/extraction/extraction-agents/index.d.mts.map +0 -1
- package/resources/extraction/extraction-agents/index.d.ts +0 -3
- package/resources/extraction/extraction-agents/index.d.ts.map +0 -1
- package/resources/extraction/extraction-agents/index.js +0 -9
- package/resources/extraction/extraction-agents/index.js.map +0 -1
- package/resources/extraction/extraction-agents/index.mjs +0 -4
- package/resources/extraction/extraction-agents/index.mjs.map +0 -1
- package/resources/extraction/extraction-agents/schema.d.mts +0 -75
- package/resources/extraction/extraction-agents/schema.d.mts.map +0 -1
- package/resources/extraction/extraction-agents/schema.d.ts +0 -75
- package/resources/extraction/extraction-agents/schema.d.ts.map +0 -1
- package/resources/extraction/extraction-agents/schema.js +0 -28
- package/resources/extraction/extraction-agents/schema.js.map +0 -1
- package/resources/extraction/extraction-agents/schema.mjs +0 -24
- package/resources/extraction/extraction-agents/schema.mjs.map +0 -1
- package/resources/extraction/extraction-agents.d.mts +0 -2
- package/resources/extraction/extraction-agents.d.mts.map +0 -1
- package/resources/extraction/extraction-agents.d.ts +0 -2
- package/resources/extraction/extraction-agents.d.ts.map +0 -1
- package/resources/extraction/extraction-agents.js +0 -6
- package/resources/extraction/extraction-agents.js.map +0 -1
- package/resources/extraction/extraction-agents.mjs +0 -3
- package/resources/extraction/extraction-agents.mjs.map +0 -1
- package/resources/extraction/extraction.d.mts +0 -118
- package/resources/extraction/extraction.d.mts.map +0 -1
- package/resources/extraction/extraction.d.ts +0 -118
- package/resources/extraction/extraction.d.ts.map +0 -1
- package/resources/extraction/extraction.js +0 -91
- package/resources/extraction/extraction.js.map +0 -1
- package/resources/extraction/extraction.mjs +0 -86
- package/resources/extraction/extraction.mjs.map +0 -1
- package/resources/extraction/index.d.mts +0 -5
- package/resources/extraction/index.d.mts.map +0 -1
- package/resources/extraction/index.d.ts +0 -5
- package/resources/extraction/index.d.ts.map +0 -1
- package/resources/extraction/index.js +0 -13
- package/resources/extraction/index.js.map +0 -1
- package/resources/extraction/index.mjs +0 -6
- package/resources/extraction/index.mjs.map +0 -1
- package/resources/extraction/jobs.d.mts +0 -280
- package/resources/extraction/jobs.d.mts.map +0 -1
- package/resources/extraction/jobs.d.ts +0 -280
- package/resources/extraction/jobs.d.ts.map +0 -1
- package/resources/extraction/jobs.js +0 -179
- package/resources/extraction/jobs.js.map +0 -1
- package/resources/extraction/jobs.mjs +0 -175
- package/resources/extraction/jobs.mjs.map +0 -1
- package/resources/extraction/runs.d.mts +0 -198
- package/resources/extraction/runs.d.mts.map +0 -1
- package/resources/extraction/runs.d.ts +0 -198
- package/resources/extraction/runs.d.ts.map +0 -1
- package/resources/extraction/runs.js +0 -42
- package/resources/extraction/runs.js.map +0 -1
- package/resources/extraction/runs.mjs +0 -38
- package/resources/extraction/runs.mjs.map +0 -1
- package/resources/extraction.d.mts +0 -2
- package/resources/extraction.d.mts.map +0 -1
- package/resources/extraction.d.ts +0 -2
- package/resources/extraction.d.ts.map +0 -1
- package/resources/extraction.js +0 -6
- package/resources/extraction.js.map +0 -1
- package/resources/extraction.mjs +0 -3
- package/resources/extraction.mjs.map +0 -1
- package/src/resources/extraction/extraction-agents/extraction-agents.ts +0 -196
- package/src/resources/extraction/extraction-agents/index.ts +0 -18
- package/src/resources/extraction/extraction-agents/schema.ts +0 -100
- package/src/resources/extraction/extraction-agents.ts +0 -3
- package/src/resources/extraction/extraction.ts +0 -224
- package/src/resources/extraction/index.ts +0 -34
- package/src/resources/extraction/jobs.ts +0 -414
- package/src/resources/extraction/runs.ts +0 -315
- package/src/resources/extraction.ts +0 -3
package/resources/parsing.d.mts
CHANGED
|
@@ -7,18 +7,44 @@ import { type Uploadable } from "../core/uploads.mjs";
|
|
|
7
7
|
import { PollingOptions } from "../core/polling.mjs";
|
|
8
8
|
export declare class Parsing extends APIResource {
|
|
9
9
|
/**
|
|
10
|
-
* Parse a file by file ID
|
|
10
|
+
* Parse a file by file ID or URL.
|
|
11
|
+
*
|
|
12
|
+
* Provide either `file_id` (a previously uploaded file) or `source_url` (a
|
|
13
|
+
* publicly accessible URL). Configure parsing with options like `tier`,
|
|
14
|
+
* `target_pages`, and `lang`.
|
|
15
|
+
*
|
|
16
|
+
* ## Tiers
|
|
17
|
+
*
|
|
18
|
+
* - `fast` — rule-based, cheapest, no AI
|
|
19
|
+
* - `cost_effective` — balanced speed and quality
|
|
20
|
+
* - `agentic` — full AI-powered parsing
|
|
21
|
+
* - `agentic_plus` — premium AI with specialized features
|
|
22
|
+
*
|
|
23
|
+
* The job runs asynchronously. Poll `GET /parse/{job_id}` with `expand=text` or
|
|
24
|
+
* `expand=markdown` to retrieve results.
|
|
11
25
|
*/
|
|
12
26
|
create(params: ParsingCreateParams & {
|
|
13
27
|
upload_file?: Uploadable;
|
|
14
28
|
}, options?: RequestOptions): APIPromise<ParsingCreateResponse>;
|
|
15
29
|
/**
|
|
16
|
-
* List parse jobs for the current project
|
|
17
|
-
*
|
|
30
|
+
* List parse jobs for the current project.
|
|
31
|
+
*
|
|
32
|
+
* Filter by `status` or creation date range. Results are paginated — use
|
|
33
|
+
* `page_token` from the response to fetch subsequent pages.
|
|
18
34
|
*/
|
|
19
35
|
list(query?: ParsingListParams | null | undefined, options?: RequestOptions): PagePromise<ParsingListResponsesPaginatedCursor, ParsingListResponse>;
|
|
20
36
|
/**
|
|
21
|
-
* Retrieve parse job with optional content
|
|
37
|
+
* Retrieve a parse job with optional expanded content.
|
|
38
|
+
*
|
|
39
|
+
* By default returns job metadata only. Use `expand` to include parsed content:
|
|
40
|
+
*
|
|
41
|
+
* - `text` — plain text output
|
|
42
|
+
* - `markdown` — markdown output
|
|
43
|
+
* - `items` — structured page-by-page output
|
|
44
|
+
* - `job_metadata` — usage and processing details
|
|
45
|
+
*
|
|
46
|
+
* Content metadata fields (e.g. `text_content_metadata`) return presigned URLs for
|
|
47
|
+
* downloading large results.
|
|
22
48
|
*/
|
|
23
49
|
get(jobID: string, query?: ParsingGetParams | null | undefined, options?: RequestOptions): APIPromise<ParsingGetResponse>;
|
|
24
50
|
/**
|
|
@@ -297,15 +323,24 @@ export interface ListItem {
|
|
|
297
323
|
*/
|
|
298
324
|
export type LlamaParseSupportedFileExtensions = '.pdf' | '.abw' | '.awt' | '.cgm' | '.cwk' | '.doc' | '.docm' | '.docx' | '.dot' | '.dotm' | '.dotx' | '.fodg' | '.fodp' | '.fopd' | '.fodt' | '.fb2' | '.hwp' | '.lwp' | '.mcw' | '.mw' | '.mwd' | '.odf' | '.odt' | '.otg' | '.ott' | '.pages' | '.pbd' | '.psw' | '.rtf' | '.sda' | '.sdd' | '.sdp' | '.sdw' | '.sgl' | '.std' | '.stw' | '.sxd' | '.sxg' | '.sxm' | '.sxw' | '.uof' | '.uop' | '.uot' | '.vor' | '.wpd' | '.wps' | '.wpt' | '.wri' | '.wn' | '.xml' | '.zabw' | '.key' | '.odp' | '.odg' | '.otp' | '.pot' | '.potm' | '.potx' | '.ppt' | '.pptm' | '.pptx' | '.sti' | '.sxi' | '.vsd' | '.vsdm' | '.vsdx' | '.vdx' | '.bmp' | '.gif' | '.jpg' | '.jpeg' | '.png' | '.svg' | '.tif' | '.tiff' | '.webp' | '.htm' | '.html' | '.xhtm' | '.csv' | '.dbf' | '.dif' | '.et' | '.eth' | '.fods' | '.numbers' | '.ods' | '.ots' | '.prn' | '.qpw' | '.slk' | '.stc' | '.sxc' | '.sylk' | '.tsv' | '.uos1' | '.uos2' | '.uos' | '.wb1' | '.wb2' | '.wb3' | '.wk1' | '.wk2' | '.wk3' | '.wk4' | '.wks' | '.wq1' | '.wq2' | '.xlr' | '.xls' | '.xlsb' | '.xlsm' | '.xlsx' | '.xlw' | '.azw' | '.azw3' | '.azw4' | '.cb7' | '.cbc' | '.cbr' | '.cbz' | '.chm' | '.djvu' | '.epub' | '.fbz' | '.htmlz' | '.lit' | '.lrf' | '.md' | '.mobi' | '.pdb' | '.pml' | '.prc' | '.rb' | '.snb' | '.tcr' | '.txtz' | '.m4a' | '.mp3' | '.mp4' | '.mpeg' | '.mpga' | '.wav' | '.webm';
|
|
299
325
|
/**
|
|
300
|
-
*
|
|
326
|
+
* A parse job (v1).
|
|
301
327
|
*/
|
|
302
328
|
export interface ParsingJob {
|
|
329
|
+
/**
|
|
330
|
+
* Unique parse job identifier
|
|
331
|
+
*/
|
|
303
332
|
id: string;
|
|
304
333
|
/**
|
|
305
|
-
*
|
|
334
|
+
* Current job status
|
|
306
335
|
*/
|
|
307
336
|
status: StatusEnum;
|
|
337
|
+
/**
|
|
338
|
+
* Machine-readable error code when failed
|
|
339
|
+
*/
|
|
308
340
|
error_code?: string | null;
|
|
341
|
+
/**
|
|
342
|
+
* Human-readable error details when failed
|
|
343
|
+
*/
|
|
309
344
|
error_message?: string | null;
|
|
310
345
|
}
|
|
311
346
|
/**
|
|
@@ -393,11 +428,11 @@ export interface TextItem {
|
|
|
393
428
|
type?: 'text';
|
|
394
429
|
}
|
|
395
430
|
/**
|
|
396
|
-
*
|
|
431
|
+
* A parse job.
|
|
397
432
|
*/
|
|
398
433
|
export interface ParsingCreateResponse {
|
|
399
434
|
/**
|
|
400
|
-
* Unique
|
|
435
|
+
* Unique parse job identifier
|
|
401
436
|
*/
|
|
402
437
|
id: string;
|
|
403
438
|
/**
|
|
@@ -405,7 +440,7 @@ export interface ParsingCreateResponse {
|
|
|
405
440
|
*/
|
|
406
441
|
project_id: string;
|
|
407
442
|
/**
|
|
408
|
-
* Current
|
|
443
|
+
* Current job status: PENDING, RUNNING, COMPLETED, FAILED, or CANCELLED
|
|
409
444
|
*/
|
|
410
445
|
status: 'PENDING' | 'RUNNING' | 'COMPLETED' | 'FAILED' | 'CANCELLED';
|
|
411
446
|
/**
|
|
@@ -413,24 +448,28 @@ export interface ParsingCreateResponse {
|
|
|
413
448
|
*/
|
|
414
449
|
created_at?: string | null;
|
|
415
450
|
/**
|
|
416
|
-
* Error
|
|
451
|
+
* Error details when status is FAILED
|
|
417
452
|
*/
|
|
418
453
|
error_message?: string | null;
|
|
419
454
|
/**
|
|
420
|
-
*
|
|
455
|
+
* Optional display name for this parse job
|
|
421
456
|
*/
|
|
422
457
|
name?: string | null;
|
|
458
|
+
/**
|
|
459
|
+
* Parsing tier used for this job
|
|
460
|
+
*/
|
|
461
|
+
tier?: string | null;
|
|
423
462
|
/**
|
|
424
463
|
* Update datetime
|
|
425
464
|
*/
|
|
426
465
|
updated_at?: string | null;
|
|
427
466
|
}
|
|
428
467
|
/**
|
|
429
|
-
*
|
|
468
|
+
* A parse job.
|
|
430
469
|
*/
|
|
431
470
|
export interface ParsingListResponse {
|
|
432
471
|
/**
|
|
433
|
-
* Unique
|
|
472
|
+
* Unique parse job identifier
|
|
434
473
|
*/
|
|
435
474
|
id: string;
|
|
436
475
|
/**
|
|
@@ -438,7 +477,7 @@ export interface ParsingListResponse {
|
|
|
438
477
|
*/
|
|
439
478
|
project_id: string;
|
|
440
479
|
/**
|
|
441
|
-
* Current
|
|
480
|
+
* Current job status: PENDING, RUNNING, COMPLETED, FAILED, or CANCELLED
|
|
442
481
|
*/
|
|
443
482
|
status: 'PENDING' | 'RUNNING' | 'COMPLETED' | 'FAILED' | 'CANCELLED';
|
|
444
483
|
/**
|
|
@@ -446,13 +485,17 @@ export interface ParsingListResponse {
|
|
|
446
485
|
*/
|
|
447
486
|
created_at?: string | null;
|
|
448
487
|
/**
|
|
449
|
-
* Error
|
|
488
|
+
* Error details when status is FAILED
|
|
450
489
|
*/
|
|
451
490
|
error_message?: string | null;
|
|
452
491
|
/**
|
|
453
|
-
*
|
|
492
|
+
* Optional display name for this parse job
|
|
454
493
|
*/
|
|
455
494
|
name?: string | null;
|
|
495
|
+
/**
|
|
496
|
+
* Parsing tier used for this job
|
|
497
|
+
*/
|
|
498
|
+
tier?: string | null;
|
|
456
499
|
/**
|
|
457
500
|
* Update datetime
|
|
458
501
|
*/
|
|
@@ -477,6 +520,12 @@ export interface ParsingGetResponse {
|
|
|
477
520
|
* Structured JSON result (if requested)
|
|
478
521
|
*/
|
|
479
522
|
items?: ParsingGetResponse.Items | null;
|
|
523
|
+
/**
|
|
524
|
+
* Job execution metadata (if requested)
|
|
525
|
+
*/
|
|
526
|
+
job_metadata?: {
|
|
527
|
+
[key: string]: unknown;
|
|
528
|
+
} | null;
|
|
480
529
|
/**
|
|
481
530
|
* Markdown result (if requested)
|
|
482
531
|
*/
|
|
@@ -513,7 +562,7 @@ export declare namespace ParsingGetResponse {
|
|
|
513
562
|
*/
|
|
514
563
|
interface Job {
|
|
515
564
|
/**
|
|
516
|
-
* Unique
|
|
565
|
+
* Unique parse job identifier
|
|
517
566
|
*/
|
|
518
567
|
id: string;
|
|
519
568
|
/**
|
|
@@ -521,7 +570,7 @@ export declare namespace ParsingGetResponse {
|
|
|
521
570
|
*/
|
|
522
571
|
project_id: string;
|
|
523
572
|
/**
|
|
524
|
-
* Current
|
|
573
|
+
* Current job status: PENDING, RUNNING, COMPLETED, FAILED, or CANCELLED
|
|
525
574
|
*/
|
|
526
575
|
status: 'PENDING' | 'RUNNING' | 'COMPLETED' | 'FAILED' | 'CANCELLED';
|
|
527
576
|
/**
|
|
@@ -529,13 +578,17 @@ export declare namespace ParsingGetResponse {
|
|
|
529
578
|
*/
|
|
530
579
|
created_at?: string | null;
|
|
531
580
|
/**
|
|
532
|
-
* Error
|
|
581
|
+
* Error details when status is FAILED
|
|
533
582
|
*/
|
|
534
583
|
error_message?: string | null;
|
|
535
584
|
/**
|
|
536
|
-
*
|
|
585
|
+
* Optional display name for this parse job
|
|
537
586
|
*/
|
|
538
587
|
name?: string | null;
|
|
588
|
+
/**
|
|
589
|
+
* Parsing tier used for this job
|
|
590
|
+
*/
|
|
591
|
+
tier?: string | null;
|
|
539
592
|
/**
|
|
540
593
|
* Update datetime
|
|
541
594
|
*/
|
|
@@ -567,6 +620,15 @@ export declare namespace ParsingGetResponse {
|
|
|
567
620
|
* Index of the image in the extraction order
|
|
568
621
|
*/
|
|
569
622
|
index: number;
|
|
623
|
+
/**
|
|
624
|
+
* Bounding box for an image on its page.
|
|
625
|
+
*/
|
|
626
|
+
bbox?: Image.Bbox | null;
|
|
627
|
+
/**
|
|
628
|
+
* Image category: 'screenshot' (full page), 'embedded' (images in document), or
|
|
629
|
+
* 'layout' (cropped from layout detection)
|
|
630
|
+
*/
|
|
631
|
+
category?: 'screenshot' | 'embedded' | 'layout' | null;
|
|
570
632
|
/**
|
|
571
633
|
* MIME type of the image
|
|
572
634
|
*/
|
|
@@ -580,6 +642,29 @@ export declare namespace ParsingGetResponse {
|
|
|
580
642
|
*/
|
|
581
643
|
size_bytes?: number | null;
|
|
582
644
|
}
|
|
645
|
+
namespace Image {
|
|
646
|
+
/**
|
|
647
|
+
* Bounding box for an image on its page.
|
|
648
|
+
*/
|
|
649
|
+
interface Bbox {
|
|
650
|
+
/**
|
|
651
|
+
* Height of the bounding box
|
|
652
|
+
*/
|
|
653
|
+
h: number;
|
|
654
|
+
/**
|
|
655
|
+
* Width of the bounding box
|
|
656
|
+
*/
|
|
657
|
+
w: number;
|
|
658
|
+
/**
|
|
659
|
+
* X coordinate of the bounding box
|
|
660
|
+
*/
|
|
661
|
+
x: number;
|
|
662
|
+
/**
|
|
663
|
+
* Y coordinate of the bounding box
|
|
664
|
+
*/
|
|
665
|
+
y: number;
|
|
666
|
+
}
|
|
667
|
+
}
|
|
583
668
|
}
|
|
584
669
|
/**
|
|
585
670
|
* Structured JSON result (if requested)
|
|
@@ -764,13 +849,16 @@ export declare namespace ParsingGetResponse {
|
|
|
764
849
|
}
|
|
765
850
|
export interface ParsingCreateParams {
|
|
766
851
|
/**
|
|
767
|
-
* Body param:
|
|
852
|
+
* Body param: Parsing tier: 'fast' (rule-based, cheapest), 'cost_effective'
|
|
853
|
+
* (balanced), 'agentic' (AI-powered with custom prompts), or 'agentic_plus'
|
|
854
|
+
* (premium AI with highest accuracy)
|
|
768
855
|
*/
|
|
769
856
|
tier: 'fast' | 'cost_effective' | 'agentic' | 'agentic_plus';
|
|
770
857
|
/**
|
|
771
|
-
* Body param:
|
|
858
|
+
* Body param: Tier version. Use 'latest' for the current stable version, or
|
|
859
|
+
* specify a specific version (e.g., '1.0', '2.0') for reproducible results
|
|
772
860
|
*/
|
|
773
|
-
version: '2025-12-11' | '2025-12-18' | '2025-12-31' | '2026-01-08' | '2026-01-09' | '2026-01-16' | '2026-01-21' | '2026-01-22' | '2026-01-24' | '2026-01-29' | '2026-01-30' | '2026-02-03' | '2026-02-18' | '2026-02-20' | '2026-02-24' | '2026-02-26' | 'latest' | (string & {});
|
|
861
|
+
version: '2025-12-11' | '2025-12-18' | '2025-12-31' | '2026-01-08' | '2026-01-09' | '2026-01-16' | '2026-01-21' | '2026-01-22' | '2026-01-24' | '2026-01-29' | '2026-01-30' | '2026-02-03' | '2026-02-18' | '2026-02-20' | '2026-02-24' | '2026-02-26' | '2026-03-02' | '2026-03-03' | '2026-03-04' | '2026-03-05' | '2026-03-09' | '2026-03-10' | '2026-03-11' | '2026-03-12' | '2026-03-17' | '2026-03-19' | '2026-03-20' | '2026-03-22' | '2026-03-23' | '2026-03-24' | '2026-03-25' | '2026-03-26' | '2026-03-27' | 'latest' | (string & {});
|
|
774
862
|
/**
|
|
775
863
|
* Query param
|
|
776
864
|
*/
|
|
@@ -780,250 +868,295 @@ export interface ParsingCreateParams {
|
|
|
780
868
|
*/
|
|
781
869
|
project_id?: string | null;
|
|
782
870
|
/**
|
|
783
|
-
* Body param: Options for
|
|
871
|
+
* Body param: Options for AI-powered parsing tiers (cost_effective, agentic,
|
|
872
|
+
* agentic_plus).
|
|
873
|
+
*
|
|
874
|
+
* These options customize how the AI processes and interprets document content.
|
|
875
|
+
* Only applicable when using non-fast tiers.
|
|
784
876
|
*/
|
|
785
877
|
agentic_options?: ParsingCreateParams.AgenticOptions | null;
|
|
786
878
|
/**
|
|
787
|
-
* Body param:
|
|
879
|
+
* Body param: Identifier for the client/application making the request. Used for
|
|
880
|
+
* analytics and debugging. Example: 'my-app-v2'
|
|
788
881
|
*/
|
|
789
882
|
client_name?: string | null;
|
|
790
883
|
/**
|
|
791
|
-
* Body param:
|
|
884
|
+
* Body param: Crop boundaries to process only a portion of each page. Values are
|
|
885
|
+
* ratios 0-1 from page edges
|
|
792
886
|
*/
|
|
793
887
|
crop_box?: ParsingCreateParams.CropBox;
|
|
794
888
|
/**
|
|
795
|
-
* Body param:
|
|
889
|
+
* Body param: Bypass result caching and force re-parsing. Use when document
|
|
890
|
+
* content may have changed or you need fresh results
|
|
796
891
|
*/
|
|
797
892
|
disable_cache?: boolean | null;
|
|
798
893
|
/**
|
|
799
|
-
* Body param: Options for fast tier parsing (
|
|
894
|
+
* Body param: Options for fast tier parsing (rule-based, no AI).
|
|
895
|
+
*
|
|
896
|
+
* Fast tier uses deterministic algorithms for text extraction without AI
|
|
897
|
+
* enhancement. It's the fastest and most cost-effective option, best suited for
|
|
898
|
+
* simple documents with standard layouts. Currently has no configurable options
|
|
899
|
+
* but reserved for future expansion.
|
|
800
900
|
*/
|
|
801
901
|
fast_options?: unknown | null;
|
|
802
902
|
/**
|
|
803
|
-
* Body param: ID of an existing file in the project to parse
|
|
903
|
+
* Body param: ID of an existing file in the project to parse. Mutually exclusive
|
|
904
|
+
* with source_url
|
|
804
905
|
*/
|
|
805
906
|
file_id?: string | null;
|
|
806
907
|
/**
|
|
807
|
-
* Body param: HTTP proxy
|
|
908
|
+
* Body param: HTTP/HTTPS proxy for fetching source_url. Ignored if using file_id
|
|
808
909
|
*/
|
|
809
910
|
http_proxy?: string | null;
|
|
810
911
|
/**
|
|
811
|
-
* Body param:
|
|
912
|
+
* Body param: Format-specific options (HTML, PDF, spreadsheet, presentation).
|
|
913
|
+
* Applied based on detected input file type
|
|
812
914
|
*/
|
|
813
915
|
input_options?: ParsingCreateParams.InputOptions;
|
|
814
916
|
/**
|
|
815
|
-
* Body param: Output
|
|
917
|
+
* Body param: Output formatting options for markdown, text, and extracted images
|
|
816
918
|
*/
|
|
817
919
|
output_options?: ParsingCreateParams.OutputOptions;
|
|
818
920
|
/**
|
|
819
|
-
* Body param: Page
|
|
921
|
+
* Body param: Page selection: limit total pages or specify exact pages to process
|
|
820
922
|
*/
|
|
821
923
|
page_ranges?: ParsingCreateParams.PageRanges;
|
|
822
924
|
/**
|
|
823
|
-
* Body param: Job
|
|
925
|
+
* Body param: Job execution controls including timeouts and failure thresholds
|
|
824
926
|
*/
|
|
825
927
|
processing_control?: ParsingCreateParams.ProcessingControl;
|
|
826
928
|
/**
|
|
827
|
-
* Body param:
|
|
929
|
+
* Body param: Document processing options including OCR, table extraction, and
|
|
930
|
+
* chart parsing
|
|
828
931
|
*/
|
|
829
932
|
processing_options?: ParsingCreateParams.ProcessingOptions;
|
|
830
933
|
/**
|
|
831
|
-
* Body param:
|
|
934
|
+
* Body param: Public URL of the document to parse. Mutually exclusive with file_id
|
|
832
935
|
*/
|
|
833
936
|
source_url?: string | null;
|
|
834
937
|
/**
|
|
835
|
-
* Body param:
|
|
938
|
+
* Body param: Webhook endpoints for job status notifications. Multiple webhooks
|
|
939
|
+
* can be configured for different events or services
|
|
836
940
|
*/
|
|
837
941
|
webhook_configurations?: Array<ParsingCreateParams.WebhookConfiguration>;
|
|
838
942
|
}
|
|
839
943
|
export declare namespace ParsingCreateParams {
|
|
840
944
|
/**
|
|
841
|
-
* Options for
|
|
945
|
+
* Options for AI-powered parsing tiers (cost_effective, agentic, agentic_plus).
|
|
946
|
+
*
|
|
947
|
+
* These options customize how the AI processes and interprets document content.
|
|
948
|
+
* Only applicable when using non-fast tiers.
|
|
842
949
|
*/
|
|
843
950
|
interface AgenticOptions {
|
|
844
951
|
/**
|
|
845
|
-
* Custom
|
|
952
|
+
* Custom instructions for the AI parser. Use to guide extraction behavior, specify
|
|
953
|
+
* output formatting, or provide domain-specific context. Example: 'Extract
|
|
954
|
+
* financial tables with currency symbols. Format dates as YYYY-MM-DD.'
|
|
846
955
|
*/
|
|
847
956
|
custom_prompt?: string | null;
|
|
848
957
|
}
|
|
849
958
|
/**
|
|
850
|
-
*
|
|
959
|
+
* Crop boundaries to process only a portion of each page. Values are ratios 0-1
|
|
960
|
+
* from page edges
|
|
851
961
|
*/
|
|
852
962
|
interface CropBox {
|
|
853
963
|
/**
|
|
854
|
-
* Bottom boundary
|
|
964
|
+
* Bottom boundary as ratio (0-1). 0=top edge, 1=bottom edge. Content below this
|
|
965
|
+
* line is excluded
|
|
855
966
|
*/
|
|
856
967
|
bottom?: number | null;
|
|
857
968
|
/**
|
|
858
|
-
* Left boundary
|
|
969
|
+
* Left boundary as ratio (0-1). 0=left edge, 1=right edge. Content left of this
|
|
970
|
+
* line is excluded
|
|
859
971
|
*/
|
|
860
972
|
left?: number | null;
|
|
861
973
|
/**
|
|
862
|
-
* Right boundary
|
|
974
|
+
* Right boundary as ratio (0-1). 0=left edge, 1=right edge. Content right of this
|
|
975
|
+
* line is excluded
|
|
863
976
|
*/
|
|
864
977
|
right?: number | null;
|
|
865
978
|
/**
|
|
866
|
-
* Top boundary
|
|
979
|
+
* Top boundary as ratio (0-1). 0=top edge, 1=bottom edge. Content above this line
|
|
980
|
+
* is excluded
|
|
867
981
|
*/
|
|
868
982
|
top?: number | null;
|
|
869
983
|
}
|
|
870
984
|
/**
|
|
871
|
-
*
|
|
985
|
+
* Format-specific options (HTML, PDF, spreadsheet, presentation). Applied based on
|
|
986
|
+
* detected input file type
|
|
872
987
|
*/
|
|
873
988
|
interface InputOptions {
|
|
874
989
|
/**
|
|
875
|
-
* HTML
|
|
990
|
+
* HTML/web page parsing options (applies to .html, .htm files)
|
|
876
991
|
*/
|
|
877
992
|
html?: InputOptions.HTML;
|
|
878
993
|
/**
|
|
879
|
-
* PDF-specific parsing options
|
|
994
|
+
* PDF-specific parsing options (applies to .pdf files)
|
|
880
995
|
*/
|
|
881
996
|
pdf?: unknown;
|
|
882
997
|
/**
|
|
883
|
-
* Presentation
|
|
998
|
+
* Presentation parsing options (applies to .pptx, .ppt, .odp, .key files)
|
|
884
999
|
*/
|
|
885
1000
|
presentation?: InputOptions.Presentation;
|
|
886
1001
|
/**
|
|
887
|
-
* Spreadsheet
|
|
1002
|
+
* Spreadsheet parsing options (applies to .xlsx, .xls, .csv, .ods files)
|
|
888
1003
|
*/
|
|
889
1004
|
spreadsheet?: InputOptions.Spreadsheet;
|
|
890
1005
|
}
|
|
891
1006
|
namespace InputOptions {
|
|
892
1007
|
/**
|
|
893
|
-
* HTML
|
|
1008
|
+
* HTML/web page parsing options (applies to .html, .htm files)
|
|
894
1009
|
*/
|
|
895
1010
|
interface HTML {
|
|
896
1011
|
/**
|
|
897
|
-
*
|
|
1012
|
+
* Force all HTML elements to be visible by overriding CSS display/visibility
|
|
1013
|
+
* properties. Useful for parsing pages with hidden content or collapsed sections
|
|
898
1014
|
*/
|
|
899
1015
|
make_all_elements_visible?: boolean | null;
|
|
900
1016
|
/**
|
|
901
|
-
* Remove fixed
|
|
1017
|
+
* Remove fixed-position elements (headers, footers, floating buttons) that appear
|
|
1018
|
+
* on every page render
|
|
902
1019
|
*/
|
|
903
1020
|
remove_fixed_elements?: boolean | null;
|
|
904
1021
|
/**
|
|
905
|
-
* Remove navigation elements
|
|
1022
|
+
* Remove navigation elements (nav bars, sidebars, menus) to focus on main content
|
|
906
1023
|
*/
|
|
907
1024
|
remove_navigation_elements?: boolean | null;
|
|
908
1025
|
}
|
|
909
1026
|
/**
|
|
910
|
-
* Presentation
|
|
1027
|
+
* Presentation parsing options (applies to .pptx, .ppt, .odp, .key files)
|
|
911
1028
|
*/
|
|
912
1029
|
interface Presentation {
|
|
913
1030
|
/**
|
|
914
|
-
* Extract
|
|
1031
|
+
* Extract content positioned outside the visible slide area. Some presentations
|
|
1032
|
+
* have hidden notes or content that extends beyond slide boundaries
|
|
915
1033
|
*/
|
|
916
1034
|
out_of_bounds_content?: boolean | null;
|
|
917
1035
|
/**
|
|
918
|
-
* Skip extraction of embedded data
|
|
1036
|
+
* Skip extraction of embedded chart data tables. When true, only the visual
|
|
1037
|
+
* representation of charts is captured, not the underlying data
|
|
919
1038
|
*/
|
|
920
1039
|
skip_embedded_data?: boolean | null;
|
|
921
1040
|
}
|
|
922
1041
|
/**
|
|
923
|
-
* Spreadsheet
|
|
1042
|
+
* Spreadsheet parsing options (applies to .xlsx, .xls, .csv, .ods files)
|
|
924
1043
|
*/
|
|
925
1044
|
interface Spreadsheet {
|
|
926
1045
|
/**
|
|
927
|
-
* Detect and extract
|
|
1046
|
+
* Detect and extract multiple tables within a single sheet. Useful when
|
|
1047
|
+
* spreadsheets contain several data regions separated by blank rows/columns
|
|
928
1048
|
*/
|
|
929
1049
|
detect_sub_tables_in_sheets?: boolean | null;
|
|
930
1050
|
/**
|
|
931
|
-
*
|
|
1051
|
+
* Compute formula results instead of extracting formula text. Use when you need
|
|
1052
|
+
* calculated values rather than formula definitions
|
|
932
1053
|
*/
|
|
933
1054
|
force_formula_computation_in_sheets?: boolean | null;
|
|
934
1055
|
/**
|
|
935
|
-
*
|
|
1056
|
+
* Parse hidden sheets in addition to visible ones. By default, hidden sheets are
|
|
1057
|
+
* skipped
|
|
936
1058
|
*/
|
|
937
1059
|
include_hidden_sheets?: boolean | null;
|
|
938
1060
|
}
|
|
939
1061
|
}
|
|
940
1062
|
/**
|
|
941
|
-
* Output
|
|
1063
|
+
* Output formatting options for markdown, text, and extracted images
|
|
942
1064
|
*/
|
|
943
1065
|
interface OutputOptions {
|
|
944
1066
|
/**
|
|
945
|
-
* Extract printed page
|
|
1067
|
+
* Extract the printed page number as it appears in the document (e.g., 'Page 5 of
|
|
1068
|
+
* 10', 'v', 'A-3'). Useful for referencing original page numbers
|
|
946
1069
|
*/
|
|
947
1070
|
extract_printed_page_number?: boolean | null;
|
|
948
1071
|
/**
|
|
949
|
-
* Image categories to save: 'screenshot' (full page
|
|
950
|
-
*
|
|
951
|
-
*
|
|
1072
|
+
* Image categories to extract and save. Options: 'screenshot' (full page renders
|
|
1073
|
+
* useful for visual QA), 'embedded' (images found within the document), 'layout'
|
|
1074
|
+
* (cropped regions from layout detection like figures and diagrams). Empty list
|
|
1075
|
+
* saves no images
|
|
952
1076
|
*/
|
|
953
1077
|
images_to_save?: Array<'screenshot' | 'embedded' | 'layout'>;
|
|
954
1078
|
/**
|
|
955
|
-
* Markdown
|
|
1079
|
+
* Markdown formatting options including table styles and link annotations
|
|
956
1080
|
*/
|
|
957
1081
|
markdown?: OutputOptions.Markdown;
|
|
958
1082
|
/**
|
|
959
|
-
* Spatial text output options
|
|
1083
|
+
* Spatial text output options for preserving document layout structure
|
|
960
1084
|
*/
|
|
961
1085
|
spatial_text?: OutputOptions.SpatialText;
|
|
962
1086
|
/**
|
|
963
|
-
*
|
|
1087
|
+
* Options for exporting tables as XLSX spreadsheets
|
|
964
1088
|
*/
|
|
965
1089
|
tables_as_spreadsheet?: OutputOptions.TablesAsSpreadsheet;
|
|
966
1090
|
}
|
|
967
1091
|
namespace OutputOptions {
|
|
968
1092
|
/**
|
|
969
|
-
* Markdown
|
|
1093
|
+
* Markdown formatting options including table styles and link annotations
|
|
970
1094
|
*/
|
|
971
1095
|
interface Markdown {
|
|
972
1096
|
/**
|
|
973
|
-
* Add annotations to
|
|
1097
|
+
* Add link annotations to markdown output in the format [text](url). When false,
|
|
1098
|
+
* only the link text is included
|
|
974
1099
|
*/
|
|
975
1100
|
annotate_links?: boolean | null;
|
|
976
1101
|
/**
|
|
977
|
-
*
|
|
1102
|
+
* Embed images directly in markdown as base64 data URIs instead of extracting them
|
|
1103
|
+
* as separate files. Useful for self-contained markdown output
|
|
978
1104
|
*/
|
|
979
1105
|
inline_images?: boolean | null;
|
|
980
1106
|
/**
|
|
981
|
-
* Table formatting options
|
|
1107
|
+
* Table formatting options including markdown vs HTML format and merging behavior
|
|
982
1108
|
*/
|
|
983
1109
|
tables?: Markdown.Tables;
|
|
984
1110
|
}
|
|
985
1111
|
namespace Markdown {
|
|
986
1112
|
/**
|
|
987
|
-
* Table formatting options
|
|
1113
|
+
* Table formatting options including markdown vs HTML format and merging behavior
|
|
988
1114
|
*/
|
|
989
1115
|
interface Tables {
|
|
990
1116
|
/**
|
|
991
|
-
*
|
|
1117
|
+
* Remove extra whitespace padding in markdown table cells for more compact output
|
|
992
1118
|
*/
|
|
993
1119
|
compact_markdown_tables?: boolean | null;
|
|
994
1120
|
/**
|
|
995
|
-
* Separator for multiline content in markdown tables
|
|
1121
|
+
* Separator string for multiline cell content in markdown tables. Example:
|
|
1122
|
+
* '<br>' to preserve line breaks, ' ' to join with spaces
|
|
996
1123
|
*/
|
|
997
1124
|
markdown_table_multiline_separator?: string | null;
|
|
998
1125
|
/**
|
|
999
|
-
*
|
|
1126
|
+
* Automatically merge tables that span multiple pages into a single table. The
|
|
1127
|
+
* merged table appears on the first page with merged_from_pages metadata
|
|
1000
1128
|
*/
|
|
1001
1129
|
merge_continued_tables?: boolean | null;
|
|
1002
1130
|
/**
|
|
1003
|
-
* Output tables
|
|
1131
|
+
* Output tables as markdown pipe tables instead of HTML <table> tags.
|
|
1132
|
+
* Markdown tables are simpler but cannot represent complex structures like merged
|
|
1133
|
+
* cells
|
|
1004
1134
|
*/
|
|
1005
1135
|
output_tables_as_markdown?: boolean | null;
|
|
1006
1136
|
}
|
|
1007
1137
|
}
|
|
1008
1138
|
/**
|
|
1009
|
-
* Spatial text output options
|
|
1139
|
+
* Spatial text output options for preserving document layout structure
|
|
1010
1140
|
*/
|
|
1011
1141
|
interface SpatialText {
|
|
1012
1142
|
/**
|
|
1013
|
-
* Keep column
|
|
1143
|
+
* Keep multi-column layouts intact instead of linearizing columns into sequential
|
|
1144
|
+
* text. Automatically enabled for non-fast tiers
|
|
1014
1145
|
*/
|
|
1015
1146
|
do_not_unroll_columns?: boolean | null;
|
|
1016
1147
|
/**
|
|
1017
|
-
*
|
|
1148
|
+
* Maintain consistent text column alignment across page boundaries. Automatically
|
|
1149
|
+
* enabled for document-level parsing modes
|
|
1018
1150
|
*/
|
|
1019
1151
|
preserve_layout_alignment_across_pages?: boolean | null;
|
|
1020
1152
|
/**
|
|
1021
|
-
* Include
|
|
1153
|
+
* Include text below the normal size threshold. Useful for footnotes, watermarks,
|
|
1154
|
+
* or fine print that might otherwise be filtered out
|
|
1022
1155
|
*/
|
|
1023
1156
|
preserve_very_small_text?: boolean | null;
|
|
1024
1157
|
}
|
|
1025
1158
|
/**
|
|
1026
|
-
*
|
|
1159
|
+
* Options for exporting tables as XLSX spreadsheets
|
|
1027
1160
|
*/
|
|
1028
1161
|
interface TablesAsSpreadsheet {
|
|
1029
1162
|
/**
|
|
@@ -1031,121 +1164,146 @@ export declare namespace ParsingCreateParams {
|
|
|
1031
1164
|
*/
|
|
1032
1165
|
enable?: boolean | null;
|
|
1033
1166
|
/**
|
|
1034
|
-
* Automatically
|
|
1167
|
+
* Automatically generate descriptive sheet names from table context (headers,
|
|
1168
|
+
* surrounding text) instead of using generic names like 'Table_1'
|
|
1035
1169
|
*/
|
|
1036
1170
|
guess_sheet_name?: boolean;
|
|
1037
1171
|
}
|
|
1038
1172
|
}
|
|
1039
1173
|
/**
|
|
1040
|
-
* Page
|
|
1174
|
+
* Page selection: limit total pages or specify exact pages to process
|
|
1041
1175
|
*/
|
|
1042
1176
|
interface PageRanges {
|
|
1043
1177
|
/**
|
|
1044
|
-
* Maximum number of pages to process
|
|
1178
|
+
* Maximum number of pages to process. Pages are processed in order starting from
|
|
1179
|
+
* page 1. If both max_pages and target_pages are set, target_pages takes
|
|
1180
|
+
* precedence
|
|
1045
1181
|
*/
|
|
1046
1182
|
max_pages?: number | null;
|
|
1047
1183
|
/**
|
|
1048
|
-
*
|
|
1184
|
+
* Comma-separated list of specific pages to process using 1-based indexing.
|
|
1185
|
+
* Supports individual pages and ranges. Examples: '1,3,5' (pages 1, 3, 5), '1-5'
|
|
1186
|
+
* (pages 1 through 5 inclusive), '1,3,5-8,10' (pages 1, 3, 5-8, and 10). Pages are
|
|
1187
|
+
* sorted and deduplicated automatically. Duplicate pages cause an error
|
|
1049
1188
|
*/
|
|
1050
1189
|
target_pages?: string | null;
|
|
1051
1190
|
}
|
|
1052
1191
|
/**
|
|
1053
|
-
* Job
|
|
1192
|
+
* Job execution controls including timeouts and failure thresholds
|
|
1054
1193
|
*/
|
|
1055
1194
|
interface ProcessingControl {
|
|
1056
1195
|
/**
|
|
1057
|
-
*
|
|
1196
|
+
* Quality thresholds that determine when a job should fail vs complete with
|
|
1197
|
+
* partial results
|
|
1058
1198
|
*/
|
|
1059
1199
|
job_failure_conditions?: ProcessingControl.JobFailureConditions;
|
|
1060
1200
|
/**
|
|
1061
|
-
* Timeout
|
|
1201
|
+
* Timeout settings for job execution. Increase for large or complex documents
|
|
1062
1202
|
*/
|
|
1063
1203
|
timeouts?: ProcessingControl.Timeouts;
|
|
1064
1204
|
}
|
|
1065
1205
|
namespace ProcessingControl {
|
|
1066
1206
|
/**
|
|
1067
|
-
*
|
|
1207
|
+
* Quality thresholds that determine when a job should fail vs complete with
|
|
1208
|
+
* partial results
|
|
1068
1209
|
*/
|
|
1069
1210
|
interface JobFailureConditions {
|
|
1070
1211
|
/**
|
|
1071
|
-
* Maximum ratio of pages allowed to fail (0-1)
|
|
1212
|
+
* Maximum ratio of pages allowed to fail before the job fails (0-1). Example: 0.1
|
|
1213
|
+
* means the job fails if more than 10% of pages fail. Default is 0.05 (5%)
|
|
1072
1214
|
*/
|
|
1073
1215
|
allowed_page_failure_ratio?: number | null;
|
|
1074
1216
|
/**
|
|
1075
|
-
* Fail job if
|
|
1217
|
+
* Fail the job if a problematic font is detected that may cause incorrect text
|
|
1218
|
+
* extraction. Buggy fonts can produce garbled or missing characters
|
|
1076
1219
|
*/
|
|
1077
1220
|
fail_on_buggy_font?: boolean | null;
|
|
1078
1221
|
/**
|
|
1079
|
-
* Fail job if image
|
|
1222
|
+
* Fail the entire job if any embedded image cannot be extracted. By default, image
|
|
1223
|
+
* extraction errors are logged but don't fail the job
|
|
1080
1224
|
*/
|
|
1081
1225
|
fail_on_image_extraction_error?: boolean | null;
|
|
1082
1226
|
/**
|
|
1083
|
-
* Fail job if image OCR
|
|
1227
|
+
* Fail the entire job if OCR fails on any image. By default, OCR errors result in
|
|
1228
|
+
* empty text for that image
|
|
1084
1229
|
*/
|
|
1085
1230
|
fail_on_image_ocr_error?: boolean | null;
|
|
1086
1231
|
/**
|
|
1087
|
-
* Fail job if markdown
|
|
1232
|
+
* Fail the entire job if markdown cannot be reconstructed for any page. By
|
|
1233
|
+
* default, failed pages use fallback text extraction
|
|
1088
1234
|
*/
|
|
1089
1235
|
fail_on_markdown_reconstruction_error?: boolean | null;
|
|
1090
1236
|
}
|
|
1091
1237
|
/**
|
|
1092
|
-
* Timeout
|
|
1238
|
+
* Timeout settings for job execution. Increase for large or complex documents
|
|
1093
1239
|
*/
|
|
1094
1240
|
interface Timeouts {
|
|
1095
1241
|
/**
|
|
1096
|
-
* Base timeout in seconds (max 30 minutes)
|
|
1242
|
+
* Base timeout for the job in seconds (max 1800 = 30 minutes). This is the minimum
|
|
1243
|
+
* time allowed regardless of document size
|
|
1097
1244
|
*/
|
|
1098
1245
|
base_in_seconds?: number | null;
|
|
1099
1246
|
/**
|
|
1100
|
-
* Additional timeout per page in seconds (max 5 minutes)
|
|
1247
|
+
* Additional timeout per page in seconds (max 300 = 5 minutes). Total timeout =
|
|
1248
|
+
* base + (this value × page count)
|
|
1101
1249
|
*/
|
|
1102
1250
|
extra_time_per_page_in_seconds?: number | null;
|
|
1103
1251
|
}
|
|
1104
1252
|
}
|
|
1105
1253
|
/**
|
|
1106
|
-
*
|
|
1254
|
+
* Document processing options including OCR, table extraction, and chart parsing
|
|
1107
1255
|
*/
|
|
1108
1256
|
interface ProcessingOptions {
|
|
1109
1257
|
/**
|
|
1110
|
-
*
|
|
1258
|
+
* Use aggressive heuristics to detect table boundaries, even without visible
|
|
1259
|
+
* borders. Useful for documents with borderless or complex tables
|
|
1111
1260
|
*/
|
|
1112
1261
|
aggressive_table_extraction?: boolean | null;
|
|
1113
1262
|
/**
|
|
1114
|
-
*
|
|
1263
|
+
* Conditional processing rules that apply different parsing options based on page
|
|
1264
|
+
* content, document structure, or filename patterns. Each entry defines trigger
|
|
1265
|
+
* conditions and the parsing configuration to apply when triggered
|
|
1115
1266
|
*/
|
|
1116
1267
|
auto_mode_configuration?: Array<ProcessingOptions.AutoModeConfiguration> | null;
|
|
1117
1268
|
/**
|
|
1118
|
-
* Cost optimizer
|
|
1269
|
+
* Cost optimizer configuration for reducing parsing costs on simpler pages.
|
|
1270
|
+
*
|
|
1271
|
+
* When enabled, the parser analyzes each page and routes simpler pages to faster,
|
|
1272
|
+
* cheaper processing while preserving quality for complex pages. Only works with
|
|
1273
|
+
* 'agentic' or 'agentic_plus' tiers.
|
|
1119
1274
|
*/
|
|
1120
1275
|
cost_optimizer?: ProcessingOptions.CostOptimizer | null;
|
|
1121
1276
|
/**
|
|
1122
|
-
*
|
|
1123
|
-
* table handling
|
|
1277
|
+
* Disable automatic heuristics including outlined table extraction and adaptive
|
|
1278
|
+
* long table handling. Use when heuristics produce incorrect results
|
|
1124
1279
|
*/
|
|
1125
1280
|
disable_heuristics?: boolean | null;
|
|
1126
1281
|
/**
|
|
1127
|
-
* Options for ignoring specific text types
|
|
1282
|
+
* Options for ignoring specific text types (diagonal, hidden, text in images)
|
|
1128
1283
|
*/
|
|
1129
1284
|
ignore?: ProcessingOptions.Ignore;
|
|
1130
1285
|
/**
|
|
1131
|
-
* OCR configuration
|
|
1286
|
+
* OCR configuration including language detection settings
|
|
1132
1287
|
*/
|
|
1133
1288
|
ocr_parameters?: ProcessingOptions.OcrParameters;
|
|
1134
1289
|
/**
|
|
1135
|
-
* Enable
|
|
1290
|
+
* Enable AI-powered chart analysis. Modes: 'efficient' (fast, lower cost),
|
|
1291
|
+
* 'agentic' (balanced), 'agentic_plus' (highest accuracy). Automatically enables
|
|
1292
|
+
* extract_layout and precise_bounding_box when set
|
|
1136
1293
|
*/
|
|
1137
1294
|
specialized_chart_parsing?: 'agentic_plus' | 'agentic' | 'efficient' | null;
|
|
1138
1295
|
}
|
|
1139
1296
|
namespace ProcessingOptions {
|
|
1140
1297
|
/**
|
|
1141
|
-
* A single
|
|
1298
|
+
* A single auto mode rule with trigger conditions and parsing configuration.
|
|
1299
|
+
*
|
|
1300
|
+
* Auto mode allows conditional parsing where different configurations are applied
|
|
1301
|
+
* based on page content, structure, or filename. When triggers match, the
|
|
1302
|
+
* parsing_conf overrides default settings for that page.
|
|
1142
1303
|
*/
|
|
1143
1304
|
interface AutoModeConfiguration {
|
|
1144
1305
|
/**
|
|
1145
|
-
*
|
|
1146
|
-
*
|
|
1147
|
-
* This uses V2 API naming conventions. The backend service will convert these to
|
|
1148
|
-
* the V1 format expected by the llamaparse worker.
|
|
1306
|
+
* Parsing configuration to apply when trigger conditions are met
|
|
1149
1307
|
*/
|
|
1150
1308
|
parsing_conf: AutoModeConfiguration.ParsingConf;
|
|
1151
1309
|
/**
|
|
@@ -1285,17 +1443,14 @@ export declare namespace ParsingCreateParams {
|
|
|
1285
1443
|
*/
|
|
1286
1444
|
text_in_page?: string | null;
|
|
1287
1445
|
/**
|
|
1288
|
-
* How to combine multiple trigger conditions: 'and' (all must match,
|
|
1289
|
-
* 'or' (any can
|
|
1446
|
+
* How to combine multiple trigger conditions: 'and' (all conditions must match,
|
|
1447
|
+
* this is the default) or 'or' (any single condition can trigger)
|
|
1290
1448
|
*/
|
|
1291
1449
|
trigger_mode?: string | null;
|
|
1292
1450
|
}
|
|
1293
1451
|
namespace AutoModeConfiguration {
|
|
1294
1452
|
/**
|
|
1295
|
-
*
|
|
1296
|
-
*
|
|
1297
|
-
* This uses V2 API naming conventions. The backend service will convert these to
|
|
1298
|
-
* the V1 format expected by the llamaparse worker.
|
|
1453
|
+
* Parsing configuration to apply when trigger conditions are met
|
|
1299
1454
|
*/
|
|
1300
1455
|
interface ParsingConf {
|
|
1301
1456
|
/**
|
|
@@ -1311,7 +1466,7 @@ export declare namespace ParsingCreateParams {
|
|
|
1311
1466
|
*/
|
|
1312
1467
|
crop_box?: ParsingConf.CropBox | null;
|
|
1313
1468
|
/**
|
|
1314
|
-
* Custom
|
|
1469
|
+
* Custom AI instructions for matched pages. Overrides the base custom_prompt
|
|
1315
1470
|
*/
|
|
1316
1471
|
custom_prompt?: string | null;
|
|
1317
1472
|
/**
|
|
@@ -1347,13 +1502,13 @@ export declare namespace ParsingCreateParams {
|
|
|
1347
1502
|
*/
|
|
1348
1503
|
specialized_chart_parsing?: 'agentic_plus' | 'agentic' | 'efficient' | null;
|
|
1349
1504
|
/**
|
|
1350
|
-
*
|
|
1505
|
+
* Override the parsing tier for matched pages. Must be paired with version
|
|
1351
1506
|
*/
|
|
1352
1507
|
tier?: 'fast' | 'cost_effective' | 'agentic' | 'agentic_plus' | null;
|
|
1353
1508
|
/**
|
|
1354
|
-
*
|
|
1509
|
+
* Tier version when overriding tier. Required when tier is specified
|
|
1355
1510
|
*/
|
|
1356
|
-
version?: '2025-12-11' | '2025-12-18' | '2025-12-31' | '2026-01-08' | '2026-01-09' | '2026-01-16' | '2026-01-21' | '2026-01-22' | '2026-01-24' | '2026-01-29' | '2026-01-30' | '2026-02-03' | '2026-02-18' | '2026-02-20' | '2026-02-24' | '2026-02-26' | 'latest' | (string & {}) | null;
|
|
1511
|
+
version?: '2025-12-11' | '2025-12-18' | '2025-12-31' | '2026-01-08' | '2026-01-09' | '2026-01-16' | '2026-01-21' | '2026-01-22' | '2026-01-24' | '2026-01-29' | '2026-01-30' | '2026-02-03' | '2026-02-18' | '2026-02-20' | '2026-02-24' | '2026-02-26' | '2026-03-02' | '2026-03-03' | '2026-03-04' | '2026-03-05' | '2026-03-09' | '2026-03-10' | '2026-03-11' | '2026-03-12' | '2026-03-17' | '2026-03-19' | '2026-03-20' | '2026-03-22' | '2026-03-23' | '2026-03-24' | '2026-03-25' | '2026-03-26' | '2026-03-27' | 'latest' | (string & {}) | null;
|
|
1357
1512
|
}
|
|
1358
1513
|
namespace ParsingConf {
|
|
1359
1514
|
/**
|
|
@@ -1423,60 +1578,91 @@ export declare namespace ParsingCreateParams {
|
|
|
1423
1578
|
}
|
|
1424
1579
|
}
|
|
1425
1580
|
/**
|
|
1426
|
-
* Cost optimizer
|
|
1581
|
+
* Cost optimizer configuration for reducing parsing costs on simpler pages.
|
|
1582
|
+
*
|
|
1583
|
+
* When enabled, the parser analyzes each page and routes simpler pages to faster,
|
|
1584
|
+
* cheaper processing while preserving quality for complex pages. Only works with
|
|
1585
|
+
* 'agentic' or 'agentic_plus' tiers.
|
|
1427
1586
|
*/
|
|
1428
1587
|
interface CostOptimizer {
|
|
1429
1588
|
/**
|
|
1430
|
-
*
|
|
1431
|
-
*
|
|
1589
|
+
* Enable cost-optimized parsing. Routes simpler pages to faster processing while
|
|
1590
|
+
* complex pages use full AI analysis. May reduce speed on some documents.
|
|
1591
|
+
* IMPORTANT: Only available with 'agentic' or 'agentic_plus' tiers
|
|
1432
1592
|
*/
|
|
1433
1593
|
enable?: boolean | null;
|
|
1434
1594
|
}
|
|
1435
1595
|
/**
|
|
1436
|
-
* Options for ignoring specific text types
|
|
1596
|
+
* Options for ignoring specific text types (diagonal, hidden, text in images)
|
|
1437
1597
|
*/
|
|
1438
1598
|
interface Ignore {
|
|
1439
1599
|
/**
|
|
1440
|
-
*
|
|
1600
|
+
* Skip text rotated at an angle (not horizontal/vertical). Useful for ignoring
|
|
1601
|
+
* watermarks or decorative angled text
|
|
1441
1602
|
*/
|
|
1442
1603
|
ignore_diagonal_text?: boolean | null;
|
|
1443
1604
|
/**
|
|
1444
|
-
*
|
|
1605
|
+
* Skip text marked as hidden in the document structure. Some PDFs contain
|
|
1606
|
+
* invisible text layers used for accessibility or search indexing
|
|
1445
1607
|
*/
|
|
1446
1608
|
ignore_hidden_text?: boolean | null;
|
|
1447
1609
|
/**
|
|
1448
|
-
*
|
|
1610
|
+
* Skip OCR text extraction from embedded images. Use when images contain
|
|
1611
|
+
* irrelevant text (watermarks, logos) that shouldn't be in the output
|
|
1449
1612
|
*/
|
|
1450
1613
|
ignore_text_in_image?: boolean | null;
|
|
1451
1614
|
}
|
|
1452
1615
|
/**
|
|
1453
|
-
* OCR configuration
|
|
1616
|
+
* OCR configuration including language detection settings
|
|
1454
1617
|
*/
|
|
1455
1618
|
interface OcrParameters {
|
|
1456
1619
|
/**
|
|
1457
|
-
*
|
|
1620
|
+
* Languages to use for OCR text recognition. Specify multiple languages if
|
|
1621
|
+
* the document contains mixed-language content. Order matters - put the primary language
|
|
1622
|
+
* first. Example: ['en', 'es'] for English with Spanish
|
|
1458
1623
|
*/
|
|
1459
1624
|
languages?: Array<ParsingAPI.ParsingLanguages> | null;
|
|
1460
1625
|
}
|
|
1461
1626
|
}
|
|
1627
|
+
/**
|
|
1628
|
+
* Webhook configuration for receiving parsing job notifications.
|
|
1629
|
+
*
|
|
1630
|
+
* Webhooks are called when specified events occur during job processing. Configure
|
|
1631
|
+
* multiple webhook configurations to send to different endpoints.
|
|
1632
|
+
*/
|
|
1462
1633
|
interface WebhookConfiguration {
|
|
1463
1634
|
/**
|
|
1464
|
-
*
|
|
1635
|
+
* Events that trigger this webhook. Options: 'parse.success' (job completed),
|
|
1636
|
+
* 'parse.failure' (job failed), 'parse.partial' (some pages failed). If not
|
|
1637
|
+
* specified, the webhook fires for all events
|
|
1465
1638
|
*/
|
|
1466
1639
|
webhook_events?: Array<string> | null;
|
|
1467
1640
|
/**
|
|
1468
|
-
* Custom headers to include in webhook requests
|
|
1641
|
+
* Custom HTTP headers to include in webhook requests. Use for authentication
|
|
1642
|
+
* tokens or custom routing. Example: {'Authorization': 'Bearer xyz'}
|
|
1469
1643
|
*/
|
|
1470
1644
|
webhook_headers?: {
|
|
1471
1645
|
[key: string]: unknown;
|
|
1472
1646
|
} | null;
|
|
1473
1647
|
/**
|
|
1474
|
-
*
|
|
1648
|
+
* HTTPS URL to receive webhook POST requests. Must be publicly accessible
|
|
1475
1649
|
*/
|
|
1476
1650
|
webhook_url?: string | null;
|
|
1477
1651
|
}
|
|
1478
1652
|
}
|
|
1479
1653
|
export interface ParsingListParams extends PaginatedCursorParams {
|
|
1654
|
+
/**
|
|
1655
|
+
* Include jobs created at or after this timestamp (inclusive)
|
|
1656
|
+
*/
|
|
1657
|
+
created_at_on_or_after?: string | null;
|
|
1658
|
+
/**
|
|
1659
|
+
* Include jobs created at or before this timestamp (inclusive)
|
|
1660
|
+
*/
|
|
1661
|
+
created_at_on_or_before?: string | null;
|
|
1662
|
+
/**
|
|
1663
|
+
* Filter by specific job IDs
|
|
1664
|
+
*/
|
|
1665
|
+
job_ids?: Array<string> | null;
|
|
1480
1666
|
organization_id?: string | null;
|
|
1481
1667
|
project_id?: string | null;
|
|
1482
1668
|
/**
|
|
@@ -1486,10 +1672,10 @@ export interface ParsingListParams extends PaginatedCursorParams {
|
|
|
1486
1672
|
}
|
|
1487
1673
|
export interface ParsingGetParams {
|
|
1488
1674
|
/**
|
|
1489
|
-
* Fields to include: text, markdown, items, metadata,
|
|
1490
|
-
* markdown_content_metadata, items_content_metadata,
|
|
1491
|
-
* xlsx_content_metadata, output_pdf_content_metadata,
|
|
1492
|
-
* Metadata fields include presigned URLs.
|
|
1675
|
+
* Fields to include: text, markdown, items, metadata, job_metadata,
|
|
1676
|
+
* text_content_metadata, markdown_content_metadata, items_content_metadata,
|
|
1677
|
+
* metadata_content_metadata, xlsx_content_metadata, output_pdf_content_metadata,
|
|
1678
|
+
* images_content_metadata. Metadata fields include presigned URLs.
|
|
1493
1679
|
*/
|
|
1494
1680
|
expand?: Array<string>;
|
|
1495
1681
|
/**
|