@llamaindex/llama-cloud 1.8.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +72 -0
- package/README.md +10 -8
- package/client.d.mts +4 -6
- package/client.d.mts.map +1 -1
- package/client.d.ts +4 -6
- package/client.d.ts.map +1 -1
- package/client.js +7 -6
- package/client.js.map +1 -1
- package/client.mjs +7 -6
- package/client.mjs.map +1 -1
- package/core/pagination.d.mts +0 -23
- package/core/pagination.d.mts.map +1 -1
- package/core/pagination.d.ts +0 -23
- package/core/pagination.d.ts.map +1 -1
- package/core/pagination.js +1 -32
- package/core/pagination.js.map +1 -1
- package/core/pagination.mjs +0 -30
- package/core/pagination.mjs.map +1 -1
- package/package.json +12 -1
- package/resources/beta/batch/batch.d.mts +55 -30
- package/resources/beta/batch/batch.d.mts.map +1 -1
- package/resources/beta/batch/batch.d.ts +55 -30
- package/resources/beta/batch/batch.d.ts.map +1 -1
- package/resources/beta/batch/batch.js +14 -11
- package/resources/beta/batch/batch.js.map +1 -1
- package/resources/beta/batch/batch.mjs +14 -11
- package/resources/beta/batch/batch.mjs.map +1 -1
- package/resources/beta/batch/job-items.d.mts +36 -13
- package/resources/beta/batch/job-items.d.mts.map +1 -1
- package/resources/beta/batch/job-items.d.ts +36 -13
- package/resources/beta/batch/job-items.d.ts.map +1 -1
- package/resources/beta/batch/job-items.js +6 -8
- package/resources/beta/batch/job-items.js.map +1 -1
- package/resources/beta/batch/job-items.mjs +6 -8
- package/resources/beta/batch/job-items.mjs.map +1 -1
- package/resources/beta/parse-configurations.d.mts +7 -9
- package/resources/beta/parse-configurations.d.mts.map +1 -1
- package/resources/beta/parse-configurations.d.ts +7 -9
- package/resources/beta/parse-configurations.d.ts.map +1 -1
- package/resources/beta/parse-configurations.js +7 -9
- package/resources/beta/parse-configurations.js.map +1 -1
- package/resources/beta/parse-configurations.mjs +7 -9
- package/resources/beta/parse-configurations.mjs.map +1 -1
- package/resources/beta/sheets.d.mts +16 -0
- package/resources/beta/sheets.d.mts.map +1 -1
- package/resources/beta/sheets.d.ts +16 -0
- package/resources/beta/sheets.d.ts.map +1 -1
- package/resources/beta/split.d.mts +60 -16
- package/resources/beta/split.d.mts.map +1 -1
- package/resources/beta/split.d.ts +60 -16
- package/resources/beta/split.d.ts.map +1 -1
- package/resources/beta/split.js.map +1 -1
- package/resources/beta/split.mjs.map +1 -1
- package/resources/classifier/jobs.d.mts +12 -3
- package/resources/classifier/jobs.d.mts.map +1 -1
- package/resources/classifier/jobs.d.ts +12 -3
- package/resources/classifier/jobs.d.ts.map +1 -1
- package/resources/classify.d.mts +93 -38
- package/resources/classify.d.mts.map +1 -1
- package/resources/classify.d.ts +93 -38
- package/resources/classify.d.ts.map +1 -1
- package/resources/classify.js +19 -2
- package/resources/classify.js.map +1 -1
- package/resources/classify.mjs +19 -2
- package/resources/classify.mjs.map +1 -1
- package/resources/extract.d.mts +1593 -0
- package/resources/extract.d.mts.map +1 -0
- package/resources/extract.d.ts +1593 -0
- package/resources/extract.d.ts.map +1 -0
- package/resources/extract.js +215 -0
- package/resources/extract.js.map +1 -0
- package/resources/extract.mjs +211 -0
- package/resources/extract.mjs.map +1 -0
- package/resources/files.d.mts +53 -39
- package/resources/files.d.mts.map +1 -1
- package/resources/files.d.ts +53 -39
- package/resources/files.d.ts.map +1 -1
- package/resources/files.js +11 -10
- package/resources/files.js.map +1 -1
- package/resources/files.mjs +11 -10
- package/resources/files.mjs.map +1 -1
- package/resources/index.d.mts +1 -1
- package/resources/index.d.mts.map +1 -1
- package/resources/index.d.ts +1 -1
- package/resources/index.d.ts.map +1 -1
- package/resources/index.js +3 -3
- package/resources/index.js.map +1 -1
- package/resources/index.mjs +1 -1
- package/resources/index.mjs.map +1 -1
- package/resources/parsing.d.mts +292 -138
- package/resources/parsing.d.mts.map +1 -1
- package/resources/parsing.d.ts +292 -138
- package/resources/parsing.d.ts.map +1 -1
- package/resources/parsing.js +30 -4
- package/resources/parsing.js.map +1 -1
- package/resources/parsing.mjs +30 -4
- package/resources/parsing.mjs.map +1 -1
- package/resources/pipelines/pipelines.d.mts +59 -13
- package/resources/pipelines/pipelines.d.mts.map +1 -1
- package/resources/pipelines/pipelines.d.ts +59 -13
- package/resources/pipelines/pipelines.d.ts.map +1 -1
- package/resources/pipelines/pipelines.js +24 -9
- package/resources/pipelines/pipelines.js.map +1 -1
- package/resources/pipelines/pipelines.mjs +24 -9
- package/resources/pipelines/pipelines.mjs.map +1 -1
- package/resources/pipelines/sync.d.mts +5 -3
- package/resources/pipelines/sync.d.mts.map +1 -1
- package/resources/pipelines/sync.d.ts +5 -3
- package/resources/pipelines/sync.d.ts.map +1 -1
- package/resources/pipelines/sync.js +5 -3
- package/resources/pipelines/sync.js.map +1 -1
- package/resources/pipelines/sync.mjs +5 -3
- package/resources/pipelines/sync.mjs.map +1 -1
- package/src/client.ts +50 -15
- package/src/core/pagination.ts +0 -71
- package/src/resources/beta/batch/batch.ts +75 -30
- package/src/resources/beta/batch/job-items.ts +56 -13
- package/src/resources/beta/parse-configurations.ts +7 -9
- package/src/resources/beta/sheets.ts +20 -0
- package/src/resources/beta/split.ts +70 -17
- package/src/resources/classifier/jobs.ts +12 -3
- package/src/resources/classify.ts +101 -38
- package/src/resources/extract.ts +2055 -0
- package/src/resources/files.ts +53 -39
- package/src/resources/index.ts +22 -1
- package/src/resources/parsing.ts +327 -136
- package/src/resources/pipelines/pipelines.ts +80 -14
- package/src/resources/pipelines/sync.ts +5 -3
- package/src/version.ts +1 -1
- package/version.d.mts +1 -1
- package/version.d.ts +1 -1
- package/version.js +1 -1
- package/version.mjs +1 -1
- package/resources/extraction/extraction-agents/extraction-agents.d.mts +0 -126
- package/resources/extraction/extraction-agents/extraction-agents.d.mts.map +0 -1
- package/resources/extraction/extraction-agents/extraction-agents.d.ts +0 -126
- package/resources/extraction/extraction-agents/extraction-agents.d.ts.map +0 -1
- package/resources/extraction/extraction-agents/extraction-agents.js +0 -56
- package/resources/extraction/extraction-agents/extraction-agents.js.map +0 -1
- package/resources/extraction/extraction-agents/extraction-agents.mjs +0 -51
- package/resources/extraction/extraction-agents/extraction-agents.mjs.map +0 -1
- package/resources/extraction/extraction-agents/index.d.mts +0 -3
- package/resources/extraction/extraction-agents/index.d.mts.map +0 -1
- package/resources/extraction/extraction-agents/index.d.ts +0 -3
- package/resources/extraction/extraction-agents/index.d.ts.map +0 -1
- package/resources/extraction/extraction-agents/index.js +0 -9
- package/resources/extraction/extraction-agents/index.js.map +0 -1
- package/resources/extraction/extraction-agents/index.mjs +0 -4
- package/resources/extraction/extraction-agents/index.mjs.map +0 -1
- package/resources/extraction/extraction-agents/schema.d.mts +0 -75
- package/resources/extraction/extraction-agents/schema.d.mts.map +0 -1
- package/resources/extraction/extraction-agents/schema.d.ts +0 -75
- package/resources/extraction/extraction-agents/schema.d.ts.map +0 -1
- package/resources/extraction/extraction-agents/schema.js +0 -28
- package/resources/extraction/extraction-agents/schema.js.map +0 -1
- package/resources/extraction/extraction-agents/schema.mjs +0 -24
- package/resources/extraction/extraction-agents/schema.mjs.map +0 -1
- package/resources/extraction/extraction-agents.d.mts +0 -2
- package/resources/extraction/extraction-agents.d.mts.map +0 -1
- package/resources/extraction/extraction-agents.d.ts +0 -2
- package/resources/extraction/extraction-agents.d.ts.map +0 -1
- package/resources/extraction/extraction-agents.js +0 -6
- package/resources/extraction/extraction-agents.js.map +0 -1
- package/resources/extraction/extraction-agents.mjs +0 -3
- package/resources/extraction/extraction-agents.mjs.map +0 -1
- package/resources/extraction/extraction.d.mts +0 -118
- package/resources/extraction/extraction.d.mts.map +0 -1
- package/resources/extraction/extraction.d.ts +0 -118
- package/resources/extraction/extraction.d.ts.map +0 -1
- package/resources/extraction/extraction.js +0 -91
- package/resources/extraction/extraction.js.map +0 -1
- package/resources/extraction/extraction.mjs +0 -86
- package/resources/extraction/extraction.mjs.map +0 -1
- package/resources/extraction/index.d.mts +0 -5
- package/resources/extraction/index.d.mts.map +0 -1
- package/resources/extraction/index.d.ts +0 -5
- package/resources/extraction/index.d.ts.map +0 -1
- package/resources/extraction/index.js +0 -13
- package/resources/extraction/index.js.map +0 -1
- package/resources/extraction/index.mjs +0 -6
- package/resources/extraction/index.mjs.map +0 -1
- package/resources/extraction/jobs.d.mts +0 -280
- package/resources/extraction/jobs.d.mts.map +0 -1
- package/resources/extraction/jobs.d.ts +0 -280
- package/resources/extraction/jobs.d.ts.map +0 -1
- package/resources/extraction/jobs.js +0 -179
- package/resources/extraction/jobs.js.map +0 -1
- package/resources/extraction/jobs.mjs +0 -175
- package/resources/extraction/jobs.mjs.map +0 -1
- package/resources/extraction/runs.d.mts +0 -198
- package/resources/extraction/runs.d.mts.map +0 -1
- package/resources/extraction/runs.d.ts +0 -198
- package/resources/extraction/runs.d.ts.map +0 -1
- package/resources/extraction/runs.js +0 -42
- package/resources/extraction/runs.js.map +0 -1
- package/resources/extraction/runs.mjs +0 -38
- package/resources/extraction/runs.mjs.map +0 -1
- package/resources/extraction.d.mts +0 -2
- package/resources/extraction.d.mts.map +0 -1
- package/resources/extraction.d.ts +0 -2
- package/resources/extraction.d.ts.map +0 -1
- package/resources/extraction.js +0 -6
- package/resources/extraction.js.map +0 -1
- package/resources/extraction.mjs +0 -3
- package/resources/extraction.mjs.map +0 -1
- package/src/resources/extraction/extraction-agents/extraction-agents.ts +0 -196
- package/src/resources/extraction/extraction-agents/index.ts +0 -18
- package/src/resources/extraction/extraction-agents/schema.ts +0 -100
- package/src/resources/extraction/extraction-agents.ts +0 -3
- package/src/resources/extraction/extraction.ts +0 -224
- package/src/resources/extraction/index.ts +0 -34
- package/src/resources/extraction/jobs.ts +0 -414
- package/src/resources/extraction/runs.ts +0 -315
- package/src/resources/extraction.ts +0 -3
package/resources/parsing.d.mts
CHANGED
|
@@ -7,18 +7,44 @@ import { type Uploadable } from "../core/uploads.mjs";
|
|
|
7
7
|
import { PollingOptions } from "../core/polling.mjs";
|
|
8
8
|
export declare class Parsing extends APIResource {
|
|
9
9
|
/**
|
|
10
|
-
* Parse a file by file ID
|
|
10
|
+
* Parse a file by file ID or URL.
|
|
11
|
+
*
|
|
12
|
+
* Provide either `file_id` (a previously uploaded file) or `source_url` (a
|
|
13
|
+
* publicly accessible URL). Configure parsing with options like `tier`,
|
|
14
|
+
* `target_pages`, and `lang`.
|
|
15
|
+
*
|
|
16
|
+
* ## Tiers
|
|
17
|
+
*
|
|
18
|
+
* - `fast` — rule-based, cheapest, no AI
|
|
19
|
+
* - `cost_effective` — balanced speed and quality
|
|
20
|
+
* - `agentic` — full AI-powered parsing
|
|
21
|
+
* - `agentic_plus` — premium AI with specialized features
|
|
22
|
+
*
|
|
23
|
+
* The job runs asynchronously. Poll `GET /parse/{job_id}` with `expand=text` or
|
|
24
|
+
* `expand=markdown` to retrieve results.
|
|
11
25
|
*/
|
|
12
26
|
create(params: ParsingCreateParams & {
|
|
13
27
|
upload_file?: Uploadable;
|
|
14
28
|
}, options?: RequestOptions): APIPromise<ParsingCreateResponse>;
|
|
15
29
|
/**
|
|
16
|
-
* List parse jobs for the current project
|
|
17
|
-
*
|
|
30
|
+
* List parse jobs for the current project.
|
|
31
|
+
*
|
|
32
|
+
* Filter by `status` or creation date range. Results are paginated — use
|
|
33
|
+
* `page_token` from the response to fetch subsequent pages.
|
|
18
34
|
*/
|
|
19
35
|
list(query?: ParsingListParams | null | undefined, options?: RequestOptions): PagePromise<ParsingListResponsesPaginatedCursor, ParsingListResponse>;
|
|
20
36
|
/**
|
|
21
|
-
* Retrieve parse job with optional content
|
|
37
|
+
* Retrieve a parse job with optional expanded content.
|
|
38
|
+
*
|
|
39
|
+
* By default returns job metadata only. Use `expand` to include parsed content:
|
|
40
|
+
*
|
|
41
|
+
* - `text` — plain text output
|
|
42
|
+
* - `markdown` — markdown output
|
|
43
|
+
* - `items` — structured page-by-page output
|
|
44
|
+
* - `job_metadata` — usage and processing details
|
|
45
|
+
*
|
|
46
|
+
* Content metadata fields (e.g. `text_content_metadata`) return presigned URLs for
|
|
47
|
+
* downloading large results.
|
|
22
48
|
*/
|
|
23
49
|
get(jobID: string, query?: ParsingGetParams | null | undefined, options?: RequestOptions): APIPromise<ParsingGetResponse>;
|
|
24
50
|
/**
|
|
@@ -297,15 +323,24 @@ export interface ListItem {
|
|
|
297
323
|
*/
|
|
298
324
|
export type LlamaParseSupportedFileExtensions = '.pdf' | '.abw' | '.awt' | '.cgm' | '.cwk' | '.doc' | '.docm' | '.docx' | '.dot' | '.dotm' | '.dotx' | '.fodg' | '.fodp' | '.fopd' | '.fodt' | '.fb2' | '.hwp' | '.lwp' | '.mcw' | '.mw' | '.mwd' | '.odf' | '.odt' | '.otg' | '.ott' | '.pages' | '.pbd' | '.psw' | '.rtf' | '.sda' | '.sdd' | '.sdp' | '.sdw' | '.sgl' | '.std' | '.stw' | '.sxd' | '.sxg' | '.sxm' | '.sxw' | '.uof' | '.uop' | '.uot' | '.vor' | '.wpd' | '.wps' | '.wpt' | '.wri' | '.wn' | '.xml' | '.zabw' | '.key' | '.odp' | '.odg' | '.otp' | '.pot' | '.potm' | '.potx' | '.ppt' | '.pptm' | '.pptx' | '.sti' | '.sxi' | '.vsd' | '.vsdm' | '.vsdx' | '.vdx' | '.bmp' | '.gif' | '.jpg' | '.jpeg' | '.png' | '.svg' | '.tif' | '.tiff' | '.webp' | '.htm' | '.html' | '.xhtm' | '.csv' | '.dbf' | '.dif' | '.et' | '.eth' | '.fods' | '.numbers' | '.ods' | '.ots' | '.prn' | '.qpw' | '.slk' | '.stc' | '.sxc' | '.sylk' | '.tsv' | '.uos1' | '.uos2' | '.uos' | '.wb1' | '.wb2' | '.wb3' | '.wk1' | '.wk2' | '.wk3' | '.wk4' | '.wks' | '.wq1' | '.wq2' | '.xlr' | '.xls' | '.xlsb' | '.xlsm' | '.xlsx' | '.xlw' | '.azw' | '.azw3' | '.azw4' | '.cb7' | '.cbc' | '.cbr' | '.cbz' | '.chm' | '.djvu' | '.epub' | '.fbz' | '.htmlz' | '.lit' | '.lrf' | '.md' | '.mobi' | '.pdb' | '.pml' | '.prc' | '.rb' | '.snb' | '.tcr' | '.txtz' | '.m4a' | '.mp3' | '.mp4' | '.mpeg' | '.mpga' | '.wav' | '.webm';
|
|
299
325
|
/**
|
|
300
|
-
*
|
|
326
|
+
* A parse job (v1).
|
|
301
327
|
*/
|
|
302
328
|
export interface ParsingJob {
|
|
329
|
+
/**
|
|
330
|
+
* Unique parse job identifier
|
|
331
|
+
*/
|
|
303
332
|
id: string;
|
|
304
333
|
/**
|
|
305
|
-
*
|
|
334
|
+
* Current job status
|
|
306
335
|
*/
|
|
307
336
|
status: StatusEnum;
|
|
337
|
+
/**
|
|
338
|
+
* Machine-readable error code when failed
|
|
339
|
+
*/
|
|
308
340
|
error_code?: string | null;
|
|
341
|
+
/**
|
|
342
|
+
* Human-readable error details when failed
|
|
343
|
+
*/
|
|
309
344
|
error_message?: string | null;
|
|
310
345
|
}
|
|
311
346
|
/**
|
|
@@ -393,11 +428,11 @@ export interface TextItem {
|
|
|
393
428
|
type?: 'text';
|
|
394
429
|
}
|
|
395
430
|
/**
|
|
396
|
-
*
|
|
431
|
+
* A parse job.
|
|
397
432
|
*/
|
|
398
433
|
export interface ParsingCreateResponse {
|
|
399
434
|
/**
|
|
400
|
-
* Unique
|
|
435
|
+
* Unique parse job identifier
|
|
401
436
|
*/
|
|
402
437
|
id: string;
|
|
403
438
|
/**
|
|
@@ -405,7 +440,7 @@ export interface ParsingCreateResponse {
|
|
|
405
440
|
*/
|
|
406
441
|
project_id: string;
|
|
407
442
|
/**
|
|
408
|
-
* Current
|
|
443
|
+
* Current job status: PENDING, RUNNING, COMPLETED, FAILED, or CANCELLED
|
|
409
444
|
*/
|
|
410
445
|
status: 'PENDING' | 'RUNNING' | 'COMPLETED' | 'FAILED' | 'CANCELLED';
|
|
411
446
|
/**
|
|
@@ -413,24 +448,28 @@ export interface ParsingCreateResponse {
|
|
|
413
448
|
*/
|
|
414
449
|
created_at?: string | null;
|
|
415
450
|
/**
|
|
416
|
-
* Error
|
|
451
|
+
* Error details when status is FAILED
|
|
417
452
|
*/
|
|
418
453
|
error_message?: string | null;
|
|
419
454
|
/**
|
|
420
|
-
*
|
|
455
|
+
* Optional display name for this parse job
|
|
421
456
|
*/
|
|
422
457
|
name?: string | null;
|
|
458
|
+
/**
|
|
459
|
+
* Parsing tier used for this job
|
|
460
|
+
*/
|
|
461
|
+
tier?: string | null;
|
|
423
462
|
/**
|
|
424
463
|
* Update datetime
|
|
425
464
|
*/
|
|
426
465
|
updated_at?: string | null;
|
|
427
466
|
}
|
|
428
467
|
/**
|
|
429
|
-
*
|
|
468
|
+
* A parse job.
|
|
430
469
|
*/
|
|
431
470
|
export interface ParsingListResponse {
|
|
432
471
|
/**
|
|
433
|
-
* Unique
|
|
472
|
+
* Unique parse job identifier
|
|
434
473
|
*/
|
|
435
474
|
id: string;
|
|
436
475
|
/**
|
|
@@ -438,7 +477,7 @@ export interface ParsingListResponse {
|
|
|
438
477
|
*/
|
|
439
478
|
project_id: string;
|
|
440
479
|
/**
|
|
441
|
-
* Current
|
|
480
|
+
* Current job status: PENDING, RUNNING, COMPLETED, FAILED, or CANCELLED
|
|
442
481
|
*/
|
|
443
482
|
status: 'PENDING' | 'RUNNING' | 'COMPLETED' | 'FAILED' | 'CANCELLED';
|
|
444
483
|
/**
|
|
@@ -446,13 +485,17 @@ export interface ParsingListResponse {
|
|
|
446
485
|
*/
|
|
447
486
|
created_at?: string | null;
|
|
448
487
|
/**
|
|
449
|
-
* Error
|
|
488
|
+
* Error details when status is FAILED
|
|
450
489
|
*/
|
|
451
490
|
error_message?: string | null;
|
|
452
491
|
/**
|
|
453
|
-
*
|
|
492
|
+
* Optional display name for this parse job
|
|
454
493
|
*/
|
|
455
494
|
name?: string | null;
|
|
495
|
+
/**
|
|
496
|
+
* Parsing tier used for this job
|
|
497
|
+
*/
|
|
498
|
+
tier?: string | null;
|
|
456
499
|
/**
|
|
457
500
|
* Update datetime
|
|
458
501
|
*/
|
|
@@ -477,6 +520,12 @@ export interface ParsingGetResponse {
|
|
|
477
520
|
* Structured JSON result (if requested)
|
|
478
521
|
*/
|
|
479
522
|
items?: ParsingGetResponse.Items | null;
|
|
523
|
+
/**
|
|
524
|
+
* Job execution metadata (if requested)
|
|
525
|
+
*/
|
|
526
|
+
job_metadata?: {
|
|
527
|
+
[key: string]: unknown;
|
|
528
|
+
} | null;
|
|
480
529
|
/**
|
|
481
530
|
* Markdown result (if requested)
|
|
482
531
|
*/
|
|
@@ -513,7 +562,7 @@ export declare namespace ParsingGetResponse {
|
|
|
513
562
|
*/
|
|
514
563
|
interface Job {
|
|
515
564
|
/**
|
|
516
|
-
* Unique
|
|
565
|
+
* Unique parse job identifier
|
|
517
566
|
*/
|
|
518
567
|
id: string;
|
|
519
568
|
/**
|
|
@@ -521,7 +570,7 @@ export declare namespace ParsingGetResponse {
|
|
|
521
570
|
*/
|
|
522
571
|
project_id: string;
|
|
523
572
|
/**
|
|
524
|
-
* Current
|
|
573
|
+
* Current job status: PENDING, RUNNING, COMPLETED, FAILED, or CANCELLED
|
|
525
574
|
*/
|
|
526
575
|
status: 'PENDING' | 'RUNNING' | 'COMPLETED' | 'FAILED' | 'CANCELLED';
|
|
527
576
|
/**
|
|
@@ -529,13 +578,17 @@ export declare namespace ParsingGetResponse {
|
|
|
529
578
|
*/
|
|
530
579
|
created_at?: string | null;
|
|
531
580
|
/**
|
|
532
|
-
* Error
|
|
581
|
+
* Error details when status is FAILED
|
|
533
582
|
*/
|
|
534
583
|
error_message?: string | null;
|
|
535
584
|
/**
|
|
536
|
-
*
|
|
585
|
+
* Optional display name for this parse job
|
|
537
586
|
*/
|
|
538
587
|
name?: string | null;
|
|
588
|
+
/**
|
|
589
|
+
* Parsing tier used for this job
|
|
590
|
+
*/
|
|
591
|
+
tier?: string | null;
|
|
539
592
|
/**
|
|
540
593
|
* Update datetime
|
|
541
594
|
*/
|
|
@@ -796,13 +849,16 @@ export declare namespace ParsingGetResponse {
|
|
|
796
849
|
}
|
|
797
850
|
export interface ParsingCreateParams {
|
|
798
851
|
/**
|
|
799
|
-
* Body param:
|
|
852
|
+
* Body param: Parsing tier: 'fast' (rule-based, cheapest), 'cost_effective'
|
|
853
|
+
* (balanced), 'agentic' (AI-powered with custom prompts), or 'agentic_plus'
|
|
854
|
+
* (premium AI with highest accuracy)
|
|
800
855
|
*/
|
|
801
856
|
tier: 'fast' | 'cost_effective' | 'agentic' | 'agentic_plus';
|
|
802
857
|
/**
|
|
803
|
-
* Body param:
|
|
858
|
+
* Body param: Tier version. Use 'latest' for the current stable version, or
|
|
859
|
+
* specify a specific version (e.g., '1.0', '2.0') for reproducible results
|
|
804
860
|
*/
|
|
805
|
-
version: '2025-12-11' | '2025-12-18' | '2025-12-31' | '2026-01-08' | '2026-01-09' | '2026-01-16' | '2026-01-21' | '2026-01-22' | '2026-01-24' | '2026-01-29' | '2026-01-30' | '2026-02-03' | '2026-02-18' | '2026-02-20' | '2026-02-24' | '2026-02-26' | '2026-03-02' | '2026-03-03' | '2026-03-04' | 'latest' | (string & {});
|
|
861
|
+
version: '2025-12-11' | '2025-12-18' | '2025-12-31' | '2026-01-08' | '2026-01-09' | '2026-01-16' | '2026-01-21' | '2026-01-22' | '2026-01-24' | '2026-01-29' | '2026-01-30' | '2026-02-03' | '2026-02-18' | '2026-02-20' | '2026-02-24' | '2026-02-26' | '2026-03-02' | '2026-03-03' | '2026-03-04' | '2026-03-05' | '2026-03-09' | '2026-03-10' | '2026-03-11' | '2026-03-12' | '2026-03-17' | '2026-03-19' | '2026-03-20' | '2026-03-22' | '2026-03-23' | '2026-03-24' | '2026-03-25' | '2026-03-26' | '2026-03-27' | '2026-03-30' | '2026-03-31' | 'latest' | (string & {});
|
|
806
862
|
/**
|
|
807
863
|
* Query param
|
|
808
864
|
*/
|
|
@@ -812,250 +868,295 @@ export interface ParsingCreateParams {
|
|
|
812
868
|
*/
|
|
813
869
|
project_id?: string | null;
|
|
814
870
|
/**
|
|
815
|
-
* Body param: Options for
|
|
871
|
+
* Body param: Options for AI-powered parsing tiers (cost_effective, agentic,
|
|
872
|
+
* agentic_plus).
|
|
873
|
+
*
|
|
874
|
+
* These options customize how the AI processes and interprets document content.
|
|
875
|
+
* Only applicable when using non-fast tiers.
|
|
816
876
|
*/
|
|
817
877
|
agentic_options?: ParsingCreateParams.AgenticOptions | null;
|
|
818
878
|
/**
|
|
819
|
-
* Body param:
|
|
879
|
+
* Body param: Identifier for the client/application making the request. Used for
|
|
880
|
+
* analytics and debugging. Example: 'my-app-v2'
|
|
820
881
|
*/
|
|
821
882
|
client_name?: string | null;
|
|
822
883
|
/**
|
|
823
|
-
* Body param:
|
|
884
|
+
* Body param: Crop boundaries to process only a portion of each page. Values are
|
|
885
|
+
* ratios 0-1 from page edges
|
|
824
886
|
*/
|
|
825
887
|
crop_box?: ParsingCreateParams.CropBox;
|
|
826
888
|
/**
|
|
827
|
-
* Body param:
|
|
889
|
+
* Body param: Bypass result caching and force re-parsing. Use when document
|
|
890
|
+
* content may have changed or you need fresh results
|
|
828
891
|
*/
|
|
829
892
|
disable_cache?: boolean | null;
|
|
830
893
|
/**
|
|
831
|
-
* Body param: Options for fast tier parsing (
|
|
894
|
+
* Body param: Options for fast tier parsing (rule-based, no AI).
|
|
895
|
+
*
|
|
896
|
+
* Fast tier uses deterministic algorithms for text extraction without AI
|
|
897
|
+
* enhancement. It's the fastest and most cost-effective option, best suited for
|
|
898
|
+
* simple documents with standard layouts. Currently has no configurable options
|
|
899
|
+
* but reserved for future expansion.
|
|
832
900
|
*/
|
|
833
901
|
fast_options?: unknown | null;
|
|
834
902
|
/**
|
|
835
|
-
* Body param: ID of an existing file in the project to parse
|
|
903
|
+
* Body param: ID of an existing file in the project to parse. Mutually exclusive
|
|
904
|
+
* with source_url
|
|
836
905
|
*/
|
|
837
906
|
file_id?: string | null;
|
|
838
907
|
/**
|
|
839
|
-
* Body param: HTTP proxy
|
|
908
|
+
* Body param: HTTP/HTTPS proxy for fetching source_url. Ignored if using file_id
|
|
840
909
|
*/
|
|
841
910
|
http_proxy?: string | null;
|
|
842
911
|
/**
|
|
843
|
-
* Body param:
|
|
912
|
+
* Body param: Format-specific options (HTML, PDF, spreadsheet, presentation).
|
|
913
|
+
* Applied based on detected input file type
|
|
844
914
|
*/
|
|
845
915
|
input_options?: ParsingCreateParams.InputOptions;
|
|
846
916
|
/**
|
|
847
|
-
* Body param: Output
|
|
917
|
+
* Body param: Output formatting options for markdown, text, and extracted images
|
|
848
918
|
*/
|
|
849
919
|
output_options?: ParsingCreateParams.OutputOptions;
|
|
850
920
|
/**
|
|
851
|
-
* Body param: Page
|
|
921
|
+
* Body param: Page selection: limit total pages or specify exact pages to process
|
|
852
922
|
*/
|
|
853
923
|
page_ranges?: ParsingCreateParams.PageRanges;
|
|
854
924
|
/**
|
|
855
|
-
* Body param: Job
|
|
925
|
+
* Body param: Job execution controls including timeouts and failure thresholds
|
|
856
926
|
*/
|
|
857
927
|
processing_control?: ParsingCreateParams.ProcessingControl;
|
|
858
928
|
/**
|
|
859
|
-
* Body param:
|
|
929
|
+
* Body param: Document processing options including OCR, table extraction, and
|
|
930
|
+
* chart parsing
|
|
860
931
|
*/
|
|
861
932
|
processing_options?: ParsingCreateParams.ProcessingOptions;
|
|
862
933
|
/**
|
|
863
|
-
* Body param:
|
|
934
|
+
* Body param: Public URL of the document to parse. Mutually exclusive with file_id
|
|
864
935
|
*/
|
|
865
936
|
source_url?: string | null;
|
|
866
937
|
/**
|
|
867
|
-
* Body param:
|
|
938
|
+
* Body param: Webhook endpoints for job status notifications. Multiple webhooks
|
|
939
|
+
* can be configured for different events or services
|
|
868
940
|
*/
|
|
869
941
|
webhook_configurations?: Array<ParsingCreateParams.WebhookConfiguration>;
|
|
870
942
|
}
|
|
871
943
|
export declare namespace ParsingCreateParams {
|
|
872
944
|
/**
|
|
873
|
-
* Options for
|
|
945
|
+
* Options for AI-powered parsing tiers (cost_effective, agentic, agentic_plus).
|
|
946
|
+
*
|
|
947
|
+
* These options customize how the AI processes and interprets document content.
|
|
948
|
+
* Only applicable when using non-fast tiers.
|
|
874
949
|
*/
|
|
875
950
|
interface AgenticOptions {
|
|
876
951
|
/**
|
|
877
|
-
* Custom
|
|
952
|
+
* Custom instructions for the AI parser. Use to guide extraction behavior, specify
|
|
953
|
+
* output formatting, or provide domain-specific context. Example: 'Extract
|
|
954
|
+
* financial tables with currency symbols. Format dates as YYYY-MM-DD.'
|
|
878
955
|
*/
|
|
879
956
|
custom_prompt?: string | null;
|
|
880
957
|
}
|
|
881
958
|
/**
|
|
882
|
-
*
|
|
959
|
+
* Crop boundaries to process only a portion of each page. Values are ratios 0-1
|
|
960
|
+
* from page edges
|
|
883
961
|
*/
|
|
884
962
|
interface CropBox {
|
|
885
963
|
/**
|
|
886
|
-
* Bottom boundary
|
|
964
|
+
* Bottom boundary as ratio (0-1). 0=top edge, 1=bottom edge. Content below this
|
|
965
|
+
* line is excluded
|
|
887
966
|
*/
|
|
888
967
|
bottom?: number | null;
|
|
889
968
|
/**
|
|
890
|
-
* Left boundary
|
|
969
|
+
* Left boundary as ratio (0-1). 0=left edge, 1=right edge. Content left of this
|
|
970
|
+
* line is excluded
|
|
891
971
|
*/
|
|
892
972
|
left?: number | null;
|
|
893
973
|
/**
|
|
894
|
-
* Right boundary
|
|
974
|
+
* Right boundary as ratio (0-1). 0=left edge, 1=right edge. Content right of this
|
|
975
|
+
* line is excluded
|
|
895
976
|
*/
|
|
896
977
|
right?: number | null;
|
|
897
978
|
/**
|
|
898
|
-
* Top boundary
|
|
979
|
+
* Top boundary as ratio (0-1). 0=top edge, 1=bottom edge. Content above this line
|
|
980
|
+
* is excluded
|
|
899
981
|
*/
|
|
900
982
|
top?: number | null;
|
|
901
983
|
}
|
|
902
984
|
/**
|
|
903
|
-
*
|
|
985
|
+
* Format-specific options (HTML, PDF, spreadsheet, presentation). Applied based on
|
|
986
|
+
* detected input file type
|
|
904
987
|
*/
|
|
905
988
|
interface InputOptions {
|
|
906
989
|
/**
|
|
907
|
-
* HTML
|
|
990
|
+
* HTML/web page parsing options (applies to .html, .htm files)
|
|
908
991
|
*/
|
|
909
992
|
html?: InputOptions.HTML;
|
|
910
993
|
/**
|
|
911
|
-
* PDF-specific parsing options
|
|
994
|
+
* PDF-specific parsing options (applies to .pdf files)
|
|
912
995
|
*/
|
|
913
996
|
pdf?: unknown;
|
|
914
997
|
/**
|
|
915
|
-
* Presentation
|
|
998
|
+
* Presentation parsing options (applies to .pptx, .ppt, .odp, .key files)
|
|
916
999
|
*/
|
|
917
1000
|
presentation?: InputOptions.Presentation;
|
|
918
1001
|
/**
|
|
919
|
-
* Spreadsheet
|
|
1002
|
+
* Spreadsheet parsing options (applies to .xlsx, .xls, .csv, .ods files)
|
|
920
1003
|
*/
|
|
921
1004
|
spreadsheet?: InputOptions.Spreadsheet;
|
|
922
1005
|
}
|
|
923
1006
|
namespace InputOptions {
|
|
924
1007
|
/**
|
|
925
|
-
* HTML
|
|
1008
|
+
* HTML/web page parsing options (applies to .html, .htm files)
|
|
926
1009
|
*/
|
|
927
1010
|
interface HTML {
|
|
928
1011
|
/**
|
|
929
|
-
*
|
|
1012
|
+
* Force all HTML elements to be visible by overriding CSS display/visibility
|
|
1013
|
+
* properties. Useful for parsing pages with hidden content or collapsed sections
|
|
930
1014
|
*/
|
|
931
1015
|
make_all_elements_visible?: boolean | null;
|
|
932
1016
|
/**
|
|
933
|
-
* Remove fixed
|
|
1017
|
+
* Remove fixed-position elements (headers, footers, floating buttons) that appear
|
|
1018
|
+
* on every page render
|
|
934
1019
|
*/
|
|
935
1020
|
remove_fixed_elements?: boolean | null;
|
|
936
1021
|
/**
|
|
937
|
-
* Remove navigation elements
|
|
1022
|
+
* Remove navigation elements (nav bars, sidebars, menus) to focus on main content
|
|
938
1023
|
*/
|
|
939
1024
|
remove_navigation_elements?: boolean | null;
|
|
940
1025
|
}
|
|
941
1026
|
/**
|
|
942
|
-
* Presentation
|
|
1027
|
+
* Presentation parsing options (applies to .pptx, .ppt, .odp, .key files)
|
|
943
1028
|
*/
|
|
944
1029
|
interface Presentation {
|
|
945
1030
|
/**
|
|
946
|
-
* Extract
|
|
1031
|
+
* Extract content positioned outside the visible slide area. Some presentations
|
|
1032
|
+
* have hidden notes or content that extends beyond slide boundaries
|
|
947
1033
|
*/
|
|
948
1034
|
out_of_bounds_content?: boolean | null;
|
|
949
1035
|
/**
|
|
950
|
-
* Skip extraction of embedded data
|
|
1036
|
+
* Skip extraction of embedded chart data tables. When true, only the visual
|
|
1037
|
+
* representation of charts is captured, not the underlying data
|
|
951
1038
|
*/
|
|
952
1039
|
skip_embedded_data?: boolean | null;
|
|
953
1040
|
}
|
|
954
1041
|
/**
|
|
955
|
-
* Spreadsheet
|
|
1042
|
+
* Spreadsheet parsing options (applies to .xlsx, .xls, .csv, .ods files)
|
|
956
1043
|
*/
|
|
957
1044
|
interface Spreadsheet {
|
|
958
1045
|
/**
|
|
959
|
-
* Detect and extract
|
|
1046
|
+
* Detect and extract multiple tables within a single sheet. Useful when
|
|
1047
|
+
* spreadsheets contain several data regions separated by blank rows/columns
|
|
960
1048
|
*/
|
|
961
1049
|
detect_sub_tables_in_sheets?: boolean | null;
|
|
962
1050
|
/**
|
|
963
|
-
*
|
|
1051
|
+
* Compute formula results instead of extracting formula text. Use when you need
|
|
1052
|
+
* calculated values rather than formula definitions
|
|
964
1053
|
*/
|
|
965
1054
|
force_formula_computation_in_sheets?: boolean | null;
|
|
966
1055
|
/**
|
|
967
|
-
*
|
|
1056
|
+
* Parse hidden sheets in addition to visible ones. By default, hidden sheets are
|
|
1057
|
+
* skipped
|
|
968
1058
|
*/
|
|
969
1059
|
include_hidden_sheets?: boolean | null;
|
|
970
1060
|
}
|
|
971
1061
|
}
|
|
972
1062
|
/**
|
|
973
|
-
* Output
|
|
1063
|
+
* Output formatting options for markdown, text, and extracted images
|
|
974
1064
|
*/
|
|
975
1065
|
interface OutputOptions {
|
|
976
1066
|
/**
|
|
977
|
-
* Extract printed page
|
|
1067
|
+
* Extract the printed page number as it appears in the document (e.g., 'Page 5 of
|
|
1068
|
+
* 10', 'v', 'A-3'). Useful for referencing original page numbers
|
|
978
1069
|
*/
|
|
979
1070
|
extract_printed_page_number?: boolean | null;
|
|
980
1071
|
/**
|
|
981
|
-
* Image categories to save: 'screenshot' (full page
|
|
982
|
-
*
|
|
983
|
-
*
|
|
1072
|
+
* Image categories to extract and save. Options: 'screenshot' (full page renders
|
|
1073
|
+
* useful for visual QA), 'embedded' (images found within the document), 'layout'
|
|
1074
|
+
* (cropped regions from layout detection like figures and diagrams). Empty list
|
|
1075
|
+
* saves no images
|
|
984
1076
|
*/
|
|
985
1077
|
images_to_save?: Array<'screenshot' | 'embedded' | 'layout'>;
|
|
986
1078
|
/**
|
|
987
|
-
* Markdown
|
|
1079
|
+
* Markdown formatting options including table styles and link annotations
|
|
988
1080
|
*/
|
|
989
1081
|
markdown?: OutputOptions.Markdown;
|
|
990
1082
|
/**
|
|
991
|
-
* Spatial text output options
|
|
1083
|
+
* Spatial text output options for preserving document layout structure
|
|
992
1084
|
*/
|
|
993
1085
|
spatial_text?: OutputOptions.SpatialText;
|
|
994
1086
|
/**
|
|
995
|
-
*
|
|
1087
|
+
* Options for exporting tables as XLSX spreadsheets
|
|
996
1088
|
*/
|
|
997
1089
|
tables_as_spreadsheet?: OutputOptions.TablesAsSpreadsheet;
|
|
998
1090
|
}
|
|
999
1091
|
namespace OutputOptions {
|
|
1000
1092
|
/**
|
|
1001
|
-
* Markdown
|
|
1093
|
+
* Markdown formatting options including table styles and link annotations
|
|
1002
1094
|
*/
|
|
1003
1095
|
interface Markdown {
|
|
1004
1096
|
/**
|
|
1005
|
-
* Add annotations to
|
|
1097
|
+
* Add link annotations to markdown output in the format [text](url). When false,
|
|
1098
|
+
* only the link text is included
|
|
1006
1099
|
*/
|
|
1007
1100
|
annotate_links?: boolean | null;
|
|
1008
1101
|
/**
|
|
1009
|
-
*
|
|
1102
|
+
* Embed images directly in markdown as base64 data URIs instead of extracting them
|
|
1103
|
+
* as separate files. Useful for self-contained markdown output
|
|
1010
1104
|
*/
|
|
1011
1105
|
inline_images?: boolean | null;
|
|
1012
1106
|
/**
|
|
1013
|
-
* Table formatting options
|
|
1107
|
+
* Table formatting options including markdown vs HTML format and merging behavior
|
|
1014
1108
|
*/
|
|
1015
1109
|
tables?: Markdown.Tables;
|
|
1016
1110
|
}
|
|
1017
1111
|
namespace Markdown {
|
|
1018
1112
|
/**
|
|
1019
|
-
* Table formatting options
|
|
1113
|
+
* Table formatting options including markdown vs HTML format and merging behavior
|
|
1020
1114
|
*/
|
|
1021
1115
|
interface Tables {
|
|
1022
1116
|
/**
|
|
1023
|
-
*
|
|
1117
|
+
* Remove extra whitespace padding in markdown table cells for more compact output
|
|
1024
1118
|
*/
|
|
1025
1119
|
compact_markdown_tables?: boolean | null;
|
|
1026
1120
|
/**
|
|
1027
|
-
* Separator for multiline content in markdown tables
|
|
1121
|
+
* Separator string for multiline cell content in markdown tables. Example:
|
|
1122
|
+
* '<br>' to preserve line breaks, ' ' to join with spaces
|
|
1028
1123
|
*/
|
|
1029
1124
|
markdown_table_multiline_separator?: string | null;
|
|
1030
1125
|
/**
|
|
1031
|
-
*
|
|
1126
|
+
* Automatically merge tables that span multiple pages into a single table. The
|
|
1127
|
+
* merged table appears on the first page with merged_from_pages metadata
|
|
1032
1128
|
*/
|
|
1033
1129
|
merge_continued_tables?: boolean | null;
|
|
1034
1130
|
/**
|
|
1035
|
-
* Output tables
|
|
1131
|
+
* Output tables as markdown pipe tables instead of HTML <table> tags.
|
|
1132
|
+
* Markdown tables are simpler but cannot represent complex structures like merged
|
|
1133
|
+
* cells
|
|
1036
1134
|
*/
|
|
1037
1135
|
output_tables_as_markdown?: boolean | null;
|
|
1038
1136
|
}
|
|
1039
1137
|
}
|
|
1040
1138
|
/**
|
|
1041
|
-
* Spatial text output options
|
|
1139
|
+
* Spatial text output options for preserving document layout structure
|
|
1042
1140
|
*/
|
|
1043
1141
|
interface SpatialText {
|
|
1044
1142
|
/**
|
|
1045
|
-
* Keep column
|
|
1143
|
+
* Keep multi-column layouts intact instead of linearizing columns into sequential
|
|
1144
|
+
* text. Automatically enabled for non-fast tiers
|
|
1046
1145
|
*/
|
|
1047
1146
|
do_not_unroll_columns?: boolean | null;
|
|
1048
1147
|
/**
|
|
1049
|
-
*
|
|
1148
|
+
* Maintain consistent text column alignment across page boundaries. Automatically
|
|
1149
|
+
* enabled for document-level parsing modes
|
|
1050
1150
|
*/
|
|
1051
1151
|
preserve_layout_alignment_across_pages?: boolean | null;
|
|
1052
1152
|
/**
|
|
1053
|
-
* Include
|
|
1153
|
+
* Include text below the normal size threshold. Useful for footnotes, watermarks,
|
|
1154
|
+
* or fine print that might otherwise be filtered out
|
|
1054
1155
|
*/
|
|
1055
1156
|
preserve_very_small_text?: boolean | null;
|
|
1056
1157
|
}
|
|
1057
1158
|
/**
|
|
1058
|
-
*
|
|
1159
|
+
* Options for exporting tables as XLSX spreadsheets
|
|
1059
1160
|
*/
|
|
1060
1161
|
interface TablesAsSpreadsheet {
|
|
1061
1162
|
/**
|
|
@@ -1063,121 +1164,146 @@ export declare namespace ParsingCreateParams {
|
|
|
1063
1164
|
*/
|
|
1064
1165
|
enable?: boolean | null;
|
|
1065
1166
|
/**
|
|
1066
|
-
* Automatically
|
|
1167
|
+
* Automatically generate descriptive sheet names from table context (headers,
|
|
1168
|
+
* surrounding text) instead of using generic names like 'Table_1'
|
|
1067
1169
|
*/
|
|
1068
1170
|
guess_sheet_name?: boolean;
|
|
1069
1171
|
}
|
|
1070
1172
|
}
|
|
1071
1173
|
/**
|
|
1072
|
-
* Page
|
|
1174
|
+
* Page selection: limit total pages or specify exact pages to process
|
|
1073
1175
|
*/
|
|
1074
1176
|
interface PageRanges {
|
|
1075
1177
|
/**
|
|
1076
|
-
* Maximum number of pages to process
|
|
1178
|
+
* Maximum number of pages to process. Pages are processed in order starting from
|
|
1179
|
+
* page 1. If both max_pages and target_pages are set, target_pages takes
|
|
1180
|
+
* precedence
|
|
1077
1181
|
*/
|
|
1078
1182
|
max_pages?: number | null;
|
|
1079
1183
|
/**
|
|
1080
|
-
*
|
|
1184
|
+
* Comma-separated list of specific pages to process using 1-based indexing.
|
|
1185
|
+
* Supports individual pages and ranges. Examples: '1,3,5' (pages 1, 3, 5), '1-5'
|
|
1186
|
+
* (pages 1 through 5 inclusive), '1,3,5-8,10' (pages 1, 3, 5-8, and 10). Pages are
|
|
1187
|
+
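* sorted and deduplicated automatically. Duplicate pages cause an error (NOTE(review): these two statements conflict; confirm whether duplicate pages are silently dropped or rejected)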
|
|
1081
1188
|
*/
|
|
1082
1189
|
target_pages?: string | null;
|
|
1083
1190
|
}
|
|
1084
1191
|
/**
|
|
1085
|
-
* Job
|
|
1192
|
+
* Job execution controls including timeouts and failure thresholds
|
|
1086
1193
|
*/
|
|
1087
1194
|
interface ProcessingControl {
|
|
1088
1195
|
/**
|
|
1089
|
-
*
|
|
1196
|
+
* Quality thresholds that determine when a job should fail vs complete with
|
|
1197
|
+
* partial results
|
|
1090
1198
|
*/
|
|
1091
1199
|
job_failure_conditions?: ProcessingControl.JobFailureConditions;
|
|
1092
1200
|
/**
|
|
1093
|
-
* Timeout
|
|
1201
|
+
* Timeout settings for job execution. Increase for large or complex documents
|
|
1094
1202
|
*/
|
|
1095
1203
|
timeouts?: ProcessingControl.Timeouts;
|
|
1096
1204
|
}
|
|
1097
1205
|
namespace ProcessingControl {
|
|
1098
1206
|
/**
|
|
1099
|
-
*
|
|
1207
|
+
* Quality thresholds that determine when a job should fail vs complete with
|
|
1208
|
+
* partial results
|
|
1100
1209
|
*/
|
|
1101
1210
|
interface JobFailureConditions {
|
|
1102
1211
|
/**
|
|
1103
|
-
* Maximum ratio of pages allowed to fail (0-1)
|
|
1212
|
+
* Maximum ratio of pages allowed to fail before the job fails (0-1). Example: 0.1
|
|
1213
|
+
* means job fails if more than 10% of pages fail. Default is 0.05 (5%)
|
|
1104
1214
|
*/
|
|
1105
1215
|
allowed_page_failure_ratio?: number | null;
|
|
1106
1216
|
/**
|
|
1107
|
-
* Fail job if
|
|
1217
|
+
* Fail the job if a problematic font is detected that may cause incorrect text
|
|
1218
|
+
* extraction. Buggy fonts can produce garbled or missing characters
|
|
1108
1219
|
*/
|
|
1109
1220
|
fail_on_buggy_font?: boolean | null;
|
|
1110
1221
|
/**
|
|
1111
|
-
* Fail job if image
|
|
1222
|
+
* Fail the entire job if any embedded image cannot be extracted. By default, image
|
|
1223
|
+
* extraction errors are logged but don't fail the job
|
|
1112
1224
|
*/
|
|
1113
1225
|
fail_on_image_extraction_error?: boolean | null;
|
|
1114
1226
|
/**
|
|
1115
|
-
* Fail job if image OCR
|
|
1227
|
+
* Fail the entire job if OCR fails on any image. By default, OCR errors result in
|
|
1228
|
+
* empty text for that image
|
|
1116
1229
|
*/
|
|
1117
1230
|
fail_on_image_ocr_error?: boolean | null;
|
|
1118
1231
|
/**
|
|
1119
|
-
* Fail job if markdown
|
|
1232
|
+
* Fail the entire job if markdown cannot be reconstructed for any page. By
|
|
1233
|
+
* default, failed pages use fallback text extraction
|
|
1120
1234
|
*/
|
|
1121
1235
|
fail_on_markdown_reconstruction_error?: boolean | null;
|
|
1122
1236
|
}
|
|
1123
1237
|
/**
|
|
1124
|
-
* Timeout
|
|
1238
|
+
* Timeout settings for job execution. Increase for large or complex documents
|
|
1125
1239
|
*/
|
|
1126
1240
|
interface Timeouts {
|
|
1127
1241
|
/**
|
|
1128
|
-
* Base timeout in seconds (max 30 minutes)
|
|
1242
|
+
* Base timeout for the job in seconds (max 1800 = 30 minutes). This is the minimum
|
|
1243
|
+
* time allowed regardless of document size
|
|
1129
1244
|
*/
|
|
1130
1245
|
base_in_seconds?: number | null;
|
|
1131
1246
|
/**
|
|
1132
|
-
* Additional timeout per page in seconds (max 5 minutes)
|
|
1247
|
+
* Additional timeout per page in seconds (max 300 = 5 minutes). Total timeout =
|
|
1248
|
+
* base + (this value × page count)
|
|
1133
1249
|
*/
|
|
1134
1250
|
extra_time_per_page_in_seconds?: number | null;
|
|
1135
1251
|
}
|
|
1136
1252
|
}
|
|
1137
1253
|
/**
|
|
1138
|
-
*
|
|
1254
|
+
* Document processing options including OCR, table extraction, and chart parsing
|
|
1139
1255
|
*/
|
|
1140
1256
|
interface ProcessingOptions {
|
|
1141
1257
|
/**
|
|
1142
|
-
*
|
|
1258
|
+
* Use aggressive heuristics to detect table boundaries, even without visible
|
|
1259
|
+
* borders. Useful for documents with borderless or complex tables
|
|
1143
1260
|
*/
|
|
1144
1261
|
aggressive_table_extraction?: boolean | null;
|
|
1145
1262
|
/**
|
|
1146
|
-
*
|
|
1263
|
+
* Conditional processing rules that apply different parsing options based on page
|
|
1264
|
+
* content, document structure, or filename patterns. Each entry defines trigger
|
|
1265
|
+
* conditions and the parsing configuration to apply when triggered
|
|
1147
1266
|
*/
|
|
1148
1267
|
auto_mode_configuration?: Array<ProcessingOptions.AutoModeConfiguration> | null;
|
|
1149
1268
|
/**
|
|
1150
|
-
* Cost optimizer
|
|
1269
|
+
* Cost optimizer configuration for reducing parsing costs on simpler pages.
|
|
1270
|
+
*
|
|
1271
|
+
* When enabled, the parser analyzes each page and routes simpler pages to faster,
|
|
1272
|
+
* cheaper processing while preserving quality for complex pages. Only works with
|
|
1273
|
+
* 'agentic' or 'agentic_plus' tiers.
|
|
1151
1274
|
*/
|
|
1152
1275
|
cost_optimizer?: ProcessingOptions.CostOptimizer | null;
|
|
1153
1276
|
/**
|
|
1154
|
-
*
|
|
1155
|
-
* table handling
|
|
1277
|
+
* Disable automatic heuristics including outlined table extraction and adaptive
|
|
1278
|
+
* long table handling. Use when heuristics produce incorrect results
|
|
1156
1279
|
*/
|
|
1157
1280
|
disable_heuristics?: boolean | null;
|
|
1158
1281
|
/**
|
|
1159
|
-
* Options for ignoring specific text types
|
|
1282
|
+
* Options for ignoring specific text types (diagonal, hidden, text in images)
|
|
1160
1283
|
*/
|
|
1161
1284
|
ignore?: ProcessingOptions.Ignore;
|
|
1162
1285
|
/**
|
|
1163
|
-
* OCR configuration
|
|
1286
|
+
* OCR configuration including language detection settings
|
|
1164
1287
|
*/
|
|
1165
1288
|
ocr_parameters?: ProcessingOptions.OcrParameters;
|
|
1166
1289
|
/**
|
|
1167
|
-
* Enable
|
|
1290
|
+
* Enable AI-powered chart analysis. Modes: 'efficient' (fast, lower cost),
|
|
1291
|
+
* 'agentic' (balanced), 'agentic_plus' (highest accuracy). Automatically enables
|
|
1292
|
+
* extract_layout and precise_bounding_box when set
|
|
1168
1293
|
*/
|
|
1169
1294
|
specialized_chart_parsing?: 'agentic_plus' | 'agentic' | 'efficient' | null;
|
|
1170
1295
|
}
|
|
1171
1296
|
namespace ProcessingOptions {
|
|
1172
1297
|
/**
|
|
1173
|
-
* A single
|
|
1298
|
+
* A single auto mode rule with trigger conditions and parsing configuration.
|
|
1299
|
+
*
|
|
1300
|
+
* Auto mode allows conditional parsing where different configurations are applied
|
|
1301
|
+
* based on page content, structure, or filename. When triggers match, the
|
|
1302
|
+
* parsing_conf overrides default settings for that page.
|
|
1174
1303
|
*/
|
|
1175
1304
|
interface AutoModeConfiguration {
|
|
1176
1305
|
/**
|
|
1177
|
-
*
|
|
1178
|
-
*
|
|
1179
|
-
* This uses V2 API naming conventions. The backend service will convert these to
|
|
1180
|
-
* the V1 format expected by the llamaparse worker.
|
|
1306
|
+
* Parsing configuration to apply when trigger conditions are met
|
|
1181
1307
|
*/
|
|
1182
1308
|
parsing_conf: AutoModeConfiguration.ParsingConf;
|
|
1183
1309
|
/**
|
|
@@ -1317,17 +1443,14 @@ export declare namespace ParsingCreateParams {
|
|
|
1317
1443
|
*/
|
|
1318
1444
|
text_in_page?: string | null;
|
|
1319
1445
|
/**
|
|
1320
|
-
* How to combine multiple trigger conditions: 'and' (all must match,
|
|
1321
|
-
* 'or' (any can
|
|
1446
|
+
* How to combine multiple trigger conditions: 'and' (all conditions must match,
|
|
1447
|
+
* this is the default) or 'or' (any single condition can trigger)
|
|
1322
1448
|
*/
|
|
1323
1449
|
trigger_mode?: string | null;
|
|
1324
1450
|
}
|
|
1325
1451
|
namespace AutoModeConfiguration {
|
|
1326
1452
|
/**
|
|
1327
|
-
*
|
|
1328
|
-
*
|
|
1329
|
-
* This uses V2 API naming conventions. The backend service will convert these to
|
|
1330
|
-
* the V1 format expected by the llamaparse worker.
|
|
1453
|
+
* Parsing configuration to apply when trigger conditions are met
|
|
1331
1454
|
*/
|
|
1332
1455
|
interface ParsingConf {
|
|
1333
1456
|
/**
|
|
@@ -1343,7 +1466,7 @@ export declare namespace ParsingCreateParams {
|
|
|
1343
1466
|
*/
|
|
1344
1467
|
crop_box?: ParsingConf.CropBox | null;
|
|
1345
1468
|
/**
|
|
1346
|
-
* Custom
|
|
1469
|
+
* Custom AI instructions for matched pages. Overrides the base custom_prompt
|
|
1347
1470
|
*/
|
|
1348
1471
|
custom_prompt?: string | null;
|
|
1349
1472
|
/**
|
|
@@ -1379,13 +1502,13 @@ export declare namespace ParsingCreateParams {
|
|
|
1379
1502
|
*/
|
|
1380
1503
|
specialized_chart_parsing?: 'agentic_plus' | 'agentic' | 'efficient' | null;
|
|
1381
1504
|
/**
|
|
1382
|
-
*
|
|
1505
|
+
* Override the parsing tier for matched pages. Must be paired with version
|
|
1383
1506
|
*/
|
|
1384
1507
|
tier?: 'fast' | 'cost_effective' | 'agentic' | 'agentic_plus' | null;
|
|
1385
1508
|
/**
|
|
1386
|
-
*
|
|
1509
|
+
* Tier version when overriding tier. Required when tier is specified
|
|
1387
1510
|
*/
|
|
1388
|
-
version?: '2025-12-11' | '2025-12-18' | '2025-12-31' | '2026-01-08' | '2026-01-09' | '2026-01-16' | '2026-01-21' | '2026-01-22' | '2026-01-24' | '2026-01-29' | '2026-01-30' | '2026-02-03' | '2026-02-18' | '2026-02-20' | '2026-02-24' | '2026-02-26' | '2026-03-02' | '2026-03-03' | '2026-03-04' | 'latest' | (string & {}) | null;
|
|
1511
|
+
version?: '2025-12-11' | '2025-12-18' | '2025-12-31' | '2026-01-08' | '2026-01-09' | '2026-01-16' | '2026-01-21' | '2026-01-22' | '2026-01-24' | '2026-01-29' | '2026-01-30' | '2026-02-03' | '2026-02-18' | '2026-02-20' | '2026-02-24' | '2026-02-26' | '2026-03-02' | '2026-03-03' | '2026-03-04' | '2026-03-05' | '2026-03-09' | '2026-03-10' | '2026-03-11' | '2026-03-12' | '2026-03-17' | '2026-03-19' | '2026-03-20' | '2026-03-22' | '2026-03-23' | '2026-03-24' | '2026-03-25' | '2026-03-26' | '2026-03-27' | '2026-03-30' | '2026-03-31' | 'latest' | (string & {}) | null;
|
|
1389
1512
|
}
|
|
1390
1513
|
namespace ParsingConf {
|
|
1391
1514
|
/**
|
|
@@ -1455,60 +1578,91 @@ export declare namespace ParsingCreateParams {
|
|
|
1455
1578
|
}
|
|
1456
1579
|
}
|
|
1457
1580
|
/**
|
|
1458
|
-
* Cost optimizer
|
|
1581
|
+
* Cost optimizer configuration for reducing parsing costs on simpler pages.
|
|
1582
|
+
*
|
|
1583
|
+
* When enabled, the parser analyzes each page and routes simpler pages to faster,
|
|
1584
|
+
* cheaper processing while preserving quality for complex pages. Only works with
|
|
1585
|
+
* 'agentic' or 'agentic_plus' tiers.
|
|
1459
1586
|
*/
|
|
1460
1587
|
interface CostOptimizer {
|
|
1461
1588
|
/**
|
|
1462
|
-
*
|
|
1463
|
-
*
|
|
1589
|
+
* Enable cost-optimized parsing. Routes simpler pages to faster processing while
|
|
1590
|
+
* complex pages use full AI analysis. May reduce speed on some documents.
|
|
1591
|
+
* IMPORTANT: Only available with 'agentic' or 'agentic_plus' tiers
|
|
1464
1592
|
*/
|
|
1465
1593
|
enable?: boolean | null;
|
|
1466
1594
|
}
|
|
1467
1595
|
/**
|
|
1468
|
-
* Options for ignoring specific text types
|
|
1596
|
+
* Options for ignoring specific text types (diagonal, hidden, text in images)
|
|
1469
1597
|
*/
|
|
1470
1598
|
interface Ignore {
|
|
1471
1599
|
/**
|
|
1472
|
-
*
|
|
1600
|
+
* Skip text rotated at an angle (not horizontal/vertical). Useful for ignoring
|
|
1601
|
+
* watermarks or decorative angled text
|
|
1473
1602
|
*/
|
|
1474
1603
|
ignore_diagonal_text?: boolean | null;
|
|
1475
1604
|
/**
|
|
1476
|
-
*
|
|
1605
|
+
* Skip text marked as hidden in the document structure. Some PDFs contain
|
|
1606
|
+
* invisible text layers used for accessibility or search indexing
|
|
1477
1607
|
*/
|
|
1478
1608
|
ignore_hidden_text?: boolean | null;
|
|
1479
1609
|
/**
|
|
1480
|
-
*
|
|
1610
|
+
* Skip OCR text extraction from embedded images. Use when images contain
|
|
1611
|
+
* irrelevant text (watermarks, logos) that shouldn't be in the output
|
|
1481
1612
|
*/
|
|
1482
1613
|
ignore_text_in_image?: boolean | null;
|
|
1483
1614
|
}
|
|
1484
1615
|
/**
|
|
1485
|
-
* OCR configuration
|
|
1616
|
+
* OCR configuration including language detection settings
|
|
1486
1617
|
*/
|
|
1487
1618
|
interface OcrParameters {
|
|
1488
1619
|
/**
|
|
1489
|
-
*
|
|
1620
|
+
* Languages to use for OCR text recognition. Specify multiple languages if
|
|
1621
|
+
* document contains mixed-language content. Order matters - put primary language
|
|
1622
|
+
* first. Example: ['en', 'es'] for English with Spanish
|
|
1490
1623
|
*/
|
|
1491
1624
|
languages?: Array<ParsingAPI.ParsingLanguages> | null;
|
|
1492
1625
|
}
|
|
1493
1626
|
}
|
|
1627
|
+
/**
|
|
1628
|
+
* Webhook configuration for receiving parsing job notifications.
|
|
1629
|
+
*
|
|
1630
|
+
* Webhooks are called when specified events occur during job processing. Configure
|
|
1631
|
+
* multiple webhook configurations to send to different endpoints.
|
|
1632
|
+
*/
|
|
1494
1633
|
interface WebhookConfiguration {
|
|
1495
1634
|
/**
|
|
1496
|
-
*
|
|
1635
|
+
* Events that trigger this webhook. Options: 'parse.success' (job completed),
|
|
1636
|
+
* 'parse.failure' (job failed), 'parse.partial' (some pages failed). If not
|
|
1637
|
+
* specified, webhook fires for all events
|
|
1497
1638
|
*/
|
|
1498
1639
|
webhook_events?: Array<string> | null;
|
|
1499
1640
|
/**
|
|
1500
|
-
* Custom headers to include in webhook requests
|
|
1641
|
+
* Custom HTTP headers to include in webhook requests. Use for authentication
|
|
1642
|
+
* tokens or custom routing. Example: {'Authorization': 'Bearer xyz'}
|
|
1501
1643
|
*/
|
|
1502
1644
|
webhook_headers?: {
|
|
1503
1645
|
[key: string]: unknown;
|
|
1504
1646
|
} | null;
|
|
1505
1647
|
/**
|
|
1506
|
-
*
|
|
1648
|
+
* HTTPS URL to receive webhook POST requests. Must be publicly accessible
|
|
1507
1649
|
*/
|
|
1508
1650
|
webhook_url?: string | null;
|
|
1509
1651
|
}
|
|
1510
1652
|
}
|
|
1511
1653
|
export interface ParsingListParams extends PaginatedCursorParams {
|
|
1654
|
+
/**
|
|
1655
|
+
* Include items created at or after this timestamp (inclusive)
|
|
1656
|
+
*/
|
|
1657
|
+
created_at_on_or_after?: string | null;
|
|
1658
|
+
/**
|
|
1659
|
+
* Include items created at or before this timestamp (inclusive)
|
|
1660
|
+
*/
|
|
1661
|
+
created_at_on_or_before?: string | null;
|
|
1662
|
+
/**
|
|
1663
|
+
* Filter by specific job IDs
|
|
1664
|
+
*/
|
|
1665
|
+
job_ids?: Array<string> | null;
|
|
1512
1666
|
organization_id?: string | null;
|
|
1513
1667
|
project_id?: string | null;
|
|
1514
1668
|
/**
|
|
@@ -1518,10 +1672,10 @@ export interface ParsingListParams extends PaginatedCursorParams {
|
|
|
1518
1672
|
}
|
|
1519
1673
|
export interface ParsingGetParams {
|
|
1520
1674
|
/**
|
|
1521
|
-
* Fields to include: text, markdown, items, metadata,
|
|
1522
|
-
* markdown_content_metadata, items_content_metadata,
|
|
1523
|
-
* xlsx_content_metadata, output_pdf_content_metadata,
|
|
1524
|
-
* Metadata fields include presigned URLs.
|
|
1675
|
+
* Fields to include: text, markdown, items, metadata, job_metadata,
|
|
1676
|
+
* text_content_metadata, markdown_content_metadata, items_content_metadata,
|
|
1677
|
+
* metadata_content_metadata, xlsx_content_metadata, output_pdf_content_metadata,
|
|
1678
|
+
* images_content_metadata. Metadata fields include presigned URLs.
|
|
1525
1679
|
*/
|
|
1526
1680
|
expand?: Array<string>;
|
|
1527
1681
|
/**
|