@llamaindex/llama-cloud 1.7.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +80 -0
- package/README.md +10 -8
- package/client.d.mts +9 -8
- package/client.d.mts.map +1 -1
- package/client.d.ts +9 -8
- package/client.d.ts.map +1 -1
- package/client.js +16 -12
- package/client.js.map +1 -1
- package/client.mjs +16 -12
- package/client.mjs.map +1 -1
- package/core/pagination.d.mts +0 -23
- package/core/pagination.d.mts.map +1 -1
- package/core/pagination.d.ts +0 -23
- package/core/pagination.d.ts.map +1 -1
- package/core/pagination.js +1 -32
- package/core/pagination.js.map +1 -1
- package/core/pagination.mjs +0 -30
- package/core/pagination.mjs.map +1 -1
- package/package.json +12 -1
- package/resources/beta/batch/batch.d.mts +55 -30
- package/resources/beta/batch/batch.d.mts.map +1 -1
- package/resources/beta/batch/batch.d.ts +55 -30
- package/resources/beta/batch/batch.d.ts.map +1 -1
- package/resources/beta/batch/batch.js +14 -11
- package/resources/beta/batch/batch.js.map +1 -1
- package/resources/beta/batch/batch.mjs +14 -11
- package/resources/beta/batch/batch.mjs.map +1 -1
- package/resources/beta/batch/job-items.d.mts +36 -13
- package/resources/beta/batch/job-items.d.mts.map +1 -1
- package/resources/beta/batch/job-items.d.ts +36 -13
- package/resources/beta/batch/job-items.d.ts.map +1 -1
- package/resources/beta/batch/job-items.js +6 -8
- package/resources/beta/batch/job-items.js.map +1 -1
- package/resources/beta/batch/job-items.mjs +6 -8
- package/resources/beta/batch/job-items.mjs.map +1 -1
- package/resources/beta/sheets.d.mts +16 -0
- package/resources/beta/sheets.d.mts.map +1 -1
- package/resources/beta/sheets.d.ts +16 -0
- package/resources/beta/sheets.d.ts.map +1 -1
- package/resources/beta/split.d.mts +60 -16
- package/resources/beta/split.d.mts.map +1 -1
- package/resources/beta/split.d.ts +60 -16
- package/resources/beta/split.d.ts.map +1 -1
- package/resources/beta/split.js.map +1 -1
- package/resources/beta/split.mjs.map +1 -1
- package/resources/classifier/jobs.d.mts +20 -3
- package/resources/classifier/jobs.d.mts.map +1 -1
- package/resources/classifier/jobs.d.ts +20 -3
- package/resources/classifier/jobs.d.ts.map +1 -1
- package/resources/classifier/jobs.js +8 -0
- package/resources/classifier/jobs.js.map +1 -1
- package/resources/classifier/jobs.mjs +8 -0
- package/resources/classifier/jobs.mjs.map +1 -1
- package/resources/classify.d.mts +373 -0
- package/resources/classify.d.mts.map +1 -0
- package/resources/classify.d.ts +373 -0
- package/resources/classify.d.ts.map +1 -0
- package/resources/classify.js +54 -0
- package/resources/classify.js.map +1 -0
- package/resources/classify.mjs +50 -0
- package/resources/classify.mjs.map +1 -0
- package/resources/extract.d.mts +1588 -0
- package/resources/extract.d.mts.map +1 -0
- package/resources/extract.d.ts +1588 -0
- package/resources/extract.d.ts.map +1 -0
- package/resources/extract.js +217 -0
- package/resources/extract.js.map +1 -0
- package/resources/extract.mjs +213 -0
- package/resources/extract.mjs.map +1 -0
- package/resources/files.d.mts +52 -38
- package/resources/files.d.mts.map +1 -1
- package/resources/files.d.ts +52 -38
- package/resources/files.d.ts.map +1 -1
- package/resources/files.js +10 -9
- package/resources/files.js.map +1 -1
- package/resources/files.mjs +10 -9
- package/resources/files.mjs.map +1 -1
- package/resources/index.d.mts +2 -1
- package/resources/index.d.mts.map +1 -1
- package/resources/index.d.ts +2 -1
- package/resources/index.d.ts.map +1 -1
- package/resources/index.js +5 -3
- package/resources/index.js.map +1 -1
- package/resources/index.mjs +2 -1
- package/resources/index.mjs.map +1 -1
- package/resources/parsing.d.mts +324 -138
- package/resources/parsing.d.mts.map +1 -1
- package/resources/parsing.d.ts +324 -138
- package/resources/parsing.d.ts.map +1 -1
- package/resources/parsing.js +30 -4
- package/resources/parsing.js.map +1 -1
- package/resources/parsing.mjs +30 -4
- package/resources/parsing.mjs.map +1 -1
- package/resources/pipelines/pipelines.d.mts +59 -13
- package/resources/pipelines/pipelines.d.mts.map +1 -1
- package/resources/pipelines/pipelines.d.ts +59 -13
- package/resources/pipelines/pipelines.d.ts.map +1 -1
- package/resources/pipelines/pipelines.js +24 -9
- package/resources/pipelines/pipelines.js.map +1 -1
- package/resources/pipelines/pipelines.mjs +24 -9
- package/resources/pipelines/pipelines.mjs.map +1 -1
- package/resources/pipelines/sync.d.mts +5 -3
- package/resources/pipelines/sync.d.mts.map +1 -1
- package/resources/pipelines/sync.d.ts +5 -3
- package/resources/pipelines/sync.d.ts.map +1 -1
- package/resources/pipelines/sync.js +5 -3
- package/resources/pipelines/sync.js.map +1 -1
- package/resources/pipelines/sync.mjs +5 -3
- package/resources/pipelines/sync.mjs.map +1 -1
- package/src/client.ts +86 -22
- package/src/core/pagination.ts +0 -71
- package/src/resources/beta/batch/batch.ts +75 -30
- package/src/resources/beta/batch/job-items.ts +56 -13
- package/src/resources/beta/sheets.ts +20 -0
- package/src/resources/beta/split.ts +70 -17
- package/src/resources/classifier/jobs.ts +20 -3
- package/src/resources/classify.ts +486 -0
- package/src/resources/extract.ts +2045 -0
- package/src/resources/files.ts +52 -38
- package/src/resources/index.ts +35 -1
- package/src/resources/parsing.ts +367 -136
- package/src/resources/pipelines/pipelines.ts +80 -14
- package/src/resources/pipelines/sync.ts +5 -3
- package/src/version.ts +1 -1
- package/version.d.mts +1 -1
- package/version.d.ts +1 -1
- package/version.js +1 -1
- package/version.mjs +1 -1
- package/resources/extraction/extraction-agents/extraction-agents.d.mts +0 -126
- package/resources/extraction/extraction-agents/extraction-agents.d.mts.map +0 -1
- package/resources/extraction/extraction-agents/extraction-agents.d.ts +0 -126
- package/resources/extraction/extraction-agents/extraction-agents.d.ts.map +0 -1
- package/resources/extraction/extraction-agents/extraction-agents.js +0 -56
- package/resources/extraction/extraction-agents/extraction-agents.js.map +0 -1
- package/resources/extraction/extraction-agents/extraction-agents.mjs +0 -51
- package/resources/extraction/extraction-agents/extraction-agents.mjs.map +0 -1
- package/resources/extraction/extraction-agents/index.d.mts +0 -3
- package/resources/extraction/extraction-agents/index.d.mts.map +0 -1
- package/resources/extraction/extraction-agents/index.d.ts +0 -3
- package/resources/extraction/extraction-agents/index.d.ts.map +0 -1
- package/resources/extraction/extraction-agents/index.js +0 -9
- package/resources/extraction/extraction-agents/index.js.map +0 -1
- package/resources/extraction/extraction-agents/index.mjs +0 -4
- package/resources/extraction/extraction-agents/index.mjs.map +0 -1
- package/resources/extraction/extraction-agents/schema.d.mts +0 -75
- package/resources/extraction/extraction-agents/schema.d.mts.map +0 -1
- package/resources/extraction/extraction-agents/schema.d.ts +0 -75
- package/resources/extraction/extraction-agents/schema.d.ts.map +0 -1
- package/resources/extraction/extraction-agents/schema.js +0 -28
- package/resources/extraction/extraction-agents/schema.js.map +0 -1
- package/resources/extraction/extraction-agents/schema.mjs +0 -24
- package/resources/extraction/extraction-agents/schema.mjs.map +0 -1
- package/resources/extraction/extraction-agents.d.mts +0 -2
- package/resources/extraction/extraction-agents.d.mts.map +0 -1
- package/resources/extraction/extraction-agents.d.ts +0 -2
- package/resources/extraction/extraction-agents.d.ts.map +0 -1
- package/resources/extraction/extraction-agents.js +0 -6
- package/resources/extraction/extraction-agents.js.map +0 -1
- package/resources/extraction/extraction-agents.mjs +0 -3
- package/resources/extraction/extraction-agents.mjs.map +0 -1
- package/resources/extraction/extraction.d.mts +0 -118
- package/resources/extraction/extraction.d.mts.map +0 -1
- package/resources/extraction/extraction.d.ts +0 -118
- package/resources/extraction/extraction.d.ts.map +0 -1
- package/resources/extraction/extraction.js +0 -91
- package/resources/extraction/extraction.js.map +0 -1
- package/resources/extraction/extraction.mjs +0 -86
- package/resources/extraction/extraction.mjs.map +0 -1
- package/resources/extraction/index.d.mts +0 -5
- package/resources/extraction/index.d.mts.map +0 -1
- package/resources/extraction/index.d.ts +0 -5
- package/resources/extraction/index.d.ts.map +0 -1
- package/resources/extraction/index.js +0 -13
- package/resources/extraction/index.js.map +0 -1
- package/resources/extraction/index.mjs +0 -6
- package/resources/extraction/index.mjs.map +0 -1
- package/resources/extraction/jobs.d.mts +0 -280
- package/resources/extraction/jobs.d.mts.map +0 -1
- package/resources/extraction/jobs.d.ts +0 -280
- package/resources/extraction/jobs.d.ts.map +0 -1
- package/resources/extraction/jobs.js +0 -179
- package/resources/extraction/jobs.js.map +0 -1
- package/resources/extraction/jobs.mjs +0 -175
- package/resources/extraction/jobs.mjs.map +0 -1
- package/resources/extraction/runs.d.mts +0 -198
- package/resources/extraction/runs.d.mts.map +0 -1
- package/resources/extraction/runs.d.ts +0 -198
- package/resources/extraction/runs.d.ts.map +0 -1
- package/resources/extraction/runs.js +0 -42
- package/resources/extraction/runs.js.map +0 -1
- package/resources/extraction/runs.mjs +0 -38
- package/resources/extraction/runs.mjs.map +0 -1
- package/resources/extraction.d.mts +0 -2
- package/resources/extraction.d.mts.map +0 -1
- package/resources/extraction.d.ts +0 -2
- package/resources/extraction.d.ts.map +0 -1
- package/resources/extraction.js +0 -6
- package/resources/extraction.js.map +0 -1
- package/resources/extraction.mjs +0 -3
- package/resources/extraction.mjs.map +0 -1
- package/src/resources/extraction/extraction-agents/extraction-agents.ts +0 -196
- package/src/resources/extraction/extraction-agents/index.ts +0 -18
- package/src/resources/extraction/extraction-agents/schema.ts +0 -100
- package/src/resources/extraction/extraction-agents.ts +0 -3
- package/src/resources/extraction/extraction.ts +0 -224
- package/src/resources/extraction/index.ts +0 -34
- package/src/resources/extraction/jobs.ts +0 -414
- package/src/resources/extraction/runs.ts +0 -315
- package/src/resources/extraction.ts +0 -3
package/src/resources/parsing.ts
CHANGED
|
@@ -12,7 +12,21 @@ import { pollUntilComplete, PollingOptions, DEFAULT_TIMEOUT } from '../core/poll
|
|
|
12
12
|
|
|
13
13
|
export class Parsing extends APIResource {
|
|
14
14
|
/**
|
|
15
|
-
* Parse a file by file ID
|
|
15
|
+
* Parse a file by file ID or URL.
|
|
16
|
+
*
|
|
17
|
+
* Provide either `file_id` (a previously uploaded file) or `source_url` (a
|
|
18
|
+
* publicly accessible URL). Configure parsing with options like `tier`,
|
|
19
|
+
* `target_pages`, and `lang`.
|
|
20
|
+
*
|
|
21
|
+
* ## Tiers
|
|
22
|
+
*
|
|
23
|
+
* - `fast` — rule-based, cheapest, no AI
|
|
24
|
+
* - `cost_effective` — balanced speed and quality
|
|
25
|
+
* - `agentic` — full AI-powered parsing
|
|
26
|
+
* - `agentic_plus` — premium AI with specialized features
|
|
27
|
+
*
|
|
28
|
+
* The job runs asynchronously. Poll `GET /parse/{job_id}` with `expand=text` or
|
|
29
|
+
* `expand=markdown` to retrieve results.
|
|
16
30
|
*/
|
|
17
31
|
create(
|
|
18
32
|
params: ParsingCreateParams & { upload_file?: Uploadable },
|
|
@@ -43,8 +57,10 @@ export class Parsing extends APIResource {
|
|
|
43
57
|
}
|
|
44
58
|
|
|
45
59
|
/**
|
|
46
|
-
* List parse jobs for the current project
|
|
47
|
-
*
|
|
60
|
+
* List parse jobs for the current project.
|
|
61
|
+
*
|
|
62
|
+
* Filter by `status` or creation date range. Results are paginated — use
|
|
63
|
+
* `page_token` from the response to fetch subsequent pages.
|
|
48
64
|
*/
|
|
49
65
|
list(
|
|
50
66
|
query: ParsingListParams | null | undefined = {},
|
|
@@ -57,7 +73,17 @@ export class Parsing extends APIResource {
|
|
|
57
73
|
}
|
|
58
74
|
|
|
59
75
|
/**
|
|
60
|
-
* Retrieve parse job with optional content
|
|
76
|
+
* Retrieve a parse job with optional expanded content.
|
|
77
|
+
*
|
|
78
|
+
* By default returns job metadata only. Use `expand` to include parsed content:
|
|
79
|
+
*
|
|
80
|
+
* - `text` — plain text output
|
|
81
|
+
* - `markdown` — markdown output
|
|
82
|
+
* - `items` — structured page-by-page output
|
|
83
|
+
* - `job_metadata` — usage and processing details
|
|
84
|
+
*
|
|
85
|
+
* Content metadata fields (e.g. `text_content_metadata`) return presigned URLs for
|
|
86
|
+
* downloading large results.
|
|
61
87
|
*/
|
|
62
88
|
get(
|
|
63
89
|
jobID: string,
|
|
@@ -599,18 +625,27 @@ export type LlamaParseSupportedFileExtensions =
|
|
|
599
625
|
| '.webm';
|
|
600
626
|
|
|
601
627
|
/**
|
|
602
|
-
*
|
|
628
|
+
* A parse job (v1).
|
|
603
629
|
*/
|
|
604
630
|
export interface ParsingJob {
|
|
631
|
+
/**
|
|
632
|
+
* Unique parse job identifier
|
|
633
|
+
*/
|
|
605
634
|
id: string;
|
|
606
635
|
|
|
607
636
|
/**
|
|
608
|
-
*
|
|
637
|
+
* Current job status
|
|
609
638
|
*/
|
|
610
639
|
status: StatusEnum;
|
|
611
640
|
|
|
641
|
+
/**
|
|
642
|
+
* Machine-readable error code when failed
|
|
643
|
+
*/
|
|
612
644
|
error_code?: string | null;
|
|
613
645
|
|
|
646
|
+
/**
|
|
647
|
+
* Human-readable error details when failed
|
|
648
|
+
*/
|
|
614
649
|
error_message?: string | null;
|
|
615
650
|
}
|
|
616
651
|
|
|
@@ -811,11 +846,11 @@ export interface TextItem {
|
|
|
811
846
|
}
|
|
812
847
|
|
|
813
848
|
/**
|
|
814
|
-
*
|
|
849
|
+
* A parse job.
|
|
815
850
|
*/
|
|
816
851
|
export interface ParsingCreateResponse {
|
|
817
852
|
/**
|
|
818
|
-
* Unique
|
|
853
|
+
* Unique parse job identifier
|
|
819
854
|
*/
|
|
820
855
|
id: string;
|
|
821
856
|
|
|
@@ -825,7 +860,7 @@ export interface ParsingCreateResponse {
|
|
|
825
860
|
project_id: string;
|
|
826
861
|
|
|
827
862
|
/**
|
|
828
|
-
* Current
|
|
863
|
+
* Current job status: PENDING, RUNNING, COMPLETED, FAILED, or CANCELLED
|
|
829
864
|
*/
|
|
830
865
|
status: 'PENDING' | 'RUNNING' | 'COMPLETED' | 'FAILED' | 'CANCELLED';
|
|
831
866
|
|
|
@@ -835,15 +870,20 @@ export interface ParsingCreateResponse {
|
|
|
835
870
|
created_at?: string | null;
|
|
836
871
|
|
|
837
872
|
/**
|
|
838
|
-
* Error
|
|
873
|
+
* Error details when status is FAILED
|
|
839
874
|
*/
|
|
840
875
|
error_message?: string | null;
|
|
841
876
|
|
|
842
877
|
/**
|
|
843
|
-
*
|
|
878
|
+
* Optional display name for this parse job
|
|
844
879
|
*/
|
|
845
880
|
name?: string | null;
|
|
846
881
|
|
|
882
|
+
/**
|
|
883
|
+
* Parsing tier used for this job
|
|
884
|
+
*/
|
|
885
|
+
tier?: string | null;
|
|
886
|
+
|
|
847
887
|
/**
|
|
848
888
|
* Update datetime
|
|
849
889
|
*/
|
|
@@ -851,11 +891,11 @@ export interface ParsingCreateResponse {
|
|
|
851
891
|
}
|
|
852
892
|
|
|
853
893
|
/**
|
|
854
|
-
*
|
|
894
|
+
* A parse job.
|
|
855
895
|
*/
|
|
856
896
|
export interface ParsingListResponse {
|
|
857
897
|
/**
|
|
858
|
-
* Unique
|
|
898
|
+
* Unique parse job identifier
|
|
859
899
|
*/
|
|
860
900
|
id: string;
|
|
861
901
|
|
|
@@ -865,7 +905,7 @@ export interface ParsingListResponse {
|
|
|
865
905
|
project_id: string;
|
|
866
906
|
|
|
867
907
|
/**
|
|
868
|
-
* Current
|
|
908
|
+
* Current job status: PENDING, RUNNING, COMPLETED, FAILED, or CANCELLED
|
|
869
909
|
*/
|
|
870
910
|
status: 'PENDING' | 'RUNNING' | 'COMPLETED' | 'FAILED' | 'CANCELLED';
|
|
871
911
|
|
|
@@ -875,15 +915,20 @@ export interface ParsingListResponse {
|
|
|
875
915
|
created_at?: string | null;
|
|
876
916
|
|
|
877
917
|
/**
|
|
878
|
-
* Error
|
|
918
|
+
* Error details when status is FAILED
|
|
879
919
|
*/
|
|
880
920
|
error_message?: string | null;
|
|
881
921
|
|
|
882
922
|
/**
|
|
883
|
-
*
|
|
923
|
+
* Optional display name for this parse job
|
|
884
924
|
*/
|
|
885
925
|
name?: string | null;
|
|
886
926
|
|
|
927
|
+
/**
|
|
928
|
+
* Parsing tier used for this job
|
|
929
|
+
*/
|
|
930
|
+
tier?: string | null;
|
|
931
|
+
|
|
887
932
|
/**
|
|
888
933
|
* Update datetime
|
|
889
934
|
*/
|
|
@@ -912,6 +957,11 @@ export interface ParsingGetResponse {
|
|
|
912
957
|
*/
|
|
913
958
|
items?: ParsingGetResponse.Items | null;
|
|
914
959
|
|
|
960
|
+
/**
|
|
961
|
+
* Job execution metadata (if requested)
|
|
962
|
+
*/
|
|
963
|
+
job_metadata?: { [key: string]: unknown } | null;
|
|
964
|
+
|
|
915
965
|
/**
|
|
916
966
|
* Markdown result (if requested)
|
|
917
967
|
*/
|
|
@@ -951,7 +1001,7 @@ export namespace ParsingGetResponse {
|
|
|
951
1001
|
*/
|
|
952
1002
|
export interface Job {
|
|
953
1003
|
/**
|
|
954
|
-
* Unique
|
|
1004
|
+
* Unique parse job identifier
|
|
955
1005
|
*/
|
|
956
1006
|
id: string;
|
|
957
1007
|
|
|
@@ -961,7 +1011,7 @@ export namespace ParsingGetResponse {
|
|
|
961
1011
|
project_id: string;
|
|
962
1012
|
|
|
963
1013
|
/**
|
|
964
|
-
* Current
|
|
1014
|
+
* Current job status: PENDING, RUNNING, COMPLETED, FAILED, or CANCELLED
|
|
965
1015
|
*/
|
|
966
1016
|
status: 'PENDING' | 'RUNNING' | 'COMPLETED' | 'FAILED' | 'CANCELLED';
|
|
967
1017
|
|
|
@@ -971,15 +1021,20 @@ export namespace ParsingGetResponse {
|
|
|
971
1021
|
created_at?: string | null;
|
|
972
1022
|
|
|
973
1023
|
/**
|
|
974
|
-
* Error
|
|
1024
|
+
* Error details when status is FAILED
|
|
975
1025
|
*/
|
|
976
1026
|
error_message?: string | null;
|
|
977
1027
|
|
|
978
1028
|
/**
|
|
979
|
-
*
|
|
1029
|
+
* Optional display name for this parse job
|
|
980
1030
|
*/
|
|
981
1031
|
name?: string | null;
|
|
982
1032
|
|
|
1033
|
+
/**
|
|
1034
|
+
* Parsing tier used for this job
|
|
1035
|
+
*/
|
|
1036
|
+
tier?: string | null;
|
|
1037
|
+
|
|
983
1038
|
/**
|
|
984
1039
|
* Update datetime
|
|
985
1040
|
*/
|
|
@@ -1016,6 +1071,17 @@ export namespace ParsingGetResponse {
|
|
|
1016
1071
|
*/
|
|
1017
1072
|
index: number;
|
|
1018
1073
|
|
|
1074
|
+
/**
|
|
1075
|
+
* Bounding box for an image on its page.
|
|
1076
|
+
*/
|
|
1077
|
+
bbox?: Image.Bbox | null;
|
|
1078
|
+
|
|
1079
|
+
/**
|
|
1080
|
+
* Image category: 'screenshot' (full page), 'embedded' (images in document), or
|
|
1081
|
+
* 'layout' (cropped from layout detection)
|
|
1082
|
+
*/
|
|
1083
|
+
category?: 'screenshot' | 'embedded' | 'layout' | null;
|
|
1084
|
+
|
|
1019
1085
|
/**
|
|
1020
1086
|
* MIME type of the image
|
|
1021
1087
|
*/
|
|
@@ -1031,6 +1097,33 @@ export namespace ParsingGetResponse {
|
|
|
1031
1097
|
*/
|
|
1032
1098
|
size_bytes?: number | null;
|
|
1033
1099
|
}
|
|
1100
|
+
|
|
1101
|
+
export namespace Image {
|
|
1102
|
+
/**
|
|
1103
|
+
* Bounding box for an image on its page.
|
|
1104
|
+
*/
|
|
1105
|
+
export interface Bbox {
|
|
1106
|
+
/**
|
|
1107
|
+
* Height of the bounding box
|
|
1108
|
+
*/
|
|
1109
|
+
h: number;
|
|
1110
|
+
|
|
1111
|
+
/**
|
|
1112
|
+
* Width of the bounding box
|
|
1113
|
+
*/
|
|
1114
|
+
w: number;
|
|
1115
|
+
|
|
1116
|
+
/**
|
|
1117
|
+
* X coordinate of the bounding box
|
|
1118
|
+
*/
|
|
1119
|
+
x: number;
|
|
1120
|
+
|
|
1121
|
+
/**
|
|
1122
|
+
* Y coordinate of the bounding box
|
|
1123
|
+
*/
|
|
1124
|
+
y: number;
|
|
1125
|
+
}
|
|
1126
|
+
}
|
|
1034
1127
|
}
|
|
1035
1128
|
|
|
1036
1129
|
/**
|
|
@@ -1259,12 +1352,15 @@ export namespace ParsingGetResponse {
|
|
|
1259
1352
|
|
|
1260
1353
|
export interface ParsingCreateParams {
|
|
1261
1354
|
/**
|
|
1262
|
-
* Body param:
|
|
1355
|
+
* Body param: Parsing tier: 'fast' (rule-based, cheapest), 'cost_effective'
|
|
1356
|
+
* (balanced), 'agentic' (AI-powered with custom prompts), or 'agentic_plus'
|
|
1357
|
+
* (premium AI with highest accuracy)
|
|
1263
1358
|
*/
|
|
1264
1359
|
tier: 'fast' | 'cost_effective' | 'agentic' | 'agentic_plus';
|
|
1265
1360
|
|
|
1266
1361
|
/**
|
|
1267
|
-
* Body param:
|
|
1362
|
+
* Body param: Tier version. Use 'latest' for the current stable version, or
|
|
1363
|
+
* specify a specific version (e.g., '1.0', '2.0') for reproducible results
|
|
1268
1364
|
*/
|
|
1269
1365
|
version:
|
|
1270
1366
|
| '2025-12-11'
|
|
@@ -1283,6 +1379,23 @@ export interface ParsingCreateParams {
|
|
|
1283
1379
|
| '2026-02-20'
|
|
1284
1380
|
| '2026-02-24'
|
|
1285
1381
|
| '2026-02-26'
|
|
1382
|
+
| '2026-03-02'
|
|
1383
|
+
| '2026-03-03'
|
|
1384
|
+
| '2026-03-04'
|
|
1385
|
+
| '2026-03-05'
|
|
1386
|
+
| '2026-03-09'
|
|
1387
|
+
| '2026-03-10'
|
|
1388
|
+
| '2026-03-11'
|
|
1389
|
+
| '2026-03-12'
|
|
1390
|
+
| '2026-03-17'
|
|
1391
|
+
| '2026-03-19'
|
|
1392
|
+
| '2026-03-20'
|
|
1393
|
+
| '2026-03-22'
|
|
1394
|
+
| '2026-03-23'
|
|
1395
|
+
| '2026-03-24'
|
|
1396
|
+
| '2026-03-25'
|
|
1397
|
+
| '2026-03-26'
|
|
1398
|
+
| '2026-03-27'
|
|
1286
1399
|
| 'latest'
|
|
1287
1400
|
| (string & {});
|
|
1288
1401
|
|
|
@@ -1297,296 +1410,341 @@ export interface ParsingCreateParams {
|
|
|
1297
1410
|
project_id?: string | null;
|
|
1298
1411
|
|
|
1299
1412
|
/**
|
|
1300
|
-
* Body param: Options for
|
|
1413
|
+
* Body param: Options for AI-powered parsing tiers (cost_effective, agentic,
|
|
1414
|
+
* agentic_plus).
|
|
1415
|
+
*
|
|
1416
|
+
* These options customize how the AI processes and interprets document content.
|
|
1417
|
+
* Only applicable when using non-fast tiers.
|
|
1301
1418
|
*/
|
|
1302
1419
|
agentic_options?: ParsingCreateParams.AgenticOptions | null;
|
|
1303
1420
|
|
|
1304
1421
|
/**
|
|
1305
|
-
* Body param:
|
|
1422
|
+
* Body param: Identifier for the client/application making the request. Used for
|
|
1423
|
+
* analytics and debugging. Example: 'my-app-v2'
|
|
1306
1424
|
*/
|
|
1307
1425
|
client_name?: string | null;
|
|
1308
1426
|
|
|
1309
1427
|
/**
|
|
1310
|
-
* Body param:
|
|
1428
|
+
* Body param: Crop boundaries to process only a portion of each page. Values are
|
|
1429
|
+
* ratios 0-1 from page edges
|
|
1311
1430
|
*/
|
|
1312
1431
|
crop_box?: ParsingCreateParams.CropBox;
|
|
1313
1432
|
|
|
1314
1433
|
/**
|
|
1315
|
-
* Body param:
|
|
1434
|
+
* Body param: Bypass result caching and force re-parsing. Use when document
|
|
1435
|
+
* content may have changed or you need fresh results
|
|
1316
1436
|
*/
|
|
1317
1437
|
disable_cache?: boolean | null;
|
|
1318
1438
|
|
|
1319
1439
|
/**
|
|
1320
|
-
* Body param: Options for fast tier parsing (
|
|
1440
|
+
* Body param: Options for fast tier parsing (rule-based, no AI).
|
|
1441
|
+
*
|
|
1442
|
+
* Fast tier uses deterministic algorithms for text extraction without AI
|
|
1443
|
+
* enhancement. It's the fastest and most cost-effective option, best suited for
|
|
1444
|
+
* simple documents with standard layouts. Currently has no configurable options
|
|
1445
|
+
* but reserved for future expansion.
|
|
1321
1446
|
*/
|
|
1322
1447
|
fast_options?: unknown | null;
|
|
1323
1448
|
|
|
1324
1449
|
/**
|
|
1325
|
-
* Body param: ID of an existing file in the project to parse
|
|
1450
|
+
* Body param: ID of an existing file in the project to parse. Mutually exclusive
|
|
1451
|
+
* with source_url
|
|
1326
1452
|
*/
|
|
1327
1453
|
file_id?: string | null;
|
|
1328
1454
|
|
|
1329
1455
|
/**
|
|
1330
|
-
* Body param: HTTP proxy
|
|
1456
|
+
* Body param: HTTP/HTTPS proxy for fetching source_url. Ignored if using file_id
|
|
1331
1457
|
*/
|
|
1332
1458
|
http_proxy?: string | null;
|
|
1333
1459
|
|
|
1334
1460
|
/**
|
|
1335
|
-
* Body param:
|
|
1461
|
+
* Body param: Format-specific options (HTML, PDF, spreadsheet, presentation).
|
|
1462
|
+
* Applied based on detected input file type
|
|
1336
1463
|
*/
|
|
1337
1464
|
input_options?: ParsingCreateParams.InputOptions;
|
|
1338
1465
|
|
|
1339
1466
|
/**
|
|
1340
|
-
* Body param: Output
|
|
1467
|
+
* Body param: Output formatting options for markdown, text, and extracted images
|
|
1341
1468
|
*/
|
|
1342
1469
|
output_options?: ParsingCreateParams.OutputOptions;
|
|
1343
1470
|
|
|
1344
1471
|
/**
|
|
1345
|
-
* Body param: Page
|
|
1472
|
+
* Body param: Page selection: limit total pages or specify exact pages to process
|
|
1346
1473
|
*/
|
|
1347
1474
|
page_ranges?: ParsingCreateParams.PageRanges;
|
|
1348
1475
|
|
|
1349
1476
|
/**
|
|
1350
|
-
* Body param: Job
|
|
1477
|
+
* Body param: Job execution controls including timeouts and failure thresholds
|
|
1351
1478
|
*/
|
|
1352
1479
|
processing_control?: ParsingCreateParams.ProcessingControl;
|
|
1353
1480
|
|
|
1354
1481
|
/**
|
|
1355
|
-
* Body param:
|
|
1482
|
+
* Body param: Document processing options including OCR, table extraction, and
|
|
1483
|
+
* chart parsing
|
|
1356
1484
|
*/
|
|
1357
1485
|
processing_options?: ParsingCreateParams.ProcessingOptions;
|
|
1358
1486
|
|
|
1359
1487
|
/**
|
|
1360
|
-
* Body param:
|
|
1488
|
+
* Body param: Public URL of the document to parse. Mutually exclusive with file_id
|
|
1361
1489
|
*/
|
|
1362
1490
|
source_url?: string | null;
|
|
1363
1491
|
|
|
1364
1492
|
/**
|
|
1365
|
-
* Body param:
|
|
1493
|
+
* Body param: Webhook endpoints for job status notifications. Multiple webhooks
|
|
1494
|
+
* can be configured for different events or services
|
|
1366
1495
|
*/
|
|
1367
1496
|
webhook_configurations?: Array<ParsingCreateParams.WebhookConfiguration>;
|
|
1368
1497
|
}
|
|
1369
1498
|
|
|
1370
1499
|
export namespace ParsingCreateParams {
|
|
1371
1500
|
/**
|
|
1372
|
-
* Options for
|
|
1501
|
+
* Options for AI-powered parsing tiers (cost_effective, agentic, agentic_plus).
|
|
1502
|
+
*
|
|
1503
|
+
* These options customize how the AI processes and interprets document content.
|
|
1504
|
+
* Only applicable when using non-fast tiers.
|
|
1373
1505
|
*/
|
|
1374
1506
|
export interface AgenticOptions {
|
|
1375
1507
|
/**
|
|
1376
|
-
* Custom
|
|
1508
|
+
* Custom instructions for the AI parser. Use to guide extraction behavior, specify
|
|
1509
|
+
* output formatting, or provide domain-specific context. Example: 'Extract
|
|
1510
|
+
* financial tables with currency symbols. Format dates as YYYY-MM-DD.'
|
|
1377
1511
|
*/
|
|
1378
1512
|
custom_prompt?: string | null;
|
|
1379
1513
|
}
|
|
1380
1514
|
|
|
1381
1515
|
/**
|
|
1382
|
-
*
|
|
1516
|
+
* Crop boundaries to process only a portion of each page. Values are ratios 0-1
|
|
1517
|
+
* from page edges
|
|
1383
1518
|
*/
|
|
1384
1519
|
export interface CropBox {
|
|
1385
1520
|
/**
|
|
1386
|
-
* Bottom boundary
|
|
1521
|
+
* Bottom boundary as ratio (0-1). 0=top edge, 1=bottom edge. Content below this
|
|
1522
|
+
* line is excluded
|
|
1387
1523
|
*/
|
|
1388
1524
|
bottom?: number | null;
|
|
1389
1525
|
|
|
1390
1526
|
/**
|
|
1391
|
-
* Left boundary
|
|
1527
|
+
* Left boundary as ratio (0-1). 0=left edge, 1=right edge. Content left of this
|
|
1528
|
+
* line is excluded
|
|
1392
1529
|
*/
|
|
1393
1530
|
left?: number | null;
|
|
1394
1531
|
|
|
1395
1532
|
/**
|
|
1396
|
-
* Right boundary
|
|
1533
|
+
* Right boundary as ratio (0-1). 0=left edge, 1=right edge. Content right of this
|
|
1534
|
+
* line is excluded
|
|
1397
1535
|
*/
|
|
1398
1536
|
right?: number | null;
|
|
1399
1537
|
|
|
1400
1538
|
/**
|
|
1401
|
-
* Top boundary
|
|
1539
|
+
* Top boundary as ratio (0-1). 0=top edge, 1=bottom edge. Content above this line
|
|
1540
|
+
* is excluded
|
|
1402
1541
|
*/
|
|
1403
1542
|
top?: number | null;
|
|
1404
1543
|
}
|
|
1405
1544
|
|
|
1406
1545
|
/**
|
|
1407
|
-
*
|
|
1546
|
+
* Format-specific options (HTML, PDF, spreadsheet, presentation). Applied based on
|
|
1547
|
+
* detected input file type
|
|
1408
1548
|
*/
|
|
1409
1549
|
export interface InputOptions {
|
|
1410
1550
|
/**
|
|
1411
|
-
* HTML
|
|
1551
|
+
* HTML/web page parsing options (applies to .html, .htm files)
|
|
1412
1552
|
*/
|
|
1413
1553
|
html?: InputOptions.HTML;
|
|
1414
1554
|
|
|
1415
1555
|
/**
|
|
1416
|
-
* PDF-specific parsing options
|
|
1556
|
+
* PDF-specific parsing options (applies to .pdf files)
|
|
1417
1557
|
*/
|
|
1418
1558
|
pdf?: unknown;
|
|
1419
1559
|
|
|
1420
1560
|
/**
|
|
1421
|
-
* Presentation
|
|
1561
|
+
* Presentation parsing options (applies to .pptx, .ppt, .odp, .key files)
|
|
1422
1562
|
*/
|
|
1423
1563
|
presentation?: InputOptions.Presentation;
|
|
1424
1564
|
|
|
1425
1565
|
/**
|
|
1426
|
-
* Spreadsheet
|
|
1566
|
+
* Spreadsheet parsing options (applies to .xlsx, .xls, .csv, .ods files)
|
|
1427
1567
|
*/
|
|
1428
1568
|
spreadsheet?: InputOptions.Spreadsheet;
|
|
1429
1569
|
}
|
|
1430
1570
|
|
|
1431
1571
|
export namespace InputOptions {
|
|
1432
1572
|
/**
|
|
1433
|
-
* HTML
|
|
1573
|
+
* HTML/web page parsing options (applies to .html, .htm files)
|
|
1434
1574
|
*/
|
|
1435
1575
|
export interface HTML {
|
|
1436
1576
|
/**
|
|
1437
|
-
*
|
|
1577
|
+
* Force all HTML elements to be visible by overriding CSS display/visibility
|
|
1578
|
+
* properties. Useful for parsing pages with hidden content or collapsed sections
|
|
1438
1579
|
*/
|
|
1439
1580
|
make_all_elements_visible?: boolean | null;
|
|
1440
1581
|
|
|
1441
1582
|
/**
|
|
1442
|
-
* Remove fixed
|
|
1583
|
+
* Remove fixed-position elements (headers, footers, floating buttons) that appear
|
|
1584
|
+
* on every page render
|
|
1443
1585
|
*/
|
|
1444
1586
|
remove_fixed_elements?: boolean | null;
|
|
1445
1587
|
|
|
1446
1588
|
/**
|
|
1447
|
-
* Remove navigation elements
|
|
1589
|
+
* Remove navigation elements (nav bars, sidebars, menus) to focus on main content
|
|
1448
1590
|
*/
|
|
1449
1591
|
remove_navigation_elements?: boolean | null;
|
|
1450
1592
|
}
|
|
1451
1593
|
|
|
1452
1594
|
/**
|
|
1453
|
-
* Presentation
|
|
1595
|
+
* Presentation parsing options (applies to .pptx, .ppt, .odp, .key files)
|
|
1454
1596
|
*/
|
|
1455
1597
|
export interface Presentation {
|
|
1456
1598
|
/**
|
|
1457
|
-
* Extract
|
|
1599
|
+
* Extract content positioned outside the visible slide area. Some presentations
|
|
1600
|
+
* have hidden notes or content that extends beyond slide boundaries
|
|
1458
1601
|
*/
|
|
1459
1602
|
out_of_bounds_content?: boolean | null;
|
|
1460
1603
|
|
|
1461
1604
|
/**
|
|
1462
|
-
* Skip extraction of embedded data
|
|
1605
|
+
* Skip extraction of embedded chart data tables. When true, only the visual
|
|
1606
|
+
* representation of charts is captured, not the underlying data
|
|
1463
1607
|
*/
|
|
1464
1608
|
skip_embedded_data?: boolean | null;
|
|
1465
1609
|
}
|
|
1466
1610
|
|
|
1467
1611
|
/**
|
|
1468
|
-
* Spreadsheet
|
|
1612
|
+
* Spreadsheet parsing options (applies to .xlsx, .xls, .csv, .ods files)
|
|
1469
1613
|
*/
|
|
1470
1614
|
export interface Spreadsheet {
|
|
1471
1615
|
/**
|
|
1472
|
-
* Detect and extract
|
|
1616
|
+
* Detect and extract multiple tables within a single sheet. Useful when
|
|
1617
|
+
* spreadsheets contain several data regions separated by blank rows/columns
|
|
1473
1618
|
*/
|
|
1474
1619
|
detect_sub_tables_in_sheets?: boolean | null;
|
|
1475
1620
|
|
|
1476
1621
|
/**
|
|
1477
|
-
*
|
|
1622
|
+
* Compute formula results instead of extracting formula text. Use when you need
|
|
1623
|
+
* calculated values rather than formula definitions
|
|
1478
1624
|
*/
|
|
1479
1625
|
force_formula_computation_in_sheets?: boolean | null;
|
|
1480
1626
|
|
|
1481
1627
|
/**
|
|
1482
|
-
*
|
|
1628
|
+
* Parse hidden sheets in addition to visible ones. By default, hidden sheets are
|
|
1629
|
+
* skipped
|
|
1483
1630
|
*/
|
|
1484
1631
|
include_hidden_sheets?: boolean | null;
|
|
1485
1632
|
}
|
|
1486
1633
|
}
|
|
1487
1634
|
|
|
1488
1635
|
/**
|
|
1489
|
-
* Output
|
|
1636
|
+
* Output formatting options for markdown, text, and extracted images
|
|
1490
1637
|
*/
|
|
1491
1638
|
export interface OutputOptions {
|
|
1492
1639
|
/**
|
|
1493
|
-
* Extract printed page
|
|
1640
|
+
* Extract the printed page number as it appears in the document (e.g., 'Page 5 of
|
|
1641
|
+
* 10', 'v', 'A-3'). Useful for referencing original page numbers
|
|
1494
1642
|
*/
|
|
1495
1643
|
extract_printed_page_number?: boolean | null;
|
|
1496
1644
|
|
|
1497
1645
|
/**
|
|
1498
|
-
* Image categories to save: 'screenshot' (full page
|
|
1499
|
-
*
|
|
1500
|
-
*
|
|
1646
|
+
* Image categories to extract and save. Options: 'screenshot' (full page renders
|
|
1647
|
+
* useful for visual QA), 'embedded' (images found within the document), 'layout'
|
|
1648
|
+
* (cropped regions from layout detection like figures and diagrams). Empty list
|
|
1649
|
+
* saves no images
|
|
1501
1650
|
*/
|
|
1502
1651
|
images_to_save?: Array<'screenshot' | 'embedded' | 'layout'>;
|
|
1503
1652
|
|
|
1504
1653
|
/**
|
|
1505
|
-
* Markdown
|
|
1654
|
+
* Markdown formatting options including table styles and link annotations
|
|
1506
1655
|
*/
|
|
1507
1656
|
markdown?: OutputOptions.Markdown;
|
|
1508
1657
|
|
|
1509
1658
|
/**
|
|
1510
|
-
* Spatial text output options
|
|
1659
|
+
* Spatial text output options for preserving document layout structure
|
|
1511
1660
|
*/
|
|
1512
1661
|
spatial_text?: OutputOptions.SpatialText;
|
|
1513
1662
|
|
|
1514
1663
|
/**
|
|
1515
|
-
*
|
|
1664
|
+
* Options for exporting tables as XLSX spreadsheets
|
|
1516
1665
|
*/
|
|
1517
1666
|
tables_as_spreadsheet?: OutputOptions.TablesAsSpreadsheet;
|
|
1518
1667
|
}
|
|
1519
1668
|
|
|
1520
1669
|
export namespace OutputOptions {
|
|
1521
1670
|
/**
|
|
1522
|
-
* Markdown
|
|
1671
|
+
* Markdown formatting options including table styles and link annotations
|
|
1523
1672
|
*/
|
|
1524
1673
|
export interface Markdown {
|
|
1525
1674
|
/**
|
|
1526
|
-
* Add annotations to
|
|
1675
|
+
* Add link annotations to markdown output in the format [text](url). When false,
|
|
1676
|
+
* only the link text is included
|
|
1527
1677
|
*/
|
|
1528
1678
|
annotate_links?: boolean | null;
|
|
1529
1679
|
|
|
1530
1680
|
/**
|
|
1531
|
-
*
|
|
1681
|
+
* Embed images directly in markdown as base64 data URIs instead of extracting them
|
|
1682
|
+
* as separate files. Useful for self-contained markdown output
|
|
1532
1683
|
*/
|
|
1533
1684
|
inline_images?: boolean | null;
|
|
1534
1685
|
|
|
1535
1686
|
/**
|
|
1536
|
-
* Table formatting options
|
|
1687
|
+
* Table formatting options including markdown vs HTML format and merging behavior
|
|
1537
1688
|
*/
|
|
1538
1689
|
tables?: Markdown.Tables;
|
|
1539
1690
|
}
|
|
1540
1691
|
|
|
1541
1692
|
export namespace Markdown {
|
|
1542
1693
|
/**
|
|
1543
|
-
* Table formatting options
|
|
1694
|
+
* Table formatting options including markdown vs HTML format and merging behavior
|
|
1544
1695
|
*/
|
|
1545
1696
|
export interface Tables {
|
|
1546
1697
|
/**
|
|
1547
|
-
*
|
|
1698
|
+
* Remove extra whitespace padding in markdown table cells for more compact output
|
|
1548
1699
|
*/
|
|
1549
1700
|
compact_markdown_tables?: boolean | null;
|
|
1550
1701
|
|
|
1551
1702
|
/**
|
|
1552
|
-
* Separator for multiline content in markdown tables
|
|
1703
|
+
* Separator string for multiline cell content in markdown tables. Example:
|
|
1704
|
+
* '<br>' to preserve line breaks, ' ' to join with spaces
|
|
1553
1705
|
*/
|
|
1554
1706
|
markdown_table_multiline_separator?: string | null;
|
|
1555
1707
|
|
|
1556
1708
|
/**
|
|
1557
|
-
*
|
|
1709
|
+
* Automatically merge tables that span multiple pages into a single table. The
|
|
1710
|
+
* merged table appears on the first page with merged_from_pages metadata
|
|
1558
1711
|
*/
|
|
1559
1712
|
merge_continued_tables?: boolean | null;
|
|
1560
1713
|
|
|
1561
1714
|
/**
|
|
1562
|
-
* Output tables
|
|
1715
|
+
* Output tables as markdown pipe tables instead of HTML <table> tags.
|
|
1716
|
+
* Markdown tables are simpler but cannot represent complex structures like merged
|
|
1717
|
+
* cells
|
|
1563
1718
|
*/
|
|
1564
1719
|
output_tables_as_markdown?: boolean | null;
|
|
1565
1720
|
}
|
|
1566
1721
|
}
|
|
1567
1722
|
|
|
1568
1723
|
/**
|
|
1569
|
-
* Spatial text output options
|
|
1724
|
+
* Spatial text output options for preserving document layout structure
|
|
1570
1725
|
*/
|
|
1571
1726
|
export interface SpatialText {
|
|
1572
1727
|
/**
|
|
1573
|
-
* Keep column
|
|
1728
|
+
* Keep multi-column layouts intact instead of linearizing columns into sequential
|
|
1729
|
+
* text. Automatically enabled for non-fast tiers
|
|
1574
1730
|
*/
|
|
1575
1731
|
do_not_unroll_columns?: boolean | null;
|
|
1576
1732
|
|
|
1577
1733
|
/**
|
|
1578
|
-
*
|
|
1734
|
+
* Maintain consistent text column alignment across page boundaries. Automatically
|
|
1735
|
+
* enabled for document-level parsing modes
|
|
1579
1736
|
*/
|
|
1580
1737
|
preserve_layout_alignment_across_pages?: boolean | null;
|
|
1581
1738
|
|
|
1582
1739
|
/**
|
|
1583
|
-
* Include
|
|
1740
|
+
* Include text below the normal size threshold. Useful for footnotes, watermarks,
|
|
1741
|
+
* or fine print that might otherwise be filtered out
|
|
1584
1742
|
*/
|
|
1585
1743
|
preserve_very_small_text?: boolean | null;
|
|
1586
1744
|
}
|
|
1587
1745
|
|
|
1588
1746
|
/**
|
|
1589
|
-
*
|
|
1747
|
+
* Options for exporting tables as XLSX spreadsheets
|
|
1590
1748
|
*/
|
|
1591
1749
|
export interface TablesAsSpreadsheet {
|
|
1592
1750
|
/**
|
|
@@ -1595,140 +1753,165 @@ export namespace ParsingCreateParams {
|
|
|
1595
1753
|
enable?: boolean | null;
|
|
1596
1754
|
|
|
1597
1755
|
/**
|
|
1598
|
-
* Automatically
|
|
1756
|
+
* Automatically generate descriptive sheet names from table context (headers,
|
|
1757
|
+
* surrounding text) instead of using generic names like 'Table_1'
|
|
1599
1758
|
*/
|
|
1600
1759
|
guess_sheet_name?: boolean;
|
|
1601
1760
|
}
|
|
1602
1761
|
}
|
|
1603
1762
|
|
|
1604
1763
|
/**
|
|
1605
|
-
* Page
|
|
1764
|
+
* Page selection: limit total pages or specify exact pages to process
|
|
1606
1765
|
*/
|
|
1607
1766
|
export interface PageRanges {
|
|
1608
1767
|
/**
|
|
1609
|
-
* Maximum number of pages to process
|
|
1768
|
+
* Maximum number of pages to process. Pages are processed in order starting from
|
|
1769
|
+
* page 1. If both max_pages and target_pages are set, target_pages takes
|
|
1770
|
+
* precedence
|
|
1610
1771
|
*/
|
|
1611
1772
|
max_pages?: number | null;
|
|
1612
1773
|
|
|
1613
1774
|
/**
|
|
1614
|
-
*
|
|
1775
|
+
* Comma-separated list of specific pages to process using 1-based indexing.
|
|
1776
|
+
* Supports individual pages and ranges. Examples: '1,3,5' (pages 1, 3, 5), '1-5'
|
|
1777
|
+
* (pages 1 through 5 inclusive), '1,3,5-8,10' (pages 1, 3, 5-8, and 10). Pages are
|
|
1778
|
+
* sorted and deduplicated automatically. Duplicate pages cause an error
|
|
1615
1779
|
*/
|
|
1616
1780
|
target_pages?: string | null;
|
|
1617
1781
|
}
|
|
1618
1782
|
|
|
1619
1783
|
/**
|
|
1620
|
-
* Job
|
|
1784
|
+
* Job execution controls including timeouts and failure thresholds
|
|
1621
1785
|
*/
|
|
1622
1786
|
export interface ProcessingControl {
|
|
1623
1787
|
/**
|
|
1624
|
-
*
|
|
1788
|
+
* Quality thresholds that determine when a job should fail vs complete with
|
|
1789
|
+
* partial results
|
|
1625
1790
|
*/
|
|
1626
1791
|
job_failure_conditions?: ProcessingControl.JobFailureConditions;
|
|
1627
1792
|
|
|
1628
1793
|
/**
|
|
1629
|
-
* Timeout
|
|
1794
|
+
* Timeout settings for job execution. Increase for large or complex documents
|
|
1630
1795
|
*/
|
|
1631
1796
|
timeouts?: ProcessingControl.Timeouts;
|
|
1632
1797
|
}
|
|
1633
1798
|
|
|
1634
1799
|
export namespace ProcessingControl {
|
|
1635
1800
|
/**
|
|
1636
|
-
*
|
|
1801
|
+
* Quality thresholds that determine when a job should fail vs complete with
|
|
1802
|
+
* partial results
|
|
1637
1803
|
*/
|
|
1638
1804
|
export interface JobFailureConditions {
|
|
1639
1805
|
/**
|
|
1640
|
-
* Maximum ratio of pages allowed to fail (0-1)
|
|
1806
|
+
* Maximum ratio of pages allowed to fail before the job fails (0-1). Example: 0.1
|
|
1807
|
+
* means job fails if more than 10% of pages fail. Default is 0.05 (5%)
|
|
1641
1808
|
*/
|
|
1642
1809
|
allowed_page_failure_ratio?: number | null;
|
|
1643
1810
|
|
|
1644
1811
|
/**
|
|
1645
|
-
* Fail job if
|
|
1812
|
+
* Fail the job if a problematic font is detected that may cause incorrect text
|
|
1813
|
+
* extraction. Buggy fonts can produce garbled or missing characters
|
|
1646
1814
|
*/
|
|
1647
1815
|
fail_on_buggy_font?: boolean | null;
|
|
1648
1816
|
|
|
1649
1817
|
/**
|
|
1650
|
-
* Fail job if image
|
|
1818
|
+
* Fail the entire job if any embedded image cannot be extracted. By default, image
|
|
1819
|
+
* extraction errors are logged but don't fail the job
|
|
1651
1820
|
*/
|
|
1652
1821
|
fail_on_image_extraction_error?: boolean | null;
|
|
1653
1822
|
|
|
1654
1823
|
/**
|
|
1655
|
-
* Fail job if image OCR
|
|
1824
|
+
* Fail the entire job if OCR fails on any image. By default, OCR errors result in
|
|
1825
|
+
* empty text for that image
|
|
1656
1826
|
*/
|
|
1657
1827
|
fail_on_image_ocr_error?: boolean | null;
|
|
1658
1828
|
|
|
1659
1829
|
/**
|
|
1660
|
-
* Fail job if markdown
|
|
1830
|
+
* Fail the entire job if markdown cannot be reconstructed for any page. By
|
|
1831
|
+
* default, failed pages use fallback text extraction
|
|
1661
1832
|
*/
|
|
1662
1833
|
fail_on_markdown_reconstruction_error?: boolean | null;
|
|
1663
1834
|
}
|
|
1664
1835
|
|
|
1665
1836
|
/**
|
|
1666
|
-
* Timeout
|
|
1837
|
+
* Timeout settings for job execution. Increase for large or complex documents
|
|
1667
1838
|
*/
|
|
1668
1839
|
export interface Timeouts {
|
|
1669
1840
|
/**
|
|
1670
|
-
* Base timeout in seconds (max 30 minutes)
|
|
1841
|
+
* Base timeout for the job in seconds (max 1800 = 30 minutes). This is the minimum
|
|
1842
|
+
* time allowed regardless of document size
|
|
1671
1843
|
*/
|
|
1672
1844
|
base_in_seconds?: number | null;
|
|
1673
1845
|
|
|
1674
1846
|
/**
|
|
1675
|
-
* Additional timeout per page in seconds (max 5 minutes)
|
|
1847
|
+
* Additional timeout per page in seconds (max 300 = 5 minutes). Total timeout =
|
|
1848
|
+
* base + (this value × page count)
|
|
1676
1849
|
*/
|
|
1677
1850
|
extra_time_per_page_in_seconds?: number | null;
|
|
1678
1851
|
}
|
|
1679
1852
|
}
|
|
1680
1853
|
|
|
1681
1854
|
/**
|
|
1682
|
-
*
|
|
1855
|
+
* Document processing options including OCR, table extraction, and chart parsing
|
|
1683
1856
|
*/
|
|
1684
1857
|
export interface ProcessingOptions {
|
|
1685
1858
|
/**
|
|
1686
|
-
*
|
|
1859
|
+
* Use aggressive heuristics to detect table boundaries, even without visible
|
|
1860
|
+
* borders. Useful for documents with borderless or complex tables
|
|
1687
1861
|
*/
|
|
1688
1862
|
aggressive_table_extraction?: boolean | null;
|
|
1689
1863
|
|
|
1690
1864
|
/**
|
|
1691
|
-
*
|
|
1865
|
+
* Conditional processing rules that apply different parsing options based on page
|
|
1866
|
+
* content, document structure, or filename patterns. Each entry defines trigger
|
|
1867
|
+
* conditions and the parsing configuration to apply when triggered
|
|
1692
1868
|
*/
|
|
1693
1869
|
auto_mode_configuration?: Array<ProcessingOptions.AutoModeConfiguration> | null;
|
|
1694
1870
|
|
|
1695
1871
|
/**
|
|
1696
|
-
* Cost optimizer
|
|
1872
|
+
* Cost optimizer configuration for reducing parsing costs on simpler pages.
|
|
1873
|
+
*
|
|
1874
|
+
* When enabled, the parser analyzes each page and routes simpler pages to faster,
|
|
1875
|
+
* cheaper processing while preserving quality for complex pages. Only works with
|
|
1876
|
+
* 'agentic' or 'agentic_plus' tiers.
|
|
1697
1877
|
*/
|
|
1698
1878
|
cost_optimizer?: ProcessingOptions.CostOptimizer | null;
|
|
1699
1879
|
|
|
1700
1880
|
/**
|
|
1701
|
-
*
|
|
1702
|
-
* table handling
|
|
1881
|
+
* Disable automatic heuristics including outlined table extraction and adaptive
|
|
1882
|
+
* long table handling. Use when heuristics produce incorrect results
|
|
1703
1883
|
*/
|
|
1704
1884
|
disable_heuristics?: boolean | null;
|
|
1705
1885
|
|
|
1706
1886
|
/**
|
|
1707
|
-
* Options for ignoring specific text types
|
|
1887
|
+
* Options for ignoring specific text types (diagonal, hidden, text in images)
|
|
1708
1888
|
*/
|
|
1709
1889
|
ignore?: ProcessingOptions.Ignore;
|
|
1710
1890
|
|
|
1711
1891
|
/**
|
|
1712
|
-
* OCR configuration
|
|
1892
|
+
* OCR configuration including language detection settings
|
|
1713
1893
|
*/
|
|
1714
1894
|
ocr_parameters?: ProcessingOptions.OcrParameters;
|
|
1715
1895
|
|
|
1716
1896
|
/**
|
|
1717
|
-
* Enable
|
|
1897
|
+
* Enable AI-powered chart analysis. Modes: 'efficient' (fast, lower cost),
|
|
1898
|
+
* 'agentic' (balanced), 'agentic_plus' (highest accuracy). Automatically enables
|
|
1899
|
+
* extract_layout and precise_bounding_box when set
|
|
1718
1900
|
*/
|
|
1719
1901
|
specialized_chart_parsing?: 'agentic_plus' | 'agentic' | 'efficient' | null;
|
|
1720
1902
|
}
|
|
1721
1903
|
|
|
1722
1904
|
export namespace ProcessingOptions {
|
|
1723
1905
|
/**
|
|
1724
|
-
* A single
|
|
1906
|
+
* A single auto mode rule with trigger conditions and parsing configuration.
|
|
1907
|
+
*
|
|
1908
|
+
* Auto mode allows conditional parsing where different configurations are applied
|
|
1909
|
+
* based on page content, structure, or filename. When triggers match, the
|
|
1910
|
+
* parsing_conf overrides default settings for that page.
|
|
1725
1911
|
*/
|
|
1726
1912
|
export interface AutoModeConfiguration {
|
|
1727
1913
|
/**
|
|
1728
|
-
*
|
|
1729
|
-
*
|
|
1730
|
-
* This uses V2 API naming conventions. The backend service will convert these to
|
|
1731
|
-
* the V1 format expected by the llamaparse worker.
|
|
1914
|
+
* Parsing configuration to apply when trigger conditions are met
|
|
1732
1915
|
*/
|
|
1733
1916
|
parsing_conf: AutoModeConfiguration.ParsingConf;
|
|
1734
1917
|
|
|
@@ -1903,18 +2086,15 @@ export namespace ParsingCreateParams {
|
|
|
1903
2086
|
text_in_page?: string | null;
|
|
1904
2087
|
|
|
1905
2088
|
/**
|
|
1906
|
-
* How to combine multiple trigger conditions: 'and' (all must match,
|
|
1907
|
-
* 'or' (any can
|
|
2089
|
+
* How to combine multiple trigger conditions: 'and' (all conditions must match,
|
|
2090
|
+
* this is the default) or 'or' (any single condition can trigger)
|
|
1908
2091
|
*/
|
|
1909
2092
|
trigger_mode?: string | null;
|
|
1910
2093
|
}
|
|
1911
2094
|
|
|
1912
2095
|
export namespace AutoModeConfiguration {
|
|
1913
2096
|
/**
|
|
1914
|
-
*
|
|
1915
|
-
*
|
|
1916
|
-
* This uses V2 API naming conventions. The backend service will convert these to
|
|
1917
|
-
* the V1 format expected by the llamaparse worker.
|
|
2097
|
+
* Parsing configuration to apply when trigger conditions are met
|
|
1918
2098
|
*/
|
|
1919
2099
|
export interface ParsingConf {
|
|
1920
2100
|
/**
|
|
@@ -1933,7 +2113,7 @@ export namespace ParsingCreateParams {
|
|
|
1933
2113
|
crop_box?: ParsingConf.CropBox | null;
|
|
1934
2114
|
|
|
1935
2115
|
/**
|
|
1936
|
-
* Custom
|
|
2116
|
+
* Custom AI instructions for matched pages. Overrides the base custom_prompt
|
|
1937
2117
|
*/
|
|
1938
2118
|
custom_prompt?: string | null;
|
|
1939
2119
|
|
|
@@ -1978,12 +2158,12 @@ export namespace ParsingCreateParams {
|
|
|
1978
2158
|
specialized_chart_parsing?: 'agentic_plus' | 'agentic' | 'efficient' | null;
|
|
1979
2159
|
|
|
1980
2160
|
/**
|
|
1981
|
-
*
|
|
2161
|
+
* Override the parsing tier for matched pages. Must be paired with version
|
|
1982
2162
|
*/
|
|
1983
2163
|
tier?: 'fast' | 'cost_effective' | 'agentic' | 'agentic_plus' | null;
|
|
1984
2164
|
|
|
1985
2165
|
/**
|
|
1986
|
-
*
|
|
2166
|
+
* Tier version when overriding tier. Required when tier is specified
|
|
1987
2167
|
*/
|
|
1988
2168
|
version?:
|
|
1989
2169
|
| '2025-12-11'
|
|
@@ -2002,6 +2182,23 @@ export namespace ParsingCreateParams {
|
|
|
2002
2182
|
| '2026-02-20'
|
|
2003
2183
|
| '2026-02-24'
|
|
2004
2184
|
| '2026-02-26'
|
|
2185
|
+
| '2026-03-02'
|
|
2186
|
+
| '2026-03-03'
|
|
2187
|
+
| '2026-03-04'
|
|
2188
|
+
| '2026-03-05'
|
|
2189
|
+
| '2026-03-09'
|
|
2190
|
+
| '2026-03-10'
|
|
2191
|
+
| '2026-03-11'
|
|
2192
|
+
| '2026-03-12'
|
|
2193
|
+
| '2026-03-17'
|
|
2194
|
+
| '2026-03-19'
|
|
2195
|
+
| '2026-03-20'
|
|
2196
|
+
| '2026-03-22'
|
|
2197
|
+
| '2026-03-23'
|
|
2198
|
+
| '2026-03-24'
|
|
2199
|
+
| '2026-03-25'
|
|
2200
|
+
| '2026-03-26'
|
|
2201
|
+
| '2026-03-27'
|
|
2005
2202
|
| 'latest'
|
|
2006
2203
|
| (string & {})
|
|
2007
2204
|
| null;
|
|
@@ -2086,66 +2283,100 @@ export namespace ParsingCreateParams {
|
|
|
2086
2283
|
}
|
|
2087
2284
|
|
|
2088
2285
|
/**
|
|
2089
|
-
* Cost optimizer
|
|
2286
|
+
* Cost optimizer configuration for reducing parsing costs on simpler pages.
|
|
2287
|
+
*
|
|
2288
|
+
* When enabled, the parser analyzes each page and routes simpler pages to faster,
|
|
2289
|
+
* cheaper processing while preserving quality for complex pages. Only works with
|
|
2290
|
+
* 'agentic' or 'agentic_plus' tiers.
|
|
2090
2291
|
*/
|
|
2091
2292
|
export interface CostOptimizer {
|
|
2092
2293
|
/**
|
|
2093
|
-
*
|
|
2094
|
-
*
|
|
2294
|
+
* Enable cost-optimized parsing. Routes simpler pages to faster processing while
|
|
2295
|
+
* complex pages use full AI analysis. May reduce speed on some documents.
|
|
2296
|
+
* IMPORTANT: Only available with 'agentic' or 'agentic_plus' tiers
|
|
2095
2297
|
*/
|
|
2096
2298
|
enable?: boolean | null;
|
|
2097
2299
|
}
|
|
2098
2300
|
|
|
2099
2301
|
/**
|
|
2100
|
-
* Options for ignoring specific text types
|
|
2302
|
+
* Options for ignoring specific text types (diagonal, hidden, text in images)
|
|
2101
2303
|
*/
|
|
2102
2304
|
export interface Ignore {
|
|
2103
2305
|
/**
|
|
2104
|
-
*
|
|
2306
|
+
* Skip text rotated at an angle (not horizontal/vertical). Useful for ignoring
|
|
2307
|
+
* watermarks or decorative angled text
|
|
2105
2308
|
*/
|
|
2106
2309
|
ignore_diagonal_text?: boolean | null;
|
|
2107
2310
|
|
|
2108
2311
|
/**
|
|
2109
|
-
*
|
|
2312
|
+
* Skip text marked as hidden in the document structure. Some PDFs contain
|
|
2313
|
+
* invisible text layers used for accessibility or search indexing
|
|
2110
2314
|
*/
|
|
2111
2315
|
ignore_hidden_text?: boolean | null;
|
|
2112
2316
|
|
|
2113
2317
|
/**
|
|
2114
|
-
*
|
|
2318
|
+
* Skip OCR text extraction from embedded images. Use when images contain
|
|
2319
|
+
* irrelevant text (watermarks, logos) that shouldn't be in the output
|
|
2115
2320
|
*/
|
|
2116
2321
|
ignore_text_in_image?: boolean | null;
|
|
2117
2322
|
}
|
|
2118
2323
|
|
|
2119
2324
|
/**
|
|
2120
|
-
* OCR configuration
|
|
2325
|
+
* OCR configuration including language detection settings
|
|
2121
2326
|
*/
|
|
2122
2327
|
export interface OcrParameters {
|
|
2123
2328
|
/**
|
|
2124
|
-
*
|
|
2329
|
+
* Languages to use for OCR text recognition. Specify multiple languages if
|
|
2330
|
+
* document contains mixed-language content. Order matters - put primary language
|
|
2331
|
+
* first. Example: ['en', 'es'] for English with Spanish
|
|
2125
2332
|
*/
|
|
2126
2333
|
languages?: Array<ParsingAPI.ParsingLanguages> | null;
|
|
2127
2334
|
}
|
|
2128
2335
|
}
|
|
2129
2336
|
|
|
2337
|
+
/**
|
|
2338
|
+
* Webhook configuration for receiving parsing job notifications.
|
|
2339
|
+
*
|
|
2340
|
+
* Webhooks are called when specified events occur during job processing. Configure
|
|
2341
|
+
* multiple webhook configurations to send to different endpoints.
|
|
2342
|
+
*/
|
|
2130
2343
|
export interface WebhookConfiguration {
|
|
2131
2344
|
/**
|
|
2132
|
-
*
|
|
2345
|
+
* Events that trigger this webhook. Options: 'parse.success' (job completed),
|
|
2346
|
+
* 'parse.failure' (job failed), 'parse.partial' (some pages failed). If not
|
|
2347
|
+
* specified, webhook fires for all events
|
|
2133
2348
|
*/
|
|
2134
2349
|
webhook_events?: Array<string> | null;
|
|
2135
2350
|
|
|
2136
2351
|
/**
|
|
2137
|
-
* Custom headers to include in webhook requests
|
|
2352
|
+
* Custom HTTP headers to include in webhook requests. Use for authentication
|
|
2353
|
+
* tokens or custom routing. Example: {'Authorization': 'Bearer xyz'}
|
|
2138
2354
|
*/
|
|
2139
2355
|
webhook_headers?: { [key: string]: unknown } | null;
|
|
2140
2356
|
|
|
2141
2357
|
/**
|
|
2142
|
-
*
|
|
2358
|
+
* HTTPS URL to receive webhook POST requests. Must be publicly accessible
|
|
2143
2359
|
*/
|
|
2144
2360
|
webhook_url?: string | null;
|
|
2145
2361
|
}
|
|
2146
2362
|
}
|
|
2147
2363
|
|
|
2148
2364
|
export interface ParsingListParams extends PaginatedCursorParams {
|
|
2365
|
+
/**
|
|
2366
|
+
* Include jobs created at or after this timestamp (inclusive)
|
|
2367
|
+
*/
|
|
2368
|
+
created_at_on_or_after?: string | null;
|
|
2369
|
+
|
|
2370
|
+
/**
|
|
2371
|
+
* Include jobs created at or before this timestamp (inclusive)
|
|
2372
|
+
*/
|
|
2373
|
+
created_at_on_or_before?: string | null;
|
|
2374
|
+
|
|
2375
|
+
/**
|
|
2376
|
+
* Filter by specific job IDs
|
|
2377
|
+
*/
|
|
2378
|
+
job_ids?: Array<string> | null;
|
|
2379
|
+
|
|
2149
2380
|
organization_id?: string | null;
|
|
2150
2381
|
|
|
2151
2382
|
project_id?: string | null;
|
|
@@ -2158,10 +2389,10 @@ export interface ParsingListParams extends PaginatedCursorParams {
|
|
|
2158
2389
|
|
|
2159
2390
|
export interface ParsingGetParams {
|
|
2160
2391
|
/**
|
|
2161
|
-
* Fields to include: text, markdown, items, metadata,
|
|
2162
|
-
* markdown_content_metadata, items_content_metadata,
|
|
2163
|
-
* xlsx_content_metadata, output_pdf_content_metadata,
|
|
2164
|
-
* Metadata fields include presigned URLs.
|
|
2392
|
+
* Fields to include: text, markdown, items, metadata, job_metadata,
|
|
2393
|
+
* text_content_metadata, markdown_content_metadata, items_content_metadata,
|
|
2394
|
+
* metadata_content_metadata, xlsx_content_metadata, output_pdf_content_metadata,
|
|
2395
|
+
* images_content_metadata. Metadata fields include presigned URLs.
|
|
2165
2396
|
*/
|
|
2166
2397
|
expand?: Array<string>;
|
|
2167
2398
|
|