@llamaindex/llama-cloud 1.8.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +72 -0
- package/README.md +10 -8
- package/client.d.mts +4 -6
- package/client.d.mts.map +1 -1
- package/client.d.ts +4 -6
- package/client.d.ts.map +1 -1
- package/client.js +7 -6
- package/client.js.map +1 -1
- package/client.mjs +7 -6
- package/client.mjs.map +1 -1
- package/core/pagination.d.mts +0 -23
- package/core/pagination.d.mts.map +1 -1
- package/core/pagination.d.ts +0 -23
- package/core/pagination.d.ts.map +1 -1
- package/core/pagination.js +1 -32
- package/core/pagination.js.map +1 -1
- package/core/pagination.mjs +0 -30
- package/core/pagination.mjs.map +1 -1
- package/package.json +12 -1
- package/resources/beta/batch/batch.d.mts +55 -30
- package/resources/beta/batch/batch.d.mts.map +1 -1
- package/resources/beta/batch/batch.d.ts +55 -30
- package/resources/beta/batch/batch.d.ts.map +1 -1
- package/resources/beta/batch/batch.js +14 -11
- package/resources/beta/batch/batch.js.map +1 -1
- package/resources/beta/batch/batch.mjs +14 -11
- package/resources/beta/batch/batch.mjs.map +1 -1
- package/resources/beta/batch/job-items.d.mts +36 -13
- package/resources/beta/batch/job-items.d.mts.map +1 -1
- package/resources/beta/batch/job-items.d.ts +36 -13
- package/resources/beta/batch/job-items.d.ts.map +1 -1
- package/resources/beta/batch/job-items.js +6 -8
- package/resources/beta/batch/job-items.js.map +1 -1
- package/resources/beta/batch/job-items.mjs +6 -8
- package/resources/beta/batch/job-items.mjs.map +1 -1
- package/resources/beta/parse-configurations.d.mts +7 -9
- package/resources/beta/parse-configurations.d.mts.map +1 -1
- package/resources/beta/parse-configurations.d.ts +7 -9
- package/resources/beta/parse-configurations.d.ts.map +1 -1
- package/resources/beta/parse-configurations.js +7 -9
- package/resources/beta/parse-configurations.js.map +1 -1
- package/resources/beta/parse-configurations.mjs +7 -9
- package/resources/beta/parse-configurations.mjs.map +1 -1
- package/resources/beta/sheets.d.mts +16 -0
- package/resources/beta/sheets.d.mts.map +1 -1
- package/resources/beta/sheets.d.ts +16 -0
- package/resources/beta/sheets.d.ts.map +1 -1
- package/resources/beta/split.d.mts +60 -16
- package/resources/beta/split.d.mts.map +1 -1
- package/resources/beta/split.d.ts +60 -16
- package/resources/beta/split.d.ts.map +1 -1
- package/resources/beta/split.js.map +1 -1
- package/resources/beta/split.mjs.map +1 -1
- package/resources/classifier/jobs.d.mts +12 -3
- package/resources/classifier/jobs.d.mts.map +1 -1
- package/resources/classifier/jobs.d.ts +12 -3
- package/resources/classifier/jobs.d.ts.map +1 -1
- package/resources/classify.d.mts +93 -38
- package/resources/classify.d.mts.map +1 -1
- package/resources/classify.d.ts +93 -38
- package/resources/classify.d.ts.map +1 -1
- package/resources/classify.js +19 -2
- package/resources/classify.js.map +1 -1
- package/resources/classify.mjs +19 -2
- package/resources/classify.mjs.map +1 -1
- package/resources/extract.d.mts +1593 -0
- package/resources/extract.d.mts.map +1 -0
- package/resources/extract.d.ts +1593 -0
- package/resources/extract.d.ts.map +1 -0
- package/resources/extract.js +215 -0
- package/resources/extract.js.map +1 -0
- package/resources/extract.mjs +211 -0
- package/resources/extract.mjs.map +1 -0
- package/resources/files.d.mts +53 -39
- package/resources/files.d.mts.map +1 -1
- package/resources/files.d.ts +53 -39
- package/resources/files.d.ts.map +1 -1
- package/resources/files.js +11 -10
- package/resources/files.js.map +1 -1
- package/resources/files.mjs +11 -10
- package/resources/files.mjs.map +1 -1
- package/resources/index.d.mts +1 -1
- package/resources/index.d.mts.map +1 -1
- package/resources/index.d.ts +1 -1
- package/resources/index.d.ts.map +1 -1
- package/resources/index.js +3 -3
- package/resources/index.js.map +1 -1
- package/resources/index.mjs +1 -1
- package/resources/index.mjs.map +1 -1
- package/resources/parsing.d.mts +292 -138
- package/resources/parsing.d.mts.map +1 -1
- package/resources/parsing.d.ts +292 -138
- package/resources/parsing.d.ts.map +1 -1
- package/resources/parsing.js +30 -4
- package/resources/parsing.js.map +1 -1
- package/resources/parsing.mjs +30 -4
- package/resources/parsing.mjs.map +1 -1
- package/resources/pipelines/pipelines.d.mts +59 -13
- package/resources/pipelines/pipelines.d.mts.map +1 -1
- package/resources/pipelines/pipelines.d.ts +59 -13
- package/resources/pipelines/pipelines.d.ts.map +1 -1
- package/resources/pipelines/pipelines.js +24 -9
- package/resources/pipelines/pipelines.js.map +1 -1
- package/resources/pipelines/pipelines.mjs +24 -9
- package/resources/pipelines/pipelines.mjs.map +1 -1
- package/resources/pipelines/sync.d.mts +5 -3
- package/resources/pipelines/sync.d.mts.map +1 -1
- package/resources/pipelines/sync.d.ts +5 -3
- package/resources/pipelines/sync.d.ts.map +1 -1
- package/resources/pipelines/sync.js +5 -3
- package/resources/pipelines/sync.js.map +1 -1
- package/resources/pipelines/sync.mjs +5 -3
- package/resources/pipelines/sync.mjs.map +1 -1
- package/src/client.ts +50 -15
- package/src/core/pagination.ts +0 -71
- package/src/resources/beta/batch/batch.ts +75 -30
- package/src/resources/beta/batch/job-items.ts +56 -13
- package/src/resources/beta/parse-configurations.ts +7 -9
- package/src/resources/beta/sheets.ts +20 -0
- package/src/resources/beta/split.ts +70 -17
- package/src/resources/classifier/jobs.ts +12 -3
- package/src/resources/classify.ts +101 -38
- package/src/resources/extract.ts +2055 -0
- package/src/resources/files.ts +53 -39
- package/src/resources/index.ts +22 -1
- package/src/resources/parsing.ts +327 -136
- package/src/resources/pipelines/pipelines.ts +80 -14
- package/src/resources/pipelines/sync.ts +5 -3
- package/src/version.ts +1 -1
- package/version.d.mts +1 -1
- package/version.d.ts +1 -1
- package/version.js +1 -1
- package/version.mjs +1 -1
- package/resources/extraction/extraction-agents/extraction-agents.d.mts +0 -126
- package/resources/extraction/extraction-agents/extraction-agents.d.mts.map +0 -1
- package/resources/extraction/extraction-agents/extraction-agents.d.ts +0 -126
- package/resources/extraction/extraction-agents/extraction-agents.d.ts.map +0 -1
- package/resources/extraction/extraction-agents/extraction-agents.js +0 -56
- package/resources/extraction/extraction-agents/extraction-agents.js.map +0 -1
- package/resources/extraction/extraction-agents/extraction-agents.mjs +0 -51
- package/resources/extraction/extraction-agents/extraction-agents.mjs.map +0 -1
- package/resources/extraction/extraction-agents/index.d.mts +0 -3
- package/resources/extraction/extraction-agents/index.d.mts.map +0 -1
- package/resources/extraction/extraction-agents/index.d.ts +0 -3
- package/resources/extraction/extraction-agents/index.d.ts.map +0 -1
- package/resources/extraction/extraction-agents/index.js +0 -9
- package/resources/extraction/extraction-agents/index.js.map +0 -1
- package/resources/extraction/extraction-agents/index.mjs +0 -4
- package/resources/extraction/extraction-agents/index.mjs.map +0 -1
- package/resources/extraction/extraction-agents/schema.d.mts +0 -75
- package/resources/extraction/extraction-agents/schema.d.mts.map +0 -1
- package/resources/extraction/extraction-agents/schema.d.ts +0 -75
- package/resources/extraction/extraction-agents/schema.d.ts.map +0 -1
- package/resources/extraction/extraction-agents/schema.js +0 -28
- package/resources/extraction/extraction-agents/schema.js.map +0 -1
- package/resources/extraction/extraction-agents/schema.mjs +0 -24
- package/resources/extraction/extraction-agents/schema.mjs.map +0 -1
- package/resources/extraction/extraction-agents.d.mts +0 -2
- package/resources/extraction/extraction-agents.d.mts.map +0 -1
- package/resources/extraction/extraction-agents.d.ts +0 -2
- package/resources/extraction/extraction-agents.d.ts.map +0 -1
- package/resources/extraction/extraction-agents.js +0 -6
- package/resources/extraction/extraction-agents.js.map +0 -1
- package/resources/extraction/extraction-agents.mjs +0 -3
- package/resources/extraction/extraction-agents.mjs.map +0 -1
- package/resources/extraction/extraction.d.mts +0 -118
- package/resources/extraction/extraction.d.mts.map +0 -1
- package/resources/extraction/extraction.d.ts +0 -118
- package/resources/extraction/extraction.d.ts.map +0 -1
- package/resources/extraction/extraction.js +0 -91
- package/resources/extraction/extraction.js.map +0 -1
- package/resources/extraction/extraction.mjs +0 -86
- package/resources/extraction/extraction.mjs.map +0 -1
- package/resources/extraction/index.d.mts +0 -5
- package/resources/extraction/index.d.mts.map +0 -1
- package/resources/extraction/index.d.ts +0 -5
- package/resources/extraction/index.d.ts.map +0 -1
- package/resources/extraction/index.js +0 -13
- package/resources/extraction/index.js.map +0 -1
- package/resources/extraction/index.mjs +0 -6
- package/resources/extraction/index.mjs.map +0 -1
- package/resources/extraction/jobs.d.mts +0 -280
- package/resources/extraction/jobs.d.mts.map +0 -1
- package/resources/extraction/jobs.d.ts +0 -280
- package/resources/extraction/jobs.d.ts.map +0 -1
- package/resources/extraction/jobs.js +0 -179
- package/resources/extraction/jobs.js.map +0 -1
- package/resources/extraction/jobs.mjs +0 -175
- package/resources/extraction/jobs.mjs.map +0 -1
- package/resources/extraction/runs.d.mts +0 -198
- package/resources/extraction/runs.d.mts.map +0 -1
- package/resources/extraction/runs.d.ts +0 -198
- package/resources/extraction/runs.d.ts.map +0 -1
- package/resources/extraction/runs.js +0 -42
- package/resources/extraction/runs.js.map +0 -1
- package/resources/extraction/runs.mjs +0 -38
- package/resources/extraction/runs.mjs.map +0 -1
- package/resources/extraction.d.mts +0 -2
- package/resources/extraction.d.mts.map +0 -1
- package/resources/extraction.d.ts +0 -2
- package/resources/extraction.d.ts.map +0 -1
- package/resources/extraction.js +0 -6
- package/resources/extraction.js.map +0 -1
- package/resources/extraction.mjs +0 -3
- package/resources/extraction.mjs.map +0 -1
- package/src/resources/extraction/extraction-agents/extraction-agents.ts +0 -196
- package/src/resources/extraction/extraction-agents/index.ts +0 -18
- package/src/resources/extraction/extraction-agents/schema.ts +0 -100
- package/src/resources/extraction/extraction-agents.ts +0 -3
- package/src/resources/extraction/extraction.ts +0 -224
- package/src/resources/extraction/index.ts +0 -34
- package/src/resources/extraction/jobs.ts +0 -414
- package/src/resources/extraction/runs.ts +0 -315
- package/src/resources/extraction.ts +0 -3
package/src/resources/parsing.ts
CHANGED
|
@@ -12,7 +12,21 @@ import { pollUntilComplete, PollingOptions, DEFAULT_TIMEOUT } from '../core/poll
|
|
|
12
12
|
|
|
13
13
|
export class Parsing extends APIResource {
|
|
14
14
|
/**
|
|
15
|
-
* Parse a file by file ID
|
|
15
|
+
* Parse a file by file ID or URL.
|
|
16
|
+
*
|
|
17
|
+
* Provide either `file_id` (a previously uploaded file) or `source_url` (a
|
|
18
|
+
* publicly accessible URL). Configure parsing with options like `tier`,
|
|
19
|
+
* `target_pages`, and `lang`.
|
|
20
|
+
*
|
|
21
|
+
* ## Tiers
|
|
22
|
+
*
|
|
23
|
+
* - `fast` — rule-based, cheapest, no AI
|
|
24
|
+
* - `cost_effective` — balanced speed and quality
|
|
25
|
+
* - `agentic` — full AI-powered parsing
|
|
26
|
+
* - `agentic_plus` — premium AI with specialized features
|
|
27
|
+
*
|
|
28
|
+
* The job runs asynchronously. Poll `GET /parse/{job_id}` with `expand=text` or
|
|
29
|
+
* `expand=markdown` to retrieve results.
|
|
16
30
|
*/
|
|
17
31
|
create(
|
|
18
32
|
params: ParsingCreateParams & { upload_file?: Uploadable },
|
|
@@ -43,8 +57,10 @@ export class Parsing extends APIResource {
|
|
|
43
57
|
}
|
|
44
58
|
|
|
45
59
|
/**
|
|
46
|
-
* List parse jobs for the current project
|
|
47
|
-
*
|
|
60
|
+
* List parse jobs for the current project.
|
|
61
|
+
*
|
|
62
|
+
* Filter by `status` or creation date range. Results are paginated — use
|
|
63
|
+
* `page_token` from the response to fetch subsequent pages.
|
|
48
64
|
*/
|
|
49
65
|
list(
|
|
50
66
|
query: ParsingListParams | null | undefined = {},
|
|
@@ -57,7 +73,17 @@ export class Parsing extends APIResource {
|
|
|
57
73
|
}
|
|
58
74
|
|
|
59
75
|
/**
|
|
60
|
-
* Retrieve parse job with optional content
|
|
76
|
+
* Retrieve a parse job with optional expanded content.
|
|
77
|
+
*
|
|
78
|
+
* By default returns job metadata only. Use `expand` to include parsed content:
|
|
79
|
+
*
|
|
80
|
+
* - `text` — plain text output
|
|
81
|
+
* - `markdown` — markdown output
|
|
82
|
+
* - `items` — structured page-by-page output
|
|
83
|
+
* - `job_metadata` — usage and processing details
|
|
84
|
+
*
|
|
85
|
+
* Content metadata fields (e.g. `text_content_metadata`) return presigned URLs for
|
|
86
|
+
* downloading large results.
|
|
61
87
|
*/
|
|
62
88
|
get(
|
|
63
89
|
jobID: string,
|
|
@@ -599,18 +625,27 @@ export type LlamaParseSupportedFileExtensions =
|
|
|
599
625
|
| '.webm';
|
|
600
626
|
|
|
601
627
|
/**
|
|
602
|
-
*
|
|
628
|
+
* A parse job (v1).
|
|
603
629
|
*/
|
|
604
630
|
export interface ParsingJob {
|
|
631
|
+
/**
|
|
632
|
+
* Unique parse job identifier
|
|
633
|
+
*/
|
|
605
634
|
id: string;
|
|
606
635
|
|
|
607
636
|
/**
|
|
608
|
-
*
|
|
637
|
+
* Current job status
|
|
609
638
|
*/
|
|
610
639
|
status: StatusEnum;
|
|
611
640
|
|
|
641
|
+
/**
|
|
642
|
+
* Machine-readable error code when failed
|
|
643
|
+
*/
|
|
612
644
|
error_code?: string | null;
|
|
613
645
|
|
|
646
|
+
/**
|
|
647
|
+
* Human-readable error details when failed
|
|
648
|
+
*/
|
|
614
649
|
error_message?: string | null;
|
|
615
650
|
}
|
|
616
651
|
|
|
@@ -811,11 +846,11 @@ export interface TextItem {
|
|
|
811
846
|
}
|
|
812
847
|
|
|
813
848
|
/**
|
|
814
|
-
*
|
|
849
|
+
* A parse job.
|
|
815
850
|
*/
|
|
816
851
|
export interface ParsingCreateResponse {
|
|
817
852
|
/**
|
|
818
|
-
* Unique
|
|
853
|
+
* Unique parse job identifier
|
|
819
854
|
*/
|
|
820
855
|
id: string;
|
|
821
856
|
|
|
@@ -825,7 +860,7 @@ export interface ParsingCreateResponse {
|
|
|
825
860
|
project_id: string;
|
|
826
861
|
|
|
827
862
|
/**
|
|
828
|
-
* Current
|
|
863
|
+
* Current job status: PENDING, RUNNING, COMPLETED, FAILED, or CANCELLED
|
|
829
864
|
*/
|
|
830
865
|
status: 'PENDING' | 'RUNNING' | 'COMPLETED' | 'FAILED' | 'CANCELLED';
|
|
831
866
|
|
|
@@ -835,15 +870,20 @@ export interface ParsingCreateResponse {
|
|
|
835
870
|
created_at?: string | null;
|
|
836
871
|
|
|
837
872
|
/**
|
|
838
|
-
* Error
|
|
873
|
+
* Error details when status is FAILED
|
|
839
874
|
*/
|
|
840
875
|
error_message?: string | null;
|
|
841
876
|
|
|
842
877
|
/**
|
|
843
|
-
*
|
|
878
|
+
* Optional display name for this parse job
|
|
844
879
|
*/
|
|
845
880
|
name?: string | null;
|
|
846
881
|
|
|
882
|
+
/**
|
|
883
|
+
* Parsing tier used for this job
|
|
884
|
+
*/
|
|
885
|
+
tier?: string | null;
|
|
886
|
+
|
|
847
887
|
/**
|
|
848
888
|
* Update datetime
|
|
849
889
|
*/
|
|
@@ -851,11 +891,11 @@ export interface ParsingCreateResponse {
|
|
|
851
891
|
}
|
|
852
892
|
|
|
853
893
|
/**
|
|
854
|
-
*
|
|
894
|
+
* A parse job.
|
|
855
895
|
*/
|
|
856
896
|
export interface ParsingListResponse {
|
|
857
897
|
/**
|
|
858
|
-
* Unique
|
|
898
|
+
* Unique parse job identifier
|
|
859
899
|
*/
|
|
860
900
|
id: string;
|
|
861
901
|
|
|
@@ -865,7 +905,7 @@ export interface ParsingListResponse {
|
|
|
865
905
|
project_id: string;
|
|
866
906
|
|
|
867
907
|
/**
|
|
868
|
-
* Current
|
|
908
|
+
* Current job status: PENDING, RUNNING, COMPLETED, FAILED, or CANCELLED
|
|
869
909
|
*/
|
|
870
910
|
status: 'PENDING' | 'RUNNING' | 'COMPLETED' | 'FAILED' | 'CANCELLED';
|
|
871
911
|
|
|
@@ -875,15 +915,20 @@ export interface ParsingListResponse {
|
|
|
875
915
|
created_at?: string | null;
|
|
876
916
|
|
|
877
917
|
/**
|
|
878
|
-
* Error
|
|
918
|
+
* Error details when status is FAILED
|
|
879
919
|
*/
|
|
880
920
|
error_message?: string | null;
|
|
881
921
|
|
|
882
922
|
/**
|
|
883
|
-
*
|
|
923
|
+
* Optional display name for this parse job
|
|
884
924
|
*/
|
|
885
925
|
name?: string | null;
|
|
886
926
|
|
|
927
|
+
/**
|
|
928
|
+
* Parsing tier used for this job
|
|
929
|
+
*/
|
|
930
|
+
tier?: string | null;
|
|
931
|
+
|
|
887
932
|
/**
|
|
888
933
|
* Update datetime
|
|
889
934
|
*/
|
|
@@ -912,6 +957,11 @@ export interface ParsingGetResponse {
|
|
|
912
957
|
*/
|
|
913
958
|
items?: ParsingGetResponse.Items | null;
|
|
914
959
|
|
|
960
|
+
/**
|
|
961
|
+
* Job execution metadata (if requested)
|
|
962
|
+
*/
|
|
963
|
+
job_metadata?: { [key: string]: unknown } | null;
|
|
964
|
+
|
|
915
965
|
/**
|
|
916
966
|
* Markdown result (if requested)
|
|
917
967
|
*/
|
|
@@ -951,7 +1001,7 @@ export namespace ParsingGetResponse {
|
|
|
951
1001
|
*/
|
|
952
1002
|
export interface Job {
|
|
953
1003
|
/**
|
|
954
|
-
* Unique
|
|
1004
|
+
* Unique parse job identifier
|
|
955
1005
|
*/
|
|
956
1006
|
id: string;
|
|
957
1007
|
|
|
@@ -961,7 +1011,7 @@ export namespace ParsingGetResponse {
|
|
|
961
1011
|
project_id: string;
|
|
962
1012
|
|
|
963
1013
|
/**
|
|
964
|
-
* Current
|
|
1014
|
+
* Current job status: PENDING, RUNNING, COMPLETED, FAILED, or CANCELLED
|
|
965
1015
|
*/
|
|
966
1016
|
status: 'PENDING' | 'RUNNING' | 'COMPLETED' | 'FAILED' | 'CANCELLED';
|
|
967
1017
|
|
|
@@ -971,15 +1021,20 @@ export namespace ParsingGetResponse {
|
|
|
971
1021
|
created_at?: string | null;
|
|
972
1022
|
|
|
973
1023
|
/**
|
|
974
|
-
* Error
|
|
1024
|
+
* Error details when status is FAILED
|
|
975
1025
|
*/
|
|
976
1026
|
error_message?: string | null;
|
|
977
1027
|
|
|
978
1028
|
/**
|
|
979
|
-
*
|
|
1029
|
+
* Optional display name for this parse job
|
|
980
1030
|
*/
|
|
981
1031
|
name?: string | null;
|
|
982
1032
|
|
|
1033
|
+
/**
|
|
1034
|
+
* Parsing tier used for this job
|
|
1035
|
+
*/
|
|
1036
|
+
tier?: string | null;
|
|
1037
|
+
|
|
983
1038
|
/**
|
|
984
1039
|
* Update datetime
|
|
985
1040
|
*/
|
|
@@ -1297,12 +1352,15 @@ export namespace ParsingGetResponse {
|
|
|
1297
1352
|
|
|
1298
1353
|
export interface ParsingCreateParams {
|
|
1299
1354
|
/**
|
|
1300
|
-
* Body param:
|
|
1355
|
+
* Body param: Parsing tier: 'fast' (rule-based, cheapest), 'cost_effective'
|
|
1356
|
+
* (balanced), 'agentic' (AI-powered with custom prompts), or 'agentic_plus'
|
|
1357
|
+
* (premium AI with highest accuracy)
|
|
1301
1358
|
*/
|
|
1302
1359
|
tier: 'fast' | 'cost_effective' | 'agentic' | 'agentic_plus';
|
|
1303
1360
|
|
|
1304
1361
|
/**
|
|
1305
|
-
* Body param:
|
|
1362
|
+
* Body param: Tier version. Use 'latest' for the current stable version, or
|
|
1363
|
+
* specify a specific version (e.g., '1.0', '2.0') for reproducible results
|
|
1306
1364
|
*/
|
|
1307
1365
|
version:
|
|
1308
1366
|
| '2025-12-11'
|
|
@@ -1324,6 +1382,22 @@ export interface ParsingCreateParams {
|
|
|
1324
1382
|
| '2026-03-02'
|
|
1325
1383
|
| '2026-03-03'
|
|
1326
1384
|
| '2026-03-04'
|
|
1385
|
+
| '2026-03-05'
|
|
1386
|
+
| '2026-03-09'
|
|
1387
|
+
| '2026-03-10'
|
|
1388
|
+
| '2026-03-11'
|
|
1389
|
+
| '2026-03-12'
|
|
1390
|
+
| '2026-03-17'
|
|
1391
|
+
| '2026-03-19'
|
|
1392
|
+
| '2026-03-20'
|
|
1393
|
+
| '2026-03-22'
|
|
1394
|
+
| '2026-03-23'
|
|
1395
|
+
| '2026-03-24'
|
|
1396
|
+
| '2026-03-25'
|
|
1397
|
+
| '2026-03-26'
|
|
1398
|
+
| '2026-03-27'
|
|
1399
|
+
| '2026-03-30'
|
|
1400
|
+
| '2026-03-31'
|
|
1327
1401
|
| 'latest'
|
|
1328
1402
|
| (string & {});
|
|
1329
1403
|
|
|
@@ -1338,296 +1412,341 @@ export interface ParsingCreateParams {
|
|
|
1338
1412
|
project_id?: string | null;
|
|
1339
1413
|
|
|
1340
1414
|
/**
|
|
1341
|
-
* Body param: Options for
|
|
1415
|
+
* Body param: Options for AI-powered parsing tiers (cost_effective, agentic,
|
|
1416
|
+
* agentic_plus).
|
|
1417
|
+
*
|
|
1418
|
+
* These options customize how the AI processes and interprets document content.
|
|
1419
|
+
* Only applicable when using non-fast tiers.
|
|
1342
1420
|
*/
|
|
1343
1421
|
agentic_options?: ParsingCreateParams.AgenticOptions | null;
|
|
1344
1422
|
|
|
1345
1423
|
/**
|
|
1346
|
-
* Body param:
|
|
1424
|
+
* Body param: Identifier for the client/application making the request. Used for
|
|
1425
|
+
* analytics and debugging. Example: 'my-app-v2'
|
|
1347
1426
|
*/
|
|
1348
1427
|
client_name?: string | null;
|
|
1349
1428
|
|
|
1350
1429
|
/**
|
|
1351
|
-
* Body param:
|
|
1430
|
+
* Body param: Crop boundaries to process only a portion of each page. Values are
|
|
1431
|
+
* ratios 0-1 from page edges
|
|
1352
1432
|
*/
|
|
1353
1433
|
crop_box?: ParsingCreateParams.CropBox;
|
|
1354
1434
|
|
|
1355
1435
|
/**
|
|
1356
|
-
* Body param:
|
|
1436
|
+
* Body param: Bypass result caching and force re-parsing. Use when document
|
|
1437
|
+
* content may have changed or you need fresh results
|
|
1357
1438
|
*/
|
|
1358
1439
|
disable_cache?: boolean | null;
|
|
1359
1440
|
|
|
1360
1441
|
/**
|
|
1361
|
-
* Body param: Options for fast tier parsing (
|
|
1442
|
+
* Body param: Options for fast tier parsing (rule-based, no AI).
|
|
1443
|
+
*
|
|
1444
|
+
* Fast tier uses deterministic algorithms for text extraction without AI
|
|
1445
|
+
* enhancement. It's the fastest and most cost-effective option, best suited for
|
|
1446
|
+
* simple documents with standard layouts. Currently has no configurable options
|
|
1447
|
+
* but reserved for future expansion.
|
|
1362
1448
|
*/
|
|
1363
1449
|
fast_options?: unknown | null;
|
|
1364
1450
|
|
|
1365
1451
|
/**
|
|
1366
|
-
* Body param: ID of an existing file in the project to parse
|
|
1452
|
+
* Body param: ID of an existing file in the project to parse. Mutually exclusive
|
|
1453
|
+
* with source_url
|
|
1367
1454
|
*/
|
|
1368
1455
|
file_id?: string | null;
|
|
1369
1456
|
|
|
1370
1457
|
/**
|
|
1371
|
-
* Body param: HTTP proxy
|
|
1458
|
+
* Body param: HTTP/HTTPS proxy for fetching source_url. Ignored if using file_id
|
|
1372
1459
|
*/
|
|
1373
1460
|
http_proxy?: string | null;
|
|
1374
1461
|
|
|
1375
1462
|
/**
|
|
1376
|
-
* Body param:
|
|
1463
|
+
* Body param: Format-specific options (HTML, PDF, spreadsheet, presentation).
|
|
1464
|
+
* Applied based on detected input file type
|
|
1377
1465
|
*/
|
|
1378
1466
|
input_options?: ParsingCreateParams.InputOptions;
|
|
1379
1467
|
|
|
1380
1468
|
/**
|
|
1381
|
-
* Body param: Output
|
|
1469
|
+
* Body param: Output formatting options for markdown, text, and extracted images
|
|
1382
1470
|
*/
|
|
1383
1471
|
output_options?: ParsingCreateParams.OutputOptions;
|
|
1384
1472
|
|
|
1385
1473
|
/**
|
|
1386
|
-
* Body param: Page
|
|
1474
|
+
* Body param: Page selection: limit total pages or specify exact pages to process
|
|
1387
1475
|
*/
|
|
1388
1476
|
page_ranges?: ParsingCreateParams.PageRanges;
|
|
1389
1477
|
|
|
1390
1478
|
/**
|
|
1391
|
-
* Body param: Job
|
|
1479
|
+
* Body param: Job execution controls including timeouts and failure thresholds
|
|
1392
1480
|
*/
|
|
1393
1481
|
processing_control?: ParsingCreateParams.ProcessingControl;
|
|
1394
1482
|
|
|
1395
1483
|
/**
|
|
1396
|
-
* Body param:
|
|
1484
|
+
* Body param: Document processing options including OCR, table extraction, and
|
|
1485
|
+
* chart parsing
|
|
1397
1486
|
*/
|
|
1398
1487
|
processing_options?: ParsingCreateParams.ProcessingOptions;
|
|
1399
1488
|
|
|
1400
1489
|
/**
|
|
1401
|
-
* Body param:
|
|
1490
|
+
* Body param: Public URL of the document to parse. Mutually exclusive with file_id
|
|
1402
1491
|
*/
|
|
1403
1492
|
source_url?: string | null;
|
|
1404
1493
|
|
|
1405
1494
|
/**
|
|
1406
|
-
* Body param:
|
|
1495
|
+
* Body param: Webhook endpoints for job status notifications. Multiple webhooks
|
|
1496
|
+
* can be configured for different events or services
|
|
1407
1497
|
*/
|
|
1408
1498
|
webhook_configurations?: Array<ParsingCreateParams.WebhookConfiguration>;
|
|
1409
1499
|
}
|
|
1410
1500
|
|
|
1411
1501
|
export namespace ParsingCreateParams {
|
|
1412
1502
|
/**
|
|
1413
|
-
* Options for
|
|
1503
|
+
* Options for AI-powered parsing tiers (cost_effective, agentic, agentic_plus).
|
|
1504
|
+
*
|
|
1505
|
+
* These options customize how the AI processes and interprets document content.
|
|
1506
|
+
* Only applicable when using non-fast tiers.
|
|
1414
1507
|
*/
|
|
1415
1508
|
export interface AgenticOptions {
|
|
1416
1509
|
/**
|
|
1417
|
-
* Custom
|
|
1510
|
+
* Custom instructions for the AI parser. Use to guide extraction behavior, specify
|
|
1511
|
+
* output formatting, or provide domain-specific context. Example: 'Extract
|
|
1512
|
+
* financial tables with currency symbols. Format dates as YYYY-MM-DD.'
|
|
1418
1513
|
*/
|
|
1419
1514
|
custom_prompt?: string | null;
|
|
1420
1515
|
}
|
|
1421
1516
|
|
|
1422
1517
|
/**
|
|
1423
|
-
*
|
|
1518
|
+
* Crop boundaries to process only a portion of each page. Values are ratios 0-1
|
|
1519
|
+
* from page edges
|
|
1424
1520
|
*/
|
|
1425
1521
|
export interface CropBox {
|
|
1426
1522
|
/**
|
|
1427
|
-
* Bottom boundary
|
|
1523
|
+
* Bottom boundary as ratio (0-1). 0=top edge, 1=bottom edge. Content below this
|
|
1524
|
+
* line is excluded
|
|
1428
1525
|
*/
|
|
1429
1526
|
bottom?: number | null;
|
|
1430
1527
|
|
|
1431
1528
|
/**
|
|
1432
|
-
* Left boundary
|
|
1529
|
+
* Left boundary as ratio (0-1). 0=left edge, 1=right edge. Content left of this
|
|
1530
|
+
* line is excluded
|
|
1433
1531
|
*/
|
|
1434
1532
|
left?: number | null;
|
|
1435
1533
|
|
|
1436
1534
|
/**
|
|
1437
|
-
* Right boundary
|
|
1535
|
+
* Right boundary as ratio (0-1). 0=left edge, 1=right edge. Content right of this
|
|
1536
|
+
* line is excluded
|
|
1438
1537
|
*/
|
|
1439
1538
|
right?: number | null;
|
|
1440
1539
|
|
|
1441
1540
|
/**
|
|
1442
|
-
* Top boundary
|
|
1541
|
+
* Top boundary as ratio (0-1). 0=top edge, 1=bottom edge. Content above this line
|
|
1542
|
+
* is excluded
|
|
1443
1543
|
*/
|
|
1444
1544
|
top?: number | null;
|
|
1445
1545
|
}
|
|
1446
1546
|
|
|
1447
1547
|
/**
|
|
1448
|
-
*
|
|
1548
|
+
* Format-specific options (HTML, PDF, spreadsheet, presentation). Applied based on
|
|
1549
|
+
* detected input file type
|
|
1449
1550
|
*/
|
|
1450
1551
|
export interface InputOptions {
|
|
1451
1552
|
/**
|
|
1452
|
-
* HTML
|
|
1553
|
+
* HTML/web page parsing options (applies to .html, .htm files)
|
|
1453
1554
|
*/
|
|
1454
1555
|
html?: InputOptions.HTML;
|
|
1455
1556
|
|
|
1456
1557
|
/**
|
|
1457
|
-
* PDF-specific parsing options
|
|
1558
|
+
* PDF-specific parsing options (applies to .pdf files)
|
|
1458
1559
|
*/
|
|
1459
1560
|
pdf?: unknown;
|
|
1460
1561
|
|
|
1461
1562
|
/**
|
|
1462
|
-
* Presentation
|
|
1563
|
+
* Presentation parsing options (applies to .pptx, .ppt, .odp, .key files)
|
|
1463
1564
|
*/
|
|
1464
1565
|
presentation?: InputOptions.Presentation;
|
|
1465
1566
|
|
|
1466
1567
|
/**
|
|
1467
|
-
* Spreadsheet
|
|
1568
|
+
* Spreadsheet parsing options (applies to .xlsx, .xls, .csv, .ods files)
|
|
1468
1569
|
*/
|
|
1469
1570
|
spreadsheet?: InputOptions.Spreadsheet;
|
|
1470
1571
|
}
|
|
1471
1572
|
|
|
1472
1573
|
export namespace InputOptions {
|
|
1473
1574
|
/**
|
|
1474
|
-
* HTML
|
|
1575
|
+
* HTML/web page parsing options (applies to .html, .htm files)
|
|
1475
1576
|
*/
|
|
1476
1577
|
export interface HTML {
|
|
1477
1578
|
/**
|
|
1478
|
-
*
|
|
1579
|
+
* Force all HTML elements to be visible by overriding CSS display/visibility
|
|
1580
|
+
* properties. Useful for parsing pages with hidden content or collapsed sections
|
|
1479
1581
|
*/
|
|
1480
1582
|
make_all_elements_visible?: boolean | null;
|
|
1481
1583
|
|
|
1482
1584
|
/**
|
|
1483
|
-
* Remove fixed
|
|
1585
|
+
* Remove fixed-position elements (headers, footers, floating buttons) that appear
|
|
1586
|
+
* on every page render
|
|
1484
1587
|
*/
|
|
1485
1588
|
remove_fixed_elements?: boolean | null;
|
|
1486
1589
|
|
|
1487
1590
|
/**
|
|
1488
|
-
* Remove navigation elements
|
|
1591
|
+
* Remove navigation elements (nav bars, sidebars, menus) to focus on main content
|
|
1489
1592
|
*/
|
|
1490
1593
|
remove_navigation_elements?: boolean | null;
|
|
1491
1594
|
}
|
|
1492
1595
|
|
|
1493
1596
|
/**
|
|
1494
|
-
* Presentation
|
|
1597
|
+
* Presentation parsing options (applies to .pptx, .ppt, .odp, .key files)
|
|
1495
1598
|
*/
|
|
1496
1599
|
export interface Presentation {
|
|
1497
1600
|
/**
|
|
1498
|
-
* Extract
|
|
1601
|
+
* Extract content positioned outside the visible slide area. Some presentations
|
|
1602
|
+
* have hidden notes or content that extends beyond slide boundaries
|
|
1499
1603
|
*/
|
|
1500
1604
|
out_of_bounds_content?: boolean | null;
|
|
1501
1605
|
|
|
1502
1606
|
/**
|
|
1503
|
-
* Skip extraction of embedded data
|
|
1607
|
+
* Skip extraction of embedded chart data tables. When true, only the visual
|
|
1608
|
+
* representation of charts is captured, not the underlying data
|
|
1504
1609
|
*/
|
|
1505
1610
|
skip_embedded_data?: boolean | null;
|
|
1506
1611
|
}
|
|
1507
1612
|
|
|
1508
1613
|
/**
|
|
1509
|
-
* Spreadsheet
|
|
1614
|
+
* Spreadsheet parsing options (applies to .xlsx, .xls, .csv, .ods files)
|
|
1510
1615
|
*/
|
|
1511
1616
|
export interface Spreadsheet {
|
|
1512
1617
|
/**
|
|
1513
|
-
* Detect and extract
|
|
1618
|
+
* Detect and extract multiple tables within a single sheet. Useful when
|
|
1619
|
+
* spreadsheets contain several data regions separated by blank rows/columns
|
|
1514
1620
|
*/
|
|
1515
1621
|
detect_sub_tables_in_sheets?: boolean | null;
|
|
1516
1622
|
|
|
1517
1623
|
/**
|
|
1518
|
-
*
|
|
1624
|
+
* Compute formula results instead of extracting formula text. Use when you need
|
|
1625
|
+
* calculated values rather than formula definitions
|
|
1519
1626
|
*/
|
|
1520
1627
|
force_formula_computation_in_sheets?: boolean | null;
|
|
1521
1628
|
|
|
1522
1629
|
/**
|
|
1523
|
-
*
|
|
1630
|
+
* Parse hidden sheets in addition to visible ones. By default, hidden sheets are
|
|
1631
|
+
* skipped
|
|
1524
1632
|
*/
|
|
1525
1633
|
include_hidden_sheets?: boolean | null;
|
|
1526
1634
|
}
|
|
1527
1635
|
}
|
|
1528
1636
|
|
|
1529
1637
|
/**
|
|
1530
|
-
* Output
|
|
1638
|
+
* Output formatting options for markdown, text, and extracted images
|
|
1531
1639
|
*/
|
|
1532
1640
|
export interface OutputOptions {
|
|
1533
1641
|
/**
|
|
1534
|
-
* Extract printed page
|
|
1642
|
+
* Extract the printed page number as it appears in the document (e.g., 'Page 5 of
|
|
1643
|
+
* 10', 'v', 'A-3'). Useful for referencing original page numbers
|
|
1535
1644
|
*/
|
|
1536
1645
|
extract_printed_page_number?: boolean | null;
|
|
1537
1646
|
|
|
1538
1647
|
/**
|
|
1539
|
-
* Image categories to save: 'screenshot' (full page
|
|
1540
|
-
*
|
|
1541
|
-
*
|
|
1648
|
+
* Image categories to extract and save. Options: 'screenshot' (full page renders
|
|
1649
|
+
* useful for visual QA), 'embedded' (images found within the document), 'layout'
|
|
1650
|
+
* (cropped regions from layout detection like figures and diagrams). Empty list
|
|
1651
|
+
* saves no images
|
|
1542
1652
|
*/
|
|
1543
1653
|
images_to_save?: Array<'screenshot' | 'embedded' | 'layout'>;
|
|
1544
1654
|
|
|
1545
1655
|
/**
|
|
1546
|
-
* Markdown
|
|
1656
|
+
* Markdown formatting options including table styles and link annotations
|
|
1547
1657
|
*/
|
|
1548
1658
|
markdown?: OutputOptions.Markdown;
|
|
1549
1659
|
|
|
1550
1660
|
/**
|
|
1551
|
-
* Spatial text output options
|
|
1661
|
+
* Spatial text output options for preserving document layout structure
|
|
1552
1662
|
*/
|
|
1553
1663
|
spatial_text?: OutputOptions.SpatialText;
|
|
1554
1664
|
|
|
1555
1665
|
/**
|
|
1556
|
-
*
|
|
1666
|
+
* Options for exporting tables as XLSX spreadsheets
|
|
1557
1667
|
*/
|
|
1558
1668
|
tables_as_spreadsheet?: OutputOptions.TablesAsSpreadsheet;
|
|
1559
1669
|
}
|
|
1560
1670
|
|
|
1561
1671
|
export namespace OutputOptions {
|
|
1562
1672
|
/**
|
|
1563
|
-
* Markdown
|
|
1673
|
+
* Markdown formatting options including table styles and link annotations
|
|
1564
1674
|
*/
|
|
1565
1675
|
export interface Markdown {
|
|
1566
1676
|
/**
|
|
1567
|
-
* Add annotations to
|
|
1677
|
+
* Add link annotations to markdown output in the format [text](url). When false,
|
|
1678
|
+
* only the link text is included
|
|
1568
1679
|
*/
|
|
1569
1680
|
annotate_links?: boolean | null;
|
|
1570
1681
|
|
|
1571
1682
|
/**
|
|
1572
|
-
*
|
|
1683
|
+
* Embed images directly in markdown as base64 data URIs instead of extracting them
|
|
1684
|
+
* as separate files. Useful for self-contained markdown output
|
|
1573
1685
|
*/
|
|
1574
1686
|
inline_images?: boolean | null;
|
|
1575
1687
|
|
|
1576
1688
|
/**
|
|
1577
|
-
* Table formatting options
|
|
1689
|
+
* Table formatting options including markdown vs HTML format and merging behavior
|
|
1578
1690
|
*/
|
|
1579
1691
|
tables?: Markdown.Tables;
|
|
1580
1692
|
}
|
|
1581
1693
|
|
|
1582
1694
|
export namespace Markdown {
|
|
1583
1695
|
/**
|
|
1584
|
-
* Table formatting options
|
|
1696
|
+
* Table formatting options including markdown vs HTML format and merging behavior
|
|
1585
1697
|
*/
|
|
1586
1698
|
export interface Tables {
|
|
1587
1699
|
/**
|
|
1588
|
-
*
|
|
1700
|
+
* Remove extra whitespace padding in markdown table cells for more compact output
|
|
1589
1701
|
*/
|
|
1590
1702
|
compact_markdown_tables?: boolean | null;
|
|
1591
1703
|
|
|
1592
1704
|
/**
|
|
1593
|
-
* Separator for multiline content in markdown tables
|
|
1705
|
+
* Separator string for multiline cell content in markdown tables. Example:
|
|
1706
|
+
* '<br>' to preserve line breaks, ' ' to join with spaces
|
|
1594
1707
|
*/
|
|
1595
1708
|
markdown_table_multiline_separator?: string | null;
|
|
1596
1709
|
|
|
1597
1710
|
/**
|
|
1598
|
-
*
|
|
1711
|
+
* Automatically merge tables that span multiple pages into a single table. The
|
|
1712
|
+
* merged table appears on the first page with merged_from_pages metadata
|
|
1599
1713
|
*/
|
|
1600
1714
|
merge_continued_tables?: boolean | null;
|
|
1601
1715
|
|
|
1602
1716
|
/**
|
|
1603
|
-
* Output tables
|
|
1717
|
+
* Output tables as markdown pipe tables instead of HTML <table> tags.
|
|
1718
|
+
* Markdown tables are simpler but cannot represent complex structures like merged
|
|
1719
|
+
* cells
|
|
1604
1720
|
*/
|
|
1605
1721
|
output_tables_as_markdown?: boolean | null;
|
|
1606
1722
|
}
|
|
1607
1723
|
}
|
|
1608
1724
|
|
|
1609
1725
|
/**
|
|
1610
|
-
* Spatial text output options
|
|
1726
|
+
* Spatial text output options for preserving document layout structure
|
|
1611
1727
|
*/
|
|
1612
1728
|
export interface SpatialText {
|
|
1613
1729
|
/**
|
|
1614
|
-
* Keep column
|
|
1730
|
+
* Keep multi-column layouts intact instead of linearizing columns into sequential
|
|
1731
|
+
* text. Automatically enabled for non-fast tiers
|
|
1615
1732
|
*/
|
|
1616
1733
|
do_not_unroll_columns?: boolean | null;
|
|
1617
1734
|
|
|
1618
1735
|
/**
|
|
1619
|
-
*
|
|
1736
|
+
* Maintain consistent text column alignment across page boundaries. Automatically
|
|
1737
|
+
* enabled for document-level parsing modes
|
|
1620
1738
|
*/
|
|
1621
1739
|
preserve_layout_alignment_across_pages?: boolean | null;
|
|
1622
1740
|
|
|
1623
1741
|
/**
|
|
1624
|
-
* Include
|
|
1742
|
+
* Include text below the normal size threshold. Useful for footnotes, watermarks,
|
|
1743
|
+
* or fine print that might otherwise be filtered out
|
|
1625
1744
|
*/
|
|
1626
1745
|
preserve_very_small_text?: boolean | null;
|
|
1627
1746
|
}
|
|
1628
1747
|
|
|
1629
1748
|
/**
|
|
1630
|
-
*
|
|
1749
|
+
* Options for exporting tables as XLSX spreadsheets
|
|
1631
1750
|
*/
|
|
1632
1751
|
export interface TablesAsSpreadsheet {
|
|
1633
1752
|
/**
|
|
@@ -1636,140 +1755,165 @@ export namespace ParsingCreateParams {
|
|
|
1636
1755
|
enable?: boolean | null;
|
|
1637
1756
|
|
|
1638
1757
|
/**
|
|
1639
|
-
* Automatically
|
|
1758
|
+
* Automatically generate descriptive sheet names from table context (headers,
|
|
1759
|
+
* surrounding text) instead of using generic names like 'Table_1'
|
|
1640
1760
|
*/
|
|
1641
1761
|
guess_sheet_name?: boolean;
|
|
1642
1762
|
}
|
|
1643
1763
|
}
|
|
1644
1764
|
|
|
1645
1765
|
/**
|
|
1646
|
-
* Page
|
|
1766
|
+
* Page selection: limit total pages or specify exact pages to process
|
|
1647
1767
|
*/
|
|
1648
1768
|
export interface PageRanges {
|
|
1649
1769
|
/**
|
|
1650
|
-
* Maximum number of pages to process
|
|
1770
|
+
* Maximum number of pages to process. Pages are processed in order starting from
|
|
1771
|
+
* page 1. If both max_pages and target_pages are set, target_pages takes
|
|
1772
|
+
* precedence
|
|
1651
1773
|
*/
|
|
1652
1774
|
max_pages?: number | null;
|
|
1653
1775
|
|
|
1654
1776
|
/**
|
|
1655
|
-
*
|
|
1777
|
+
* Comma-separated list of specific pages to process using 1-based indexing.
|
|
1778
|
+
* Supports individual pages and ranges. Examples: '1,3,5' (pages 1, 3, 5), '1-5'
|
|
1779
|
+
* (pages 1 through 5 inclusive), '1,3,5-8,10' (pages 1, 3, 5-8, and 10). Pages are
|
|
1780
|
+
* sorted and deduplicated automatically. Duplicate pages cause an error
|
|
1656
1781
|
*/
|
|
1657
1782
|
target_pages?: string | null;
|
|
1658
1783
|
}
|
|
1659
1784
|
|
|
1660
1785
|
/**
|
|
1661
|
-
* Job
|
|
1786
|
+
* Job execution controls including timeouts and failure thresholds
|
|
1662
1787
|
*/
|
|
1663
1788
|
export interface ProcessingControl {
|
|
1664
1789
|
/**
|
|
1665
|
-
*
|
|
1790
|
+
* Quality thresholds that determine when a job should fail vs complete with
|
|
1791
|
+
* partial results
|
|
1666
1792
|
*/
|
|
1667
1793
|
job_failure_conditions?: ProcessingControl.JobFailureConditions;
|
|
1668
1794
|
|
|
1669
1795
|
/**
|
|
1670
|
-
* Timeout
|
|
1796
|
+
* Timeout settings for job execution. Increase for large or complex documents
|
|
1671
1797
|
*/
|
|
1672
1798
|
timeouts?: ProcessingControl.Timeouts;
|
|
1673
1799
|
}
|
|
1674
1800
|
|
|
1675
1801
|
export namespace ProcessingControl {
|
|
1676
1802
|
/**
|
|
1677
|
-
*
|
|
1803
|
+
* Quality thresholds that determine when a job should fail vs complete with
|
|
1804
|
+
* partial results
|
|
1678
1805
|
*/
|
|
1679
1806
|
export interface JobFailureConditions {
|
|
1680
1807
|
/**
|
|
1681
|
-
* Maximum ratio of pages allowed to fail (0-1)
|
|
1808
|
+
* Maximum ratio of pages allowed to fail before the job fails (0-1). Example: 0.1
|
|
1809
|
+
* means job fails if more than 10% of pages fail. Default is 0.05 (5%)
|
|
1682
1810
|
*/
|
|
1683
1811
|
allowed_page_failure_ratio?: number | null;
|
|
1684
1812
|
|
|
1685
1813
|
/**
|
|
1686
|
-
* Fail job if
|
|
1814
|
+
* Fail the job if a problematic font is detected that may cause incorrect text
|
|
1815
|
+
* extraction. Buggy fonts can produce garbled or missing characters
|
|
1687
1816
|
*/
|
|
1688
1817
|
fail_on_buggy_font?: boolean | null;
|
|
1689
1818
|
|
|
1690
1819
|
/**
|
|
1691
|
-
* Fail job if image
|
|
1820
|
+
* Fail the entire job if any embedded image cannot be extracted. By default, image
|
|
1821
|
+
* extraction errors are logged but don't fail the job
|
|
1692
1822
|
*/
|
|
1693
1823
|
fail_on_image_extraction_error?: boolean | null;
|
|
1694
1824
|
|
|
1695
1825
|
/**
|
|
1696
|
-
* Fail job if image OCR
|
|
1826
|
+
* Fail the entire job if OCR fails on any image. By default, OCR errors result in
|
|
1827
|
+
* empty text for that image
|
|
1697
1828
|
*/
|
|
1698
1829
|
fail_on_image_ocr_error?: boolean | null;
|
|
1699
1830
|
|
|
1700
1831
|
/**
|
|
1701
|
-
* Fail job if markdown
|
|
1832
|
+
* Fail the entire job if markdown cannot be reconstructed for any page. By
|
|
1833
|
+
* default, failed pages use fallback text extraction
|
|
1702
1834
|
*/
|
|
1703
1835
|
fail_on_markdown_reconstruction_error?: boolean | null;
|
|
1704
1836
|
}
|
|
1705
1837
|
|
|
1706
1838
|
/**
|
|
1707
|
-
* Timeout
|
|
1839
|
+
* Timeout settings for job execution. Increase for large or complex documents
|
|
1708
1840
|
*/
|
|
1709
1841
|
export interface Timeouts {
|
|
1710
1842
|
/**
|
|
1711
|
-
* Base timeout in seconds (max 30 minutes)
|
|
1843
|
+
* Base timeout for the job in seconds (max 1800 = 30 minutes). This is the minimum
|
|
1844
|
+
* time allowed regardless of document size
|
|
1712
1845
|
*/
|
|
1713
1846
|
base_in_seconds?: number | null;
|
|
1714
1847
|
|
|
1715
1848
|
/**
|
|
1716
|
-
* Additional timeout per page in seconds (max 5 minutes)
|
|
1849
|
+
* Additional timeout per page in seconds (max 300 = 5 minutes). Total timeout =
|
|
1850
|
+
* base + (this value × page count)
|
|
1717
1851
|
*/
|
|
1718
1852
|
extra_time_per_page_in_seconds?: number | null;
|
|
1719
1853
|
}
|
|
1720
1854
|
}
|
|
1721
1855
|
|
|
1722
1856
|
/**
|
|
1723
|
-
*
|
|
1857
|
+
* Document processing options including OCR, table extraction, and chart parsing
|
|
1724
1858
|
*/
|
|
1725
1859
|
export interface ProcessingOptions {
|
|
1726
1860
|
/**
|
|
1727
|
-
*
|
|
1861
|
+
* Use aggressive heuristics to detect table boundaries, even without visible
|
|
1862
|
+
* borders. Useful for documents with borderless or complex tables
|
|
1728
1863
|
*/
|
|
1729
1864
|
aggressive_table_extraction?: boolean | null;
|
|
1730
1865
|
|
|
1731
1866
|
/**
|
|
1732
|
-
*
|
|
1867
|
+
* Conditional processing rules that apply different parsing options based on page
|
|
1868
|
+
* content, document structure, or filename patterns. Each entry defines trigger
|
|
1869
|
+
* conditions and the parsing configuration to apply when triggered
|
|
1733
1870
|
*/
|
|
1734
1871
|
auto_mode_configuration?: Array<ProcessingOptions.AutoModeConfiguration> | null;
|
|
1735
1872
|
|
|
1736
1873
|
/**
|
|
1737
|
-
* Cost optimizer
|
|
1874
|
+
* Cost optimizer configuration for reducing parsing costs on simpler pages.
|
|
1875
|
+
*
|
|
1876
|
+
* When enabled, the parser analyzes each page and routes simpler pages to faster,
|
|
1877
|
+
* cheaper processing while preserving quality for complex pages. Only works with
|
|
1878
|
+
* 'agentic' or 'agentic_plus' tiers.
|
|
1738
1879
|
*/
|
|
1739
1880
|
cost_optimizer?: ProcessingOptions.CostOptimizer | null;
|
|
1740
1881
|
|
|
1741
1882
|
/**
|
|
1742
|
-
*
|
|
1743
|
-
* table handling
|
|
1883
|
+
* Disable automatic heuristics including outlined table extraction and adaptive
|
|
1884
|
+
* long table handling. Use when heuristics produce incorrect results
|
|
1744
1885
|
*/
|
|
1745
1886
|
disable_heuristics?: boolean | null;
|
|
1746
1887
|
|
|
1747
1888
|
/**
|
|
1748
|
-
* Options for ignoring specific text types
|
|
1889
|
+
* Options for ignoring specific text types (diagonal, hidden, text in images)
|
|
1749
1890
|
*/
|
|
1750
1891
|
ignore?: ProcessingOptions.Ignore;
|
|
1751
1892
|
|
|
1752
1893
|
/**
|
|
1753
|
-
* OCR configuration
|
|
1894
|
+
* OCR configuration including language detection settings
|
|
1754
1895
|
*/
|
|
1755
1896
|
ocr_parameters?: ProcessingOptions.OcrParameters;
|
|
1756
1897
|
|
|
1757
1898
|
/**
|
|
1758
|
-
* Enable
|
|
1899
|
+
* Enable AI-powered chart analysis. Modes: 'efficient' (fast, lower cost),
|
|
1900
|
+
* 'agentic' (balanced), 'agentic_plus' (highest accuracy). Automatically enables
|
|
1901
|
+
* extract_layout and precise_bounding_box when set
|
|
1759
1902
|
*/
|
|
1760
1903
|
specialized_chart_parsing?: 'agentic_plus' | 'agentic' | 'efficient' | null;
|
|
1761
1904
|
}
|
|
1762
1905
|
|
|
1763
1906
|
export namespace ProcessingOptions {
|
|
1764
1907
|
/**
|
|
1765
|
-
* A single
|
|
1908
|
+
* A single auto mode rule with trigger conditions and parsing configuration.
|
|
1909
|
+
*
|
|
1910
|
+
* Auto mode allows conditional parsing where different configurations are applied
|
|
1911
|
+
* based on page content, structure, or filename. When triggers match, the
|
|
1912
|
+
* parsing_conf overrides default settings for that page.
|
|
1766
1913
|
*/
|
|
1767
1914
|
export interface AutoModeConfiguration {
|
|
1768
1915
|
/**
|
|
1769
|
-
*
|
|
1770
|
-
*
|
|
1771
|
-
* This uses V2 API naming conventions. The backend service will convert these to
|
|
1772
|
-
* the V1 format expected by the llamaparse worker.
|
|
1916
|
+
* Parsing configuration to apply when trigger conditions are met
|
|
1773
1917
|
*/
|
|
1774
1918
|
parsing_conf: AutoModeConfiguration.ParsingConf;
|
|
1775
1919
|
|
|
@@ -1944,18 +2088,15 @@ export namespace ParsingCreateParams {
|
|
|
1944
2088
|
text_in_page?: string | null;
|
|
1945
2089
|
|
|
1946
2090
|
/**
|
|
1947
|
-
* How to combine multiple trigger conditions: 'and' (all must match,
|
|
1948
|
-
* 'or' (any can
|
|
2091
|
+
* How to combine multiple trigger conditions: 'and' (all conditions must match,
|
|
2092
|
+
* this is the default) or 'or' (any single condition can trigger)
|
|
1949
2093
|
*/
|
|
1950
2094
|
trigger_mode?: string | null;
|
|
1951
2095
|
}
|
|
1952
2096
|
|
|
1953
2097
|
export namespace AutoModeConfiguration {
|
|
1954
2098
|
/**
|
|
1955
|
-
*
|
|
1956
|
-
*
|
|
1957
|
-
* This uses V2 API naming conventions. The backend service will convert these to
|
|
1958
|
-
* the V1 format expected by the llamaparse worker.
|
|
2099
|
+
* Parsing configuration to apply when trigger conditions are met
|
|
1959
2100
|
*/
|
|
1960
2101
|
export interface ParsingConf {
|
|
1961
2102
|
/**
|
|
@@ -1974,7 +2115,7 @@ export namespace ParsingCreateParams {
|
|
|
1974
2115
|
crop_box?: ParsingConf.CropBox | null;
|
|
1975
2116
|
|
|
1976
2117
|
/**
|
|
1977
|
-
* Custom
|
|
2118
|
+
* Custom AI instructions for matched pages. Overrides the base custom_prompt
|
|
1978
2119
|
*/
|
|
1979
2120
|
custom_prompt?: string | null;
|
|
1980
2121
|
|
|
@@ -2019,12 +2160,12 @@ export namespace ParsingCreateParams {
|
|
|
2019
2160
|
specialized_chart_parsing?: 'agentic_plus' | 'agentic' | 'efficient' | null;
|
|
2020
2161
|
|
|
2021
2162
|
/**
|
|
2022
|
-
*
|
|
2163
|
+
* Override the parsing tier for matched pages. Must be paired with version
|
|
2023
2164
|
*/
|
|
2024
2165
|
tier?: 'fast' | 'cost_effective' | 'agentic' | 'agentic_plus' | null;
|
|
2025
2166
|
|
|
2026
2167
|
/**
|
|
2027
|
-
*
|
|
2168
|
+
* Tier version when overriding tier. Required when tier is specified
|
|
2028
2169
|
*/
|
|
2029
2170
|
version?:
|
|
2030
2171
|
| '2025-12-11'
|
|
@@ -2046,6 +2187,22 @@ export namespace ParsingCreateParams {
|
|
|
2046
2187
|
| '2026-03-02'
|
|
2047
2188
|
| '2026-03-03'
|
|
2048
2189
|
| '2026-03-04'
|
|
2190
|
+
| '2026-03-05'
|
|
2191
|
+
| '2026-03-09'
|
|
2192
|
+
| '2026-03-10'
|
|
2193
|
+
| '2026-03-11'
|
|
2194
|
+
| '2026-03-12'
|
|
2195
|
+
| '2026-03-17'
|
|
2196
|
+
| '2026-03-19'
|
|
2197
|
+
| '2026-03-20'
|
|
2198
|
+
| '2026-03-22'
|
|
2199
|
+
| '2026-03-23'
|
|
2200
|
+
| '2026-03-24'
|
|
2201
|
+
| '2026-03-25'
|
|
2202
|
+
| '2026-03-26'
|
|
2203
|
+
| '2026-03-27'
|
|
2204
|
+
| '2026-03-30'
|
|
2205
|
+
| '2026-03-31'
|
|
2049
2206
|
| 'latest'
|
|
2050
2207
|
| (string & {})
|
|
2051
2208
|
| null;
|
|
@@ -2130,66 +2287,100 @@ export namespace ParsingCreateParams {
|
|
|
2130
2287
|
}
|
|
2131
2288
|
|
|
2132
2289
|
/**
|
|
2133
|
-
* Cost optimizer
|
|
2290
|
+
* Cost optimizer configuration for reducing parsing costs on simpler pages.
|
|
2291
|
+
*
|
|
2292
|
+
* When enabled, the parser analyzes each page and routes simpler pages to faster,
|
|
2293
|
+
* cheaper processing while preserving quality for complex pages. Only works with
|
|
2294
|
+
* 'agentic' or 'agentic_plus' tiers.
|
|
2134
2295
|
*/
|
|
2135
2296
|
export interface CostOptimizer {
|
|
2136
2297
|
/**
|
|
2137
|
-
*
|
|
2138
|
-
*
|
|
2298
|
+
* Enable cost-optimized parsing. Routes simpler pages to faster processing while
|
|
2299
|
+
* complex pages use full AI analysis. May reduce speed on some documents.
|
|
2300
|
+
* IMPORTANT: Only available with 'agentic' or 'agentic_plus' tiers
|
|
2139
2301
|
*/
|
|
2140
2302
|
enable?: boolean | null;
|
|
2141
2303
|
}
|
|
2142
2304
|
|
|
2143
2305
|
/**
|
|
2144
|
-
* Options for ignoring specific text types
|
|
2306
|
+
* Options for ignoring specific text types (diagonal, hidden, text in images)
|
|
2145
2307
|
*/
|
|
2146
2308
|
export interface Ignore {
|
|
2147
2309
|
/**
|
|
2148
|
-
*
|
|
2310
|
+
* Skip text rotated at an angle (not horizontal/vertical). Useful for ignoring
|
|
2311
|
+
* watermarks or decorative angled text
|
|
2149
2312
|
*/
|
|
2150
2313
|
ignore_diagonal_text?: boolean | null;
|
|
2151
2314
|
|
|
2152
2315
|
/**
|
|
2153
|
-
*
|
|
2316
|
+
* Skip text marked as hidden in the document structure. Some PDFs contain
|
|
2317
|
+
* invisible text layers used for accessibility or search indexing
|
|
2154
2318
|
*/
|
|
2155
2319
|
ignore_hidden_text?: boolean | null;
|
|
2156
2320
|
|
|
2157
2321
|
/**
|
|
2158
|
-
*
|
|
2322
|
+
* Skip OCR text extraction from embedded images. Use when images contain
|
|
2323
|
+
* irrelevant text (watermarks, logos) that shouldn't be in the output
|
|
2159
2324
|
*/
|
|
2160
2325
|
ignore_text_in_image?: boolean | null;
|
|
2161
2326
|
}
|
|
2162
2327
|
|
|
2163
2328
|
/**
|
|
2164
|
-
* OCR configuration
|
|
2329
|
+
* OCR configuration including language detection settings
|
|
2165
2330
|
*/
|
|
2166
2331
|
export interface OcrParameters {
|
|
2167
2332
|
/**
|
|
2168
|
-
*
|
|
2333
|
+
* Languages to use for OCR text recognition. Specify multiple languages if
|
|
2334
|
+
* document contains mixed-language content. Order matters - put primary language
|
|
2335
|
+
* first. Example: ['en', 'es'] for English with Spanish
|
|
2169
2336
|
*/
|
|
2170
2337
|
languages?: Array<ParsingAPI.ParsingLanguages> | null;
|
|
2171
2338
|
}
|
|
2172
2339
|
}
|
|
2173
2340
|
|
|
2341
|
+
/**
|
|
2342
|
+
* Webhook configuration for receiving parsing job notifications.
|
|
2343
|
+
*
|
|
2344
|
+
* Webhooks are called when specified events occur during job processing. Configure
|
|
2345
|
+
* multiple webhook configurations to send to different endpoints.
|
|
2346
|
+
*/
|
|
2174
2347
|
export interface WebhookConfiguration {
|
|
2175
2348
|
/**
|
|
2176
|
-
*
|
|
2349
|
+
* Events that trigger this webhook. Options: 'parse.success' (job completed),
|
|
2350
|
+
* 'parse.failure' (job failed), 'parse.partial' (some pages failed). If not
|
|
2351
|
+
* specified, webhook fires for all events
|
|
2177
2352
|
*/
|
|
2178
2353
|
webhook_events?: Array<string> | null;
|
|
2179
2354
|
|
|
2180
2355
|
/**
|
|
2181
|
-
* Custom headers to include in webhook requests
|
|
2356
|
+
* Custom HTTP headers to include in webhook requests. Use for authentication
|
|
2357
|
+
* tokens or custom routing. Example: {'Authorization': 'Bearer xyz'}
|
|
2182
2358
|
*/
|
|
2183
2359
|
webhook_headers?: { [key: string]: unknown } | null;
|
|
2184
2360
|
|
|
2185
2361
|
/**
|
|
2186
|
-
*
|
|
2362
|
+
* HTTPS URL to receive webhook POST requests. Must be publicly accessible
|
|
2187
2363
|
*/
|
|
2188
2364
|
webhook_url?: string | null;
|
|
2189
2365
|
}
|
|
2190
2366
|
}
|
|
2191
2367
|
|
|
2192
2368
|
export interface ParsingListParams extends PaginatedCursorParams {
|
|
2369
|
+
/**
|
|
2370
|
+
* Include items created at or after this timestamp (inclusive)
|
|
2371
|
+
*/
|
|
2372
|
+
created_at_on_or_after?: string | null;
|
|
2373
|
+
|
|
2374
|
+
/**
|
|
2375
|
+
* Include items created at or before this timestamp (inclusive)
|
|
2376
|
+
*/
|
|
2377
|
+
created_at_on_or_before?: string | null;
|
|
2378
|
+
|
|
2379
|
+
/**
|
|
2380
|
+
* Filter by specific job IDs
|
|
2381
|
+
*/
|
|
2382
|
+
job_ids?: Array<string> | null;
|
|
2383
|
+
|
|
2193
2384
|
organization_id?: string | null;
|
|
2194
2385
|
|
|
2195
2386
|
project_id?: string | null;
|
|
@@ -2202,10 +2393,10 @@ export interface ParsingListParams extends PaginatedCursorParams {
|
|
|
2202
2393
|
|
|
2203
2394
|
export interface ParsingGetParams {
|
|
2204
2395
|
/**
|
|
2205
|
-
* Fields to include: text, markdown, items, metadata,
|
|
2206
|
-
* markdown_content_metadata, items_content_metadata,
|
|
2207
|
-
* xlsx_content_metadata, output_pdf_content_metadata,
|
|
2208
|
-
* Metadata fields include presigned URLs.
|
|
2396
|
+
* Fields to include: text, markdown, items, metadata, job_metadata,
|
|
2397
|
+
* text_content_metadata, markdown_content_metadata, items_content_metadata,
|
|
2398
|
+
* metadata_content_metadata, xlsx_content_metadata, output_pdf_content_metadata,
|
|
2399
|
+
* images_content_metadata. Metadata fields include presigned URLs.
|
|
2209
2400
|
*/
|
|
2210
2401
|
expand?: Array<string>;
|
|
2211
2402
|
|