@llamaindex/llama-cloud 1.8.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +58 -0
- package/README.md +10 -8
- package/client.d.mts +4 -6
- package/client.d.mts.map +1 -1
- package/client.d.ts +4 -6
- package/client.d.ts.map +1 -1
- package/client.js +7 -6
- package/client.js.map +1 -1
- package/client.mjs +7 -6
- package/client.mjs.map +1 -1
- package/core/pagination.d.mts +0 -23
- package/core/pagination.d.mts.map +1 -1
- package/core/pagination.d.ts +0 -23
- package/core/pagination.d.ts.map +1 -1
- package/core/pagination.js +1 -32
- package/core/pagination.js.map +1 -1
- package/core/pagination.mjs +0 -30
- package/core/pagination.mjs.map +1 -1
- package/package.json +12 -1
- package/resources/beta/batch/batch.d.mts +55 -30
- package/resources/beta/batch/batch.d.mts.map +1 -1
- package/resources/beta/batch/batch.d.ts +55 -30
- package/resources/beta/batch/batch.d.ts.map +1 -1
- package/resources/beta/batch/batch.js +14 -11
- package/resources/beta/batch/batch.js.map +1 -1
- package/resources/beta/batch/batch.mjs +14 -11
- package/resources/beta/batch/batch.mjs.map +1 -1
- package/resources/beta/batch/job-items.d.mts +36 -13
- package/resources/beta/batch/job-items.d.mts.map +1 -1
- package/resources/beta/batch/job-items.d.ts +36 -13
- package/resources/beta/batch/job-items.d.ts.map +1 -1
- package/resources/beta/batch/job-items.js +6 -8
- package/resources/beta/batch/job-items.js.map +1 -1
- package/resources/beta/batch/job-items.mjs +6 -8
- package/resources/beta/batch/job-items.mjs.map +1 -1
- package/resources/beta/sheets.d.mts +16 -0
- package/resources/beta/sheets.d.mts.map +1 -1
- package/resources/beta/sheets.d.ts +16 -0
- package/resources/beta/sheets.d.ts.map +1 -1
- package/resources/beta/split.d.mts +60 -16
- package/resources/beta/split.d.mts.map +1 -1
- package/resources/beta/split.d.ts +60 -16
- package/resources/beta/split.d.ts.map +1 -1
- package/resources/beta/split.js.map +1 -1
- package/resources/beta/split.mjs.map +1 -1
- package/resources/classifier/jobs.d.mts +12 -3
- package/resources/classifier/jobs.d.mts.map +1 -1
- package/resources/classifier/jobs.d.ts +12 -3
- package/resources/classifier/jobs.d.ts.map +1 -1
- package/resources/classify.d.mts +76 -29
- package/resources/classify.d.mts.map +1 -1
- package/resources/classify.d.ts +76 -29
- package/resources/classify.d.ts.map +1 -1
- package/resources/classify.js +19 -2
- package/resources/classify.js.map +1 -1
- package/resources/classify.mjs +19 -2
- package/resources/classify.mjs.map +1 -1
- package/resources/extract.d.mts +1588 -0
- package/resources/extract.d.mts.map +1 -0
- package/resources/extract.d.ts +1588 -0
- package/resources/extract.d.ts.map +1 -0
- package/resources/extract.js +217 -0
- package/resources/extract.js.map +1 -0
- package/resources/extract.mjs +213 -0
- package/resources/extract.mjs.map +1 -0
- package/resources/files.d.mts +52 -38
- package/resources/files.d.mts.map +1 -1
- package/resources/files.d.ts +52 -38
- package/resources/files.d.ts.map +1 -1
- package/resources/files.js +10 -9
- package/resources/files.js.map +1 -1
- package/resources/files.mjs +10 -9
- package/resources/files.mjs.map +1 -1
- package/resources/index.d.mts +1 -1
- package/resources/index.d.mts.map +1 -1
- package/resources/index.d.ts +1 -1
- package/resources/index.d.ts.map +1 -1
- package/resources/index.js +3 -3
- package/resources/index.js.map +1 -1
- package/resources/index.mjs +1 -1
- package/resources/index.mjs.map +1 -1
- package/resources/parsing.d.mts +292 -138
- package/resources/parsing.d.mts.map +1 -1
- package/resources/parsing.d.ts +292 -138
- package/resources/parsing.d.ts.map +1 -1
- package/resources/parsing.js +30 -4
- package/resources/parsing.js.map +1 -1
- package/resources/parsing.mjs +30 -4
- package/resources/parsing.mjs.map +1 -1
- package/resources/pipelines/pipelines.d.mts +59 -13
- package/resources/pipelines/pipelines.d.mts.map +1 -1
- package/resources/pipelines/pipelines.d.ts +59 -13
- package/resources/pipelines/pipelines.d.ts.map +1 -1
- package/resources/pipelines/pipelines.js +24 -9
- package/resources/pipelines/pipelines.js.map +1 -1
- package/resources/pipelines/pipelines.mjs +24 -9
- package/resources/pipelines/pipelines.mjs.map +1 -1
- package/resources/pipelines/sync.d.mts +5 -3
- package/resources/pipelines/sync.d.mts.map +1 -1
- package/resources/pipelines/sync.d.ts +5 -3
- package/resources/pipelines/sync.d.ts.map +1 -1
- package/resources/pipelines/sync.js +5 -3
- package/resources/pipelines/sync.js.map +1 -1
- package/resources/pipelines/sync.mjs +5 -3
- package/resources/pipelines/sync.mjs.map +1 -1
- package/src/client.ts +50 -15
- package/src/core/pagination.ts +0 -71
- package/src/resources/beta/batch/batch.ts +75 -30
- package/src/resources/beta/batch/job-items.ts +56 -13
- package/src/resources/beta/sheets.ts +20 -0
- package/src/resources/beta/split.ts +70 -17
- package/src/resources/classifier/jobs.ts +12 -3
- package/src/resources/classify.ts +82 -29
- package/src/resources/extract.ts +2045 -0
- package/src/resources/files.ts +52 -38
- package/src/resources/index.ts +22 -1
- package/src/resources/parsing.ts +323 -136
- package/src/resources/pipelines/pipelines.ts +80 -14
- package/src/resources/pipelines/sync.ts +5 -3
- package/src/version.ts +1 -1
- package/version.d.mts +1 -1
- package/version.d.ts +1 -1
- package/version.js +1 -1
- package/version.mjs +1 -1
- package/resources/extraction/extraction-agents/extraction-agents.d.mts +0 -126
- package/resources/extraction/extraction-agents/extraction-agents.d.mts.map +0 -1
- package/resources/extraction/extraction-agents/extraction-agents.d.ts +0 -126
- package/resources/extraction/extraction-agents/extraction-agents.d.ts.map +0 -1
- package/resources/extraction/extraction-agents/extraction-agents.js +0 -56
- package/resources/extraction/extraction-agents/extraction-agents.js.map +0 -1
- package/resources/extraction/extraction-agents/extraction-agents.mjs +0 -51
- package/resources/extraction/extraction-agents/extraction-agents.mjs.map +0 -1
- package/resources/extraction/extraction-agents/index.d.mts +0 -3
- package/resources/extraction/extraction-agents/index.d.mts.map +0 -1
- package/resources/extraction/extraction-agents/index.d.ts +0 -3
- package/resources/extraction/extraction-agents/index.d.ts.map +0 -1
- package/resources/extraction/extraction-agents/index.js +0 -9
- package/resources/extraction/extraction-agents/index.js.map +0 -1
- package/resources/extraction/extraction-agents/index.mjs +0 -4
- package/resources/extraction/extraction-agents/index.mjs.map +0 -1
- package/resources/extraction/extraction-agents/schema.d.mts +0 -75
- package/resources/extraction/extraction-agents/schema.d.mts.map +0 -1
- package/resources/extraction/extraction-agents/schema.d.ts +0 -75
- package/resources/extraction/extraction-agents/schema.d.ts.map +0 -1
- package/resources/extraction/extraction-agents/schema.js +0 -28
- package/resources/extraction/extraction-agents/schema.js.map +0 -1
- package/resources/extraction/extraction-agents/schema.mjs +0 -24
- package/resources/extraction/extraction-agents/schema.mjs.map +0 -1
- package/resources/extraction/extraction-agents.d.mts +0 -2
- package/resources/extraction/extraction-agents.d.mts.map +0 -1
- package/resources/extraction/extraction-agents.d.ts +0 -2
- package/resources/extraction/extraction-agents.d.ts.map +0 -1
- package/resources/extraction/extraction-agents.js +0 -6
- package/resources/extraction/extraction-agents.js.map +0 -1
- package/resources/extraction/extraction-agents.mjs +0 -3
- package/resources/extraction/extraction-agents.mjs.map +0 -1
- package/resources/extraction/extraction.d.mts +0 -118
- package/resources/extraction/extraction.d.mts.map +0 -1
- package/resources/extraction/extraction.d.ts +0 -118
- package/resources/extraction/extraction.d.ts.map +0 -1
- package/resources/extraction/extraction.js +0 -91
- package/resources/extraction/extraction.js.map +0 -1
- package/resources/extraction/extraction.mjs +0 -86
- package/resources/extraction/extraction.mjs.map +0 -1
- package/resources/extraction/index.d.mts +0 -5
- package/resources/extraction/index.d.mts.map +0 -1
- package/resources/extraction/index.d.ts +0 -5
- package/resources/extraction/index.d.ts.map +0 -1
- package/resources/extraction/index.js +0 -13
- package/resources/extraction/index.js.map +0 -1
- package/resources/extraction/index.mjs +0 -6
- package/resources/extraction/index.mjs.map +0 -1
- package/resources/extraction/jobs.d.mts +0 -280
- package/resources/extraction/jobs.d.mts.map +0 -1
- package/resources/extraction/jobs.d.ts +0 -280
- package/resources/extraction/jobs.d.ts.map +0 -1
- package/resources/extraction/jobs.js +0 -179
- package/resources/extraction/jobs.js.map +0 -1
- package/resources/extraction/jobs.mjs +0 -175
- package/resources/extraction/jobs.mjs.map +0 -1
- package/resources/extraction/runs.d.mts +0 -198
- package/resources/extraction/runs.d.mts.map +0 -1
- package/resources/extraction/runs.d.ts +0 -198
- package/resources/extraction/runs.d.ts.map +0 -1
- package/resources/extraction/runs.js +0 -42
- package/resources/extraction/runs.js.map +0 -1
- package/resources/extraction/runs.mjs +0 -38
- package/resources/extraction/runs.mjs.map +0 -1
- package/resources/extraction.d.mts +0 -2
- package/resources/extraction.d.mts.map +0 -1
- package/resources/extraction.d.ts +0 -2
- package/resources/extraction.d.ts.map +0 -1
- package/resources/extraction.js +0 -6
- package/resources/extraction.js.map +0 -1
- package/resources/extraction.mjs +0 -3
- package/resources/extraction.mjs.map +0 -1
- package/src/resources/extraction/extraction-agents/extraction-agents.ts +0 -196
- package/src/resources/extraction/extraction-agents/index.ts +0 -18
- package/src/resources/extraction/extraction-agents/schema.ts +0 -100
- package/src/resources/extraction/extraction-agents.ts +0 -3
- package/src/resources/extraction/extraction.ts +0 -224
- package/src/resources/extraction/index.ts +0 -34
- package/src/resources/extraction/jobs.ts +0 -414
- package/src/resources/extraction/runs.ts +0 -315
- package/src/resources/extraction.ts +0 -3
package/src/resources/parsing.ts
CHANGED
|
@@ -12,7 +12,21 @@ import { pollUntilComplete, PollingOptions, DEFAULT_TIMEOUT } from '../core/poll
|
|
|
12
12
|
|
|
13
13
|
export class Parsing extends APIResource {
|
|
14
14
|
/**
|
|
15
|
-
* Parse a file by file ID
|
|
15
|
+
* Parse a file by file ID or URL.
|
|
16
|
+
*
|
|
17
|
+
* Provide either `file_id` (a previously uploaded file) or `source_url` (a
|
|
18
|
+
* publicly accessible URL). Configure parsing with options like `tier`,
|
|
19
|
+
* `target_pages`, and `lang`.
|
|
20
|
+
*
|
|
21
|
+
* ## Tiers
|
|
22
|
+
*
|
|
23
|
+
* - `fast` — rule-based, cheapest, no AI
|
|
24
|
+
* - `cost_effective` — balanced speed and quality
|
|
25
|
+
* - `agentic` — full AI-powered parsing
|
|
26
|
+
* - `agentic_plus` — premium AI with specialized features
|
|
27
|
+
*
|
|
28
|
+
* The job runs asynchronously. Poll `GET /parse/{job_id}` with `expand=text` or
|
|
29
|
+
* `expand=markdown` to retrieve results.
|
|
16
30
|
*/
|
|
17
31
|
create(
|
|
18
32
|
params: ParsingCreateParams & { upload_file?: Uploadable },
|
|
@@ -43,8 +57,10 @@ export class Parsing extends APIResource {
|
|
|
43
57
|
}
|
|
44
58
|
|
|
45
59
|
/**
|
|
46
|
-
* List parse jobs for the current project
|
|
47
|
-
*
|
|
60
|
+
* List parse jobs for the current project.
|
|
61
|
+
*
|
|
62
|
+
* Filter by `status` or creation date range. Results are paginated — use
|
|
63
|
+
* `page_token` from the response to fetch subsequent pages.
|
|
48
64
|
*/
|
|
49
65
|
list(
|
|
50
66
|
query: ParsingListParams | null | undefined = {},
|
|
@@ -57,7 +73,17 @@ export class Parsing extends APIResource {
|
|
|
57
73
|
}
|
|
58
74
|
|
|
59
75
|
/**
|
|
60
|
-
* Retrieve parse job with optional content
|
|
76
|
+
* Retrieve a parse job with optional expanded content.
|
|
77
|
+
*
|
|
78
|
+
* By default returns job metadata only. Use `expand` to include parsed content:
|
|
79
|
+
*
|
|
80
|
+
* - `text` — plain text output
|
|
81
|
+
* - `markdown` — markdown output
|
|
82
|
+
* - `items` — structured page-by-page output
|
|
83
|
+
* - `job_metadata` — usage and processing details
|
|
84
|
+
*
|
|
85
|
+
* Content metadata fields (e.g. `text_content_metadata`) return presigned URLs for
|
|
86
|
+
* downloading large results.
|
|
61
87
|
*/
|
|
62
88
|
get(
|
|
63
89
|
jobID: string,
|
|
@@ -599,18 +625,27 @@ export type LlamaParseSupportedFileExtensions =
|
|
|
599
625
|
| '.webm';
|
|
600
626
|
|
|
601
627
|
/**
|
|
602
|
-
*
|
|
628
|
+
* A parse job (v1).
|
|
603
629
|
*/
|
|
604
630
|
export interface ParsingJob {
|
|
631
|
+
/**
|
|
632
|
+
* Unique parse job identifier
|
|
633
|
+
*/
|
|
605
634
|
id: string;
|
|
606
635
|
|
|
607
636
|
/**
|
|
608
|
-
*
|
|
637
|
+
* Current job status
|
|
609
638
|
*/
|
|
610
639
|
status: StatusEnum;
|
|
611
640
|
|
|
641
|
+
/**
|
|
642
|
+
* Machine-readable error code when failed
|
|
643
|
+
*/
|
|
612
644
|
error_code?: string | null;
|
|
613
645
|
|
|
646
|
+
/**
|
|
647
|
+
* Human-readable error details when failed
|
|
648
|
+
*/
|
|
614
649
|
error_message?: string | null;
|
|
615
650
|
}
|
|
616
651
|
|
|
@@ -811,11 +846,11 @@ export interface TextItem {
|
|
|
811
846
|
}
|
|
812
847
|
|
|
813
848
|
/**
|
|
814
|
-
*
|
|
849
|
+
* A parse job.
|
|
815
850
|
*/
|
|
816
851
|
export interface ParsingCreateResponse {
|
|
817
852
|
/**
|
|
818
|
-
* Unique
|
|
853
|
+
* Unique parse job identifier
|
|
819
854
|
*/
|
|
820
855
|
id: string;
|
|
821
856
|
|
|
@@ -825,7 +860,7 @@ export interface ParsingCreateResponse {
|
|
|
825
860
|
project_id: string;
|
|
826
861
|
|
|
827
862
|
/**
|
|
828
|
-
* Current
|
|
863
|
+
* Current job status: PENDING, RUNNING, COMPLETED, FAILED, or CANCELLED
|
|
829
864
|
*/
|
|
830
865
|
status: 'PENDING' | 'RUNNING' | 'COMPLETED' | 'FAILED' | 'CANCELLED';
|
|
831
866
|
|
|
@@ -835,15 +870,20 @@ export interface ParsingCreateResponse {
|
|
|
835
870
|
created_at?: string | null;
|
|
836
871
|
|
|
837
872
|
/**
|
|
838
|
-
* Error
|
|
873
|
+
* Error details when status is FAILED
|
|
839
874
|
*/
|
|
840
875
|
error_message?: string | null;
|
|
841
876
|
|
|
842
877
|
/**
|
|
843
|
-
*
|
|
878
|
+
* Optional display name for this parse job
|
|
844
879
|
*/
|
|
845
880
|
name?: string | null;
|
|
846
881
|
|
|
882
|
+
/**
|
|
883
|
+
* Parsing tier used for this job
|
|
884
|
+
*/
|
|
885
|
+
tier?: string | null;
|
|
886
|
+
|
|
847
887
|
/**
|
|
848
888
|
* Update datetime
|
|
849
889
|
*/
|
|
@@ -851,11 +891,11 @@ export interface ParsingCreateResponse {
|
|
|
851
891
|
}
|
|
852
892
|
|
|
853
893
|
/**
|
|
854
|
-
*
|
|
894
|
+
* A parse job.
|
|
855
895
|
*/
|
|
856
896
|
export interface ParsingListResponse {
|
|
857
897
|
/**
|
|
858
|
-
* Unique
|
|
898
|
+
* Unique parse job identifier
|
|
859
899
|
*/
|
|
860
900
|
id: string;
|
|
861
901
|
|
|
@@ -865,7 +905,7 @@ export interface ParsingListResponse {
|
|
|
865
905
|
project_id: string;
|
|
866
906
|
|
|
867
907
|
/**
|
|
868
|
-
* Current
|
|
908
|
+
* Current job status: PENDING, RUNNING, COMPLETED, FAILED, or CANCELLED
|
|
869
909
|
*/
|
|
870
910
|
status: 'PENDING' | 'RUNNING' | 'COMPLETED' | 'FAILED' | 'CANCELLED';
|
|
871
911
|
|
|
@@ -875,15 +915,20 @@ export interface ParsingListResponse {
|
|
|
875
915
|
created_at?: string | null;
|
|
876
916
|
|
|
877
917
|
/**
|
|
878
|
-
* Error
|
|
918
|
+
* Error details when status is FAILED
|
|
879
919
|
*/
|
|
880
920
|
error_message?: string | null;
|
|
881
921
|
|
|
882
922
|
/**
|
|
883
|
-
*
|
|
923
|
+
* Optional display name for this parse job
|
|
884
924
|
*/
|
|
885
925
|
name?: string | null;
|
|
886
926
|
|
|
927
|
+
/**
|
|
928
|
+
* Parsing tier used for this job
|
|
929
|
+
*/
|
|
930
|
+
tier?: string | null;
|
|
931
|
+
|
|
887
932
|
/**
|
|
888
933
|
* Update datetime
|
|
889
934
|
*/
|
|
@@ -912,6 +957,11 @@ export interface ParsingGetResponse {
|
|
|
912
957
|
*/
|
|
913
958
|
items?: ParsingGetResponse.Items | null;
|
|
914
959
|
|
|
960
|
+
/**
|
|
961
|
+
* Job execution metadata (if requested)
|
|
962
|
+
*/
|
|
963
|
+
job_metadata?: { [key: string]: unknown } | null;
|
|
964
|
+
|
|
915
965
|
/**
|
|
916
966
|
* Markdown result (if requested)
|
|
917
967
|
*/
|
|
@@ -951,7 +1001,7 @@ export namespace ParsingGetResponse {
|
|
|
951
1001
|
*/
|
|
952
1002
|
export interface Job {
|
|
953
1003
|
/**
|
|
954
|
-
* Unique
|
|
1004
|
+
* Unique parse job identifier
|
|
955
1005
|
*/
|
|
956
1006
|
id: string;
|
|
957
1007
|
|
|
@@ -961,7 +1011,7 @@ export namespace ParsingGetResponse {
|
|
|
961
1011
|
project_id: string;
|
|
962
1012
|
|
|
963
1013
|
/**
|
|
964
|
-
* Current
|
|
1014
|
+
* Current job status: PENDING, RUNNING, COMPLETED, FAILED, or CANCELLED
|
|
965
1015
|
*/
|
|
966
1016
|
status: 'PENDING' | 'RUNNING' | 'COMPLETED' | 'FAILED' | 'CANCELLED';
|
|
967
1017
|
|
|
@@ -971,15 +1021,20 @@ export namespace ParsingGetResponse {
|
|
|
971
1021
|
created_at?: string | null;
|
|
972
1022
|
|
|
973
1023
|
/**
|
|
974
|
-
* Error
|
|
1024
|
+
* Error details when status is FAILED
|
|
975
1025
|
*/
|
|
976
1026
|
error_message?: string | null;
|
|
977
1027
|
|
|
978
1028
|
/**
|
|
979
|
-
*
|
|
1029
|
+
* Optional display name for this parse job
|
|
980
1030
|
*/
|
|
981
1031
|
name?: string | null;
|
|
982
1032
|
|
|
1033
|
+
/**
|
|
1034
|
+
* Parsing tier used for this job
|
|
1035
|
+
*/
|
|
1036
|
+
tier?: string | null;
|
|
1037
|
+
|
|
983
1038
|
/**
|
|
984
1039
|
* Update datetime
|
|
985
1040
|
*/
|
|
@@ -1297,12 +1352,15 @@ export namespace ParsingGetResponse {
|
|
|
1297
1352
|
|
|
1298
1353
|
export interface ParsingCreateParams {
|
|
1299
1354
|
/**
|
|
1300
|
-
* Body param:
|
|
1355
|
+
* Body param: Parsing tier: 'fast' (rule-based, cheapest), 'cost_effective'
|
|
1356
|
+
* (balanced), 'agentic' (AI-powered with custom prompts), or 'agentic_plus'
|
|
1357
|
+
* (premium AI with highest accuracy)
|
|
1301
1358
|
*/
|
|
1302
1359
|
tier: 'fast' | 'cost_effective' | 'agentic' | 'agentic_plus';
|
|
1303
1360
|
|
|
1304
1361
|
/**
|
|
1305
|
-
* Body param:
|
|
1362
|
+
* Body param: Tier version. Use 'latest' for the current stable version, or
|
|
1363
|
+
* specify a specific version (e.g., '1.0', '2.0') for reproducible results
|
|
1306
1364
|
*/
|
|
1307
1365
|
version:
|
|
1308
1366
|
| '2025-12-11'
|
|
@@ -1324,6 +1382,20 @@ export interface ParsingCreateParams {
|
|
|
1324
1382
|
| '2026-03-02'
|
|
1325
1383
|
| '2026-03-03'
|
|
1326
1384
|
| '2026-03-04'
|
|
1385
|
+
| '2026-03-05'
|
|
1386
|
+
| '2026-03-09'
|
|
1387
|
+
| '2026-03-10'
|
|
1388
|
+
| '2026-03-11'
|
|
1389
|
+
| '2026-03-12'
|
|
1390
|
+
| '2026-03-17'
|
|
1391
|
+
| '2026-03-19'
|
|
1392
|
+
| '2026-03-20'
|
|
1393
|
+
| '2026-03-22'
|
|
1394
|
+
| '2026-03-23'
|
|
1395
|
+
| '2026-03-24'
|
|
1396
|
+
| '2026-03-25'
|
|
1397
|
+
| '2026-03-26'
|
|
1398
|
+
| '2026-03-27'
|
|
1327
1399
|
| 'latest'
|
|
1328
1400
|
| (string & {});
|
|
1329
1401
|
|
|
@@ -1338,296 +1410,341 @@ export interface ParsingCreateParams {
|
|
|
1338
1410
|
project_id?: string | null;
|
|
1339
1411
|
|
|
1340
1412
|
/**
|
|
1341
|
-
* Body param: Options for
|
|
1413
|
+
* Body param: Options for AI-powered parsing tiers (cost_effective, agentic,
|
|
1414
|
+
* agentic_plus).
|
|
1415
|
+
*
|
|
1416
|
+
* These options customize how the AI processes and interprets document content.
|
|
1417
|
+
* Only applicable when using non-fast tiers.
|
|
1342
1418
|
*/
|
|
1343
1419
|
agentic_options?: ParsingCreateParams.AgenticOptions | null;
|
|
1344
1420
|
|
|
1345
1421
|
/**
|
|
1346
|
-
* Body param:
|
|
1422
|
+
* Body param: Identifier for the client/application making the request. Used for
|
|
1423
|
+
* analytics and debugging. Example: 'my-app-v2'
|
|
1347
1424
|
*/
|
|
1348
1425
|
client_name?: string | null;
|
|
1349
1426
|
|
|
1350
1427
|
/**
|
|
1351
|
-
* Body param:
|
|
1428
|
+
* Body param: Crop boundaries to process only a portion of each page. Values are
|
|
1429
|
+
* ratios 0-1 from page edges
|
|
1352
1430
|
*/
|
|
1353
1431
|
crop_box?: ParsingCreateParams.CropBox;
|
|
1354
1432
|
|
|
1355
1433
|
/**
|
|
1356
|
-
* Body param:
|
|
1434
|
+
* Body param: Bypass result caching and force re-parsing. Use when document
|
|
1435
|
+
* content may have changed or you need fresh results
|
|
1357
1436
|
*/
|
|
1358
1437
|
disable_cache?: boolean | null;
|
|
1359
1438
|
|
|
1360
1439
|
/**
|
|
1361
|
-
* Body param: Options for fast tier parsing (
|
|
1440
|
+
* Body param: Options for fast tier parsing (rule-based, no AI).
|
|
1441
|
+
*
|
|
1442
|
+
* Fast tier uses deterministic algorithms for text extraction without AI
|
|
1443
|
+
* enhancement. It's the fastest and most cost-effective option, best suited for
|
|
1444
|
+
* simple documents with standard layouts. Currently has no configurable options
|
|
1445
|
+
* but reserved for future expansion.
|
|
1362
1446
|
*/
|
|
1363
1447
|
fast_options?: unknown | null;
|
|
1364
1448
|
|
|
1365
1449
|
/**
|
|
1366
|
-
* Body param: ID of an existing file in the project to parse
|
|
1450
|
+
* Body param: ID of an existing file in the project to parse. Mutually exclusive
|
|
1451
|
+
* with source_url
|
|
1367
1452
|
*/
|
|
1368
1453
|
file_id?: string | null;
|
|
1369
1454
|
|
|
1370
1455
|
/**
|
|
1371
|
-
* Body param: HTTP proxy
|
|
1456
|
+
* Body param: HTTP/HTTPS proxy for fetching source_url. Ignored if using file_id
|
|
1372
1457
|
*/
|
|
1373
1458
|
http_proxy?: string | null;
|
|
1374
1459
|
|
|
1375
1460
|
/**
|
|
1376
|
-
* Body param:
|
|
1461
|
+
* Body param: Format-specific options (HTML, PDF, spreadsheet, presentation).
|
|
1462
|
+
* Applied based on detected input file type
|
|
1377
1463
|
*/
|
|
1378
1464
|
input_options?: ParsingCreateParams.InputOptions;
|
|
1379
1465
|
|
|
1380
1466
|
/**
|
|
1381
|
-
* Body param: Output
|
|
1467
|
+
* Body param: Output formatting options for markdown, text, and extracted images
|
|
1382
1468
|
*/
|
|
1383
1469
|
output_options?: ParsingCreateParams.OutputOptions;
|
|
1384
1470
|
|
|
1385
1471
|
/**
|
|
1386
|
-
* Body param: Page
|
|
1472
|
+
* Body param: Page selection: limit total pages or specify exact pages to process
|
|
1387
1473
|
*/
|
|
1388
1474
|
page_ranges?: ParsingCreateParams.PageRanges;
|
|
1389
1475
|
|
|
1390
1476
|
/**
|
|
1391
|
-
* Body param: Job
|
|
1477
|
+
* Body param: Job execution controls including timeouts and failure thresholds
|
|
1392
1478
|
*/
|
|
1393
1479
|
processing_control?: ParsingCreateParams.ProcessingControl;
|
|
1394
1480
|
|
|
1395
1481
|
/**
|
|
1396
|
-
* Body param:
|
|
1482
|
+
* Body param: Document processing options including OCR, table extraction, and
|
|
1483
|
+
* chart parsing
|
|
1397
1484
|
*/
|
|
1398
1485
|
processing_options?: ParsingCreateParams.ProcessingOptions;
|
|
1399
1486
|
|
|
1400
1487
|
/**
|
|
1401
|
-
* Body param:
|
|
1488
|
+
* Body param: Public URL of the document to parse. Mutually exclusive with file_id
|
|
1402
1489
|
*/
|
|
1403
1490
|
source_url?: string | null;
|
|
1404
1491
|
|
|
1405
1492
|
/**
|
|
1406
|
-
* Body param:
|
|
1493
|
+
* Body param: Webhook endpoints for job status notifications. Multiple webhooks
|
|
1494
|
+
* can be configured for different events or services
|
|
1407
1495
|
*/
|
|
1408
1496
|
webhook_configurations?: Array<ParsingCreateParams.WebhookConfiguration>;
|
|
1409
1497
|
}
|
|
1410
1498
|
|
|
1411
1499
|
export namespace ParsingCreateParams {
|
|
1412
1500
|
/**
|
|
1413
|
-
* Options for
|
|
1501
|
+
* Options for AI-powered parsing tiers (cost_effective, agentic, agentic_plus).
|
|
1502
|
+
*
|
|
1503
|
+
* These options customize how the AI processes and interprets document content.
|
|
1504
|
+
* Only applicable when using non-fast tiers.
|
|
1414
1505
|
*/
|
|
1415
1506
|
export interface AgenticOptions {
|
|
1416
1507
|
/**
|
|
1417
|
-
* Custom
|
|
1508
|
+
* Custom instructions for the AI parser. Use to guide extraction behavior, specify
|
|
1509
|
+
* output formatting, or provide domain-specific context. Example: 'Extract
|
|
1510
|
+
* financial tables with currency symbols. Format dates as YYYY-MM-DD.'
|
|
1418
1511
|
*/
|
|
1419
1512
|
custom_prompt?: string | null;
|
|
1420
1513
|
}
|
|
1421
1514
|
|
|
1422
1515
|
/**
|
|
1423
|
-
*
|
|
1516
|
+
* Crop boundaries to process only a portion of each page. Values are ratios 0-1
|
|
1517
|
+
* from page edges
|
|
1424
1518
|
*/
|
|
1425
1519
|
export interface CropBox {
|
|
1426
1520
|
/**
|
|
1427
|
-
* Bottom boundary
|
|
1521
|
+
* Bottom boundary as ratio (0-1). 0=top edge, 1=bottom edge. Content below this
|
|
1522
|
+
* line is excluded
|
|
1428
1523
|
*/
|
|
1429
1524
|
bottom?: number | null;
|
|
1430
1525
|
|
|
1431
1526
|
/**
|
|
1432
|
-
* Left boundary
|
|
1527
|
+
* Left boundary as ratio (0-1). 0=left edge, 1=right edge. Content left of this
|
|
1528
|
+
* line is excluded
|
|
1433
1529
|
*/
|
|
1434
1530
|
left?: number | null;
|
|
1435
1531
|
|
|
1436
1532
|
/**
|
|
1437
|
-
* Right boundary
|
|
1533
|
+
* Right boundary as ratio (0-1). 0=left edge, 1=right edge. Content right of this
|
|
1534
|
+
* line is excluded
|
|
1438
1535
|
*/
|
|
1439
1536
|
right?: number | null;
|
|
1440
1537
|
|
|
1441
1538
|
/**
|
|
1442
|
-
* Top boundary
|
|
1539
|
+
* Top boundary as ratio (0-1). 0=top edge, 1=bottom edge. Content above this line
|
|
1540
|
+
* is excluded
|
|
1443
1541
|
*/
|
|
1444
1542
|
top?: number | null;
|
|
1445
1543
|
}
|
|
1446
1544
|
|
|
1447
1545
|
/**
|
|
1448
|
-
*
|
|
1546
|
+
* Format-specific options (HTML, PDF, spreadsheet, presentation). Applied based on
|
|
1547
|
+
* detected input file type
|
|
1449
1548
|
*/
|
|
1450
1549
|
export interface InputOptions {
|
|
1451
1550
|
/**
|
|
1452
|
-
* HTML
|
|
1551
|
+
* HTML/web page parsing options (applies to .html, .htm files)
|
|
1453
1552
|
*/
|
|
1454
1553
|
html?: InputOptions.HTML;
|
|
1455
1554
|
|
|
1456
1555
|
/**
|
|
1457
|
-
* PDF-specific parsing options
|
|
1556
|
+
* PDF-specific parsing options (applies to .pdf files)
|
|
1458
1557
|
*/
|
|
1459
1558
|
pdf?: unknown;
|
|
1460
1559
|
|
|
1461
1560
|
/**
|
|
1462
|
-
* Presentation
|
|
1561
|
+
* Presentation parsing options (applies to .pptx, .ppt, .odp, .key files)
|
|
1463
1562
|
*/
|
|
1464
1563
|
presentation?: InputOptions.Presentation;
|
|
1465
1564
|
|
|
1466
1565
|
/**
|
|
1467
|
-
* Spreadsheet
|
|
1566
|
+
* Spreadsheet parsing options (applies to .xlsx, .xls, .csv, .ods files)
|
|
1468
1567
|
*/
|
|
1469
1568
|
spreadsheet?: InputOptions.Spreadsheet;
|
|
1470
1569
|
}
|
|
1471
1570
|
|
|
1472
1571
|
export namespace InputOptions {
|
|
1473
1572
|
/**
|
|
1474
|
-
* HTML
|
|
1573
|
+
* HTML/web page parsing options (applies to .html, .htm files)
|
|
1475
1574
|
*/
|
|
1476
1575
|
export interface HTML {
|
|
1477
1576
|
/**
|
|
1478
|
-
*
|
|
1577
|
+
* Force all HTML elements to be visible by overriding CSS display/visibility
|
|
1578
|
+
* properties. Useful for parsing pages with hidden content or collapsed sections
|
|
1479
1579
|
*/
|
|
1480
1580
|
make_all_elements_visible?: boolean | null;
|
|
1481
1581
|
|
|
1482
1582
|
/**
|
|
1483
|
-
* Remove fixed
|
|
1583
|
+
* Remove fixed-position elements (headers, footers, floating buttons) that appear
|
|
1584
|
+
* on every page render
|
|
1484
1585
|
*/
|
|
1485
1586
|
remove_fixed_elements?: boolean | null;
|
|
1486
1587
|
|
|
1487
1588
|
/**
|
|
1488
|
-
* Remove navigation elements
|
|
1589
|
+
* Remove navigation elements (nav bars, sidebars, menus) to focus on main content
|
|
1489
1590
|
*/
|
|
1490
1591
|
remove_navigation_elements?: boolean | null;
|
|
1491
1592
|
}
|
|
1492
1593
|
|
|
1493
1594
|
/**
|
|
1494
|
-
* Presentation
|
|
1595
|
+
* Presentation parsing options (applies to .pptx, .ppt, .odp, .key files)
|
|
1495
1596
|
*/
|
|
1496
1597
|
export interface Presentation {
|
|
1497
1598
|
/**
|
|
1498
|
-
* Extract
|
|
1599
|
+
* Extract content positioned outside the visible slide area. Some presentations
|
|
1600
|
+
* have hidden notes or content that extends beyond slide boundaries
|
|
1499
1601
|
*/
|
|
1500
1602
|
out_of_bounds_content?: boolean | null;
|
|
1501
1603
|
|
|
1502
1604
|
/**
|
|
1503
|
-
* Skip extraction of embedded data
|
|
1605
|
+
* Skip extraction of embedded chart data tables. When true, only the visual
|
|
1606
|
+
* representation of charts is captured, not the underlying data
|
|
1504
1607
|
*/
|
|
1505
1608
|
skip_embedded_data?: boolean | null;
|
|
1506
1609
|
}
|
|
1507
1610
|
|
|
1508
1611
|
/**
|
|
1509
|
-
* Spreadsheet
|
|
1612
|
+
* Spreadsheet parsing options (applies to .xlsx, .xls, .csv, .ods files)
|
|
1510
1613
|
*/
|
|
1511
1614
|
export interface Spreadsheet {
|
|
1512
1615
|
/**
|
|
1513
|
-
* Detect and extract
|
|
1616
|
+
* Detect and extract multiple tables within a single sheet. Useful when
|
|
1617
|
+
* spreadsheets contain several data regions separated by blank rows/columns
|
|
1514
1618
|
*/
|
|
1515
1619
|
detect_sub_tables_in_sheets?: boolean | null;
|
|
1516
1620
|
|
|
1517
1621
|
/**
|
|
1518
|
-
*
|
|
1622
|
+
* Compute formula results instead of extracting formula text. Use when you need
|
|
1623
|
+
* calculated values rather than formula definitions
|
|
1519
1624
|
*/
|
|
1520
1625
|
force_formula_computation_in_sheets?: boolean | null;
|
|
1521
1626
|
|
|
1522
1627
|
/**
|
|
1523
|
-
*
|
|
1628
|
+
* Parse hidden sheets in addition to visible ones. By default, hidden sheets are
|
|
1629
|
+
* skipped
|
|
1524
1630
|
*/
|
|
1525
1631
|
include_hidden_sheets?: boolean | null;
|
|
1526
1632
|
}
|
|
1527
1633
|
}
|
|
1528
1634
|
|
|
1529
1635
|
/**
|
|
1530
|
-
* Output
|
|
1636
|
+
* Output formatting options for markdown, text, and extracted images
|
|
1531
1637
|
*/
|
|
1532
1638
|
export interface OutputOptions {
|
|
1533
1639
|
/**
|
|
1534
|
-
* Extract printed page
|
|
1640
|
+
* Extract the printed page number as it appears in the document (e.g., 'Page 5 of
|
|
1641
|
+
* 10', 'v', 'A-3'). Useful for referencing original page numbers
|
|
1535
1642
|
*/
|
|
1536
1643
|
extract_printed_page_number?: boolean | null;
|
|
1537
1644
|
|
|
1538
1645
|
/**
|
|
1539
|
-
* Image categories to save: 'screenshot' (full page
|
|
1540
|
-
*
|
|
1541
|
-
*
|
|
1646
|
+
* Image categories to extract and save. Options: 'screenshot' (full page renders
|
|
1647
|
+
* useful for visual QA), 'embedded' (images found within the document), 'layout'
|
|
1648
|
+
* (cropped regions from layout detection like figures and diagrams). Empty list
|
|
1649
|
+
* saves no images
|
|
1542
1650
|
*/
|
|
1543
1651
|
images_to_save?: Array<'screenshot' | 'embedded' | 'layout'>;
|
|
1544
1652
|
|
|
1545
1653
|
/**
|
|
1546
|
-
* Markdown
|
|
1654
|
+
* Markdown formatting options including table styles and link annotations
|
|
1547
1655
|
*/
|
|
1548
1656
|
markdown?: OutputOptions.Markdown;
|
|
1549
1657
|
|
|
1550
1658
|
/**
|
|
1551
|
-
* Spatial text output options
|
|
1659
|
+
* Spatial text output options for preserving document layout structure
|
|
1552
1660
|
*/
|
|
1553
1661
|
spatial_text?: OutputOptions.SpatialText;
|
|
1554
1662
|
|
|
1555
1663
|
/**
|
|
1556
|
-
*
|
|
1664
|
+
* Options for exporting tables as XLSX spreadsheets
|
|
1557
1665
|
*/
|
|
1558
1666
|
tables_as_spreadsheet?: OutputOptions.TablesAsSpreadsheet;
|
|
1559
1667
|
}
|
|
1560
1668
|
|
|
1561
1669
|
export namespace OutputOptions {
|
|
1562
1670
|
/**
|
|
1563
|
-
* Markdown
|
|
1671
|
+
* Markdown formatting options including table styles and link annotations
|
|
1564
1672
|
*/
|
|
1565
1673
|
export interface Markdown {
|
|
1566
1674
|
/**
|
|
1567
|
-
* Add annotations to
|
|
1675
|
+
* Add link annotations to markdown output in the format [text](url). When false,
|
|
1676
|
+
* only the link text is included
|
|
1568
1677
|
*/
|
|
1569
1678
|
annotate_links?: boolean | null;
|
|
1570
1679
|
|
|
1571
1680
|
/**
|
|
1572
|
-
*
|
|
1681
|
+
* Embed images directly in markdown as base64 data URIs instead of extracting them
|
|
1682
|
+
* as separate files. Useful for self-contained markdown output
|
|
1573
1683
|
*/
|
|
1574
1684
|
inline_images?: boolean | null;
|
|
1575
1685
|
|
|
1576
1686
|
/**
|
|
1577
|
-
* Table formatting options
|
|
1687
|
+
* Table formatting options including markdown vs HTML format and merging behavior
|
|
1578
1688
|
*/
|
|
1579
1689
|
tables?: Markdown.Tables;
|
|
1580
1690
|
}
|
|
1581
1691
|
|
|
1582
1692
|
export namespace Markdown {
|
|
1583
1693
|
/**
|
|
1584
|
-
* Table formatting options
|
|
1694
|
+
* Table formatting options including markdown vs HTML format and merging behavior
|
|
1585
1695
|
*/
|
|
1586
1696
|
export interface Tables {
|
|
1587
1697
|
/**
|
|
1588
|
-
*
|
|
1698
|
+
* Remove extra whitespace padding in markdown table cells for more compact output
|
|
1589
1699
|
*/
|
|
1590
1700
|
compact_markdown_tables?: boolean | null;
|
|
1591
1701
|
|
|
1592
1702
|
/**
|
|
1593
|
-
* Separator for multiline content in markdown tables
|
|
1703
|
+
* Separator string for multiline cell content in markdown tables. Example:
|
|
1704
|
+
* '<br>' to preserve line breaks, ' ' to join with spaces
|
|
1594
1705
|
*/
|
|
1595
1706
|
markdown_table_multiline_separator?: string | null;
|
|
1596
1707
|
|
|
1597
1708
|
/**
|
|
1598
|
-
*
|
|
1709
|
+
* Automatically merge tables that span multiple pages into a single table. The
|
|
1710
|
+
* merged table appears on the first page with merged_from_pages metadata
|
|
1599
1711
|
*/
|
|
1600
1712
|
merge_continued_tables?: boolean | null;
|
|
1601
1713
|
|
|
1602
1714
|
/**
|
|
1603
|
-
* Output tables
|
|
1715
|
+
* Output tables as markdown pipe tables instead of HTML <table> tags.
|
|
1716
|
+
* Markdown tables are simpler but cannot represent complex structures like merged
|
|
1717
|
+
* cells
|
|
1604
1718
|
*/
|
|
1605
1719
|
output_tables_as_markdown?: boolean | null;
|
|
1606
1720
|
}
|
|
1607
1721
|
}
|
|
1608
1722
|
|
|
1609
1723
|
/**
|
|
1610
|
-
* Spatial text output options
|
|
1724
|
+
* Spatial text output options for preserving document layout structure
|
|
1611
1725
|
*/
|
|
1612
1726
|
export interface SpatialText {
|
|
1613
1727
|
/**
|
|
1614
|
-
* Keep column
|
|
1728
|
+
* Keep multi-column layouts intact instead of linearizing columns into sequential
|
|
1729
|
+
* text. Automatically enabled for non-fast tiers
|
|
1615
1730
|
*/
|
|
1616
1731
|
do_not_unroll_columns?: boolean | null;
|
|
1617
1732
|
|
|
1618
1733
|
/**
|
|
1619
|
-
*
|
|
1734
|
+
* Maintain consistent text column alignment across page boundaries. Automatically
|
|
1735
|
+
* enabled for document-level parsing modes
|
|
1620
1736
|
*/
|
|
1621
1737
|
preserve_layout_alignment_across_pages?: boolean | null;
|
|
1622
1738
|
|
|
1623
1739
|
/**
|
|
1624
|
-
* Include
|
|
1740
|
+
* Include text below the normal size threshold. Useful for footnotes, watermarks,
|
|
1741
|
+
* or fine print that might otherwise be filtered out
|
|
1625
1742
|
*/
|
|
1626
1743
|
preserve_very_small_text?: boolean | null;
|
|
1627
1744
|
}
|
|
1628
1745
|
|
|
1629
1746
|
/**
|
|
1630
|
-
*
|
|
1747
|
+
* Options for exporting tables as XLSX spreadsheets
|
|
1631
1748
|
*/
|
|
1632
1749
|
export interface TablesAsSpreadsheet {
|
|
1633
1750
|
/**
|
|
@@ -1636,140 +1753,165 @@ export namespace ParsingCreateParams {
|
|
|
1636
1753
|
enable?: boolean | null;
|
|
1637
1754
|
|
|
1638
1755
|
/**
|
|
1639
|
-
* Automatically
|
|
1756
|
+
* Automatically generate descriptive sheet names from table context (headers,
|
|
1757
|
+
* surrounding text) instead of using generic names like 'Table_1'
|
|
1640
1758
|
*/
|
|
1641
1759
|
guess_sheet_name?: boolean;
|
|
1642
1760
|
}
|
|
1643
1761
|
}
|
|
1644
1762
|
|
|
1645
1763
|
/**
|
|
1646
|
-
* Page
|
|
1764
|
+
* Page selection: limit total pages or specify exact pages to process
|
|
1647
1765
|
*/
|
|
1648
1766
|
export interface PageRanges {
|
|
1649
1767
|
/**
|
|
1650
|
-
* Maximum number of pages to process
|
|
1768
|
+
* Maximum number of pages to process. Pages are processed in order starting from
|
|
1769
|
+
* page 1. If both max_pages and target_pages are set, target_pages takes
|
|
1770
|
+
* precedence
|
|
1651
1771
|
*/
|
|
1652
1772
|
max_pages?: number | null;
|
|
1653
1773
|
|
|
1654
1774
|
/**
|
|
1655
|
-
*
|
|
1775
|
+
* Comma-separated list of specific pages to process using 1-based indexing.
|
|
1776
|
+
* Supports individual pages and ranges. Examples: '1,3,5' (pages 1, 3, 5), '1-5'
|
|
1777
|
+
* (pages 1 through 5 inclusive), '1,3,5-8,10' (pages 1, 3, 5-8, and 10). Pages are
|
|
1778
|
+
* sorted and deduplicated automatically. Duplicate pages cause an error
|
|
1656
1779
|
*/
|
|
1657
1780
|
target_pages?: string | null;
|
|
1658
1781
|
}
|
|
1659
1782
|
|
|
1660
1783
|
/**
|
|
1661
|
-
* Job
|
|
1784
|
+
* Job execution controls including timeouts and failure thresholds
|
|
1662
1785
|
*/
|
|
1663
1786
|
export interface ProcessingControl {
|
|
1664
1787
|
/**
|
|
1665
|
-
*
|
|
1788
|
+
* Quality thresholds that determine when a job should fail vs complete with
|
|
1789
|
+
* partial results
|
|
1666
1790
|
*/
|
|
1667
1791
|
job_failure_conditions?: ProcessingControl.JobFailureConditions;
|
|
1668
1792
|
|
|
1669
1793
|
/**
|
|
1670
|
-
* Timeout
|
|
1794
|
+
* Timeout settings for job execution. Increase for large or complex documents
|
|
1671
1795
|
*/
|
|
1672
1796
|
timeouts?: ProcessingControl.Timeouts;
|
|
1673
1797
|
}
|
|
1674
1798
|
|
|
1675
1799
|
export namespace ProcessingControl {
|
|
1676
1800
|
/**
|
|
1677
|
-
*
|
|
1801
|
+
* Quality thresholds that determine when a job should fail vs complete with
|
|
1802
|
+
* partial results
|
|
1678
1803
|
*/
|
|
1679
1804
|
export interface JobFailureConditions {
|
|
1680
1805
|
/**
|
|
1681
|
-
* Maximum ratio of pages allowed to fail (0-1)
|
|
1806
|
+
* Maximum ratio of pages allowed to fail before the job fails (0-1). Example: 0.1
|
|
1807
|
+
* means job fails if more than 10% of pages fail. Default is 0.05 (5%)
|
|
1682
1808
|
*/
|
|
1683
1809
|
allowed_page_failure_ratio?: number | null;
|
|
1684
1810
|
|
|
1685
1811
|
/**
|
|
1686
|
-
* Fail job if
|
|
1812
|
+
* Fail the job if a problematic font is detected that may cause incorrect text
|
|
1813
|
+
* extraction. Buggy fonts can produce garbled or missing characters
|
|
1687
1814
|
*/
|
|
1688
1815
|
fail_on_buggy_font?: boolean | null;
|
|
1689
1816
|
|
|
1690
1817
|
/**
|
|
1691
|
-
* Fail job if image
|
|
1818
|
+
* Fail the entire job if any embedded image cannot be extracted. By default, image
|
|
1819
|
+
* extraction errors are logged but don't fail the job
|
|
1692
1820
|
*/
|
|
1693
1821
|
fail_on_image_extraction_error?: boolean | null;
|
|
1694
1822
|
|
|
1695
1823
|
/**
|
|
1696
|
-
* Fail job if image OCR
|
|
1824
|
+
* Fail the entire job if OCR fails on any image. By default, OCR errors result in
|
|
1825
|
+
* empty text for that image
|
|
1697
1826
|
*/
|
|
1698
1827
|
fail_on_image_ocr_error?: boolean | null;
|
|
1699
1828
|
|
|
1700
1829
|
/**
|
|
1701
|
-
* Fail job if markdown
|
|
1830
|
+
* Fail the entire job if markdown cannot be reconstructed for any page. By
|
|
1831
|
+
* default, failed pages use fallback text extraction
|
|
1702
1832
|
*/
|
|
1703
1833
|
fail_on_markdown_reconstruction_error?: boolean | null;
|
|
1704
1834
|
}
|
|
1705
1835
|
|
|
1706
1836
|
/**
|
|
1707
|
-
* Timeout
|
|
1837
|
+
* Timeout settings for job execution. Increase for large or complex documents
|
|
1708
1838
|
*/
|
|
1709
1839
|
export interface Timeouts {
|
|
1710
1840
|
/**
|
|
1711
|
-
* Base timeout in seconds (max 30 minutes)
|
|
1841
|
+
* Base timeout for the job in seconds (max 1800 = 30 minutes). This is the minimum
|
|
1842
|
+
* time allowed regardless of document size
|
|
1712
1843
|
*/
|
|
1713
1844
|
base_in_seconds?: number | null;
|
|
1714
1845
|
|
|
1715
1846
|
/**
|
|
1716
|
-
* Additional timeout per page in seconds (max 5 minutes)
|
|
1847
|
+
* Additional timeout per page in seconds (max 300 = 5 minutes). Total timeout =
|
|
1848
|
+
* base + (this value × page count)
|
|
1717
1849
|
*/
|
|
1718
1850
|
extra_time_per_page_in_seconds?: number | null;
|
|
1719
1851
|
}
|
|
1720
1852
|
}
|
|
1721
1853
|
|
|
1722
1854
|
/**
|
|
1723
|
-
*
|
|
1855
|
+
* Document processing options including OCR, table extraction, and chart parsing
|
|
1724
1856
|
*/
|
|
1725
1857
|
export interface ProcessingOptions {
|
|
1726
1858
|
/**
|
|
1727
|
-
*
|
|
1859
|
+
* Use aggressive heuristics to detect table boundaries, even without visible
|
|
1860
|
+
* borders. Useful for documents with borderless or complex tables
|
|
1728
1861
|
*/
|
|
1729
1862
|
aggressive_table_extraction?: boolean | null;
|
|
1730
1863
|
|
|
1731
1864
|
/**
|
|
1732
|
-
*
|
|
1865
|
+
* Conditional processing rules that apply different parsing options based on page
|
|
1866
|
+
* content, document structure, or filename patterns. Each entry defines trigger
|
|
1867
|
+
* conditions and the parsing configuration to apply when triggered
|
|
1733
1868
|
*/
|
|
1734
1869
|
auto_mode_configuration?: Array<ProcessingOptions.AutoModeConfiguration> | null;
|
|
1735
1870
|
|
|
1736
1871
|
/**
|
|
1737
|
-
* Cost optimizer
|
|
1872
|
+
* Cost optimizer configuration for reducing parsing costs on simpler pages.
|
|
1873
|
+
*
|
|
1874
|
+
* When enabled, the parser analyzes each page and routes simpler pages to faster,
|
|
1875
|
+
* cheaper processing while preserving quality for complex pages. Only works with
|
|
1876
|
+
* 'agentic' or 'agentic_plus' tiers.
|
|
1738
1877
|
*/
|
|
1739
1878
|
cost_optimizer?: ProcessingOptions.CostOptimizer | null;
|
|
1740
1879
|
|
|
1741
1880
|
/**
|
|
1742
|
-
*
|
|
1743
|
-
* table handling
|
|
1881
|
+
* Disable automatic heuristics including outlined table extraction and adaptive
|
|
1882
|
+
* long table handling. Use when heuristics produce incorrect results
|
|
1744
1883
|
*/
|
|
1745
1884
|
disable_heuristics?: boolean | null;
|
|
1746
1885
|
|
|
1747
1886
|
/**
|
|
1748
|
-
* Options for ignoring specific text types
|
|
1887
|
+
* Options for ignoring specific text types (diagonal, hidden, text in images)
|
|
1749
1888
|
*/
|
|
1750
1889
|
ignore?: ProcessingOptions.Ignore;
|
|
1751
1890
|
|
|
1752
1891
|
/**
|
|
1753
|
-
* OCR configuration
|
|
1892
|
+
* OCR configuration including language detection settings
|
|
1754
1893
|
*/
|
|
1755
1894
|
ocr_parameters?: ProcessingOptions.OcrParameters;
|
|
1756
1895
|
|
|
1757
1896
|
/**
|
|
1758
|
-
* Enable
|
|
1897
|
+
* Enable AI-powered chart analysis. Modes: 'efficient' (fast, lower cost),
|
|
1898
|
+
* 'agentic' (balanced), 'agentic_plus' (highest accuracy). Automatically enables
|
|
1899
|
+
* extract_layout and precise_bounding_box when set
|
|
1759
1900
|
*/
|
|
1760
1901
|
specialized_chart_parsing?: 'agentic_plus' | 'agentic' | 'efficient' | null;
|
|
1761
1902
|
}
|
|
1762
1903
|
|
|
1763
1904
|
export namespace ProcessingOptions {
|
|
1764
1905
|
/**
|
|
1765
|
-
* A single
|
|
1906
|
+
* A single auto mode rule with trigger conditions and parsing configuration.
|
|
1907
|
+
*
|
|
1908
|
+
* Auto mode allows conditional parsing where different configurations are applied
|
|
1909
|
+
* based on page content, structure, or filename. When triggers match, the
|
|
1910
|
+
* parsing_conf overrides default settings for that page.
|
|
1766
1911
|
*/
|
|
1767
1912
|
export interface AutoModeConfiguration {
|
|
1768
1913
|
/**
|
|
1769
|
-
*
|
|
1770
|
-
*
|
|
1771
|
-
* This uses V2 API naming conventions. The backend service will convert these to
|
|
1772
|
-
* the V1 format expected by the llamaparse worker.
|
|
1914
|
+
* Parsing configuration to apply when trigger conditions are met
|
|
1773
1915
|
*/
|
|
1774
1916
|
parsing_conf: AutoModeConfiguration.ParsingConf;
|
|
1775
1917
|
|
|
@@ -1944,18 +2086,15 @@ export namespace ParsingCreateParams {
|
|
|
1944
2086
|
text_in_page?: string | null;
|
|
1945
2087
|
|
|
1946
2088
|
/**
|
|
1947
|
-
* How to combine multiple trigger conditions: 'and' (all must match,
|
|
1948
|
-
* 'or' (any can
|
|
2089
|
+
* How to combine multiple trigger conditions: 'and' (all conditions must match,
|
|
2090
|
+
* this is the default) or 'or' (any single condition can trigger)
|
|
1949
2091
|
*/
|
|
1950
2092
|
trigger_mode?: string | null;
|
|
1951
2093
|
}
|
|
1952
2094
|
|
|
1953
2095
|
export namespace AutoModeConfiguration {
|
|
1954
2096
|
/**
|
|
1955
|
-
*
|
|
1956
|
-
*
|
|
1957
|
-
* This uses V2 API naming conventions. The backend service will convert these to
|
|
1958
|
-
* the V1 format expected by the llamaparse worker.
|
|
2097
|
+
* Parsing configuration to apply when trigger conditions are met
|
|
1959
2098
|
*/
|
|
1960
2099
|
export interface ParsingConf {
|
|
1961
2100
|
/**
|
|
@@ -1974,7 +2113,7 @@ export namespace ParsingCreateParams {
|
|
|
1974
2113
|
crop_box?: ParsingConf.CropBox | null;
|
|
1975
2114
|
|
|
1976
2115
|
/**
|
|
1977
|
-
* Custom
|
|
2116
|
+
* Custom AI instructions for matched pages. Overrides the base custom_prompt
|
|
1978
2117
|
*/
|
|
1979
2118
|
custom_prompt?: string | null;
|
|
1980
2119
|
|
|
@@ -2019,12 +2158,12 @@ export namespace ParsingCreateParams {
|
|
|
2019
2158
|
specialized_chart_parsing?: 'agentic_plus' | 'agentic' | 'efficient' | null;
|
|
2020
2159
|
|
|
2021
2160
|
/**
|
|
2022
|
-
*
|
|
2161
|
+
* Override the parsing tier for matched pages. Must be paired with version
|
|
2023
2162
|
*/
|
|
2024
2163
|
tier?: 'fast' | 'cost_effective' | 'agentic' | 'agentic_plus' | null;
|
|
2025
2164
|
|
|
2026
2165
|
/**
|
|
2027
|
-
*
|
|
2166
|
+
* Tier version when overriding tier. Required when tier is specified
|
|
2028
2167
|
*/
|
|
2029
2168
|
version?:
|
|
2030
2169
|
| '2025-12-11'
|
|
@@ -2046,6 +2185,20 @@ export namespace ParsingCreateParams {
|
|
|
2046
2185
|
| '2026-03-02'
|
|
2047
2186
|
| '2026-03-03'
|
|
2048
2187
|
| '2026-03-04'
|
|
2188
|
+
| '2026-03-05'
|
|
2189
|
+
| '2026-03-09'
|
|
2190
|
+
| '2026-03-10'
|
|
2191
|
+
| '2026-03-11'
|
|
2192
|
+
| '2026-03-12'
|
|
2193
|
+
| '2026-03-17'
|
|
2194
|
+
| '2026-03-19'
|
|
2195
|
+
| '2026-03-20'
|
|
2196
|
+
| '2026-03-22'
|
|
2197
|
+
| '2026-03-23'
|
|
2198
|
+
| '2026-03-24'
|
|
2199
|
+
| '2026-03-25'
|
|
2200
|
+
| '2026-03-26'
|
|
2201
|
+
| '2026-03-27'
|
|
2049
2202
|
| 'latest'
|
|
2050
2203
|
| (string & {})
|
|
2051
2204
|
| null;
|
|
@@ -2130,66 +2283,100 @@ export namespace ParsingCreateParams {
|
|
|
2130
2283
|
}
|
|
2131
2284
|
|
|
2132
2285
|
/**
|
|
2133
|
-
* Cost optimizer
|
|
2286
|
+
* Cost optimizer configuration for reducing parsing costs on simpler pages.
|
|
2287
|
+
*
|
|
2288
|
+
* When enabled, the parser analyzes each page and routes simpler pages to faster,
|
|
2289
|
+
* cheaper processing while preserving quality for complex pages. Only works with
|
|
2290
|
+
* 'agentic' or 'agentic_plus' tiers.
|
|
2134
2291
|
*/
|
|
2135
2292
|
export interface CostOptimizer {
|
|
2136
2293
|
/**
|
|
2137
|
-
*
|
|
2138
|
-
*
|
|
2294
|
+
* Enable cost-optimized parsing. Routes simpler pages to faster processing while
|
|
2295
|
+
* complex pages use full AI analysis. May reduce speed on some documents.
|
|
2296
|
+
* IMPORTANT: Only available with 'agentic' or 'agentic_plus' tiers
|
|
2139
2297
|
*/
|
|
2140
2298
|
enable?: boolean | null;
|
|
2141
2299
|
}
|
|
2142
2300
|
|
|
2143
2301
|
/**
|
|
2144
|
-
* Options for ignoring specific text types
|
|
2302
|
+
* Options for ignoring specific text types (diagonal, hidden, text in images)
|
|
2145
2303
|
*/
|
|
2146
2304
|
export interface Ignore {
|
|
2147
2305
|
/**
|
|
2148
|
-
*
|
|
2306
|
+
* Skip text rotated at an angle (not horizontal/vertical). Useful for ignoring
|
|
2307
|
+
* watermarks or decorative angled text
|
|
2149
2308
|
*/
|
|
2150
2309
|
ignore_diagonal_text?: boolean | null;
|
|
2151
2310
|
|
|
2152
2311
|
/**
|
|
2153
|
-
*
|
|
2312
|
+
* Skip text marked as hidden in the document structure. Some PDFs contain
|
|
2313
|
+
* invisible text layers used for accessibility or search indexing
|
|
2154
2314
|
*/
|
|
2155
2315
|
ignore_hidden_text?: boolean | null;
|
|
2156
2316
|
|
|
2157
2317
|
/**
|
|
2158
|
-
*
|
|
2318
|
+
* Skip OCR text extraction from embedded images. Use when images contain
|
|
2319
|
+
* irrelevant text (watermarks, logos) that shouldn't be in the output
|
|
2159
2320
|
*/
|
|
2160
2321
|
ignore_text_in_image?: boolean | null;
|
|
2161
2322
|
}
|
|
2162
2323
|
|
|
2163
2324
|
/**
|
|
2164
|
-
* OCR configuration
|
|
2325
|
+
* OCR configuration including language detection settings
|
|
2165
2326
|
*/
|
|
2166
2327
|
export interface OcrParameters {
|
|
2167
2328
|
/**
|
|
2168
|
-
*
|
|
2329
|
+
* Languages to use for OCR text recognition. Specify multiple languages if
|
|
2330
|
+
* document contains mixed-language content. Order matters - put primary language
|
|
2331
|
+
* first. Example: ['en', 'es'] for English with Spanish
|
|
2169
2332
|
*/
|
|
2170
2333
|
languages?: Array<ParsingAPI.ParsingLanguages> | null;
|
|
2171
2334
|
}
|
|
2172
2335
|
}
|
|
2173
2336
|
|
|
2337
|
+
/**
|
|
2338
|
+
* Webhook configuration for receiving parsing job notifications.
|
|
2339
|
+
*
|
|
2340
|
+
* Webhooks are called when specified events occur during job processing. Configure
|
|
2341
|
+
* multiple webhook configurations to send to different endpoints.
|
|
2342
|
+
*/
|
|
2174
2343
|
export interface WebhookConfiguration {
|
|
2175
2344
|
/**
|
|
2176
|
-
*
|
|
2345
|
+
* Events that trigger this webhook. Options: 'parse.success' (job completed),
|
|
2346
|
+
* 'parse.failure' (job failed), 'parse.partial' (some pages failed). If not
|
|
2347
|
+
* specified, webhook fires for all events
|
|
2177
2348
|
*/
|
|
2178
2349
|
webhook_events?: Array<string> | null;
|
|
2179
2350
|
|
|
2180
2351
|
/**
|
|
2181
|
-
* Custom headers to include in webhook requests
|
|
2352
|
+
* Custom HTTP headers to include in webhook requests. Use for authentication
|
|
2353
|
+
* tokens or custom routing. Example: {'Authorization': 'Bearer xyz'}
|
|
2182
2354
|
*/
|
|
2183
2355
|
webhook_headers?: { [key: string]: unknown } | null;
|
|
2184
2356
|
|
|
2185
2357
|
/**
|
|
2186
|
-
*
|
|
2358
|
+
* HTTPS URL to receive webhook POST requests. Must be publicly accessible
|
|
2187
2359
|
*/
|
|
2188
2360
|
webhook_url?: string | null;
|
|
2189
2361
|
}
|
|
2190
2362
|
}
|
|
2191
2363
|
|
|
2192
2364
|
export interface ParsingListParams extends PaginatedCursorParams {
|
|
2365
|
+
/**
|
|
2366
|
+
* Include jobs created at or after this timestamp (inclusive)
|
|
2367
|
+
*/
|
|
2368
|
+
created_at_on_or_after?: string | null;
|
|
2369
|
+
|
|
2370
|
+
/**
|
|
2371
|
+
* Include jobs created at or before this timestamp (inclusive)
|
|
2372
|
+
*/
|
|
2373
|
+
created_at_on_or_before?: string | null;
|
|
2374
|
+
|
|
2375
|
+
/**
|
|
2376
|
+
* Filter by specific job IDs
|
|
2377
|
+
*/
|
|
2378
|
+
job_ids?: Array<string> | null;
|
|
2379
|
+
|
|
2193
2380
|
organization_id?: string | null;
|
|
2194
2381
|
|
|
2195
2382
|
project_id?: string | null;
|
|
@@ -2202,10 +2389,10 @@ export interface ParsingListParams extends PaginatedCursorParams {
|
|
|
2202
2389
|
|
|
2203
2390
|
export interface ParsingGetParams {
|
|
2204
2391
|
/**
|
|
2205
|
-
* Fields to include: text, markdown, items, metadata,
|
|
2206
|
-
* markdown_content_metadata, items_content_metadata,
|
|
2207
|
-
* xlsx_content_metadata, output_pdf_content_metadata,
|
|
2208
|
-
* Metadata fields include presigned URLs.
|
|
2392
|
+
* Fields to include: text, markdown, items, metadata, job_metadata,
|
|
2393
|
+
* text_content_metadata, markdown_content_metadata, items_content_metadata,
|
|
2394
|
+
* metadata_content_metadata, xlsx_content_metadata, output_pdf_content_metadata,
|
|
2395
|
+
* images_content_metadata. Metadata fields include presigned URLs.
|
|
2209
2396
|
*/
|
|
2210
2397
|
expand?: Array<string>;
|
|
2211
2398
|
|