@llamaindex/llama-cloud 2.0.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/CHANGELOG.md +28 -0
  2. package/client.d.mts +5 -2
  3. package/client.d.mts.map +1 -1
  4. package/client.d.ts +5 -2
  5. package/client.d.ts.map +1 -1
  6. package/client.js +3 -0
  7. package/client.js.map +1 -1
  8. package/client.mjs +3 -0
  9. package/client.mjs.map +1 -1
  10. package/package.json +1 -1
  11. package/resources/beta/parse-configurations.d.mts +7 -9
  12. package/resources/beta/parse-configurations.d.mts.map +1 -1
  13. package/resources/beta/parse-configurations.d.ts +7 -9
  14. package/resources/beta/parse-configurations.d.ts.map +1 -1
  15. package/resources/beta/parse-configurations.js +7 -9
  16. package/resources/beta/parse-configurations.js.map +1 -1
  17. package/resources/beta/parse-configurations.mjs +7 -9
  18. package/resources/beta/parse-configurations.mjs.map +1 -1
  19. package/resources/beta/sheets.d.mts +2 -2
  20. package/resources/beta/sheets.d.ts +2 -2
  21. package/resources/beta/split.d.mts +2 -2
  22. package/resources/beta/split.d.ts +2 -2
  23. package/resources/classify.d.mts +22 -14
  24. package/resources/classify.d.mts.map +1 -1
  25. package/resources/classify.d.ts +22 -14
  26. package/resources/classify.d.ts.map +1 -1
  27. package/resources/classify.js +3 -3
  28. package/resources/classify.mjs +3 -3
  29. package/resources/configurations.d.mts +1095 -0
  30. package/resources/configurations.d.mts.map +1 -0
  31. package/resources/configurations.d.ts +1095 -0
  32. package/resources/configurations.d.ts.map +1 -0
  33. package/resources/configurations.js +63 -0
  34. package/resources/configurations.js.map +1 -0
  35. package/resources/configurations.mjs +59 -0
  36. package/resources/configurations.mjs.map +1 -0
  37. package/resources/extract.d.mts +29 -992
  38. package/resources/extract.d.mts.map +1 -1
  39. package/resources/extract.d.ts +29 -992
  40. package/resources/extract.d.ts.map +1 -1
  41. package/resources/extract.js +7 -8
  42. package/resources/extract.js.map +1 -1
  43. package/resources/extract.mjs +7 -8
  44. package/resources/extract.mjs.map +1 -1
  45. package/resources/files.d.mts +1 -1
  46. package/resources/files.d.ts +1 -1
  47. package/resources/files.js +1 -1
  48. package/resources/files.mjs +1 -1
  49. package/resources/index.d.mts +2 -1
  50. package/resources/index.d.mts.map +1 -1
  51. package/resources/index.d.ts +2 -1
  52. package/resources/index.d.ts.map +1 -1
  53. package/resources/index.js +3 -1
  54. package/resources/index.js.map +1 -1
  55. package/resources/index.mjs +1 -0
  56. package/resources/index.mjs.map +1 -1
  57. package/resources/parsing.d.mts +4 -4
  58. package/resources/parsing.d.mts.map +1 -1
  59. package/resources/parsing.d.ts +4 -4
  60. package/resources/parsing.d.ts.map +1 -1
  61. package/src/client.ts +35 -2
  62. package/src/resources/beta/parse-configurations.ts +7 -9
  63. package/src/resources/beta/sheets.ts +2 -2
  64. package/src/resources/beta/split.ts +2 -2
  65. package/src/resources/classify.ts +24 -14
  66. package/src/resources/configurations.ts +1443 -0
  67. package/src/resources/extract.ts +29 -1229
  68. package/src/resources/files.ts +1 -1
  69. package/src/resources/index.ts +16 -1
  70. package/src/resources/parsing.ts +6 -2
  71. package/src/version.ts +1 -1
  72. package/version.d.mts +1 -1
  73. package/version.d.ts +1 -1
  74. package/version.js +1 -1
  75. package/version.mjs +1 -1
@@ -0,0 +1,1095 @@
1
+ import { APIResource } from "../core/resource.js";
2
+ import * as ParsingAPI from "./parsing.js";
3
+ import * as SplitAPI from "./beta/split.js";
4
+ import { APIPromise } from "../core/api-promise.js";
5
+ import { PagePromise, PaginatedCursor, type PaginatedCursorParams } from "../core/pagination.js";
6
+ import { RequestOptions } from "../internal/request-options.js";
7
+ export declare class Configurations extends APIResource {
8
+ /**
9
+ * Create or update a product configuration.
10
+ *
11
+ * If a configuration with the same name already exists for this product type and
12
+ * project, it will be updated (upsert semantics).
13
+ */
14
+ create(params: ConfigurationCreateParams, options?: RequestOptions): APIPromise<ConfigurationResponse>;
15
+ /**
16
+ * Get a single product configuration by ID.
17
+ */
18
+ retrieve(configID: string, query?: ConfigurationRetrieveParams | null | undefined, options?: RequestOptions): APIPromise<ConfigurationResponse>;
19
+ /**
20
+ * Update an existing product configuration.
21
+ */
22
+ update(configID: string, params: ConfigurationUpdateParams, options?: RequestOptions): APIPromise<ConfigurationResponse>;
23
+ /**
24
+ * List product configurations for the current project.
25
+ */
26
+ list(query?: ConfigurationListParams | null | undefined, options?: RequestOptions): PagePromise<ConfigurationResponsesPaginatedCursor, ConfigurationResponse>;
27
+ /**
28
+ * Delete a product configuration.
29
+ */
30
+ delete(configID: string, params?: ConfigurationDeleteParams | null | undefined, options?: RequestOptions): APIPromise<void>;
31
+ }
32
+ export type ConfigurationResponsesPaginatedCursor = PaginatedCursor<ConfigurationResponse>;
33
+ /**
34
+ * Typed parameters for a _classify v2_ product configuration.
35
+ */
36
+ export interface ClassifyV2Parameters {
37
+ /**
38
+ * Product type.
39
+ */
40
+ product_type: 'classify_v2';
41
+ /**
42
+ * Classify rules to evaluate against the document (at least one required)
43
+ */
44
+ rules: Array<ClassifyV2Parameters.Rule>;
45
+ /**
46
+ * Classify execution mode
47
+ */
48
+ mode?: 'FAST';
49
+ /**
50
+ * Parsing configuration for classify jobs.
51
+ */
52
+ parsing_configuration?: ClassifyV2Parameters.ParsingConfiguration | null;
53
+ }
54
+ export declare namespace ClassifyV2Parameters {
55
+ /**
56
+ * A rule for classifying documents.
57
+ */
58
+ interface Rule {
59
+ /**
60
+ * Natural language criteria for matching this rule
61
+ */
62
+ description: string;
63
+ /**
64
+ * Document type to assign when rule matches
65
+ */
66
+ type: string;
67
+ }
68
+ /**
69
+ * Parsing configuration for classify jobs.
70
+ */
71
+ interface ParsingConfiguration {
72
+ /**
73
+ * ISO 639-1 language code for the document
74
+ */
75
+ lang?: string;
76
+ /**
77
+ * Maximum number of pages to process. Omit for no limit.
78
+ */
79
+ max_pages?: number | null;
80
+ /**
81
+ * Comma-separated page numbers or ranges to process (1-based). Omit to process all
82
+ * pages.
83
+ */
84
+ target_pages?: string | null;
85
+ }
86
+ }
87
+ /**
88
+ * Request body for creating a product configuration.
89
+ */
90
+ export interface ConfigurationCreate {
91
+ /**
92
+ * Human-readable name for this configuration.
93
+ */
94
+ name: string;
95
+ /**
96
+ * Product-specific configuration parameters.
97
+ */
98
+ parameters: SplitV1Parameters | ExtractV2Parameters | ClassifyV2Parameters | ParseV2Parameters | UntypedParameters;
99
+ }
100
+ /**
101
+ * Response schema for a single product configuration.
102
+ */
103
+ export interface ConfigurationResponse {
104
+ /**
105
+ * Unique configuration ID.
106
+ */
107
+ id: string;
108
+ /**
109
+ * Configuration name.
110
+ */
111
+ name: string;
112
+ /**
113
+ * Product-specific configuration parameters.
114
+ */
115
+ parameters: SplitV1Parameters | ExtractV2Parameters | ClassifyV2Parameters | ParseV2Parameters | UntypedParameters;
116
+ /**
117
+ * Product type.
118
+ */
119
+ product_type: 'split_v1' | 'extract_v2' | 'classify_v2' | 'parse_v2' | 'unknown';
120
+ /**
121
+ * Version identifier (datetime string).
122
+ */
123
+ version: string;
124
+ /**
125
+ * Creation timestamp.
126
+ */
127
+ created_at?: string | null;
128
+ /**
129
+ * Last update timestamp.
130
+ */
131
+ updated_at?: string | null;
132
+ }
133
+ /**
134
+ * Typed parameters for an _extract v2_ product configuration.
135
+ */
136
+ export interface ExtractV2Parameters {
137
+ /**
138
+ * JSON Schema defining the fields to extract. Validate with the /schema/validate
139
+ * endpoint first.
140
+ */
141
+ data_schema: {
142
+ [key: string]: {
143
+ [key: string]: unknown;
144
+ } | Array<unknown> | string | number | boolean | null;
145
+ };
146
+ /**
147
+ * Product type.
148
+ */
149
+ product_type: 'extract_v2';
150
+ /**
151
+ * Include citations in results
152
+ */
153
+ cite_sources?: boolean;
154
+ /**
155
+ * Include confidence scores in results
156
+ */
157
+ confidence_scores?: boolean;
158
+ /**
159
+ * Extract algorithm version. Use 'latest' or a date string.
160
+ */
161
+ extract_version?: string;
162
+ /**
163
+ * Granularity of extraction: per_doc returns one object per document, per_page
164
+ * returns one object per page, per_table_row returns one object per table row
165
+ */
166
+ extraction_target?: 'per_doc' | 'per_page' | 'per_table_row';
167
+ /**
168
+ * ISO 639-1 language code for the document
169
+ */
170
+ lang?: string;
171
+ /**
172
+ * Maximum number of pages to process. Omit for no limit.
173
+ */
174
+ max_pages?: number | null;
175
+ /**
176
+ * Saved parse configuration ID to control how the document is parsed before
177
+ * extraction
178
+ */
179
+ parse_config_id?: string | null;
180
+ /**
181
+ * Parse tier to use before extraction (fast, cost_effective, or agentic)
182
+ */
183
+ parse_tier?: string | null;
184
+ /**
185
+ * Custom system prompt to guide extraction behavior
186
+ */
187
+ system_prompt?: string | null;
188
+ /**
189
+ * Comma-separated page numbers or ranges to process (1-based). Omit to process all
190
+ * pages.
191
+ */
192
+ target_pages?: string | null;
193
+ /**
194
+ * Extract tier: cost_effective (5 credits/page) or agentic (15 credits/page)
195
+ */
196
+ tier?: 'cost_effective' | 'agentic';
197
+ }
198
+ /**
199
+ * Configuration for LlamaParse v2 document parsing.
200
+ *
201
+ * Includes tier selection, processing options, output formatting, page targeting,
202
+ * and webhook delivery. Refer to the LlamaParse documentation for details on each
203
+ * field.
204
+ */
205
+ export interface ParseV2Parameters {
206
+ /**
207
+ * Product type.
208
+ */
209
+ product_type: 'parse_v2';
210
+ /**
211
+ * Parsing tier: 'fast' (rule-based, cheapest), 'cost_effective' (balanced),
212
+ * 'agentic' (AI-powered with custom prompts), or 'agentic_plus' (premium AI with
213
+ * highest accuracy)
214
+ */
215
+ tier: 'fast' | 'cost_effective' | 'agentic' | 'agentic_plus';
216
+ /**
217
+ * Tier version. Use 'latest' for the current stable version, or specify a specific
218
+ * dated version (e.g., '2026-03-31') for reproducible results
219
+ */
220
+ version: '2025-12-11' | '2025-12-18' | '2025-12-31' | '2026-01-08' | '2026-01-09' | '2026-01-16' | '2026-01-21' | '2026-01-22' | '2026-01-24' | '2026-01-29' | '2026-01-30' | '2026-02-03' | '2026-02-18' | '2026-02-20' | '2026-02-24' | '2026-02-26' | '2026-03-02' | '2026-03-03' | '2026-03-04' | '2026-03-05' | '2026-03-09' | '2026-03-10' | '2026-03-11' | '2026-03-12' | '2026-03-17' | '2026-03-19' | '2026-03-20' | '2026-03-22' | '2026-03-23' | '2026-03-24' | '2026-03-25' | '2026-03-26' | '2026-03-27' | '2026-03-30' | '2026-03-31' | 'latest' | (string & {});
221
+ /**
222
+ * Options for AI-powered parsing tiers (cost_effective, agentic, agentic_plus).
223
+ *
224
+ * These options customize how the AI processes and interprets document content.
225
+ * Only applicable when using non-fast tiers.
226
+ */
227
+ agentic_options?: ParseV2Parameters.AgenticOptions | null;
228
+ /**
229
+ * Identifier for the client/application making the request. Used for analytics and
230
+ * debugging. Example: 'my-app-v2'
231
+ */
232
+ client_name?: string | null;
233
+ /**
234
+ * Crop boundaries to process only a portion of each page. Values are ratios 0-1
235
+ * from page edges
236
+ */
237
+ crop_box?: ParseV2Parameters.CropBox;
238
+ /**
239
+ * Bypass result caching and force re-parsing. Use when document content may have
240
+ * changed or you need fresh results
241
+ */
242
+ disable_cache?: boolean | null;
243
+ /**
244
+ * Options for fast tier parsing (rule-based, no AI).
245
+ *
246
+ * Fast tier uses deterministic algorithms for text extraction without AI
247
+ * enhancement. It's the fastest and most cost-effective option, best suited for
248
+ * simple documents with standard layouts. Currently has no configurable options
249
+ * but is reserved for future expansion.
250
+ */
251
+ fast_options?: unknown | null;
252
+ /**
253
+ * Format-specific options (HTML, PDF, spreadsheet, presentation). Applied based on
254
+ * detected input file type
255
+ */
256
+ input_options?: ParseV2Parameters.InputOptions;
257
+ /**
258
+ * Output formatting options for markdown, text, and extracted images
259
+ */
260
+ output_options?: ParseV2Parameters.OutputOptions;
261
+ /**
262
+ * Page selection: limit total pages or specify exact pages to process
263
+ */
264
+ page_ranges?: ParseV2Parameters.PageRanges;
265
+ /**
266
+ * Job execution controls including timeouts and failure thresholds
267
+ */
268
+ processing_control?: ParseV2Parameters.ProcessingControl;
269
+ /**
270
+ * Document processing options including OCR, table extraction, and chart parsing
271
+ */
272
+ processing_options?: ParseV2Parameters.ProcessingOptions;
273
+ /**
274
+ * Webhook endpoints for job status notifications. Multiple webhooks can be
275
+ * configured for different events or services
276
+ */
277
+ webhook_configurations?: Array<ParseV2Parameters.WebhookConfiguration>;
278
+ }
279
+ export declare namespace ParseV2Parameters {
280
+ /**
281
+ * Options for AI-powered parsing tiers (cost_effective, agentic, agentic_plus).
282
+ *
283
+ * These options customize how the AI processes and interprets document content.
284
+ * Only applicable when using non-fast tiers.
285
+ */
286
+ interface AgenticOptions {
287
+ /**
288
+ * Custom instructions for the AI parser. Use to guide extraction behavior, specify
289
+ * output formatting, or provide domain-specific context. Example: 'Extract
290
+ * financial tables with currency symbols. Format dates as YYYY-MM-DD.'
291
+ */
292
+ custom_prompt?: string | null;
293
+ }
294
+ /**
295
+ * Crop boundaries to process only a portion of each page. Values are ratios 0-1
296
+ * from page edges
297
+ */
298
+ interface CropBox {
299
+ /**
300
+ * Bottom boundary as ratio (0-1). 0=top edge, 1=bottom edge. Content below this
301
+ * line is excluded
302
+ */
303
+ bottom?: number | null;
304
+ /**
305
+ * Left boundary as ratio (0-1). 0=left edge, 1=right edge. Content left of this
306
+ * line is excluded
307
+ */
308
+ left?: number | null;
309
+ /**
310
+ * Right boundary as ratio (0-1). 0=left edge, 1=right edge. Content right of this
311
+ * line is excluded
312
+ */
313
+ right?: number | null;
314
+ /**
315
+ * Top boundary as ratio (0-1). 0=top edge, 1=bottom edge. Content above this line
316
+ * is excluded
317
+ */
318
+ top?: number | null;
319
+ }
320
+ /**
321
+ * Format-specific options (HTML, PDF, spreadsheet, presentation). Applied based on
322
+ * detected input file type
323
+ */
324
+ interface InputOptions {
325
+ /**
326
+ * HTML/web page parsing options (applies to .html, .htm files)
327
+ */
328
+ html?: InputOptions.HTML;
329
+ /**
330
+ * PDF-specific parsing options (applies to .pdf files)
331
+ */
332
+ pdf?: unknown;
333
+ /**
334
+ * Presentation parsing options (applies to .pptx, .ppt, .odp, .key files)
335
+ */
336
+ presentation?: InputOptions.Presentation;
337
+ /**
338
+ * Spreadsheet parsing options (applies to .xlsx, .xls, .csv, .ods files)
339
+ */
340
+ spreadsheet?: InputOptions.Spreadsheet;
341
+ }
342
+ namespace InputOptions {
343
+ /**
344
+ * HTML/web page parsing options (applies to .html, .htm files)
345
+ */
346
+ interface HTML {
347
+ /**
348
+ * Force all HTML elements to be visible by overriding CSS display/visibility
349
+ * properties. Useful for parsing pages with hidden content or collapsed sections
350
+ */
351
+ make_all_elements_visible?: boolean | null;
352
+ /**
353
+ * Remove fixed-position elements (headers, footers, floating buttons) that appear
354
+ * on every page render
355
+ */
356
+ remove_fixed_elements?: boolean | null;
357
+ /**
358
+ * Remove navigation elements (nav bars, sidebars, menus) to focus on main content
359
+ */
360
+ remove_navigation_elements?: boolean | null;
361
+ }
362
+ /**
363
+ * Presentation parsing options (applies to .pptx, .ppt, .odp, .key files)
364
+ */
365
+ interface Presentation {
366
+ /**
367
+ * Extract content positioned outside the visible slide area. Some presentations
368
+ * have hidden notes or content that extends beyond slide boundaries
369
+ */
370
+ out_of_bounds_content?: boolean | null;
371
+ /**
372
+ * Skip extraction of embedded chart data tables. When true, only the visual
373
+ * representation of charts is captured, not the underlying data
374
+ */
375
+ skip_embedded_data?: boolean | null;
376
+ }
377
+ /**
378
+ * Spreadsheet parsing options (applies to .xlsx, .xls, .csv, .ods files)
379
+ */
380
+ interface Spreadsheet {
381
+ /**
382
+ * Detect and extract multiple tables within a single sheet. Useful when
383
+ * spreadsheets contain several data regions separated by blank rows/columns
384
+ */
385
+ detect_sub_tables_in_sheets?: boolean | null;
386
+ /**
387
+ * Compute formula results instead of extracting formula text. Use when you need
388
+ * calculated values rather than formula definitions
389
+ */
390
+ force_formula_computation_in_sheets?: boolean | null;
391
+ /**
392
+ * Parse hidden sheets in addition to visible ones. By default, hidden sheets are
393
+ * skipped
394
+ */
395
+ include_hidden_sheets?: boolean | null;
396
+ }
397
+ }
398
+ /**
399
+ * Output formatting options for markdown, text, and extracted images
400
+ */
401
+ interface OutputOptions {
402
+ /**
403
+ * Extract the printed page number as it appears in the document (e.g., 'Page 5 of
404
+ * 10', 'v', 'A-3'). Useful for referencing original page numbers
405
+ */
406
+ extract_printed_page_number?: boolean | null;
407
+ /**
408
+ * Image categories to extract and save. Options: 'screenshot' (full page renders
409
+ * useful for visual QA), 'embedded' (images found within the document), 'layout'
410
+ * (cropped regions from layout detection like figures and diagrams). Empty list
411
+ * saves no images
412
+ */
413
+ images_to_save?: Array<'screenshot' | 'embedded' | 'layout'>;
414
+ /**
415
+ * Markdown formatting options including table styles and link annotations
416
+ */
417
+ markdown?: OutputOptions.Markdown;
418
+ /**
419
+ * Spatial text output options for preserving document layout structure
420
+ */
421
+ spatial_text?: OutputOptions.SpatialText;
422
+ /**
423
+ * Options for exporting tables as XLSX spreadsheets
424
+ */
425
+ tables_as_spreadsheet?: OutputOptions.TablesAsSpreadsheet;
426
+ }
427
+ namespace OutputOptions {
428
+ /**
429
+ * Markdown formatting options including table styles and link annotations
430
+ */
431
+ interface Markdown {
432
+ /**
433
+ * Add link annotations to markdown output in the format [text](url). When false,
434
+ * only the link text is included
435
+ */
436
+ annotate_links?: boolean | null;
437
+ /**
438
+ * Embed images directly in markdown as base64 data URIs instead of extracting them
439
+ * as separate files. Useful for self-contained markdown output
440
+ */
441
+ inline_images?: boolean | null;
442
+ /**
443
+ * Table formatting options including markdown vs HTML format and merging behavior
444
+ */
445
+ tables?: Markdown.Tables;
446
+ }
447
+ namespace Markdown {
448
+ /**
449
+ * Table formatting options including markdown vs HTML format and merging behavior
450
+ */
451
+ interface Tables {
452
+ /**
453
+ * Remove extra whitespace padding in markdown table cells for more compact output
454
+ */
455
+ compact_markdown_tables?: boolean | null;
456
+ /**
457
+ * Separator string for multiline cell content in markdown tables. Example:
458
+ * `<br>` to preserve line breaks, ' ' to join with spaces
459
+ */
460
+ markdown_table_multiline_separator?: string | null;
461
+ /**
462
+ * Automatically merge tables that span multiple pages into a single table. The
463
+ * merged table appears on the first page with merged_from_pages metadata
464
+ */
465
+ merge_continued_tables?: boolean | null;
466
+ /**
467
+ * Output tables as markdown pipe tables instead of HTML `<table>` tags.
468
+ * Markdown tables are simpler but cannot represent complex structures like merged
469
+ * cells
470
+ */
471
+ output_tables_as_markdown?: boolean | null;
472
+ }
473
+ }
474
+ /**
475
+ * Spatial text output options for preserving document layout structure
476
+ */
477
+ interface SpatialText {
478
+ /**
479
+ * Keep multi-column layouts intact instead of linearizing columns into sequential
480
+ * text. Automatically enabled for non-fast tiers
481
+ */
482
+ do_not_unroll_columns?: boolean | null;
483
+ /**
484
+ * Maintain consistent text column alignment across page boundaries. Automatically
485
+ * enabled for document-level parsing modes
486
+ */
487
+ preserve_layout_alignment_across_pages?: boolean | null;
488
+ /**
489
+ * Include text below the normal size threshold. Useful for footnotes, watermarks,
490
+ * or fine print that might otherwise be filtered out
491
+ */
492
+ preserve_very_small_text?: boolean | null;
493
+ }
494
+ /**
495
+ * Options for exporting tables as XLSX spreadsheets
496
+ */
497
+ interface TablesAsSpreadsheet {
498
+ /**
499
+ * Whether this option is enabled
500
+ */
501
+ enable?: boolean | null;
502
+ /**
503
+ * Automatically generate descriptive sheet names from table context (headers,
504
+ * surrounding text) instead of using generic names like 'Table_1'
505
+ */
506
+ guess_sheet_name?: boolean;
507
+ }
508
+ }
509
+ /**
510
+ * Page selection: limit total pages or specify exact pages to process
511
+ */
512
+ interface PageRanges {
513
+ /**
514
+ * Maximum number of pages to process. Pages are processed in order starting from
515
+ * page 1. If both max_pages and target_pages are set, target_pages takes
516
+ * precedence
517
+ */
518
+ max_pages?: number | null;
519
+ /**
520
+ * Comma-separated list of specific pages to process using 1-based indexing.
521
+ * Supports individual pages and ranges. Examples: '1,3,5' (pages 1, 3, 5), '1-5'
522
+ * (pages 1 through 5 inclusive), '1,3,5-8,10' (pages 1, 3, 5-8, and 10). Pages are
523
+ * sorted and deduplicated automatically. Duplicate pages cause an error
524
+ */
525
+ target_pages?: string | null;
526
+ }
527
+ /**
528
+ * Job execution controls including timeouts and failure thresholds
529
+ */
530
+ interface ProcessingControl {
531
+ /**
532
+ * Quality thresholds that determine when a job should fail vs complete with
533
+ * partial results
534
+ */
535
+ job_failure_conditions?: ProcessingControl.JobFailureConditions;
536
+ /**
537
+ * Timeout settings for job execution. Increase for large or complex documents
538
+ */
539
+ timeouts?: ProcessingControl.Timeouts;
540
+ }
541
+ namespace ProcessingControl {
542
+ /**
543
+ * Quality thresholds that determine when a job should fail vs complete with
544
+ * partial results
545
+ */
546
+ interface JobFailureConditions {
547
+ /**
548
+ * Maximum ratio of pages allowed to fail before the job fails (0-1). Example: 0.1
549
+ * means job fails if more than 10% of pages fail. Default is 0.05 (5%)
550
+ */
551
+ allowed_page_failure_ratio?: number | null;
552
+ /**
553
+ * Fail the job if a problematic font is detected that may cause incorrect text
554
+ * extraction. Buggy fonts can produce garbled or missing characters
555
+ */
556
+ fail_on_buggy_font?: boolean | null;
557
+ /**
558
+ * Fail the entire job if any embedded image cannot be extracted. By default, image
559
+ * extraction errors are logged but don't fail the job
560
+ */
561
+ fail_on_image_extraction_error?: boolean | null;
562
+ /**
563
+ * Fail the entire job if OCR fails on any image. By default, OCR errors result in
564
+ * empty text for that image
565
+ */
566
+ fail_on_image_ocr_error?: boolean | null;
567
+ /**
568
+ * Fail the entire job if markdown cannot be reconstructed for any page. By
569
+ * default, failed pages use fallback text extraction
570
+ */
571
+ fail_on_markdown_reconstruction_error?: boolean | null;
572
+ }
573
+ /**
574
+ * Timeout settings for job execution. Increase for large or complex documents
575
+ */
576
+ interface Timeouts {
577
+ /**
578
+ * Base timeout for the job in seconds (max 1800 = 30 minutes). This is the minimum
579
+ * time allowed regardless of document size
580
+ */
581
+ base_in_seconds?: number | null;
582
+ /**
583
+ * Additional timeout per page in seconds (max 300 = 5 minutes). Total timeout =
584
+ * base + (this value × page count)
585
+ */
586
+ extra_time_per_page_in_seconds?: number | null;
587
+ }
588
+ }
589
+ /**
590
+ * Document processing options including OCR, table extraction, and chart parsing
591
+ */
592
+ interface ProcessingOptions {
593
+ /**
594
+ * Use aggressive heuristics to detect table boundaries, even without visible
595
+ * borders. Useful for documents with borderless or complex tables
596
+ */
597
+ aggressive_table_extraction?: boolean | null;
598
+ /**
599
+ * Conditional processing rules that apply different parsing options based on page
600
+ * content, document structure, or filename patterns. Each entry defines trigger
601
+ * conditions and the parsing configuration to apply when triggered
602
+ */
603
+ auto_mode_configuration?: Array<ProcessingOptions.AutoModeConfiguration> | null;
604
+ /**
605
+ * Cost optimizer configuration for reducing parsing costs on simpler pages.
606
+ *
607
+ * When enabled, the parser analyzes each page and routes simpler pages to faster,
608
+ * cheaper processing while preserving quality for complex pages. Only works with
609
+ * 'agentic' or 'agentic_plus' tiers.
610
+ */
611
+ cost_optimizer?: ProcessingOptions.CostOptimizer | null;
612
+ /**
613
+ * Disable automatic heuristics including outlined table extraction and adaptive
614
+ * long table handling. Use when heuristics produce incorrect results
615
+ */
616
+ disable_heuristics?: boolean | null;
617
+ /**
618
+ * Options for ignoring specific text types (diagonal, hidden, text in images)
619
+ */
620
+ ignore?: ProcessingOptions.Ignore;
621
+ /**
622
+ * OCR configuration including language detection settings
623
+ */
624
+ ocr_parameters?: ProcessingOptions.OcrParameters;
625
+ /**
626
+ * Enable AI-powered chart analysis. Modes: 'efficient' (fast, lower cost),
627
+ * 'agentic' (balanced), 'agentic_plus' (highest accuracy). Automatically enables
628
+ * extract_layout and precise_bounding_box when set
629
+ */
630
+ specialized_chart_parsing?: 'agentic_plus' | 'agentic' | 'efficient' | null;
631
+ }
632
+ namespace ProcessingOptions {
633
+ /**
634
+ * A single auto mode rule with trigger conditions and parsing configuration.
635
+ *
636
+ * Auto mode allows conditional parsing where different configurations are applied
637
+ * based on page content, structure, or filename. When triggers match, the
638
+ * parsing_conf overrides default settings for that page.
639
+ */
640
+ interface AutoModeConfiguration {
641
+ /**
642
+ * Parsing configuration to apply when trigger conditions are met
643
+ */
644
+ parsing_conf: AutoModeConfiguration.ParsingConf;
645
+ /**
646
+ * Single glob pattern to match against filename
647
+ */
648
+ filename_match_glob?: string | null;
649
+ /**
650
+ * List of glob patterns to match against filename
651
+ */
652
+ filename_match_glob_list?: Array<string> | null;
653
+ /**
654
+ * Regex pattern to match against filename
655
+ */
656
+ filename_regexp?: string | null;
657
+ /**
658
+ * Regex mode flags (e.g., 'i' for case-insensitive)
659
+ */
660
+ filename_regexp_mode?: string | null;
661
+ /**
662
+ * Trigger if page contains a full-page image (scanned page detection)
663
+ */
664
+ full_page_image_in_page?: boolean | null;
665
+ /**
666
+ * Threshold for full page image detection (0.0-1.0, default 0.8)
667
+ */
668
+ full_page_image_in_page_threshold?: number | string | null;
669
+ /**
670
+ * Trigger if page contains non-screenshot images
671
+ */
672
+ image_in_page?: boolean | null;
673
+ /**
674
+ * Trigger if page contains this layout element type
675
+ */
676
+ layout_element_in_page?: string | null;
677
+ /**
678
+ * Confidence threshold for layout element detection
679
+ */
680
+ layout_element_in_page_confidence_threshold?: number | string | null;
681
+ /**
682
+ * Trigger if page has more than N charts
683
+ */
684
+ page_contains_at_least_n_charts?: number | string | null;
685
+ /**
686
+ * Trigger if page has more than N images
687
+ */
688
+ page_contains_at_least_n_images?: number | string | null;
689
+ /**
690
+ * Trigger if page has more than N layout elements
691
+ */
692
+ page_contains_at_least_n_layout_elements?: number | string | null;
693
+ /**
694
+ * Trigger if page has more than N lines
695
+ */
696
+ page_contains_at_least_n_lines?: number | string | null;
697
+ /**
698
+ * Trigger if page has more than N links
699
+ */
700
+ page_contains_at_least_n_links?: number | string | null;
701
+ /**
702
+ * Trigger if page has more than N numeric words
703
+ */
704
+ page_contains_at_least_n_numbers?: number | string | null;
705
+ /**
706
+ * Trigger if page has more than N% numeric words
707
+ */
708
+ page_contains_at_least_n_percent_numbers?: number | string | null;
709
+ /**
710
+ * Trigger if page has more than N tables
711
+ */
712
+ page_contains_at_least_n_tables?: number | string | null;
713
+ /**
714
+ * Trigger if page has more than N words
715
+ */
716
+ page_contains_at_least_n_words?: number | string | null;
717
+ /**
718
+ * Trigger if page has fewer than N charts
719
+ */
720
+ page_contains_at_most_n_charts?: number | string | null;
721
+ /**
722
+ * Trigger if page has fewer than N images
723
+ */
724
+ page_contains_at_most_n_images?: number | string | null;
725
+ /**
726
+ * Trigger if page has fewer than N layout elements
727
+ */
728
+ page_contains_at_most_n_layout_elements?: number | string | null;
729
+ /**
730
+ * Trigger if page has fewer than N lines
731
+ */
732
+ page_contains_at_most_n_lines?: number | string | null;
733
+ /**
734
+ * Trigger if page has fewer than N links
735
+ */
736
+ page_contains_at_most_n_links?: number | string | null;
737
+ /**
738
+ * Trigger if page has fewer than N numeric words
739
+ */
740
+ page_contains_at_most_n_numbers?: number | string | null;
741
+ /**
742
+ * Trigger if page has fewer than N% numeric words
743
+ */
744
+ page_contains_at_most_n_percent_numbers?: number | string | null;
745
+ /**
746
+ * Trigger if page has fewer than N tables
747
+ */
748
+ page_contains_at_most_n_tables?: number | string | null;
749
+ /**
750
+ * Trigger if page has fewer than N words
751
+ */
752
+ page_contains_at_most_n_words?: number | string | null;
753
+ /**
754
+ * Trigger if page has more than N characters
755
+ */
756
+ page_longer_than_n_chars?: number | string | null;
757
+ /**
758
+ * Trigger on pages with markdown extraction errors
759
+ */
760
+ page_md_error?: boolean | null;
761
+ /**
762
+ * Trigger if page has fewer than N characters
763
+ */
764
+ page_shorter_than_n_chars?: number | string | null;
765
+ /**
766
+ * Regex pattern to match in page content
767
+ */
768
+ regexp_in_page?: string | null;
769
+ /**
770
+ * Regex mode flags for regexp_in_page
771
+ */
772
+ regexp_in_page_mode?: string | null;
773
+ /**
774
+ * Trigger if page contains a table
775
+ */
776
+ table_in_page?: boolean | null;
777
+ /**
778
+ * Trigger if page text/markdown contains this string
779
+ */
780
+ text_in_page?: string | null;
781
+ /**
782
+ * How to combine multiple trigger conditions: 'and' (all conditions must match,
783
+ * this is the default) or 'or' (any single condition can trigger)
784
+ */
785
+ trigger_mode?: string | null;
786
+ }
787
+ namespace AutoModeConfiguration {
788
+ /**
789
+ * Parsing configuration applied to matched pages when trigger conditions are met. Every field is optional and nullable.
790
+ */
791
+ interface ParsingConf {
792
+ /**
793
+ * Whether to use adaptive long-table handling
794
+ */
795
+ adaptive_long_table?: boolean | null;
796
+ /**
797
+ * Whether to use aggressive table extraction
798
+ */
799
+ aggressive_table_extraction?: boolean | null;
800
+ /**
801
+ * Crop box options for auto mode parsing configuration (boundaries given as 0-1 ratios).
802
+ */
803
+ crop_box?: ParsingConf.CropBox | null;
804
+ /**
805
+ * Custom AI instructions for matched pages. Overrides the base `custom_prompt`
806
+ */
807
+ custom_prompt?: string | null;
808
+ /**
809
+ * Whether to extract layout information
810
+ */
811
+ extract_layout?: boolean | null;
812
+ /**
813
+ * Whether to use high-resolution OCR
814
+ */
815
+ high_res_ocr?: boolean | null;
816
+ /**
817
+ * Ignore options (diagonal text, hidden text) for auto mode parsing configuration.
818
+ */
819
+ ignore?: ParsingConf.Ignore | null;
820
+ /**
821
+ * Primary language of the document
822
+ */
823
+ language?: string | null;
824
+ /**
825
+ * Whether to use outlined table extraction
826
+ */
827
+ outlined_table_extraction?: boolean | null;
828
+ /**
829
+ * Presentation-specific options for auto mode parsing configuration.
830
+ */
831
+ presentation?: ParsingConf.Presentation | null;
832
+ /**
833
+ * Spatial text options for auto mode parsing configuration.
834
+ */
835
+ spatial_text?: ParsingConf.SpatialText | null;
836
+ /**
837
+ * Enable specialized chart parsing with the specified mode: 'agentic_plus', 'agentic', or 'efficient'
838
+ */
839
+ specialized_chart_parsing?: 'agentic_plus' | 'agentic' | 'efficient' | null;
840
+ /**
841
+ * Override the parsing tier for matched pages. Must be paired with `version`
842
+ */
843
+ tier?: 'fast' | 'cost_effective' | 'agentic' | 'agentic_plus' | null;
844
+ /**
845
+ * Tier version when overriding tier. Required when `tier` is specified. Typed as one of the listed dated versions, 'latest', or any other string (the `(string & {})` member keeps the union open).
846
+ */
847
+ version?: '2025-12-11' | '2025-12-18' | '2025-12-31' | '2026-01-08' | '2026-01-09' | '2026-01-16' | '2026-01-21' | '2026-01-22' | '2026-01-24' | '2026-01-29' | '2026-01-30' | '2026-02-03' | '2026-02-18' | '2026-02-20' | '2026-02-24' | '2026-02-26' | '2026-03-02' | '2026-03-03' | '2026-03-04' | '2026-03-05' | '2026-03-09' | '2026-03-10' | '2026-03-11' | '2026-03-12' | '2026-03-17' | '2026-03-19' | '2026-03-20' | '2026-03-22' | '2026-03-23' | '2026-03-24' | '2026-03-25' | '2026-03-26' | '2026-03-27' | '2026-03-30' | '2026-03-31' | 'latest' | (string & {}) | null;
848
+ }
849
+ namespace ParsingConf {
850
+ /**
851
+ * Crop box options for auto mode parsing configuration. Each boundary is optional and nullable.
852
+ */
853
+ interface CropBox {
854
+ /**
855
+ * Bottom boundary of the crop box as a ratio (0-1)
856
+ */
857
+ bottom?: number | null;
858
+ /**
859
+ * Left boundary of the crop box as a ratio (0-1)
860
+ */
861
+ left?: number | null;
862
+ /**
863
+ * Right boundary of the crop box as a ratio (0-1)
864
+ */
865
+ right?: number | null;
866
+ /**
867
+ * Top boundary of the crop box as a ratio (0-1)
868
+ */
869
+ top?: number | null;
870
+ }
871
+ /**
872
+ * Ignore options for auto mode parsing configuration: diagonal text and hidden text.
873
+ */
874
+ interface Ignore {
875
+ /**
876
+ * Whether to ignore diagonal text in the document. Optional and nullable.
877
+ */
878
+ ignore_diagonal_text?: boolean | null;
879
+ /**
880
+ * Whether to ignore hidden text in the document. Optional and nullable.
881
+ */
882
+ ignore_hidden_text?: boolean | null;
883
+ }
884
+ /**
885
+ * Presentation-specific options for auto mode parsing configuration. Both flags are optional and nullable.
886
+ */
887
+ interface Presentation {
888
+ /**
889
+ * Extract out-of-bounds content in presentation slides
890
+ */
891
+ out_of_bounds_content?: boolean | null;
892
+ /**
893
+ * Skip extraction of embedded data for charts in presentation slides
894
+ */
895
+ skip_embedded_data?: boolean | null;
896
+ }
897
+ /**
898
+ * Spatial text options for auto mode parsing configuration. All flags are optional and nullable.
899
+ */
900
+ interface SpatialText {
901
+ /**
902
+ * Keep column structure intact without unrolling
903
+ */
904
+ do_not_unroll_columns?: boolean | null;
905
+ /**
906
+ * Preserve text alignment across page boundaries
907
+ */
908
+ preserve_layout_alignment_across_pages?: boolean | null;
909
+ /**
910
+ * Include very small text in the spatial output
911
+ */
912
+ preserve_very_small_text?: boolean | null;
913
+ }
914
+ }
915
+ }
916
+ /**
917
+ * Cost optimizer configuration for reducing parsing costs on simpler pages.
918
+ *
919
+ * When enabled, the parser analyzes each page and routes simpler pages to faster,
920
+ * cheaper processing while preserving quality for complex pages. Only works with
921
+ * the 'agentic' or 'agentic_plus' tiers.
922
+ */
923
+ interface CostOptimizer {
924
+ /**
925
+ * Enable cost-optimized parsing. Routes simpler pages to faster processing while
926
+ * complex pages use full AI analysis. May reduce speed on some documents.
927
+ * IMPORTANT: Only available with the 'agentic' or 'agentic_plus' tiers.
928
+ */
929
+ enable?: boolean | null;
930
+ }
931
+ /**
932
+ * Options for ignoring specific text types (diagonal text, hidden text, text in images). All flags are optional and nullable.
933
+ */
934
+ interface Ignore {
935
+ /**
936
+ * Skip text rotated at an angle (not horizontal/vertical). Useful for ignoring
937
+ * watermarks or decorative angled text.
938
+ */
939
+ ignore_diagonal_text?: boolean | null;
940
+ /**
941
+ * Skip text marked as hidden in the document structure. Some PDFs contain
942
+ * invisible text layers used for accessibility or search indexing.
943
+ */
944
+ ignore_hidden_text?: boolean | null;
945
+ /**
946
+ * Skip OCR text extraction from embedded images. Use when images contain
947
+ * irrelevant text (watermarks, logos) that shouldn't be in the output.
948
+ */
949
+ ignore_text_in_image?: boolean | null;
950
+ }
951
+ /**
952
+ * OCR configuration including language detection settings
953
+ */
954
+ interface OcrParameters {
955
+ /**
956
+ * Languages to use for OCR text recognition. Specify multiple languages if the
957
+ * document contains mixed-language content. Order matters: put the primary language
958
+ * first. Example: ['en', 'es'] for English with Spanish. Entries are typed as ParsingAPI.ParsingLanguages.
959
+ */
960
+ languages?: Array<ParsingAPI.ParsingLanguages> | null;
961
+ }
962
+ }
963
+ /**
964
+ * Webhook configuration for receiving parsing job notifications.
965
+ *
966
+ * Webhooks are called when specified events occur during job processing. Configure
967
+ * multiple webhook configurations to send to different endpoints.
968
+ */
969
+ interface WebhookConfiguration {
970
+ /**
971
+ * Events that trigger this webhook. Options: 'parse.success' (job completed),
972
+ * 'parse.failure' (job failed), 'parse.partial' (some pages failed). If not
973
+ * specified, the webhook fires for all events. Typed as plain strings, so the compiler does not check event names.
974
+ */
975
+ webhook_events?: Array<string> | null;
976
+ /**
977
+ * Custom HTTP headers to include in webhook requests. Use for authentication
978
+ * tokens or custom routing. Example: {'Authorization': 'Bearer xyz'}. Header values are typed as `unknown`.
979
+ */
980
+ webhook_headers?: {
981
+ [key: string]: unknown;
982
+ } | null;
983
+ /**
984
+ * HTTPS URL to receive webhook POST requests. Must be publicly accessible.
985
+ */
986
+ webhook_url?: string | null;
987
+ }
988
+ }
989
+ /**
990
+ * Typed parameters for a _split v1_ product configuration.
991
+ */
992
+ export interface SplitV1Parameters {
993
+ /**
994
+ * Categories to split documents into. Required.
995
+ */
996
+ categories: Array<SplitAPI.SplitCategory>;
997
+ /**
998
+ * Product type. Always the literal 'split_v1'.
999
+ */
1000
+ product_type: 'split_v1';
1001
+ /**
1002
+ * Strategy for splitting documents. Optional.
1003
+ */
1004
+ splitting_strategy?: SplitV1Parameters.SplittingStrategy;
1005
+ }
1006
+ export declare namespace SplitV1Parameters {
1007
+ /**
1008
+ * Strategy for splitting documents. Its only field controls how uncategorized pages are handled.
1009
+ */
1010
+ interface SplittingStrategy {
1011
+ /**
1012
+ * Controls handling of pages that don't match any category. 'include': pages can
1013
+ * be grouped as 'uncategorized' and included in results. 'forbid': all pages must
1014
+ * be assigned to a defined category. 'omit': pages can be classified as
1015
+ * 'uncategorized' but are excluded from results. Optional.
1016
+ */
1017
+ allow_uncategorized?: 'include' | 'forbid' | 'omit';
1018
+ }
1019
+ }
1020
+ /**
1021
+ * Catch-all for configurations without a dedicated typed schema.
1022
+ *
1023
+ * Accepts arbitrary JSON fields alongside `product_type`.
1024
+ */
1025
+ export interface UntypedParameters {
1026
+ /**
1027
+ * Product type. Always the literal 'unknown'.
1028
+ */
1029
+ product_type: 'unknown';
1030
+ [k: string]: unknown; // index signature: any other key is allowed, with its value typed as `unknown`
1031
+ }
1032
+ export interface ConfigurationCreateParams {
1033
+ /**
1034
+ * Body param: Human-readable name for this configuration. Required.
1035
+ */
1036
+ name: string;
1037
+ /**
1038
+ * Body param: Product-specific configuration parameters. Required; one of the typed parameter shapes, or UntypedParameters as the catch-all.
1039
+ */
1040
+ parameters: SplitV1Parameters | ExtractV2Parameters | ClassifyV2Parameters | ParseV2Parameters | UntypedParameters;
1041
+ /**
1042
+ * Query param (optional, nullable)
1043
+ */
1044
+ organization_id?: string | null;
1045
+ /**
1046
+ * Query param (optional, nullable)
1047
+ */
1048
+ project_id?: string | null;
1049
+ }
1050
+ export interface ConfigurationRetrieveParams { // both fields are optional and nullable
1051
+ organization_id?: string | null; // NOTE(review): presumably a query param, as documented on ConfigurationCreateParams — confirm
1052
+ project_id?: string | null; // NOTE(review): presumably a query param, as documented on ConfigurationCreateParams — confirm
1053
+ }
1054
+ export interface ConfigurationUpdateParams {
1055
+ /**
1056
+ * Query param (optional, nullable)
1057
+ */
1058
+ organization_id?: string | null;
1059
+ /**
1060
+ * Query param (optional, nullable)
1061
+ */
1062
+ project_id?: string | null;
1063
+ /**
1064
+ * Body param: Updated name (omit to leave unchanged). The type also accepts null.
1065
+ */
1066
+ name?: string | null;
1067
+ /**
1068
+ * Body param: Updated parameters (omit to leave unchanged). The type also accepts null.
1069
+ */
1070
+ parameters?: SplitV1Parameters | ExtractV2Parameters | ClassifyV2Parameters | ParseV2Parameters | UntypedParameters | null;
1071
+ }
1072
+ export interface ConfigurationListParams extends PaginatedCursorParams { // also inherits every field of PaginatedCursorParams
1073
+ /**
1074
+ * Return only the latest version per configuration name. Optional.
1075
+ */
1076
+ latest_only?: boolean;
1077
+ /**
1078
+ * Filter by configuration name. Optional and nullable.
1079
+ */
1080
+ name?: string | null;
1081
+ organization_id?: string | null; // optional and nullable; not documented upstream
1082
+ /**
1083
+ * Filter by one or more product types. Repeat the parameter for multiple values.
1084
+ * Allowed values: 'split_v1', 'extract_v2', 'classify_v2', 'parse_v2', 'unknown'.
1085
+ */
1086
+ product_type?: Array<'split_v1' | 'extract_v2' | 'classify_v2' | 'parse_v2' | 'unknown'> | null;
1087
+ project_id?: string | null; // optional and nullable; not documented upstream
1088
+ }
1088
+ export interface ConfigurationDeleteParams { // both fields are optional and nullable
1089
+ organization_id?: string | null; // NOTE(review): presumably a query param, as documented on ConfigurationCreateParams — confirm
1090
+ project_id?: string | null; // NOTE(review): presumably a query param, as documented on ConfigurationCreateParams — confirm
1091
+ }
1092
+ export declare namespace Configurations { // type-only re-exports: each name below is exposed as Configurations.<Name>
1093
+ export { type ClassifyV2Parameters as ClassifyV2Parameters, type ConfigurationCreate as ConfigurationCreate, type ConfigurationResponse as ConfigurationResponse, type ExtractV2Parameters as ExtractV2Parameters, type ParseV2Parameters as ParseV2Parameters, type SplitV1Parameters as SplitV1Parameters, type UntypedParameters as UntypedParameters, type ConfigurationResponsesPaginatedCursor as ConfigurationResponsesPaginatedCursor, type ConfigurationCreateParams as ConfigurationCreateParams, type ConfigurationRetrieveParams as ConfigurationRetrieveParams, type ConfigurationUpdateParams as ConfigurationUpdateParams, type ConfigurationListParams as ConfigurationListParams, type ConfigurationDeleteParams as ConfigurationDeleteParams, };
1094
+ }
1095
+ //# sourceMappingURL=configurations.d.ts.map