@llamaindex/llama-cloud 2.3.0 → 2.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +34 -0
- package/client.d.mts +2 -2
- package/client.d.mts.map +1 -1
- package/client.d.ts +2 -2
- package/client.d.ts.map +1 -1
- package/client.js.map +1 -1
- package/client.mjs.map +1 -1
- package/internal/utils/env.js +2 -2
- package/internal/utils/env.js.map +1 -1
- package/internal/utils/env.mjs +2 -2
- package/internal/utils/env.mjs.map +1 -1
- package/package.json +1 -1
- package/resources/beta/agent-data.d.mts +82 -19
- package/resources/beta/agent-data.d.mts.map +1 -1
- package/resources/beta/agent-data.d.ts +82 -19
- package/resources/beta/agent-data.d.ts.map +1 -1
- package/resources/beta/agent-data.js +76 -11
- package/resources/beta/agent-data.js.map +1 -1
- package/resources/beta/agent-data.mjs +76 -11
- package/resources/beta/agent-data.mjs.map +1 -1
- package/resources/beta/batch/batch.d.mts +27 -0
- package/resources/beta/batch/batch.d.mts.map +1 -1
- package/resources/beta/batch/batch.d.ts +27 -0
- package/resources/beta/batch/batch.d.ts.map +1 -1
- package/resources/beta/batch/batch.js +27 -0
- package/resources/beta/batch/batch.js.map +1 -1
- package/resources/beta/batch/batch.mjs +27 -0
- package/resources/beta/batch/batch.mjs.map +1 -1
- package/resources/beta/batch/job-items.d.mts +18 -0
- package/resources/beta/batch/job-items.d.mts.map +1 -1
- package/resources/beta/batch/job-items.d.ts +18 -0
- package/resources/beta/batch/job-items.d.ts.map +1 -1
- package/resources/beta/batch/job-items.js +18 -0
- package/resources/beta/batch/job-items.js.map +1 -1
- package/resources/beta/batch/job-items.mjs +18 -0
- package/resources/beta/batch/job-items.mjs.map +1 -1
- package/resources/beta/beta.d.mts +2 -2
- package/resources/beta/beta.d.mts.map +1 -1
- package/resources/beta/beta.d.ts +2 -2
- package/resources/beta/beta.d.ts.map +1 -1
- package/resources/beta/directories/directories.d.mts +34 -0
- package/resources/beta/directories/directories.d.mts.map +1 -1
- package/resources/beta/directories/directories.d.ts +34 -0
- package/resources/beta/directories/directories.d.ts.map +1 -1
- package/resources/beta/directories/directories.js +34 -0
- package/resources/beta/directories/directories.js.map +1 -1
- package/resources/beta/directories/directories.mjs +34 -0
- package/resources/beta/directories/directories.mjs.map +1 -1
- package/resources/beta/directories/files.d.mts +93 -0
- package/resources/beta/directories/files.d.mts.map +1 -1
- package/resources/beta/directories/files.d.ts +93 -0
- package/resources/beta/directories/files.d.ts.map +1 -1
- package/resources/beta/directories/files.js +50 -0
- package/resources/beta/directories/files.js.map +1 -1
- package/resources/beta/directories/files.mjs +50 -0
- package/resources/beta/directories/files.mjs.map +1 -1
- package/resources/beta/index.d.mts +1 -1
- package/resources/beta/index.d.mts.map +1 -1
- package/resources/beta/index.d.ts +1 -1
- package/resources/beta/index.d.ts.map +1 -1
- package/resources/beta/sheets.d.mts +38 -0
- package/resources/beta/sheets.d.mts.map +1 -1
- package/resources/beta/sheets.d.ts +38 -0
- package/resources/beta/sheets.d.ts.map +1 -1
- package/resources/beta/sheets.js +38 -0
- package/resources/beta/sheets.js.map +1 -1
- package/resources/beta/sheets.mjs +38 -0
- package/resources/beta/sheets.mjs.map +1 -1
- package/resources/beta/split.d.mts +25 -10
- package/resources/beta/split.d.mts.map +1 -1
- package/resources/beta/split.d.ts +25 -10
- package/resources/beta/split.d.ts.map +1 -1
- package/resources/beta/split.js +21 -6
- package/resources/beta/split.js.map +1 -1
- package/resources/beta/split.mjs +21 -6
- package/resources/beta/split.mjs.map +1 -1
- package/resources/classify.d.mts +70 -0
- package/resources/classify.d.mts.map +1 -1
- package/resources/classify.d.ts +70 -0
- package/resources/classify.d.ts.map +1 -1
- package/resources/classify.js +113 -0
- package/resources/classify.js.map +1 -1
- package/resources/classify.mjs +113 -0
- package/resources/classify.mjs.map +1 -1
- package/resources/configurations.d.mts +214 -13
- package/resources/configurations.d.mts.map +1 -1
- package/resources/configurations.d.ts +214 -13
- package/resources/configurations.d.ts.map +1 -1
- package/resources/extract.d.mts +16 -6
- package/resources/extract.d.mts.map +1 -1
- package/resources/extract.d.ts +16 -6
- package/resources/extract.d.ts.map +1 -1
- package/resources/extract.js +14 -1
- package/resources/extract.js.map +1 -1
- package/resources/extract.mjs +14 -1
- package/resources/extract.mjs.map +1 -1
- package/resources/index.d.mts +1 -1
- package/resources/index.d.mts.map +1 -1
- package/resources/index.d.ts +1 -1
- package/resources/index.d.ts.map +1 -1
- package/resources/index.js.map +1 -1
- package/resources/index.mjs.map +1 -1
- package/resources/parsing.d.mts +5 -4
- package/resources/parsing.d.mts.map +1 -1
- package/resources/parsing.d.ts +5 -4
- package/resources/parsing.d.ts.map +1 -1
- package/resources/pipelines/documents.d.mts +0 -3
- package/resources/pipelines/documents.d.mts.map +1 -1
- package/resources/pipelines/documents.d.ts +0 -3
- package/resources/pipelines/documents.d.ts.map +1 -1
- package/resources/pipelines/pipelines.d.mts +0 -7
- package/resources/pipelines/pipelines.d.mts.map +1 -1
- package/resources/pipelines/pipelines.d.ts +0 -7
- package/resources/pipelines/pipelines.d.ts.map +1 -1
- package/resources/pipelines/pipelines.js.map +1 -1
- package/resources/pipelines/pipelines.mjs.map +1 -1
- package/resources/retrievers/index.d.mts +1 -1
- package/resources/retrievers/index.d.mts.map +1 -1
- package/resources/retrievers/index.d.ts +1 -1
- package/resources/retrievers/index.d.ts.map +1 -1
- package/resources/retrievers/index.js.map +1 -1
- package/resources/retrievers/index.mjs.map +1 -1
- package/resources/retrievers/retrievers.d.mts +17 -5
- package/resources/retrievers/retrievers.d.mts.map +1 -1
- package/resources/retrievers/retrievers.d.ts +17 -5
- package/resources/retrievers/retrievers.d.ts.map +1 -1
- package/resources/retrievers/retrievers.js +10 -3
- package/resources/retrievers/retrievers.js.map +1 -1
- package/resources/retrievers/retrievers.mjs +10 -3
- package/resources/retrievers/retrievers.mjs.map +1 -1
- package/src/client.ts +2 -0
- package/src/internal/utils/env.ts +2 -2
- package/src/resources/beta/agent-data.ts +94 -28
- package/src/resources/beta/batch/batch.ts +27 -0
- package/src/resources/beta/batch/job-items.ts +18 -0
- package/src/resources/beta/beta.ts +2 -2
- package/src/resources/beta/directories/directories.ts +34 -0
- package/src/resources/beta/directories/files.ts +86 -0
- package/src/resources/beta/index.ts +1 -1
- package/src/resources/beta/sheets.ts +38 -0
- package/src/resources/beta/split.ts +25 -10
- package/src/resources/classify.ts +130 -0
- package/src/resources/configurations.ts +256 -8
- package/src/resources/extract.ts +16 -7
- package/src/resources/index.ts +1 -0
- package/src/resources/parsing.ts +9 -2
- package/src/resources/pipelines/documents.ts +0 -3
- package/src/resources/pipelines/pipelines.ts +0 -7
- package/src/resources/retrievers/index.ts +1 -0
- package/src/resources/retrievers/retrievers.ts +37 -5
- package/src/version.ts +1 -1
- package/version.d.mts +1 -1
- package/version.d.ts +1 -1
- package/version.js +1 -1
- package/version.mjs +1 -1
|
@@ -10,8 +10,14 @@ import { pollUntilComplete, PollingOptions, DEFAULT_TIMEOUT } from '../../core/p
|
|
|
10
10
|
|
|
11
11
|
export class Split extends APIResource {
|
|
12
12
|
/**
|
|
13
|
-
* Create a document split job.
|
|
14
|
-
*
|
|
13
|
+
* Create a document split job.
|
|
14
|
+
*
|
|
15
|
+
* @example
|
|
16
|
+
* ```ts
|
|
17
|
+
* const split = await client.beta.split.create({
|
|
18
|
+
* document_input: { type: 'type', value: 'value' },
|
|
19
|
+
* });
|
|
20
|
+
* ```
|
|
15
21
|
*/
|
|
16
22
|
create(params: SplitCreateParams, options?: RequestOptions): APIPromise<SplitCreateResponse> {
|
|
17
23
|
const { organization_id, project_id, ...body } = params;
|
|
@@ -23,8 +29,15 @@ export class Split extends APIResource {
|
|
|
23
29
|
}
|
|
24
30
|
|
|
25
31
|
/**
|
|
26
|
-
* List document split jobs.
|
|
27
|
-
*
|
|
32
|
+
* List document split jobs.
|
|
33
|
+
*
|
|
34
|
+
* @example
|
|
35
|
+
* ```ts
|
|
36
|
+
* // Automatically fetches more pages as needed.
|
|
37
|
+
* for await (const splitListResponse of client.beta.split.list()) {
|
|
38
|
+
* // ...
|
|
39
|
+
* }
|
|
40
|
+
* ```
|
|
28
41
|
*/
|
|
29
42
|
list(
|
|
30
43
|
query: SplitListParams | null | undefined = {},
|
|
@@ -39,8 +52,10 @@ export class Split extends APIResource {
|
|
|
39
52
|
/**
|
|
40
53
|
* Get a document split job.
|
|
41
54
|
*
|
|
42
|
-
*
|
|
43
|
-
*
|
|
55
|
+
* @example
|
|
56
|
+
* ```ts
|
|
57
|
+
* const split = await client.beta.split.get('split_job_id');
|
|
58
|
+
* ```
|
|
44
59
|
*/
|
|
45
60
|
get(
|
|
46
61
|
splitJobID: string,
|
|
@@ -210,7 +225,7 @@ export interface SplitCategory {
|
|
|
210
225
|
}
|
|
211
226
|
|
|
212
227
|
/**
|
|
213
|
-
* Document input specification.
|
|
228
|
+
* Document input specification for beta API.
|
|
214
229
|
*/
|
|
215
230
|
export interface SplitDocumentInput {
|
|
216
231
|
/**
|
|
@@ -255,7 +270,7 @@ export interface SplitSegmentResponse {
|
|
|
255
270
|
}
|
|
256
271
|
|
|
257
272
|
/**
|
|
258
|
-
*
|
|
273
|
+
* Beta response — uses nested document_input object.
|
|
259
274
|
*/
|
|
260
275
|
export interface SplitCreateResponse {
|
|
261
276
|
/**
|
|
@@ -316,7 +331,7 @@ export interface SplitCreateResponse {
|
|
|
316
331
|
}
|
|
317
332
|
|
|
318
333
|
/**
|
|
319
|
-
*
|
|
334
|
+
* Beta response — uses nested document_input object.
|
|
320
335
|
*/
|
|
321
336
|
export interface SplitListResponse {
|
|
322
337
|
/**
|
|
@@ -377,7 +392,7 @@ export interface SplitListResponse {
|
|
|
377
392
|
}
|
|
378
393
|
|
|
379
394
|
/**
|
|
380
|
-
*
|
|
395
|
+
* Beta response — uses nested document_input object.
|
|
381
396
|
*/
|
|
382
397
|
export interface SplitGetResponse {
|
|
383
398
|
/**
|
|
@@ -5,6 +5,7 @@ import { APIPromise } from '../core/api-promise';
|
|
|
5
5
|
import { PagePromise, PaginatedCursor, type PaginatedCursorParams } from '../core/pagination';
|
|
6
6
|
import { RequestOptions } from '../internal/request-options';
|
|
7
7
|
import { path } from '../internal/utils/path';
|
|
8
|
+
import { pollUntilComplete, PollingOptions, DEFAULT_TIMEOUT } from '../core/polling';
|
|
8
9
|
|
|
9
10
|
export class Classify extends APIResource {
|
|
10
11
|
/**
|
|
@@ -20,6 +21,11 @@ export class Classify extends APIResource {
|
|
|
20
21
|
*
|
|
21
22
|
* The job runs asynchronously. Poll `GET /classify/{job_id}` to check status and
|
|
22
23
|
* retrieve results.
|
|
24
|
+
*
|
|
25
|
+
* @example
|
|
26
|
+
* ```ts
|
|
27
|
+
* const classify = await client.classify.create();
|
|
28
|
+
* ```
|
|
23
29
|
*/
|
|
24
30
|
create(params: ClassifyCreateParams, options?: RequestOptions): APIPromise<ClassifyCreateResponse> {
|
|
25
31
|
const { organization_id, project_id, ...body } = params;
|
|
@@ -35,6 +41,14 @@ export class Classify extends APIResource {
|
|
|
35
41
|
*
|
|
36
42
|
* Filter by `status`, `configuration_id`, specific `job_ids`, or creation date
|
|
37
43
|
* range.
|
|
44
|
+
*
|
|
45
|
+
* @example
|
|
46
|
+
* ```ts
|
|
47
|
+
* // Automatically fetches more pages as needed.
|
|
48
|
+
* for await (const classifyListResponse of client.classify.list()) {
|
|
49
|
+
* // ...
|
|
50
|
+
* }
|
|
51
|
+
* ```
|
|
38
52
|
*/
|
|
39
53
|
list(
|
|
40
54
|
query: ClassifyListParams | null | undefined = {},
|
|
@@ -51,6 +65,11 @@ export class Classify extends APIResource {
|
|
|
51
65
|
*
|
|
52
66
|
* Returns the job status, configuration, and classify result when complete. The
|
|
53
67
|
* result includes the matched document type, confidence score, and reasoning.
|
|
68
|
+
*
|
|
69
|
+
* @example
|
|
70
|
+
* ```ts
|
|
71
|
+
* const classify = await client.classify.get('job_id');
|
|
72
|
+
* ```
|
|
54
73
|
*/
|
|
55
74
|
get(
|
|
56
75
|
jobID: string,
|
|
@@ -59,6 +78,117 @@ export class Classify extends APIResource {
|
|
|
59
78
|
): APIPromise<ClassifyGetResponse> {
|
|
60
79
|
return this._client.get(path`/api/v2/classify/${jobID}`, { query, ...options });
|
|
61
80
|
}
|
|
81
|
+
|
|
82
|
+
/**
|
|
83
|
+
* Wait for a classify job to complete by polling until it reaches a terminal state.
|
|
84
|
+
*
|
|
85
|
+
* @param jobID - The ID of the classify job to wait for
|
|
86
|
+
* @param query - Optional query parameters (organization_id, project_id)
|
|
87
|
+
* @param options - Polling configuration and request options
|
|
88
|
+
* @returns The completed classify job
|
|
89
|
+
* @throws {PollingTimeoutError} If the job doesn't complete within the timeout period
|
|
90
|
+
* @throws {PollingError} If the job fails
|
|
91
|
+
*
|
|
92
|
+
* @example
|
|
93
|
+
* ```typescript
|
|
94
|
+
* const job = await client.classify.create({
|
|
95
|
+
* file_input: 'dfl-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee',
|
|
96
|
+
* configuration_id: 'cfg-...',
|
|
97
|
+
* });
|
|
98
|
+
*
|
|
99
|
+
* const completed = await client.classify.waitForCompletion(job.id, undefined, { verbose: true });
|
|
100
|
+
* console.log(completed.result);
|
|
101
|
+
* ```
|
|
102
|
+
*/
|
|
103
|
+
async waitForCompletion(
|
|
104
|
+
jobID: string,
|
|
105
|
+
query?: ClassifyGetParams,
|
|
106
|
+
options?: PollingOptions & RequestOptions,
|
|
107
|
+
): Promise<ClassifyGetResponse> {
|
|
108
|
+
const { pollingInterval, maxInterval, timeout, backoff, verbose, ...requestOptions } = options || {};
|
|
109
|
+
|
|
110
|
+
const getStatus = async (): Promise<ClassifyGetResponse> => {
|
|
111
|
+
return await this.get(jobID, query, requestOptions);
|
|
112
|
+
};
|
|
113
|
+
|
|
114
|
+
const isComplete = (job: ClassifyGetResponse): boolean => {
|
|
115
|
+
return job.status === 'COMPLETED';
|
|
116
|
+
};
|
|
117
|
+
|
|
118
|
+
const isError = (job: ClassifyGetResponse): boolean => {
|
|
119
|
+
return job.status === 'FAILED';
|
|
120
|
+
};
|
|
121
|
+
|
|
122
|
+
const getErrorMessage = (job: ClassifyGetResponse): string => {
|
|
123
|
+
const errorParts = [`Job ${jobID} failed with status: ${job.status}`];
|
|
124
|
+
if (job.error_message) {
|
|
125
|
+
errorParts.push(`Error: ${job.error_message}`);
|
|
126
|
+
}
|
|
127
|
+
return errorParts.join(' | ');
|
|
128
|
+
};
|
|
129
|
+
|
|
130
|
+
return await pollUntilComplete(getStatus, isComplete, isError, getErrorMessage, {
|
|
131
|
+
pollingInterval,
|
|
132
|
+
maxInterval,
|
|
133
|
+
timeout: timeout || DEFAULT_TIMEOUT,
|
|
134
|
+
backoff,
|
|
135
|
+
verbose,
|
|
136
|
+
});
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
/**
|
|
140
|
+
* Create a classify job, wait for it to complete, and return the result.
|
|
141
|
+
*
|
|
142
|
+
* This is a convenience method that combines create() and waitForCompletion()
|
|
143
|
+
* into a single call for the most common end-to-end workflow.
|
|
144
|
+
*
|
|
145
|
+
* @param params - Classify job creation parameters
|
|
146
|
+
* @param options - Polling configuration and request options
|
|
147
|
+
* @returns The completed classify job with result populated
|
|
148
|
+
* @throws {PollingTimeoutError} If the job doesn't complete within the timeout period
|
|
149
|
+
* @throws {PollingError} If the job fails
|
|
150
|
+
*
|
|
151
|
+
* @example
|
|
152
|
+
* ```typescript
|
|
153
|
+
* import { LlamaCloud } from 'llama-cloud';
|
|
154
|
+
*
|
|
155
|
+
* const client = new LlamaCloud({ apiKey: '...' });
|
|
156
|
+
*
|
|
157
|
+
* const result = await client.classify.run({
|
|
158
|
+
* file_input: 'dfl-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee',
|
|
159
|
+
* configuration: {
|
|
160
|
+
* rules: [{ type: 'invoice', description: 'contains invoice number' }],
|
|
161
|
+
* },
|
|
162
|
+
* }, { verbose: true });
|
|
163
|
+
*
|
|
164
|
+
* console.log(result.result);
|
|
165
|
+
* ```
|
|
166
|
+
*/
|
|
167
|
+
async run(
|
|
168
|
+
params: ClassifyCreateParams,
|
|
169
|
+
options?: PollingOptions & RequestOptions,
|
|
170
|
+
): Promise<ClassifyGetResponse> {
|
|
171
|
+
const { pollingInterval, maxInterval, timeout, backoff, verbose, ...requestOptions } = options || {};
|
|
172
|
+
|
|
173
|
+
const job = await this.create(params, requestOptions);
|
|
174
|
+
|
|
175
|
+
const getQuery: ClassifyGetParams = {};
|
|
176
|
+
if (params.organization_id !== undefined) {
|
|
177
|
+
getQuery.organization_id = params.organization_id;
|
|
178
|
+
}
|
|
179
|
+
if (params.project_id !== undefined) {
|
|
180
|
+
getQuery.project_id = params.project_id;
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
return await this.waitForCompletion(job.id, getQuery, {
|
|
184
|
+
pollingInterval,
|
|
185
|
+
maxInterval,
|
|
186
|
+
timeout: timeout || DEFAULT_TIMEOUT,
|
|
187
|
+
backoff,
|
|
188
|
+
verbose,
|
|
189
|
+
...requestOptions,
|
|
190
|
+
});
|
|
191
|
+
}
|
|
62
192
|
}
|
|
63
193
|
|
|
64
194
|
export type ClassifyListResponsesPaginatedCursor = PaginatedCursor<ClassifyListResponse>;
|
|
@@ -164,9 +164,70 @@ export interface ConfigurationCreate {
|
|
|
164
164
|
| ExtractV2Parameters
|
|
165
165
|
| ClassifyV2Parameters
|
|
166
166
|
| ParseV2Parameters
|
|
167
|
+
| ConfigurationCreate.SpreadsheetV1Parameters
|
|
167
168
|
| UntypedParameters;
|
|
168
169
|
}
|
|
169
170
|
|
|
171
|
+
export namespace ConfigurationCreate {
|
|
172
|
+
/**
|
|
173
|
+
* Typed parameters for a _spreadsheet v1_ product configuration.
|
|
174
|
+
*/
|
|
175
|
+
export interface SpreadsheetV1Parameters {
|
|
176
|
+
/**
|
|
177
|
+
* Product type.
|
|
178
|
+
*/
|
|
179
|
+
product_type: 'spreadsheet_v1';
|
|
180
|
+
|
|
181
|
+
/**
|
|
182
|
+
* A1 notation of the range to extract a single region from. If None, the entire
|
|
183
|
+
* sheet is used.
|
|
184
|
+
*/
|
|
185
|
+
extraction_range?: string | null;
|
|
186
|
+
|
|
187
|
+
/**
|
|
188
|
+
* Return a flattened dataframe when a detected table is recognized as
|
|
189
|
+
* hierarchical.
|
|
190
|
+
*/
|
|
191
|
+
flatten_hierarchical_tables?: boolean;
|
|
192
|
+
|
|
193
|
+
/**
|
|
194
|
+
* Whether to generate additional metadata (title, description) for each extracted
|
|
195
|
+
* region.
|
|
196
|
+
*/
|
|
197
|
+
generate_additional_metadata?: boolean;
|
|
198
|
+
|
|
199
|
+
/**
|
|
200
|
+
* Whether to include hidden cells when extracting regions from the spreadsheet.
|
|
201
|
+
*/
|
|
202
|
+
include_hidden_cells?: boolean;
|
|
203
|
+
|
|
204
|
+
/**
|
|
205
|
+
* The names of the sheets to extract regions from. If empty, all sheets will be
|
|
206
|
+
* processed.
|
|
207
|
+
*/
|
|
208
|
+
sheet_names?: Array<string> | null;
|
|
209
|
+
|
|
210
|
+
/**
|
|
211
|
+
* Optional specialization mode for domain-specific extraction. Supported values:
|
|
212
|
+
* 'financial-standard', 'financial-enhanced', 'financial-precise'. Default None
|
|
213
|
+
* uses the general-purpose pipeline.
|
|
214
|
+
*/
|
|
215
|
+
specialization?: string | null;
|
|
216
|
+
|
|
217
|
+
/**
|
|
218
|
+
* Influences how likely similar-looking regions are merged into a single table.
|
|
219
|
+
* Useful for spreadsheets that either have sparse tables (strong merging) or many
|
|
220
|
+
* distinct tables close together (weak merging).
|
|
221
|
+
*/
|
|
222
|
+
table_merge_sensitivity?: 'strong' | 'weak';
|
|
223
|
+
|
|
224
|
+
/**
|
|
225
|
+
* Enables experimental processing. Accuracy may be impacted.
|
|
226
|
+
*/
|
|
227
|
+
use_experimental_processing?: boolean;
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
|
|
170
231
|
/**
|
|
171
232
|
* Response schema for a single product configuration.
|
|
172
233
|
*/
|
|
@@ -189,12 +250,13 @@ export interface ConfigurationResponse {
|
|
|
189
250
|
| ExtractV2Parameters
|
|
190
251
|
| ClassifyV2Parameters
|
|
191
252
|
| ParseV2Parameters
|
|
253
|
+
| ConfigurationResponse.SpreadsheetV1Parameters
|
|
192
254
|
| UntypedParameters;
|
|
193
255
|
|
|
194
256
|
/**
|
|
195
257
|
* Product type.
|
|
196
258
|
*/
|
|
197
|
-
product_type: 'split_v1' | 'extract_v2' | 'classify_v2' | 'parse_v2' | 'unknown';
|
|
259
|
+
product_type: 'split_v1' | 'extract_v2' | 'classify_v2' | 'parse_v2' | 'spreadsheet_v1' | 'unknown';
|
|
198
260
|
|
|
199
261
|
/**
|
|
200
262
|
* Version identifier (datetime string).
|
|
@@ -212,6 +274,66 @@ export interface ConfigurationResponse {
|
|
|
212
274
|
updated_at?: string | null;
|
|
213
275
|
}
|
|
214
276
|
|
|
277
|
+
export namespace ConfigurationResponse {
|
|
278
|
+
/**
|
|
279
|
+
* Typed parameters for a _spreadsheet v1_ product configuration.
|
|
280
|
+
*/
|
|
281
|
+
export interface SpreadsheetV1Parameters {
|
|
282
|
+
/**
|
|
283
|
+
* Product type.
|
|
284
|
+
*/
|
|
285
|
+
product_type: 'spreadsheet_v1';
|
|
286
|
+
|
|
287
|
+
/**
|
|
288
|
+
* A1 notation of the range to extract a single region from. If None, the entire
|
|
289
|
+
* sheet is used.
|
|
290
|
+
*/
|
|
291
|
+
extraction_range?: string | null;
|
|
292
|
+
|
|
293
|
+
/**
|
|
294
|
+
* Return a flattened dataframe when a detected table is recognized as
|
|
295
|
+
* hierarchical.
|
|
296
|
+
*/
|
|
297
|
+
flatten_hierarchical_tables?: boolean;
|
|
298
|
+
|
|
299
|
+
/**
|
|
300
|
+
* Whether to generate additional metadata (title, description) for each extracted
|
|
301
|
+
* region.
|
|
302
|
+
*/
|
|
303
|
+
generate_additional_metadata?: boolean;
|
|
304
|
+
|
|
305
|
+
/**
|
|
306
|
+
* Whether to include hidden cells when extracting regions from the spreadsheet.
|
|
307
|
+
*/
|
|
308
|
+
include_hidden_cells?: boolean;
|
|
309
|
+
|
|
310
|
+
/**
|
|
311
|
+
* The names of the sheets to extract regions from. If empty, all sheets will be
|
|
312
|
+
* processed.
|
|
313
|
+
*/
|
|
314
|
+
sheet_names?: Array<string> | null;
|
|
315
|
+
|
|
316
|
+
/**
|
|
317
|
+
* Optional specialization mode for domain-specific extraction. Supported values:
|
|
318
|
+
* 'financial-standard', 'financial-enhanced', 'financial-precise'. Default None
|
|
319
|
+
* uses the general-purpose pipeline.
|
|
320
|
+
*/
|
|
321
|
+
specialization?: string | null;
|
|
322
|
+
|
|
323
|
+
/**
|
|
324
|
+
* Influences how likely similar-looking regions are merged into a single table.
|
|
325
|
+
* Useful for spreadsheets that either have sparse tables (strong merging) or many
|
|
326
|
+
* distinct tables close together (weak merging).
|
|
327
|
+
*/
|
|
328
|
+
table_merge_sensitivity?: 'strong' | 'weak';
|
|
329
|
+
|
|
330
|
+
/**
|
|
331
|
+
* Enables experimental processing. Accuracy may be impacted.
|
|
332
|
+
*/
|
|
333
|
+
use_experimental_processing?: boolean;
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
|
|
215
337
|
/**
|
|
216
338
|
* Typed parameters for an _extract v2_ product configuration.
|
|
217
339
|
*/
|
|
@@ -250,11 +372,6 @@ export interface ExtractV2Parameters {
|
|
|
250
372
|
*/
|
|
251
373
|
extraction_target?: 'per_doc' | 'per_page' | 'per_table_row';
|
|
252
374
|
|
|
253
|
-
/**
|
|
254
|
-
* ISO 639-1 language code for the document
|
|
255
|
-
*/
|
|
256
|
-
lang?: string;
|
|
257
|
-
|
|
258
375
|
/**
|
|
259
376
|
* Maximum number of pages to process. Omit for no limit.
|
|
260
377
|
*/
|
|
@@ -267,7 +384,8 @@ export interface ExtractV2Parameters {
|
|
|
267
384
|
parse_config_id?: string | null;
|
|
268
385
|
|
|
269
386
|
/**
|
|
270
|
-
* Parse tier to use before extraction
|
|
387
|
+
* Parse tier to use before extraction. Defaults to the extract tier if not
|
|
388
|
+
* specified.
|
|
271
389
|
*/
|
|
272
390
|
parse_tier?: string | null;
|
|
273
391
|
|
|
@@ -348,6 +466,9 @@ export interface ParseV2Parameters {
|
|
|
348
466
|
| '2026-03-27'
|
|
349
467
|
| '2026-03-30'
|
|
350
468
|
| '2026-03-31'
|
|
469
|
+
| '2026-04-02'
|
|
470
|
+
| '2026-04-06'
|
|
471
|
+
| '2026-04-09'
|
|
351
472
|
| 'latest'
|
|
352
473
|
| (string & {});
|
|
353
474
|
|
|
@@ -1125,6 +1246,9 @@ export namespace ParseV2Parameters {
|
|
|
1125
1246
|
| '2026-03-27'
|
|
1126
1247
|
| '2026-03-30'
|
|
1127
1248
|
| '2026-03-31'
|
|
1249
|
+
| '2026-04-02'
|
|
1250
|
+
| '2026-04-06'
|
|
1251
|
+
| '2026-04-09'
|
|
1128
1252
|
| 'latest'
|
|
1129
1253
|
| (string & {})
|
|
1130
1254
|
| null;
|
|
@@ -1350,6 +1474,7 @@ export interface ConfigurationCreateParams {
|
|
|
1350
1474
|
| ExtractV2Parameters
|
|
1351
1475
|
| ClassifyV2Parameters
|
|
1352
1476
|
| ParseV2Parameters
|
|
1477
|
+
| ConfigurationCreateParams.SpreadsheetV1Parameters
|
|
1353
1478
|
| UntypedParameters;
|
|
1354
1479
|
|
|
1355
1480
|
/**
|
|
@@ -1363,6 +1488,66 @@ export interface ConfigurationCreateParams {
|
|
|
1363
1488
|
project_id?: string | null;
|
|
1364
1489
|
}
|
|
1365
1490
|
|
|
1491
|
+
export namespace ConfigurationCreateParams {
|
|
1492
|
+
/**
|
|
1493
|
+
* Typed parameters for a _spreadsheet v1_ product configuration.
|
|
1494
|
+
*/
|
|
1495
|
+
export interface SpreadsheetV1Parameters {
|
|
1496
|
+
/**
|
|
1497
|
+
* Product type.
|
|
1498
|
+
*/
|
|
1499
|
+
product_type: 'spreadsheet_v1';
|
|
1500
|
+
|
|
1501
|
+
/**
|
|
1502
|
+
* A1 notation of the range to extract a single region from. If None, the entire
|
|
1503
|
+
* sheet is used.
|
|
1504
|
+
*/
|
|
1505
|
+
extraction_range?: string | null;
|
|
1506
|
+
|
|
1507
|
+
/**
|
|
1508
|
+
* Return a flattened dataframe when a detected table is recognized as
|
|
1509
|
+
* hierarchical.
|
|
1510
|
+
*/
|
|
1511
|
+
flatten_hierarchical_tables?: boolean;
|
|
1512
|
+
|
|
1513
|
+
/**
|
|
1514
|
+
* Whether to generate additional metadata (title, description) for each extracted
|
|
1515
|
+
* region.
|
|
1516
|
+
*/
|
|
1517
|
+
generate_additional_metadata?: boolean;
|
|
1518
|
+
|
|
1519
|
+
/**
|
|
1520
|
+
* Whether to include hidden cells when extracting regions from the spreadsheet.
|
|
1521
|
+
*/
|
|
1522
|
+
include_hidden_cells?: boolean;
|
|
1523
|
+
|
|
1524
|
+
/**
|
|
1525
|
+
* The names of the sheets to extract regions from. If empty, all sheets will be
|
|
1526
|
+
* processed.
|
|
1527
|
+
*/
|
|
1528
|
+
sheet_names?: Array<string> | null;
|
|
1529
|
+
|
|
1530
|
+
/**
|
|
1531
|
+
* Optional specialization mode for domain-specific extraction. Supported values:
|
|
1532
|
+
* 'financial-standard', 'financial-enhanced', 'financial-precise'. Default None
|
|
1533
|
+
* uses the general-purpose pipeline.
|
|
1534
|
+
*/
|
|
1535
|
+
specialization?: string | null;
|
|
1536
|
+
|
|
1537
|
+
/**
|
|
1538
|
+
* Influences how likely similar-looking regions are merged into a single table.
|
|
1539
|
+
* Useful for spreadsheets that either have sparse tables (strong merging) or many
|
|
1540
|
+
* distinct tables close together (weak merging).
|
|
1541
|
+
*/
|
|
1542
|
+
table_merge_sensitivity?: 'strong' | 'weak';
|
|
1543
|
+
|
|
1544
|
+
/**
|
|
1545
|
+
* Enables experimental processing. Accuracy may be impacted.
|
|
1546
|
+
*/
|
|
1547
|
+
use_experimental_processing?: boolean;
|
|
1548
|
+
}
|
|
1549
|
+
}
|
|
1550
|
+
|
|
1366
1551
|
export interface ConfigurationRetrieveParams {
|
|
1367
1552
|
organization_id?: string | null;
|
|
1368
1553
|
|
|
@@ -1393,10 +1578,71 @@ export interface ConfigurationUpdateParams {
|
|
|
1393
1578
|
| ExtractV2Parameters
|
|
1394
1579
|
| ClassifyV2Parameters
|
|
1395
1580
|
| ParseV2Parameters
|
|
1581
|
+
| ConfigurationUpdateParams.SpreadsheetV1Parameters
|
|
1396
1582
|
| UntypedParameters
|
|
1397
1583
|
| null;
|
|
1398
1584
|
}
|
|
1399
1585
|
|
|
1586
|
+
export namespace ConfigurationUpdateParams {
|
|
1587
|
+
/**
|
|
1588
|
+
* Typed parameters for a _spreadsheet v1_ product configuration.
|
|
1589
|
+
*/
|
|
1590
|
+
export interface SpreadsheetV1Parameters {
|
|
1591
|
+
/**
|
|
1592
|
+
* Product type.
|
|
1593
|
+
*/
|
|
1594
|
+
product_type: 'spreadsheet_v1';
|
|
1595
|
+
|
|
1596
|
+
/**
|
|
1597
|
+
* A1 notation of the range to extract a single region from. If None, the entire
|
|
1598
|
+
* sheet is used.
|
|
1599
|
+
*/
|
|
1600
|
+
extraction_range?: string | null;
|
|
1601
|
+
|
|
1602
|
+
/**
|
|
1603
|
+
* Return a flattened dataframe when a detected table is recognized as
|
|
1604
|
+
* hierarchical.
|
|
1605
|
+
*/
|
|
1606
|
+
flatten_hierarchical_tables?: boolean;
|
|
1607
|
+
|
|
1608
|
+
/**
|
|
1609
|
+
* Whether to generate additional metadata (title, description) for each extracted
|
|
1610
|
+
* region.
|
|
1611
|
+
*/
|
|
1612
|
+
generate_additional_metadata?: boolean;
|
|
1613
|
+
|
|
1614
|
+
/**
|
|
1615
|
+
* Whether to include hidden cells when extracting regions from the spreadsheet.
|
|
1616
|
+
*/
|
|
1617
|
+
include_hidden_cells?: boolean;
|
|
1618
|
+
|
|
1619
|
+
/**
|
|
1620
|
+
* The names of the sheets to extract regions from. If empty, all sheets will be
|
|
1621
|
+
* processed.
|
|
1622
|
+
*/
|
|
1623
|
+
sheet_names?: Array<string> | null;
|
|
1624
|
+
|
|
1625
|
+
/**
|
|
1626
|
+
* Optional specialization mode for domain-specific extraction. Supported values:
|
|
1627
|
+
* 'financial-standard', 'financial-enhanced', 'financial-precise'. Default None
|
|
1628
|
+
* uses the general-purpose pipeline.
|
|
1629
|
+
*/
|
|
1630
|
+
specialization?: string | null;
|
|
1631
|
+
|
|
1632
|
+
/**
|
|
1633
|
+
* Influences how likely similar-looking regions are merged into a single table.
|
|
1634
|
+
* Useful for spreadsheets that either have sparse tables (strong merging) or many
|
|
1635
|
+
* distinct tables close together (weak merging).
|
|
1636
|
+
*/
|
|
1637
|
+
table_merge_sensitivity?: 'strong' | 'weak';
|
|
1638
|
+
|
|
1639
|
+
/**
|
|
1640
|
+
* Enables experimental processing. Accuracy may be impacted.
|
|
1641
|
+
*/
|
|
1642
|
+
use_experimental_processing?: boolean;
|
|
1643
|
+
}
|
|
1644
|
+
}
|
|
1645
|
+
|
|
1400
1646
|
export interface ConfigurationListParams extends PaginatedCursorParams {
|
|
1401
1647
|
/**
|
|
1402
1648
|
* Return only the latest version per configuration name.
|
|
@@ -1413,7 +1659,9 @@ export interface ConfigurationListParams extends PaginatedCursorParams {
|
|
|
1413
1659
|
/**
|
|
1414
1660
|
* Filter by one or more product types. Repeat the parameter for multiple values.
|
|
1415
1661
|
*/
|
|
1416
|
-
product_type?: Array<
|
|
1662
|
+
product_type?: Array<
|
|
1663
|
+
'split_v1' | 'extract_v2' | 'classify_v2' | 'parse_v2' | 'spreadsheet_v1' | 'unknown'
|
|
1664
|
+
> | null;
|
|
1417
1665
|
|
|
1418
1666
|
project_id?: string | null;
|
|
1419
1667
|
}
|
package/src/resources/extract.ts
CHANGED
|
@@ -133,7 +133,20 @@ export class Extract extends APIResource {
|
|
|
133
133
|
* ```ts
|
|
134
134
|
* const extractV2SchemaValidateResponse =
|
|
135
135
|
* await client.extract.validateSchema({
|
|
136
|
-
* data_schema: {
|
|
136
|
+
* data_schema: {
|
|
137
|
+
* properties: {
|
|
138
|
+
* vendor_name: 'bar',
|
|
139
|
+
* invoice_number: 'bar',
|
|
140
|
+
* total_amount: 'bar',
|
|
141
|
+
* line_items: 'bar',
|
|
142
|
+
* },
|
|
143
|
+
* required: [
|
|
144
|
+
* 'vendor_name',
|
|
145
|
+
* 'invoice_number',
|
|
146
|
+
* 'total_amount',
|
|
147
|
+
* ],
|
|
148
|
+
* type: 'object',
|
|
149
|
+
* },
|
|
137
150
|
* });
|
|
138
151
|
* ```
|
|
139
152
|
*/
|
|
@@ -287,11 +300,6 @@ export interface ExtractConfiguration {
|
|
|
287
300
|
*/
|
|
288
301
|
extraction_target?: 'per_doc' | 'per_page' | 'per_table_row';
|
|
289
302
|
|
|
290
|
-
/**
|
|
291
|
-
* ISO 639-1 language code for the document
|
|
292
|
-
*/
|
|
293
|
-
lang?: string;
|
|
294
|
-
|
|
295
303
|
/**
|
|
296
304
|
* Maximum number of pages to process. Omit for no limit.
|
|
297
305
|
*/
|
|
@@ -304,7 +312,8 @@ export interface ExtractConfiguration {
|
|
|
304
312
|
parse_config_id?: string | null;
|
|
305
313
|
|
|
306
314
|
/**
|
|
307
|
-
* Parse tier to use before extraction
|
|
315
|
+
* Parse tier to use before extraction. Defaults to the extract tier if not
|
|
316
|
+
* specified.
|
|
308
317
|
*/
|
|
309
318
|
parse_tier?: string | null;
|
|
310
319
|
|
package/src/resources/index.ts
CHANGED
package/src/resources/parsing.ts
CHANGED
|
@@ -1093,7 +1093,8 @@ export namespace ParsingGetResponse {
|
|
|
1093
1093
|
presigned_url?: string | null;
|
|
1094
1094
|
|
|
1095
1095
|
/**
|
|
1096
|
-
*
|
|
1096
|
+
* @deprecated Deprecated: always returns None. Will be removed in a future
|
|
1097
|
+
* release.
|
|
1097
1098
|
*/
|
|
1098
1099
|
size_bytes?: number | null;
|
|
1099
1100
|
}
|
|
@@ -1310,7 +1311,7 @@ export namespace ParsingGetResponse {
|
|
|
1310
1311
|
*/
|
|
1311
1312
|
export interface ResultContentMetadata {
|
|
1312
1313
|
/**
|
|
1313
|
-
* Size of the result file in
|
|
1314
|
+
* Size of the result file in bytes
|
|
1314
1315
|
*/
|
|
1315
1316
|
size_bytes: number;
|
|
1316
1317
|
|
|
@@ -1398,6 +1399,9 @@ export interface ParsingCreateParams {
|
|
|
1398
1399
|
| '2026-03-27'
|
|
1399
1400
|
| '2026-03-30'
|
|
1400
1401
|
| '2026-03-31'
|
|
1402
|
+
| '2026-04-02'
|
|
1403
|
+
| '2026-04-06'
|
|
1404
|
+
| '2026-04-09'
|
|
1401
1405
|
| 'latest'
|
|
1402
1406
|
| (string & {});
|
|
1403
1407
|
|
|
@@ -2203,6 +2207,9 @@ export namespace ParsingCreateParams {
|
|
|
2203
2207
|
| '2026-03-27'
|
|
2204
2208
|
| '2026-03-30'
|
|
2205
2209
|
| '2026-03-31'
|
|
2210
|
+
| '2026-04-02'
|
|
2211
|
+
| '2026-04-06'
|
|
2212
|
+
| '2026-04-09'
|
|
2206
2213
|
| 'latest'
|
|
2207
2214
|
| (string & {})
|
|
2208
2215
|
| null;
|
|
@@ -155,9 +155,6 @@ export interface CloudDocumentCreate {
|
|
|
155
155
|
|
|
156
156
|
/**
|
|
157
157
|
* Provided for backward compatibility.
|
|
158
|
-
*
|
|
159
|
-
* Note: we keep the field with the typo "seperator" to maintain backward
|
|
160
|
-
* compatibility for serialized objects.
|
|
161
158
|
*/
|
|
162
159
|
export interface TextNode {
|
|
163
160
|
class_name?: string;
|