chunkr-ai 0.1.0a1__py3-none-any.whl → 0.1.0a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chunkr_ai/_client.py +2 -1
- chunkr_ai/_version.py +1 -1
- chunkr_ai/resources/task/__init__.py +33 -0
- chunkr_ai/resources/{task.py → task/parse.py} +146 -696
- chunkr_ai/resources/task/task.py +664 -0
- chunkr_ai/types/__init__.py +0 -19
- chunkr_ai/types/task/__init__.py +7 -0
- chunkr_ai/types/task/parse_create_params.py +806 -0
- chunkr_ai/types/task/parse_update_params.py +806 -0
- chunkr_ai/types/task/task.py +1186 -0
- {chunkr_ai-0.1.0a1.dist-info → chunkr_ai-0.1.0a2.dist-info}/METADATA +12 -12
- {chunkr_ai-0.1.0a1.dist-info → chunkr_ai-0.1.0a2.dist-info}/RECORD +14 -28
- chunkr_ai/types/auto_generation_config.py +0 -39
- chunkr_ai/types/auto_generation_config_param.py +0 -39
- chunkr_ai/types/bounding_box.py +0 -19
- chunkr_ai/types/chunk_processing.py +0 -40
- chunkr_ai/types/chunk_processing_param.py +0 -42
- chunkr_ai/types/ignore_generation_config.py +0 -39
- chunkr_ai/types/ignore_generation_config_param.py +0 -39
- chunkr_ai/types/llm_generation_config.py +0 -39
- chunkr_ai/types/llm_generation_config_param.py +0 -39
- chunkr_ai/types/llm_processing.py +0 -36
- chunkr_ai/types/llm_processing_param.py +0 -36
- chunkr_ai/types/picture_generation_config.py +0 -39
- chunkr_ai/types/picture_generation_config_param.py +0 -39
- chunkr_ai/types/segment_processing.py +0 -280
- chunkr_ai/types/segment_processing_param.py +0 -281
- chunkr_ai/types/table_generation_config.py +0 -39
- chunkr_ai/types/table_generation_config_param.py +0 -39
- chunkr_ai/types/task.py +0 -379
- chunkr_ai/types/task_parse_params.py +0 -90
- chunkr_ai/types/task_update_params.py +0 -90
- {chunkr_ai-0.1.0a1.dist-info → chunkr_ai-0.1.0a2.dist-info}/WHEEL +0 -0
- {chunkr_ai-0.1.0a1.dist-info → chunkr_ai-0.1.0a2.dist-info}/licenses/LICENSE +0 -0
@@ -2,70 +2,60 @@
|
|
2
2
|
|
3
3
|
from __future__ import annotations
|
4
4
|
|
5
|
-
from typing import
|
6
|
-
from datetime import datetime
|
5
|
+
from typing import Optional
|
7
6
|
from typing_extensions import Literal
|
8
7
|
|
9
8
|
import httpx
|
10
9
|
|
11
|
-
from
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
)
|
17
|
-
from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven
|
18
|
-
from .._utils import maybe_transform, async_maybe_transform
|
19
|
-
from .._compat import cached_property
|
20
|
-
from .._resource import SyncAPIResource, AsyncAPIResource
|
21
|
-
from .._response import (
|
10
|
+
from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
|
11
|
+
from ..._utils import maybe_transform, async_maybe_transform
|
12
|
+
from ..._compat import cached_property
|
13
|
+
from ..._resource import SyncAPIResource, AsyncAPIResource
|
14
|
+
from ..._response import (
|
22
15
|
to_raw_response_wrapper,
|
23
16
|
to_streamed_response_wrapper,
|
24
17
|
async_to_raw_response_wrapper,
|
25
18
|
async_to_streamed_response_wrapper,
|
26
19
|
)
|
27
|
-
from
|
28
|
-
from
|
29
|
-
from
|
30
|
-
from ..types.llm_processing_param import LlmProcessingParam
|
31
|
-
from ..types.chunk_processing_param import ChunkProcessingParam
|
32
|
-
from ..types.segment_processing_param import SegmentProcessingParam
|
20
|
+
from ...types.task import parse_create_params, parse_update_params
|
21
|
+
from ..._base_client import make_request_options
|
22
|
+
from ...types.task.task import Task
|
33
23
|
|
34
|
-
__all__ = ["
|
24
|
+
__all__ = ["ParseResource", "AsyncParseResource"]
|
35
25
|
|
36
26
|
|
37
|
-
class
|
27
|
+
class ParseResource(SyncAPIResource):
|
38
28
|
@cached_property
|
39
|
-
def with_raw_response(self) ->
|
29
|
+
def with_raw_response(self) -> ParseResourceWithRawResponse:
|
40
30
|
"""
|
41
31
|
This property can be used as a prefix for any HTTP method call to return
|
42
32
|
the raw response object instead of the parsed content.
|
43
33
|
|
44
34
|
For more information, see https://www.github.com/lumina-ai-inc/chunkr-python#accessing-raw-response-data-eg-headers
|
45
35
|
"""
|
46
|
-
return
|
36
|
+
return ParseResourceWithRawResponse(self)
|
47
37
|
|
48
38
|
@cached_property
|
49
|
-
def with_streaming_response(self) ->
|
39
|
+
def with_streaming_response(self) -> ParseResourceWithStreamingResponse:
|
50
40
|
"""
|
51
41
|
An alternative to `.with_raw_response` that doesn't eagerly read the response body.
|
52
42
|
|
53
43
|
For more information, see https://www.github.com/lumina-ai-inc/chunkr-python#with_streaming_response
|
54
44
|
"""
|
55
|
-
return
|
45
|
+
return ParseResourceWithStreamingResponse(self)
|
56
46
|
|
57
|
-
def
|
47
|
+
def create(
|
58
48
|
self,
|
59
|
-
task_id: str,
|
60
49
|
*,
|
61
|
-
|
50
|
+
file: str,
|
51
|
+
chunk_processing: Optional[parse_create_params.ChunkProcessing] | NotGiven = NOT_GIVEN,
|
62
52
|
error_handling: Optional[Literal["Fail", "Continue"]] | NotGiven = NOT_GIVEN,
|
63
53
|
expires_in: Optional[int] | NotGiven = NOT_GIVEN,
|
64
|
-
|
65
|
-
llm_processing: Optional[
|
54
|
+
file_name: Optional[str] | NotGiven = NOT_GIVEN,
|
55
|
+
llm_processing: Optional[parse_create_params.LlmProcessing] | NotGiven = NOT_GIVEN,
|
66
56
|
ocr_strategy: Optional[Literal["All", "Auto"]] | NotGiven = NOT_GIVEN,
|
67
57
|
pipeline: Optional[Literal["Azure", "Chunkr"]] | NotGiven = NOT_GIVEN,
|
68
|
-
segment_processing: Optional[
|
58
|
+
segment_processing: Optional[parse_create_params.SegmentProcessing] | NotGiven = NOT_GIVEN,
|
69
59
|
segmentation_strategy: Optional[Literal["LayoutAnalysis", "Page"]] | NotGiven = NOT_GIVEN,
|
70
60
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
71
61
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
@@ -74,21 +64,22 @@ class TaskResource(SyncAPIResource):
|
|
74
64
|
extra_body: Body | None = None,
|
75
65
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
76
66
|
) -> Task:
|
77
|
-
"""
|
78
|
-
|
79
|
-
The
|
80
|
-
original configuration will be used for all values that are not provided in the
|
81
|
-
update.
|
82
|
-
|
83
|
-
Requirements:
|
67
|
+
"""
|
68
|
+
Queues a document for processing and returns a TaskResponse containing:
|
84
69
|
|
85
|
-
- Task
|
86
|
-
-
|
70
|
+
- Task ID for status polling
|
71
|
+
- Initial configuration
|
72
|
+
- File metadata
|
73
|
+
- Processing status
|
74
|
+
- Creation timestamp
|
75
|
+
- Presigned URLs for file access
|
87
76
|
|
88
77
|
The returned task will typically be in a `Starting` or `Processing` state. Use
|
89
78
|
the `GET /task/{task_id}` endpoint to poll for completion.
|
90
79
|
|
91
80
|
Args:
|
81
|
+
file: The file to be uploaded. Can be a URL or a base64 encoded file.
|
82
|
+
|
92
83
|
chunk_processing: Controls the setting for the chunking and post-processing of each chunk.
|
93
84
|
|
94
85
|
error_handling:
|
@@ -101,8 +92,7 @@ class TaskResource(SyncAPIResource):
|
|
101
92
|
expires_in: The number of seconds until task is deleted. Expired tasks can **not** be
|
102
93
|
updated, polled or accessed via web interface.
|
103
94
|
|
104
|
-
|
105
|
-
penalty: ~7 seconds per page)
|
95
|
+
file_name: The name of the file to be uploaded. If not set a name will be generated.
|
106
96
|
|
107
97
|
llm_processing: Controls the LLM used for the task.
|
108
98
|
|
@@ -130,8 +120,8 @@ class TaskResource(SyncAPIResource):
|
|
130
120
|
|
131
121
|
**Default strategy per segment** • `Title`, `SectionHeader`, `Text`, `ListItem`,
|
132
122
|
`Caption`, `Footnote` → **Auto** (Markdown) • `Table` → **LLM** (HTML,
|
133
|
-
description on) • `Picture` → **LLM** (Markdown, description
|
134
|
-
• `Formula`, `Page` → **LLM** (Markdown) • `PageHeader`, `PageFooter` →
|
123
|
+
description on) • `Picture` → **LLM** (Markdown, description off, cropping
|
124
|
+
_All_) • `Formula`, `Page` → **LLM** (Markdown) • `PageHeader`, `PageFooter` →
|
135
125
|
**Ignore** (removed from output)
|
136
126
|
|
137
127
|
---
|
@@ -156,288 +146,41 @@ class TaskResource(SyncAPIResource):
|
|
156
146
|
|
157
147
|
timeout: Override the client-level default timeout for this request, in seconds
|
158
148
|
"""
|
159
|
-
|
160
|
-
|
161
|
-
return self._patch(
|
162
|
-
f"/task/{task_id}/parse",
|
149
|
+
return self._post(
|
150
|
+
"/task/parse",
|
163
151
|
body=maybe_transform(
|
164
152
|
{
|
153
|
+
"file": file,
|
165
154
|
"chunk_processing": chunk_processing,
|
166
155
|
"error_handling": error_handling,
|
167
156
|
"expires_in": expires_in,
|
168
|
-
"
|
157
|
+
"file_name": file_name,
|
169
158
|
"llm_processing": llm_processing,
|
170
159
|
"ocr_strategy": ocr_strategy,
|
171
160
|
"pipeline": pipeline,
|
172
161
|
"segment_processing": segment_processing,
|
173
162
|
"segmentation_strategy": segmentation_strategy,
|
174
163
|
},
|
175
|
-
|
176
|
-
),
|
177
|
-
options=make_request_options(
|
178
|
-
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
179
|
-
),
|
180
|
-
cast_to=Task,
|
181
|
-
)
|
182
|
-
|
183
|
-
def list(
|
184
|
-
self,
|
185
|
-
*,
|
186
|
-
base64_urls: bool | NotGiven = NOT_GIVEN,
|
187
|
-
cursor: Union[str, datetime] | NotGiven = NOT_GIVEN,
|
188
|
-
end: Union[str, datetime] | NotGiven = NOT_GIVEN,
|
189
|
-
include_chunks: bool | NotGiven = NOT_GIVEN,
|
190
|
-
limit: int | NotGiven = NOT_GIVEN,
|
191
|
-
sort: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
|
192
|
-
start: Union[str, datetime] | NotGiven = NOT_GIVEN,
|
193
|
-
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
194
|
-
# The extra values given here take precedence over values defined on the client or passed to this method.
|
195
|
-
extra_headers: Headers | None = None,
|
196
|
-
extra_query: Query | None = None,
|
197
|
-
extra_body: Body | None = None,
|
198
|
-
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
199
|
-
) -> SyncTasksPage[Task]:
|
200
|
-
"""Retrieves a list of tasks with cursor-based pagination.
|
201
|
-
|
202
|
-
By default, tasks are
|
203
|
-
returned in descending order (newest first).
|
204
|
-
|
205
|
-
## Default Behaviors:
|
206
|
-
|
207
|
-
- **limit**: Returns all tasks if not specified
|
208
|
-
- **start**: No start date filter (returns from beginning of time)
|
209
|
-
- **end**: No end date filter (returns up to current time)
|
210
|
-
- **cursor**: Starts from most recent tasks (no pagination offset)
|
211
|
-
- **sort**: 'desc' (descending order, newest first)
|
212
|
-
- **include_chunks**: false (excludes chunks for better performance)
|
213
|
-
- **base64_urls**: false (returns presigned URLs instead of base64)
|
214
|
-
|
215
|
-
## Common Usage Patterns:
|
216
|
-
|
217
|
-
**Basic usage (get all tasks):** `GET /api/v1/tasks`
|
218
|
-
|
219
|
-
**Get first 10 tasks:** `GET /api/v1/tasks?limit=10`
|
220
|
-
|
221
|
-
**Paginate through results:**
|
222
|
-
|
223
|
-
1. First request: `GET /api/v1/tasks?limit=10`
|
224
|
-
2. Use next_cursor from response for subsequent pages:
|
225
|
-
`GET /api/v1/tasks?limit=10&cursor=<timestamp>`
|
226
|
-
|
227
|
-
**Filter by date range:**
|
228
|
-
`GET /api/v1/tasks?start=2025-01-01T00:00:00Z&end=2025-12-31T23:59:59Z`
|
229
|
-
|
230
|
-
**Get detailed results with chunks:** `GET /api/v1/tasks?include_chunks=true`
|
231
|
-
|
232
|
-
**Get base64 encoded content:** `GET /api/v1/tasks?base64_urls=true`
|
233
|
-
|
234
|
-
**Get tasks in ascending order (oldest first):** `GET /api/v1/tasks?sort=asc`
|
235
|
-
|
236
|
-
**Get tasks in descending order (newest first, default):**
|
237
|
-
`GET /api/v1/tasks?sort=desc`
|
238
|
-
|
239
|
-
Args:
|
240
|
-
base64_urls: Whether to return base64 encoded URLs. If false, the URLs will be returned as
|
241
|
-
presigned URLs.
|
242
|
-
|
243
|
-
cursor: Cursor for pagination (timestamp)
|
244
|
-
|
245
|
-
end: End date
|
246
|
-
|
247
|
-
include_chunks: Whether to include chunks in the output response
|
248
|
-
|
249
|
-
limit: Number of tasks per page
|
250
|
-
|
251
|
-
sort: Sort order: 'asc' for ascending, 'desc' for descending (default)
|
252
|
-
|
253
|
-
start: Start date
|
254
|
-
|
255
|
-
extra_headers: Send extra headers
|
256
|
-
|
257
|
-
extra_query: Add additional query parameters to the request
|
258
|
-
|
259
|
-
extra_body: Add additional JSON properties to the request
|
260
|
-
|
261
|
-
timeout: Override the client-level default timeout for this request, in seconds
|
262
|
-
"""
|
263
|
-
return self._get_api_list(
|
264
|
-
"/tasks",
|
265
|
-
page=SyncTasksPage[Task],
|
266
|
-
options=make_request_options(
|
267
|
-
extra_headers=extra_headers,
|
268
|
-
extra_query=extra_query,
|
269
|
-
extra_body=extra_body,
|
270
|
-
timeout=timeout,
|
271
|
-
query=maybe_transform(
|
272
|
-
{
|
273
|
-
"base64_urls": base64_urls,
|
274
|
-
"cursor": cursor,
|
275
|
-
"end": end,
|
276
|
-
"include_chunks": include_chunks,
|
277
|
-
"limit": limit,
|
278
|
-
"sort": sort,
|
279
|
-
"start": start,
|
280
|
-
},
|
281
|
-
task_list_params.TaskListParams,
|
282
|
-
),
|
283
|
-
),
|
284
|
-
model=Task,
|
285
|
-
)
|
286
|
-
|
287
|
-
def delete(
|
288
|
-
self,
|
289
|
-
task_id: Optional[str],
|
290
|
-
*,
|
291
|
-
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
292
|
-
# The extra values given here take precedence over values defined on the client or passed to this method.
|
293
|
-
extra_headers: Headers | None = None,
|
294
|
-
extra_query: Query | None = None,
|
295
|
-
extra_body: Body | None = None,
|
296
|
-
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
297
|
-
) -> None:
|
298
|
-
"""
|
299
|
-
Delete a task by its ID.
|
300
|
-
|
301
|
-
Requirements:
|
302
|
-
|
303
|
-
- Task must have status `Succeeded` or `Failed`
|
304
|
-
|
305
|
-
Args:
|
306
|
-
extra_headers: Send extra headers
|
307
|
-
|
308
|
-
extra_query: Add additional query parameters to the request
|
309
|
-
|
310
|
-
extra_body: Add additional JSON properties to the request
|
311
|
-
|
312
|
-
timeout: Override the client-level default timeout for this request, in seconds
|
313
|
-
"""
|
314
|
-
if not task_id:
|
315
|
-
raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
|
316
|
-
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
|
317
|
-
return self._delete(
|
318
|
-
f"/task/{task_id}",
|
319
|
-
options=make_request_options(
|
320
|
-
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
164
|
+
parse_create_params.ParseCreateParams,
|
321
165
|
),
|
322
|
-
cast_to=NoneType,
|
323
|
-
)
|
324
|
-
|
325
|
-
def cancel(
|
326
|
-
self,
|
327
|
-
task_id: Optional[str],
|
328
|
-
*,
|
329
|
-
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
330
|
-
# The extra values given here take precedence over values defined on the client or passed to this method.
|
331
|
-
extra_headers: Headers | None = None,
|
332
|
-
extra_query: Query | None = None,
|
333
|
-
extra_body: Body | None = None,
|
334
|
-
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
335
|
-
) -> None:
|
336
|
-
"""
|
337
|
-
Cancel a task that hasn't started processing yet:
|
338
|
-
|
339
|
-
- For new tasks: Status will be updated to `Cancelled`
|
340
|
-
- For updating tasks: Task will revert to the previous state
|
341
|
-
|
342
|
-
Requirements:
|
343
|
-
|
344
|
-
- Task must have status `Starting`
|
345
|
-
|
346
|
-
Args:
|
347
|
-
extra_headers: Send extra headers
|
348
|
-
|
349
|
-
extra_query: Add additional query parameters to the request
|
350
|
-
|
351
|
-
extra_body: Add additional JSON properties to the request
|
352
|
-
|
353
|
-
timeout: Override the client-level default timeout for this request, in seconds
|
354
|
-
"""
|
355
|
-
if not task_id:
|
356
|
-
raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
|
357
|
-
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
|
358
|
-
return self._get(
|
359
|
-
f"/task/{task_id}/cancel",
|
360
166
|
options=make_request_options(
|
361
167
|
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
362
168
|
),
|
363
|
-
cast_to=NoneType,
|
364
|
-
)
|
365
|
-
|
366
|
-
def get(
|
367
|
-
self,
|
368
|
-
task_id: Optional[str],
|
369
|
-
*,
|
370
|
-
base64_urls: bool | NotGiven = NOT_GIVEN,
|
371
|
-
include_chunks: bool | NotGiven = NOT_GIVEN,
|
372
|
-
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
373
|
-
# The extra values given here take precedence over values defined on the client or passed to this method.
|
374
|
-
extra_headers: Headers | None = None,
|
375
|
-
extra_query: Query | None = None,
|
376
|
-
extra_body: Body | None = None,
|
377
|
-
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
378
|
-
) -> Task:
|
379
|
-
"""
|
380
|
-
Retrieves detailed information about a task by its ID, including:
|
381
|
-
|
382
|
-
- Processing status
|
383
|
-
- Task configuration
|
384
|
-
- Output data (if processing is complete)
|
385
|
-
- File metadata (name, page count)
|
386
|
-
- Timestamps (created, started, finished)
|
387
|
-
- Presigned URLs for accessing files
|
388
|
-
|
389
|
-
This endpoint can be used to:
|
390
|
-
|
391
|
-
1. Poll the task status during processing
|
392
|
-
2. Retrieve the final output once processing is complete
|
393
|
-
3. Access task metadata and configuration
|
394
|
-
|
395
|
-
Args:
|
396
|
-
base64_urls: Whether to return base64 encoded URLs. If false, the URLs will be returned as
|
397
|
-
presigned URLs.
|
398
|
-
|
399
|
-
include_chunks: Whether to include chunks in the output response
|
400
|
-
|
401
|
-
extra_headers: Send extra headers
|
402
|
-
|
403
|
-
extra_query: Add additional query parameters to the request
|
404
|
-
|
405
|
-
extra_body: Add additional JSON properties to the request
|
406
|
-
|
407
|
-
timeout: Override the client-level default timeout for this request, in seconds
|
408
|
-
"""
|
409
|
-
if not task_id:
|
410
|
-
raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
|
411
|
-
return self._get(
|
412
|
-
f"/task/{task_id}",
|
413
|
-
options=make_request_options(
|
414
|
-
extra_headers=extra_headers,
|
415
|
-
extra_query=extra_query,
|
416
|
-
extra_body=extra_body,
|
417
|
-
timeout=timeout,
|
418
|
-
query=maybe_transform(
|
419
|
-
{
|
420
|
-
"base64_urls": base64_urls,
|
421
|
-
"include_chunks": include_chunks,
|
422
|
-
},
|
423
|
-
task_get_params.TaskGetParams,
|
424
|
-
),
|
425
|
-
),
|
426
169
|
cast_to=Task,
|
427
170
|
)
|
428
171
|
|
429
|
-
def
|
172
|
+
def update(
|
430
173
|
self,
|
174
|
+
task_id: str,
|
431
175
|
*,
|
432
|
-
|
433
|
-
chunk_processing: Optional[ChunkProcessingParam] | NotGiven = NOT_GIVEN,
|
176
|
+
chunk_processing: Optional[parse_update_params.ChunkProcessing] | NotGiven = NOT_GIVEN,
|
434
177
|
error_handling: Optional[Literal["Fail", "Continue"]] | NotGiven = NOT_GIVEN,
|
435
178
|
expires_in: Optional[int] | NotGiven = NOT_GIVEN,
|
436
|
-
|
437
|
-
llm_processing: Optional[
|
179
|
+
high_resolution: Optional[bool] | NotGiven = NOT_GIVEN,
|
180
|
+
llm_processing: Optional[parse_update_params.LlmProcessing] | NotGiven = NOT_GIVEN,
|
438
181
|
ocr_strategy: Optional[Literal["All", "Auto"]] | NotGiven = NOT_GIVEN,
|
439
182
|
pipeline: Optional[Literal["Azure", "Chunkr"]] | NotGiven = NOT_GIVEN,
|
440
|
-
segment_processing: Optional[
|
183
|
+
segment_processing: Optional[parse_update_params.SegmentProcessing] | NotGiven = NOT_GIVEN,
|
441
184
|
segmentation_strategy: Optional[Literal["LayoutAnalysis", "Page"]] | NotGiven = NOT_GIVEN,
|
442
185
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
443
186
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
@@ -446,22 +189,21 @@ class TaskResource(SyncAPIResource):
|
|
446
189
|
extra_body: Body | None = None,
|
447
190
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
448
191
|
) -> Task:
|
449
|
-
"""
|
450
|
-
Queues a document for processing and returns a TaskResponse containing:
|
192
|
+
"""Updates an existing task's configuration and reprocesses the document.
|
451
193
|
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
194
|
+
The
|
195
|
+
original configuration will be used for all values that are not provided in the
|
196
|
+
update.
|
197
|
+
|
198
|
+
Requirements:
|
199
|
+
|
200
|
+
- Task must have status `Succeeded` or `Failed`
|
201
|
+
- New configuration must be different from the current one
|
458
202
|
|
459
203
|
The returned task will typically be in a `Starting` or `Processing` state. Use
|
460
204
|
the `GET /task/{task_id}` endpoint to poll for completion.
|
461
205
|
|
462
206
|
Args:
|
463
|
-
file: The file to be uploaded. Can be a URL or a base64 encoded file.
|
464
|
-
|
465
207
|
chunk_processing: Controls the setting for the chunking and post-processing of each chunk.
|
466
208
|
|
467
209
|
error_handling:
|
@@ -474,7 +216,8 @@ class TaskResource(SyncAPIResource):
|
|
474
216
|
expires_in: The number of seconds until task is deleted. Expired tasks can **not** be
|
475
217
|
updated, polled or accessed via web interface.
|
476
218
|
|
477
|
-
|
219
|
+
high_resolution: Whether to use high-resolution images for cropping and post-processing. (Latency
|
220
|
+
penalty: ~7 seconds per page)
|
478
221
|
|
479
222
|
llm_processing: Controls the LLM used for the task.
|
480
223
|
|
@@ -502,8 +245,8 @@ class TaskResource(SyncAPIResource):
|
|
502
245
|
|
503
246
|
**Default strategy per segment** • `Title`, `SectionHeader`, `Text`, `ListItem`,
|
504
247
|
`Caption`, `Footnote` → **Auto** (Markdown) • `Table` → **LLM** (HTML,
|
505
|
-
description on) • `Picture` → **LLM** (Markdown, description
|
506
|
-
• `Formula`, `Page` → **LLM** (Markdown) • `PageHeader`, `PageFooter` →
|
248
|
+
description on) • `Picture` → **LLM** (Markdown, description off, cropping
|
249
|
+
_All_) • `Formula`, `Page` → **LLM** (Markdown) • `PageHeader`, `PageFooter` →
|
507
250
|
**Ignore** (removed from output)
|
508
251
|
|
509
252
|
---
|
@@ -528,22 +271,23 @@ class TaskResource(SyncAPIResource):
|
|
528
271
|
|
529
272
|
timeout: Override the client-level default timeout for this request, in seconds
|
530
273
|
"""
|
531
|
-
|
532
|
-
"
|
274
|
+
if not task_id:
|
275
|
+
raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
|
276
|
+
return self._patch(
|
277
|
+
f"/task/{task_id}/parse",
|
533
278
|
body=maybe_transform(
|
534
279
|
{
|
535
|
-
"file": file,
|
536
280
|
"chunk_processing": chunk_processing,
|
537
281
|
"error_handling": error_handling,
|
538
282
|
"expires_in": expires_in,
|
539
|
-
"
|
283
|
+
"high_resolution": high_resolution,
|
540
284
|
"llm_processing": llm_processing,
|
541
285
|
"ocr_strategy": ocr_strategy,
|
542
286
|
"pipeline": pipeline,
|
543
287
|
"segment_processing": segment_processing,
|
544
288
|
"segmentation_strategy": segmentation_strategy,
|
545
289
|
},
|
546
|
-
|
290
|
+
parse_update_params.ParseUpdateParams,
|
547
291
|
),
|
548
292
|
options=make_request_options(
|
549
293
|
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
@@ -552,38 +296,38 @@ class TaskResource(SyncAPIResource):
|
|
552
296
|
)
|
553
297
|
|
554
298
|
|
555
|
-
class
|
299
|
+
class AsyncParseResource(AsyncAPIResource):
|
556
300
|
@cached_property
|
557
|
-
def with_raw_response(self) ->
|
301
|
+
def with_raw_response(self) -> AsyncParseResourceWithRawResponse:
|
558
302
|
"""
|
559
303
|
This property can be used as a prefix for any HTTP method call to return
|
560
304
|
the raw response object instead of the parsed content.
|
561
305
|
|
562
306
|
For more information, see https://www.github.com/lumina-ai-inc/chunkr-python#accessing-raw-response-data-eg-headers
|
563
307
|
"""
|
564
|
-
return
|
308
|
+
return AsyncParseResourceWithRawResponse(self)
|
565
309
|
|
566
310
|
@cached_property
|
567
|
-
def with_streaming_response(self) ->
|
311
|
+
def with_streaming_response(self) -> AsyncParseResourceWithStreamingResponse:
|
568
312
|
"""
|
569
313
|
An alternative to `.with_raw_response` that doesn't eagerly read the response body.
|
570
314
|
|
571
315
|
For more information, see https://www.github.com/lumina-ai-inc/chunkr-python#with_streaming_response
|
572
316
|
"""
|
573
|
-
return
|
317
|
+
return AsyncParseResourceWithStreamingResponse(self)
|
574
318
|
|
575
|
-
async def
|
319
|
+
async def create(
|
576
320
|
self,
|
577
|
-
task_id: str,
|
578
321
|
*,
|
579
|
-
|
322
|
+
file: str,
|
323
|
+
chunk_processing: Optional[parse_create_params.ChunkProcessing] | NotGiven = NOT_GIVEN,
|
580
324
|
error_handling: Optional[Literal["Fail", "Continue"]] | NotGiven = NOT_GIVEN,
|
581
325
|
expires_in: Optional[int] | NotGiven = NOT_GIVEN,
|
582
|
-
|
583
|
-
llm_processing: Optional[
|
326
|
+
file_name: Optional[str] | NotGiven = NOT_GIVEN,
|
327
|
+
llm_processing: Optional[parse_create_params.LlmProcessing] | NotGiven = NOT_GIVEN,
|
584
328
|
ocr_strategy: Optional[Literal["All", "Auto"]] | NotGiven = NOT_GIVEN,
|
585
329
|
pipeline: Optional[Literal["Azure", "Chunkr"]] | NotGiven = NOT_GIVEN,
|
586
|
-
segment_processing: Optional[
|
330
|
+
segment_processing: Optional[parse_create_params.SegmentProcessing] | NotGiven = NOT_GIVEN,
|
587
331
|
segmentation_strategy: Optional[Literal["LayoutAnalysis", "Page"]] | NotGiven = NOT_GIVEN,
|
588
332
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
589
333
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
@@ -592,21 +336,22 @@ class AsyncTaskResource(AsyncAPIResource):
|
|
592
336
|
extra_body: Body | None = None,
|
593
337
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
594
338
|
) -> Task:
|
595
|
-
"""
|
596
|
-
|
597
|
-
The
|
598
|
-
original configuration will be used for all values that are not provided in the
|
599
|
-
update.
|
600
|
-
|
601
|
-
Requirements:
|
339
|
+
"""
|
340
|
+
Queues a document for processing and returns a TaskResponse containing:
|
602
341
|
|
603
|
-
- Task
|
604
|
-
-
|
342
|
+
- Task ID for status polling
|
343
|
+
- Initial configuration
|
344
|
+
- File metadata
|
345
|
+
- Processing status
|
346
|
+
- Creation timestamp
|
347
|
+
- Presigned URLs for file access
|
605
348
|
|
606
349
|
The returned task will typically be in a `Starting` or `Processing` state. Use
|
607
350
|
the `GET /task/{task_id}` endpoint to poll for completion.
|
608
351
|
|
609
352
|
Args:
|
353
|
+
file: The file to be uploaded. Can be a URL or a base64 encoded file.
|
354
|
+
|
610
355
|
chunk_processing: Controls the setting for the chunking and post-processing of each chunk.
|
611
356
|
|
612
357
|
error_handling:
|
@@ -619,8 +364,7 @@ class AsyncTaskResource(AsyncAPIResource):
|
|
619
364
|
expires_in: The number of seconds until task is deleted. Expired tasks can **not** be
|
620
365
|
updated, polled or accessed via web interface.
|
621
366
|
|
622
|
-
|
623
|
-
penalty: ~7 seconds per page)
|
367
|
+
file_name: The name of the file to be uploaded. If not set a name will be generated.
|
624
368
|
|
625
369
|
llm_processing: Controls the LLM used for the task.
|
626
370
|
|
@@ -648,8 +392,8 @@ class AsyncTaskResource(AsyncAPIResource):
|
|
648
392
|
|
649
393
|
**Default strategy per segment** • `Title`, `SectionHeader`, `Text`, `ListItem`,
|
650
394
|
`Caption`, `Footnote` → **Auto** (Markdown) • `Table` → **LLM** (HTML,
|
651
|
-
description on) • `Picture` → **LLM** (Markdown, description
|
652
|
-
• `Formula`, `Page` → **LLM** (Markdown) • `PageHeader`, `PageFooter` →
|
395
|
+
description on) • `Picture` → **LLM** (Markdown, description off, cropping
|
396
|
+
_All_) • `Formula`, `Page` → **LLM** (Markdown) • `PageHeader`, `PageFooter` →
|
653
397
|
**Ignore** (removed from output)
|
654
398
|
|
655
399
|
---
|
@@ -674,23 +418,22 @@ class AsyncTaskResource(AsyncAPIResource):
|
|
674
418
|
|
675
419
|
timeout: Override the client-level default timeout for this request, in seconds
|
676
420
|
"""
|
677
|
-
|
678
|
-
|
679
|
-
return await self._patch(
|
680
|
-
f"/task/{task_id}/parse",
|
421
|
+
return await self._post(
|
422
|
+
"/task/parse",
|
681
423
|
body=await async_maybe_transform(
|
682
424
|
{
|
425
|
+
"file": file,
|
683
426
|
"chunk_processing": chunk_processing,
|
684
427
|
"error_handling": error_handling,
|
685
428
|
"expires_in": expires_in,
|
686
|
-
"
|
429
|
+
"file_name": file_name,
|
687
430
|
"llm_processing": llm_processing,
|
688
431
|
"ocr_strategy": ocr_strategy,
|
689
432
|
"pipeline": pipeline,
|
690
433
|
"segment_processing": segment_processing,
|
691
434
|
"segmentation_strategy": segmentation_strategy,
|
692
435
|
},
|
693
|
-
|
436
|
+
parse_create_params.ParseCreateParams,
|
694
437
|
),
|
695
438
|
options=make_request_options(
|
696
439
|
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
@@ -698,264 +441,18 @@ class AsyncTaskResource(AsyncAPIResource):
|
|
698
441
|
cast_to=Task,
|
699
442
|
)
|
700
443
|
|
701
|
-
def
|
702
|
-
self,
|
703
|
-
*,
|
704
|
-
base64_urls: bool | NotGiven = NOT_GIVEN,
|
705
|
-
cursor: Union[str, datetime] | NotGiven = NOT_GIVEN,
|
706
|
-
end: Union[str, datetime] | NotGiven = NOT_GIVEN,
|
707
|
-
include_chunks: bool | NotGiven = NOT_GIVEN,
|
708
|
-
limit: int | NotGiven = NOT_GIVEN,
|
709
|
-
sort: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
|
710
|
-
start: Union[str, datetime] | NotGiven = NOT_GIVEN,
|
711
|
-
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
712
|
-
# The extra values given here take precedence over values defined on the client or passed to this method.
|
713
|
-
extra_headers: Headers | None = None,
|
714
|
-
extra_query: Query | None = None,
|
715
|
-
extra_body: Body | None = None,
|
716
|
-
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
717
|
-
) -> AsyncPaginator[Task, AsyncTasksPage[Task]]:
|
718
|
-
"""Retrieves a list of tasks with cursor-based pagination.
|
719
|
-
|
720
|
-
By default, tasks are
|
721
|
-
returned in descending order (newest first).
|
722
|
-
|
723
|
-
## Default Behaviors:
|
724
|
-
|
725
|
-
- **limit**: Returns all tasks if not specified
|
726
|
-
- **start**: No start date filter (returns from beginning of time)
|
727
|
-
- **end**: No end date filter (returns up to current time)
|
728
|
-
- **cursor**: Starts from most recent tasks (no pagination offset)
|
729
|
-
- **sort**: 'desc' (descending order, newest first)
|
730
|
-
- **include_chunks**: false (excludes chunks for better performance)
|
731
|
-
- **base64_urls**: false (returns presigned URLs instead of base64)
|
732
|
-
|
733
|
-
## Common Usage Patterns:
|
734
|
-
|
735
|
-
**Basic usage (get all tasks):** `GET /api/v1/tasks`
|
736
|
-
|
737
|
-
**Get first 10 tasks:** `GET /api/v1/tasks?limit=10`
|
738
|
-
|
739
|
-
**Paginate through results:**
|
740
|
-
|
741
|
-
1. First request: `GET /api/v1/tasks?limit=10`
|
742
|
-
2. Use next_cursor from response for subsequent pages:
|
743
|
-
`GET /api/v1/tasks?limit=10&cursor=<timestamp>`
|
744
|
-
|
745
|
-
**Filter by date range:**
|
746
|
-
`GET /api/v1/tasks?start=2025-01-01T00:00:00Z&end=2025-12-31T23:59:59Z`
|
747
|
-
|
748
|
-
**Get detailed results with chunks:** `GET /api/v1/tasks?include_chunks=true`
|
749
|
-
|
750
|
-
**Get base64 encoded content:** `GET /api/v1/tasks?base64_urls=true`
|
751
|
-
|
752
|
-
**Get tasks in ascending order (oldest first):** `GET /api/v1/tasks?sort=asc`
|
753
|
-
|
754
|
-
**Get tasks in descending order (newest first, default):**
|
755
|
-
`GET /api/v1/tasks?sort=desc`
|
756
|
-
|
757
|
-
Args:
|
758
|
-
base64_urls: Whether to return base64 encoded URLs. If false, the URLs will be returned as
|
759
|
-
presigned URLs.
|
760
|
-
|
761
|
-
cursor: Cursor for pagination (timestamp)
|
762
|
-
|
763
|
-
end: End date
|
764
|
-
|
765
|
-
include_chunks: Whether to include chunks in the output response
|
766
|
-
|
767
|
-
limit: Number of tasks per page
|
768
|
-
|
769
|
-
sort: Sort order: 'asc' for ascending, 'desc' for descending (default)
|
770
|
-
|
771
|
-
start: Start date
|
772
|
-
|
773
|
-
extra_headers: Send extra headers
|
774
|
-
|
775
|
-
extra_query: Add additional query parameters to the request
|
776
|
-
|
777
|
-
extra_body: Add additional JSON properties to the request
|
778
|
-
|
779
|
-
timeout: Override the client-level default timeout for this request, in seconds
|
780
|
-
"""
|
781
|
-
return self._get_api_list(
|
782
|
-
"/tasks",
|
783
|
-
page=AsyncTasksPage[Task],
|
784
|
-
options=make_request_options(
|
785
|
-
extra_headers=extra_headers,
|
786
|
-
extra_query=extra_query,
|
787
|
-
extra_body=extra_body,
|
788
|
-
timeout=timeout,
|
789
|
-
query=maybe_transform(
|
790
|
-
{
|
791
|
-
"base64_urls": base64_urls,
|
792
|
-
"cursor": cursor,
|
793
|
-
"end": end,
|
794
|
-
"include_chunks": include_chunks,
|
795
|
-
"limit": limit,
|
796
|
-
"sort": sort,
|
797
|
-
"start": start,
|
798
|
-
},
|
799
|
-
task_list_params.TaskListParams,
|
800
|
-
),
|
801
|
-
),
|
802
|
-
model=Task,
|
803
|
-
)
|
804
|
-
|
805
|
-
async def delete(
|
806
|
-
self,
|
807
|
-
task_id: Optional[str],
|
808
|
-
*,
|
809
|
-
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
810
|
-
# The extra values given here take precedence over values defined on the client or passed to this method.
|
811
|
-
extra_headers: Headers | None = None,
|
812
|
-
extra_query: Query | None = None,
|
813
|
-
extra_body: Body | None = None,
|
814
|
-
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
815
|
-
) -> None:
|
816
|
-
"""
|
817
|
-
Delete a task by its ID.
|
818
|
-
|
819
|
-
Requirements:
|
820
|
-
|
821
|
-
- Task must have status `Succeeded` or `Failed`
|
822
|
-
|
823
|
-
Args:
|
824
|
-
extra_headers: Send extra headers
|
825
|
-
|
826
|
-
extra_query: Add additional query parameters to the request
|
827
|
-
|
828
|
-
extra_body: Add additional JSON properties to the request
|
829
|
-
|
830
|
-
timeout: Override the client-level default timeout for this request, in seconds
|
831
|
-
"""
|
832
|
-
if not task_id:
|
833
|
-
raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
|
834
|
-
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
|
835
|
-
return await self._delete(
|
836
|
-
f"/task/{task_id}",
|
837
|
-
options=make_request_options(
|
838
|
-
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
839
|
-
),
|
840
|
-
cast_to=NoneType,
|
841
|
-
)
|
842
|
-
|
843
|
-
async def cancel(
|
844
|
-
self,
|
845
|
-
task_id: Optional[str],
|
846
|
-
*,
|
847
|
-
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
848
|
-
# The extra values given here take precedence over values defined on the client or passed to this method.
|
849
|
-
extra_headers: Headers | None = None,
|
850
|
-
extra_query: Query | None = None,
|
851
|
-
extra_body: Body | None = None,
|
852
|
-
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
853
|
-
) -> None:
|
854
|
-
"""
|
855
|
-
Cancel a task that hasn't started processing yet:
|
856
|
-
|
857
|
-
- For new tasks: Status will be updated to `Cancelled`
|
858
|
-
- For updating tasks: Task will revert to the previous state
|
859
|
-
|
860
|
-
Requirements:
|
861
|
-
|
862
|
-
- Task must have status `Starting`
|
863
|
-
|
864
|
-
Args:
|
865
|
-
extra_headers: Send extra headers
|
866
|
-
|
867
|
-
extra_query: Add additional query parameters to the request
|
868
|
-
|
869
|
-
extra_body: Add additional JSON properties to the request
|
870
|
-
|
871
|
-
timeout: Override the client-level default timeout for this request, in seconds
|
872
|
-
"""
|
873
|
-
if not task_id:
|
874
|
-
raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
|
875
|
-
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
|
876
|
-
return await self._get(
|
877
|
-
f"/task/{task_id}/cancel",
|
878
|
-
options=make_request_options(
|
879
|
-
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
880
|
-
),
|
881
|
-
cast_to=NoneType,
|
882
|
-
)
|
883
|
-
|
884
|
-
async def get(
|
885
|
-
self,
|
886
|
-
task_id: Optional[str],
|
887
|
-
*,
|
888
|
-
base64_urls: bool | NotGiven = NOT_GIVEN,
|
889
|
-
include_chunks: bool | NotGiven = NOT_GIVEN,
|
890
|
-
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
891
|
-
# The extra values given here take precedence over values defined on the client or passed to this method.
|
892
|
-
extra_headers: Headers | None = None,
|
893
|
-
extra_query: Query | None = None,
|
894
|
-
extra_body: Body | None = None,
|
895
|
-
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
896
|
-
) -> Task:
|
897
|
-
"""
|
898
|
-
Retrieves detailed information about a task by its ID, including:
|
899
|
-
|
900
|
-
- Processing status
|
901
|
-
- Task configuration
|
902
|
-
- Output data (if processing is complete)
|
903
|
-
- File metadata (name, page count)
|
904
|
-
- Timestamps (created, started, finished)
|
905
|
-
- Presigned URLs for accessing files
|
906
|
-
|
907
|
-
This endpoint can be used to:
|
908
|
-
|
909
|
-
1. Poll the task status during processing
|
910
|
-
2. Retrieve the final output once processing is complete
|
911
|
-
3. Access task metadata and configuration
|
912
|
-
|
913
|
-
Args:
|
914
|
-
base64_urls: Whether to return base64 encoded URLs. If false, the URLs will be returned as
|
915
|
-
presigned URLs.
|
916
|
-
|
917
|
-
include_chunks: Whether to include chunks in the output response
|
918
|
-
|
919
|
-
extra_headers: Send extra headers
|
920
|
-
|
921
|
-
extra_query: Add additional query parameters to the request
|
922
|
-
|
923
|
-
extra_body: Add additional JSON properties to the request
|
924
|
-
|
925
|
-
timeout: Override the client-level default timeout for this request, in seconds
|
926
|
-
"""
|
927
|
-
if not task_id:
|
928
|
-
raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
|
929
|
-
return await self._get(
|
930
|
-
f"/task/{task_id}",
|
931
|
-
options=make_request_options(
|
932
|
-
extra_headers=extra_headers,
|
933
|
-
extra_query=extra_query,
|
934
|
-
extra_body=extra_body,
|
935
|
-
timeout=timeout,
|
936
|
-
query=await async_maybe_transform(
|
937
|
-
{
|
938
|
-
"base64_urls": base64_urls,
|
939
|
-
"include_chunks": include_chunks,
|
940
|
-
},
|
941
|
-
task_get_params.TaskGetParams,
|
942
|
-
),
|
943
|
-
),
|
944
|
-
cast_to=Task,
|
945
|
-
)
|
946
|
-
|
947
|
-
async def parse(
|
444
|
+
async def update(
|
948
445
|
self,
|
446
|
+
task_id: str,
|
949
447
|
*,
|
950
|
-
|
951
|
-
chunk_processing: Optional[ChunkProcessingParam] | NotGiven = NOT_GIVEN,
|
448
|
+
chunk_processing: Optional[parse_update_params.ChunkProcessing] | NotGiven = NOT_GIVEN,
|
952
449
|
error_handling: Optional[Literal["Fail", "Continue"]] | NotGiven = NOT_GIVEN,
|
953
450
|
expires_in: Optional[int] | NotGiven = NOT_GIVEN,
|
954
|
-
|
955
|
-
llm_processing: Optional[
|
451
|
+
high_resolution: Optional[bool] | NotGiven = NOT_GIVEN,
|
452
|
+
llm_processing: Optional[parse_update_params.LlmProcessing] | NotGiven = NOT_GIVEN,
|
956
453
|
ocr_strategy: Optional[Literal["All", "Auto"]] | NotGiven = NOT_GIVEN,
|
957
454
|
pipeline: Optional[Literal["Azure", "Chunkr"]] | NotGiven = NOT_GIVEN,
|
958
|
-
segment_processing: Optional[
|
455
|
+
segment_processing: Optional[parse_update_params.SegmentProcessing] | NotGiven = NOT_GIVEN,
|
959
456
|
segmentation_strategy: Optional[Literal["LayoutAnalysis", "Page"]] | NotGiven = NOT_GIVEN,
|
960
457
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
961
458
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
@@ -964,22 +461,21 @@ class AsyncTaskResource(AsyncAPIResource):
|
|
964
461
|
extra_body: Body | None = None,
|
965
462
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
966
463
|
) -> Task:
|
967
|
-
"""
|
968
|
-
Queues a document for processing and returns a TaskResponse containing:
|
464
|
+
"""Updates an existing task's configuration and reprocesses the document.
|
969
465
|
|
970
|
-
|
971
|
-
|
972
|
-
|
973
|
-
|
974
|
-
|
975
|
-
|
466
|
+
The
|
467
|
+
original configuration will be used for all values that are not provided in the
|
468
|
+
update.
|
469
|
+
|
470
|
+
Requirements:
|
471
|
+
|
472
|
+
- Task must have status `Succeeded` or `Failed`
|
473
|
+
- New configuration must be different from the current one
|
976
474
|
|
977
475
|
The returned task will typically be in a `Starting` or `Processing` state. Use
|
978
476
|
the `GET /task/{task_id}` endpoint to poll for completion.
|
979
477
|
|
980
478
|
Args:
|
981
|
-
file: The file to be uploaded. Can be a URL or a base64 encoded file.
|
982
|
-
|
983
479
|
chunk_processing: Controls the setting for the chunking and post-processing of each chunk.
|
984
480
|
|
985
481
|
error_handling:
|
@@ -992,7 +488,8 @@ class AsyncTaskResource(AsyncAPIResource):
|
|
992
488
|
expires_in: The number of seconds until task is deleted. Expired tasks can **not** be
|
993
489
|
updated, polled or accessed via web interface.
|
994
490
|
|
995
|
-
|
491
|
+
high_resolution: Whether to use high-resolution images for cropping and post-processing. (Latency
|
492
|
+
penalty: ~7 seconds per page)
|
996
493
|
|
997
494
|
llm_processing: Controls the LLM used for the task.
|
998
495
|
|
@@ -1020,8 +517,8 @@ class AsyncTaskResource(AsyncAPIResource):
|
|
1020
517
|
|
1021
518
|
**Default strategy per segment** • `Title`, `SectionHeader`, `Text`, `ListItem`,
|
1022
519
|
`Caption`, `Footnote` → **Auto** (Markdown) • `Table` → **LLM** (HTML,
|
1023
|
-
description on) • `Picture` → **LLM** (Markdown, description
|
1024
|
-
• `Formula`, `Page` → **LLM** (Markdown) • `PageHeader`, `PageFooter` →
|
520
|
+
description on) • `Picture` → **LLM** (Markdown, description off, cropping
|
521
|
+
_All_) • `Formula`, `Page` → **LLM** (Markdown) • `PageHeader`, `PageFooter` →
|
1025
522
|
**Ignore** (removed from output)
|
1026
523
|
|
1027
524
|
---
|
@@ -1046,22 +543,23 @@ class AsyncTaskResource(AsyncAPIResource):
|
|
1046
543
|
|
1047
544
|
timeout: Override the client-level default timeout for this request, in seconds
|
1048
545
|
"""
|
1049
|
-
|
1050
|
-
"
|
546
|
+
if not task_id:
|
547
|
+
raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
|
548
|
+
return await self._patch(
|
549
|
+
f"/task/{task_id}/parse",
|
1051
550
|
body=await async_maybe_transform(
|
1052
551
|
{
|
1053
|
-
"file": file,
|
1054
552
|
"chunk_processing": chunk_processing,
|
1055
553
|
"error_handling": error_handling,
|
1056
554
|
"expires_in": expires_in,
|
1057
|
-
"
|
555
|
+
"high_resolution": high_resolution,
|
1058
556
|
"llm_processing": llm_processing,
|
1059
557
|
"ocr_strategy": ocr_strategy,
|
1060
558
|
"pipeline": pipeline,
|
1061
559
|
"segment_processing": segment_processing,
|
1062
560
|
"segmentation_strategy": segmentation_strategy,
|
1063
561
|
},
|
1064
|
-
|
562
|
+
parse_update_params.ParseUpdateParams,
|
1065
563
|
),
|
1066
564
|
options=make_request_options(
|
1067
565
|
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
@@ -1070,97 +568,49 @@ class AsyncTaskResource(AsyncAPIResource):
|
|
1070
568
|
)
|
1071
569
|
|
1072
570
|
|
1073
|
-
class
|
1074
|
-
def __init__(self,
|
1075
|
-
self.
|
571
|
+
class ParseResourceWithRawResponse:
|
572
|
+
def __init__(self, parse: ParseResource) -> None:
|
573
|
+
self._parse = parse
|
1076
574
|
|
1077
|
-
self.
|
1078
|
-
|
1079
|
-
)
|
1080
|
-
self.list = to_raw_response_wrapper(
|
1081
|
-
task.list,
|
1082
|
-
)
|
1083
|
-
self.delete = to_raw_response_wrapper(
|
1084
|
-
task.delete,
|
1085
|
-
)
|
1086
|
-
self.cancel = to_raw_response_wrapper(
|
1087
|
-
task.cancel,
|
575
|
+
self.create = to_raw_response_wrapper(
|
576
|
+
parse.create,
|
1088
577
|
)
|
1089
|
-
self.
|
1090
|
-
|
1091
|
-
)
|
1092
|
-
self.parse = to_raw_response_wrapper(
|
1093
|
-
task.parse,
|
578
|
+
self.update = to_raw_response_wrapper(
|
579
|
+
parse.update,
|
1094
580
|
)
|
1095
581
|
|
1096
582
|
|
1097
|
-
class
|
1098
|
-
def __init__(self,
|
1099
|
-
self.
|
583
|
+
class AsyncParseResourceWithRawResponse:
|
584
|
+
def __init__(self, parse: AsyncParseResource) -> None:
|
585
|
+
self._parse = parse
|
1100
586
|
|
1101
|
-
self.
|
1102
|
-
|
1103
|
-
)
|
1104
|
-
self.list = async_to_raw_response_wrapper(
|
1105
|
-
task.list,
|
1106
|
-
)
|
1107
|
-
self.delete = async_to_raw_response_wrapper(
|
1108
|
-
task.delete,
|
1109
|
-
)
|
1110
|
-
self.cancel = async_to_raw_response_wrapper(
|
1111
|
-
task.cancel,
|
1112
|
-
)
|
1113
|
-
self.get = async_to_raw_response_wrapper(
|
1114
|
-
task.get,
|
587
|
+
self.create = async_to_raw_response_wrapper(
|
588
|
+
parse.create,
|
1115
589
|
)
|
1116
|
-
self.
|
1117
|
-
|
590
|
+
self.update = async_to_raw_response_wrapper(
|
591
|
+
parse.update,
|
1118
592
|
)
|
1119
593
|
|
1120
594
|
|
1121
|
-
class
|
1122
|
-
def __init__(self,
|
1123
|
-
self.
|
595
|
+
class ParseResourceWithStreamingResponse:
|
596
|
+
def __init__(self, parse: ParseResource) -> None:
|
597
|
+
self._parse = parse
|
1124
598
|
|
1125
|
-
self.
|
1126
|
-
|
1127
|
-
)
|
1128
|
-
self.list = to_streamed_response_wrapper(
|
1129
|
-
task.list,
|
1130
|
-
)
|
1131
|
-
self.delete = to_streamed_response_wrapper(
|
1132
|
-
task.delete,
|
599
|
+
self.create = to_streamed_response_wrapper(
|
600
|
+
parse.create,
|
1133
601
|
)
|
1134
|
-
self.
|
1135
|
-
|
1136
|
-
)
|
1137
|
-
self.get = to_streamed_response_wrapper(
|
1138
|
-
task.get,
|
1139
|
-
)
|
1140
|
-
self.parse = to_streamed_response_wrapper(
|
1141
|
-
task.parse,
|
602
|
+
self.update = to_streamed_response_wrapper(
|
603
|
+
parse.update,
|
1142
604
|
)
|
1143
605
|
|
1144
606
|
|
1145
|
-
class
|
1146
|
-
def __init__(self,
|
1147
|
-
self.
|
607
|
+
class AsyncParseResourceWithStreamingResponse:
|
608
|
+
def __init__(self, parse: AsyncParseResource) -> None:
|
609
|
+
self._parse = parse
|
1148
610
|
|
1149
|
-
self.
|
1150
|
-
|
1151
|
-
)
|
1152
|
-
self.list = async_to_streamed_response_wrapper(
|
1153
|
-
task.list,
|
1154
|
-
)
|
1155
|
-
self.delete = async_to_streamed_response_wrapper(
|
1156
|
-
task.delete,
|
611
|
+
self.create = async_to_streamed_response_wrapper(
|
612
|
+
parse.create,
|
1157
613
|
)
|
1158
|
-
self.
|
1159
|
-
|
1160
|
-
)
|
1161
|
-
self.get = async_to_streamed_response_wrapper(
|
1162
|
-
task.get,
|
1163
|
-
)
|
1164
|
-
self.parse = async_to_streamed_response_wrapper(
|
1165
|
-
task.parse,
|
614
|
+
self.update = async_to_streamed_response_wrapper(
|
615
|
+
parse.update,
|
1166
616
|
)
|