chunkr-ai 0.1.0a4__py3-none-any.whl → 0.1.0a5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
chunkr_ai/__init__.py CHANGED
@@ -72,9 +72,6 @@ __all__ = [
72
72
  ]
73
73
 
74
74
  if not _t.TYPE_CHECKING:
75
- # Load custom helpers that monkey-patch generated types.
76
- # This keeps custom code separate from generated files, per Stainless guidance.
77
- from .lib import tasks_poll as _tasks_poll # noqa: F401
78
75
  from ._utils._resources_proxy import resources as resources
79
76
 
80
77
  _setup_logging()
chunkr_ai/_client.py CHANGED
@@ -95,6 +95,8 @@ class Chunkr(SyncAPIClient):
95
95
  _strict_response_validation=_strict_response_validation,
96
96
  )
97
97
 
98
+ self._idempotency_header = "Idempotency-Key"
99
+
98
100
  self.tasks = tasks.TasksResource(self)
99
101
  self.files = files.FilesResource(self)
100
102
  self.health = health.HealthResource(self)
@@ -267,6 +269,8 @@ class AsyncChunkr(AsyncAPIClient):
267
269
  _strict_response_validation=_strict_response_validation,
268
270
  )
269
271
 
272
+ self._idempotency_header = "Idempotency-Key"
273
+
270
274
  self.tasks = tasks.AsyncTasksResource(self)
271
275
  self.files = files.AsyncFilesResource(self)
272
276
  self.health = health.AsyncHealthResource(self)
chunkr_ai/_constants.py CHANGED
@@ -5,10 +5,10 @@ import httpx
5
5
  RAW_RESPONSE_HEADER = "X-Stainless-Raw-Response"
6
6
  OVERRIDE_CAST_TO_HEADER = "____stainless_override_cast_to"
7
7
 
8
- # default timeout is 1 minute
9
- DEFAULT_TIMEOUT = httpx.Timeout(timeout=60, connect=5.0)
10
- DEFAULT_MAX_RETRIES = 2
8
+ # default timeout is 30 seconds
9
+ DEFAULT_TIMEOUT = httpx.Timeout(timeout=30, connect=5.0)
10
+ DEFAULT_MAX_RETRIES = 50
11
11
  DEFAULT_CONNECTION_LIMITS = httpx.Limits(max_connections=100, max_keepalive_connections=20)
12
12
 
13
- INITIAL_RETRY_DELAY = 0.5
14
- MAX_RETRY_DELAY = 8.0
13
+ INITIAL_RETRY_DELAY = 1.0
14
+ MAX_RETRY_DELAY = 10.0
chunkr_ai/_version.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
2
 
3
3
  __title__ = "chunkr_ai"
4
- __version__ = "0.1.0-alpha.4" # x-release-please-version
4
+ __version__ = "0.1.0-alpha.5" # x-release-please-version
@@ -59,6 +59,7 @@ class FilesResource(SyncAPIResource):
59
59
  extra_query: Query | None = None,
60
60
  extra_body: Body | None = None,
61
61
  timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
62
+ idempotency_key: str | None = None,
62
63
  ) -> File:
63
64
  """
64
65
  Accepts multipart/form-data with fields:
@@ -78,6 +79,8 @@ class FilesResource(SyncAPIResource):
78
79
  extra_body: Add additional JSON properties to the request
79
80
 
80
81
  timeout: Override the client-level default timeout for this request, in seconds
82
+
83
+ idempotency_key: Specify a custom idempotency key for this request
81
84
  """
82
85
  body = deepcopy_minimal(
83
86
  {
@@ -95,7 +98,11 @@ class FilesResource(SyncAPIResource):
95
98
  body=maybe_transform(body, file_create_params.FileCreateParams),
96
99
  files=files,
97
100
  options=make_request_options(
98
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
101
+ extra_headers=extra_headers,
102
+ extra_query=extra_query,
103
+ extra_body=extra_body,
104
+ timeout=timeout,
105
+ idempotency_key=idempotency_key,
99
106
  ),
100
107
  cast_to=File,
101
108
  )
@@ -170,6 +177,7 @@ class FilesResource(SyncAPIResource):
170
177
  extra_query: Query | None = None,
171
178
  extra_body: Body | None = None,
172
179
  timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
180
+ idempotency_key: str | None = None,
173
181
  ) -> Delete:
174
182
  """Delete file contents and scrub sensitive metadata.
175
183
 
@@ -184,13 +192,19 @@ class FilesResource(SyncAPIResource):
184
192
  extra_body: Add additional JSON properties to the request
185
193
 
186
194
  timeout: Override the client-level default timeout for this request, in seconds
195
+
196
+ idempotency_key: Specify a custom idempotency key for this request
187
197
  """
188
198
  if not file_id:
189
199
  raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
190
200
  return self._delete(
191
201
  f"/files/{file_id}",
192
202
  options=make_request_options(
193
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
203
+ extra_headers=extra_headers,
204
+ extra_query=extra_query,
205
+ extra_body=extra_body,
206
+ timeout=timeout,
207
+ idempotency_key=idempotency_key,
194
208
  ),
195
209
  cast_to=Delete,
196
210
  )
@@ -353,6 +367,7 @@ class AsyncFilesResource(AsyncAPIResource):
353
367
  extra_query: Query | None = None,
354
368
  extra_body: Body | None = None,
355
369
  timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
370
+ idempotency_key: str | None = None,
356
371
  ) -> File:
357
372
  """
358
373
  Accepts multipart/form-data with fields:
@@ -372,6 +387,8 @@ class AsyncFilesResource(AsyncAPIResource):
372
387
  extra_body: Add additional JSON properties to the request
373
388
 
374
389
  timeout: Override the client-level default timeout for this request, in seconds
390
+
391
+ idempotency_key: Specify a custom idempotency key for this request
375
392
  """
376
393
  body = deepcopy_minimal(
377
394
  {
@@ -389,7 +406,11 @@ class AsyncFilesResource(AsyncAPIResource):
389
406
  body=await async_maybe_transform(body, file_create_params.FileCreateParams),
390
407
  files=files,
391
408
  options=make_request_options(
392
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
409
+ extra_headers=extra_headers,
410
+ extra_query=extra_query,
411
+ extra_body=extra_body,
412
+ timeout=timeout,
413
+ idempotency_key=idempotency_key,
393
414
  ),
394
415
  cast_to=File,
395
416
  )
@@ -464,6 +485,7 @@ class AsyncFilesResource(AsyncAPIResource):
464
485
  extra_query: Query | None = None,
465
486
  extra_body: Body | None = None,
466
487
  timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
488
+ idempotency_key: str | None = None,
467
489
  ) -> Delete:
468
490
  """Delete file contents and scrub sensitive metadata.
469
491
 
@@ -478,13 +500,19 @@ class AsyncFilesResource(AsyncAPIResource):
478
500
  extra_body: Add additional JSON properties to the request
479
501
 
480
502
  timeout: Override the client-level default timeout for this request, in seconds
503
+
504
+ idempotency_key: Specify a custom idempotency key for this request
481
505
  """
482
506
  if not file_id:
483
507
  raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
484
508
  return await self._delete(
485
509
  f"/files/{file_id}",
486
510
  options=make_request_options(
487
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
511
+ extra_headers=extra_headers,
512
+ extra_query=extra_query,
513
+ extra_body=extra_body,
514
+ timeout=timeout,
515
+ idempotency_key=idempotency_key,
488
516
  ),
489
517
  cast_to=Delete,
490
518
  )
@@ -48,6 +48,9 @@ class ParseResource(SyncAPIResource):
48
48
  self,
49
49
  *,
50
50
  file: str,
51
+ base64_urls: bool | NotGiven = NOT_GIVEN,
52
+ include_chunks: bool | NotGiven = NOT_GIVEN,
53
+ wait_for_completion: bool | NotGiven = NOT_GIVEN,
51
54
  chunk_processing: Optional[parse_create_params.ChunkProcessing] | NotGiven = NOT_GIVEN,
52
55
  error_handling: Optional[Literal["Fail", "Continue"]] | NotGiven = NOT_GIVEN,
53
56
  expires_in: Optional[int] | NotGiven = NOT_GIVEN,
@@ -63,29 +66,34 @@ class ParseResource(SyncAPIResource):
63
66
  extra_query: Query | None = None,
64
67
  extra_body: Body | None = None,
65
68
  timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
69
+ idempotency_key: str | None = None,
66
70
  ) -> Task:
67
71
  """
68
- Queues a document for processing and returns a TaskResponse containing:
72
+ Queues a document for processing and returns a `TaskResponse` with the assigned
73
+ `task_id`, initial configuration, file metadata, and timestamps. The initial
74
+ status is `Starting`.
69
75
 
70
- - Task ID for status polling
71
- - Initial configuration
72
- - File metadata
73
- - Processing status
74
- - Creation timestamp
75
- - Presigned URLs for file access
76
-
77
- The returned task will typically be in a `Starting` or `Processing` state. Use
78
- the `GET /tasks/{task_id}` endpoint to poll for completion.
76
+ If `wait_for_completion=true` is provided, the server waits briefly for
77
+ completion. If the task completes within that window, a 200 response with the
78
+ final `TaskResponse` is returned. Otherwise, the server returns a 408 or 409
79
+ with retry guidance and a body describing how long to wait before retrying.
79
80
 
80
81
  Args:
81
82
  file:
82
83
  The file to be uploaded. Supported inputs:
83
84
 
84
- - `ch://files/{file_id}`: References a previously uploaded file you own
85
- (authorization enforced)
85
+ - `ch://files/{file_id}`: Reference to an existing file. Upload via the Files
86
+ API
86
87
  - `http(s)://...`: Remote URL to fetch
87
88
  - `data:*;base64,...` or raw base64 string
88
89
 
90
+ base64_urls: Whether to return base64 encoded URLs. If false, presigned URLs are returned.
91
+
92
+ include_chunks: Whether to include chunks in the output response
93
+
94
+ wait_for_completion: If true, server holds briefly and may return 200 when done; otherwise returns
95
+ 408/409 with Retry-After headers
96
+
89
97
  chunk_processing: Controls the setting for the chunking and post-processing of each chunk.
90
98
 
91
99
  error_handling:
@@ -114,22 +122,29 @@ class ParseResource(SyncAPIResource):
114
122
 
115
123
  segment_processing: Defines how each segment type is handled when generating the final output.
116
124
 
117
- Each segment uses one of three strategies. The chosen strategy controls:
118
- Whether the segment is kept (`Auto`, `LLM`) or skipped (`Ignore`). • How the
119
- content is produced (rule-based vs. LLM). The output format (`Html` or
120
- `Markdown`).
125
+ Each segment uses one of three strategies. The chosen strategy controls:
126
+
127
+ - Whether the segment is kept (`Auto`, `LLM`) or skipped (`Ignore`).
128
+ - How the content is produced (rule-based vs. LLM).
129
+ - The output format (`Html` or `Markdown`).
121
130
 
122
131
  Optional flags such as image **cropping**, **extended context**, and
123
132
  **descriptions** further refine behaviour.
124
133
 
125
- **Default strategy per segment** • `Title`, `SectionHeader`, `Text`, `ListItem`,
126
- `Caption`, `Footnote` → **Auto** (Markdown, description off) • `Table` → **LLM**
127
- (HTML, description on) • `Picture` → **LLM** (Markdown, description off,
128
- cropping _All_) • `Formula`, `Page` → **LLM** (Markdown, description off) •
129
- `PageHeader`, `PageFooter` → **Ignore** (removed from output)
134
+ **Default strategy per segment**
135
+
136
+ - `Title`, `SectionHeader`, `Text`, `ListItem`, `Caption`, `Footnote` → **Auto**
137
+ (Markdown, description off)
138
+ - `Table` → **LLM** (HTML, description on)
139
+ - `Picture` → **LLM** (Markdown, description off, cropping _All_)
140
+ - `Formula`, `Page` → **LLM** (Markdown, description off)
141
+ - `PageHeader`, `PageFooter` → **Ignore** (removed from output)
130
142
 
131
- **Strategy reference** • **Auto** – rule-based content generation. • **LLM** –
132
- generate content with an LLM. • **Ignore** – exclude the segment entirely.
143
+ **Strategy reference**
144
+
145
+ - **Auto** – rule-based content generation.
146
+ - **LLM** – generate content with an LLM.
147
+ - **Ignore** – exclude the segment entirely.
133
148
 
134
149
  segmentation_strategy:
135
150
  Controls the segmentation strategy:
@@ -147,6 +162,8 @@ class ParseResource(SyncAPIResource):
147
162
  extra_body: Add additional JSON properties to the request
148
163
 
149
164
  timeout: Override the client-level default timeout for this request, in seconds
165
+
166
+ idempotency_key: Specify a custom idempotency key for this request
150
167
  """
151
168
  return self._post(
152
169
  "/tasks/parse",
@@ -166,7 +183,19 @@ class ParseResource(SyncAPIResource):
166
183
  parse_create_params.ParseCreateParams,
167
184
  ),
168
185
  options=make_request_options(
169
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
186
+ extra_headers=extra_headers,
187
+ extra_query=extra_query,
188
+ extra_body=extra_body,
189
+ timeout=timeout,
190
+ idempotency_key=idempotency_key,
191
+ query=maybe_transform(
192
+ {
193
+ "base64_urls": base64_urls,
194
+ "include_chunks": include_chunks,
195
+ "wait_for_completion": wait_for_completion,
196
+ },
197
+ parse_create_params.ParseCreateParams,
198
+ ),
170
199
  ),
171
200
  cast_to=Task,
172
201
  )
@@ -175,6 +204,9 @@ class ParseResource(SyncAPIResource):
175
204
  self,
176
205
  task_id: str,
177
206
  *,
207
+ base64_urls: bool | NotGiven = NOT_GIVEN,
208
+ include_chunks: bool | NotGiven = NOT_GIVEN,
209
+ wait_for_completion: bool | NotGiven = NOT_GIVEN,
178
210
  chunk_processing: Optional[parse_update_params.ChunkProcessing] | NotGiven = NOT_GIVEN,
179
211
  error_handling: Optional[Literal["Fail", "Continue"]] | NotGiven = NOT_GIVEN,
180
212
  expires_in: Optional[int] | NotGiven = NOT_GIVEN,
@@ -190,22 +222,31 @@ class ParseResource(SyncAPIResource):
190
222
  extra_query: Query | None = None,
191
223
  extra_body: Body | None = None,
192
224
  timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
225
+ idempotency_key: str | None = None,
193
226
  ) -> Task:
194
227
  """Updates an existing task's configuration and reprocesses the document.
195
228
 
196
229
  The
197
- original configuration will be used for all values that are not provided in the
198
- update.
230
+ current configuration is used as the base; only provided fields are changed.
199
231
 
200
232
  Requirements:
201
233
 
202
- - Task must have status `Succeeded` or `Failed`
203
- - New configuration must be different from the current one
234
+ - Task must be in a terminal state (`Succeeded` or `Failed`).
235
+ - The new configuration must differ from the current configuration.
204
236
 
205
- The returned task will typically be in a `Starting` or `Processing` state. Use
206
- the `GET /tasks/{task_id}` endpoint to poll for completion.
237
+ If `wait_for_completion=true` is provided, the server waits briefly for
238
+ completion. If the task completes within that window, a 200 response with the
239
+ final `TaskResponse` is returned. Otherwise, the server returns a 408 with retry
240
+ guidance and a body describing how long to wait before retrying.
207
241
 
208
242
  Args:
243
+ base64_urls: Whether to return base64 encoded URLs. If false, presigned URLs are returned.
244
+
245
+ include_chunks: Whether to include chunks in the output response
246
+
247
+ wait_for_completion: If true, server holds briefly and may return 200 when done; otherwise returns
248
+ 408/409 with Retry-After headers
249
+
209
250
  chunk_processing: Controls the setting for the chunking and post-processing of each chunk.
210
251
 
211
252
  error_handling:
@@ -235,22 +276,29 @@ class ParseResource(SyncAPIResource):
235
276
 
236
277
  segment_processing: Defines how each segment type is handled when generating the final output.
237
278
 
238
- Each segment uses one of three strategies. The chosen strategy controls:
239
- Whether the segment is kept (`Auto`, `LLM`) or skipped (`Ignore`). • How the
240
- content is produced (rule-based vs. LLM). The output format (`Html` or
241
- `Markdown`).
279
+ Each segment uses one of three strategies. The chosen strategy controls:
280
+
281
+ - Whether the segment is kept (`Auto`, `LLM`) or skipped (`Ignore`).
282
+ - How the content is produced (rule-based vs. LLM).
283
+ - The output format (`Html` or `Markdown`).
242
284
 
243
285
  Optional flags such as image **cropping**, **extended context**, and
244
286
  **descriptions** further refine behaviour.
245
287
 
246
- **Default strategy per segment** • `Title`, `SectionHeader`, `Text`, `ListItem`,
247
- `Caption`, `Footnote` → **Auto** (Markdown, description off) • `Table` → **LLM**
248
- (HTML, description on) • `Picture` → **LLM** (Markdown, description off,
249
- cropping _All_) • `Formula`, `Page` → **LLM** (Markdown, description off) •
250
- `PageHeader`, `PageFooter` → **Ignore** (removed from output)
288
+ **Default strategy per segment**
251
289
 
252
- **Strategy reference** • **Auto** – rule-based content generation. • **LLM** –
253
- generate content with an LLM. • **Ignore** – exclude the segment entirely.
290
+ - `Title`, `SectionHeader`, `Text`, `ListItem`, `Caption`, `Footnote` → **Auto**
291
+ (Markdown, description off)
292
+ - `Table` → **LLM** (HTML, description on)
293
+ - `Picture` → **LLM** (Markdown, description off, cropping _All_)
294
+ - `Formula`, `Page` → **LLM** (Markdown, description off)
295
+ - `PageHeader`, `PageFooter` → **Ignore** (removed from output)
296
+
297
+ **Strategy reference**
298
+
299
+ - **Auto** – rule-based content generation.
300
+ - **LLM** – generate content with an LLM.
301
+ - **Ignore** – exclude the segment entirely.
254
302
 
255
303
  segmentation_strategy:
256
304
  Controls the segmentation strategy:
@@ -268,6 +316,8 @@ class ParseResource(SyncAPIResource):
268
316
  extra_body: Add additional JSON properties to the request
269
317
 
270
318
  timeout: Override the client-level default timeout for this request, in seconds
319
+
320
+ idempotency_key: Specify a custom idempotency key for this request
271
321
  """
272
322
  if not task_id:
273
323
  raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
@@ -288,7 +338,19 @@ class ParseResource(SyncAPIResource):
288
338
  parse_update_params.ParseUpdateParams,
289
339
  ),
290
340
  options=make_request_options(
291
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
341
+ extra_headers=extra_headers,
342
+ extra_query=extra_query,
343
+ extra_body=extra_body,
344
+ timeout=timeout,
345
+ idempotency_key=idempotency_key,
346
+ query=maybe_transform(
347
+ {
348
+ "base64_urls": base64_urls,
349
+ "include_chunks": include_chunks,
350
+ "wait_for_completion": wait_for_completion,
351
+ },
352
+ parse_update_params.ParseUpdateParams,
353
+ ),
292
354
  ),
293
355
  cast_to=Task,
294
356
  )
@@ -318,6 +380,9 @@ class AsyncParseResource(AsyncAPIResource):
318
380
  self,
319
381
  *,
320
382
  file: str,
383
+ base64_urls: bool | NotGiven = NOT_GIVEN,
384
+ include_chunks: bool | NotGiven = NOT_GIVEN,
385
+ wait_for_completion: bool | NotGiven = NOT_GIVEN,
321
386
  chunk_processing: Optional[parse_create_params.ChunkProcessing] | NotGiven = NOT_GIVEN,
322
387
  error_handling: Optional[Literal["Fail", "Continue"]] | NotGiven = NOT_GIVEN,
323
388
  expires_in: Optional[int] | NotGiven = NOT_GIVEN,
@@ -333,29 +398,34 @@ class AsyncParseResource(AsyncAPIResource):
333
398
  extra_query: Query | None = None,
334
399
  extra_body: Body | None = None,
335
400
  timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
401
+ idempotency_key: str | None = None,
336
402
  ) -> Task:
337
403
  """
338
- Queues a document for processing and returns a TaskResponse containing:
404
+ Queues a document for processing and returns a `TaskResponse` with the assigned
405
+ `task_id`, initial configuration, file metadata, and timestamps. The initial
406
+ status is `Starting`.
339
407
 
340
- - Task ID for status polling
341
- - Initial configuration
342
- - File metadata
343
- - Processing status
344
- - Creation timestamp
345
- - Presigned URLs for file access
346
-
347
- The returned task will typically be in a `Starting` or `Processing` state. Use
348
- the `GET /tasks/{task_id}` endpoint to poll for completion.
408
+ If `wait_for_completion=true` is provided, the server waits briefly for
409
+ completion. If the task completes within that window, a 200 response with the
410
+ final `TaskResponse` is returned. Otherwise, the server returns a 408 or 409
411
+ with retry guidance and a body describing how long to wait before retrying.
349
412
 
350
413
  Args:
351
414
  file:
352
415
  The file to be uploaded. Supported inputs:
353
416
 
354
- - `ch://files/{file_id}`: References a previously uploaded file you own
355
- (authorization enforced)
417
+ - `ch://files/{file_id}`: Reference to an existing file. Upload via the Files
418
+ API
356
419
  - `http(s)://...`: Remote URL to fetch
357
420
  - `data:*;base64,...` or raw base64 string
358
421
 
422
+ base64_urls: Whether to return base64 encoded URLs. If false, presigned URLs are returned.
423
+
424
+ include_chunks: Whether to include chunks in the output response
425
+
426
+ wait_for_completion: If true, server holds briefly and may return 200 when done; otherwise returns
427
+ 408/409 with Retry-After headers
428
+
359
429
  chunk_processing: Controls the setting for the chunking and post-processing of each chunk.
360
430
 
361
431
  error_handling:
@@ -384,22 +454,29 @@ class AsyncParseResource(AsyncAPIResource):
384
454
 
385
455
  segment_processing: Defines how each segment type is handled when generating the final output.
386
456
 
387
- Each segment uses one of three strategies. The chosen strategy controls:
388
- Whether the segment is kept (`Auto`, `LLM`) or skipped (`Ignore`). • How the
389
- content is produced (rule-based vs. LLM). The output format (`Html` or
390
- `Markdown`).
457
+ Each segment uses one of three strategies. The chosen strategy controls:
458
+
459
+ - Whether the segment is kept (`Auto`, `LLM`) or skipped (`Ignore`).
460
+ - How the content is produced (rule-based vs. LLM).
461
+ - The output format (`Html` or `Markdown`).
391
462
 
392
463
  Optional flags such as image **cropping**, **extended context**, and
393
464
  **descriptions** further refine behaviour.
394
465
 
395
- **Default strategy per segment** • `Title`, `SectionHeader`, `Text`, `ListItem`,
396
- `Caption`, `Footnote` → **Auto** (Markdown, description off) • `Table` → **LLM**
397
- (HTML, description on) • `Picture` → **LLM** (Markdown, description off,
398
- cropping _All_) • `Formula`, `Page` → **LLM** (Markdown, description off) •
399
- `PageHeader`, `PageFooter` → **Ignore** (removed from output)
466
+ **Default strategy per segment**
467
+
468
+ - `Title`, `SectionHeader`, `Text`, `ListItem`, `Caption`, `Footnote` → **Auto**
469
+ (Markdown, description off)
470
+ - `Table` → **LLM** (HTML, description on)
471
+ - `Picture` → **LLM** (Markdown, description off, cropping _All_)
472
+ - `Formula`, `Page` → **LLM** (Markdown, description off)
473
+ - `PageHeader`, `PageFooter` → **Ignore** (removed from output)
400
474
 
401
- **Strategy reference** • **Auto** – rule-based content generation. • **LLM** –
402
- generate content with an LLM. • **Ignore** – exclude the segment entirely.
475
+ **Strategy reference**
476
+
477
+ - **Auto** – rule-based content generation.
478
+ - **LLM** – generate content with an LLM.
479
+ - **Ignore** – exclude the segment entirely.
403
480
 
404
481
  segmentation_strategy:
405
482
  Controls the segmentation strategy:
@@ -417,6 +494,8 @@ class AsyncParseResource(AsyncAPIResource):
417
494
  extra_body: Add additional JSON properties to the request
418
495
 
419
496
  timeout: Override the client-level default timeout for this request, in seconds
497
+
498
+ idempotency_key: Specify a custom idempotency key for this request
420
499
  """
421
500
  return await self._post(
422
501
  "/tasks/parse",
@@ -436,7 +515,19 @@ class AsyncParseResource(AsyncAPIResource):
436
515
  parse_create_params.ParseCreateParams,
437
516
  ),
438
517
  options=make_request_options(
439
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
518
+ extra_headers=extra_headers,
519
+ extra_query=extra_query,
520
+ extra_body=extra_body,
521
+ timeout=timeout,
522
+ idempotency_key=idempotency_key,
523
+ query=await async_maybe_transform(
524
+ {
525
+ "base64_urls": base64_urls,
526
+ "include_chunks": include_chunks,
527
+ "wait_for_completion": wait_for_completion,
528
+ },
529
+ parse_create_params.ParseCreateParams,
530
+ ),
440
531
  ),
441
532
  cast_to=Task,
442
533
  )
@@ -445,6 +536,9 @@ class AsyncParseResource(AsyncAPIResource):
445
536
  self,
446
537
  task_id: str,
447
538
  *,
539
+ base64_urls: bool | NotGiven = NOT_GIVEN,
540
+ include_chunks: bool | NotGiven = NOT_GIVEN,
541
+ wait_for_completion: bool | NotGiven = NOT_GIVEN,
448
542
  chunk_processing: Optional[parse_update_params.ChunkProcessing] | NotGiven = NOT_GIVEN,
449
543
  error_handling: Optional[Literal["Fail", "Continue"]] | NotGiven = NOT_GIVEN,
450
544
  expires_in: Optional[int] | NotGiven = NOT_GIVEN,
@@ -460,22 +554,31 @@ class AsyncParseResource(AsyncAPIResource):
460
554
  extra_query: Query | None = None,
461
555
  extra_body: Body | None = None,
462
556
  timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
557
+ idempotency_key: str | None = None,
463
558
  ) -> Task:
464
559
  """Updates an existing task's configuration and reprocesses the document.
465
560
 
466
561
  The
467
- original configuration will be used for all values that are not provided in the
468
- update.
562
+ current configuration is used as the base; only provided fields are changed.
469
563
 
470
564
  Requirements:
471
565
 
472
- - Task must have status `Succeeded` or `Failed`
473
- - New configuration must be different from the current one
566
+ - Task must be in a terminal state (`Succeeded` or `Failed`).
567
+ - The new configuration must differ from the current configuration.
474
568
 
475
- The returned task will typically be in a `Starting` or `Processing` state. Use
476
- the `GET /tasks/{task_id}` endpoint to poll for completion.
569
+ If `wait_for_completion=true` is provided, the server waits briefly for
570
+ completion. If the task completes within that window, a 200 response with the
571
+ final `TaskResponse` is returned. Otherwise, the server returns a 408 with retry
572
+ guidance and a body describing how long to wait before retrying.
477
573
 
478
574
  Args:
575
+ base64_urls: Whether to return base64 encoded URLs. If false, presigned URLs are returned.
576
+
577
+ include_chunks: Whether to include chunks in the output response
578
+
579
+ wait_for_completion: If true, server holds briefly and may return 200 when done; otherwise returns
580
+ 408/409 with Retry-After headers
581
+
479
582
  chunk_processing: Controls the setting for the chunking and post-processing of each chunk.
480
583
 
481
584
  error_handling:
@@ -505,22 +608,29 @@ class AsyncParseResource(AsyncAPIResource):
505
608
 
506
609
  segment_processing: Defines how each segment type is handled when generating the final output.
507
610
 
508
- Each segment uses one of three strategies. The chosen strategy controls:
509
- Whether the segment is kept (`Auto`, `LLM`) or skipped (`Ignore`). • How the
510
- content is produced (rule-based vs. LLM). The output format (`Html` or
511
- `Markdown`).
611
+ Each segment uses one of three strategies. The chosen strategy controls:
612
+
613
+ - Whether the segment is kept (`Auto`, `LLM`) or skipped (`Ignore`).
614
+ - How the content is produced (rule-based vs. LLM).
615
+ - The output format (`Html` or `Markdown`).
512
616
 
513
617
  Optional flags such as image **cropping**, **extended context**, and
514
618
  **descriptions** further refine behaviour.
515
619
 
516
- **Default strategy per segment** • `Title`, `SectionHeader`, `Text`, `ListItem`,
517
- `Caption`, `Footnote` → **Auto** (Markdown, description off) • `Table` → **LLM**
518
- (HTML, description on) • `Picture` → **LLM** (Markdown, description off,
519
- cropping _All_) • `Formula`, `Page` → **LLM** (Markdown, description off) •
520
- `PageHeader`, `PageFooter` → **Ignore** (removed from output)
620
+ **Default strategy per segment**
521
621
 
522
- **Strategy reference** • **Auto** – rule-based content generation. • **LLM** –
523
- generate content with an LLM. • **Ignore** – exclude the segment entirely.
622
+ - `Title`, `SectionHeader`, `Text`, `ListItem`, `Caption`, `Footnote` → **Auto**
623
+ (Markdown, description off)
624
+ - `Table` → **LLM** (HTML, description on)
625
+ - `Picture` → **LLM** (Markdown, description off, cropping _All_)
626
+ - `Formula`, `Page` → **LLM** (Markdown, description off)
627
+ - `PageHeader`, `PageFooter` → **Ignore** (removed from output)
628
+
629
+ **Strategy reference**
630
+
631
+ - **Auto** – rule-based content generation.
632
+ - **LLM** – generate content with an LLM.
633
+ - **Ignore** – exclude the segment entirely.
524
634
 
525
635
  segmentation_strategy:
526
636
  Controls the segmentation strategy:
@@ -538,6 +648,8 @@ class AsyncParseResource(AsyncAPIResource):
538
648
  extra_body: Add additional JSON properties to the request
539
649
 
540
650
  timeout: Override the client-level default timeout for this request, in seconds
651
+
652
+ idempotency_key: Specify a custom idempotency key for this request
541
653
  """
542
654
  if not task_id:
543
655
  raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
@@ -558,7 +670,19 @@ class AsyncParseResource(AsyncAPIResource):
558
670
  parse_update_params.ParseUpdateParams,
559
671
  ),
560
672
  options=make_request_options(
561
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
673
+ extra_headers=extra_headers,
674
+ extra_query=extra_query,
675
+ extra_body=extra_body,
676
+ timeout=timeout,
677
+ idempotency_key=idempotency_key,
678
+ query=await async_maybe_transform(
679
+ {
680
+ "base64_urls": base64_urls,
681
+ "include_chunks": include_chunks,
682
+ "wait_for_completion": wait_for_completion,
683
+ },
684
+ parse_update_params.ParseUpdateParams,
685
+ ),
562
686
  ),
563
687
  cast_to=Task,
564
688
  )
@@ -138,6 +138,7 @@ class TasksResource(SyncAPIResource):
138
138
  extra_query: Query | None = None,
139
139
  extra_body: Body | None = None,
140
140
  timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
141
+ idempotency_key: str | None = None,
141
142
  ) -> None:
142
143
  """
143
144
  Delete a task by its ID.
@@ -154,6 +155,8 @@ class TasksResource(SyncAPIResource):
154
155
  extra_body: Add additional JSON properties to the request
155
156
 
156
157
  timeout: Override the client-level default timeout for this request, in seconds
158
+
159
+ idempotency_key: Specify a custom idempotency key for this request
157
160
  """
158
161
  if not task_id:
159
162
  raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
@@ -161,7 +164,11 @@ class TasksResource(SyncAPIResource):
161
164
  return self._delete(
162
165
  f"/tasks/{task_id}",
163
166
  options=make_request_options(
164
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
167
+ extra_headers=extra_headers,
168
+ extra_query=extra_query,
169
+ extra_body=extra_body,
170
+ timeout=timeout,
171
+ idempotency_key=idempotency_key,
165
172
  ),
166
173
  cast_to=NoneType,
167
174
  )
@@ -213,6 +220,7 @@ class TasksResource(SyncAPIResource):
213
220
  *,
214
221
  base64_urls: bool | NotGiven = NOT_GIVEN,
215
222
  include_chunks: bool | NotGiven = NOT_GIVEN,
223
+ wait_for_completion: bool | NotGiven = NOT_GIVEN,
216
224
  # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
217
225
  # The extra values given here take precedence over values defined on the client or passed to this method.
218
226
  extra_headers: Headers | None = None,
@@ -221,20 +229,20 @@ class TasksResource(SyncAPIResource):
221
229
  timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
222
230
  ) -> Task:
223
231
  """
224
- Retrieves detailed information about a task by its ID, including:
232
+ Retrieves the current state of a task and, when requested, waits briefly for
233
+ completion.
225
234
 
226
- - Processing status
227
- - Task configuration
228
- - Output data (if processing is complete)
229
- - File metadata (name, page count)
230
- - Timestamps (created, started, finished)
231
- - Presigned URLs for accessing files
235
+ Returns task details such as processing status, configuration, output (when
236
+ available), file metadata, and timestamps. If `wait_for_completion=true` is
237
+ provided, the server will hold the request briefly. If the task does not reach a
238
+ terminal state during that window, the response will indicate a retry with
239
+ appropriate headers.
232
240
 
233
- This endpoint can be used to:
241
+ Typical uses:
234
242
 
235
- 1. Poll the task status during processing
236
- 2. Retrieve the final output once processing is complete
237
- 3. Access task metadata and configuration
243
+ - Poll a task during processing
244
+ - Retrieve the final output once processing is complete
245
+ - Access task metadata and configuration
238
246
 
239
247
  Args:
240
248
  base64_urls: Whether to return base64 encoded URLs. If false, the URLs will be returned as
@@ -242,6 +250,8 @@ class TasksResource(SyncAPIResource):
242
250
 
243
251
  include_chunks: Whether to include chunks in the output response
244
252
 
253
+ wait_for_completion: Whether to wait for the task to complete
254
+
245
255
  extra_headers: Send extra headers
246
256
 
247
257
  extra_query: Add additional query parameters to the request
@@ -263,6 +273,7 @@ class TasksResource(SyncAPIResource):
263
273
  {
264
274
  "base64_urls": base64_urls,
265
275
  "include_chunks": include_chunks,
276
+ "wait_for_completion": wait_for_completion,
266
277
  },
267
278
  task_get_params.TaskGetParams,
268
279
  ),
@@ -375,6 +386,7 @@ class AsyncTasksResource(AsyncAPIResource):
375
386
  extra_query: Query | None = None,
376
387
  extra_body: Body | None = None,
377
388
  timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
389
+ idempotency_key: str | None = None,
378
390
  ) -> None:
379
391
  """
380
392
  Delete a task by its ID.
@@ -391,6 +403,8 @@ class AsyncTasksResource(AsyncAPIResource):
391
403
  extra_body: Add additional JSON properties to the request
392
404
 
393
405
  timeout: Override the client-level default timeout for this request, in seconds
406
+
407
+ idempotency_key: Specify a custom idempotency key for this request
394
408
  """
395
409
  if not task_id:
396
410
  raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
@@ -398,7 +412,11 @@ class AsyncTasksResource(AsyncAPIResource):
398
412
  return await self._delete(
399
413
  f"/tasks/{task_id}",
400
414
  options=make_request_options(
401
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
415
+ extra_headers=extra_headers,
416
+ extra_query=extra_query,
417
+ extra_body=extra_body,
418
+ timeout=timeout,
419
+ idempotency_key=idempotency_key,
402
420
  ),
403
421
  cast_to=NoneType,
404
422
  )
@@ -450,6 +468,7 @@ class AsyncTasksResource(AsyncAPIResource):
450
468
  *,
451
469
  base64_urls: bool | NotGiven = NOT_GIVEN,
452
470
  include_chunks: bool | NotGiven = NOT_GIVEN,
471
+ wait_for_completion: bool | NotGiven = NOT_GIVEN,
453
472
  # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
454
473
  # The extra values given here take precedence over values defined on the client or passed to this method.
455
474
  extra_headers: Headers | None = None,
@@ -458,20 +477,20 @@ class AsyncTasksResource(AsyncAPIResource):
458
477
  timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
459
478
  ) -> Task:
460
479
  """
461
- Retrieves detailed information about a task by its ID, including:
480
+ Retrieves the current state of a task and, when requested, waits briefly for
481
+ completion.
462
482
 
463
- - Processing status
464
- - Task configuration
465
- - Output data (if processing is complete)
466
- - File metadata (name, page count)
467
- - Timestamps (created, started, finished)
468
- - Presigned URLs for accessing files
483
+ Returns task details such as processing status, configuration, output (when
484
+ available), file metadata, and timestamps. If `wait_for_completion=true` is
485
+ provided, the server will hold the request briefly. If the task does not reach a
486
+ terminal state during that window, the response will indicate a retry with
487
+ appropriate headers.
469
488
 
470
- This endpoint can be used to:
489
+ Typical uses:
471
490
 
472
- 1. Poll the task status during processing
473
- 2. Retrieve the final output once processing is complete
474
- 3. Access task metadata and configuration
491
+ - Poll a task during processing
492
+ - Retrieve the final output once processing is complete
493
+ - Access task metadata and configuration
475
494
 
476
495
  Args:
477
496
  base64_urls: Whether to return base64 encoded URLs. If false, the URLs will be returned as
@@ -479,6 +498,8 @@ class AsyncTasksResource(AsyncAPIResource):
479
498
 
480
499
  include_chunks: Whether to include chunks in the output response
481
500
 
501
+ wait_for_completion: Whether to wait for the task to complete
502
+
482
503
  extra_headers: Send extra headers
483
504
 
484
505
  extra_query: Add additional query parameters to the request
@@ -500,6 +521,7 @@ class AsyncTasksResource(AsyncAPIResource):
500
521
  {
501
522
  "base64_urls": base64_urls,
502
523
  "include_chunks": include_chunks,
524
+ "wait_for_completion": wait_for_completion,
503
525
  },
504
526
  task_get_params.TaskGetParams,
505
527
  ),
chunkr_ai/types/task.py CHANGED
@@ -827,22 +827,29 @@ class Configuration(BaseModel):
827
827
  segment_processing: ConfigurationSegmentProcessing
828
828
  """Defines how each segment type is handled when generating the final output.
829
829
 
830
- Each segment uses one of three strategies. The chosen strategy controls:
831
- Whether the segment is kept (`Auto`, `LLM`) or skipped (`Ignore`). • How the
832
- content is produced (rule-based vs. LLM). The output format (`Html` or
833
- `Markdown`).
830
+ Each segment uses one of three strategies. The chosen strategy controls:
831
+
832
+ - Whether the segment is kept (`Auto`, `LLM`) or skipped (`Ignore`).
833
+ - How the content is produced (rule-based vs. LLM).
834
+ - The output format (`Html` or `Markdown`).
834
835
 
835
836
  Optional flags such as image **cropping**, **extended context**, and
836
837
  **descriptions** further refine behaviour.
837
838
 
838
- **Default strategy per segment** • `Title`, `SectionHeader`, `Text`, `ListItem`,
839
- `Caption`, `Footnote` → **Auto** (Markdown, description off) • `Table` → **LLM**
840
- (HTML, description on) `Picture` → **LLM** (Markdown, description off,
841
- cropping _All_) • `Formula`, `Page` → **LLM** (Markdown, description off)
842
- `PageHeader`, `PageFooter` → **Ignore** (removed from output)
839
+ **Default strategy per segment**
840
+
841
+ - `Title`, `SectionHeader`, `Text`, `ListItem`, `Caption`, `Footnote` → **Auto**
842
+ (Markdown, description off)
843
+ - `Table` → **LLM** (HTML, description on)
844
+ - `Picture` → **LLM** (Markdown, description off, cropping _All_)
845
+ - `Formula`, `Page` → **LLM** (Markdown, description off)
846
+ - `PageHeader`, `PageFooter` → **Ignore** (removed from output)
847
+
848
+ **Strategy reference**
843
849
 
844
- **Strategy reference** • **Auto** – rule-based content generation. • **LLM** –
845
- generate content with an LLM. • **Ignore** – exclude the segment entirely.
850
+ - **Auto** – rule-based content generation.
851
+ - **LLM** – generate content with an LLM.
852
+ - **Ignore** – exclude the segment entirely.
846
853
  """
847
854
 
848
855
  segmentation_strategy: Literal["LayoutAnalysis", "Page"]
@@ -16,3 +16,6 @@ class TaskGetParams(TypedDict, total=False):
16
16
 
17
17
  include_chunks: bool
18
18
  """Whether to include chunks in the output response"""
19
+
20
+ wait_for_completion: bool
21
+ """Whether to wait for the task to complete"""
@@ -36,12 +36,24 @@ class ParseCreateParams(TypedDict, total=False):
36
36
  file: Required[str]
37
37
  """The file to be uploaded. Supported inputs:
38
38
 
39
- - `ch://files/{file_id}`: References a previously uploaded file you own
40
- (authorization enforced)
39
+ - `ch://files/{file_id}`: Reference to an existing file. Upload via the Files
40
+ API
41
41
  - `http(s)://...`: Remote URL to fetch
42
42
  - `data:*;base64,...` or raw base64 string
43
43
  """
44
44
 
45
+ base64_urls: bool
46
+ """Whether to return base64 encoded URLs. If false, presigned URLs are returned."""
47
+
48
+ include_chunks: bool
49
+ """Whether to include chunks in the output response"""
50
+
51
+ wait_for_completion: bool
52
+ """
53
+ If true, server holds briefly and may return 200 when done; otherwise returns
54
+ 408/409 with Retry-After headers
55
+ """
56
+
45
57
  chunk_processing: Optional[ChunkProcessing]
46
58
  """Controls the setting for the chunking and post-processing of each chunk."""
47
59
 
@@ -83,22 +95,29 @@ class ParseCreateParams(TypedDict, total=False):
83
95
  segment_processing: Optional[SegmentProcessing]
84
96
  """Defines how each segment type is handled when generating the final output.
85
97
 
86
- Each segment uses one of three strategies. The chosen strategy controls:
87
- Whether the segment is kept (`Auto`, `LLM`) or skipped (`Ignore`). • How the
88
- content is produced (rule-based vs. LLM). The output format (`Html` or
89
- `Markdown`).
98
+ Each segment uses one of three strategies. The chosen strategy controls:
99
+
100
+ - Whether the segment is kept (`Auto`, `LLM`) or skipped (`Ignore`).
101
+ - How the content is produced (rule-based vs. LLM).
102
+ - The output format (`Html` or `Markdown`).
90
103
 
91
104
  Optional flags such as image **cropping**, **extended context**, and
92
105
  **descriptions** further refine behaviour.
93
106
 
94
- **Default strategy per segment** • `Title`, `SectionHeader`, `Text`, `ListItem`,
95
- `Caption`, `Footnote` → **Auto** (Markdown, description off) • `Table` → **LLM**
96
- (HTML, description on) `Picture` → **LLM** (Markdown, description off,
97
- cropping _All_) • `Formula`, `Page` → **LLM** (Markdown, description off)
98
- `PageHeader`, `PageFooter` → **Ignore** (removed from output)
107
+ **Default strategy per segment**
108
+
109
+ - `Title`, `SectionHeader`, `Text`, `ListItem`, `Caption`, `Footnote` → **Auto**
110
+ (Markdown, description off)
111
+ - `Table` → **LLM** (HTML, description on)
112
+ - `Picture` → **LLM** (Markdown, description off, cropping _All_)
113
+ - `Formula`, `Page` → **LLM** (Markdown, description off)
114
+ - `PageHeader`, `PageFooter` → **Ignore** (removed from output)
115
+
116
+ **Strategy reference**
99
117
 
100
- **Strategy reference** • **Auto** – rule-based content generation. • **LLM** –
101
- generate content with an LLM. • **Ignore** – exclude the segment entirely.
118
+ - **Auto** – rule-based content generation.
119
+ - **LLM** – generate content with an LLM.
120
+ - **Ignore** – exclude the segment entirely.
102
121
  """
103
122
 
104
123
  segmentation_strategy: Optional[Literal["LayoutAnalysis", "Page"]]
@@ -33,6 +33,18 @@ __all__ = [
33
33
 
34
34
 
35
35
  class ParseUpdateParams(TypedDict, total=False):
36
+ base64_urls: bool
37
+ """Whether to return base64 encoded URLs. If false, presigned URLs are returned."""
38
+
39
+ include_chunks: bool
40
+ """Whether to include chunks in the output response"""
41
+
42
+ wait_for_completion: bool
43
+ """
44
+ If true, server holds briefly and may return 200 when done; otherwise returns
45
+ 408/409 with Retry-After headers
46
+ """
47
+
36
48
  chunk_processing: Optional[ChunkProcessing]
37
49
  """Controls the setting for the chunking and post-processing of each chunk."""
38
50
 
@@ -77,22 +89,29 @@ class ParseUpdateParams(TypedDict, total=False):
77
89
  segment_processing: Optional[SegmentProcessing]
78
90
  """Defines how each segment type is handled when generating the final output.
79
91
 
80
- Each segment uses one of three strategies. The chosen strategy controls:
81
- Whether the segment is kept (`Auto`, `LLM`) or skipped (`Ignore`). • How the
82
- content is produced (rule-based vs. LLM). The output format (`Html` or
83
- `Markdown`).
92
+ Each segment uses one of three strategies. The chosen strategy controls:
93
+
94
+ - Whether the segment is kept (`Auto`, `LLM`) or skipped (`Ignore`).
95
+ - How the content is produced (rule-based vs. LLM).
96
+ - The output format (`Html` or `Markdown`).
84
97
 
85
98
  Optional flags such as image **cropping**, **extended context**, and
86
99
  **descriptions** further refine behaviour.
87
100
 
88
- **Default strategy per segment** • `Title`, `SectionHeader`, `Text`, `ListItem`,
89
- `Caption`, `Footnote` → **Auto** (Markdown, description off) • `Table` → **LLM**
90
- (HTML, description on) `Picture` → **LLM** (Markdown, description off,
91
- cropping _All_) • `Formula`, `Page` → **LLM** (Markdown, description off)
92
- `PageHeader`, `PageFooter` → **Ignore** (removed from output)
101
+ **Default strategy per segment**
102
+
103
+ - `Title`, `SectionHeader`, `Text`, `ListItem`, `Caption`, `Footnote` → **Auto**
104
+ (Markdown, description off)
105
+ - `Table` → **LLM** (HTML, description on)
106
+ - `Picture` → **LLM** (Markdown, description off, cropping _All_)
107
+ - `Formula`, `Page` → **LLM** (Markdown, description off)
108
+ - `PageHeader`, `PageFooter` → **Ignore** (removed from output)
109
+
110
+ **Strategy reference**
93
111
 
94
- **Strategy reference** • **Auto** – rule-based content generation. • **LLM** –
95
- generate content with an LLM. • **Ignore** – exclude the segment entirely.
112
+ - **Auto** – rule-based content generation.
113
+ - **LLM** – generate content with an LLM.
114
+ - **Ignore** – exclude the segment entirely.
96
115
  """
97
116
 
98
117
  segmentation_strategy: Optional[Literal["LayoutAnalysis", "Page"]]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: chunkr-ai
3
- Version: 0.1.0a4
3
+ Version: 0.1.0a5
4
4
  Summary: The official Python library for the chunkr API
5
5
  Project-URL: Homepage, https://github.com/lumina-ai-inc/chunkr-python
6
6
  Project-URL: Repository, https://github.com/lumina-ai-inc/chunkr-python
@@ -299,7 +299,7 @@ Error codes are as follows:
299
299
 
300
300
  ### Retries
301
301
 
302
- Certain errors are automatically retried 2 times by default, with a short exponential backoff.
302
+ Certain errors are automatically retried 50 times by default, with a short exponential backoff.
303
303
  Connection errors (for example, due to a network connectivity problem), 408 Request Timeout, 409 Conflict,
304
304
  429 Rate Limit, and >=500 Internal errors are all retried by default.
305
305
 
@@ -322,7 +322,7 @@ client.with_options(max_retries=5).tasks.parse.create(
322
322
 
323
323
  ### Timeouts
324
324
 
325
- By default requests time out after 1 minute. You can configure this with a `timeout` option,
325
+ By default requests time out after 30 seconds. You can configure this with a `timeout` option,
326
326
  which accepts a float or an [`httpx.Timeout`](https://www.python-httpx.org/advanced/timeouts/#fine-tuning-the-configuration) object:
327
327
 
328
328
  ```python
@@ -330,7 +330,7 @@ from chunkr_ai import Chunkr
330
330
 
331
331
  # Configure the default for all requests:
332
332
  client = Chunkr(
333
- # 20 seconds (default is 1 minute)
333
+ # 20 seconds (default is 30 seconds)
334
334
  timeout=20.0,
335
335
  )
336
336
 
@@ -1,8 +1,8 @@
1
- chunkr_ai/__init__.py,sha256=RqteAJ-1Ma7DnoNz_AJJIjmynGeoGJ3F4ZKrSnjp9zs,2793
1
+ chunkr_ai/__init__.py,sha256=scS30uHiCpLbaalKTAJSCFSTqnu_b9R5JCkTu2hmbzU,2587
2
2
  chunkr_ai/_base_client.py,sha256=Nv5b_rmVdmmPbF42mlOfymbSC6lxcYsrsvBhKSBDXWQ,67038
3
- chunkr_ai/_client.py,sha256=fseZHGtnXGw3uSa1Le8SxH2oSBeHczn6mOsLeLGj4rY,15867
3
+ chunkr_ai/_client.py,sha256=yn0QdzDkm0M6Ft2-ItmfJpUxQnJVoWa29tSv_2g3KDQ,15975
4
4
  chunkr_ai/_compat.py,sha256=VWemUKbj6DDkQ-O4baSpHVLJafotzeXmCQGJugfVTIw,6580
5
- chunkr_ai/_constants.py,sha256=S14PFzyN9-I31wiV7SmIlL5Ga0MLHxdvegInGdXH7tM,462
5
+ chunkr_ai/_constants.py,sha256=SZppb_i55UWs0n0_MRbw7s0Hy_TOGQu9q7FVd-fCwgM,466
6
6
  chunkr_ai/_exceptions.py,sha256=ClgXUcwf4qhBTXnK4LzUPQCFdFldRxAlcYdOFFgpTxA,3220
7
7
  chunkr_ai/_files.py,sha256=SUFtic_gwSzbvhLtMdQ7TBem8szrqZE2nZFFMRa0KTw,3619
8
8
  chunkr_ai/_models.py,sha256=KvjsMfb88XZlFUKVoOxr8OyDj47MhoH2OKqWNEbBhk4,30010
@@ -11,7 +11,7 @@ chunkr_ai/_resource.py,sha256=f5tiwjxcKdbeMor8idoHtMFTUhqD9yc2xXtq5rqeLLk,1100
11
11
  chunkr_ai/_response.py,sha256=xXNpF53hiYARmAW7npKuxQ5UHAEjgAzm7ME_L3eIstY,28800
12
12
  chunkr_ai/_streaming.py,sha256=ZmyrVWk7-AWkLAATR55WgNxnyFzYmaqJt2LthA_PTqQ,10100
13
13
  chunkr_ai/_types.py,sha256=dnzU2Q2tLcuk29QFEcnPC1wp0-4XB4Cpef_3AnRhV5Y,6200
14
- chunkr_ai/_version.py,sha256=hJYiv4ePWLGN-Ur1VkK5zJERczdAZjPDNh7APrmHgBE,169
14
+ chunkr_ai/_version.py,sha256=W1WwLVPdlihFXegK9LX1wYOXmi6mf953UnIguIoR8TA,169
15
15
  chunkr_ai/pagination.py,sha256=bT-ErcJ80YlKBV6tWq2s9uqg-wv7o66SKe_AgUAGrKc,3533
16
16
  chunkr_ai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  chunkr_ai/_utils/__init__.py,sha256=PNZ_QJuzZEgyYXqkO1HVhGkj5IU9bglVUcw7H-Knjzw,2062
@@ -25,13 +25,12 @@ chunkr_ai/_utils/_transform.py,sha256=n7kskEWz6o__aoNvhFoGVyDoalNe6mJwp-g7BWkdj8
25
25
  chunkr_ai/_utils/_typing.py,sha256=D0DbbNu8GnYQTSICnTSHDGsYXj8TcAKyhejb0XcnjtY,4602
26
26
  chunkr_ai/_utils/_utils.py,sha256=ts4CiiuNpFiGB6YMdkQRh2SZvYvsl7mAF-JWHCcLDf4,12312
27
27
  chunkr_ai/lib/.keep,sha256=wuNrz-5SXo3jJaJOJgz4vFHM41YH_g20F5cRQo0vLes,224
28
- chunkr_ai/lib/tasks_poll.py,sha256=3yosl_hH5j6NVNH9mANqneAW0FJSbIV9dMoTcF-OdJU,3341
29
28
  chunkr_ai/resources/__init__.py,sha256=K-axuAEg2pJQl45N5ao1tm8AnRwpQVVNp_b6qSMgB6A,1426
30
- chunkr_ai/resources/files.py,sha256=Dez080pD_xUr1jOW3y6QSg92sSSZhEYObPze2RWktoY,26304
29
+ chunkr_ai/resources/files.py,sha256=iX6LbX2PqM6kFKNoLxS_R9OGaVSnnZJ8U0dCUxNBGIM,27184
31
30
  chunkr_ai/resources/health.py,sha256=XTvUtRs5hEK-uccb_40mcIex85eEUo1a171nQUjpSOs,4965
32
31
  chunkr_ai/resources/tasks/__init__.py,sha256=W-sclAx_Kfm7OBGlSs694QzNCMkewtz9LU9KRcb8Ud0,976
33
- chunkr_ai/resources/tasks/parse.py,sha256=um0sw2ZU7bY6AK7LKAS0GxAHUyuSzav_NlhxXPjNjxY,28491
34
- chunkr_ai/resources/tasks/tasks.py,sha256=UC15zZNpY7u85X_JJudDuNrnpaeULiISaBde4BRHGSw,21653
32
+ chunkr_ai/resources/tasks/parse.py,sha256=NDGtWPtrukPG6lwhLYP2kI3vTq0W2I_c8N9crj9OnJo,33441
33
+ chunkr_ai/resources/tasks/tasks.py,sha256=XkNulmXZz4N6UXaG-6EdS-WAyncTgwMY3BYbJBqeEGw,22745
35
34
  chunkr_ai/types/__init__.py,sha256=DSRAMgXVRTZM2t8s2yrFU-FHt3FTs_wpZfVILH1zjJ0,728
36
35
  chunkr_ai/types/delete.py,sha256=EU78fjXpc8-fqvgcFTuJ0ejs5u_UjbhOz5frkeUHvxY,225
37
36
  chunkr_ai/types/file.py,sha256=kOxR0g-3A-qOxz2cjuTcq0wFMqPoph9uQuLYQ56zb-c,718
@@ -41,13 +40,13 @@ chunkr_ai/types/file_url.py,sha256=L434WnOXkNmt59dJiaAgT1_3pN3BIsxm2q14zHQK6xY,3
41
40
  chunkr_ai/types/file_url_params.py,sha256=ZHfKiy_6B25StdDemulavGcsPggNNMKLWf6KN7xfPTY,413
42
41
  chunkr_ai/types/files_list_response.py,sha256=ggSRWhTzZWjcDXxStyCzrYICXXB5TqnL2j-SN9mHH_g,506
43
42
  chunkr_ai/types/health_check_response.py,sha256=6Zn5YYHCQf2RgMjDlf39mtiTPqfaBfC9Vv599U_rKCI,200
44
- chunkr_ai/types/task.py,sha256=aew6aT0ngKtwgfUCSCvMTJOBQL1Xp0F0otB_wxumIGQ,46703
45
- chunkr_ai/types/task_get_params.py,sha256=Nx2luhebcoaiuRln4KP4FarWvBPd1OYi__efi56zHPM,460
43
+ chunkr_ai/types/task.py,sha256=L8vE_q0Hej_YuJM_rd_bZOg8kHbithsFx6fOQYpH0cY,46702
44
+ chunkr_ai/types/task_get_params.py,sha256=yGMHRfkbLzQpRLdF_Dj-8TqcioEhDNWyVbEt50xDAP0,542
46
45
  chunkr_ai/types/task_list_params.py,sha256=fCku42QW6QUsLmZgKJBaxisGvUcmcQ5fa6LgHHRIwiQ,1043
47
46
  chunkr_ai/types/tasks/__init__.py,sha256=VdLEmQvgPoiykSEYaRhkMYVaIueGDkR4P_MjCq9SbQY,267
48
- chunkr_ai/types/tasks/parse_create_params.py,sha256=PBg2VR_OnBdB8K4NihuefGJXgUXBn7v5317LZG7PDks,34340
49
- chunkr_ai/types/tasks/parse_update_params.py,sha256=B1cKfdX_cNDh0m2zDoH0FiZP_Qc-a5GFy-5iXHDHuy8,34113
50
- chunkr_ai-0.1.0a4.dist-info/METADATA,sha256=241RRJb1pTZBg9IG3oGx3_WASrJDN8a2myyJhQ9TUNE,16441
51
- chunkr_ai-0.1.0a4.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
52
- chunkr_ai-0.1.0a4.dist-info/licenses/LICENSE,sha256=3FDRL-L-DFkrFy8yJpb1Nxhuztm0PB2kawcCgK5utFg,11336
53
- chunkr_ai-0.1.0a4.dist-info/RECORD,,
47
+ chunkr_ai/types/tasks/parse_create_params.py,sha256=tQLvgfhjdgIDKsEejFPLs-guQ8trDmvTC3BVWWXBaNg,34686
48
+ chunkr_ai/types/tasks/parse_update_params.py,sha256=QSUqh2Hb1B5KYEJJqlCJ1XfvoGLVKuOsQz9PceeqHjk,34474
49
+ chunkr_ai-0.1.0a5.dist-info/METADATA,sha256=7-RwQM4pkLESzFisF-3Ofl7jMebFT3eLAXe8Bbl15vU,16446
50
+ chunkr_ai-0.1.0a5.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
51
+ chunkr_ai-0.1.0a5.dist-info/licenses/LICENSE,sha256=3FDRL-L-DFkrFy8yJpb1Nxhuztm0PB2kawcCgK5utFg,11336
52
+ chunkr_ai-0.1.0a5.dist-info/RECORD,,
@@ -1,122 +0,0 @@
1
- from __future__ import annotations
2
-
3
- """
4
- Custom helpers for task polling.
5
-
6
- This module adds `Task.poll()` and `Task.apoll()` methods at runtime to the
7
- generated `Task` model, without modifying generated code directly.
8
-
9
- Usage:
10
- task = client.tasks.get(task_id)
11
- task = task.poll(client) # blocks until terminal state
12
-
13
- # async
14
- task = await async_client.tasks.get(task_id)
15
- task = await task.apoll(async_client)
16
- """
17
-
18
- import time
19
- import asyncio
20
- from typing import Protocol, cast
21
-
22
- from .._types import NOT_GIVEN, NotGiven
23
- from .._client import Chunkr, AsyncChunkr
24
- from ..types.task import Task as _Task
25
- from .._exceptions import ChunkrError
26
-
27
- TERMINAL_STATUSES = {"Succeeded", "Failed", "Cancelled"}
28
-
29
-
30
- def _task_poll(
31
- self: _Task,
32
- client: Chunkr,
33
- *,
34
- interval: float = 0.5,
35
- timeout: float = 600.0,
36
- include_chunks: bool | NotGiven = NOT_GIVEN,
37
- base64_urls: bool | NotGiven = NOT_GIVEN,
38
- ) -> _Task:
39
- """Poll the task until it reaches a terminal status.
40
-
41
- Args:
42
- client: Synchronous Chunkr client instance.
43
- interval: Seconds to sleep between polls.
44
- timeout: Maximum total seconds to wait before raising an error.
45
- include_chunks: Whether to include chunks in the output response for each poll.
46
- base64_urls: Whether to return base64 encoded URLs.
47
- """
48
- start_time = time.monotonic()
49
- current: _Task = self
50
-
51
- class _TasksGetProtocol(Protocol):
52
- def get(
53
- self,
54
- task_id: str,
55
- *,
56
- base64_urls: bool | NotGiven = NOT_GIVEN,
57
- include_chunks: bool | NotGiven = NOT_GIVEN,
58
- ) -> _Task: ...
59
-
60
- resource = cast(_TasksGetProtocol, client.tasks)
61
-
62
- while current.status not in TERMINAL_STATUSES:
63
- if time.monotonic() - start_time > timeout:
64
- raise ChunkrError("Task polling timed out.")
65
-
66
- if interval > 0:
67
- time.sleep(interval)
68
-
69
- current = resource.get(
70
- current.task_id,
71
- include_chunks=include_chunks,
72
- base64_urls=base64_urls,
73
- )
74
-
75
- return current
76
-
77
-
78
- async def _task_apoll(
79
- self: _Task,
80
- client: AsyncChunkr,
81
- *,
82
- interval: float = 0.5,
83
- timeout: float = 600.0,
84
- include_chunks: bool | NotGiven = NOT_GIVEN,
85
- base64_urls: bool | NotGiven = NOT_GIVEN,
86
- ) -> _Task:
87
- """Async poll the task until it reaches a terminal status."""
88
- start_time = time.monotonic()
89
- current: _Task = self
90
-
91
- class _AsyncTasksGetProtocol(Protocol):
92
- async def get(
93
- self,
94
- task_id: str,
95
- *,
96
- base64_urls: bool | NotGiven = NOT_GIVEN,
97
- include_chunks: bool | NotGiven = NOT_GIVEN,
98
- ) -> _Task: ...
99
-
100
- aresource = cast(_AsyncTasksGetProtocol, client.tasks)
101
-
102
- while current.status not in TERMINAL_STATUSES:
103
- if time.monotonic() - start_time > timeout:
104
- raise ChunkrError("Task polling timed out.")
105
-
106
- if interval > 0:
107
- await asyncio.sleep(interval)
108
-
109
- current = await aresource.get(
110
- current.task_id,
111
- include_chunks=include_chunks,
112
- base64_urls=base64_urls,
113
- )
114
-
115
- return current
116
-
117
-
118
- # Attach methods to the generated Task model
119
- _Task.poll = _task_poll # type: ignore[attr-defined]
120
- _Task.apoll = _task_apoll # type: ignore[attr-defined]
121
-
122
-