chunkr-ai 0.1.0a4__py3-none-any.whl → 0.1.0a6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chunkr_ai/__init__.py +0 -3
- chunkr_ai/_client.py +4 -0
- chunkr_ai/_constants.py +5 -5
- chunkr_ai/_version.py +1 -1
- chunkr_ai/resources/files.py +32 -4
- chunkr_ai/resources/tasks/parse.py +124 -84
- chunkr_ai/resources/tasks/tasks.py +46 -24
- chunkr_ai/types/task.py +18 -11
- chunkr_ai/types/task_get_params.py +3 -0
- chunkr_ai/types/tasks/parse_create_params.py +20 -13
- chunkr_ai/types/tasks/parse_update_params.py +18 -11
- {chunkr_ai-0.1.0a4.dist-info → chunkr_ai-0.1.0a6.dist-info}/METADATA +4 -4
- {chunkr_ai-0.1.0a4.dist-info → chunkr_ai-0.1.0a6.dist-info}/RECORD +15 -16
- chunkr_ai/lib/tasks_poll.py +0 -122
- {chunkr_ai-0.1.0a4.dist-info → chunkr_ai-0.1.0a6.dist-info}/WHEEL +0 -0
- {chunkr_ai-0.1.0a4.dist-info → chunkr_ai-0.1.0a6.dist-info}/licenses/LICENSE +0 -0
chunkr_ai/__init__.py
CHANGED
@@ -72,9 +72,6 @@ __all__ = [
|
|
72
72
|
]
|
73
73
|
|
74
74
|
if not _t.TYPE_CHECKING:
|
75
|
-
# Load custom helpers that monkey-patch generated types.
|
76
|
-
# This keeps custom code separate from generated files, per Stainless guidance.
|
77
|
-
from .lib import tasks_poll as _tasks_poll # noqa: F401
|
78
75
|
from ._utils._resources_proxy import resources as resources
|
79
76
|
|
80
77
|
_setup_logging()
|
chunkr_ai/_client.py
CHANGED
@@ -95,6 +95,8 @@ class Chunkr(SyncAPIClient):
|
|
95
95
|
_strict_response_validation=_strict_response_validation,
|
96
96
|
)
|
97
97
|
|
98
|
+
self._idempotency_header = "Idempotency-Key"
|
99
|
+
|
98
100
|
self.tasks = tasks.TasksResource(self)
|
99
101
|
self.files = files.FilesResource(self)
|
100
102
|
self.health = health.HealthResource(self)
|
@@ -267,6 +269,8 @@ class AsyncChunkr(AsyncAPIClient):
|
|
267
269
|
_strict_response_validation=_strict_response_validation,
|
268
270
|
)
|
269
271
|
|
272
|
+
self._idempotency_header = "Idempotency-Key"
|
273
|
+
|
270
274
|
self.tasks = tasks.AsyncTasksResource(self)
|
271
275
|
self.files = files.AsyncFilesResource(self)
|
272
276
|
self.health = health.AsyncHealthResource(self)
|
chunkr_ai/_constants.py
CHANGED
@@ -5,10 +5,10 @@ import httpx
|
|
5
5
|
RAW_RESPONSE_HEADER = "X-Stainless-Raw-Response"
|
6
6
|
OVERRIDE_CAST_TO_HEADER = "____stainless_override_cast_to"
|
7
7
|
|
8
|
-
# default timeout is
|
9
|
-
DEFAULT_TIMEOUT = httpx.Timeout(timeout=
|
10
|
-
DEFAULT_MAX_RETRIES =
|
8
|
+
# default timeout is 30 seconds
|
9
|
+
DEFAULT_TIMEOUT = httpx.Timeout(timeout=30, connect=5.0)
|
10
|
+
DEFAULT_MAX_RETRIES = 50
|
11
11
|
DEFAULT_CONNECTION_LIMITS = httpx.Limits(max_connections=100, max_keepalive_connections=20)
|
12
12
|
|
13
|
-
INITIAL_RETRY_DELAY = 0
|
14
|
-
MAX_RETRY_DELAY =
|
13
|
+
INITIAL_RETRY_DELAY = 1.0
|
14
|
+
MAX_RETRY_DELAY = 10.0
|
chunkr_ai/_version.py
CHANGED
chunkr_ai/resources/files.py
CHANGED
@@ -59,6 +59,7 @@ class FilesResource(SyncAPIResource):
|
|
59
59
|
extra_query: Query | None = None,
|
60
60
|
extra_body: Body | None = None,
|
61
61
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
62
|
+
idempotency_key: str | None = None,
|
62
63
|
) -> File:
|
63
64
|
"""
|
64
65
|
Accepts multipart/form-data with fields:
|
@@ -78,6 +79,8 @@ class FilesResource(SyncAPIResource):
|
|
78
79
|
extra_body: Add additional JSON properties to the request
|
79
80
|
|
80
81
|
timeout: Override the client-level default timeout for this request, in seconds
|
82
|
+
|
83
|
+
idempotency_key: Specify a custom idempotency key for this request
|
81
84
|
"""
|
82
85
|
body = deepcopy_minimal(
|
83
86
|
{
|
@@ -95,7 +98,11 @@ class FilesResource(SyncAPIResource):
|
|
95
98
|
body=maybe_transform(body, file_create_params.FileCreateParams),
|
96
99
|
files=files,
|
97
100
|
options=make_request_options(
|
98
|
-
extra_headers=extra_headers,
|
101
|
+
extra_headers=extra_headers,
|
102
|
+
extra_query=extra_query,
|
103
|
+
extra_body=extra_body,
|
104
|
+
timeout=timeout,
|
105
|
+
idempotency_key=idempotency_key,
|
99
106
|
),
|
100
107
|
cast_to=File,
|
101
108
|
)
|
@@ -170,6 +177,7 @@ class FilesResource(SyncAPIResource):
|
|
170
177
|
extra_query: Query | None = None,
|
171
178
|
extra_body: Body | None = None,
|
172
179
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
180
|
+
idempotency_key: str | None = None,
|
173
181
|
) -> Delete:
|
174
182
|
"""Delete file contents and scrub sensitive metadata.
|
175
183
|
|
@@ -184,13 +192,19 @@ class FilesResource(SyncAPIResource):
|
|
184
192
|
extra_body: Add additional JSON properties to the request
|
185
193
|
|
186
194
|
timeout: Override the client-level default timeout for this request, in seconds
|
195
|
+
|
196
|
+
idempotency_key: Specify a custom idempotency key for this request
|
187
197
|
"""
|
188
198
|
if not file_id:
|
189
199
|
raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
|
190
200
|
return self._delete(
|
191
201
|
f"/files/{file_id}",
|
192
202
|
options=make_request_options(
|
193
|
-
extra_headers=extra_headers,
|
203
|
+
extra_headers=extra_headers,
|
204
|
+
extra_query=extra_query,
|
205
|
+
extra_body=extra_body,
|
206
|
+
timeout=timeout,
|
207
|
+
idempotency_key=idempotency_key,
|
194
208
|
),
|
195
209
|
cast_to=Delete,
|
196
210
|
)
|
@@ -353,6 +367,7 @@ class AsyncFilesResource(AsyncAPIResource):
|
|
353
367
|
extra_query: Query | None = None,
|
354
368
|
extra_body: Body | None = None,
|
355
369
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
370
|
+
idempotency_key: str | None = None,
|
356
371
|
) -> File:
|
357
372
|
"""
|
358
373
|
Accepts multipart/form-data with fields:
|
@@ -372,6 +387,8 @@ class AsyncFilesResource(AsyncAPIResource):
|
|
372
387
|
extra_body: Add additional JSON properties to the request
|
373
388
|
|
374
389
|
timeout: Override the client-level default timeout for this request, in seconds
|
390
|
+
|
391
|
+
idempotency_key: Specify a custom idempotency key for this request
|
375
392
|
"""
|
376
393
|
body = deepcopy_minimal(
|
377
394
|
{
|
@@ -389,7 +406,11 @@ class AsyncFilesResource(AsyncAPIResource):
|
|
389
406
|
body=await async_maybe_transform(body, file_create_params.FileCreateParams),
|
390
407
|
files=files,
|
391
408
|
options=make_request_options(
|
392
|
-
extra_headers=extra_headers,
|
409
|
+
extra_headers=extra_headers,
|
410
|
+
extra_query=extra_query,
|
411
|
+
extra_body=extra_body,
|
412
|
+
timeout=timeout,
|
413
|
+
idempotency_key=idempotency_key,
|
393
414
|
),
|
394
415
|
cast_to=File,
|
395
416
|
)
|
@@ -464,6 +485,7 @@ class AsyncFilesResource(AsyncAPIResource):
|
|
464
485
|
extra_query: Query | None = None,
|
465
486
|
extra_body: Body | None = None,
|
466
487
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
488
|
+
idempotency_key: str | None = None,
|
467
489
|
) -> Delete:
|
468
490
|
"""Delete file contents and scrub sensitive metadata.
|
469
491
|
|
@@ -478,13 +500,19 @@ class AsyncFilesResource(AsyncAPIResource):
|
|
478
500
|
extra_body: Add additional JSON properties to the request
|
479
501
|
|
480
502
|
timeout: Override the client-level default timeout for this request, in seconds
|
503
|
+
|
504
|
+
idempotency_key: Specify a custom idempotency key for this request
|
481
505
|
"""
|
482
506
|
if not file_id:
|
483
507
|
raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
|
484
508
|
return await self._delete(
|
485
509
|
f"/files/{file_id}",
|
486
510
|
options=make_request_options(
|
487
|
-
extra_headers=extra_headers,
|
511
|
+
extra_headers=extra_headers,
|
512
|
+
extra_query=extra_query,
|
513
|
+
extra_body=extra_body,
|
514
|
+
timeout=timeout,
|
515
|
+
idempotency_key=idempotency_key,
|
488
516
|
),
|
489
517
|
cast_to=Delete,
|
490
518
|
)
|
@@ -63,26 +63,21 @@ class ParseResource(SyncAPIResource):
|
|
63
63
|
extra_query: Query | None = None,
|
64
64
|
extra_body: Body | None = None,
|
65
65
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
66
|
+
idempotency_key: str | None = None,
|
66
67
|
) -> Task:
|
67
68
|
"""
|
68
|
-
Queues a document for processing and returns a TaskResponse
|
69
|
+
Queues a document for processing and returns a `TaskResponse` with the assigned
|
70
|
+
`task_id`, initial configuration, file metadata, and timestamps. The initial
|
71
|
+
status is `Starting`.
|
69
72
|
|
70
|
-
|
71
|
-
- Initial configuration
|
72
|
-
- File metadata
|
73
|
-
- Processing status
|
74
|
-
- Creation timestamp
|
75
|
-
- Presigned URLs for file access
|
76
|
-
|
77
|
-
The returned task will typically be in a `Starting` or `Processing` state. Use
|
78
|
-
the `GET /tasks/{task_id}` endpoint to poll for completion.
|
73
|
+
Creates a task and returns its metadata immediately.
|
79
74
|
|
80
75
|
Args:
|
81
76
|
file:
|
82
77
|
The file to be uploaded. Supported inputs:
|
83
78
|
|
84
|
-
- `ch://files/{file_id}`:
|
85
|
-
|
79
|
+
- `ch://files/{file_id}`: Reference to an existing file. Upload via the Files
|
80
|
+
API
|
86
81
|
- `http(s)://...`: Remote URL to fetch
|
87
82
|
- `data:*;base64,...` or raw base64 string
|
88
83
|
|
@@ -114,22 +109,29 @@ class ParseResource(SyncAPIResource):
|
|
114
109
|
|
115
110
|
segment_processing: Defines how each segment type is handled when generating the final output.
|
116
111
|
|
117
|
-
Each segment uses one of three strategies. The chosen strategy controls:
|
118
|
-
|
119
|
-
|
120
|
-
|
112
|
+
Each segment uses one of three strategies. The chosen strategy controls:
|
113
|
+
|
114
|
+
- Whether the segment is kept (`Auto`, `LLM`) or skipped (`Ignore`).
|
115
|
+
- How the content is produced (rule-based vs. LLM).
|
116
|
+
- The output format (`Html` or `Markdown`).
|
121
117
|
|
122
118
|
Optional flags such as image **cropping**, **extended context**, and
|
123
119
|
**descriptions** further refine behaviour.
|
124
120
|
|
125
|
-
**Default strategy per segment**
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
121
|
+
**Default strategy per segment**
|
122
|
+
|
123
|
+
- `Title`, `SectionHeader`, `Text`, `ListItem`, `Caption`, `Footnote` → **Auto**
|
124
|
+
(Markdown, description off)
|
125
|
+
- `Table` → **LLM** (HTML, description on)
|
126
|
+
- `Picture` → **LLM** (Markdown, description off, cropping _All_)
|
127
|
+
- `Formula`, `Page` → **LLM** (Markdown, description off)
|
128
|
+
- `PageHeader`, `PageFooter` → **Ignore** (removed from output)
|
130
129
|
|
131
|
-
**Strategy reference**
|
132
|
-
|
130
|
+
**Strategy reference**
|
131
|
+
|
132
|
+
- **Auto** – rule-based content generation.
|
133
|
+
- **LLM** – generate content with an LLM.
|
134
|
+
- **Ignore** – exclude the segment entirely.
|
133
135
|
|
134
136
|
segmentation_strategy:
|
135
137
|
Controls the segmentation strategy:
|
@@ -147,6 +149,8 @@ class ParseResource(SyncAPIResource):
|
|
147
149
|
extra_body: Add additional JSON properties to the request
|
148
150
|
|
149
151
|
timeout: Override the client-level default timeout for this request, in seconds
|
152
|
+
|
153
|
+
idempotency_key: Specify a custom idempotency key for this request
|
150
154
|
"""
|
151
155
|
return self._post(
|
152
156
|
"/tasks/parse",
|
@@ -166,7 +170,11 @@ class ParseResource(SyncAPIResource):
|
|
166
170
|
parse_create_params.ParseCreateParams,
|
167
171
|
),
|
168
172
|
options=make_request_options(
|
169
|
-
extra_headers=extra_headers,
|
173
|
+
extra_headers=extra_headers,
|
174
|
+
extra_query=extra_query,
|
175
|
+
extra_body=extra_body,
|
176
|
+
timeout=timeout,
|
177
|
+
idempotency_key=idempotency_key,
|
170
178
|
),
|
171
179
|
cast_to=Task,
|
172
180
|
)
|
@@ -190,20 +198,19 @@ class ParseResource(SyncAPIResource):
|
|
190
198
|
extra_query: Query | None = None,
|
191
199
|
extra_body: Body | None = None,
|
192
200
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
201
|
+
idempotency_key: str | None = None,
|
193
202
|
) -> Task:
|
194
203
|
"""Updates an existing task's configuration and reprocesses the document.
|
195
204
|
|
196
205
|
The
|
197
|
-
|
198
|
-
update.
|
206
|
+
current configuration is used as the base; only provided fields are changed.
|
199
207
|
|
200
208
|
Requirements:
|
201
209
|
|
202
|
-
- Task must
|
203
|
-
-
|
210
|
+
- Task must be in a terminal state (`Succeeded` or `Failed`).
|
211
|
+
- The new configuration must differ from the current configuration.
|
204
212
|
|
205
|
-
|
206
|
-
the `GET /tasks/{task_id}` endpoint to poll for completion.
|
213
|
+
Updates a task and returns its new metadata immediately.
|
207
214
|
|
208
215
|
Args:
|
209
216
|
chunk_processing: Controls the setting for the chunking and post-processing of each chunk.
|
@@ -235,22 +242,29 @@ class ParseResource(SyncAPIResource):
|
|
235
242
|
|
236
243
|
segment_processing: Defines how each segment type is handled when generating the final output.
|
237
244
|
|
238
|
-
Each segment uses one of three strategies. The chosen strategy controls:
|
239
|
-
|
240
|
-
|
241
|
-
|
245
|
+
Each segment uses one of three strategies. The chosen strategy controls:
|
246
|
+
|
247
|
+
- Whether the segment is kept (`Auto`, `LLM`) or skipped (`Ignore`).
|
248
|
+
- How the content is produced (rule-based vs. LLM).
|
249
|
+
- The output format (`Html` or `Markdown`).
|
242
250
|
|
243
251
|
Optional flags such as image **cropping**, **extended context**, and
|
244
252
|
**descriptions** further refine behaviour.
|
245
253
|
|
246
|
-
**Default strategy per segment**
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
254
|
+
**Default strategy per segment**
|
255
|
+
|
256
|
+
- `Title`, `SectionHeader`, `Text`, `ListItem`, `Caption`, `Footnote` → **Auto**
|
257
|
+
(Markdown, description off)
|
258
|
+
- `Table` → **LLM** (HTML, description on)
|
259
|
+
- `Picture` → **LLM** (Markdown, description off, cropping _All_)
|
260
|
+
- `Formula`, `Page` → **LLM** (Markdown, description off)
|
261
|
+
- `PageHeader`, `PageFooter` → **Ignore** (removed from output)
|
251
262
|
|
252
|
-
**Strategy reference**
|
253
|
-
|
263
|
+
**Strategy reference**
|
264
|
+
|
265
|
+
- **Auto** – rule-based content generation.
|
266
|
+
- **LLM** – generate content with an LLM.
|
267
|
+
- **Ignore** – exclude the segment entirely.
|
254
268
|
|
255
269
|
segmentation_strategy:
|
256
270
|
Controls the segmentation strategy:
|
@@ -268,6 +282,8 @@ class ParseResource(SyncAPIResource):
|
|
268
282
|
extra_body: Add additional JSON properties to the request
|
269
283
|
|
270
284
|
timeout: Override the client-level default timeout for this request, in seconds
|
285
|
+
|
286
|
+
idempotency_key: Specify a custom idempotency key for this request
|
271
287
|
"""
|
272
288
|
if not task_id:
|
273
289
|
raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
|
@@ -288,7 +304,11 @@ class ParseResource(SyncAPIResource):
|
|
288
304
|
parse_update_params.ParseUpdateParams,
|
289
305
|
),
|
290
306
|
options=make_request_options(
|
291
|
-
extra_headers=extra_headers,
|
307
|
+
extra_headers=extra_headers,
|
308
|
+
extra_query=extra_query,
|
309
|
+
extra_body=extra_body,
|
310
|
+
timeout=timeout,
|
311
|
+
idempotency_key=idempotency_key,
|
292
312
|
),
|
293
313
|
cast_to=Task,
|
294
314
|
)
|
@@ -333,26 +353,21 @@ class AsyncParseResource(AsyncAPIResource):
|
|
333
353
|
extra_query: Query | None = None,
|
334
354
|
extra_body: Body | None = None,
|
335
355
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
356
|
+
idempotency_key: str | None = None,
|
336
357
|
) -> Task:
|
337
358
|
"""
|
338
|
-
Queues a document for processing and returns a TaskResponse
|
359
|
+
Queues a document for processing and returns a `TaskResponse` with the assigned
|
360
|
+
`task_id`, initial configuration, file metadata, and timestamps. The initial
|
361
|
+
status is `Starting`.
|
339
362
|
|
340
|
-
|
341
|
-
- Initial configuration
|
342
|
-
- File metadata
|
343
|
-
- Processing status
|
344
|
-
- Creation timestamp
|
345
|
-
- Presigned URLs for file access
|
346
|
-
|
347
|
-
The returned task will typically be in a `Starting` or `Processing` state. Use
|
348
|
-
the `GET /tasks/{task_id}` endpoint to poll for completion.
|
363
|
+
Creates a task and returns its metadata immediately.
|
349
364
|
|
350
365
|
Args:
|
351
366
|
file:
|
352
367
|
The file to be uploaded. Supported inputs:
|
353
368
|
|
354
|
-
- `ch://files/{file_id}`:
|
355
|
-
|
369
|
+
- `ch://files/{file_id}`: Reference to an existing file. Upload via the Files
|
370
|
+
API
|
356
371
|
- `http(s)://...`: Remote URL to fetch
|
357
372
|
- `data:*;base64,...` or raw base64 string
|
358
373
|
|
@@ -384,22 +399,29 @@ class AsyncParseResource(AsyncAPIResource):
|
|
384
399
|
|
385
400
|
segment_processing: Defines how each segment type is handled when generating the final output.
|
386
401
|
|
387
|
-
Each segment uses one of three strategies. The chosen strategy controls:
|
388
|
-
|
389
|
-
|
390
|
-
|
402
|
+
Each segment uses one of three strategies. The chosen strategy controls:
|
403
|
+
|
404
|
+
- Whether the segment is kept (`Auto`, `LLM`) or skipped (`Ignore`).
|
405
|
+
- How the content is produced (rule-based vs. LLM).
|
406
|
+
- The output format (`Html` or `Markdown`).
|
391
407
|
|
392
408
|
Optional flags such as image **cropping**, **extended context**, and
|
393
409
|
**descriptions** further refine behaviour.
|
394
410
|
|
395
|
-
**Default strategy per segment**
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
411
|
+
**Default strategy per segment**
|
412
|
+
|
413
|
+
- `Title`, `SectionHeader`, `Text`, `ListItem`, `Caption`, `Footnote` → **Auto**
|
414
|
+
(Markdown, description off)
|
415
|
+
- `Table` → **LLM** (HTML, description on)
|
416
|
+
- `Picture` → **LLM** (Markdown, description off, cropping _All_)
|
417
|
+
- `Formula`, `Page` → **LLM** (Markdown, description off)
|
418
|
+
- `PageHeader`, `PageFooter` → **Ignore** (removed from output)
|
400
419
|
|
401
|
-
**Strategy reference**
|
402
|
-
|
420
|
+
**Strategy reference**
|
421
|
+
|
422
|
+
- **Auto** – rule-based content generation.
|
423
|
+
- **LLM** – generate content with an LLM.
|
424
|
+
- **Ignore** – exclude the segment entirely.
|
403
425
|
|
404
426
|
segmentation_strategy:
|
405
427
|
Controls the segmentation strategy:
|
@@ -417,6 +439,8 @@ class AsyncParseResource(AsyncAPIResource):
|
|
417
439
|
extra_body: Add additional JSON properties to the request
|
418
440
|
|
419
441
|
timeout: Override the client-level default timeout for this request, in seconds
|
442
|
+
|
443
|
+
idempotency_key: Specify a custom idempotency key for this request
|
420
444
|
"""
|
421
445
|
return await self._post(
|
422
446
|
"/tasks/parse",
|
@@ -436,7 +460,11 @@ class AsyncParseResource(AsyncAPIResource):
|
|
436
460
|
parse_create_params.ParseCreateParams,
|
437
461
|
),
|
438
462
|
options=make_request_options(
|
439
|
-
extra_headers=extra_headers,
|
463
|
+
extra_headers=extra_headers,
|
464
|
+
extra_query=extra_query,
|
465
|
+
extra_body=extra_body,
|
466
|
+
timeout=timeout,
|
467
|
+
idempotency_key=idempotency_key,
|
440
468
|
),
|
441
469
|
cast_to=Task,
|
442
470
|
)
|
@@ -460,20 +488,19 @@ class AsyncParseResource(AsyncAPIResource):
|
|
460
488
|
extra_query: Query | None = None,
|
461
489
|
extra_body: Body | None = None,
|
462
490
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
491
|
+
idempotency_key: str | None = None,
|
463
492
|
) -> Task:
|
464
493
|
"""Updates an existing task's configuration and reprocesses the document.
|
465
494
|
|
466
495
|
The
|
467
|
-
|
468
|
-
update.
|
496
|
+
current configuration is used as the base; only provided fields are changed.
|
469
497
|
|
470
498
|
Requirements:
|
471
499
|
|
472
|
-
- Task must
|
473
|
-
-
|
500
|
+
- Task must be in a terminal state (`Succeeded` or `Failed`).
|
501
|
+
- The new configuration must differ from the current configuration.
|
474
502
|
|
475
|
-
|
476
|
-
the `GET /tasks/{task_id}` endpoint to poll for completion.
|
503
|
+
Updates a task and returns its new metadata immediately.
|
477
504
|
|
478
505
|
Args:
|
479
506
|
chunk_processing: Controls the setting for the chunking and post-processing of each chunk.
|
@@ -505,22 +532,29 @@ class AsyncParseResource(AsyncAPIResource):
|
|
505
532
|
|
506
533
|
segment_processing: Defines how each segment type is handled when generating the final output.
|
507
534
|
|
508
|
-
Each segment uses one of three strategies. The chosen strategy controls:
|
509
|
-
|
510
|
-
|
511
|
-
|
535
|
+
Each segment uses one of three strategies. The chosen strategy controls:
|
536
|
+
|
537
|
+
- Whether the segment is kept (`Auto`, `LLM`) or skipped (`Ignore`).
|
538
|
+
- How the content is produced (rule-based vs. LLM).
|
539
|
+
- The output format (`Html` or `Markdown`).
|
512
540
|
|
513
541
|
Optional flags such as image **cropping**, **extended context**, and
|
514
542
|
**descriptions** further refine behaviour.
|
515
543
|
|
516
|
-
**Default strategy per segment**
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
544
|
+
**Default strategy per segment**
|
545
|
+
|
546
|
+
- `Title`, `SectionHeader`, `Text`, `ListItem`, `Caption`, `Footnote` → **Auto**
|
547
|
+
(Markdown, description off)
|
548
|
+
- `Table` → **LLM** (HTML, description on)
|
549
|
+
- `Picture` → **LLM** (Markdown, description off, cropping _All_)
|
550
|
+
- `Formula`, `Page` → **LLM** (Markdown, description off)
|
551
|
+
- `PageHeader`, `PageFooter` → **Ignore** (removed from output)
|
521
552
|
|
522
|
-
**Strategy reference**
|
523
|
-
|
553
|
+
**Strategy reference**
|
554
|
+
|
555
|
+
- **Auto** – rule-based content generation.
|
556
|
+
- **LLM** – generate content with an LLM.
|
557
|
+
- **Ignore** – exclude the segment entirely.
|
524
558
|
|
525
559
|
segmentation_strategy:
|
526
560
|
Controls the segmentation strategy:
|
@@ -538,6 +572,8 @@ class AsyncParseResource(AsyncAPIResource):
|
|
538
572
|
extra_body: Add additional JSON properties to the request
|
539
573
|
|
540
574
|
timeout: Override the client-level default timeout for this request, in seconds
|
575
|
+
|
576
|
+
idempotency_key: Specify a custom idempotency key for this request
|
541
577
|
"""
|
542
578
|
if not task_id:
|
543
579
|
raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
|
@@ -558,7 +594,11 @@ class AsyncParseResource(AsyncAPIResource):
|
|
558
594
|
parse_update_params.ParseUpdateParams,
|
559
595
|
),
|
560
596
|
options=make_request_options(
|
561
|
-
extra_headers=extra_headers,
|
597
|
+
extra_headers=extra_headers,
|
598
|
+
extra_query=extra_query,
|
599
|
+
extra_body=extra_body,
|
600
|
+
timeout=timeout,
|
601
|
+
idempotency_key=idempotency_key,
|
562
602
|
),
|
563
603
|
cast_to=Task,
|
564
604
|
)
|
@@ -138,6 +138,7 @@ class TasksResource(SyncAPIResource):
|
|
138
138
|
extra_query: Query | None = None,
|
139
139
|
extra_body: Body | None = None,
|
140
140
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
141
|
+
idempotency_key: str | None = None,
|
141
142
|
) -> None:
|
142
143
|
"""
|
143
144
|
Delete a task by its ID.
|
@@ -154,6 +155,8 @@ class TasksResource(SyncAPIResource):
|
|
154
155
|
extra_body: Add additional JSON properties to the request
|
155
156
|
|
156
157
|
timeout: Override the client-level default timeout for this request, in seconds
|
158
|
+
|
159
|
+
idempotency_key: Specify a custom idempotency key for this request
|
157
160
|
"""
|
158
161
|
if not task_id:
|
159
162
|
raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
|
@@ -161,7 +164,11 @@ class TasksResource(SyncAPIResource):
|
|
161
164
|
return self._delete(
|
162
165
|
f"/tasks/{task_id}",
|
163
166
|
options=make_request_options(
|
164
|
-
extra_headers=extra_headers,
|
167
|
+
extra_headers=extra_headers,
|
168
|
+
extra_query=extra_query,
|
169
|
+
extra_body=extra_body,
|
170
|
+
timeout=timeout,
|
171
|
+
idempotency_key=idempotency_key,
|
165
172
|
),
|
166
173
|
cast_to=NoneType,
|
167
174
|
)
|
@@ -213,6 +220,7 @@ class TasksResource(SyncAPIResource):
|
|
213
220
|
*,
|
214
221
|
base64_urls: bool | NotGiven = NOT_GIVEN,
|
215
222
|
include_chunks: bool | NotGiven = NOT_GIVEN,
|
223
|
+
wait_for_completion: bool | NotGiven = NOT_GIVEN,
|
216
224
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
217
225
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
218
226
|
extra_headers: Headers | None = None,
|
@@ -221,20 +229,20 @@ class TasksResource(SyncAPIResource):
|
|
221
229
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
222
230
|
) -> Task:
|
223
231
|
"""
|
224
|
-
Retrieves
|
232
|
+
Retrieves the current state of a task and, when requested, waits briefly for
|
233
|
+
completion.
|
225
234
|
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
- Presigned URLs for accessing files
|
235
|
+
Returns task details such as processing status, configuration, output (when
|
236
|
+
available), file metadata, and timestamps. If `wait_for_completion=true` is
|
237
|
+
provided, the server will hold the request briefly. If the task does not reach a
|
238
|
+
terminal state during that window, the response will indicate a retry with
|
239
|
+
appropriate headers.
|
232
240
|
|
233
|
-
|
241
|
+
Typical uses:
|
234
242
|
|
235
|
-
|
236
|
-
|
237
|
-
|
243
|
+
- Poll a task during processing
|
244
|
+
- Retrieve the final output once processing is complete
|
245
|
+
- Access task metadata and configuration
|
238
246
|
|
239
247
|
Args:
|
240
248
|
base64_urls: Whether to return base64 encoded URLs. If false, the URLs will be returned as
|
@@ -242,6 +250,8 @@ class TasksResource(SyncAPIResource):
|
|
242
250
|
|
243
251
|
include_chunks: Whether to include chunks in the output response
|
244
252
|
|
253
|
+
wait_for_completion: Whether to wait for the task to complete
|
254
|
+
|
245
255
|
extra_headers: Send extra headers
|
246
256
|
|
247
257
|
extra_query: Add additional query parameters to the request
|
@@ -263,6 +273,7 @@ class TasksResource(SyncAPIResource):
|
|
263
273
|
{
|
264
274
|
"base64_urls": base64_urls,
|
265
275
|
"include_chunks": include_chunks,
|
276
|
+
"wait_for_completion": wait_for_completion,
|
266
277
|
},
|
267
278
|
task_get_params.TaskGetParams,
|
268
279
|
),
|
@@ -375,6 +386,7 @@ class AsyncTasksResource(AsyncAPIResource):
|
|
375
386
|
extra_query: Query | None = None,
|
376
387
|
extra_body: Body | None = None,
|
377
388
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
389
|
+
idempotency_key: str | None = None,
|
378
390
|
) -> None:
|
379
391
|
"""
|
380
392
|
Delete a task by its ID.
|
@@ -391,6 +403,8 @@ class AsyncTasksResource(AsyncAPIResource):
|
|
391
403
|
extra_body: Add additional JSON properties to the request
|
392
404
|
|
393
405
|
timeout: Override the client-level default timeout for this request, in seconds
|
406
|
+
|
407
|
+
idempotency_key: Specify a custom idempotency key for this request
|
394
408
|
"""
|
395
409
|
if not task_id:
|
396
410
|
raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
|
@@ -398,7 +412,11 @@ class AsyncTasksResource(AsyncAPIResource):
|
|
398
412
|
return await self._delete(
|
399
413
|
f"/tasks/{task_id}",
|
400
414
|
options=make_request_options(
|
401
|
-
extra_headers=extra_headers,
|
415
|
+
extra_headers=extra_headers,
|
416
|
+
extra_query=extra_query,
|
417
|
+
extra_body=extra_body,
|
418
|
+
timeout=timeout,
|
419
|
+
idempotency_key=idempotency_key,
|
402
420
|
),
|
403
421
|
cast_to=NoneType,
|
404
422
|
)
|
@@ -450,6 +468,7 @@ class AsyncTasksResource(AsyncAPIResource):
|
|
450
468
|
*,
|
451
469
|
base64_urls: bool | NotGiven = NOT_GIVEN,
|
452
470
|
include_chunks: bool | NotGiven = NOT_GIVEN,
|
471
|
+
wait_for_completion: bool | NotGiven = NOT_GIVEN,
|
453
472
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
454
473
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
455
474
|
extra_headers: Headers | None = None,
|
@@ -458,20 +477,20 @@ class AsyncTasksResource(AsyncAPIResource):
|
|
458
477
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
459
478
|
) -> Task:
|
460
479
|
"""
|
461
|
-
Retrieves
|
480
|
+
Retrieves the current state of a task and, when requested, waits briefly for
|
481
|
+
completion.
|
462
482
|
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
- Presigned URLs for accessing files
|
483
|
+
Returns task details such as processing status, configuration, output (when
|
484
|
+
available), file metadata, and timestamps. If `wait_for_completion=true` is
|
485
|
+
provided, the server will hold the request briefly. If the task does not reach a
|
486
|
+
terminal state during that window, the response will indicate a retry with
|
487
|
+
appropriate headers.
|
469
488
|
|
470
|
-
|
489
|
+
Typical uses:
|
471
490
|
|
472
|
-
|
473
|
-
|
474
|
-
|
491
|
+
- Poll a task during processing
|
492
|
+
- Retrieve the final output once processing is complete
|
493
|
+
- Access task metadata and configuration
|
475
494
|
|
476
495
|
Args:
|
477
496
|
base64_urls: Whether to return base64 encoded URLs. If false, the URLs will be returned as
|
@@ -479,6 +498,8 @@ class AsyncTasksResource(AsyncAPIResource):
|
|
479
498
|
|
480
499
|
include_chunks: Whether to include chunks in the output response
|
481
500
|
|
501
|
+
wait_for_completion: Whether to wait for the task to complete
|
502
|
+
|
482
503
|
extra_headers: Send extra headers
|
483
504
|
|
484
505
|
extra_query: Add additional query parameters to the request
|
@@ -500,6 +521,7 @@ class AsyncTasksResource(AsyncAPIResource):
|
|
500
521
|
{
|
501
522
|
"base64_urls": base64_urls,
|
502
523
|
"include_chunks": include_chunks,
|
524
|
+
"wait_for_completion": wait_for_completion,
|
503
525
|
},
|
504
526
|
task_get_params.TaskGetParams,
|
505
527
|
),
|
chunkr_ai/types/task.py
CHANGED
@@ -827,22 +827,29 @@ class Configuration(BaseModel):
|
|
827
827
|
segment_processing: ConfigurationSegmentProcessing
|
828
828
|
"""Defines how each segment type is handled when generating the final output.
|
829
829
|
|
830
|
-
Each segment uses one of three strategies. The chosen strategy controls:
|
831
|
-
|
832
|
-
|
833
|
-
|
830
|
+
Each segment uses one of three strategies. The chosen strategy controls:
|
831
|
+
|
832
|
+
- Whether the segment is kept (`Auto`, `LLM`) or skipped (`Ignore`).
|
833
|
+
- How the content is produced (rule-based vs. LLM).
|
834
|
+
- The output format (`Html` or `Markdown`).
|
834
835
|
|
835
836
|
Optional flags such as image **cropping**, **extended context**, and
|
836
837
|
**descriptions** further refine behaviour.
|
837
838
|
|
838
|
-
**Default strategy per segment**
|
839
|
-
|
840
|
-
|
841
|
-
|
842
|
-
|
839
|
+
**Default strategy per segment**
|
840
|
+
|
841
|
+
- `Title`, `SectionHeader`, `Text`, `ListItem`, `Caption`, `Footnote` → **Auto**
|
842
|
+
(Markdown, description off)
|
843
|
+
- `Table` → **LLM** (HTML, description on)
|
844
|
+
- `Picture` → **LLM** (Markdown, description off, cropping _All_)
|
845
|
+
- `Formula`, `Page` → **LLM** (Markdown, description off)
|
846
|
+
- `PageHeader`, `PageFooter` → **Ignore** (removed from output)
|
847
|
+
|
848
|
+
**Strategy reference**
|
843
849
|
|
844
|
-
|
845
|
-
generate content with an LLM.
|
850
|
+
- **Auto** – rule-based content generation.
|
851
|
+
- **LLM** – generate content with an LLM.
|
852
|
+
- **Ignore** – exclude the segment entirely.
|
846
853
|
"""
|
847
854
|
|
848
855
|
segmentation_strategy: Literal["LayoutAnalysis", "Page"]
|
@@ -36,8 +36,8 @@ class ParseCreateParams(TypedDict, total=False):
|
|
36
36
|
file: Required[str]
|
37
37
|
"""The file to be uploaded. Supported inputs:
|
38
38
|
|
39
|
-
- `ch://files/{file_id}`:
|
40
|
-
|
39
|
+
- `ch://files/{file_id}`: Reference to an existing file. Upload via the Files
|
40
|
+
API
|
41
41
|
- `http(s)://...`: Remote URL to fetch
|
42
42
|
- `data:*;base64,...` or raw base64 string
|
43
43
|
"""
|
@@ -83,22 +83,29 @@ class ParseCreateParams(TypedDict, total=False):
|
|
83
83
|
segment_processing: Optional[SegmentProcessing]
|
84
84
|
"""Defines how each segment type is handled when generating the final output.
|
85
85
|
|
86
|
-
Each segment uses one of three strategies. The chosen strategy controls:
|
87
|
-
|
88
|
-
|
89
|
-
|
86
|
+
Each segment uses one of three strategies. The chosen strategy controls:
|
87
|
+
|
88
|
+
- Whether the segment is kept (`Auto`, `LLM`) or skipped (`Ignore`).
|
89
|
+
- How the content is produced (rule-based vs. LLM).
|
90
|
+
- The output format (`Html` or `Markdown`).
|
90
91
|
|
91
92
|
Optional flags such as image **cropping**, **extended context**, and
|
92
93
|
**descriptions** further refine behaviour.
|
93
94
|
|
94
|
-
**Default strategy per segment**
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
95
|
+
**Default strategy per segment**
|
96
|
+
|
97
|
+
- `Title`, `SectionHeader`, `Text`, `ListItem`, `Caption`, `Footnote` → **Auto**
|
98
|
+
(Markdown, description off)
|
99
|
+
- `Table` → **LLM** (HTML, description on)
|
100
|
+
- `Picture` → **LLM** (Markdown, description off, cropping _All_)
|
101
|
+
- `Formula`, `Page` → **LLM** (Markdown, description off)
|
102
|
+
- `PageHeader`, `PageFooter` → **Ignore** (removed from output)
|
103
|
+
|
104
|
+
**Strategy reference**
|
99
105
|
|
100
|
-
|
101
|
-
generate content with an LLM.
|
106
|
+
- **Auto** – rule-based content generation.
|
107
|
+
- **LLM** – generate content with an LLM.
|
108
|
+
- **Ignore** – exclude the segment entirely.
|
102
109
|
"""
|
103
110
|
|
104
111
|
segmentation_strategy: Optional[Literal["LayoutAnalysis", "Page"]]
|
@@ -77,22 +77,29 @@ class ParseUpdateParams(TypedDict, total=False):
|
|
77
77
|
segment_processing: Optional[SegmentProcessing]
|
78
78
|
"""Defines how each segment type is handled when generating the final output.
|
79
79
|
|
80
|
-
Each segment uses one of three strategies. The chosen strategy controls:
|
81
|
-
|
82
|
-
|
83
|
-
|
80
|
+
Each segment uses one of three strategies. The chosen strategy controls:
|
81
|
+
|
82
|
+
- Whether the segment is kept (`Auto`, `LLM`) or skipped (`Ignore`).
|
83
|
+
- How the content is produced (rule-based vs. LLM).
|
84
|
+
- The output format (`Html` or `Markdown`).
|
84
85
|
|
85
86
|
Optional flags such as image **cropping**, **extended context**, and
|
86
87
|
**descriptions** further refine behaviour.
|
87
88
|
|
88
|
-
**Default strategy per segment**
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
89
|
+
**Default strategy per segment**
|
90
|
+
|
91
|
+
- `Title`, `SectionHeader`, `Text`, `ListItem`, `Caption`, `Footnote` → **Auto**
|
92
|
+
(Markdown, description off)
|
93
|
+
- `Table` → **LLM** (HTML, description on)
|
94
|
+
- `Picture` → **LLM** (Markdown, description off, cropping _All_)
|
95
|
+
- `Formula`, `Page` → **LLM** (Markdown, description off)
|
96
|
+
- `PageHeader`, `PageFooter` → **Ignore** (removed from output)
|
97
|
+
|
98
|
+
**Strategy reference**
|
93
99
|
|
94
|
-
|
95
|
-
generate content with an LLM.
|
100
|
+
- **Auto** – rule-based content generation.
|
101
|
+
- **LLM** – generate content with an LLM.
|
102
|
+
- **Ignore** – exclude the segment entirely.
|
96
103
|
"""
|
97
104
|
|
98
105
|
segmentation_strategy: Optional[Literal["LayoutAnalysis", "Page"]]
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: chunkr-ai
|
3
|
-
Version: 0.1.
|
3
|
+
Version: 0.1.0a6
|
4
4
|
Summary: The official Python library for the chunkr API
|
5
5
|
Project-URL: Homepage, https://github.com/lumina-ai-inc/chunkr-python
|
6
6
|
Project-URL: Repository, https://github.com/lumina-ai-inc/chunkr-python
|
@@ -299,7 +299,7 @@ Error codes are as follows:
|
|
299
299
|
|
300
300
|
### Retries
|
301
301
|
|
302
|
-
Certain errors are automatically retried
|
302
|
+
Certain errors are automatically retried 50 times by default, with a short exponential backoff.
|
303
303
|
Connection errors (for example, due to a network connectivity problem), 408 Request Timeout, 409 Conflict,
|
304
304
|
429 Rate Limit, and >=500 Internal errors are all retried by default.
|
305
305
|
|
@@ -322,7 +322,7 @@ client.with_options(max_retries=5).tasks.parse.create(
|
|
322
322
|
|
323
323
|
### Timeouts
|
324
324
|
|
325
|
-
By default requests time out after
|
325
|
+
By default requests time out after 30 seconds. You can configure this with a `timeout` option,
|
326
326
|
which accepts a float or an [`httpx.Timeout`](https://www.python-httpx.org/advanced/timeouts/#fine-tuning-the-configuration) object:
|
327
327
|
|
328
328
|
```python
|
@@ -330,7 +330,7 @@ from chunkr_ai import Chunkr
|
|
330
330
|
|
331
331
|
# Configure the default for all requests:
|
332
332
|
client = Chunkr(
|
333
|
-
# 20 seconds (default is
|
333
|
+
# 20 seconds (default is 30 seconds)
|
334
334
|
timeout=20.0,
|
335
335
|
)
|
336
336
|
|
@@ -1,8 +1,8 @@
|
|
1
|
-
chunkr_ai/__init__.py,sha256=
|
1
|
+
chunkr_ai/__init__.py,sha256=scS30uHiCpLbaalKTAJSCFSTqnu_b9R5JCkTu2hmbzU,2587
|
2
2
|
chunkr_ai/_base_client.py,sha256=Nv5b_rmVdmmPbF42mlOfymbSC6lxcYsrsvBhKSBDXWQ,67038
|
3
|
-
chunkr_ai/_client.py,sha256=
|
3
|
+
chunkr_ai/_client.py,sha256=yn0QdzDkm0M6Ft2-ItmfJpUxQnJVoWa29tSv_2g3KDQ,15975
|
4
4
|
chunkr_ai/_compat.py,sha256=VWemUKbj6DDkQ-O4baSpHVLJafotzeXmCQGJugfVTIw,6580
|
5
|
-
chunkr_ai/_constants.py,sha256=
|
5
|
+
chunkr_ai/_constants.py,sha256=SZppb_i55UWs0n0_MRbw7s0Hy_TOGQu9q7FVd-fCwgM,466
|
6
6
|
chunkr_ai/_exceptions.py,sha256=ClgXUcwf4qhBTXnK4LzUPQCFdFldRxAlcYdOFFgpTxA,3220
|
7
7
|
chunkr_ai/_files.py,sha256=SUFtic_gwSzbvhLtMdQ7TBem8szrqZE2nZFFMRa0KTw,3619
|
8
8
|
chunkr_ai/_models.py,sha256=KvjsMfb88XZlFUKVoOxr8OyDj47MhoH2OKqWNEbBhk4,30010
|
@@ -11,7 +11,7 @@ chunkr_ai/_resource.py,sha256=f5tiwjxcKdbeMor8idoHtMFTUhqD9yc2xXtq5rqeLLk,1100
|
|
11
11
|
chunkr_ai/_response.py,sha256=xXNpF53hiYARmAW7npKuxQ5UHAEjgAzm7ME_L3eIstY,28800
|
12
12
|
chunkr_ai/_streaming.py,sha256=ZmyrVWk7-AWkLAATR55WgNxnyFzYmaqJt2LthA_PTqQ,10100
|
13
13
|
chunkr_ai/_types.py,sha256=dnzU2Q2tLcuk29QFEcnPC1wp0-4XB4Cpef_3AnRhV5Y,6200
|
14
|
-
chunkr_ai/_version.py,sha256=
|
14
|
+
chunkr_ai/_version.py,sha256=Wa4VwBGgQILEshUij6ZGjQhFefcxwDJV1YTUEWRgG6c,169
|
15
15
|
chunkr_ai/pagination.py,sha256=bT-ErcJ80YlKBV6tWq2s9uqg-wv7o66SKe_AgUAGrKc,3533
|
16
16
|
chunkr_ai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
17
|
chunkr_ai/_utils/__init__.py,sha256=PNZ_QJuzZEgyYXqkO1HVhGkj5IU9bglVUcw7H-Knjzw,2062
|
@@ -25,13 +25,12 @@ chunkr_ai/_utils/_transform.py,sha256=n7kskEWz6o__aoNvhFoGVyDoalNe6mJwp-g7BWkdj8
|
|
25
25
|
chunkr_ai/_utils/_typing.py,sha256=D0DbbNu8GnYQTSICnTSHDGsYXj8TcAKyhejb0XcnjtY,4602
|
26
26
|
chunkr_ai/_utils/_utils.py,sha256=ts4CiiuNpFiGB6YMdkQRh2SZvYvsl7mAF-JWHCcLDf4,12312
|
27
27
|
chunkr_ai/lib/.keep,sha256=wuNrz-5SXo3jJaJOJgz4vFHM41YH_g20F5cRQo0vLes,224
|
28
|
-
chunkr_ai/lib/tasks_poll.py,sha256=3yosl_hH5j6NVNH9mANqneAW0FJSbIV9dMoTcF-OdJU,3341
|
29
28
|
chunkr_ai/resources/__init__.py,sha256=K-axuAEg2pJQl45N5ao1tm8AnRwpQVVNp_b6qSMgB6A,1426
|
30
|
-
chunkr_ai/resources/files.py,sha256=
|
29
|
+
chunkr_ai/resources/files.py,sha256=iX6LbX2PqM6kFKNoLxS_R9OGaVSnnZJ8U0dCUxNBGIM,27184
|
31
30
|
chunkr_ai/resources/health.py,sha256=XTvUtRs5hEK-uccb_40mcIex85eEUo1a171nQUjpSOs,4965
|
32
31
|
chunkr_ai/resources/tasks/__init__.py,sha256=W-sclAx_Kfm7OBGlSs694QzNCMkewtz9LU9KRcb8Ud0,976
|
33
|
-
chunkr_ai/resources/tasks/parse.py,sha256=
|
34
|
-
chunkr_ai/resources/tasks/tasks.py,sha256=
|
32
|
+
chunkr_ai/resources/tasks/parse.py,sha256=cMKxGPFqg8h5HutUx0l7J_namzsYzayWKoQVnlnJ6qA,28999
|
33
|
+
chunkr_ai/resources/tasks/tasks.py,sha256=XkNulmXZz4N6UXaG-6EdS-WAyncTgwMY3BYbJBqeEGw,22745
|
35
34
|
chunkr_ai/types/__init__.py,sha256=DSRAMgXVRTZM2t8s2yrFU-FHt3FTs_wpZfVILH1zjJ0,728
|
36
35
|
chunkr_ai/types/delete.py,sha256=EU78fjXpc8-fqvgcFTuJ0ejs5u_UjbhOz5frkeUHvxY,225
|
37
36
|
chunkr_ai/types/file.py,sha256=kOxR0g-3A-qOxz2cjuTcq0wFMqPoph9uQuLYQ56zb-c,718
|
@@ -41,13 +40,13 @@ chunkr_ai/types/file_url.py,sha256=L434WnOXkNmt59dJiaAgT1_3pN3BIsxm2q14zHQK6xY,3
|
|
41
40
|
chunkr_ai/types/file_url_params.py,sha256=ZHfKiy_6B25StdDemulavGcsPggNNMKLWf6KN7xfPTY,413
|
42
41
|
chunkr_ai/types/files_list_response.py,sha256=ggSRWhTzZWjcDXxStyCzrYICXXB5TqnL2j-SN9mHH_g,506
|
43
42
|
chunkr_ai/types/health_check_response.py,sha256=6Zn5YYHCQf2RgMjDlf39mtiTPqfaBfC9Vv599U_rKCI,200
|
44
|
-
chunkr_ai/types/task.py,sha256=
|
45
|
-
chunkr_ai/types/task_get_params.py,sha256=
|
43
|
+
chunkr_ai/types/task.py,sha256=L8vE_q0Hej_YuJM_rd_bZOg8kHbithsFx6fOQYpH0cY,46702
|
44
|
+
chunkr_ai/types/task_get_params.py,sha256=yGMHRfkbLzQpRLdF_Dj-8TqcioEhDNWyVbEt50xDAP0,542
|
46
45
|
chunkr_ai/types/task_list_params.py,sha256=fCku42QW6QUsLmZgKJBaxisGvUcmcQ5fa6LgHHRIwiQ,1043
|
47
46
|
chunkr_ai/types/tasks/__init__.py,sha256=VdLEmQvgPoiykSEYaRhkMYVaIueGDkR4P_MjCq9SbQY,267
|
48
|
-
chunkr_ai/types/tasks/parse_create_params.py,sha256=
|
49
|
-
chunkr_ai/types/tasks/parse_update_params.py,sha256=
|
50
|
-
chunkr_ai-0.1.
|
51
|
-
chunkr_ai-0.1.
|
52
|
-
chunkr_ai-0.1.
|
53
|
-
chunkr_ai-0.1.
|
47
|
+
chunkr_ai/types/tasks/parse_create_params.py,sha256=lUSnRHq_nT4nW9boB80HvpVcnjHwIpfQoxBKugFRC7Y,34324
|
48
|
+
chunkr_ai/types/tasks/parse_update_params.py,sha256=VrzK-Y4S02fficwIr4EatD73_4i40lOAxMEYDN3KZEQ,34112
|
49
|
+
chunkr_ai-0.1.0a6.dist-info/METADATA,sha256=Kbt8umfSyxxm5O7Xol6ypvmc7yprG33YFE3L-TqGx6Y,16446
|
50
|
+
chunkr_ai-0.1.0a6.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
|
51
|
+
chunkr_ai-0.1.0a6.dist-info/licenses/LICENSE,sha256=3FDRL-L-DFkrFy8yJpb1Nxhuztm0PB2kawcCgK5utFg,11336
|
52
|
+
chunkr_ai-0.1.0a6.dist-info/RECORD,,
|
chunkr_ai/lib/tasks_poll.py
DELETED
@@ -1,122 +0,0 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
"""
|
4
|
-
Custom helpers for task polling.
|
5
|
-
|
6
|
-
This module adds `Task.poll()` and `Task.apoll()` methods at runtime to the
|
7
|
-
generated `Task` model, without modifying generated code directly.
|
8
|
-
|
9
|
-
Usage:
|
10
|
-
task = client.tasks.get(task_id)
|
11
|
-
task = task.poll(client) # blocks until terminal state
|
12
|
-
|
13
|
-
# async
|
14
|
-
task = await async_client.tasks.get(task_id)
|
15
|
-
task = await task.apoll(async_client)
|
16
|
-
"""
|
17
|
-
|
18
|
-
import time
|
19
|
-
import asyncio
|
20
|
-
from typing import Protocol, cast
|
21
|
-
|
22
|
-
from .._types import NOT_GIVEN, NotGiven
|
23
|
-
from .._client import Chunkr, AsyncChunkr
|
24
|
-
from ..types.task import Task as _Task
|
25
|
-
from .._exceptions import ChunkrError
|
26
|
-
|
27
|
-
TERMINAL_STATUSES = {"Succeeded", "Failed", "Cancelled"}
|
28
|
-
|
29
|
-
|
30
|
-
def _task_poll(
|
31
|
-
self: _Task,
|
32
|
-
client: Chunkr,
|
33
|
-
*,
|
34
|
-
interval: float = 0.5,
|
35
|
-
timeout: float = 600.0,
|
36
|
-
include_chunks: bool | NotGiven = NOT_GIVEN,
|
37
|
-
base64_urls: bool | NotGiven = NOT_GIVEN,
|
38
|
-
) -> _Task:
|
39
|
-
"""Poll the task until it reaches a terminal status.
|
40
|
-
|
41
|
-
Args:
|
42
|
-
client: Synchronous Chunkr client instance.
|
43
|
-
interval: Seconds to sleep between polls.
|
44
|
-
timeout: Maximum total seconds to wait before raising an error.
|
45
|
-
include_chunks: Whether to include chunks in the output response for each poll.
|
46
|
-
base64_urls: Whether to return base64 encoded URLs.
|
47
|
-
"""
|
48
|
-
start_time = time.monotonic()
|
49
|
-
current: _Task = self
|
50
|
-
|
51
|
-
class _TasksGetProtocol(Protocol):
|
52
|
-
def get(
|
53
|
-
self,
|
54
|
-
task_id: str,
|
55
|
-
*,
|
56
|
-
base64_urls: bool | NotGiven = NOT_GIVEN,
|
57
|
-
include_chunks: bool | NotGiven = NOT_GIVEN,
|
58
|
-
) -> _Task: ...
|
59
|
-
|
60
|
-
resource = cast(_TasksGetProtocol, client.tasks)
|
61
|
-
|
62
|
-
while current.status not in TERMINAL_STATUSES:
|
63
|
-
if time.monotonic() - start_time > timeout:
|
64
|
-
raise ChunkrError("Task polling timed out.")
|
65
|
-
|
66
|
-
if interval > 0:
|
67
|
-
time.sleep(interval)
|
68
|
-
|
69
|
-
current = resource.get(
|
70
|
-
current.task_id,
|
71
|
-
include_chunks=include_chunks,
|
72
|
-
base64_urls=base64_urls,
|
73
|
-
)
|
74
|
-
|
75
|
-
return current
|
76
|
-
|
77
|
-
|
78
|
-
async def _task_apoll(
|
79
|
-
self: _Task,
|
80
|
-
client: AsyncChunkr,
|
81
|
-
*,
|
82
|
-
interval: float = 0.5,
|
83
|
-
timeout: float = 600.0,
|
84
|
-
include_chunks: bool | NotGiven = NOT_GIVEN,
|
85
|
-
base64_urls: bool | NotGiven = NOT_GIVEN,
|
86
|
-
) -> _Task:
|
87
|
-
"""Async poll the task until it reaches a terminal status."""
|
88
|
-
start_time = time.monotonic()
|
89
|
-
current: _Task = self
|
90
|
-
|
91
|
-
class _AsyncTasksGetProtocol(Protocol):
|
92
|
-
async def get(
|
93
|
-
self,
|
94
|
-
task_id: str,
|
95
|
-
*,
|
96
|
-
base64_urls: bool | NotGiven = NOT_GIVEN,
|
97
|
-
include_chunks: bool | NotGiven = NOT_GIVEN,
|
98
|
-
) -> _Task: ...
|
99
|
-
|
100
|
-
aresource = cast(_AsyncTasksGetProtocol, client.tasks)
|
101
|
-
|
102
|
-
while current.status not in TERMINAL_STATUSES:
|
103
|
-
if time.monotonic() - start_time > timeout:
|
104
|
-
raise ChunkrError("Task polling timed out.")
|
105
|
-
|
106
|
-
if interval > 0:
|
107
|
-
await asyncio.sleep(interval)
|
108
|
-
|
109
|
-
current = await aresource.get(
|
110
|
-
current.task_id,
|
111
|
-
include_chunks=include_chunks,
|
112
|
-
base64_urls=base64_urls,
|
113
|
-
)
|
114
|
-
|
115
|
-
return current
|
116
|
-
|
117
|
-
|
118
|
-
# Attach methods to the generated Task model
|
119
|
-
_Task.poll = _task_poll # type: ignore[attr-defined]
|
120
|
-
_Task.apoll = _task_apoll # type: ignore[attr-defined]
|
121
|
-
|
122
|
-
|
File without changes
|
File without changes
|