chunkr-ai 0.1.0a8__py3-none-any.whl → 0.1.0a9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chunkr_ai/__init__.py +3 -1
- chunkr_ai/_base_client.py +9 -9
- chunkr_ai/_client.py +8 -8
- chunkr_ai/_models.py +10 -4
- chunkr_ai/_qs.py +7 -7
- chunkr_ai/_types.py +18 -11
- chunkr_ai/_utils/_transform.py +2 -2
- chunkr_ai/_utils/_utils.py +4 -4
- chunkr_ai/_version.py +1 -1
- chunkr_ai/resources/files.py +29 -29
- chunkr_ai/resources/health.py +3 -3
- chunkr_ai/resources/tasks/extract.py +17 -17
- chunkr_ai/resources/tasks/parse.py +25 -34
- chunkr_ai/resources/tasks/tasks.py +31 -31
- chunkr_ai/resources/webhooks.py +3 -3
- chunkr_ai/types/__init__.py +0 -2
- chunkr_ai/types/file_info.py +3 -0
- chunkr_ai/types/ocr_result.py +6 -6
- chunkr_ai/types/parse_configuration.py +0 -4
- chunkr_ai/types/parse_configuration_param.py +0 -4
- chunkr_ai/types/segment.py +8 -5
- chunkr_ai/types/segment_processing.py +92 -2
- chunkr_ai/types/segment_processing_param.py +92 -2
- chunkr_ai/types/tasks/parse_create_params.py +0 -4
- chunkr_ai/types/version_info.py +1 -1
- {chunkr_ai-0.1.0a8.dist-info → chunkr_ai-0.1.0a9.dist-info}/METADATA +1 -1
- {chunkr_ai-0.1.0a8.dist-info → chunkr_ai-0.1.0a9.dist-info}/RECORD +29 -31
- chunkr_ai/types/llm_processing.py +0 -36
- chunkr_ai/types/llm_processing_param.py +0 -36
- {chunkr_ai-0.1.0a8.dist-info → chunkr_ai-0.1.0a9.dist-info}/WHEEL +0 -0
- {chunkr_ai-0.1.0a8.dist-info → chunkr_ai-0.1.0a9.dist-info}/licenses/LICENSE +0 -0
@@ -6,7 +6,7 @@ from typing import Optional
|
|
6
6
|
|
7
7
|
import httpx
|
8
8
|
|
9
|
-
from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
|
9
|
+
from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
|
10
10
|
from ..._utils import maybe_transform, async_maybe_transform
|
11
11
|
from ..._compat import cached_property
|
12
12
|
from ..._resource import SyncAPIResource, AsyncAPIResource
|
@@ -50,16 +50,16 @@ class ExtractResource(SyncAPIResource):
|
|
50
50
|
*,
|
51
51
|
file: str,
|
52
52
|
schema: object,
|
53
|
-
expires_in: Optional[int] | NotGiven = NOT_GIVEN,
|
54
|
-
file_name: Optional[str] | NotGiven = NOT_GIVEN,
|
55
|
-
parse_configuration: Optional[ParseConfigurationParam] | NotGiven = NOT_GIVEN,
|
56
|
-
system_prompt: Optional[str] | NotGiven = NOT_GIVEN,
|
53
|
+
expires_in: Optional[int] | Omit = omit,
|
54
|
+
file_name: Optional[str] | Omit = omit,
|
55
|
+
parse_configuration: Optional[ParseConfigurationParam] | Omit = omit,
|
56
|
+
system_prompt: Optional[str] | Omit = omit,
|
57
57
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
58
58
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
59
59
|
extra_headers: Headers | None = None,
|
60
60
|
extra_query: Query | None = None,
|
61
61
|
extra_body: Body | None = None,
|
62
|
-
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
62
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
63
63
|
idempotency_key: str | None = None,
|
64
64
|
) -> ExtractCreateResponse:
|
65
65
|
"""
|
@@ -129,14 +129,14 @@ class ExtractResource(SyncAPIResource):
|
|
129
129
|
self,
|
130
130
|
task_id: Optional[str],
|
131
131
|
*,
|
132
|
-
base64_urls: bool | NotGiven = NOT_GIVEN,
|
133
|
-
include_chunks: bool | NotGiven = NOT_GIVEN,
|
132
|
+
base64_urls: bool | Omit = omit,
|
133
|
+
include_chunks: bool | Omit = omit,
|
134
134
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
135
135
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
136
136
|
extra_headers: Headers | None = None,
|
137
137
|
extra_query: Query | None = None,
|
138
138
|
extra_body: Body | None = None,
|
139
|
-
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
139
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
140
140
|
) -> ExtractGetResponse:
|
141
141
|
"""
|
142
142
|
Retrieves the current state of an extract task.
|
@@ -210,16 +210,16 @@ class AsyncExtractResource(AsyncAPIResource):
|
|
210
210
|
*,
|
211
211
|
file: str,
|
212
212
|
schema: object,
|
213
|
-
expires_in: Optional[int] | NotGiven = NOT_GIVEN,
|
214
|
-
file_name: Optional[str] | NotGiven = NOT_GIVEN,
|
215
|
-
parse_configuration: Optional[ParseConfigurationParam] | NotGiven = NOT_GIVEN,
|
216
|
-
system_prompt: Optional[str] | NotGiven = NOT_GIVEN,
|
213
|
+
expires_in: Optional[int] | Omit = omit,
|
214
|
+
file_name: Optional[str] | Omit = omit,
|
215
|
+
parse_configuration: Optional[ParseConfigurationParam] | Omit = omit,
|
216
|
+
system_prompt: Optional[str] | Omit = omit,
|
217
217
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
218
218
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
219
219
|
extra_headers: Headers | None = None,
|
220
220
|
extra_query: Query | None = None,
|
221
221
|
extra_body: Body | None = None,
|
222
|
-
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
222
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
223
223
|
idempotency_key: str | None = None,
|
224
224
|
) -> ExtractCreateResponse:
|
225
225
|
"""
|
@@ -289,14 +289,14 @@ class AsyncExtractResource(AsyncAPIResource):
|
|
289
289
|
self,
|
290
290
|
task_id: Optional[str],
|
291
291
|
*,
|
292
|
-
base64_urls: bool | NotGiven = NOT_GIVEN,
|
293
|
-
include_chunks: bool | NotGiven = NOT_GIVEN,
|
292
|
+
base64_urls: bool | Omit = omit,
|
293
|
+
include_chunks: bool | Omit = omit,
|
294
294
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
295
295
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
296
296
|
extra_headers: Headers | None = None,
|
297
297
|
extra_query: Query | None = None,
|
298
298
|
extra_body: Body | None = None,
|
299
|
-
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
299
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
300
300
|
) -> ExtractGetResponse:
|
301
301
|
"""
|
302
302
|
Retrieves the current state of an extract task.
|
@@ -7,7 +7,7 @@ from typing_extensions import Literal
|
|
7
7
|
|
8
8
|
import httpx
|
9
9
|
|
10
|
-
from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
|
10
|
+
from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
|
11
11
|
from ..._utils import maybe_transform, async_maybe_transform
|
12
12
|
from ..._compat import cached_property
|
13
13
|
from ..._resource import SyncAPIResource, AsyncAPIResource
|
@@ -19,7 +19,6 @@ from ..._response import (
|
|
19
19
|
)
|
20
20
|
from ...types.tasks import parse_get_params, parse_create_params
|
21
21
|
from ..._base_client import make_request_options
|
22
|
-
from ...types.llm_processing_param import LlmProcessingParam
|
23
22
|
from ...types.chunk_processing_param import ChunkProcessingParam
|
24
23
|
from ...types.segment_processing_param import SegmentProcessingParam
|
25
24
|
from ...types.tasks.parse_get_response import ParseGetResponse
|
@@ -52,21 +51,20 @@ class ParseResource(SyncAPIResource):
|
|
52
51
|
self,
|
53
52
|
*,
|
54
53
|
file: str,
|
55
|
-
chunk_processing: ChunkProcessingParam | NotGiven = NOT_GIVEN,
|
56
|
-
error_handling: Literal["Fail", "Continue"] | NotGiven = NOT_GIVEN,
|
57
|
-
expires_in: Optional[int] | NotGiven = NOT_GIVEN,
|
58
|
-
file_name: Optional[str] | NotGiven = NOT_GIVEN,
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
segmentation_strategy: Literal["LayoutAnalysis", "Page"] | NotGiven = NOT_GIVEN,
|
54
|
+
chunk_processing: ChunkProcessingParam | Omit = omit,
|
55
|
+
error_handling: Literal["Fail", "Continue"] | Omit = omit,
|
56
|
+
expires_in: Optional[int] | Omit = omit,
|
57
|
+
file_name: Optional[str] | Omit = omit,
|
58
|
+
ocr_strategy: Literal["All", "Auto"] | Omit = omit,
|
59
|
+
pipeline: Literal["Azure", "Chunkr"] | Omit = omit,
|
60
|
+
segment_processing: Optional[SegmentProcessingParam] | Omit = omit,
|
61
|
+
segmentation_strategy: Literal["LayoutAnalysis", "Page"] | Omit = omit,
|
64
62
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
65
63
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
66
64
|
extra_headers: Headers | None = None,
|
67
65
|
extra_query: Query | None = None,
|
68
66
|
extra_body: Body | None = None,
|
69
|
-
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
67
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
70
68
|
idempotency_key: str | None = None,
|
71
69
|
) -> ParseCreateResponse:
|
72
70
|
"""
|
@@ -99,8 +97,6 @@ class ParseResource(SyncAPIResource):
|
|
99
97
|
|
100
98
|
file_name: The name of the file to be parsed. If not set a name will be generated.
|
101
99
|
|
102
|
-
llm_processing: Controls the LLM used for the task.
|
103
|
-
|
104
100
|
ocr_strategy: Controls the Optical Character Recognition (OCR) strategy.
|
105
101
|
|
106
102
|
- `All`: Processes all pages with OCR. (Latency penalty: ~0.5 seconds per page)
|
@@ -150,7 +146,6 @@ class ParseResource(SyncAPIResource):
|
|
150
146
|
"error_handling": error_handling,
|
151
147
|
"expires_in": expires_in,
|
152
148
|
"file_name": file_name,
|
153
|
-
"llm_processing": llm_processing,
|
154
149
|
"ocr_strategy": ocr_strategy,
|
155
150
|
"pipeline": pipeline,
|
156
151
|
"segment_processing": segment_processing,
|
@@ -172,14 +167,14 @@ class ParseResource(SyncAPIResource):
|
|
172
167
|
self,
|
173
168
|
task_id: Optional[str],
|
174
169
|
*,
|
175
|
-
base64_urls: bool | NotGiven = NOT_GIVEN,
|
176
|
-
include_chunks: bool | NotGiven = NOT_GIVEN,
|
170
|
+
base64_urls: bool | Omit = omit,
|
171
|
+
include_chunks: bool | Omit = omit,
|
177
172
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
178
173
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
179
174
|
extra_headers: Headers | None = None,
|
180
175
|
extra_query: Query | None = None,
|
181
176
|
extra_body: Body | None = None,
|
182
|
-
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
177
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
183
178
|
) -> ParseGetResponse:
|
184
179
|
"""
|
185
180
|
Retrieves the current state of a parse task.
|
@@ -252,21 +247,20 @@ class AsyncParseResource(AsyncAPIResource):
|
|
252
247
|
self,
|
253
248
|
*,
|
254
249
|
file: str,
|
255
|
-
chunk_processing: ChunkProcessingParam | NotGiven = NOT_GIVEN,
|
256
|
-
error_handling: Literal["Fail", "Continue"] | NotGiven = NOT_GIVEN,
|
257
|
-
expires_in: Optional[int] | NotGiven = NOT_GIVEN,
|
258
|
-
file_name: Optional[str] | NotGiven = NOT_GIVEN,
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
segmentation_strategy: Literal["LayoutAnalysis", "Page"] | NotGiven = NOT_GIVEN,
|
250
|
+
chunk_processing: ChunkProcessingParam | Omit = omit,
|
251
|
+
error_handling: Literal["Fail", "Continue"] | Omit = omit,
|
252
|
+
expires_in: Optional[int] | Omit = omit,
|
253
|
+
file_name: Optional[str] | Omit = omit,
|
254
|
+
ocr_strategy: Literal["All", "Auto"] | Omit = omit,
|
255
|
+
pipeline: Literal["Azure", "Chunkr"] | Omit = omit,
|
256
|
+
segment_processing: Optional[SegmentProcessingParam] | Omit = omit,
|
257
|
+
segmentation_strategy: Literal["LayoutAnalysis", "Page"] | Omit = omit,
|
264
258
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
265
259
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
266
260
|
extra_headers: Headers | None = None,
|
267
261
|
extra_query: Query | None = None,
|
268
262
|
extra_body: Body | None = None,
|
269
|
-
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
263
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
270
264
|
idempotency_key: str | None = None,
|
271
265
|
) -> ParseCreateResponse:
|
272
266
|
"""
|
@@ -299,8 +293,6 @@ class AsyncParseResource(AsyncAPIResource):
|
|
299
293
|
|
300
294
|
file_name: The name of the file to be parsed. If not set a name will be generated.
|
301
295
|
|
302
|
-
llm_processing: Controls the LLM used for the task.
|
303
|
-
|
304
296
|
ocr_strategy: Controls the Optical Character Recognition (OCR) strategy.
|
305
297
|
|
306
298
|
- `All`: Processes all pages with OCR. (Latency penalty: ~0.5 seconds per page)
|
@@ -350,7 +342,6 @@ class AsyncParseResource(AsyncAPIResource):
|
|
350
342
|
"error_handling": error_handling,
|
351
343
|
"expires_in": expires_in,
|
352
344
|
"file_name": file_name,
|
353
|
-
"llm_processing": llm_processing,
|
354
345
|
"ocr_strategy": ocr_strategy,
|
355
346
|
"pipeline": pipeline,
|
356
347
|
"segment_processing": segment_processing,
|
@@ -372,14 +363,14 @@ class AsyncParseResource(AsyncAPIResource):
|
|
372
363
|
self,
|
373
364
|
task_id: Optional[str],
|
374
365
|
*,
|
375
|
-
base64_urls: bool | NotGiven = NOT_GIVEN,
|
376
|
-
include_chunks: bool | NotGiven = NOT_GIVEN,
|
366
|
+
base64_urls: bool | Omit = omit,
|
367
|
+
include_chunks: bool | Omit = omit,
|
377
368
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
378
369
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
379
370
|
extra_headers: Headers | None = None,
|
380
371
|
extra_query: Query | None = None,
|
381
372
|
extra_body: Body | None = None,
|
382
|
-
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
373
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
383
374
|
) -> ParseGetResponse:
|
384
375
|
"""
|
385
376
|
Retrieves the current state of a parse task.
|
@@ -25,7 +25,7 @@ from .extract import (
|
|
25
25
|
ExtractResourceWithStreamingResponse,
|
26
26
|
AsyncExtractResourceWithStreamingResponse,
|
27
27
|
)
|
28
|
-
from ..._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven
|
28
|
+
from ..._types import Body, Omit, Query, Headers, NoneType, NotGiven, omit, not_given
|
29
29
|
from ..._utils import maybe_transform, async_maybe_transform
|
30
30
|
from ..._compat import cached_property
|
31
31
|
from ..._resource import SyncAPIResource, AsyncAPIResource
|
@@ -73,21 +73,21 @@ class TasksResource(SyncAPIResource):
|
|
73
73
|
def list(
|
74
74
|
self,
|
75
75
|
*,
|
76
|
-
base64_urls: bool | NotGiven = NOT_GIVEN,
|
77
|
-
cursor: Union[str, datetime] | NotGiven = NOT_GIVEN,
|
78
|
-
end: Union[str, datetime] | NotGiven = NOT_GIVEN,
|
79
|
-
include_chunks: bool | NotGiven = NOT_GIVEN,
|
80
|
-
limit: int | NotGiven = NOT_GIVEN,
|
81
|
-
sort: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
|
82
|
-
start: Union[str, datetime] | NotGiven = NOT_GIVEN,
|
83
|
-
statuses: List[Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]] | NotGiven = NOT_GIVEN,
|
84
|
-
task_types: List[Literal["Parse", "Extract"]] | NotGiven = NOT_GIVEN,
|
76
|
+
base64_urls: bool | Omit = omit,
|
77
|
+
cursor: Union[str, datetime] | Omit = omit,
|
78
|
+
end: Union[str, datetime] | Omit = omit,
|
79
|
+
include_chunks: bool | Omit = omit,
|
80
|
+
limit: int | Omit = omit,
|
81
|
+
sort: Literal["asc", "desc"] | Omit = omit,
|
82
|
+
start: Union[str, datetime] | Omit = omit,
|
83
|
+
statuses: List[Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]] | Omit = omit,
|
84
|
+
task_types: List[Literal["Parse", "Extract"]] | Omit = omit,
|
85
85
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
86
86
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
87
87
|
extra_headers: Headers | None = None,
|
88
88
|
extra_query: Query | None = None,
|
89
89
|
extra_body: Body | None = None,
|
90
|
-
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
90
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
91
91
|
) -> SyncTasksPage[TaskResponse]:
|
92
92
|
"""
|
93
93
|
Lists tasks for the authenticated user with cursor-based pagination and optional
|
@@ -157,7 +157,7 @@ class TasksResource(SyncAPIResource):
|
|
157
157
|
extra_headers: Headers | None = None,
|
158
158
|
extra_query: Query | None = None,
|
159
159
|
extra_body: Body | None = None,
|
160
|
-
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
160
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
161
161
|
idempotency_key: str | None = None,
|
162
162
|
) -> None:
|
163
163
|
"""
|
@@ -202,7 +202,7 @@ class TasksResource(SyncAPIResource):
|
|
202
202
|
extra_headers: Headers | None = None,
|
203
203
|
extra_query: Query | None = None,
|
204
204
|
extra_body: Body | None = None,
|
205
|
-
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
205
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
206
206
|
) -> None:
|
207
207
|
"""
|
208
208
|
Cancel a task that hasn't started processing yet:
|
@@ -238,14 +238,14 @@ class TasksResource(SyncAPIResource):
|
|
238
238
|
self,
|
239
239
|
task_id: Optional[str],
|
240
240
|
*,
|
241
|
-
base64_urls: bool | NotGiven = NOT_GIVEN,
|
242
|
-
include_chunks: bool | NotGiven = NOT_GIVEN,
|
241
|
+
base64_urls: bool | Omit = omit,
|
242
|
+
include_chunks: bool | Omit = omit,
|
243
243
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
244
244
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
245
245
|
extra_headers: Headers | None = None,
|
246
246
|
extra_query: Query | None = None,
|
247
247
|
extra_body: Body | None = None,
|
248
|
-
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
248
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
249
249
|
) -> TaskResponse:
|
250
250
|
"""
|
251
251
|
Retrieves the current state of a task.
|
@@ -325,21 +325,21 @@ class AsyncTasksResource(AsyncAPIResource):
|
|
325
325
|
def list(
|
326
326
|
self,
|
327
327
|
*,
|
328
|
-
base64_urls: bool | NotGiven = NOT_GIVEN,
|
329
|
-
cursor: Union[str, datetime] | NotGiven = NOT_GIVEN,
|
330
|
-
end: Union[str, datetime] | NotGiven = NOT_GIVEN,
|
331
|
-
include_chunks: bool | NotGiven = NOT_GIVEN,
|
332
|
-
limit: int | NotGiven = NOT_GIVEN,
|
333
|
-
sort: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
|
334
|
-
start: Union[str, datetime] | NotGiven = NOT_GIVEN,
|
335
|
-
statuses: List[Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]] | NotGiven = NOT_GIVEN,
|
336
|
-
task_types: List[Literal["Parse", "Extract"]] | NotGiven = NOT_GIVEN,
|
328
|
+
base64_urls: bool | Omit = omit,
|
329
|
+
cursor: Union[str, datetime] | Omit = omit,
|
330
|
+
end: Union[str, datetime] | Omit = omit,
|
331
|
+
include_chunks: bool | Omit = omit,
|
332
|
+
limit: int | Omit = omit,
|
333
|
+
sort: Literal["asc", "desc"] | Omit = omit,
|
334
|
+
start: Union[str, datetime] | Omit = omit,
|
335
|
+
statuses: List[Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]] | Omit = omit,
|
336
|
+
task_types: List[Literal["Parse", "Extract"]] | Omit = omit,
|
337
337
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
338
338
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
339
339
|
extra_headers: Headers | None = None,
|
340
340
|
extra_query: Query | None = None,
|
341
341
|
extra_body: Body | None = None,
|
342
|
-
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
342
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
343
343
|
) -> AsyncPaginator[TaskResponse, AsyncTasksPage[TaskResponse]]:
|
344
344
|
"""
|
345
345
|
Lists tasks for the authenticated user with cursor-based pagination and optional
|
@@ -409,7 +409,7 @@ class AsyncTasksResource(AsyncAPIResource):
|
|
409
409
|
extra_headers: Headers | None = None,
|
410
410
|
extra_query: Query | None = None,
|
411
411
|
extra_body: Body | None = None,
|
412
|
-
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
412
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
413
413
|
idempotency_key: str | None = None,
|
414
414
|
) -> None:
|
415
415
|
"""
|
@@ -454,7 +454,7 @@ class AsyncTasksResource(AsyncAPIResource):
|
|
454
454
|
extra_headers: Headers | None = None,
|
455
455
|
extra_query: Query | None = None,
|
456
456
|
extra_body: Body | None = None,
|
457
|
-
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
457
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
458
458
|
) -> None:
|
459
459
|
"""
|
460
460
|
Cancel a task that hasn't started processing yet:
|
@@ -490,14 +490,14 @@ class AsyncTasksResource(AsyncAPIResource):
|
|
490
490
|
self,
|
491
491
|
task_id: Optional[str],
|
492
492
|
*,
|
493
|
-
base64_urls: bool | NotGiven = NOT_GIVEN,
|
494
|
-
include_chunks: bool | NotGiven = NOT_GIVEN,
|
493
|
+
base64_urls: bool | Omit = omit,
|
494
|
+
include_chunks: bool | Omit = omit,
|
495
495
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
496
496
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
497
497
|
extra_headers: Headers | None = None,
|
498
498
|
extra_query: Query | None = None,
|
499
499
|
extra_body: Body | None = None,
|
500
|
-
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
500
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
501
501
|
) -> TaskResponse:
|
502
502
|
"""
|
503
503
|
Retrieves the current state of a task.
|
chunkr_ai/resources/webhooks.py
CHANGED
@@ -7,7 +7,7 @@ from typing import Mapping, cast
|
|
7
7
|
|
8
8
|
import httpx
|
9
9
|
|
10
|
-
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
|
10
|
+
from .._types import Body, Query, Headers, NotGiven, not_given
|
11
11
|
from .._compat import cached_property
|
12
12
|
from .._models import construct_type
|
13
13
|
from .._resource import SyncAPIResource, AsyncAPIResource
|
@@ -79,7 +79,7 @@ class WebhooksResource(SyncAPIResource):
|
|
79
79
|
extra_headers: Headers | None = None,
|
80
80
|
extra_query: Query | None = None,
|
81
81
|
extra_body: Body | None = None,
|
82
|
-
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
82
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
83
83
|
) -> WebhookURLResponse:
|
84
84
|
"""Get or create webhook for user and return dashboard URL"""
|
85
85
|
return self._get(
|
@@ -145,7 +145,7 @@ class AsyncWebhooksResource(AsyncAPIResource):
|
|
145
145
|
extra_headers: Headers | None = None,
|
146
146
|
extra_query: Query | None = None,
|
147
147
|
extra_body: Body | None = None,
|
148
|
-
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
148
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
149
149
|
) -> WebhookURLResponse:
|
150
150
|
"""Get or create webhook for user and return dashboard URL"""
|
151
151
|
return await self._get(
|
chunkr_ai/types/__init__.py
CHANGED
@@ -15,7 +15,6 @@ from .ocr_result import OcrResult as OcrResult
|
|
15
15
|
from .bounding_box import BoundingBox as BoundingBox
|
16
16
|
from .version_info import VersionInfo as VersionInfo
|
17
17
|
from .task_response import TaskResponse as TaskResponse
|
18
|
-
from .llm_processing import LlmProcessing as LlmProcessing
|
19
18
|
from .file_url_params import FileURLParams as FileURLParams
|
20
19
|
from .task_get_params import TaskGetParams as TaskGetParams
|
21
20
|
from .chunk_processing import ChunkProcessing as ChunkProcessing
|
@@ -26,7 +25,6 @@ from .file_create_params import FileCreateParams as FileCreateParams
|
|
26
25
|
from .segment_processing import SegmentProcessing as SegmentProcessing
|
27
26
|
from .files_list_response import FilesListResponse as FilesListResponse
|
28
27
|
from .parse_configuration import ParseConfiguration as ParseConfiguration
|
29
|
-
from .llm_processing_param import LlmProcessingParam as LlmProcessingParam
|
30
28
|
from .unwrap_webhook_event import UnwrapWebhookEvent as UnwrapWebhookEvent
|
31
29
|
from .webhook_url_response import WebhookURLResponse as WebhookURLResponse
|
32
30
|
from .extract_configuration import ExtractConfiguration as ExtractConfiguration
|
chunkr_ai/types/file_info.py
CHANGED
chunkr_ai/types/ocr_result.py
CHANGED
@@ -15,14 +15,14 @@ class OcrResult(BaseModel):
|
|
15
15
|
text: str
|
16
16
|
"""The recognized text of the OCR result."""
|
17
17
|
|
18
|
-
cell_ref: Optional[str] = None
|
19
|
-
"""
|
20
|
-
Excel-style cell reference (e.g., "A1" or "A1:B2") when OCR originates from a
|
21
|
-
spreadsheet cell
|
22
|
-
"""
|
23
|
-
|
24
18
|
confidence: Optional[float] = None
|
25
19
|
"""The confidence score of the recognized text."""
|
26
20
|
|
27
21
|
ocr_id: Optional[str] = None
|
28
22
|
"""The unique identifier for the OCR result."""
|
23
|
+
|
24
|
+
ss_cell_ref: Optional[str] = None
|
25
|
+
"""
|
26
|
+
Excel-style cell reference (e.g., "A1" or "A1:B2") when OCR originates from a
|
27
|
+
spreadsheet cell
|
28
|
+
"""
|
@@ -4,7 +4,6 @@ from typing import Optional
|
|
4
4
|
from typing_extensions import Literal
|
5
5
|
|
6
6
|
from .._models import BaseModel
|
7
|
-
from .llm_processing import LlmProcessing
|
8
7
|
from .chunk_processing import ChunkProcessing
|
9
8
|
from .segment_processing import SegmentProcessing
|
10
9
|
|
@@ -23,9 +22,6 @@ class ParseConfiguration(BaseModel):
|
|
23
22
|
LLM refusals etc.)
|
24
23
|
"""
|
25
24
|
|
26
|
-
llm_processing: Optional[LlmProcessing] = None
|
27
|
-
"""Controls the LLM used for the task."""
|
28
|
-
|
29
25
|
ocr_strategy: Optional[Literal["All", "Auto"]] = None
|
30
26
|
"""Controls the Optical Character Recognition (OCR) strategy.
|
31
27
|
|
@@ -5,7 +5,6 @@ from __future__ import annotations
|
|
5
5
|
from typing import Optional
|
6
6
|
from typing_extensions import Literal, TypedDict
|
7
7
|
|
8
|
-
from .llm_processing_param import LlmProcessingParam
|
9
8
|
from .chunk_processing_param import ChunkProcessingParam
|
10
9
|
from .segment_processing_param import SegmentProcessingParam
|
11
10
|
|
@@ -24,9 +23,6 @@ class ParseConfigurationParam(TypedDict, total=False):
|
|
24
23
|
LLM refusals etc.)
|
25
24
|
"""
|
26
25
|
|
27
|
-
llm_processing: LlmProcessingParam
|
28
|
-
"""Controls the LLM used for the task."""
|
29
|
-
|
30
26
|
ocr_strategy: Literal["All", "Auto"]
|
31
27
|
"""Controls the Optical Character Recognition (OCR) strategy.
|
32
28
|
|
chunkr_ai/types/segment.py
CHANGED
@@ -31,20 +31,23 @@ class Segment(BaseModel):
|
|
31
31
|
"Caption",
|
32
32
|
"Footnote",
|
33
33
|
"Formula",
|
34
|
+
"FormRegion",
|
35
|
+
"GraphicalItem",
|
36
|
+
"Legend",
|
37
|
+
"LineNumber",
|
34
38
|
"ListItem",
|
35
39
|
"Page",
|
36
40
|
"PageFooter",
|
37
41
|
"PageHeader",
|
42
|
+
"PageNumber",
|
38
43
|
"Picture",
|
39
|
-
"SectionHeader",
|
40
44
|
"Table",
|
41
45
|
"Text",
|
42
46
|
"Title",
|
47
|
+
"Unknown",
|
48
|
+
"SectionHeader",
|
43
49
|
]
|
44
|
-
"""
|
45
|
-
All the possible types for a segment. Note: Different configurations will
|
46
|
-
produce different types. Please refer to the documentation for more information.
|
47
|
-
"""
|
50
|
+
"""All the possible types for a segment."""
|
48
51
|
|
49
52
|
confidence: Optional[float] = None
|
50
53
|
"""Confidence score of the layout analysis model"""
|
@@ -47,6 +47,24 @@ class SegmentProcessing(BaseModel):
|
|
47
47
|
- `extended_context` uses the full page image as context for LLM generation.
|
48
48
|
"""
|
49
49
|
|
50
|
+
form_region: Optional[GenerationConfig] = FieldInfo(alias="FormRegion", default=None)
|
51
|
+
"""Controls the processing and generation for the segment.
|
52
|
+
|
53
|
+
- `crop_image` controls whether to crop the file's images to the segment's
|
54
|
+
bounding box. The cropped image will be stored in the segment's `image` field.
|
55
|
+
Use `All` to always crop, or `Auto` to only crop when needed for
|
56
|
+
post-processing.
|
57
|
+
- `format` specifies the output format: `Html` or `Markdown`
|
58
|
+
- `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
|
59
|
+
- `Auto`: Process content automatically
|
60
|
+
- `LLM`: Use large language models for processing
|
61
|
+
- `Ignore`: Exclude segments from final output
|
62
|
+
- `description` enables LLM-generated descriptions for segments. **Note:** This
|
63
|
+
uses chunkr's own VLM models and is not configurable via LLM processing
|
64
|
+
configuration.
|
65
|
+
- `extended_context` uses the full page image as context for LLM generation.
|
66
|
+
"""
|
67
|
+
|
50
68
|
formula: Optional[GenerationConfig] = FieldInfo(alias="Formula", default=None)
|
51
69
|
"""Controls the processing and generation for the segment.
|
52
70
|
|
@@ -65,6 +83,60 @@ class SegmentProcessing(BaseModel):
|
|
65
83
|
- `extended_context` uses the full page image as context for LLM generation.
|
66
84
|
"""
|
67
85
|
|
86
|
+
graphical_item: Optional[GenerationConfig] = FieldInfo(alias="GraphicalItem", default=None)
|
87
|
+
"""Controls the processing and generation for the segment.
|
88
|
+
|
89
|
+
- `crop_image` controls whether to crop the file's images to the segment's
|
90
|
+
bounding box. The cropped image will be stored in the segment's `image` field.
|
91
|
+
Use `All` to always crop, or `Auto` to only crop when needed for
|
92
|
+
post-processing.
|
93
|
+
- `format` specifies the output format: `Html` or `Markdown`
|
94
|
+
- `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
|
95
|
+
- `Auto`: Process content automatically
|
96
|
+
- `LLM`: Use large language models for processing
|
97
|
+
- `Ignore`: Exclude segments from final output
|
98
|
+
- `description` enables LLM-generated descriptions for segments. **Note:** This
|
99
|
+
uses chunkr's own VLM models and is not configurable via LLM processing
|
100
|
+
configuration.
|
101
|
+
- `extended_context` uses the full page image as context for LLM generation.
|
102
|
+
"""
|
103
|
+
|
104
|
+
legend: Optional[GenerationConfig] = FieldInfo(alias="Legend", default=None)
|
105
|
+
"""Controls the processing and generation for the segment.
|
106
|
+
|
107
|
+
- `crop_image` controls whether to crop the file's images to the segment's
|
108
|
+
bounding box. The cropped image will be stored in the segment's `image` field.
|
109
|
+
Use `All` to always crop, or `Auto` to only crop when needed for
|
110
|
+
post-processing.
|
111
|
+
- `format` specifies the output format: `Html` or `Markdown`
|
112
|
+
- `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
|
113
|
+
- `Auto`: Process content automatically
|
114
|
+
- `LLM`: Use large language models for processing
|
115
|
+
- `Ignore`: Exclude segments from final output
|
116
|
+
- `description` enables LLM-generated descriptions for segments. **Note:** This
|
117
|
+
uses chunkr's own VLM models and is not configurable via LLM processing
|
118
|
+
configuration.
|
119
|
+
- `extended_context` uses the full page image as context for LLM generation.
|
120
|
+
"""
|
121
|
+
|
122
|
+
line_number: Optional[GenerationConfig] = FieldInfo(alias="LineNumber", default=None)
|
123
|
+
"""Controls the processing and generation for the segment.
|
124
|
+
|
125
|
+
- `crop_image` controls whether to crop the file's images to the segment's
|
126
|
+
bounding box. The cropped image will be stored in the segment's `image` field.
|
127
|
+
Use `All` to always crop, or `Auto` to only crop when needed for
|
128
|
+
post-processing.
|
129
|
+
- `format` specifies the output format: `Html` or `Markdown`
|
130
|
+
- `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
|
131
|
+
- `Auto`: Process content automatically
|
132
|
+
- `LLM`: Use large language models for processing
|
133
|
+
- `Ignore`: Exclude segments from final output
|
134
|
+
- `description` enables LLM-generated descriptions for segments. **Note:** This
|
135
|
+
uses chunkr's own VLM models and is not configurable via LLM processing
|
136
|
+
configuration.
|
137
|
+
- `extended_context` uses the full page image as context for LLM generation.
|
138
|
+
"""
|
139
|
+
|
68
140
|
list_item: Optional[GenerationConfig] = FieldInfo(alias="ListItem", default=None)
|
69
141
|
"""Controls the processing and generation for the segment.
|
70
142
|
|
@@ -137,7 +209,7 @@ class SegmentProcessing(BaseModel):
|
|
137
209
|
- `extended_context` uses the full page image as context for LLM generation.
|
138
210
|
"""
|
139
211
|
|
140
|
-
|
212
|
+
page_number: Optional[GenerationConfig] = FieldInfo(alias="PageNumber", default=None)
|
141
213
|
"""Controls the processing and generation for the segment.
|
142
214
|
|
143
215
|
- `crop_image` controls whether to crop the file's images to the segment's
|
@@ -155,7 +227,7 @@ class SegmentProcessing(BaseModel):
|
|
155
227
|
- `extended_context` uses the full page image as context for LLM generation.
|
156
228
|
"""
|
157
229
|
|
158
|
-
|
230
|
+
picture: Optional[GenerationConfig] = FieldInfo(alias="Picture", default=None)
|
159
231
|
"""Controls the processing and generation for the segment.
|
160
232
|
|
161
233
|
- `crop_image` controls whether to crop the file's images to the segment's
|
@@ -226,3 +298,21 @@ class SegmentProcessing(BaseModel):
|
|
226
298
|
configuration.
|
227
299
|
- `extended_context` uses the full page image as context for LLM generation.
|
228
300
|
"""
|
301
|
+
|
302
|
+
unknown: Optional[GenerationConfig] = FieldInfo(alias="Unknown", default=None)
|
303
|
+
"""Controls the processing and generation for the segment.
|
304
|
+
|
305
|
+
- `crop_image` controls whether to crop the file's images to the segment's
|
306
|
+
bounding box. The cropped image will be stored in the segment's `image` field.
|
307
|
+
Use `All` to always crop, or `Auto` to only crop when needed for
|
308
|
+
post-processing.
|
309
|
+
- `format` specifies the output format: `Html` or `Markdown`
|
310
|
+
- `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
|
311
|
+
- `Auto`: Process content automatically
|
312
|
+
- `LLM`: Use large language models for processing
|
313
|
+
- `Ignore`: Exclude segments from final output
|
314
|
+
- `description` enables LLM-generated descriptions for segments. **Note:** This
|
315
|
+
uses chunkr's own VLM models and is not configurable via LLM processing
|
316
|
+
configuration.
|
317
|
+
- `extended_context` uses the full page image as context for LLM generation.
|
318
|
+
"""
|