chunkr-ai 0.1.0a8__py3-none-any.whl → 0.1.0a9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,7 +6,7 @@ from typing import Optional
6
6
 
7
7
  import httpx
8
8
 
9
- from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
9
+ from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
10
10
  from ..._utils import maybe_transform, async_maybe_transform
11
11
  from ..._compat import cached_property
12
12
  from ..._resource import SyncAPIResource, AsyncAPIResource
@@ -50,16 +50,16 @@ class ExtractResource(SyncAPIResource):
50
50
  *,
51
51
  file: str,
52
52
  schema: object,
53
- expires_in: Optional[int] | NotGiven = NOT_GIVEN,
54
- file_name: Optional[str] | NotGiven = NOT_GIVEN,
55
- parse_configuration: Optional[ParseConfigurationParam] | NotGiven = NOT_GIVEN,
56
- system_prompt: Optional[str] | NotGiven = NOT_GIVEN,
53
+ expires_in: Optional[int] | Omit = omit,
54
+ file_name: Optional[str] | Omit = omit,
55
+ parse_configuration: Optional[ParseConfigurationParam] | Omit = omit,
56
+ system_prompt: Optional[str] | Omit = omit,
57
57
  # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
58
58
  # The extra values given here take precedence over values defined on the client or passed to this method.
59
59
  extra_headers: Headers | None = None,
60
60
  extra_query: Query | None = None,
61
61
  extra_body: Body | None = None,
62
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
62
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
63
63
  idempotency_key: str | None = None,
64
64
  ) -> ExtractCreateResponse:
65
65
  """
@@ -129,14 +129,14 @@ class ExtractResource(SyncAPIResource):
129
129
  self,
130
130
  task_id: Optional[str],
131
131
  *,
132
- base64_urls: bool | NotGiven = NOT_GIVEN,
133
- include_chunks: bool | NotGiven = NOT_GIVEN,
132
+ base64_urls: bool | Omit = omit,
133
+ include_chunks: bool | Omit = omit,
134
134
  # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
135
135
  # The extra values given here take precedence over values defined on the client or passed to this method.
136
136
  extra_headers: Headers | None = None,
137
137
  extra_query: Query | None = None,
138
138
  extra_body: Body | None = None,
139
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
139
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
140
140
  ) -> ExtractGetResponse:
141
141
  """
142
142
  Retrieves the current state of an extract task.
@@ -210,16 +210,16 @@ class AsyncExtractResource(AsyncAPIResource):
210
210
  *,
211
211
  file: str,
212
212
  schema: object,
213
- expires_in: Optional[int] | NotGiven = NOT_GIVEN,
214
- file_name: Optional[str] | NotGiven = NOT_GIVEN,
215
- parse_configuration: Optional[ParseConfigurationParam] | NotGiven = NOT_GIVEN,
216
- system_prompt: Optional[str] | NotGiven = NOT_GIVEN,
213
+ expires_in: Optional[int] | Omit = omit,
214
+ file_name: Optional[str] | Omit = omit,
215
+ parse_configuration: Optional[ParseConfigurationParam] | Omit = omit,
216
+ system_prompt: Optional[str] | Omit = omit,
217
217
  # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
218
218
  # The extra values given here take precedence over values defined on the client or passed to this method.
219
219
  extra_headers: Headers | None = None,
220
220
  extra_query: Query | None = None,
221
221
  extra_body: Body | None = None,
222
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
222
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
223
223
  idempotency_key: str | None = None,
224
224
  ) -> ExtractCreateResponse:
225
225
  """
@@ -289,14 +289,14 @@ class AsyncExtractResource(AsyncAPIResource):
289
289
  self,
290
290
  task_id: Optional[str],
291
291
  *,
292
- base64_urls: bool | NotGiven = NOT_GIVEN,
293
- include_chunks: bool | NotGiven = NOT_GIVEN,
292
+ base64_urls: bool | Omit = omit,
293
+ include_chunks: bool | Omit = omit,
294
294
  # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
295
295
  # The extra values given here take precedence over values defined on the client or passed to this method.
296
296
  extra_headers: Headers | None = None,
297
297
  extra_query: Query | None = None,
298
298
  extra_body: Body | None = None,
299
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
299
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
300
300
  ) -> ExtractGetResponse:
301
301
  """
302
302
  Retrieves the current state of an extract task.
@@ -7,7 +7,7 @@ from typing_extensions import Literal
7
7
 
8
8
  import httpx
9
9
 
10
- from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
10
+ from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
11
11
  from ..._utils import maybe_transform, async_maybe_transform
12
12
  from ..._compat import cached_property
13
13
  from ..._resource import SyncAPIResource, AsyncAPIResource
@@ -19,7 +19,6 @@ from ..._response import (
19
19
  )
20
20
  from ...types.tasks import parse_get_params, parse_create_params
21
21
  from ..._base_client import make_request_options
22
- from ...types.llm_processing_param import LlmProcessingParam
23
22
  from ...types.chunk_processing_param import ChunkProcessingParam
24
23
  from ...types.segment_processing_param import SegmentProcessingParam
25
24
  from ...types.tasks.parse_get_response import ParseGetResponse
@@ -52,21 +51,20 @@ class ParseResource(SyncAPIResource):
52
51
  self,
53
52
  *,
54
53
  file: str,
55
- chunk_processing: ChunkProcessingParam | NotGiven = NOT_GIVEN,
56
- error_handling: Literal["Fail", "Continue"] | NotGiven = NOT_GIVEN,
57
- expires_in: Optional[int] | NotGiven = NOT_GIVEN,
58
- file_name: Optional[str] | NotGiven = NOT_GIVEN,
59
- llm_processing: LlmProcessingParam | NotGiven = NOT_GIVEN,
60
- ocr_strategy: Literal["All", "Auto"] | NotGiven = NOT_GIVEN,
61
- pipeline: Literal["Azure", "Chunkr"] | NotGiven = NOT_GIVEN,
62
- segment_processing: Optional[SegmentProcessingParam] | NotGiven = NOT_GIVEN,
63
- segmentation_strategy: Literal["LayoutAnalysis", "Page"] | NotGiven = NOT_GIVEN,
54
+ chunk_processing: ChunkProcessingParam | Omit = omit,
55
+ error_handling: Literal["Fail", "Continue"] | Omit = omit,
56
+ expires_in: Optional[int] | Omit = omit,
57
+ file_name: Optional[str] | Omit = omit,
58
+ ocr_strategy: Literal["All", "Auto"] | Omit = omit,
59
+ pipeline: Literal["Azure", "Chunkr"] | Omit = omit,
60
+ segment_processing: Optional[SegmentProcessingParam] | Omit = omit,
61
+ segmentation_strategy: Literal["LayoutAnalysis", "Page"] | Omit = omit,
64
62
  # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
65
63
  # The extra values given here take precedence over values defined on the client or passed to this method.
66
64
  extra_headers: Headers | None = None,
67
65
  extra_query: Query | None = None,
68
66
  extra_body: Body | None = None,
69
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
67
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
70
68
  idempotency_key: str | None = None,
71
69
  ) -> ParseCreateResponse:
72
70
  """
@@ -99,8 +97,6 @@ class ParseResource(SyncAPIResource):
99
97
 
100
98
  file_name: The name of the file to be parsed. If not set a name will be generated.
101
99
 
102
- llm_processing: Controls the LLM used for the task.
103
-
104
100
  ocr_strategy: Controls the Optical Character Recognition (OCR) strategy.
105
101
 
106
102
  - `All`: Processes all pages with OCR. (Latency penalty: ~0.5 seconds per page)
@@ -150,7 +146,6 @@ class ParseResource(SyncAPIResource):
150
146
  "error_handling": error_handling,
151
147
  "expires_in": expires_in,
152
148
  "file_name": file_name,
153
- "llm_processing": llm_processing,
154
149
  "ocr_strategy": ocr_strategy,
155
150
  "pipeline": pipeline,
156
151
  "segment_processing": segment_processing,
@@ -172,14 +167,14 @@ class ParseResource(SyncAPIResource):
172
167
  self,
173
168
  task_id: Optional[str],
174
169
  *,
175
- base64_urls: bool | NotGiven = NOT_GIVEN,
176
- include_chunks: bool | NotGiven = NOT_GIVEN,
170
+ base64_urls: bool | Omit = omit,
171
+ include_chunks: bool | Omit = omit,
177
172
  # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
178
173
  # The extra values given here take precedence over values defined on the client or passed to this method.
179
174
  extra_headers: Headers | None = None,
180
175
  extra_query: Query | None = None,
181
176
  extra_body: Body | None = None,
182
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
177
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
183
178
  ) -> ParseGetResponse:
184
179
  """
185
180
  Retrieves the current state of a parse task.
@@ -252,21 +247,20 @@ class AsyncParseResource(AsyncAPIResource):
252
247
  self,
253
248
  *,
254
249
  file: str,
255
- chunk_processing: ChunkProcessingParam | NotGiven = NOT_GIVEN,
256
- error_handling: Literal["Fail", "Continue"] | NotGiven = NOT_GIVEN,
257
- expires_in: Optional[int] | NotGiven = NOT_GIVEN,
258
- file_name: Optional[str] | NotGiven = NOT_GIVEN,
259
- llm_processing: LlmProcessingParam | NotGiven = NOT_GIVEN,
260
- ocr_strategy: Literal["All", "Auto"] | NotGiven = NOT_GIVEN,
261
- pipeline: Literal["Azure", "Chunkr"] | NotGiven = NOT_GIVEN,
262
- segment_processing: Optional[SegmentProcessingParam] | NotGiven = NOT_GIVEN,
263
- segmentation_strategy: Literal["LayoutAnalysis", "Page"] | NotGiven = NOT_GIVEN,
250
+ chunk_processing: ChunkProcessingParam | Omit = omit,
251
+ error_handling: Literal["Fail", "Continue"] | Omit = omit,
252
+ expires_in: Optional[int] | Omit = omit,
253
+ file_name: Optional[str] | Omit = omit,
254
+ ocr_strategy: Literal["All", "Auto"] | Omit = omit,
255
+ pipeline: Literal["Azure", "Chunkr"] | Omit = omit,
256
+ segment_processing: Optional[SegmentProcessingParam] | Omit = omit,
257
+ segmentation_strategy: Literal["LayoutAnalysis", "Page"] | Omit = omit,
264
258
  # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
265
259
  # The extra values given here take precedence over values defined on the client or passed to this method.
266
260
  extra_headers: Headers | None = None,
267
261
  extra_query: Query | None = None,
268
262
  extra_body: Body | None = None,
269
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
263
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
270
264
  idempotency_key: str | None = None,
271
265
  ) -> ParseCreateResponse:
272
266
  """
@@ -299,8 +293,6 @@ class AsyncParseResource(AsyncAPIResource):
299
293
 
300
294
  file_name: The name of the file to be parsed. If not set a name will be generated.
301
295
 
302
- llm_processing: Controls the LLM used for the task.
303
-
304
296
  ocr_strategy: Controls the Optical Character Recognition (OCR) strategy.
305
297
 
306
298
  - `All`: Processes all pages with OCR. (Latency penalty: ~0.5 seconds per page)
@@ -350,7 +342,6 @@ class AsyncParseResource(AsyncAPIResource):
350
342
  "error_handling": error_handling,
351
343
  "expires_in": expires_in,
352
344
  "file_name": file_name,
353
- "llm_processing": llm_processing,
354
345
  "ocr_strategy": ocr_strategy,
355
346
  "pipeline": pipeline,
356
347
  "segment_processing": segment_processing,
@@ -372,14 +363,14 @@ class AsyncParseResource(AsyncAPIResource):
372
363
  self,
373
364
  task_id: Optional[str],
374
365
  *,
375
- base64_urls: bool | NotGiven = NOT_GIVEN,
376
- include_chunks: bool | NotGiven = NOT_GIVEN,
366
+ base64_urls: bool | Omit = omit,
367
+ include_chunks: bool | Omit = omit,
377
368
  # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
378
369
  # The extra values given here take precedence over values defined on the client or passed to this method.
379
370
  extra_headers: Headers | None = None,
380
371
  extra_query: Query | None = None,
381
372
  extra_body: Body | None = None,
382
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
373
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
383
374
  ) -> ParseGetResponse:
384
375
  """
385
376
  Retrieves the current state of a parse task.
@@ -25,7 +25,7 @@ from .extract import (
25
25
  ExtractResourceWithStreamingResponse,
26
26
  AsyncExtractResourceWithStreamingResponse,
27
27
  )
28
- from ..._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven
28
+ from ..._types import Body, Omit, Query, Headers, NoneType, NotGiven, omit, not_given
29
29
  from ..._utils import maybe_transform, async_maybe_transform
30
30
  from ..._compat import cached_property
31
31
  from ..._resource import SyncAPIResource, AsyncAPIResource
@@ -73,21 +73,21 @@ class TasksResource(SyncAPIResource):
73
73
  def list(
74
74
  self,
75
75
  *,
76
- base64_urls: bool | NotGiven = NOT_GIVEN,
77
- cursor: Union[str, datetime] | NotGiven = NOT_GIVEN,
78
- end: Union[str, datetime] | NotGiven = NOT_GIVEN,
79
- include_chunks: bool | NotGiven = NOT_GIVEN,
80
- limit: int | NotGiven = NOT_GIVEN,
81
- sort: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
82
- start: Union[str, datetime] | NotGiven = NOT_GIVEN,
83
- statuses: List[Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]] | NotGiven = NOT_GIVEN,
84
- task_types: List[Literal["Parse", "Extract"]] | NotGiven = NOT_GIVEN,
76
+ base64_urls: bool | Omit = omit,
77
+ cursor: Union[str, datetime] | Omit = omit,
78
+ end: Union[str, datetime] | Omit = omit,
79
+ include_chunks: bool | Omit = omit,
80
+ limit: int | Omit = omit,
81
+ sort: Literal["asc", "desc"] | Omit = omit,
82
+ start: Union[str, datetime] | Omit = omit,
83
+ statuses: List[Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]] | Omit = omit,
84
+ task_types: List[Literal["Parse", "Extract"]] | Omit = omit,
85
85
  # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
86
86
  # The extra values given here take precedence over values defined on the client or passed to this method.
87
87
  extra_headers: Headers | None = None,
88
88
  extra_query: Query | None = None,
89
89
  extra_body: Body | None = None,
90
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
90
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
91
91
  ) -> SyncTasksPage[TaskResponse]:
92
92
  """
93
93
  Lists tasks for the authenticated user with cursor-based pagination and optional
@@ -157,7 +157,7 @@ class TasksResource(SyncAPIResource):
157
157
  extra_headers: Headers | None = None,
158
158
  extra_query: Query | None = None,
159
159
  extra_body: Body | None = None,
160
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
160
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
161
161
  idempotency_key: str | None = None,
162
162
  ) -> None:
163
163
  """
@@ -202,7 +202,7 @@ class TasksResource(SyncAPIResource):
202
202
  extra_headers: Headers | None = None,
203
203
  extra_query: Query | None = None,
204
204
  extra_body: Body | None = None,
205
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
205
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
206
206
  ) -> None:
207
207
  """
208
208
  Cancel a task that hasn't started processing yet:
@@ -238,14 +238,14 @@ class TasksResource(SyncAPIResource):
238
238
  self,
239
239
  task_id: Optional[str],
240
240
  *,
241
- base64_urls: bool | NotGiven = NOT_GIVEN,
242
- include_chunks: bool | NotGiven = NOT_GIVEN,
241
+ base64_urls: bool | Omit = omit,
242
+ include_chunks: bool | Omit = omit,
243
243
  # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
244
244
  # The extra values given here take precedence over values defined on the client or passed to this method.
245
245
  extra_headers: Headers | None = None,
246
246
  extra_query: Query | None = None,
247
247
  extra_body: Body | None = None,
248
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
248
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
249
249
  ) -> TaskResponse:
250
250
  """
251
251
  Retrieves the current state of a task.
@@ -325,21 +325,21 @@ class AsyncTasksResource(AsyncAPIResource):
325
325
  def list(
326
326
  self,
327
327
  *,
328
- base64_urls: bool | NotGiven = NOT_GIVEN,
329
- cursor: Union[str, datetime] | NotGiven = NOT_GIVEN,
330
- end: Union[str, datetime] | NotGiven = NOT_GIVEN,
331
- include_chunks: bool | NotGiven = NOT_GIVEN,
332
- limit: int | NotGiven = NOT_GIVEN,
333
- sort: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
334
- start: Union[str, datetime] | NotGiven = NOT_GIVEN,
335
- statuses: List[Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]] | NotGiven = NOT_GIVEN,
336
- task_types: List[Literal["Parse", "Extract"]] | NotGiven = NOT_GIVEN,
328
+ base64_urls: bool | Omit = omit,
329
+ cursor: Union[str, datetime] | Omit = omit,
330
+ end: Union[str, datetime] | Omit = omit,
331
+ include_chunks: bool | Omit = omit,
332
+ limit: int | Omit = omit,
333
+ sort: Literal["asc", "desc"] | Omit = omit,
334
+ start: Union[str, datetime] | Omit = omit,
335
+ statuses: List[Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]] | Omit = omit,
336
+ task_types: List[Literal["Parse", "Extract"]] | Omit = omit,
337
337
  # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
338
338
  # The extra values given here take precedence over values defined on the client or passed to this method.
339
339
  extra_headers: Headers | None = None,
340
340
  extra_query: Query | None = None,
341
341
  extra_body: Body | None = None,
342
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
342
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
343
343
  ) -> AsyncPaginator[TaskResponse, AsyncTasksPage[TaskResponse]]:
344
344
  """
345
345
  Lists tasks for the authenticated user with cursor-based pagination and optional
@@ -409,7 +409,7 @@ class AsyncTasksResource(AsyncAPIResource):
409
409
  extra_headers: Headers | None = None,
410
410
  extra_query: Query | None = None,
411
411
  extra_body: Body | None = None,
412
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
412
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
413
413
  idempotency_key: str | None = None,
414
414
  ) -> None:
415
415
  """
@@ -454,7 +454,7 @@ class AsyncTasksResource(AsyncAPIResource):
454
454
  extra_headers: Headers | None = None,
455
455
  extra_query: Query | None = None,
456
456
  extra_body: Body | None = None,
457
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
457
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
458
458
  ) -> None:
459
459
  """
460
460
  Cancel a task that hasn't started processing yet:
@@ -490,14 +490,14 @@ class AsyncTasksResource(AsyncAPIResource):
490
490
  self,
491
491
  task_id: Optional[str],
492
492
  *,
493
- base64_urls: bool | NotGiven = NOT_GIVEN,
494
- include_chunks: bool | NotGiven = NOT_GIVEN,
493
+ base64_urls: bool | Omit = omit,
494
+ include_chunks: bool | Omit = omit,
495
495
  # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
496
496
  # The extra values given here take precedence over values defined on the client or passed to this method.
497
497
  extra_headers: Headers | None = None,
498
498
  extra_query: Query | None = None,
499
499
  extra_body: Body | None = None,
500
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
500
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
501
501
  ) -> TaskResponse:
502
502
  """
503
503
  Retrieves the current state of a task.
@@ -7,7 +7,7 @@ from typing import Mapping, cast
7
7
 
8
8
  import httpx
9
9
 
10
- from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
10
+ from .._types import Body, Query, Headers, NotGiven, not_given
11
11
  from .._compat import cached_property
12
12
  from .._models import construct_type
13
13
  from .._resource import SyncAPIResource, AsyncAPIResource
@@ -79,7 +79,7 @@ class WebhooksResource(SyncAPIResource):
79
79
  extra_headers: Headers | None = None,
80
80
  extra_query: Query | None = None,
81
81
  extra_body: Body | None = None,
82
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
82
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
83
83
  ) -> WebhookURLResponse:
84
84
  """Get or create webhook for user and return dashboard URL"""
85
85
  return self._get(
@@ -145,7 +145,7 @@ class AsyncWebhooksResource(AsyncAPIResource):
145
145
  extra_headers: Headers | None = None,
146
146
  extra_query: Query | None = None,
147
147
  extra_body: Body | None = None,
148
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
148
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
149
149
  ) -> WebhookURLResponse:
150
150
  """Get or create webhook for user and return dashboard URL"""
151
151
  return await self._get(
@@ -15,7 +15,6 @@ from .ocr_result import OcrResult as OcrResult
15
15
  from .bounding_box import BoundingBox as BoundingBox
16
16
  from .version_info import VersionInfo as VersionInfo
17
17
  from .task_response import TaskResponse as TaskResponse
18
- from .llm_processing import LlmProcessing as LlmProcessing
19
18
  from .file_url_params import FileURLParams as FileURLParams
20
19
  from .task_get_params import TaskGetParams as TaskGetParams
21
20
  from .chunk_processing import ChunkProcessing as ChunkProcessing
@@ -26,7 +25,6 @@ from .file_create_params import FileCreateParams as FileCreateParams
26
25
  from .segment_processing import SegmentProcessing as SegmentProcessing
27
26
  from .files_list_response import FilesListResponse as FilesListResponse
28
27
  from .parse_configuration import ParseConfiguration as ParseConfiguration
29
- from .llm_processing_param import LlmProcessingParam as LlmProcessingParam
30
28
  from .unwrap_webhook_event import UnwrapWebhookEvent as UnwrapWebhookEvent
31
29
  from .webhook_url_response import WebhookURLResponse as WebhookURLResponse
32
30
  from .extract_configuration import ExtractConfiguration as ExtractConfiguration
@@ -19,3 +19,6 @@ class FileInfo(BaseModel):
19
19
 
20
20
  page_count: Optional[int] = None
21
21
  """The number of pages in the file."""
22
+
23
+ ss_cell_count: Optional[int] = None
24
+ """The number of cells in the file. Only used for spreadsheets."""
@@ -15,14 +15,14 @@ class OcrResult(BaseModel):
15
15
  text: str
16
16
  """The recognized text of the OCR result."""
17
17
 
18
- cell_ref: Optional[str] = None
19
- """
20
- Excel-style cell reference (e.g., "A1" or "A1:B2") when OCR originates from a
21
- spreadsheet cell
22
- """
23
-
24
18
  confidence: Optional[float] = None
25
19
  """The confidence score of the recognized text."""
26
20
 
27
21
  ocr_id: Optional[str] = None
28
22
  """The unique identifier for the OCR result."""
23
+
24
+ ss_cell_ref: Optional[str] = None
25
+ """
26
+ Excel-style cell reference (e.g., "A1" or "A1:B2") when OCR originates from a
27
+ spreadsheet cell
28
+ """
@@ -4,7 +4,6 @@ from typing import Optional
4
4
  from typing_extensions import Literal
5
5
 
6
6
  from .._models import BaseModel
7
- from .llm_processing import LlmProcessing
8
7
  from .chunk_processing import ChunkProcessing
9
8
  from .segment_processing import SegmentProcessing
10
9
 
@@ -23,9 +22,6 @@ class ParseConfiguration(BaseModel):
23
22
  LLM refusals etc.)
24
23
  """
25
24
 
26
- llm_processing: Optional[LlmProcessing] = None
27
- """Controls the LLM used for the task."""
28
-
29
25
  ocr_strategy: Optional[Literal["All", "Auto"]] = None
30
26
  """Controls the Optical Character Recognition (OCR) strategy.
31
27
 
@@ -5,7 +5,6 @@ from __future__ import annotations
5
5
  from typing import Optional
6
6
  from typing_extensions import Literal, TypedDict
7
7
 
8
- from .llm_processing_param import LlmProcessingParam
9
8
  from .chunk_processing_param import ChunkProcessingParam
10
9
  from .segment_processing_param import SegmentProcessingParam
11
10
 
@@ -24,9 +23,6 @@ class ParseConfigurationParam(TypedDict, total=False):
24
23
  LLM refusals etc.)
25
24
  """
26
25
 
27
- llm_processing: LlmProcessingParam
28
- """Controls the LLM used for the task."""
29
-
30
26
  ocr_strategy: Literal["All", "Auto"]
31
27
  """Controls the Optical Character Recognition (OCR) strategy.
32
28
 
@@ -31,20 +31,23 @@ class Segment(BaseModel):
31
31
  "Caption",
32
32
  "Footnote",
33
33
  "Formula",
34
+ "FormRegion",
35
+ "GraphicalItem",
36
+ "Legend",
37
+ "LineNumber",
34
38
  "ListItem",
35
39
  "Page",
36
40
  "PageFooter",
37
41
  "PageHeader",
42
+ "PageNumber",
38
43
  "Picture",
39
- "SectionHeader",
40
44
  "Table",
41
45
  "Text",
42
46
  "Title",
47
+ "Unknown",
48
+ "SectionHeader",
43
49
  ]
44
- """
45
- All the possible types for a segment. Note: Different configurations will
46
- produce different types. Please refer to the documentation for more information.
47
- """
50
+ """All the possible types for a segment."""
48
51
 
49
52
  confidence: Optional[float] = None
50
53
  """Confidence score of the layout analysis model"""
@@ -47,6 +47,24 @@ class SegmentProcessing(BaseModel):
47
47
  - `extended_context` uses the full page image as context for LLM generation.
48
48
  """
49
49
 
50
+ form_region: Optional[GenerationConfig] = FieldInfo(alias="FormRegion", default=None)
51
+ """Controls the processing and generation for the segment.
52
+
53
+ - `crop_image` controls whether to crop the file's images to the segment's
54
+ bounding box. The cropped image will be stored in the segment's `image` field.
55
+ Use `All` to always crop, or `Auto` to only crop when needed for
56
+ post-processing.
57
+ - `format` specifies the output format: `Html` or `Markdown`
58
+ - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
59
+ - `Auto`: Process content automatically
60
+ - `LLM`: Use large language models for processing
61
+ - `Ignore`: Exclude segments from final output
62
+ - `description` enables LLM-generated descriptions for segments. **Note:** This
63
+ uses chunkr's own VLM models and is not configurable via LLM processing
64
+ configuration.
65
+ - `extended_context` uses the full page image as context for LLM generation.
66
+ """
67
+
50
68
  formula: Optional[GenerationConfig] = FieldInfo(alias="Formula", default=None)
51
69
  """Controls the processing and generation for the segment.
52
70
 
@@ -65,6 +83,60 @@ class SegmentProcessing(BaseModel):
65
83
  - `extended_context` uses the full page image as context for LLM generation.
66
84
  """
67
85
 
86
+ graphical_item: Optional[GenerationConfig] = FieldInfo(alias="GraphicalItem", default=None)
87
+ """Controls the processing and generation for the segment.
88
+
89
+ - `crop_image` controls whether to crop the file's images to the segment's
90
+ bounding box. The cropped image will be stored in the segment's `image` field.
91
+ Use `All` to always crop, or `Auto` to only crop when needed for
92
+ post-processing.
93
+ - `format` specifies the output format: `Html` or `Markdown`
94
+ - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
95
+ - `Auto`: Process content automatically
96
+ - `LLM`: Use large language models for processing
97
+ - `Ignore`: Exclude segments from final output
98
+ - `description` enables LLM-generated descriptions for segments. **Note:** This
99
+ uses chunkr's own VLM models and is not configurable via LLM processing
100
+ configuration.
101
+ - `extended_context` uses the full page image as context for LLM generation.
102
+ """
103
+
104
+ legend: Optional[GenerationConfig] = FieldInfo(alias="Legend", default=None)
105
+ """Controls the processing and generation for the segment.
106
+
107
+ - `crop_image` controls whether to crop the file's images to the segment's
108
+ bounding box. The cropped image will be stored in the segment's `image` field.
109
+ Use `All` to always crop, or `Auto` to only crop when needed for
110
+ post-processing.
111
+ - `format` specifies the output format: `Html` or `Markdown`
112
+ - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
113
+ - `Auto`: Process content automatically
114
+ - `LLM`: Use large language models for processing
115
+ - `Ignore`: Exclude segments from final output
116
+ - `description` enables LLM-generated descriptions for segments. **Note:** This
117
+ uses chunkr's own VLM models and is not configurable via LLM processing
118
+ configuration.
119
+ - `extended_context` uses the full page image as context for LLM generation.
120
+ """
121
+
122
+ line_number: Optional[GenerationConfig] = FieldInfo(alias="LineNumber", default=None)
123
+ """Controls the processing and generation for the segment.
124
+
125
+ - `crop_image` controls whether to crop the file's images to the segment's
126
+ bounding box. The cropped image will be stored in the segment's `image` field.
127
+ Use `All` to always crop, or `Auto` to only crop when needed for
128
+ post-processing.
129
+ - `format` specifies the output format: `Html` or `Markdown`
130
+ - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
131
+ - `Auto`: Process content automatically
132
+ - `LLM`: Use large language models for processing
133
+ - `Ignore`: Exclude segments from final output
134
+ - `description` enables LLM-generated descriptions for segments. **Note:** This
135
+ uses chunkr's own VLM models and is not configurable via LLM processing
136
+ configuration.
137
+ - `extended_context` uses the full page image as context for LLM generation.
138
+ """
139
+
68
140
  list_item: Optional[GenerationConfig] = FieldInfo(alias="ListItem", default=None)
69
141
  """Controls the processing and generation for the segment.
70
142
 
@@ -137,7 +209,7 @@ class SegmentProcessing(BaseModel):
137
209
  - `extended_context` uses the full page image as context for LLM generation.
138
210
  """
139
211
 
140
- picture: Optional[GenerationConfig] = FieldInfo(alias="Picture", default=None)
212
+ page_number: Optional[GenerationConfig] = FieldInfo(alias="PageNumber", default=None)
141
213
  """Controls the processing and generation for the segment.
142
214
 
143
215
  - `crop_image` controls whether to crop the file's images to the segment's
@@ -155,7 +227,7 @@ class SegmentProcessing(BaseModel):
155
227
  - `extended_context` uses the full page image as context for LLM generation.
156
228
  """
157
229
 
158
- section_header: Optional[GenerationConfig] = FieldInfo(alias="SectionHeader", default=None)
230
+ picture: Optional[GenerationConfig] = FieldInfo(alias="Picture", default=None)
159
231
  """Controls the processing and generation for the segment.
160
232
 
161
233
  - `crop_image` controls whether to crop the file's images to the segment's
@@ -226,3 +298,21 @@ class SegmentProcessing(BaseModel):
226
298
  configuration.
227
299
  - `extended_context` uses the full page image as context for LLM generation.
228
300
  """
301
+
302
+ unknown: Optional[GenerationConfig] = FieldInfo(alias="Unknown", default=None)
303
+ """Controls the processing and generation for the segment.
304
+
305
+ - `crop_image` controls whether to crop the file's images to the segment's
306
+ bounding box. The cropped image will be stored in the segment's `image` field.
307
+ Use `All` to always crop, or `Auto` to only crop when needed for
308
+ post-processing.
309
+ - `format` specifies the output format: `Html` or `Markdown`
310
+ - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
311
+ - `Auto`: Process content automatically
312
+ - `LLM`: Use large language models for processing
313
+ - `Ignore`: Exclude segments from final output
314
+ - `description` enables LLM-generated descriptions for segments. **Note:** This
315
+ uses chunkr's own VLM models and is not configurable via LLM processing
316
+ configuration.
317
+ - `extended_context` uses the full page image as context for LLM generation.
318
+ """