chunkr-ai 0.1.0a6__py3-none-any.whl → 0.1.0a7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. chunkr_ai/__init__.py +2 -0
  2. chunkr_ai/_client.py +31 -3
  3. chunkr_ai/_constants.py +5 -5
  4. chunkr_ai/_exceptions.py +4 -0
  5. chunkr_ai/_models.py +1 -1
  6. chunkr_ai/_types.py +35 -1
  7. chunkr_ai/_utils/__init__.py +1 -0
  8. chunkr_ai/_utils/_typing.py +5 -0
  9. chunkr_ai/_version.py +1 -1
  10. chunkr_ai/resources/__init__.py +14 -0
  11. chunkr_ai/resources/files.py +3 -3
  12. chunkr_ai/resources/tasks/__init__.py +14 -0
  13. chunkr_ai/resources/tasks/extract.py +409 -0
  14. chunkr_ai/resources/tasks/parse.py +124 -284
  15. chunkr_ai/resources/tasks/tasks.py +62 -14
  16. chunkr_ai/resources/webhooks.py +193 -0
  17. chunkr_ai/types/__init__.py +27 -1
  18. chunkr_ai/types/bounding_box.py +19 -0
  19. chunkr_ai/types/cell.py +39 -0
  20. chunkr_ai/types/cell_style.py +28 -0
  21. chunkr_ai/types/chunk.py +40 -0
  22. chunkr_ai/types/chunk_processing.py +40 -0
  23. chunkr_ai/types/chunk_processing_param.py +42 -0
  24. chunkr_ai/types/extract_configuration.py +24 -0
  25. chunkr_ai/types/extract_output_response.py +19 -0
  26. chunkr_ai/types/file_create_params.py +2 -1
  27. chunkr_ai/types/file_info.py +21 -0
  28. chunkr_ai/types/generation_config.py +29 -0
  29. chunkr_ai/types/generation_config_param.py +29 -0
  30. chunkr_ai/types/llm_processing.py +36 -0
  31. chunkr_ai/types/llm_processing_param.py +36 -0
  32. chunkr_ai/types/ocr_result.py +28 -0
  33. chunkr_ai/types/page.py +27 -0
  34. chunkr_ai/types/parse_configuration.py +64 -0
  35. chunkr_ai/types/parse_configuration_param.py +65 -0
  36. chunkr_ai/types/parse_output_response.py +29 -0
  37. chunkr_ai/types/segment.py +109 -0
  38. chunkr_ai/types/segment_processing.py +228 -0
  39. chunkr_ai/types/segment_processing_param.py +229 -0
  40. chunkr_ai/types/task_extract_updated_webhook_event.py +22 -0
  41. chunkr_ai/types/task_list_params.py +7 -1
  42. chunkr_ai/types/task_parse_updated_webhook_event.py +22 -0
  43. chunkr_ai/types/task_response.py +68 -0
  44. chunkr_ai/types/tasks/__init__.py +7 -1
  45. chunkr_ai/types/tasks/extract_create_params.py +47 -0
  46. chunkr_ai/types/tasks/extract_create_response.py +214 -0
  47. chunkr_ai/types/tasks/extract_get_params.py +21 -0
  48. chunkr_ai/types/tasks/extract_get_response.py +214 -0
  49. chunkr_ai/types/tasks/parse_create_params.py +25 -793
  50. chunkr_ai/types/tasks/parse_create_response.py +55 -0
  51. chunkr_ai/types/tasks/parse_get_params.py +21 -0
  52. chunkr_ai/types/tasks/parse_get_response.py +55 -0
  53. chunkr_ai/types/unwrap_webhook_event.py +11 -0
  54. chunkr_ai/types/version_info.py +31 -0
  55. chunkr_ai/types/webhook_url_response.py +9 -0
  56. {chunkr_ai-0.1.0a6.dist-info → chunkr_ai-0.1.0a7.dist-info}/METADATA +14 -13
  57. chunkr_ai-0.1.0a7.dist-info/RECORD +86 -0
  58. chunkr_ai/types/task.py +0 -1225
  59. chunkr_ai/types/tasks/parse_update_params.py +0 -845
  60. chunkr_ai-0.1.0a6.dist-info/RECORD +0 -52
  61. {chunkr_ai-0.1.0a6.dist-info → chunkr_ai-0.1.0a7.dist-info}/WHEEL +0 -0
  62. {chunkr_ai-0.1.0a6.dist-info → chunkr_ai-0.1.0a7.dist-info}/licenses/LICENSE +0 -0
@@ -2,7 +2,7 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from typing import Union, Optional
5
+ from typing import List, Union, Optional
6
6
  from datetime import datetime
7
7
  from typing_extensions import Literal
8
8
 
@@ -17,6 +17,14 @@ from .parse import (
17
17
  AsyncParseResourceWithStreamingResponse,
18
18
  )
19
19
  from ...types import task_get_params, task_list_params
20
+ from .extract import (
21
+ ExtractResource,
22
+ AsyncExtractResource,
23
+ ExtractResourceWithRawResponse,
24
+ AsyncExtractResourceWithRawResponse,
25
+ ExtractResourceWithStreamingResponse,
26
+ AsyncExtractResourceWithStreamingResponse,
27
+ )
20
28
  from ..._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven
21
29
  from ..._utils import maybe_transform, async_maybe_transform
22
30
  from ..._compat import cached_property
@@ -28,13 +36,17 @@ from ..._response import (
28
36
  async_to_streamed_response_wrapper,
29
37
  )
30
38
  from ...pagination import SyncTasksPage, AsyncTasksPage
31
- from ...types.task import Task
32
39
  from ..._base_client import AsyncPaginator, make_request_options
40
+ from ...types.task_response import TaskResponse
33
41
 
34
42
  __all__ = ["TasksResource", "AsyncTasksResource"]
35
43
 
36
44
 
37
45
  class TasksResource(SyncAPIResource):
46
+ @cached_property
47
+ def extract(self) -> ExtractResource:
48
+ return ExtractResource(self._client)
49
+
38
50
  @cached_property
39
51
  def parse(self) -> ParseResource:
40
52
  return ParseResource(self._client)
@@ -68,13 +80,15 @@ class TasksResource(SyncAPIResource):
68
80
  limit: int | NotGiven = NOT_GIVEN,
69
81
  sort: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
70
82
  start: Union[str, datetime] | NotGiven = NOT_GIVEN,
83
+ statuses: List[Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]] | NotGiven = NOT_GIVEN,
84
+ task_types: List[Literal["Parse", "Extract"]] | NotGiven = NOT_GIVEN,
71
85
  # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
72
86
  # The extra values given here take precedence over values defined on the client or passed to this method.
73
87
  extra_headers: Headers | None = None,
74
88
  extra_query: Query | None = None,
75
89
  extra_body: Body | None = None,
76
90
  timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
77
- ) -> SyncTasksPage[Task]:
91
+ ) -> SyncTasksPage[TaskResponse]:
78
92
  """
79
93
  Lists tasks for the authenticated user with cursor-based pagination and optional
80
94
  filtering by date range. Supports ascending or descending sort order and
@@ -96,6 +110,10 @@ class TasksResource(SyncAPIResource):
96
110
 
97
111
  start: Start date
98
112
 
113
+ statuses: Filter by one or more statuses
114
+
115
+ task_types: Filter by one or more task types
116
+
99
117
  extra_headers: Send extra headers
100
118
 
101
119
  extra_query: Add additional query parameters to the request
@@ -106,7 +124,7 @@ class TasksResource(SyncAPIResource):
106
124
  """
107
125
  return self._get_api_list(
108
126
  "/tasks",
109
- page=SyncTasksPage[Task],
127
+ page=SyncTasksPage[TaskResponse],
110
128
  options=make_request_options(
111
129
  extra_headers=extra_headers,
112
130
  extra_query=extra_query,
@@ -121,11 +139,13 @@ class TasksResource(SyncAPIResource):
121
139
  "limit": limit,
122
140
  "sort": sort,
123
141
  "start": start,
142
+ "statuses": statuses,
143
+ "task_types": task_types,
124
144
  },
125
145
  task_list_params.TaskListParams,
126
146
  ),
127
147
  ),
128
- model=Task,
148
+ model=TaskResponse,
129
149
  )
130
150
 
131
151
  def delete(
@@ -227,9 +247,9 @@ class TasksResource(SyncAPIResource):
227
247
  extra_query: Query | None = None,
228
248
  extra_body: Body | None = None,
229
249
  timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
230
- ) -> Task:
250
+ ) -> TaskResponse:
231
251
  """
232
- Retrieves the current state of a task and, when requested, waits briefly for
252
+ Retrieves the current state of a task and, when requested, can wait for
233
253
  completion.
234
254
 
235
255
  Returns task details such as processing status, configuration, output (when
@@ -278,11 +298,15 @@ class TasksResource(SyncAPIResource):
278
298
  task_get_params.TaskGetParams,
279
299
  ),
280
300
  ),
281
- cast_to=Task,
301
+ cast_to=TaskResponse,
282
302
  )
283
303
 
284
304
 
285
305
  class AsyncTasksResource(AsyncAPIResource):
306
+ @cached_property
307
+ def extract(self) -> AsyncExtractResource:
308
+ return AsyncExtractResource(self._client)
309
+
286
310
  @cached_property
287
311
  def parse(self) -> AsyncParseResource:
288
312
  return AsyncParseResource(self._client)
@@ -316,13 +340,15 @@ class AsyncTasksResource(AsyncAPIResource):
316
340
  limit: int | NotGiven = NOT_GIVEN,
317
341
  sort: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
318
342
  start: Union[str, datetime] | NotGiven = NOT_GIVEN,
343
+ statuses: List[Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]] | NotGiven = NOT_GIVEN,
344
+ task_types: List[Literal["Parse", "Extract"]] | NotGiven = NOT_GIVEN,
319
345
  # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
320
346
  # The extra values given here take precedence over values defined on the client or passed to this method.
321
347
  extra_headers: Headers | None = None,
322
348
  extra_query: Query | None = None,
323
349
  extra_body: Body | None = None,
324
350
  timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
325
- ) -> AsyncPaginator[Task, AsyncTasksPage[Task]]:
351
+ ) -> AsyncPaginator[TaskResponse, AsyncTasksPage[TaskResponse]]:
326
352
  """
327
353
  Lists tasks for the authenticated user with cursor-based pagination and optional
328
354
  filtering by date range. Supports ascending or descending sort order and
@@ -344,6 +370,10 @@ class AsyncTasksResource(AsyncAPIResource):
344
370
 
345
371
  start: Start date
346
372
 
373
+ statuses: Filter by one or more statuses
374
+
375
+ task_types: Filter by one or more task types
376
+
347
377
  extra_headers: Send extra headers
348
378
 
349
379
  extra_query: Add additional query parameters to the request
@@ -354,7 +384,7 @@ class AsyncTasksResource(AsyncAPIResource):
354
384
  """
355
385
  return self._get_api_list(
356
386
  "/tasks",
357
- page=AsyncTasksPage[Task],
387
+ page=AsyncTasksPage[TaskResponse],
358
388
  options=make_request_options(
359
389
  extra_headers=extra_headers,
360
390
  extra_query=extra_query,
@@ -369,11 +399,13 @@ class AsyncTasksResource(AsyncAPIResource):
369
399
  "limit": limit,
370
400
  "sort": sort,
371
401
  "start": start,
402
+ "statuses": statuses,
403
+ "task_types": task_types,
372
404
  },
373
405
  task_list_params.TaskListParams,
374
406
  ),
375
407
  ),
376
- model=Task,
408
+ model=TaskResponse,
377
409
  )
378
410
 
379
411
  async def delete(
@@ -475,9 +507,9 @@ class AsyncTasksResource(AsyncAPIResource):
475
507
  extra_query: Query | None = None,
476
508
  extra_body: Body | None = None,
477
509
  timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
478
- ) -> Task:
510
+ ) -> TaskResponse:
479
511
  """
480
- Retrieves the current state of a task and, when requested, waits briefly for
512
+ Retrieves the current state of a task and, when requested, can wait for
481
513
  completion.
482
514
 
483
515
  Returns task details such as processing status, configuration, output (when
@@ -526,7 +558,7 @@ class AsyncTasksResource(AsyncAPIResource):
526
558
  task_get_params.TaskGetParams,
527
559
  ),
528
560
  ),
529
- cast_to=Task,
561
+ cast_to=TaskResponse,
530
562
  )
531
563
 
532
564
 
@@ -547,6 +579,10 @@ class TasksResourceWithRawResponse:
547
579
  tasks.get,
548
580
  )
549
581
 
582
+ @cached_property
583
+ def extract(self) -> ExtractResourceWithRawResponse:
584
+ return ExtractResourceWithRawResponse(self._tasks.extract)
585
+
550
586
  @cached_property
551
587
  def parse(self) -> ParseResourceWithRawResponse:
552
588
  return ParseResourceWithRawResponse(self._tasks.parse)
@@ -569,6 +605,10 @@ class AsyncTasksResourceWithRawResponse:
569
605
  tasks.get,
570
606
  )
571
607
 
608
+ @cached_property
609
+ def extract(self) -> AsyncExtractResourceWithRawResponse:
610
+ return AsyncExtractResourceWithRawResponse(self._tasks.extract)
611
+
572
612
  @cached_property
573
613
  def parse(self) -> AsyncParseResourceWithRawResponse:
574
614
  return AsyncParseResourceWithRawResponse(self._tasks.parse)
@@ -591,6 +631,10 @@ class TasksResourceWithStreamingResponse:
591
631
  tasks.get,
592
632
  )
593
633
 
634
+ @cached_property
635
+ def extract(self) -> ExtractResourceWithStreamingResponse:
636
+ return ExtractResourceWithStreamingResponse(self._tasks.extract)
637
+
594
638
  @cached_property
595
639
  def parse(self) -> ParseResourceWithStreamingResponse:
596
640
  return ParseResourceWithStreamingResponse(self._tasks.parse)
@@ -613,6 +657,10 @@ class AsyncTasksResourceWithStreamingResponse:
613
657
  tasks.get,
614
658
  )
615
659
 
660
+ @cached_property
661
+ def extract(self) -> AsyncExtractResourceWithStreamingResponse:
662
+ return AsyncExtractResourceWithStreamingResponse(self._tasks.extract)
663
+
616
664
  @cached_property
617
665
  def parse(self) -> AsyncParseResourceWithStreamingResponse:
618
666
  return AsyncParseResourceWithStreamingResponse(self._tasks.parse)
@@ -0,0 +1,193 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from typing import Mapping, cast
7
+
8
+ import httpx
9
+
10
+ from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
11
+ from .._compat import cached_property
12
+ from .._models import construct_type
13
+ from .._resource import SyncAPIResource, AsyncAPIResource
14
+ from .._response import (
15
+ to_raw_response_wrapper,
16
+ to_streamed_response_wrapper,
17
+ async_to_raw_response_wrapper,
18
+ async_to_streamed_response_wrapper,
19
+ )
20
+ from .._exceptions import ChunkrError
21
+ from .._base_client import make_request_options
22
+ from ..types.unwrap_webhook_event import UnwrapWebhookEvent
23
+ from ..types.webhook_url_response import WebhookURLResponse
24
+
25
+ __all__ = ["WebhooksResource", "AsyncWebhooksResource"]
26
+
27
+
28
+ class WebhooksResource(SyncAPIResource):
29
+ @cached_property
30
+ def with_raw_response(self) -> WebhooksResourceWithRawResponse:
31
+ """
32
+ This property can be used as a prefix for any HTTP method call to return
33
+ the raw response object instead of the parsed content.
34
+
35
+ For more information, see https://www.github.com/lumina-ai-inc/chunkr-python#accessing-raw-response-data-eg-headers
36
+ """
37
+ return WebhooksResourceWithRawResponse(self)
38
+
39
+ @cached_property
40
+ def with_streaming_response(self) -> WebhooksResourceWithStreamingResponse:
41
+ """
42
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
43
+
44
+ For more information, see https://www.github.com/lumina-ai-inc/chunkr-python#with_streaming_response
45
+ """
46
+ return WebhooksResourceWithStreamingResponse(self)
47
+
48
+ def unwrap(self, payload: str, *, headers: Mapping[str, str], key: str | bytes | None = None) -> UnwrapWebhookEvent:
49
+ try:
50
+ from standardwebhooks import Webhook
51
+ except ImportError as exc:
52
+ raise ChunkrError("You need to install `chunkr-ai[webhooks]` to use this method") from exc
53
+
54
+ if key is None:
55
+ key = self._client.webhook_key
56
+ if key is None:
57
+ raise ValueError(
58
+ "Cannot verify a webhook without a key on either the client's webhook_key or passed in as an argument"
59
+ )
60
+
61
+ if not isinstance(headers, dict):
62
+ headers = dict(headers)
63
+
64
+ Webhook(key).verify(payload, headers)
65
+
66
+ return cast(
67
+ UnwrapWebhookEvent,
68
+ construct_type(
69
+ type_=UnwrapWebhookEvent,
70
+ value=json.loads(payload),
71
+ ),
72
+ )
73
+
74
+ def url(
75
+ self,
76
+ *,
77
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
78
+ # The extra values given here take precedence over values defined on the client or passed to this method.
79
+ extra_headers: Headers | None = None,
80
+ extra_query: Query | None = None,
81
+ extra_body: Body | None = None,
82
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
83
+ ) -> WebhookURLResponse:
84
+ """Get or create webhook for user and return dashboard URL"""
85
+ return self._get(
86
+ "/webhook/url",
87
+ options=make_request_options(
88
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
89
+ ),
90
+ cast_to=WebhookURLResponse,
91
+ )
92
+
93
+
94
+ class AsyncWebhooksResource(AsyncAPIResource):
95
+ @cached_property
96
+ def with_raw_response(self) -> AsyncWebhooksResourceWithRawResponse:
97
+ """
98
+ This property can be used as a prefix for any HTTP method call to return
99
+ the raw response object instead of the parsed content.
100
+
101
+ For more information, see https://www.github.com/lumina-ai-inc/chunkr-python#accessing-raw-response-data-eg-headers
102
+ """
103
+ return AsyncWebhooksResourceWithRawResponse(self)
104
+
105
+ @cached_property
106
+ def with_streaming_response(self) -> AsyncWebhooksResourceWithStreamingResponse:
107
+ """
108
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
109
+
110
+ For more information, see https://www.github.com/lumina-ai-inc/chunkr-python#with_streaming_response
111
+ """
112
+ return AsyncWebhooksResourceWithStreamingResponse(self)
113
+
114
+ def unwrap(self, payload: str, *, headers: Mapping[str, str], key: str | bytes | None = None) -> UnwrapWebhookEvent:
115
+ try:
116
+ from standardwebhooks import Webhook
117
+ except ImportError as exc:
118
+ raise ChunkrError("You need to install `chunkr-ai[webhooks]` to use this method") from exc
119
+
120
+ if key is None:
121
+ key = self._client.webhook_key
122
+ if key is None:
123
+ raise ValueError(
124
+ "Cannot verify a webhook without a key on either the client's webhook_key or passed in as an argument"
125
+ )
126
+
127
+ if not isinstance(headers, dict):
128
+ headers = dict(headers)
129
+
130
+ Webhook(key).verify(payload, headers)
131
+
132
+ return cast(
133
+ UnwrapWebhookEvent,
134
+ construct_type(
135
+ type_=UnwrapWebhookEvent,
136
+ value=json.loads(payload),
137
+ ),
138
+ )
139
+
140
+ async def url(
141
+ self,
142
+ *,
143
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
144
+ # The extra values given here take precedence over values defined on the client or passed to this method.
145
+ extra_headers: Headers | None = None,
146
+ extra_query: Query | None = None,
147
+ extra_body: Body | None = None,
148
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
149
+ ) -> WebhookURLResponse:
150
+ """Get or create webhook for user and return dashboard URL"""
151
+ return await self._get(
152
+ "/webhook/url",
153
+ options=make_request_options(
154
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
155
+ ),
156
+ cast_to=WebhookURLResponse,
157
+ )
158
+
159
+
160
+ class WebhooksResourceWithRawResponse:
161
+ def __init__(self, webhooks: WebhooksResource) -> None:
162
+ self._webhooks = webhooks
163
+
164
+ self.url = to_raw_response_wrapper(
165
+ webhooks.url,
166
+ )
167
+
168
+
169
+ class AsyncWebhooksResourceWithRawResponse:
170
+ def __init__(self, webhooks: AsyncWebhooksResource) -> None:
171
+ self._webhooks = webhooks
172
+
173
+ self.url = async_to_raw_response_wrapper(
174
+ webhooks.url,
175
+ )
176
+
177
+
178
+ class WebhooksResourceWithStreamingResponse:
179
+ def __init__(self, webhooks: WebhooksResource) -> None:
180
+ self._webhooks = webhooks
181
+
182
+ self.url = to_streamed_response_wrapper(
183
+ webhooks.url,
184
+ )
185
+
186
+
187
+ class AsyncWebhooksResourceWithStreamingResponse:
188
+ def __init__(self, webhooks: AsyncWebhooksResource) -> None:
189
+ self._webhooks = webhooks
190
+
191
+ self.url = async_to_streamed_response_wrapper(
192
+ webhooks.url,
193
+ )
@@ -2,14 +2,40 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ from .cell import Cell as Cell
5
6
  from .file import File as File
6
- from .task import Task as Task
7
+ from .page import Page as Page
8
+ from .chunk import Chunk as Chunk
7
9
  from .delete import Delete as Delete
10
+ from .segment import Segment as Segment
8
11
  from .file_url import FileURL as FileURL
12
+ from .file_info import FileInfo as FileInfo
13
+ from .cell_style import CellStyle as CellStyle
14
+ from .ocr_result import OcrResult as OcrResult
15
+ from .bounding_box import BoundingBox as BoundingBox
16
+ from .version_info import VersionInfo as VersionInfo
17
+ from .task_response import TaskResponse as TaskResponse
18
+ from .llm_processing import LlmProcessing as LlmProcessing
9
19
  from .file_url_params import FileURLParams as FileURLParams
10
20
  from .task_get_params import TaskGetParams as TaskGetParams
21
+ from .chunk_processing import ChunkProcessing as ChunkProcessing
11
22
  from .file_list_params import FileListParams as FileListParams
12
23
  from .task_list_params import TaskListParams as TaskListParams
24
+ from .generation_config import GenerationConfig as GenerationConfig
13
25
  from .file_create_params import FileCreateParams as FileCreateParams
26
+ from .segment_processing import SegmentProcessing as SegmentProcessing
14
27
  from .files_list_response import FilesListResponse as FilesListResponse
28
+ from .parse_configuration import ParseConfiguration as ParseConfiguration
29
+ from .llm_processing_param import LlmProcessingParam as LlmProcessingParam
30
+ from .unwrap_webhook_event import UnwrapWebhookEvent as UnwrapWebhookEvent
31
+ from .webhook_url_response import WebhookURLResponse as WebhookURLResponse
32
+ from .extract_configuration import ExtractConfiguration as ExtractConfiguration
15
33
  from .health_check_response import HealthCheckResponse as HealthCheckResponse
34
+ from .parse_output_response import ParseOutputResponse as ParseOutputResponse
35
+ from .chunk_processing_param import ChunkProcessingParam as ChunkProcessingParam
36
+ from .extract_output_response import ExtractOutputResponse as ExtractOutputResponse
37
+ from .generation_config_param import GenerationConfigParam as GenerationConfigParam
38
+ from .segment_processing_param import SegmentProcessingParam as SegmentProcessingParam
39
+ from .parse_configuration_param import ParseConfigurationParam as ParseConfigurationParam
40
+ from .task_parse_updated_webhook_event import TaskParseUpdatedWebhookEvent as TaskParseUpdatedWebhookEvent
41
+ from .task_extract_updated_webhook_event import TaskExtractUpdatedWebhookEvent as TaskExtractUpdatedWebhookEvent
@@ -0,0 +1,19 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from .._models import BaseModel
4
+
5
+ __all__ = ["BoundingBox"]
6
+
7
+
8
+ class BoundingBox(BaseModel):
9
+ height: float
10
+ """The height of the bounding box."""
11
+
12
+ left: float
13
+ """The left coordinate of the bounding box."""
14
+
15
+ top: float
16
+ """The top coordinate of the bounding box."""
17
+
18
+ width: float
19
+ """The width of the bounding box."""
@@ -0,0 +1,39 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from typing import Optional
4
+
5
+ from .._models import BaseModel
6
+ from .cell_style import CellStyle
7
+
8
+ __all__ = ["Cell"]
9
+
10
+
11
+ class Cell(BaseModel):
12
+ cell_id: str
13
+ """The cell ID."""
14
+
15
+ range: str
16
+ """Range of the cell."""
17
+
18
+ text: str
19
+ """Text content of the cell."""
20
+
21
+ formula: Optional[str] = None
22
+ """Formula of the cell."""
23
+
24
+ hyperlink: Optional[str] = None
25
+ """Hyperlink URL if the cell contains a link (e.g., "https://www.chunkr.ai")."""
26
+
27
+ style: Optional[CellStyle] = None
28
+ """Styling information for the cell including colors, fonts, and formatting."""
29
+
30
+ value: Optional[str] = None
31
+ """The computed/evaluated value of the cell.
32
+
33
+ This represents the actual result after evaluating any formulas, as opposed to
34
+ the raw text content. For cells with formulas, this is the calculated result;
35
+ for cells with static content, this is typically the same as the text field.
36
+
37
+ Example: text might show "3.14" (formatted to 2 decimal places) while value
38
+ could be "3.141592653589793" (full precision).
39
+ """
@@ -0,0 +1,28 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from typing import Optional
4
+ from typing_extensions import Literal
5
+
6
+ from .._models import BaseModel
7
+
8
+ __all__ = ["CellStyle"]
9
+
10
+
11
+ class CellStyle(BaseModel):
12
+ align: Optional[Literal["Left", "Center", "Right", "Justify"]] = None
13
+ """Alignment of the cell content."""
14
+
15
+ bg_color: Optional[str] = None
16
+ """Background color of the cell (e.g., "#FFFFFF" or "#DAE3F3")."""
17
+
18
+ font_face: Optional[str] = None
19
+ """Font face/family of the cell (e.g., "Arial", "Daytona")."""
20
+
21
+ is_bold: Optional[bool] = None
22
+ """Whether the cell content is bold."""
23
+
24
+ text_color: Optional[str] = None
25
+ """Text color of the cell (e.g., "#000000" or "red")."""
26
+
27
+ valign: Optional[Literal["Top", "Middle", "Bottom", "Baseline"]] = None
28
+ """Vertical alignment of the cell content."""
@@ -0,0 +1,40 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from typing import List, Optional
4
+
5
+ from .segment import Segment
6
+ from .._models import BaseModel
7
+
8
+ __all__ = ["Chunk"]
9
+
10
+
11
+ class Chunk(BaseModel):
12
+ chunk_length: int
13
+ """The total number of tokens in the `embed` field of the chunk.
14
+
15
+ Calculated by the `tokenizer`.
16
+ """
17
+
18
+ segments: List[Segment]
19
+ """
20
+ Collection of document segments that form this chunk. When
21
+ `target_chunk_length` > 0, contains the maximum number of segments that fit
22
+ within that length (segments remain intact). Otherwise, contains exactly one
23
+ segment.
24
+ """
25
+
26
+ chunk_id: Optional[str] = None
27
+ """The unique identifier for the chunk."""
28
+
29
+ content: Optional[str] = None
30
+ """The content of the chunk.
31
+
32
+ This is the text that is generated by combining the `content` field from each
33
+ segment. Can be provided as context to the LLM.
34
+ """
35
+
36
+ embed: Optional[str] = None
37
+ """Suggested text to be embedded for the chunk.
38
+
39
+ This text is generated by combining the `embed` field from each segment.
40
+ """
@@ -0,0 +1,40 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from typing import Union, Optional
4
+ from typing_extensions import Literal, TypeAlias
5
+
6
+ from pydantic import Field as FieldInfo
7
+
8
+ from .._models import BaseModel
9
+
10
+ __all__ = ["ChunkProcessing", "Tokenizer", "TokenizerEnum", "TokenizerString"]
11
+
12
+
13
+ class TokenizerEnum(BaseModel):
14
+ enum: Literal["Word", "Cl100kBase", "XlmRobertaBase", "BertBaseUncased"] = FieldInfo(alias="Enum")
15
+ """Use one of the predefined tokenizer types"""
16
+
17
+
18
+ class TokenizerString(BaseModel):
19
+ string: str = FieldInfo(alias="String")
20
+ """
21
+ Use any Hugging Face tokenizer by specifying its model ID. Examples:
22
+ "Qwen/Qwen-tokenizer", "facebook/bart-large"
23
+ """
24
+
25
+
26
+ Tokenizer: TypeAlias = Union[TokenizerEnum, TokenizerString]
27
+
28
+
29
+ class ChunkProcessing(BaseModel):
30
+ ignore_headers_and_footers: Optional[bool] = None
31
+ """DEPRECATED: use `segment_processing.ignore` instead"""
32
+
33
+ target_length: Optional[int] = None
34
+ """The target number of words in each chunk.
35
+
36
+ If 0, each chunk will contain a single segment.
37
+ """
38
+
39
+ tokenizer: Optional[Tokenizer] = None
40
+ """The tokenizer to use for the chunking process."""
@@ -0,0 +1,42 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Union, Optional
6
+ from typing_extensions import Literal, Required, Annotated, TypeAlias, TypedDict
7
+
8
+ from .._utils import PropertyInfo
9
+
10
+ __all__ = ["ChunkProcessingParam", "Tokenizer", "TokenizerEnum", "TokenizerString"]
11
+
12
+
13
+ class TokenizerEnum(TypedDict, total=False):
14
+ enum: Required[
15
+ Annotated[Literal["Word", "Cl100kBase", "XlmRobertaBase", "BertBaseUncased"], PropertyInfo(alias="Enum")]
16
+ ]
17
+ """Use one of the predefined tokenizer types"""
18
+
19
+
20
+ class TokenizerString(TypedDict, total=False):
21
+ string: Required[Annotated[str, PropertyInfo(alias="String")]]
22
+ """
23
+ Use any Hugging Face tokenizer by specifying its model ID. Examples:
24
+ "Qwen/Qwen-tokenizer", "facebook/bart-large"
25
+ """
26
+
27
+
28
+ Tokenizer: TypeAlias = Union[TokenizerEnum, TokenizerString]
29
+
30
+
31
+ class ChunkProcessingParam(TypedDict, total=False):
32
+ ignore_headers_and_footers: Optional[bool]
33
+ """DEPRECATED: use `segment_processing.ignore` instead"""
34
+
35
+ target_length: int
36
+ """The target number of words in each chunk.
37
+
38
+ If 0, each chunk will contain a single segment.
39
+ """
40
+
41
+ tokenizer: Tokenizer
42
+ """The tokenizer to use for the chunking process."""