chunkr-ai 0.1.0a6__py3-none-any.whl → 0.1.0a7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chunkr_ai/__init__.py +2 -0
- chunkr_ai/_client.py +31 -3
- chunkr_ai/_constants.py +5 -5
- chunkr_ai/_exceptions.py +4 -0
- chunkr_ai/_models.py +1 -1
- chunkr_ai/_types.py +35 -1
- chunkr_ai/_utils/__init__.py +1 -0
- chunkr_ai/_utils/_typing.py +5 -0
- chunkr_ai/_version.py +1 -1
- chunkr_ai/resources/__init__.py +14 -0
- chunkr_ai/resources/files.py +3 -3
- chunkr_ai/resources/tasks/__init__.py +14 -0
- chunkr_ai/resources/tasks/extract.py +409 -0
- chunkr_ai/resources/tasks/parse.py +124 -284
- chunkr_ai/resources/tasks/tasks.py +62 -14
- chunkr_ai/resources/webhooks.py +193 -0
- chunkr_ai/types/__init__.py +27 -1
- chunkr_ai/types/bounding_box.py +19 -0
- chunkr_ai/types/cell.py +39 -0
- chunkr_ai/types/cell_style.py +28 -0
- chunkr_ai/types/chunk.py +40 -0
- chunkr_ai/types/chunk_processing.py +40 -0
- chunkr_ai/types/chunk_processing_param.py +42 -0
- chunkr_ai/types/extract_configuration.py +24 -0
- chunkr_ai/types/extract_output_response.py +19 -0
- chunkr_ai/types/file_create_params.py +2 -1
- chunkr_ai/types/file_info.py +21 -0
- chunkr_ai/types/generation_config.py +29 -0
- chunkr_ai/types/generation_config_param.py +29 -0
- chunkr_ai/types/llm_processing.py +36 -0
- chunkr_ai/types/llm_processing_param.py +36 -0
- chunkr_ai/types/ocr_result.py +28 -0
- chunkr_ai/types/page.py +27 -0
- chunkr_ai/types/parse_configuration.py +64 -0
- chunkr_ai/types/parse_configuration_param.py +65 -0
- chunkr_ai/types/parse_output_response.py +29 -0
- chunkr_ai/types/segment.py +109 -0
- chunkr_ai/types/segment_processing.py +228 -0
- chunkr_ai/types/segment_processing_param.py +229 -0
- chunkr_ai/types/task_extract_updated_webhook_event.py +22 -0
- chunkr_ai/types/task_list_params.py +7 -1
- chunkr_ai/types/task_parse_updated_webhook_event.py +22 -0
- chunkr_ai/types/task_response.py +68 -0
- chunkr_ai/types/tasks/__init__.py +7 -1
- chunkr_ai/types/tasks/extract_create_params.py +47 -0
- chunkr_ai/types/tasks/extract_create_response.py +214 -0
- chunkr_ai/types/tasks/extract_get_params.py +21 -0
- chunkr_ai/types/tasks/extract_get_response.py +214 -0
- chunkr_ai/types/tasks/parse_create_params.py +25 -793
- chunkr_ai/types/tasks/parse_create_response.py +55 -0
- chunkr_ai/types/tasks/parse_get_params.py +21 -0
- chunkr_ai/types/tasks/parse_get_response.py +55 -0
- chunkr_ai/types/unwrap_webhook_event.py +11 -0
- chunkr_ai/types/version_info.py +31 -0
- chunkr_ai/types/webhook_url_response.py +9 -0
- {chunkr_ai-0.1.0a6.dist-info → chunkr_ai-0.1.0a7.dist-info}/METADATA +14 -13
- chunkr_ai-0.1.0a7.dist-info/RECORD +86 -0
- chunkr_ai/types/task.py +0 -1225
- chunkr_ai/types/tasks/parse_update_params.py +0 -845
- chunkr_ai-0.1.0a6.dist-info/RECORD +0 -52
- {chunkr_ai-0.1.0a6.dist-info → chunkr_ai-0.1.0a7.dist-info}/WHEEL +0 -0
- {chunkr_ai-0.1.0a6.dist-info → chunkr_ai-0.1.0a7.dist-info}/licenses/LICENSE +0 -0
chunkr_ai/resources/tasks/tasks.py
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
from __future__ import annotations
|
4
4
|
|
5
|
-
from typing import Union, Optional
|
5
|
+
from typing import List, Union, Optional
|
6
6
|
from datetime import datetime
|
7
7
|
from typing_extensions import Literal
|
8
8
|
|
@@ -17,6 +17,14 @@ from .parse import (
|
|
17
17
|
AsyncParseResourceWithStreamingResponse,
|
18
18
|
)
|
19
19
|
from ...types import task_get_params, task_list_params
|
20
|
+
from .extract import (
|
21
|
+
ExtractResource,
|
22
|
+
AsyncExtractResource,
|
23
|
+
ExtractResourceWithRawResponse,
|
24
|
+
AsyncExtractResourceWithRawResponse,
|
25
|
+
ExtractResourceWithStreamingResponse,
|
26
|
+
AsyncExtractResourceWithStreamingResponse,
|
27
|
+
)
|
20
28
|
from ..._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven
|
21
29
|
from ..._utils import maybe_transform, async_maybe_transform
|
22
30
|
from ..._compat import cached_property
|
@@ -28,13 +36,17 @@ from ..._response import (
|
|
28
36
|
async_to_streamed_response_wrapper,
|
29
37
|
)
|
30
38
|
from ...pagination import SyncTasksPage, AsyncTasksPage
|
31
|
-
from ...types.task import Task
|
32
39
|
from ..._base_client import AsyncPaginator, make_request_options
|
40
|
+
from ...types.task_response import TaskResponse
|
33
41
|
|
34
42
|
__all__ = ["TasksResource", "AsyncTasksResource"]
|
35
43
|
|
36
44
|
|
37
45
|
class TasksResource(SyncAPIResource):
|
46
|
+
@cached_property
|
47
|
+
def extract(self) -> ExtractResource:
|
48
|
+
return ExtractResource(self._client)
|
49
|
+
|
38
50
|
@cached_property
|
39
51
|
def parse(self) -> ParseResource:
|
40
52
|
return ParseResource(self._client)
|
@@ -68,13 +80,15 @@ class TasksResource(SyncAPIResource):
|
|
68
80
|
limit: int | NotGiven = NOT_GIVEN,
|
69
81
|
sort: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
|
70
82
|
start: Union[str, datetime] | NotGiven = NOT_GIVEN,
|
83
|
+
statuses: List[Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]] | NotGiven = NOT_GIVEN,
|
84
|
+
task_types: List[Literal["Parse", "Extract"]] | NotGiven = NOT_GIVEN,
|
71
85
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
72
86
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
73
87
|
extra_headers: Headers | None = None,
|
74
88
|
extra_query: Query | None = None,
|
75
89
|
extra_body: Body | None = None,
|
76
90
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
77
|
-
) -> SyncTasksPage[Task]:
|
91
|
+
) -> SyncTasksPage[TaskResponse]:
|
78
92
|
"""
|
79
93
|
Lists tasks for the authenticated user with cursor-based pagination and optional
|
80
94
|
filtering by date range. Supports ascending or descending sort order and
|
@@ -96,6 +110,10 @@ class TasksResource(SyncAPIResource):
|
|
96
110
|
|
97
111
|
start: Start date
|
98
112
|
|
113
|
+
statuses: Filter by one or more statuses
|
114
|
+
|
115
|
+
task_types: Filter by one or more task types
|
116
|
+
|
99
117
|
extra_headers: Send extra headers
|
100
118
|
|
101
119
|
extra_query: Add additional query parameters to the request
|
@@ -106,7 +124,7 @@ class TasksResource(SyncAPIResource):
|
|
106
124
|
"""
|
107
125
|
return self._get_api_list(
|
108
126
|
"/tasks",
|
109
|
-
page=SyncTasksPage[Task],
|
127
|
+
page=SyncTasksPage[TaskResponse],
|
110
128
|
options=make_request_options(
|
111
129
|
extra_headers=extra_headers,
|
112
130
|
extra_query=extra_query,
|
@@ -121,11 +139,13 @@ class TasksResource(SyncAPIResource):
|
|
121
139
|
"limit": limit,
|
122
140
|
"sort": sort,
|
123
141
|
"start": start,
|
142
|
+
"statuses": statuses,
|
143
|
+
"task_types": task_types,
|
124
144
|
},
|
125
145
|
task_list_params.TaskListParams,
|
126
146
|
),
|
127
147
|
),
|
128
|
-
model=Task,
|
148
|
+
model=TaskResponse,
|
129
149
|
)
|
130
150
|
|
131
151
|
def delete(
|
@@ -227,9 +247,9 @@ class TasksResource(SyncAPIResource):
|
|
227
247
|
extra_query: Query | None = None,
|
228
248
|
extra_body: Body | None = None,
|
229
249
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
230
|
-
) -> Task:
|
250
|
+
) -> TaskResponse:
|
231
251
|
"""
|
232
|
-
Retrieves the current state of a task and, when requested,
|
252
|
+
Retrieves the current state of a task and, when requested, can wait for
|
233
253
|
completion.
|
234
254
|
|
235
255
|
Returns task details such as processing status, configuration, output (when
|
@@ -278,11 +298,15 @@ class TasksResource(SyncAPIResource):
|
|
278
298
|
task_get_params.TaskGetParams,
|
279
299
|
),
|
280
300
|
),
|
281
|
-
cast_to=Task,
|
301
|
+
cast_to=TaskResponse,
|
282
302
|
)
|
283
303
|
|
284
304
|
|
285
305
|
class AsyncTasksResource(AsyncAPIResource):
|
306
|
+
@cached_property
|
307
|
+
def extract(self) -> AsyncExtractResource:
|
308
|
+
return AsyncExtractResource(self._client)
|
309
|
+
|
286
310
|
@cached_property
|
287
311
|
def parse(self) -> AsyncParseResource:
|
288
312
|
return AsyncParseResource(self._client)
|
@@ -316,13 +340,15 @@ class AsyncTasksResource(AsyncAPIResource):
|
|
316
340
|
limit: int | NotGiven = NOT_GIVEN,
|
317
341
|
sort: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
|
318
342
|
start: Union[str, datetime] | NotGiven = NOT_GIVEN,
|
343
|
+
statuses: List[Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]] | NotGiven = NOT_GIVEN,
|
344
|
+
task_types: List[Literal["Parse", "Extract"]] | NotGiven = NOT_GIVEN,
|
319
345
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
320
346
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
321
347
|
extra_headers: Headers | None = None,
|
322
348
|
extra_query: Query | None = None,
|
323
349
|
extra_body: Body | None = None,
|
324
350
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
325
|
-
) -> AsyncPaginator[Task, AsyncTasksPage[Task]]:
|
351
|
+
) -> AsyncPaginator[TaskResponse, AsyncTasksPage[TaskResponse]]:
|
326
352
|
"""
|
327
353
|
Lists tasks for the authenticated user with cursor-based pagination and optional
|
328
354
|
filtering by date range. Supports ascending or descending sort order and
|
@@ -344,6 +370,10 @@ class AsyncTasksResource(AsyncAPIResource):
|
|
344
370
|
|
345
371
|
start: Start date
|
346
372
|
|
373
|
+
statuses: Filter by one or more statuses
|
374
|
+
|
375
|
+
task_types: Filter by one or more task types
|
376
|
+
|
347
377
|
extra_headers: Send extra headers
|
348
378
|
|
349
379
|
extra_query: Add additional query parameters to the request
|
@@ -354,7 +384,7 @@ class AsyncTasksResource(AsyncAPIResource):
|
|
354
384
|
"""
|
355
385
|
return self._get_api_list(
|
356
386
|
"/tasks",
|
357
|
-
page=AsyncTasksPage[Task],
|
387
|
+
page=AsyncTasksPage[TaskResponse],
|
358
388
|
options=make_request_options(
|
359
389
|
extra_headers=extra_headers,
|
360
390
|
extra_query=extra_query,
|
@@ -369,11 +399,13 @@ class AsyncTasksResource(AsyncAPIResource):
|
|
369
399
|
"limit": limit,
|
370
400
|
"sort": sort,
|
371
401
|
"start": start,
|
402
|
+
"statuses": statuses,
|
403
|
+
"task_types": task_types,
|
372
404
|
},
|
373
405
|
task_list_params.TaskListParams,
|
374
406
|
),
|
375
407
|
),
|
376
|
-
model=Task,
|
408
|
+
model=TaskResponse,
|
377
409
|
)
|
378
410
|
|
379
411
|
async def delete(
|
@@ -475,9 +507,9 @@ class AsyncTasksResource(AsyncAPIResource):
|
|
475
507
|
extra_query: Query | None = None,
|
476
508
|
extra_body: Body | None = None,
|
477
509
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
478
|
-
) -> Task:
|
510
|
+
) -> TaskResponse:
|
479
511
|
"""
|
480
|
-
Retrieves the current state of a task and, when requested,
|
512
|
+
Retrieves the current state of a task and, when requested, can wait for
|
481
513
|
completion.
|
482
514
|
|
483
515
|
Returns task details such as processing status, configuration, output (when
|
@@ -526,7 +558,7 @@ class AsyncTasksResource(AsyncAPIResource):
|
|
526
558
|
task_get_params.TaskGetParams,
|
527
559
|
),
|
528
560
|
),
|
529
|
-
cast_to=Task,
|
561
|
+
cast_to=TaskResponse,
|
530
562
|
)
|
531
563
|
|
532
564
|
|
@@ -547,6 +579,10 @@ class TasksResourceWithRawResponse:
|
|
547
579
|
tasks.get,
|
548
580
|
)
|
549
581
|
|
582
|
+
@cached_property
|
583
|
+
def extract(self) -> ExtractResourceWithRawResponse:
|
584
|
+
return ExtractResourceWithRawResponse(self._tasks.extract)
|
585
|
+
|
550
586
|
@cached_property
|
551
587
|
def parse(self) -> ParseResourceWithRawResponse:
|
552
588
|
return ParseResourceWithRawResponse(self._tasks.parse)
|
@@ -569,6 +605,10 @@ class AsyncTasksResourceWithRawResponse:
|
|
569
605
|
tasks.get,
|
570
606
|
)
|
571
607
|
|
608
|
+
@cached_property
|
609
|
+
def extract(self) -> AsyncExtractResourceWithRawResponse:
|
610
|
+
return AsyncExtractResourceWithRawResponse(self._tasks.extract)
|
611
|
+
|
572
612
|
@cached_property
|
573
613
|
def parse(self) -> AsyncParseResourceWithRawResponse:
|
574
614
|
return AsyncParseResourceWithRawResponse(self._tasks.parse)
|
@@ -591,6 +631,10 @@ class TasksResourceWithStreamingResponse:
|
|
591
631
|
tasks.get,
|
592
632
|
)
|
593
633
|
|
634
|
+
@cached_property
|
635
|
+
def extract(self) -> ExtractResourceWithStreamingResponse:
|
636
|
+
return ExtractResourceWithStreamingResponse(self._tasks.extract)
|
637
|
+
|
594
638
|
@cached_property
|
595
639
|
def parse(self) -> ParseResourceWithStreamingResponse:
|
596
640
|
return ParseResourceWithStreamingResponse(self._tasks.parse)
|
@@ -613,6 +657,10 @@ class AsyncTasksResourceWithStreamingResponse:
|
|
613
657
|
tasks.get,
|
614
658
|
)
|
615
659
|
|
660
|
+
@cached_property
|
661
|
+
def extract(self) -> AsyncExtractResourceWithStreamingResponse:
|
662
|
+
return AsyncExtractResourceWithStreamingResponse(self._tasks.extract)
|
663
|
+
|
616
664
|
@cached_property
|
617
665
|
def parse(self) -> AsyncParseResourceWithStreamingResponse:
|
618
666
|
return AsyncParseResourceWithStreamingResponse(self._tasks.parse)
|
chunkr_ai/resources/webhooks.py
ADDED
@@ -0,0 +1,193 @@
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
import json
|
6
|
+
from typing import Mapping, cast
|
7
|
+
|
8
|
+
import httpx
|
9
|
+
|
10
|
+
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
|
11
|
+
from .._compat import cached_property
|
12
|
+
from .._models import construct_type
|
13
|
+
from .._resource import SyncAPIResource, AsyncAPIResource
|
14
|
+
from .._response import (
|
15
|
+
to_raw_response_wrapper,
|
16
|
+
to_streamed_response_wrapper,
|
17
|
+
async_to_raw_response_wrapper,
|
18
|
+
async_to_streamed_response_wrapper,
|
19
|
+
)
|
20
|
+
from .._exceptions import ChunkrError
|
21
|
+
from .._base_client import make_request_options
|
22
|
+
from ..types.unwrap_webhook_event import UnwrapWebhookEvent
|
23
|
+
from ..types.webhook_url_response import WebhookURLResponse
|
24
|
+
|
25
|
+
__all__ = ["WebhooksResource", "AsyncWebhooksResource"]
|
26
|
+
|
27
|
+
|
28
|
+
class WebhooksResource(SyncAPIResource):
|
29
|
+
@cached_property
|
30
|
+
def with_raw_response(self) -> WebhooksResourceWithRawResponse:
|
31
|
+
"""
|
32
|
+
This property can be used as a prefix for any HTTP method call to return
|
33
|
+
the raw response object instead of the parsed content.
|
34
|
+
|
35
|
+
For more information, see https://www.github.com/lumina-ai-inc/chunkr-python#accessing-raw-response-data-eg-headers
|
36
|
+
"""
|
37
|
+
return WebhooksResourceWithRawResponse(self)
|
38
|
+
|
39
|
+
@cached_property
|
40
|
+
def with_streaming_response(self) -> WebhooksResourceWithStreamingResponse:
|
41
|
+
"""
|
42
|
+
An alternative to `.with_raw_response` that doesn't eagerly read the response body.
|
43
|
+
|
44
|
+
For more information, see https://www.github.com/lumina-ai-inc/chunkr-python#with_streaming_response
|
45
|
+
"""
|
46
|
+
return WebhooksResourceWithStreamingResponse(self)
|
47
|
+
|
48
|
+
def unwrap(self, payload: str, *, headers: Mapping[str, str], key: str | bytes | None = None) -> UnwrapWebhookEvent:
|
49
|
+
try:
|
50
|
+
from standardwebhooks import Webhook
|
51
|
+
except ImportError as exc:
|
52
|
+
raise ChunkrError("You need to install `chunkr-ai[webhooks]` to use this method") from exc
|
53
|
+
|
54
|
+
if key is None:
|
55
|
+
key = self._client.webhook_key
|
56
|
+
if key is None:
|
57
|
+
raise ValueError(
|
58
|
+
"Cannot verify a webhook without a key on either the client's webhook_key or passed in as an argument"
|
59
|
+
)
|
60
|
+
|
61
|
+
if not isinstance(headers, dict):
|
62
|
+
headers = dict(headers)
|
63
|
+
|
64
|
+
Webhook(key).verify(payload, headers)
|
65
|
+
|
66
|
+
return cast(
|
67
|
+
UnwrapWebhookEvent,
|
68
|
+
construct_type(
|
69
|
+
type_=UnwrapWebhookEvent,
|
70
|
+
value=json.loads(payload),
|
71
|
+
),
|
72
|
+
)
|
73
|
+
|
74
|
+
def url(
|
75
|
+
self,
|
76
|
+
*,
|
77
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
78
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
79
|
+
extra_headers: Headers | None = None,
|
80
|
+
extra_query: Query | None = None,
|
81
|
+
extra_body: Body | None = None,
|
82
|
+
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
83
|
+
) -> WebhookURLResponse:
|
84
|
+
"""Get or create webhook for user and return dashboard URL"""
|
85
|
+
return self._get(
|
86
|
+
"/webhook/url",
|
87
|
+
options=make_request_options(
|
88
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
89
|
+
),
|
90
|
+
cast_to=WebhookURLResponse,
|
91
|
+
)
|
92
|
+
|
93
|
+
|
94
|
+
class AsyncWebhooksResource(AsyncAPIResource):
|
95
|
+
@cached_property
|
96
|
+
def with_raw_response(self) -> AsyncWebhooksResourceWithRawResponse:
|
97
|
+
"""
|
98
|
+
This property can be used as a prefix for any HTTP method call to return
|
99
|
+
the raw response object instead of the parsed content.
|
100
|
+
|
101
|
+
For more information, see https://www.github.com/lumina-ai-inc/chunkr-python#accessing-raw-response-data-eg-headers
|
102
|
+
"""
|
103
|
+
return AsyncWebhooksResourceWithRawResponse(self)
|
104
|
+
|
105
|
+
@cached_property
|
106
|
+
def with_streaming_response(self) -> AsyncWebhooksResourceWithStreamingResponse:
|
107
|
+
"""
|
108
|
+
An alternative to `.with_raw_response` that doesn't eagerly read the response body.
|
109
|
+
|
110
|
+
For more information, see https://www.github.com/lumina-ai-inc/chunkr-python#with_streaming_response
|
111
|
+
"""
|
112
|
+
return AsyncWebhooksResourceWithStreamingResponse(self)
|
113
|
+
|
114
|
+
def unwrap(self, payload: str, *, headers: Mapping[str, str], key: str | bytes | None = None) -> UnwrapWebhookEvent:
|
115
|
+
try:
|
116
|
+
from standardwebhooks import Webhook
|
117
|
+
except ImportError as exc:
|
118
|
+
raise ChunkrError("You need to install `chunkr-ai[webhooks]` to use this method") from exc
|
119
|
+
|
120
|
+
if key is None:
|
121
|
+
key = self._client.webhook_key
|
122
|
+
if key is None:
|
123
|
+
raise ValueError(
|
124
|
+
"Cannot verify a webhook without a key on either the client's webhook_key or passed in as an argument"
|
125
|
+
)
|
126
|
+
|
127
|
+
if not isinstance(headers, dict):
|
128
|
+
headers = dict(headers)
|
129
|
+
|
130
|
+
Webhook(key).verify(payload, headers)
|
131
|
+
|
132
|
+
return cast(
|
133
|
+
UnwrapWebhookEvent,
|
134
|
+
construct_type(
|
135
|
+
type_=UnwrapWebhookEvent,
|
136
|
+
value=json.loads(payload),
|
137
|
+
),
|
138
|
+
)
|
139
|
+
|
140
|
+
async def url(
|
141
|
+
self,
|
142
|
+
*,
|
143
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
144
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
145
|
+
extra_headers: Headers | None = None,
|
146
|
+
extra_query: Query | None = None,
|
147
|
+
extra_body: Body | None = None,
|
148
|
+
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
149
|
+
) -> WebhookURLResponse:
|
150
|
+
"""Get or create webhook for user and return dashboard URL"""
|
151
|
+
return await self._get(
|
152
|
+
"/webhook/url",
|
153
|
+
options=make_request_options(
|
154
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
155
|
+
),
|
156
|
+
cast_to=WebhookURLResponse,
|
157
|
+
)
|
158
|
+
|
159
|
+
|
160
|
+
class WebhooksResourceWithRawResponse:
|
161
|
+
def __init__(self, webhooks: WebhooksResource) -> None:
|
162
|
+
self._webhooks = webhooks
|
163
|
+
|
164
|
+
self.url = to_raw_response_wrapper(
|
165
|
+
webhooks.url,
|
166
|
+
)
|
167
|
+
|
168
|
+
|
169
|
+
class AsyncWebhooksResourceWithRawResponse:
|
170
|
+
def __init__(self, webhooks: AsyncWebhooksResource) -> None:
|
171
|
+
self._webhooks = webhooks
|
172
|
+
|
173
|
+
self.url = async_to_raw_response_wrapper(
|
174
|
+
webhooks.url,
|
175
|
+
)
|
176
|
+
|
177
|
+
|
178
|
+
class WebhooksResourceWithStreamingResponse:
|
179
|
+
def __init__(self, webhooks: WebhooksResource) -> None:
|
180
|
+
self._webhooks = webhooks
|
181
|
+
|
182
|
+
self.url = to_streamed_response_wrapper(
|
183
|
+
webhooks.url,
|
184
|
+
)
|
185
|
+
|
186
|
+
|
187
|
+
class AsyncWebhooksResourceWithStreamingResponse:
|
188
|
+
def __init__(self, webhooks: AsyncWebhooksResource) -> None:
|
189
|
+
self._webhooks = webhooks
|
190
|
+
|
191
|
+
self.url = async_to_streamed_response_wrapper(
|
192
|
+
webhooks.url,
|
193
|
+
)
|
chunkr_ai/types/__init__.py
CHANGED
@@ -2,14 +2,40 @@
|
|
2
2
|
|
3
3
|
from __future__ import annotations
|
4
4
|
|
5
|
+
from .cell import Cell as Cell
|
5
6
|
from .file import File as File
|
6
|
-
from .task import Task as Task
|
7
|
+
from .page import Page as Page
|
8
|
+
from .chunk import Chunk as Chunk
|
7
9
|
from .delete import Delete as Delete
|
10
|
+
from .segment import Segment as Segment
|
8
11
|
from .file_url import FileURL as FileURL
|
12
|
+
from .file_info import FileInfo as FileInfo
|
13
|
+
from .cell_style import CellStyle as CellStyle
|
14
|
+
from .ocr_result import OcrResult as OcrResult
|
15
|
+
from .bounding_box import BoundingBox as BoundingBox
|
16
|
+
from .version_info import VersionInfo as VersionInfo
|
17
|
+
from .task_response import TaskResponse as TaskResponse
|
18
|
+
from .llm_processing import LlmProcessing as LlmProcessing
|
9
19
|
from .file_url_params import FileURLParams as FileURLParams
|
10
20
|
from .task_get_params import TaskGetParams as TaskGetParams
|
21
|
+
from .chunk_processing import ChunkProcessing as ChunkProcessing
|
11
22
|
from .file_list_params import FileListParams as FileListParams
|
12
23
|
from .task_list_params import TaskListParams as TaskListParams
|
24
|
+
from .generation_config import GenerationConfig as GenerationConfig
|
13
25
|
from .file_create_params import FileCreateParams as FileCreateParams
|
26
|
+
from .segment_processing import SegmentProcessing as SegmentProcessing
|
14
27
|
from .files_list_response import FilesListResponse as FilesListResponse
|
28
|
+
from .parse_configuration import ParseConfiguration as ParseConfiguration
|
29
|
+
from .llm_processing_param import LlmProcessingParam as LlmProcessingParam
|
30
|
+
from .unwrap_webhook_event import UnwrapWebhookEvent as UnwrapWebhookEvent
|
31
|
+
from .webhook_url_response import WebhookURLResponse as WebhookURLResponse
|
32
|
+
from .extract_configuration import ExtractConfiguration as ExtractConfiguration
|
15
33
|
from .health_check_response import HealthCheckResponse as HealthCheckResponse
|
34
|
+
from .parse_output_response import ParseOutputResponse as ParseOutputResponse
|
35
|
+
from .chunk_processing_param import ChunkProcessingParam as ChunkProcessingParam
|
36
|
+
from .extract_output_response import ExtractOutputResponse as ExtractOutputResponse
|
37
|
+
from .generation_config_param import GenerationConfigParam as GenerationConfigParam
|
38
|
+
from .segment_processing_param import SegmentProcessingParam as SegmentProcessingParam
|
39
|
+
from .parse_configuration_param import ParseConfigurationParam as ParseConfigurationParam
|
40
|
+
from .task_parse_updated_webhook_event import TaskParseUpdatedWebhookEvent as TaskParseUpdatedWebhookEvent
|
41
|
+
from .task_extract_updated_webhook_event import TaskExtractUpdatedWebhookEvent as TaskExtractUpdatedWebhookEvent
|
chunkr_ai/types/bounding_box.py
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
+
|
3
|
+
from .._models import BaseModel
|
4
|
+
|
5
|
+
__all__ = ["BoundingBox"]
|
6
|
+
|
7
|
+
|
8
|
+
class BoundingBox(BaseModel):
|
9
|
+
height: float
|
10
|
+
"""The height of the bounding box."""
|
11
|
+
|
12
|
+
left: float
|
13
|
+
"""The left coordinate of the bounding box."""
|
14
|
+
|
15
|
+
top: float
|
16
|
+
"""The top coordinate of the bounding box."""
|
17
|
+
|
18
|
+
width: float
|
19
|
+
"""The width of the bounding box."""
|
chunkr_ai/types/cell.py
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
+
|
3
|
+
from typing import Optional
|
4
|
+
|
5
|
+
from .._models import BaseModel
|
6
|
+
from .cell_style import CellStyle
|
7
|
+
|
8
|
+
__all__ = ["Cell"]
|
9
|
+
|
10
|
+
|
11
|
+
class Cell(BaseModel):
|
12
|
+
cell_id: str
|
13
|
+
"""The cell ID."""
|
14
|
+
|
15
|
+
range: str
|
16
|
+
"""Range of the cell."""
|
17
|
+
|
18
|
+
text: str
|
19
|
+
"""Text content of the cell."""
|
20
|
+
|
21
|
+
formula: Optional[str] = None
|
22
|
+
"""Formula of the cell."""
|
23
|
+
|
24
|
+
hyperlink: Optional[str] = None
|
25
|
+
"""Hyperlink URL if the cell contains a link (e.g., "https://www.chunkr.ai")."""
|
26
|
+
|
27
|
+
style: Optional[CellStyle] = None
|
28
|
+
"""Styling information for the cell including colors, fonts, and formatting."""
|
29
|
+
|
30
|
+
value: Optional[str] = None
|
31
|
+
"""The computed/evaluated value of the cell.
|
32
|
+
|
33
|
+
This represents the actual result after evaluating any formulas, as opposed to
|
34
|
+
the raw text content. For cells with formulas, this is the calculated result;
|
35
|
+
for cells with static content, this is typically the same as the text field.
|
36
|
+
|
37
|
+
Example: text might show "3.14" (formatted to 2 decimal places) while value
|
38
|
+
could be "3.141592653589793" (full precision).
|
39
|
+
"""
|
chunkr_ai/types/cell_style.py
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
+
|
3
|
+
from typing import Optional
|
4
|
+
from typing_extensions import Literal
|
5
|
+
|
6
|
+
from .._models import BaseModel
|
7
|
+
|
8
|
+
__all__ = ["CellStyle"]
|
9
|
+
|
10
|
+
|
11
|
+
class CellStyle(BaseModel):
|
12
|
+
align: Optional[Literal["Left", "Center", "Right", "Justify"]] = None
|
13
|
+
"""Alignment of the cell content."""
|
14
|
+
|
15
|
+
bg_color: Optional[str] = None
|
16
|
+
"""Background color of the cell (e.g., "#FFFFFF" or "#DAE3F3")."""
|
17
|
+
|
18
|
+
font_face: Optional[str] = None
|
19
|
+
"""Font face/family of the cell (e.g., "Arial", "Daytona")."""
|
20
|
+
|
21
|
+
is_bold: Optional[bool] = None
|
22
|
+
"""Whether the cell content is bold."""
|
23
|
+
|
24
|
+
text_color: Optional[str] = None
|
25
|
+
"""Text color of the cell (e.g., "#000000" or "red")."""
|
26
|
+
|
27
|
+
valign: Optional[Literal["Top", "Middle", "Bottom", "Baseline"]] = None
|
28
|
+
"""Vertical alignment of the cell content."""
|
chunkr_ai/types/chunk.py
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
+
|
3
|
+
from typing import List, Optional
|
4
|
+
|
5
|
+
from .segment import Segment
|
6
|
+
from .._models import BaseModel
|
7
|
+
|
8
|
+
__all__ = ["Chunk"]
|
9
|
+
|
10
|
+
|
11
|
+
class Chunk(BaseModel):
|
12
|
+
chunk_length: int
|
13
|
+
"""The total number of tokens in the `embed` field of the chunk.
|
14
|
+
|
15
|
+
Calculated by the `tokenizer`.
|
16
|
+
"""
|
17
|
+
|
18
|
+
segments: List[Segment]
|
19
|
+
"""
|
20
|
+
Collection of document segments that form this chunk. When
|
21
|
+
`target_chunk_length` > 0, contains the maximum number of segments that fit
|
22
|
+
within that length (segments remain intact). Otherwise, contains exactly one
|
23
|
+
segment.
|
24
|
+
"""
|
25
|
+
|
26
|
+
chunk_id: Optional[str] = None
|
27
|
+
"""The unique identifier for the chunk."""
|
28
|
+
|
29
|
+
content: Optional[str] = None
|
30
|
+
"""The content of the chunk.
|
31
|
+
|
32
|
+
This is the text that is generated by combining the `content` field from each
|
33
|
+
segment. Can be used provided as context to the LLM.
|
34
|
+
"""
|
35
|
+
|
36
|
+
embed: Optional[str] = None
|
37
|
+
"""Suggested text to be embedded for the chunk.
|
38
|
+
|
39
|
+
This text is generated by combining the `embed` field from each segment.
|
40
|
+
"""
|
chunkr_ai/types/chunk_processing.py
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
+
|
3
|
+
from typing import Union, Optional
|
4
|
+
from typing_extensions import Literal, TypeAlias
|
5
|
+
|
6
|
+
from pydantic import Field as FieldInfo
|
7
|
+
|
8
|
+
from .._models import BaseModel
|
9
|
+
|
10
|
+
__all__ = ["ChunkProcessing", "Tokenizer", "TokenizerEnum", "TokenizerString"]
|
11
|
+
|
12
|
+
|
13
|
+
class TokenizerEnum(BaseModel):
|
14
|
+
enum: Literal["Word", "Cl100kBase", "XlmRobertaBase", "BertBaseUncased"] = FieldInfo(alias="Enum")
|
15
|
+
"""Use one of the predefined tokenizer types"""
|
16
|
+
|
17
|
+
|
18
|
+
class TokenizerString(BaseModel):
|
19
|
+
string: str = FieldInfo(alias="String")
|
20
|
+
"""
|
21
|
+
Use any Hugging Face tokenizer by specifying its model ID Examples:
|
22
|
+
"Qwen/Qwen-tokenizer", "facebook/bart-large"
|
23
|
+
"""
|
24
|
+
|
25
|
+
|
26
|
+
Tokenizer: TypeAlias = Union[TokenizerEnum, TokenizerString]
|
27
|
+
|
28
|
+
|
29
|
+
class ChunkProcessing(BaseModel):
|
30
|
+
ignore_headers_and_footers: Optional[bool] = None
|
31
|
+
"""DEPRECATED: use `segment_processing.ignore` instead"""
|
32
|
+
|
33
|
+
target_length: Optional[int] = None
|
34
|
+
"""The target number of words in each chunk.
|
35
|
+
|
36
|
+
If 0, each chunk will contain a single segment.
|
37
|
+
"""
|
38
|
+
|
39
|
+
tokenizer: Optional[Tokenizer] = None
|
40
|
+
"""The tokenizer to use for the chunking process."""
|
chunkr_ai/types/chunk_processing_param.py
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
from typing import Union, Optional
|
6
|
+
from typing_extensions import Literal, Required, Annotated, TypeAlias, TypedDict
|
7
|
+
|
8
|
+
from .._utils import PropertyInfo
|
9
|
+
|
10
|
+
__all__ = ["ChunkProcessingParam", "Tokenizer", "TokenizerEnum", "TokenizerString"]
|
11
|
+
|
12
|
+
|
13
|
+
class TokenizerEnum(TypedDict, total=False):
|
14
|
+
enum: Required[
|
15
|
+
Annotated[Literal["Word", "Cl100kBase", "XlmRobertaBase", "BertBaseUncased"], PropertyInfo(alias="Enum")]
|
16
|
+
]
|
17
|
+
"""Use one of the predefined tokenizer types"""
|
18
|
+
|
19
|
+
|
20
|
+
class TokenizerString(TypedDict, total=False):
|
21
|
+
string: Required[Annotated[str, PropertyInfo(alias="String")]]
|
22
|
+
"""
|
23
|
+
Use any Hugging Face tokenizer by specifying its model ID Examples:
|
24
|
+
"Qwen/Qwen-tokenizer", "facebook/bart-large"
|
25
|
+
"""
|
26
|
+
|
27
|
+
|
28
|
+
Tokenizer: TypeAlias = Union[TokenizerEnum, TokenizerString]
|
29
|
+
|
30
|
+
|
31
|
+
class ChunkProcessingParam(TypedDict, total=False):
|
32
|
+
ignore_headers_and_footers: Optional[bool]
|
33
|
+
"""DEPRECATED: use `segment_processing.ignore` instead"""
|
34
|
+
|
35
|
+
target_length: int
|
36
|
+
"""The target number of words in each chunk.
|
37
|
+
|
38
|
+
If 0, each chunk will contain a single segment.
|
39
|
+
"""
|
40
|
+
|
41
|
+
tokenizer: Tokenizer
|
42
|
+
"""The tokenizer to use for the chunking process."""
|