chunkr-ai 0.1.0a6__py3-none-any.whl → 0.1.0a8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chunkr_ai/__init__.py +2 -0
- chunkr_ai/_base_client.py +3 -3
- chunkr_ai/_client.py +31 -3
- chunkr_ai/_compat.py +48 -48
- chunkr_ai/_constants.py +5 -5
- chunkr_ai/_exceptions.py +4 -0
- chunkr_ai/_models.py +41 -41
- chunkr_ai/_types.py +35 -1
- chunkr_ai/_utils/__init__.py +9 -2
- chunkr_ai/_utils/_compat.py +45 -0
- chunkr_ai/_utils/_datetime_parse.py +136 -0
- chunkr_ai/_utils/_transform.py +11 -1
- chunkr_ai/_utils/_typing.py +6 -1
- chunkr_ai/_utils/_utils.py +0 -1
- chunkr_ai/_version.py +1 -1
- chunkr_ai/resources/__init__.py +14 -0
- chunkr_ai/resources/files.py +3 -3
- chunkr_ai/resources/tasks/__init__.py +14 -0
- chunkr_ai/resources/tasks/extract.py +393 -0
- chunkr_ai/resources/tasks/parse.py +110 -286
- chunkr_ai/resources/tasks/tasks.py +64 -32
- chunkr_ai/resources/webhooks.py +193 -0
- chunkr_ai/types/__init__.py +27 -1
- chunkr_ai/types/bounding_box.py +19 -0
- chunkr_ai/types/cell.py +39 -0
- chunkr_ai/types/cell_style.py +28 -0
- chunkr_ai/types/chunk.py +40 -0
- chunkr_ai/types/chunk_processing.py +40 -0
- chunkr_ai/types/chunk_processing_param.py +42 -0
- chunkr_ai/types/extract_configuration.py +24 -0
- chunkr_ai/types/extract_output_response.py +62 -0
- chunkr_ai/types/file_create_params.py +2 -1
- chunkr_ai/types/file_info.py +21 -0
- chunkr_ai/types/generation_config.py +29 -0
- chunkr_ai/types/generation_config_param.py +29 -0
- chunkr_ai/types/llm_processing.py +36 -0
- chunkr_ai/types/llm_processing_param.py +36 -0
- chunkr_ai/types/ocr_result.py +28 -0
- chunkr_ai/types/page.py +27 -0
- chunkr_ai/types/parse_configuration.py +64 -0
- chunkr_ai/types/parse_configuration_param.py +65 -0
- chunkr_ai/types/parse_output_response.py +29 -0
- chunkr_ai/types/segment.py +109 -0
- chunkr_ai/types/segment_processing.py +228 -0
- chunkr_ai/types/segment_processing_param.py +229 -0
- chunkr_ai/types/task_extract_updated_webhook_event.py +22 -0
- chunkr_ai/types/task_get_params.py +0 -3
- chunkr_ai/types/task_list_params.py +7 -1
- chunkr_ai/types/task_parse_updated_webhook_event.py +22 -0
- chunkr_ai/types/task_response.py +68 -0
- chunkr_ai/types/tasks/__init__.py +7 -1
- chunkr_ai/types/tasks/extract_create_params.py +47 -0
- chunkr_ai/types/tasks/extract_create_response.py +67 -0
- chunkr_ai/types/tasks/extract_get_params.py +18 -0
- chunkr_ai/types/tasks/extract_get_response.py +67 -0
- chunkr_ai/types/tasks/parse_create_params.py +25 -793
- chunkr_ai/types/tasks/parse_create_response.py +55 -0
- chunkr_ai/types/tasks/parse_get_params.py +18 -0
- chunkr_ai/types/tasks/parse_get_response.py +55 -0
- chunkr_ai/types/unwrap_webhook_event.py +11 -0
- chunkr_ai/types/version_info.py +31 -0
- chunkr_ai/types/webhook_url_response.py +9 -0
- {chunkr_ai-0.1.0a6.dist-info → chunkr_ai-0.1.0a8.dist-info}/METADATA +14 -13
- chunkr_ai-0.1.0a8.dist-info/RECORD +88 -0
- chunkr_ai/types/task.py +0 -1225
- chunkr_ai/types/tasks/parse_update_params.py +0 -845
- chunkr_ai-0.1.0a6.dist-info/RECORD +0 -52
- {chunkr_ai-0.1.0a6.dist-info → chunkr_ai-0.1.0a8.dist-info}/WHEEL +0 -0
- {chunkr_ai-0.1.0a6.dist-info → chunkr_ai-0.1.0a8.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,228 @@
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
+
|
3
|
+
from typing import Optional
|
4
|
+
|
5
|
+
from pydantic import Field as FieldInfo
|
6
|
+
|
7
|
+
from .._models import BaseModel
|
8
|
+
from .generation_config import GenerationConfig
|
9
|
+
|
10
|
+
__all__ = ["SegmentProcessing"]
|
11
|
+
|
12
|
+
|
13
|
+
class SegmentProcessing(BaseModel):
|
14
|
+
caption: Optional[GenerationConfig] = FieldInfo(alias="Caption", default=None)
|
15
|
+
"""Controls the processing and generation for the segment.
|
16
|
+
|
17
|
+
- `crop_image` controls whether to crop the file's images to the segment's
|
18
|
+
bounding box. The cropped image will be stored in the segment's `image` field.
|
19
|
+
Use `All` to always crop, or `Auto` to only crop when needed for
|
20
|
+
post-processing.
|
21
|
+
- `format` specifies the output format: `Html` or `Markdown`
|
22
|
+
- `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
|
23
|
+
- `Auto`: Process content automatically
|
24
|
+
- `LLM`: Use large language models for processing
|
25
|
+
- `Ignore`: Exclude segments from final output
|
26
|
+
- `description` enables LLM-generated descriptions for segments. **Note:** This
|
27
|
+
uses chunkr's own VLM models and is not configurable via LLM processing
|
28
|
+
configuration.
|
29
|
+
- `extended_context` uses the full page image as context for LLM generation.
|
30
|
+
"""
|
31
|
+
|
32
|
+
footnote: Optional[GenerationConfig] = FieldInfo(alias="Footnote", default=None)
|
33
|
+
"""Controls the processing and generation for the segment.
|
34
|
+
|
35
|
+
- `crop_image` controls whether to crop the file's images to the segment's
|
36
|
+
bounding box. The cropped image will be stored in the segment's `image` field.
|
37
|
+
Use `All` to always crop, or `Auto` to only crop when needed for
|
38
|
+
post-processing.
|
39
|
+
- `format` specifies the output format: `Html` or `Markdown`
|
40
|
+
- `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
|
41
|
+
- `Auto`: Process content automatically
|
42
|
+
- `LLM`: Use large language models for processing
|
43
|
+
- `Ignore`: Exclude segments from final output
|
44
|
+
- `description` enables LLM-generated descriptions for segments. **Note:** This
|
45
|
+
uses chunkr's own VLM models and is not configurable via LLM processing
|
46
|
+
configuration.
|
47
|
+
- `extended_context` uses the full page image as context for LLM generation.
|
48
|
+
"""
|
49
|
+
|
50
|
+
formula: Optional[GenerationConfig] = FieldInfo(alias="Formula", default=None)
|
51
|
+
"""Controls the processing and generation for the segment.
|
52
|
+
|
53
|
+
- `crop_image` controls whether to crop the file's images to the segment's
|
54
|
+
bounding box. The cropped image will be stored in the segment's `image` field.
|
55
|
+
Use `All` to always crop, or `Auto` to only crop when needed for
|
56
|
+
post-processing.
|
57
|
+
- `format` specifies the output format: `Html` or `Markdown`
|
58
|
+
- `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
|
59
|
+
- `Auto`: Process content automatically
|
60
|
+
- `LLM`: Use large language models for processing
|
61
|
+
- `Ignore`: Exclude segments from final output
|
62
|
+
- `description` enables LLM-generated descriptions for segments. **Note:** This
|
63
|
+
uses chunkr's own VLM models and is not configurable via LLM processing
|
64
|
+
configuration.
|
65
|
+
- `extended_context` uses the full page image as context for LLM generation.
|
66
|
+
"""
|
67
|
+
|
68
|
+
list_item: Optional[GenerationConfig] = FieldInfo(alias="ListItem", default=None)
|
69
|
+
"""Controls the processing and generation for the segment.
|
70
|
+
|
71
|
+
- `crop_image` controls whether to crop the file's images to the segment's
|
72
|
+
bounding box. The cropped image will be stored in the segment's `image` field.
|
73
|
+
Use `All` to always crop, or `Auto` to only crop when needed for
|
74
|
+
post-processing.
|
75
|
+
- `format` specifies the output format: `Html` or `Markdown`
|
76
|
+
- `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
|
77
|
+
- `Auto`: Process content automatically
|
78
|
+
- `LLM`: Use large language models for processing
|
79
|
+
- `Ignore`: Exclude segments from final output
|
80
|
+
- `description` enables LLM-generated descriptions for segments. **Note:** This
|
81
|
+
uses chunkr's own VLM models and is not configurable via LLM processing
|
82
|
+
configuration.
|
83
|
+
- `extended_context` uses the full page image as context for LLM generation.
|
84
|
+
"""
|
85
|
+
|
86
|
+
page: Optional[GenerationConfig] = FieldInfo(alias="Page", default=None)
|
87
|
+
"""Controls the processing and generation for the segment.
|
88
|
+
|
89
|
+
- `crop_image` controls whether to crop the file's images to the segment's
|
90
|
+
bounding box. The cropped image will be stored in the segment's `image` field.
|
91
|
+
Use `All` to always crop, or `Auto` to only crop when needed for
|
92
|
+
post-processing.
|
93
|
+
- `format` specifies the output format: `Html` or `Markdown`
|
94
|
+
- `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
|
95
|
+
- `Auto`: Process content automatically
|
96
|
+
- `LLM`: Use large language models for processing
|
97
|
+
- `Ignore`: Exclude segments from final output
|
98
|
+
- `description` enables LLM-generated descriptions for segments. **Note:** This
|
99
|
+
uses chunkr's own VLM models and is not configurable via LLM processing
|
100
|
+
configuration.
|
101
|
+
- `extended_context` uses the full page image as context for LLM generation.
|
102
|
+
"""
|
103
|
+
|
104
|
+
page_footer: Optional[GenerationConfig] = FieldInfo(alias="PageFooter", default=None)
|
105
|
+
"""Controls the processing and generation for the segment.
|
106
|
+
|
107
|
+
- `crop_image` controls whether to crop the file's images to the segment's
|
108
|
+
bounding box. The cropped image will be stored in the segment's `image` field.
|
109
|
+
Use `All` to always crop, or `Auto` to only crop when needed for
|
110
|
+
post-processing.
|
111
|
+
- `format` specifies the output format: `Html` or `Markdown`
|
112
|
+
- `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
|
113
|
+
- `Auto`: Process content automatically
|
114
|
+
- `LLM`: Use large language models for processing
|
115
|
+
- `Ignore`: Exclude segments from final output
|
116
|
+
- `description` enables LLM-generated descriptions for segments. **Note:** This
|
117
|
+
uses chunkr's own VLM models and is not configurable via LLM processing
|
118
|
+
configuration.
|
119
|
+
- `extended_context` uses the full page image as context for LLM generation.
|
120
|
+
"""
|
121
|
+
|
122
|
+
page_header: Optional[GenerationConfig] = FieldInfo(alias="PageHeader", default=None)
|
123
|
+
"""Controls the processing and generation for the segment.
|
124
|
+
|
125
|
+
- `crop_image` controls whether to crop the file's images to the segment's
|
126
|
+
bounding box. The cropped image will be stored in the segment's `image` field.
|
127
|
+
Use `All` to always crop, or `Auto` to only crop when needed for
|
128
|
+
post-processing.
|
129
|
+
- `format` specifies the output format: `Html` or `Markdown`
|
130
|
+
- `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
|
131
|
+
- `Auto`: Process content automatically
|
132
|
+
- `LLM`: Use large language models for processing
|
133
|
+
- `Ignore`: Exclude segments from final output
|
134
|
+
- `description` enables LLM-generated descriptions for segments. **Note:** This
|
135
|
+
uses chunkr's own VLM models and is not configurable via LLM processing
|
136
|
+
configuration.
|
137
|
+
- `extended_context` uses the full page image as context for LLM generation.
|
138
|
+
"""
|
139
|
+
|
140
|
+
picture: Optional[GenerationConfig] = FieldInfo(alias="Picture", default=None)
|
141
|
+
"""Controls the processing and generation for the segment.
|
142
|
+
|
143
|
+
- `crop_image` controls whether to crop the file's images to the segment's
|
144
|
+
bounding box. The cropped image will be stored in the segment's `image` field.
|
145
|
+
Use `All` to always crop, or `Auto` to only crop when needed for
|
146
|
+
post-processing.
|
147
|
+
- `format` specifies the output format: `Html` or `Markdown`
|
148
|
+
- `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
|
149
|
+
- `Auto`: Process content automatically
|
150
|
+
- `LLM`: Use large language models for processing
|
151
|
+
- `Ignore`: Exclude segments from final output
|
152
|
+
- `description` enables LLM-generated descriptions for segments. **Note:** This
|
153
|
+
uses chunkr's own VLM models and is not configurable via LLM processing
|
154
|
+
configuration.
|
155
|
+
- `extended_context` uses the full page image as context for LLM generation.
|
156
|
+
"""
|
157
|
+
|
158
|
+
section_header: Optional[GenerationConfig] = FieldInfo(alias="SectionHeader", default=None)
|
159
|
+
"""Controls the processing and generation for the segment.
|
160
|
+
|
161
|
+
- `crop_image` controls whether to crop the file's images to the segment's
|
162
|
+
bounding box. The cropped image will be stored in the segment's `image` field.
|
163
|
+
Use `All` to always crop, or `Auto` to only crop when needed for
|
164
|
+
post-processing.
|
165
|
+
- `format` specifies the output format: `Html` or `Markdown`
|
166
|
+
- `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
|
167
|
+
- `Auto`: Process content automatically
|
168
|
+
- `LLM`: Use large language models for processing
|
169
|
+
- `Ignore`: Exclude segments from final output
|
170
|
+
- `description` enables LLM-generated descriptions for segments. **Note:** This
|
171
|
+
uses chunkr's own VLM models and is not configurable via LLM processing
|
172
|
+
configuration.
|
173
|
+
- `extended_context` uses the full page image as context for LLM generation.
|
174
|
+
"""
|
175
|
+
|
176
|
+
table: Optional[GenerationConfig] = FieldInfo(alias="Table", default=None)
|
177
|
+
"""Controls the processing and generation for the segment.
|
178
|
+
|
179
|
+
- `crop_image` controls whether to crop the file's images to the segment's
|
180
|
+
bounding box. The cropped image will be stored in the segment's `image` field.
|
181
|
+
Use `All` to always crop, or `Auto` to only crop when needed for
|
182
|
+
post-processing.
|
183
|
+
- `format` specifies the output format: `Html` or `Markdown`
|
184
|
+
- `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
|
185
|
+
- `Auto`: Process content automatically
|
186
|
+
- `LLM`: Use large language models for processing
|
187
|
+
- `Ignore`: Exclude segments from final output
|
188
|
+
- `description` enables LLM-generated descriptions for segments. **Note:** This
|
189
|
+
uses chunkr's own VLM models and is not configurable via LLM processing
|
190
|
+
configuration.
|
191
|
+
- `extended_context` uses the full page image as context for LLM generation.
|
192
|
+
"""
|
193
|
+
|
194
|
+
text: Optional[GenerationConfig] = FieldInfo(alias="Text", default=None)
|
195
|
+
"""Controls the processing and generation for the segment.
|
196
|
+
|
197
|
+
- `crop_image` controls whether to crop the file's images to the segment's
|
198
|
+
bounding box. The cropped image will be stored in the segment's `image` field.
|
199
|
+
Use `All` to always crop, or `Auto` to only crop when needed for
|
200
|
+
post-processing.
|
201
|
+
- `format` specifies the output format: `Html` or `Markdown`
|
202
|
+
- `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
|
203
|
+
- `Auto`: Process content automatically
|
204
|
+
- `LLM`: Use large language models for processing
|
205
|
+
- `Ignore`: Exclude segments from final output
|
206
|
+
- `description` enables LLM-generated descriptions for segments. **Note:** This
|
207
|
+
uses chunkr's own VLM models and is not configurable via LLM processing
|
208
|
+
configuration.
|
209
|
+
- `extended_context` uses the full page image as context for LLM generation.
|
210
|
+
"""
|
211
|
+
|
212
|
+
title: Optional[GenerationConfig] = FieldInfo(alias="Title", default=None)
|
213
|
+
"""Controls the processing and generation for the segment.
|
214
|
+
|
215
|
+
- `crop_image` controls whether to crop the file's images to the segment's
|
216
|
+
bounding box. The cropped image will be stored in the segment's `image` field.
|
217
|
+
Use `All` to always crop, or `Auto` to only crop when needed for
|
218
|
+
post-processing.
|
219
|
+
- `format` specifies the output format: `Html` or `Markdown`
|
220
|
+
- `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
|
221
|
+
- `Auto`: Process content automatically
|
222
|
+
- `LLM`: Use large language models for processing
|
223
|
+
- `Ignore`: Exclude segments from final output
|
224
|
+
- `description` enables LLM-generated descriptions for segments. **Note:** This
|
225
|
+
uses chunkr's own VLM models and is not configurable via LLM processing
|
226
|
+
configuration.
|
227
|
+
- `extended_context` uses the full page image as context for LLM generation.
|
228
|
+
"""
|
@@ -0,0 +1,229 @@
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
from typing import Optional
|
6
|
+
from typing_extensions import Annotated, TypedDict
|
7
|
+
|
8
|
+
from .._utils import PropertyInfo
|
9
|
+
from .generation_config_param import GenerationConfigParam
|
10
|
+
|
11
|
+
__all__ = ["SegmentProcessingParam"]
|
12
|
+
|
13
|
+
|
14
|
+
class SegmentProcessingParam(TypedDict, total=False):
|
15
|
+
caption: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Caption")]
|
16
|
+
"""Controls the processing and generation for the segment.
|
17
|
+
|
18
|
+
- `crop_image` controls whether to crop the file's images to the segment's
|
19
|
+
bounding box. The cropped image will be stored in the segment's `image` field.
|
20
|
+
Use `All` to always crop, or `Auto` to only crop when needed for
|
21
|
+
post-processing.
|
22
|
+
- `format` specifies the output format: `Html` or `Markdown`
|
23
|
+
- `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
|
24
|
+
- `Auto`: Process content automatically
|
25
|
+
- `LLM`: Use large language models for processing
|
26
|
+
- `Ignore`: Exclude segments from final output
|
27
|
+
- `description` enables LLM-generated descriptions for segments. **Note:** This
|
28
|
+
uses chunkr's own VLM models and is not configurable via LLM processing
|
29
|
+
configuration.
|
30
|
+
- `extended_context` uses the full page image as context for LLM generation.
|
31
|
+
"""
|
32
|
+
|
33
|
+
footnote: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Footnote")]
|
34
|
+
"""Controls the processing and generation for the segment.
|
35
|
+
|
36
|
+
- `crop_image` controls whether to crop the file's images to the segment's
|
37
|
+
bounding box. The cropped image will be stored in the segment's `image` field.
|
38
|
+
Use `All` to always crop, or `Auto` to only crop when needed for
|
39
|
+
post-processing.
|
40
|
+
- `format` specifies the output format: `Html` or `Markdown`
|
41
|
+
- `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
|
42
|
+
- `Auto`: Process content automatically
|
43
|
+
- `LLM`: Use large language models for processing
|
44
|
+
- `Ignore`: Exclude segments from final output
|
45
|
+
- `description` enables LLM-generated descriptions for segments. **Note:** This
|
46
|
+
uses chunkr's own VLM models and is not configurable via LLM processing
|
47
|
+
configuration.
|
48
|
+
- `extended_context` uses the full page image as context for LLM generation.
|
49
|
+
"""
|
50
|
+
|
51
|
+
formula: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Formula")]
|
52
|
+
"""Controls the processing and generation for the segment.
|
53
|
+
|
54
|
+
- `crop_image` controls whether to crop the file's images to the segment's
|
55
|
+
bounding box. The cropped image will be stored in the segment's `image` field.
|
56
|
+
Use `All` to always crop, or `Auto` to only crop when needed for
|
57
|
+
post-processing.
|
58
|
+
- `format` specifies the output format: `Html` or `Markdown`
|
59
|
+
- `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
|
60
|
+
- `Auto`: Process content automatically
|
61
|
+
- `LLM`: Use large language models for processing
|
62
|
+
- `Ignore`: Exclude segments from final output
|
63
|
+
- `description` enables LLM-generated descriptions for segments. **Note:** This
|
64
|
+
uses chunkr's own VLM models and is not configurable via LLM processing
|
65
|
+
configuration.
|
66
|
+
- `extended_context` uses the full page image as context for LLM generation.
|
67
|
+
"""
|
68
|
+
|
69
|
+
list_item: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="ListItem")]
|
70
|
+
"""Controls the processing and generation for the segment.
|
71
|
+
|
72
|
+
- `crop_image` controls whether to crop the file's images to the segment's
|
73
|
+
bounding box. The cropped image will be stored in the segment's `image` field.
|
74
|
+
Use `All` to always crop, or `Auto` to only crop when needed for
|
75
|
+
post-processing.
|
76
|
+
- `format` specifies the output format: `Html` or `Markdown`
|
77
|
+
- `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
|
78
|
+
- `Auto`: Process content automatically
|
79
|
+
- `LLM`: Use large language models for processing
|
80
|
+
- `Ignore`: Exclude segments from final output
|
81
|
+
- `description` enables LLM-generated descriptions for segments. **Note:** This
|
82
|
+
uses chunkr's own VLM models and is not configurable via LLM processing
|
83
|
+
configuration.
|
84
|
+
- `extended_context` uses the full page image as context for LLM generation.
|
85
|
+
"""
|
86
|
+
|
87
|
+
page: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Page")]
|
88
|
+
"""Controls the processing and generation for the segment.
|
89
|
+
|
90
|
+
- `crop_image` controls whether to crop the file's images to the segment's
|
91
|
+
bounding box. The cropped image will be stored in the segment's `image` field.
|
92
|
+
Use `All` to always crop, or `Auto` to only crop when needed for
|
93
|
+
post-processing.
|
94
|
+
- `format` specifies the output format: `Html` or `Markdown`
|
95
|
+
- `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
|
96
|
+
- `Auto`: Process content automatically
|
97
|
+
- `LLM`: Use large language models for processing
|
98
|
+
- `Ignore`: Exclude segments from final output
|
99
|
+
- `description` enables LLM-generated descriptions for segments. **Note:** This
|
100
|
+
uses chunkr's own VLM models and is not configurable via LLM processing
|
101
|
+
configuration.
|
102
|
+
- `extended_context` uses the full page image as context for LLM generation.
|
103
|
+
"""
|
104
|
+
|
105
|
+
page_footer: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="PageFooter")]
|
106
|
+
"""Controls the processing and generation for the segment.
|
107
|
+
|
108
|
+
- `crop_image` controls whether to crop the file's images to the segment's
|
109
|
+
bounding box. The cropped image will be stored in the segment's `image` field.
|
110
|
+
Use `All` to always crop, or `Auto` to only crop when needed for
|
111
|
+
post-processing.
|
112
|
+
- `format` specifies the output format: `Html` or `Markdown`
|
113
|
+
- `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
|
114
|
+
- `Auto`: Process content automatically
|
115
|
+
- `LLM`: Use large language models for processing
|
116
|
+
- `Ignore`: Exclude segments from final output
|
117
|
+
- `description` enables LLM-generated descriptions for segments. **Note:** This
|
118
|
+
uses chunkr's own VLM models and is not configurable via LLM processing
|
119
|
+
configuration.
|
120
|
+
- `extended_context` uses the full page image as context for LLM generation.
|
121
|
+
"""
|
122
|
+
|
123
|
+
page_header: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="PageHeader")]
|
124
|
+
"""Controls the processing and generation for the segment.
|
125
|
+
|
126
|
+
- `crop_image` controls whether to crop the file's images to the segment's
|
127
|
+
bounding box. The cropped image will be stored in the segment's `image` field.
|
128
|
+
Use `All` to always crop, or `Auto` to only crop when needed for
|
129
|
+
post-processing.
|
130
|
+
- `format` specifies the output format: `Html` or `Markdown`
|
131
|
+
- `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
|
132
|
+
- `Auto`: Process content automatically
|
133
|
+
- `LLM`: Use large language models for processing
|
134
|
+
- `Ignore`: Exclude segments from final output
|
135
|
+
- `description` enables LLM-generated descriptions for segments. **Note:** This
|
136
|
+
uses chunkr's own VLM models and is not configurable via LLM processing
|
137
|
+
configuration.
|
138
|
+
- `extended_context` uses the full page image as context for LLM generation.
|
139
|
+
"""
|
140
|
+
|
141
|
+
picture: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Picture")]
|
142
|
+
"""Controls the processing and generation for the segment.
|
143
|
+
|
144
|
+
- `crop_image` controls whether to crop the file's images to the segment's
|
145
|
+
bounding box. The cropped image will be stored in the segment's `image` field.
|
146
|
+
Use `All` to always crop, or `Auto` to only crop when needed for
|
147
|
+
post-processing.
|
148
|
+
- `format` specifies the output format: `Html` or `Markdown`
|
149
|
+
- `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
|
150
|
+
- `Auto`: Process content automatically
|
151
|
+
- `LLM`: Use large language models for processing
|
152
|
+
- `Ignore`: Exclude segments from final output
|
153
|
+
- `description` enables LLM-generated descriptions for segments. **Note:** This
|
154
|
+
uses chunkr's own VLM models and is not configurable via LLM processing
|
155
|
+
configuration.
|
156
|
+
- `extended_context` uses the full page image as context for LLM generation.
|
157
|
+
"""
|
158
|
+
|
159
|
+
section_header: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="SectionHeader")]
|
160
|
+
"""Controls the processing and generation for the segment.
|
161
|
+
|
162
|
+
- `crop_image` controls whether to crop the file's images to the segment's
|
163
|
+
bounding box. The cropped image will be stored in the segment's `image` field.
|
164
|
+
Use `All` to always crop, or `Auto` to only crop when needed for
|
165
|
+
post-processing.
|
166
|
+
- `format` specifies the output format: `Html` or `Markdown`
|
167
|
+
- `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
|
168
|
+
- `Auto`: Process content automatically
|
169
|
+
- `LLM`: Use large language models for processing
|
170
|
+
- `Ignore`: Exclude segments from final output
|
171
|
+
- `description` enables LLM-generated descriptions for segments. **Note:** This
|
172
|
+
uses chunkr's own VLM models and is not configurable via LLM processing
|
173
|
+
configuration.
|
174
|
+
- `extended_context` uses the full page image as context for LLM generation.
|
175
|
+
"""
|
176
|
+
|
177
|
+
table: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Table")]
|
178
|
+
"""Controls the processing and generation for the segment.
|
179
|
+
|
180
|
+
- `crop_image` controls whether to crop the file's images to the segment's
|
181
|
+
bounding box. The cropped image will be stored in the segment's `image` field.
|
182
|
+
Use `All` to always crop, or `Auto` to only crop when needed for
|
183
|
+
post-processing.
|
184
|
+
- `format` specifies the output format: `Html` or `Markdown`
|
185
|
+
- `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
|
186
|
+
- `Auto`: Process content automatically
|
187
|
+
- `LLM`: Use large language models for processing
|
188
|
+
- `Ignore`: Exclude segments from final output
|
189
|
+
- `description` enables LLM-generated descriptions for segments. **Note:** This
|
190
|
+
uses chunkr's own VLM models and is not configurable via LLM processing
|
191
|
+
configuration.
|
192
|
+
- `extended_context` uses the full page image as context for LLM generation.
|
193
|
+
"""
|
194
|
+
|
195
|
+
text: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Text")]
|
196
|
+
"""Controls the processing and generation for the segment.
|
197
|
+
|
198
|
+
- `crop_image` controls whether to crop the file's images to the segment's
|
199
|
+
bounding box. The cropped image will be stored in the segment's `image` field.
|
200
|
+
Use `All` to always crop, or `Auto` to only crop when needed for
|
201
|
+
post-processing.
|
202
|
+
- `format` specifies the output format: `Html` or `Markdown`
|
203
|
+
- `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
|
204
|
+
- `Auto`: Process content automatically
|
205
|
+
- `LLM`: Use large language models for processing
|
206
|
+
- `Ignore`: Exclude segments from final output
|
207
|
+
- `description` enables LLM-generated descriptions for segments. **Note:** This
|
208
|
+
uses chunkr's own VLM models and is not configurable via LLM processing
|
209
|
+
configuration.
|
210
|
+
- `extended_context` uses the full page image as context for LLM generation.
|
211
|
+
"""
|
212
|
+
|
213
|
+
title: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Title")]
|
214
|
+
"""Controls the processing and generation for the segment.
|
215
|
+
|
216
|
+
- `crop_image` controls whether to crop the file's images to the segment's
|
217
|
+
bounding box. The cropped image will be stored in the segment's `image` field.
|
218
|
+
Use `All` to always crop, or `Auto` to only crop when needed for
|
219
|
+
post-processing.
|
220
|
+
- `format` specifies the output format: `Html` or `Markdown`
|
221
|
+
- `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
|
222
|
+
- `Auto`: Process content automatically
|
223
|
+
- `LLM`: Use large language models for processing
|
224
|
+
- `Ignore`: Exclude segments from final output
|
225
|
+
- `description` enables LLM-generated descriptions for segments. **Note:** This
|
226
|
+
uses chunkr's own VLM models and is not configurable via LLM processing
|
227
|
+
configuration.
|
228
|
+
- `extended_context` uses the full page image as context for LLM generation.
|
229
|
+
"""
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
+
|
3
|
+
from typing import Optional
|
4
|
+
from typing_extensions import Literal
|
5
|
+
|
6
|
+
from .._models import BaseModel
|
7
|
+
|
8
|
+
__all__ = ["TaskExtractUpdatedWebhookEvent"]
|
9
|
+
|
10
|
+
|
11
|
+
class TaskExtractUpdatedWebhookEvent(BaseModel):
|
12
|
+
event_type: Literal["task.parse.updated", "task.extract.updated"]
|
13
|
+
"""Event type identifier"""
|
14
|
+
|
15
|
+
status: Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]
|
16
|
+
"""Current status of the task"""
|
17
|
+
|
18
|
+
task_id: str
|
19
|
+
"""Unique task identifier"""
|
20
|
+
|
21
|
+
message: Optional[str] = None
|
22
|
+
"""Optional human-readable status message"""
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
from __future__ import annotations
|
4
4
|
|
5
|
-
from typing import Union
|
5
|
+
from typing import List, Union
|
6
6
|
from datetime import datetime
|
7
7
|
from typing_extensions import Literal, Annotated, TypedDict
|
8
8
|
|
@@ -35,3 +35,9 @@ class TaskListParams(TypedDict, total=False):
|
|
35
35
|
|
36
36
|
start: Annotated[Union[str, datetime], PropertyInfo(format="iso8601")]
|
37
37
|
"""Start date"""
|
38
|
+
|
39
|
+
statuses: List[Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]]
|
40
|
+
"""Filter by one or more statuses"""
|
41
|
+
|
42
|
+
task_types: List[Literal["Parse", "Extract"]]
|
43
|
+
"""Filter by one or more task types"""
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
+
|
3
|
+
from typing import Optional
|
4
|
+
from typing_extensions import Literal
|
5
|
+
|
6
|
+
from .._models import BaseModel
|
7
|
+
|
8
|
+
__all__ = ["TaskParseUpdatedWebhookEvent"]
|
9
|
+
|
10
|
+
|
11
|
+
class TaskParseUpdatedWebhookEvent(BaseModel):
|
12
|
+
event_type: Literal["task.parse.updated", "task.extract.updated"]
|
13
|
+
"""Event type identifier"""
|
14
|
+
|
15
|
+
status: Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]
|
16
|
+
"""Current status of the task"""
|
17
|
+
|
18
|
+
task_id: str
|
19
|
+
"""Unique task identifier"""
|
20
|
+
|
21
|
+
message: Optional[str] = None
|
22
|
+
"""Optional human-readable status message"""
|
@@ -0,0 +1,68 @@
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
+
|
3
|
+
from typing import Union, Optional
|
4
|
+
from datetime import datetime
|
5
|
+
from typing_extensions import Literal, TypeAlias
|
6
|
+
|
7
|
+
from .._models import BaseModel
|
8
|
+
from .file_info import FileInfo
|
9
|
+
from .version_info import VersionInfo
|
10
|
+
from .parse_configuration import ParseConfiguration
|
11
|
+
from .extract_configuration import ExtractConfiguration
|
12
|
+
from .parse_output_response import ParseOutputResponse
|
13
|
+
from .extract_output_response import ExtractOutputResponse
|
14
|
+
|
15
|
+
__all__ = ["TaskResponse", "Configuration", "Output"]
|
16
|
+
|
17
|
+
Configuration: TypeAlias = Union[ParseConfiguration, ExtractConfiguration]
|
18
|
+
|
19
|
+
Output: TypeAlias = Union[ParseOutputResponse, ExtractOutputResponse, None]
|
20
|
+
|
21
|
+
|
22
|
+
class TaskResponse(BaseModel):
|
23
|
+
configuration: Configuration
|
24
|
+
"""
|
25
|
+
Unified configuration type that can represent either parse or extract
|
26
|
+
configurations
|
27
|
+
"""
|
28
|
+
|
29
|
+
created_at: datetime
|
30
|
+
"""The date and time when the task was created and queued."""
|
31
|
+
|
32
|
+
file_info: FileInfo
|
33
|
+
"""Information about the input file."""
|
34
|
+
|
35
|
+
message: str
|
36
|
+
"""A message describing the task's status or any errors that occurred."""
|
37
|
+
|
38
|
+
status: Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]
|
39
|
+
"""The status of the task."""
|
40
|
+
|
41
|
+
task_id: str
|
42
|
+
"""The unique identifier for the task."""
|
43
|
+
|
44
|
+
task_type: Literal["Parse", "Extract"]
|
45
|
+
|
46
|
+
version_info: VersionInfo
|
47
|
+
"""Version information for the task."""
|
48
|
+
|
49
|
+
expires_at: Optional[datetime] = None
|
50
|
+
"""The date and time when the task will expire."""
|
51
|
+
|
52
|
+
finished_at: Optional[datetime] = None
|
53
|
+
"""The date and time when the task was finished."""
|
54
|
+
|
55
|
+
input_file_url: Optional[str] = None
|
56
|
+
"""The presigned URL of the input file. Deprecated: use `file_info.url` instead."""
|
57
|
+
|
58
|
+
output: Optional[Output] = None
|
59
|
+
"""Unified output type that can represent either parse or extract results"""
|
60
|
+
|
61
|
+
source_task_id: Optional[str] = None
|
62
|
+
"""The ID of the source task that was used for the task"""
|
63
|
+
|
64
|
+
started_at: Optional[datetime] = None
|
65
|
+
"""The date and time when the task was started."""
|
66
|
+
|
67
|
+
task_url: Optional[str] = None
|
68
|
+
"""The presigned URL of the task."""
|
@@ -2,5 +2,11 @@
|
|
2
2
|
|
3
3
|
from __future__ import annotations
|
4
4
|
|
5
|
+
from .parse_get_params import ParseGetParams as ParseGetParams
|
6
|
+
from .extract_get_params import ExtractGetParams as ExtractGetParams
|
7
|
+
from .parse_get_response import ParseGetResponse as ParseGetResponse
|
5
8
|
from .parse_create_params import ParseCreateParams as ParseCreateParams
|
6
|
-
from .
|
9
|
+
from .extract_get_response import ExtractGetResponse as ExtractGetResponse
|
10
|
+
from .extract_create_params import ExtractCreateParams as ExtractCreateParams
|
11
|
+
from .parse_create_response import ParseCreateResponse as ParseCreateResponse
|
12
|
+
from .extract_create_response import ExtractCreateResponse as ExtractCreateResponse
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
from typing import Optional
|
6
|
+
from typing_extensions import Required, TypedDict
|
7
|
+
|
8
|
+
from ..parse_configuration_param import ParseConfigurationParam
|
9
|
+
|
10
|
+
__all__ = ["ExtractCreateParams"]
|
11
|
+
|
12
|
+
|
13
|
+
class ExtractCreateParams(TypedDict, total=False):
|
14
|
+
file: Required[str]
|
15
|
+
"""The file to be extracted. Supported inputs:
|
16
|
+
|
17
|
+
- `ch://files/{file_id}`: Reference to an existing file. Upload via the Files
|
18
|
+
API
|
19
|
+
- `http(s)://...`: Remote URL to fetch
|
20
|
+
- `data:*;base64,...` or raw base64 string
|
21
|
+
- `task_id`: Reference to an existing `parse` task.
|
22
|
+
"""
|
23
|
+
|
24
|
+
schema: Required[object]
|
25
|
+
"""The schema to be used for the extraction."""
|
26
|
+
|
27
|
+
expires_in: Optional[int]
|
28
|
+
"""
|
29
|
+
The number of seconds until the task is deleted. Expired tasks can **not** be
|
30
|
+
updated, polled or accessed via web interface.
|
31
|
+
"""
|
32
|
+
|
33
|
+
file_name: Optional[str]
|
34
|
+
"""The name of the file to be extracted.
|
35
|
+
|
36
|
+
If not set, a name will be generated. Cannot be provided if the `file` is a
|
37
|
+
`task_id`.
|
38
|
+
"""
|
39
|
+
|
40
|
+
parse_configuration: Optional[ParseConfigurationParam]
|
41
|
+
"""
|
42
|
+
Optional configuration for the `parse` task. Cannot be used if `file` is a
|
43
|
+
`task_id`.
|
44
|
+
"""
|
45
|
+
|
46
|
+
system_prompt: Optional[str]
|
47
|
+
"""The system prompt to be used for the extraction."""
|