chunkr-ai 0.1.0a6__py3-none-any.whl → 0.1.0a8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. chunkr_ai/__init__.py +2 -0
  2. chunkr_ai/_base_client.py +3 -3
  3. chunkr_ai/_client.py +31 -3
  4. chunkr_ai/_compat.py +48 -48
  5. chunkr_ai/_constants.py +5 -5
  6. chunkr_ai/_exceptions.py +4 -0
  7. chunkr_ai/_models.py +41 -41
  8. chunkr_ai/_types.py +35 -1
  9. chunkr_ai/_utils/__init__.py +9 -2
  10. chunkr_ai/_utils/_compat.py +45 -0
  11. chunkr_ai/_utils/_datetime_parse.py +136 -0
  12. chunkr_ai/_utils/_transform.py +11 -1
  13. chunkr_ai/_utils/_typing.py +6 -1
  14. chunkr_ai/_utils/_utils.py +0 -1
  15. chunkr_ai/_version.py +1 -1
  16. chunkr_ai/resources/__init__.py +14 -0
  17. chunkr_ai/resources/files.py +3 -3
  18. chunkr_ai/resources/tasks/__init__.py +14 -0
  19. chunkr_ai/resources/tasks/extract.py +393 -0
  20. chunkr_ai/resources/tasks/parse.py +110 -286
  21. chunkr_ai/resources/tasks/tasks.py +64 -32
  22. chunkr_ai/resources/webhooks.py +193 -0
  23. chunkr_ai/types/__init__.py +27 -1
  24. chunkr_ai/types/bounding_box.py +19 -0
  25. chunkr_ai/types/cell.py +39 -0
  26. chunkr_ai/types/cell_style.py +28 -0
  27. chunkr_ai/types/chunk.py +40 -0
  28. chunkr_ai/types/chunk_processing.py +40 -0
  29. chunkr_ai/types/chunk_processing_param.py +42 -0
  30. chunkr_ai/types/extract_configuration.py +24 -0
  31. chunkr_ai/types/extract_output_response.py +62 -0
  32. chunkr_ai/types/file_create_params.py +2 -1
  33. chunkr_ai/types/file_info.py +21 -0
  34. chunkr_ai/types/generation_config.py +29 -0
  35. chunkr_ai/types/generation_config_param.py +29 -0
  36. chunkr_ai/types/llm_processing.py +36 -0
  37. chunkr_ai/types/llm_processing_param.py +36 -0
  38. chunkr_ai/types/ocr_result.py +28 -0
  39. chunkr_ai/types/page.py +27 -0
  40. chunkr_ai/types/parse_configuration.py +64 -0
  41. chunkr_ai/types/parse_configuration_param.py +65 -0
  42. chunkr_ai/types/parse_output_response.py +29 -0
  43. chunkr_ai/types/segment.py +109 -0
  44. chunkr_ai/types/segment_processing.py +228 -0
  45. chunkr_ai/types/segment_processing_param.py +229 -0
  46. chunkr_ai/types/task_extract_updated_webhook_event.py +22 -0
  47. chunkr_ai/types/task_get_params.py +0 -3
  48. chunkr_ai/types/task_list_params.py +7 -1
  49. chunkr_ai/types/task_parse_updated_webhook_event.py +22 -0
  50. chunkr_ai/types/task_response.py +68 -0
  51. chunkr_ai/types/tasks/__init__.py +7 -1
  52. chunkr_ai/types/tasks/extract_create_params.py +47 -0
  53. chunkr_ai/types/tasks/extract_create_response.py +67 -0
  54. chunkr_ai/types/tasks/extract_get_params.py +18 -0
  55. chunkr_ai/types/tasks/extract_get_response.py +67 -0
  56. chunkr_ai/types/tasks/parse_create_params.py +25 -793
  57. chunkr_ai/types/tasks/parse_create_response.py +55 -0
  58. chunkr_ai/types/tasks/parse_get_params.py +18 -0
  59. chunkr_ai/types/tasks/parse_get_response.py +55 -0
  60. chunkr_ai/types/unwrap_webhook_event.py +11 -0
  61. chunkr_ai/types/version_info.py +31 -0
  62. chunkr_ai/types/webhook_url_response.py +9 -0
  63. {chunkr_ai-0.1.0a6.dist-info → chunkr_ai-0.1.0a8.dist-info}/METADATA +14 -13
  64. chunkr_ai-0.1.0a8.dist-info/RECORD +88 -0
  65. chunkr_ai/types/task.py +0 -1225
  66. chunkr_ai/types/tasks/parse_update_params.py +0 -845
  67. chunkr_ai-0.1.0a6.dist-info/RECORD +0 -52
  68. {chunkr_ai-0.1.0a6.dist-info → chunkr_ai-0.1.0a8.dist-info}/WHEEL +0 -0
  69. {chunkr_ai-0.1.0a6.dist-info → chunkr_ai-0.1.0a8.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,228 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from typing import Optional
4
+
5
+ from pydantic import Field as FieldInfo
6
+
7
+ from .._models import BaseModel
8
+ from .generation_config import GenerationConfig
9
+
10
+ __all__ = ["SegmentProcessing"]
11
+
12
+
13
+ class SegmentProcessing(BaseModel):
14
+ caption: Optional[GenerationConfig] = FieldInfo(alias="Caption", default=None)
15
+ """Controls the processing and generation for the segment.
16
+
17
+ - `crop_image` controls whether to crop the file's images to the segment's
18
+ bounding box. The cropped image will be stored in the segment's `image` field.
19
+ Use `All` to always crop, or `Auto` to only crop when needed for
20
+ post-processing.
21
+ - `format` specifies the output format: `Html` or `Markdown`
22
+ - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
23
+ - `Auto`: Process content automatically
24
+ - `LLM`: Use large language models for processing
25
+ - `Ignore`: Exclude segments from final output
26
+ - `description` enables LLM-generated descriptions for segments. **Note:** This
27
+ uses chunkr's own VLM models and is not configurable via LLM processing
28
+ configuration.
29
+ - `extended_context` uses the full page image as context for LLM generation.
30
+ """
31
+
32
+ footnote: Optional[GenerationConfig] = FieldInfo(alias="Footnote", default=None)
33
+ """Controls the processing and generation for the segment.
34
+
35
+ - `crop_image` controls whether to crop the file's images to the segment's
36
+ bounding box. The cropped image will be stored in the segment's `image` field.
37
+ Use `All` to always crop, or `Auto` to only crop when needed for
38
+ post-processing.
39
+ - `format` specifies the output format: `Html` or `Markdown`
40
+ - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
41
+ - `Auto`: Process content automatically
42
+ - `LLM`: Use large language models for processing
43
+ - `Ignore`: Exclude segments from final output
44
+ - `description` enables LLM-generated descriptions for segments. **Note:** This
45
+ uses chunkr's own VLM models and is not configurable via LLM processing
46
+ configuration.
47
+ - `extended_context` uses the full page image as context for LLM generation.
48
+ """
49
+
50
+ formula: Optional[GenerationConfig] = FieldInfo(alias="Formula", default=None)
51
+ """Controls the processing and generation for the segment.
52
+
53
+ - `crop_image` controls whether to crop the file's images to the segment's
54
+ bounding box. The cropped image will be stored in the segment's `image` field.
55
+ Use `All` to always crop, or `Auto` to only crop when needed for
56
+ post-processing.
57
+ - `format` specifies the output format: `Html` or `Markdown`
58
+ - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
59
+ - `Auto`: Process content automatically
60
+ - `LLM`: Use large language models for processing
61
+ - `Ignore`: Exclude segments from final output
62
+ - `description` enables LLM-generated descriptions for segments. **Note:** This
63
+ uses chunkr's own VLM models and is not configurable via LLM processing
64
+ configuration.
65
+ - `extended_context` uses the full page image as context for LLM generation.
66
+ """
67
+
68
+ list_item: Optional[GenerationConfig] = FieldInfo(alias="ListItem", default=None)
69
+ """Controls the processing and generation for the segment.
70
+
71
+ - `crop_image` controls whether to crop the file's images to the segment's
72
+ bounding box. The cropped image will be stored in the segment's `image` field.
73
+ Use `All` to always crop, or `Auto` to only crop when needed for
74
+ post-processing.
75
+ - `format` specifies the output format: `Html` or `Markdown`
76
+ - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
77
+ - `Auto`: Process content automatically
78
+ - `LLM`: Use large language models for processing
79
+ - `Ignore`: Exclude segments from final output
80
+ - `description` enables LLM-generated descriptions for segments. **Note:** This
81
+ uses chunkr's own VLM models and is not configurable via LLM processing
82
+ configuration.
83
+ - `extended_context` uses the full page image as context for LLM generation.
84
+ """
85
+
86
+ page: Optional[GenerationConfig] = FieldInfo(alias="Page", default=None)
87
+ """Controls the processing and generation for the segment.
88
+
89
+ - `crop_image` controls whether to crop the file's images to the segment's
90
+ bounding box. The cropped image will be stored in the segment's `image` field.
91
+ Use `All` to always crop, or `Auto` to only crop when needed for
92
+ post-processing.
93
+ - `format` specifies the output format: `Html` or `Markdown`
94
+ - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
95
+ - `Auto`: Process content automatically
96
+ - `LLM`: Use large language models for processing
97
+ - `Ignore`: Exclude segments from final output
98
+ - `description` enables LLM-generated descriptions for segments. **Note:** This
99
+ uses chunkr's own VLM models and is not configurable via LLM processing
100
+ configuration.
101
+ - `extended_context` uses the full page image as context for LLM generation.
102
+ """
103
+
104
+ page_footer: Optional[GenerationConfig] = FieldInfo(alias="PageFooter", default=None)
105
+ """Controls the processing and generation for the segment.
106
+
107
+ - `crop_image` controls whether to crop the file's images to the segment's
108
+ bounding box. The cropped image will be stored in the segment's `image` field.
109
+ Use `All` to always crop, or `Auto` to only crop when needed for
110
+ post-processing.
111
+ - `format` specifies the output format: `Html` or `Markdown`
112
+ - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
113
+ - `Auto`: Process content automatically
114
+ - `LLM`: Use large language models for processing
115
+ - `Ignore`: Exclude segments from final output
116
+ - `description` enables LLM-generated descriptions for segments. **Note:** This
117
+ uses chunkr's own VLM models and is not configurable via LLM processing
118
+ configuration.
119
+ - `extended_context` uses the full page image as context for LLM generation.
120
+ """
121
+
122
+ page_header: Optional[GenerationConfig] = FieldInfo(alias="PageHeader", default=None)
123
+ """Controls the processing and generation for the segment.
124
+
125
+ - `crop_image` controls whether to crop the file's images to the segment's
126
+ bounding box. The cropped image will be stored in the segment's `image` field.
127
+ Use `All` to always crop, or `Auto` to only crop when needed for
128
+ post-processing.
129
+ - `format` specifies the output format: `Html` or `Markdown`
130
+ - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
131
+ - `Auto`: Process content automatically
132
+ - `LLM`: Use large language models for processing
133
+ - `Ignore`: Exclude segments from final output
134
+ - `description` enables LLM-generated descriptions for segments. **Note:** This
135
+ uses chunkr's own VLM models and is not configurable via LLM processing
136
+ configuration.
137
+ - `extended_context` uses the full page image as context for LLM generation.
138
+ """
139
+
140
+ picture: Optional[GenerationConfig] = FieldInfo(alias="Picture", default=None)
141
+ """Controls the processing and generation for the segment.
142
+
143
+ - `crop_image` controls whether to crop the file's images to the segment's
144
+ bounding box. The cropped image will be stored in the segment's `image` field.
145
+ Use `All` to always crop, or `Auto` to only crop when needed for
146
+ post-processing.
147
+ - `format` specifies the output format: `Html` or `Markdown`
148
+ - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
149
+ - `Auto`: Process content automatically
150
+ - `LLM`: Use large language models for processing
151
+ - `Ignore`: Exclude segments from final output
152
+ - `description` enables LLM-generated descriptions for segments. **Note:** This
153
+ uses chunkr's own VLM models and is not configurable via LLM processing
154
+ configuration.
155
+ - `extended_context` uses the full page image as context for LLM generation.
156
+ """
157
+
158
+ section_header: Optional[GenerationConfig] = FieldInfo(alias="SectionHeader", default=None)
159
+ """Controls the processing and generation for the segment.
160
+
161
+ - `crop_image` controls whether to crop the file's images to the segment's
162
+ bounding box. The cropped image will be stored in the segment's `image` field.
163
+ Use `All` to always crop, or `Auto` to only crop when needed for
164
+ post-processing.
165
+ - `format` specifies the output format: `Html` or `Markdown`
166
+ - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
167
+ - `Auto`: Process content automatically
168
+ - `LLM`: Use large language models for processing
169
+ - `Ignore`: Exclude segments from final output
170
+ - `description` enables LLM-generated descriptions for segments. **Note:** This
171
+ uses chunkr's own VLM models and is not configurable via LLM processing
172
+ configuration.
173
+ - `extended_context` uses the full page image as context for LLM generation.
174
+ """
175
+
176
+ table: Optional[GenerationConfig] = FieldInfo(alias="Table", default=None)
177
+ """Controls the processing and generation for the segment.
178
+
179
+ - `crop_image` controls whether to crop the file's images to the segment's
180
+ bounding box. The cropped image will be stored in the segment's `image` field.
181
+ Use `All` to always crop, or `Auto` to only crop when needed for
182
+ post-processing.
183
+ - `format` specifies the output format: `Html` or `Markdown`
184
+ - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
185
+ - `Auto`: Process content automatically
186
+ - `LLM`: Use large language models for processing
187
+ - `Ignore`: Exclude segments from final output
188
+ - `description` enables LLM-generated descriptions for segments. **Note:** This
189
+ uses chunkr's own VLM models and is not configurable via LLM processing
190
+ configuration.
191
+ - `extended_context` uses the full page image as context for LLM generation.
192
+ """
193
+
194
+ text: Optional[GenerationConfig] = FieldInfo(alias="Text", default=None)
195
+ """Controls the processing and generation for the segment.
196
+
197
+ - `crop_image` controls whether to crop the file's images to the segment's
198
+ bounding box. The cropped image will be stored in the segment's `image` field.
199
+ Use `All` to always crop, or `Auto` to only crop when needed for
200
+ post-processing.
201
+ - `format` specifies the output format: `Html` or `Markdown`
202
+ - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
203
+ - `Auto`: Process content automatically
204
+ - `LLM`: Use large language models for processing
205
+ - `Ignore`: Exclude segments from final output
206
+ - `description` enables LLM-generated descriptions for segments. **Note:** This
207
+ uses chunkr's own VLM models and is not configurable via LLM processing
208
+ configuration.
209
+ - `extended_context` uses the full page image as context for LLM generation.
210
+ """
211
+
212
+ title: Optional[GenerationConfig] = FieldInfo(alias="Title", default=None)
213
+ """Controls the processing and generation for the segment.
214
+
215
+ - `crop_image` controls whether to crop the file's images to the segment's
216
+ bounding box. The cropped image will be stored in the segment's `image` field.
217
+ Use `All` to always crop, or `Auto` to only crop when needed for
218
+ post-processing.
219
+ - `format` specifies the output format: `Html` or `Markdown`
220
+ - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
221
+ - `Auto`: Process content automatically
222
+ - `LLM`: Use large language models for processing
223
+ - `Ignore`: Exclude segments from final output
224
+ - `description` enables LLM-generated descriptions for segments. **Note:** This
225
+ uses chunkr's own VLM models and is not configurable via LLM processing
226
+ configuration.
227
+ - `extended_context` uses the full page image as context for LLM generation.
228
+ """
@@ -0,0 +1,229 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Optional
6
+ from typing_extensions import Annotated, TypedDict
7
+
8
+ from .._utils import PropertyInfo
9
+ from .generation_config_param import GenerationConfigParam
10
+
11
+ __all__ = ["SegmentProcessingParam"]
12
+
13
+
14
+ class SegmentProcessingParam(TypedDict, total=False):
15
+ caption: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Caption")]
16
+ """Controls the processing and generation for the segment.
17
+
18
+ - `crop_image` controls whether to crop the file's images to the segment's
19
+ bounding box. The cropped image will be stored in the segment's `image` field.
20
+ Use `All` to always crop, or `Auto` to only crop when needed for
21
+ post-processing.
22
+ - `format` specifies the output format: `Html` or `Markdown`
23
+ - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
24
+ - `Auto`: Process content automatically
25
+ - `LLM`: Use large language models for processing
26
+ - `Ignore`: Exclude segments from final output
27
+ - `description` enables LLM-generated descriptions for segments. **Note:** This
28
+ uses chunkr's own VLM models and is not configurable via LLM processing
29
+ configuration.
30
+ - `extended_context` uses the full page image as context for LLM generation.
31
+ """
32
+
33
+ footnote: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Footnote")]
34
+ """Controls the processing and generation for the segment.
35
+
36
+ - `crop_image` controls whether to crop the file's images to the segment's
37
+ bounding box. The cropped image will be stored in the segment's `image` field.
38
+ Use `All` to always crop, or `Auto` to only crop when needed for
39
+ post-processing.
40
+ - `format` specifies the output format: `Html` or `Markdown`
41
+ - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
42
+ - `Auto`: Process content automatically
43
+ - `LLM`: Use large language models for processing
44
+ - `Ignore`: Exclude segments from final output
45
+ - `description` enables LLM-generated descriptions for segments. **Note:** This
46
+ uses chunkr's own VLM models and is not configurable via LLM processing
47
+ configuration.
48
+ - `extended_context` uses the full page image as context for LLM generation.
49
+ """
50
+
51
+ formula: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Formula")]
52
+ """Controls the processing and generation for the segment.
53
+
54
+ - `crop_image` controls whether to crop the file's images to the segment's
55
+ bounding box. The cropped image will be stored in the segment's `image` field.
56
+ Use `All` to always crop, or `Auto` to only crop when needed for
57
+ post-processing.
58
+ - `format` specifies the output format: `Html` or `Markdown`
59
+ - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
60
+ - `Auto`: Process content automatically
61
+ - `LLM`: Use large language models for processing
62
+ - `Ignore`: Exclude segments from final output
63
+ - `description` enables LLM-generated descriptions for segments. **Note:** This
64
+ uses chunkr's own VLM models and is not configurable via LLM processing
65
+ configuration.
66
+ - `extended_context` uses the full page image as context for LLM generation.
67
+ """
68
+
69
+ list_item: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="ListItem")]
70
+ """Controls the processing and generation for the segment.
71
+
72
+ - `crop_image` controls whether to crop the file's images to the segment's
73
+ bounding box. The cropped image will be stored in the segment's `image` field.
74
+ Use `All` to always crop, or `Auto` to only crop when needed for
75
+ post-processing.
76
+ - `format` specifies the output format: `Html` or `Markdown`
77
+ - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
78
+ - `Auto`: Process content automatically
79
+ - `LLM`: Use large language models for processing
80
+ - `Ignore`: Exclude segments from final output
81
+ - `description` enables LLM-generated descriptions for segments. **Note:** This
82
+ uses chunkr's own VLM models and is not configurable via LLM processing
83
+ configuration.
84
+ - `extended_context` uses the full page image as context for LLM generation.
85
+ """
86
+
87
+ page: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Page")]
88
+ """Controls the processing and generation for the segment.
89
+
90
+ - `crop_image` controls whether to crop the file's images to the segment's
91
+ bounding box. The cropped image will be stored in the segment's `image` field.
92
+ Use `All` to always crop, or `Auto` to only crop when needed for
93
+ post-processing.
94
+ - `format` specifies the output format: `Html` or `Markdown`
95
+ - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
96
+ - `Auto`: Process content automatically
97
+ - `LLM`: Use large language models for processing
98
+ - `Ignore`: Exclude segments from final output
99
+ - `description` enables LLM-generated descriptions for segments. **Note:** This
100
+ uses chunkr's own VLM models and is not configurable via LLM processing
101
+ configuration.
102
+ - `extended_context` uses the full page image as context for LLM generation.
103
+ """
104
+
105
+ page_footer: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="PageFooter")]
106
+ """Controls the processing and generation for the segment.
107
+
108
+ - `crop_image` controls whether to crop the file's images to the segment's
109
+ bounding box. The cropped image will be stored in the segment's `image` field.
110
+ Use `All` to always crop, or `Auto` to only crop when needed for
111
+ post-processing.
112
+ - `format` specifies the output format: `Html` or `Markdown`
113
+ - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
114
+ - `Auto`: Process content automatically
115
+ - `LLM`: Use large language models for processing
116
+ - `Ignore`: Exclude segments from final output
117
+ - `description` enables LLM-generated descriptions for segments. **Note:** This
118
+ uses chunkr's own VLM models and is not configurable via LLM processing
119
+ configuration.
120
+ - `extended_context` uses the full page image as context for LLM generation.
121
+ """
122
+
123
+ page_header: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="PageHeader")]
124
+ """Controls the processing and generation for the segment.
125
+
126
+ - `crop_image` controls whether to crop the file's images to the segment's
127
+ bounding box. The cropped image will be stored in the segment's `image` field.
128
+ Use `All` to always crop, or `Auto` to only crop when needed for
129
+ post-processing.
130
+ - `format` specifies the output format: `Html` or `Markdown`
131
+ - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
132
+ - `Auto`: Process content automatically
133
+ - `LLM`: Use large language models for processing
134
+ - `Ignore`: Exclude segments from final output
135
+ - `description` enables LLM-generated descriptions for segments. **Note:** This
136
+ uses chunkr's own VLM models and is not configurable via LLM processing
137
+ configuration.
138
+ - `extended_context` uses the full page image as context for LLM generation.
139
+ """
140
+
141
+ picture: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Picture")]
142
+ """Controls the processing and generation for the segment.
143
+
144
+ - `crop_image` controls whether to crop the file's images to the segment's
145
+ bounding box. The cropped image will be stored in the segment's `image` field.
146
+ Use `All` to always crop, or `Auto` to only crop when needed for
147
+ post-processing.
148
+ - `format` specifies the output format: `Html` or `Markdown`
149
+ - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
150
+ - `Auto`: Process content automatically
151
+ - `LLM`: Use large language models for processing
152
+ - `Ignore`: Exclude segments from final output
153
+ - `description` enables LLM-generated descriptions for segments. **Note:** This
154
+ uses chunkr's own VLM models and is not configurable via LLM processing
155
+ configuration.
156
+ - `extended_context` uses the full page image as context for LLM generation.
157
+ """
158
+
159
+ section_header: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="SectionHeader")]
160
+ """Controls the processing and generation for the segment.
161
+
162
+ - `crop_image` controls whether to crop the file's images to the segment's
163
+ bounding box. The cropped image will be stored in the segment's `image` field.
164
+ Use `All` to always crop, or `Auto` to only crop when needed for
165
+ post-processing.
166
+ - `format` specifies the output format: `Html` or `Markdown`
167
+ - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
168
+ - `Auto`: Process content automatically
169
+ - `LLM`: Use large language models for processing
170
+ - `Ignore`: Exclude segments from final output
171
+ - `description` enables LLM-generated descriptions for segments. **Note:** This
172
+ uses chunkr's own VLM models and is not configurable via LLM processing
173
+ configuration.
174
+ - `extended_context` uses the full page image as context for LLM generation.
175
+ """
176
+
177
+ table: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Table")]
178
+ """Controls the processing and generation for the segment.
179
+
180
+ - `crop_image` controls whether to crop the file's images to the segment's
181
+ bounding box. The cropped image will be stored in the segment's `image` field.
182
+ Use `All` to always crop, or `Auto` to only crop when needed for
183
+ post-processing.
184
+ - `format` specifies the output format: `Html` or `Markdown`
185
+ - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
186
+ - `Auto`: Process content automatically
187
+ - `LLM`: Use large language models for processing
188
+ - `Ignore`: Exclude segments from final output
189
+ - `description` enables LLM-generated descriptions for segments. **Note:** This
190
+ uses chunkr's own VLM models and is not configurable via LLM processing
191
+ configuration.
192
+ - `extended_context` uses the full page image as context for LLM generation.
193
+ """
194
+
195
+ text: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Text")]
196
+ """Controls the processing and generation for the segment.
197
+
198
+ - `crop_image` controls whether to crop the file's images to the segment's
199
+ bounding box. The cropped image will be stored in the segment's `image` field.
200
+ Use `All` to always crop, or `Auto` to only crop when needed for
201
+ post-processing.
202
+ - `format` specifies the output format: `Html` or `Markdown`
203
+ - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
204
+ - `Auto`: Process content automatically
205
+ - `LLM`: Use large language models for processing
206
+ - `Ignore`: Exclude segments from final output
207
+ - `description` enables LLM-generated descriptions for segments. **Note:** This
208
+ uses chunkr's own VLM models and is not configurable via LLM processing
209
+ configuration.
210
+ - `extended_context` uses the full page image as context for LLM generation.
211
+ """
212
+
213
+ title: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Title")]
214
+ """Controls the processing and generation for the segment.
215
+
216
+ - `crop_image` controls whether to crop the file's images to the segment's
217
+ bounding box. The cropped image will be stored in the segment's `image` field.
218
+ Use `All` to always crop, or `Auto` to only crop when needed for
219
+ post-processing.
220
+ - `format` specifies the output format: `Html` or `Markdown`
221
+ - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
222
+ - `Auto`: Process content automatically
223
+ - `LLM`: Use large language models for processing
224
+ - `Ignore`: Exclude segments from final output
225
+ - `description` enables LLM-generated descriptions for segments. **Note:** This
226
+ uses chunkr's own VLM models and is not configurable via LLM processing
227
+ configuration.
228
+ - `extended_context` uses the full page image as context for LLM generation.
229
+ """
@@ -0,0 +1,22 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from typing import Optional
4
+ from typing_extensions import Literal
5
+
6
+ from .._models import BaseModel
7
+
8
+ __all__ = ["TaskExtractUpdatedWebhookEvent"]
9
+
10
+
11
+ class TaskExtractUpdatedWebhookEvent(BaseModel):
12
+ event_type: Literal["task.parse.updated", "task.extract.updated"]
13
+ """Event type identifier"""
14
+
15
+ status: Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]
16
+ """Current status of the task"""
17
+
18
+ task_id: str
19
+ """Unique task identifier"""
20
+
21
+ message: Optional[str] = None
22
+ """Optional human-readable status message"""
@@ -16,6 +16,3 @@ class TaskGetParams(TypedDict, total=False):
16
16
 
17
17
  include_chunks: bool
18
18
  """Whether to include chunks in the output response"""
19
-
20
- wait_for_completion: bool
21
- """Whether to wait for the task to complete"""
@@ -2,7 +2,7 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from typing import Union
5
+ from typing import List, Union
6
6
  from datetime import datetime
7
7
  from typing_extensions import Literal, Annotated, TypedDict
8
8
 
@@ -35,3 +35,9 @@ class TaskListParams(TypedDict, total=False):
35
35
 
36
36
  start: Annotated[Union[str, datetime], PropertyInfo(format="iso8601")]
37
37
  """Start date"""
38
+
39
+ statuses: List[Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]]
40
+ """Filter by one or more statuses"""
41
+
42
+ task_types: List[Literal["Parse", "Extract"]]
43
+ """Filter by one or more task types"""
@@ -0,0 +1,22 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from typing import Optional
4
+ from typing_extensions import Literal
5
+
6
+ from .._models import BaseModel
7
+
8
+ __all__ = ["TaskParseUpdatedWebhookEvent"]
9
+
10
+
11
+ class TaskParseUpdatedWebhookEvent(BaseModel):
12
+ event_type: Literal["task.parse.updated", "task.extract.updated"]
13
+ """Event type identifier"""
14
+
15
+ status: Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]
16
+ """Current status of the task"""
17
+
18
+ task_id: str
19
+ """Unique task identifier"""
20
+
21
+ message: Optional[str] = None
22
+ """Optional human-readable status message"""
@@ -0,0 +1,68 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from typing import Union, Optional
4
+ from datetime import datetime
5
+ from typing_extensions import Literal, TypeAlias
6
+
7
+ from .._models import BaseModel
8
+ from .file_info import FileInfo
9
+ from .version_info import VersionInfo
10
+ from .parse_configuration import ParseConfiguration
11
+ from .extract_configuration import ExtractConfiguration
12
+ from .parse_output_response import ParseOutputResponse
13
+ from .extract_output_response import ExtractOutputResponse
14
+
15
+ __all__ = ["TaskResponse", "Configuration", "Output"]
16
+
17
+ Configuration: TypeAlias = Union[ParseConfiguration, ExtractConfiguration]
18
+
19
+ Output: TypeAlias = Union[ParseOutputResponse, ExtractOutputResponse, None]
20
+
21
+
22
+ class TaskResponse(BaseModel):
23
+ configuration: Configuration
24
+ """
25
+ Unified configuration type that can represent either parse or extract
26
+ configurations
27
+ """
28
+
29
+ created_at: datetime
30
+ """The date and time when the task was created and queued."""
31
+
32
+ file_info: FileInfo
33
+ """Information about the input file."""
34
+
35
+ message: str
36
+ """A message describing the task's status or any errors that occurred."""
37
+
38
+ status: Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]
39
+ """The status of the task."""
40
+
41
+ task_id: str
42
+ """The unique identifier for the task."""
43
+
44
+ task_type: Literal["Parse", "Extract"]
45
+
46
+ version_info: VersionInfo
47
+ """Version information for the task."""
48
+
49
+ expires_at: Optional[datetime] = None
50
+ """The date and time when the task will expire."""
51
+
52
+ finished_at: Optional[datetime] = None
53
+ """The date and time when the task was finished."""
54
+
55
+ input_file_url: Optional[str] = None
56
+ """The presigned URL of the input file. Deprecated: use `file_info.url` instead."""
57
+
58
+ output: Optional[Output] = None
59
+ """Unified output type that can represent either parse or extract results"""
60
+
61
+ source_task_id: Optional[str] = None
62
+ """The ID of the source task that was used for the task"""
63
+
64
+ started_at: Optional[datetime] = None
65
+ """The date and time when the task was started."""
66
+
67
+ task_url: Optional[str] = None
68
+ """The presigned URL of the task."""
@@ -2,5 +2,11 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ from .parse_get_params import ParseGetParams as ParseGetParams
6
+ from .extract_get_params import ExtractGetParams as ExtractGetParams
7
+ from .parse_get_response import ParseGetResponse as ParseGetResponse
5
8
  from .parse_create_params import ParseCreateParams as ParseCreateParams
6
- from .parse_update_params import ParseUpdateParams as ParseUpdateParams
9
+ from .extract_get_response import ExtractGetResponse as ExtractGetResponse
10
+ from .extract_create_params import ExtractCreateParams as ExtractCreateParams
11
+ from .parse_create_response import ParseCreateResponse as ParseCreateResponse
12
+ from .extract_create_response import ExtractCreateResponse as ExtractCreateResponse
@@ -0,0 +1,47 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Optional
6
+ from typing_extensions import Required, TypedDict
7
+
8
+ from ..parse_configuration_param import ParseConfigurationParam
9
+
10
+ __all__ = ["ExtractCreateParams"]
11
+
12
+
13
+ class ExtractCreateParams(TypedDict, total=False):
14
+ file: Required[str]
15
+ """The file to be extracted. Supported inputs:
16
+
17
+ - `ch://files/{file_id}`: Reference to an existing file. Upload via the Files
18
+ API
19
+ - `http(s)://...`: Remote URL to fetch
20
+ - `data:*;base64,...` or raw base64 string
21
+ - `task_id`: Reference to an existing `parse` task.
22
+ """
23
+
24
+ schema: Required[object]
25
+ """The schema to be used for the extraction."""
26
+
27
+ expires_in: Optional[int]
28
+ """
29
+ The number of seconds until the task is deleted. Expired tasks can **not** be
30
+ updated, polled, or accessed via the web interface.
31
+ """
32
+
33
+ file_name: Optional[str]
34
+ """The name of the file to be extracted.
35
+
36
+ If not set, a name will be generated. Cannot be provided if the `file` is a
37
+ `task_id`.
38
+ """
39
+
40
+ parse_configuration: Optional[ParseConfigurationParam]
41
+ """
42
+ Optional configuration for the `parse` task. Cannot be used if `file` is a
43
+ `task_id`.
44
+ """
45
+
46
+ system_prompt: Optional[str]
47
+ """The system prompt to be used for the extraction."""