chunkr-ai 0.1.0a11__py3-none-any.whl → 0.1.0a12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
chunkr_ai/_version.py CHANGED
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

 __title__ = "chunkr_ai"
-__version__ = "0.1.0-alpha.11" # x-release-please-version
+__version__ = "0.1.0-alpha.12" # x-release-please-version
chunkr_ai/resources/tasks/parse.py CHANGED
@@ -19,7 +19,6 @@ from ..._response import (
 )
 from ...types.tasks import parse_get_params, parse_create_params
 from ..._base_client import make_request_options
-from ...types.llm_processing_param import LlmProcessingParam
 from ...types.chunk_processing_param import ChunkProcessingParam
 from ...types.segment_processing_param import SegmentProcessingParam
 from ...types.tasks.parse_get_response import ParseGetResponse
@@ -56,7 +55,6 @@ class ParseResource(SyncAPIResource):
         error_handling: Literal["Fail", "Continue"] | Omit = omit,
         expires_in: Optional[int] | Omit = omit,
         file_name: Optional[str] | Omit = omit,
-        llm_processing: LlmProcessingParam | Omit = omit,
         ocr_strategy: Literal["All", "Auto"] | Omit = omit,
         pipeline: Literal["Azure", "Chunkr"] | Omit = omit,
         segment_processing: Optional[SegmentProcessingParam] | Omit = omit,
@@ -99,8 +97,6 @@ class ParseResource(SyncAPIResource):

           file_name: The name of the file to be parsed. If not set a name will be generated.

-          llm_processing: Controls the LLM used for the task.
-
           ocr_strategy: Controls the Optical Character Recognition (OCR) strategy.

              - `All`: Processes all pages with OCR. (Latency penalty: ~0.5 seconds per page)
@@ -150,7 +146,6 @@ class ParseResource(SyncAPIResource):
                     "error_handling": error_handling,
                     "expires_in": expires_in,
                     "file_name": file_name,
-                    "llm_processing": llm_processing,
                     "ocr_strategy": ocr_strategy,
                     "pipeline": pipeline,
                     "segment_processing": segment_processing,
@@ -256,7 +251,6 @@ class AsyncParseResource(AsyncAPIResource):
         error_handling: Literal["Fail", "Continue"] | Omit = omit,
         expires_in: Optional[int] | Omit = omit,
         file_name: Optional[str] | Omit = omit,
-        llm_processing: LlmProcessingParam | Omit = omit,
         ocr_strategy: Literal["All", "Auto"] | Omit = omit,
         pipeline: Literal["Azure", "Chunkr"] | Omit = omit,
         segment_processing: Optional[SegmentProcessingParam] | Omit = omit,
@@ -299,8 +293,6 @@ class AsyncParseResource(AsyncAPIResource):

           file_name: The name of the file to be parsed. If not set a name will be generated.

-          llm_processing: Controls the LLM used for the task.
-
           ocr_strategy: Controls the Optical Character Recognition (OCR) strategy.

              - `All`: Processes all pages with OCR. (Latency penalty: ~0.5 seconds per page)
@@ -350,7 +342,6 @@ class AsyncParseResource(AsyncAPIResource):
                     "error_handling": error_handling,
                     "expires_in": expires_in,
                     "file_name": file_name,
-                    "llm_processing": llm_processing,
                     "ocr_strategy": ocr_strategy,
                     "pipeline": pipeline,
                     "segment_processing": segment_processing,
chunkr_ai/types/__init__.py CHANGED
@@ -15,7 +15,6 @@ from .ocr_result import OcrResult as OcrResult
 from .bounding_box import BoundingBox as BoundingBox
 from .version_info import VersionInfo as VersionInfo
 from .task_response import TaskResponse as TaskResponse
-from .llm_processing import LlmProcessing as LlmProcessing
 from .file_url_params import FileURLParams as FileURLParams
 from .task_get_params import TaskGetParams as TaskGetParams
 from .chunk_processing import ChunkProcessing as ChunkProcessing
@@ -26,7 +25,6 @@ from .file_create_params import FileCreateParams as FileCreateParams
 from .segment_processing import SegmentProcessing as SegmentProcessing
 from .files_list_response import FilesListResponse as FilesListResponse
 from .parse_configuration import ParseConfiguration as ParseConfiguration
-from .llm_processing_param import LlmProcessingParam as LlmProcessingParam
 from .unwrap_webhook_event import UnwrapWebhookEvent as UnwrapWebhookEvent
 from .webhook_url_response import WebhookURLResponse as WebhookURLResponse
 from .extract_configuration import ExtractConfiguration as ExtractConfiguration
chunkr_ai/types/file_info.py CHANGED
@@ -19,3 +19,6 @@ class FileInfo(BaseModel):

     page_count: Optional[int] = None
     """The number of pages in the file."""
+
+    ss_cell_count: Optional[int] = None
+    """The number of cells in the file. Only used for spreadsheets."""
chunkr_ai/types/ocr_result.py CHANGED
@@ -15,14 +15,14 @@ class OcrResult(BaseModel):
     text: str
     """The recognized text of the OCR result."""

-    cell_ref: Optional[str] = None
-    """
-    Excel-style cell reference (e.g., "A1" or "A1:B2") when OCR originates from a
-    spreadsheet cell
-    """
-
     confidence: Optional[float] = None
     """The confidence score of the recognized text."""

     ocr_id: Optional[str] = None
     """The unique identifier for the OCR result."""
+
+    ss_cell_ref: Optional[str] = None
+    """
+    Excel-style cell reference (e.g., "A1" or "A1:B2") when OCR originates from a
+    spreadsheet cell
+    """
chunkr_ai/types/parse_configuration.py CHANGED
@@ -4,7 +4,6 @@ from typing import Optional
 from typing_extensions import Literal

 from .._models import BaseModel
-from .llm_processing import LlmProcessing
 from .chunk_processing import ChunkProcessing
 from .segment_processing import SegmentProcessing

@@ -23,9 +22,6 @@ class ParseConfiguration(BaseModel):
     LLM refusals etc.)
     """

-    llm_processing: Optional[LlmProcessing] = None
-    """Controls the LLM used for the task."""
-
     ocr_strategy: Optional[Literal["All", "Auto"]] = None
     """Controls the Optical Character Recognition (OCR) strategy.

chunkr_ai/types/parse_configuration_param.py CHANGED
@@ -5,7 +5,6 @@ from __future__ import annotations
 from typing import Optional
 from typing_extensions import Literal, TypedDict

-from .llm_processing_param import LlmProcessingParam
 from .chunk_processing_param import ChunkProcessingParam
 from .segment_processing_param import SegmentProcessingParam

@@ -24,9 +23,6 @@ class ParseConfigurationParam(TypedDict, total=False):
     LLM refusals etc.)
     """

-    llm_processing: LlmProcessingParam
-    """Controls the LLM used for the task."""
-
     ocr_strategy: Literal["All", "Auto"]
     """Controls the Optical Character Recognition (OCR) strategy.

chunkr_ai/types/segment.py CHANGED
@@ -31,20 +31,23 @@ class Segment(BaseModel):
         "Caption",
         "Footnote",
         "Formula",
+        "FormRegion",
+        "GraphicalItem",
+        "Legend",
+        "LineNumber",
         "ListItem",
         "Page",
         "PageFooter",
         "PageHeader",
+        "PageNumber",
         "Picture",
-        "SectionHeader",
         "Table",
         "Text",
         "Title",
+        "Unknown",
+        "SectionHeader",
     ]
-    """
-    All the possible types for a segment. Note: Different configurations will
-    produce different types. Please refer to the documentation for more information.
-    """
+    """All the possible types for a segment."""

     confidence: Optional[float] = None
     """Confidence score of the layout analysis model"""
chunkr_ai/types/segment_processing.py CHANGED
@@ -47,6 +47,24 @@ class SegmentProcessing(BaseModel):
     - `extended_context` uses the full page image as context for LLM generation.
     """

+    form_region: Optional[GenerationConfig] = FieldInfo(alias="FormRegion", default=None)
+    """Controls the processing and generation for the segment.
+
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+
     formula: Optional[GenerationConfig] = FieldInfo(alias="Formula", default=None)
     """Controls the processing and generation for the segment.

@@ -65,6 +83,60 @@ class SegmentProcessing(BaseModel):
     - `extended_context` uses the full page image as context for LLM generation.
     """

+    graphical_item: Optional[GenerationConfig] = FieldInfo(alias="GraphicalItem", default=None)
+    """Controls the processing and generation for the segment.
+
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+
+    legend: Optional[GenerationConfig] = FieldInfo(alias="Legend", default=None)
+    """Controls the processing and generation for the segment.
+
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+
+    line_number: Optional[GenerationConfig] = FieldInfo(alias="LineNumber", default=None)
+    """Controls the processing and generation for the segment.
+
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+
     list_item: Optional[GenerationConfig] = FieldInfo(alias="ListItem", default=None)
     """Controls the processing and generation for the segment.

@@ -137,7 +209,7 @@ class SegmentProcessing(BaseModel):
     - `extended_context` uses the full page image as context for LLM generation.
     """

-    picture: Optional[GenerationConfig] = FieldInfo(alias="Picture", default=None)
+    page_number: Optional[GenerationConfig] = FieldInfo(alias="PageNumber", default=None)
     """Controls the processing and generation for the segment.

     - `crop_image` controls whether to crop the file's images to the segment's
@@ -155,7 +227,7 @@ class SegmentProcessing(BaseModel):
     - `extended_context` uses the full page image as context for LLM generation.
     """

-    section_header: Optional[GenerationConfig] = FieldInfo(alias="SectionHeader", default=None)
+    picture: Optional[GenerationConfig] = FieldInfo(alias="Picture", default=None)
     """Controls the processing and generation for the segment.

     - `crop_image` controls whether to crop the file's images to the segment's
@@ -226,3 +298,21 @@ class SegmentProcessing(BaseModel):
       configuration.
     - `extended_context` uses the full page image as context for LLM generation.
     """
+
+    unknown: Optional[GenerationConfig] = FieldInfo(alias="Unknown", default=None)
+    """Controls the processing and generation for the segment.
+
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
chunkr_ai/types/segment_processing_param.py CHANGED
@@ -48,6 +48,24 @@ class SegmentProcessingParam(TypedDict, total=False):
     - `extended_context` uses the full page image as context for LLM generation.
     """

+    form_region: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="FormRegion")]
+    """Controls the processing and generation for the segment.
+
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+
     formula: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Formula")]
     """Controls the processing and generation for the segment.

@@ -66,6 +84,60 @@ class SegmentProcessingParam(TypedDict, total=False):
     - `extended_context` uses the full page image as context for LLM generation.
     """

+    graphical_item: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="GraphicalItem")]
+    """Controls the processing and generation for the segment.
+
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+
+    legend: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Legend")]
+    """Controls the processing and generation for the segment.
+
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+
+    line_number: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="LineNumber")]
+    """Controls the processing and generation for the segment.
+
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+
     list_item: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="ListItem")]
     """Controls the processing and generation for the segment.

@@ -138,7 +210,7 @@ class SegmentProcessingParam(TypedDict, total=False):
     - `extended_context` uses the full page image as context for LLM generation.
     """

-    picture: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Picture")]
+    page_number: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="PageNumber")]
     """Controls the processing and generation for the segment.

     - `crop_image` controls whether to crop the file's images to the segment's
@@ -156,7 +228,7 @@ class SegmentProcessingParam(TypedDict, total=False):
     - `extended_context` uses the full page image as context for LLM generation.
     """

-    section_header: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="SectionHeader")]
+    picture: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Picture")]
     """Controls the processing and generation for the segment.

     - `crop_image` controls whether to crop the file's images to the segment's
@@ -227,3 +299,21 @@ class SegmentProcessingParam(TypedDict, total=False):
       configuration.
     - `extended_context` uses the full page image as context for LLM generation.
     """
+
+    unknown: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Unknown")]
+    """Controls the processing and generation for the segment.
+
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
chunkr_ai/types/task_response.py CHANGED
@@ -20,6 +20,12 @@ Output: TypeAlias = Union[ParseOutputResponse, ExtractOutputResponse, None]


 class TaskResponse(BaseModel):
+    completed: bool
+    """True when the task reaches a terminal state i.e.
+
+    `status` is `Succeeded` or `Failed` or `Cancelled`
+    """
+
     configuration: Configuration
     """
     Unified configuration type that can represent either parse or extract
@@ -58,8 +64,8 @@ class TaskResponse(BaseModel):
     output: Optional[Output] = None
     """Unified output type that can represent either parse or extract results"""

-    source_task_id: Optional[str] = None
-    """The ID of the source task that was used for the task"""
+    parse_task_id: Optional[str] = None
+    """The ID of the source `parse` task that was used for the task"""

     started_at: Optional[datetime] = None
     """The date and time when the task was started."""
@@ -14,6 +14,12 @@ __all__ = ["ExtractCreateResponse"]
14
14
 
15
15
 
16
16
  class ExtractCreateResponse(BaseModel):
17
+ completed: bool
18
+ """True when the task reaches a terminal state i.e.
19
+
20
+ `status` is `Succeeded` or `Failed` or `Cancelled`
21
+ """
22
+
17
23
  configuration: ExtractConfiguration
18
24
 
19
25
  created_at: datetime
@@ -57,7 +63,7 @@ class ExtractCreateResponse(BaseModel):
     for that field.
     """

-    source_task_id: Optional[str] = None
+    parse_task_id: Optional[str] = None
     """The ID of the source `parse` task that was used for extraction"""

     started_at: Optional[datetime] = None
@@ -14,6 +14,12 @@ __all__ = ["ExtractGetResponse"]
14
14
 
15
15
 
16
16
  class ExtractGetResponse(BaseModel):
17
+ completed: bool
18
+ """True when the task reaches a terminal state i.e.
19
+
20
+ `status` is `Succeeded` or `Failed` or `Cancelled`
21
+ """
22
+
17
23
  configuration: ExtractConfiguration
18
24
 
19
25
  created_at: datetime
@@ -57,7 +63,7 @@ class ExtractGetResponse(BaseModel):
     for that field.
     """

-    source_task_id: Optional[str] = None
+    parse_task_id: Optional[str] = None
     """The ID of the source `parse` task that was used for extraction"""

     started_at: Optional[datetime] = None
chunkr_ai/types/tasks/parse_create_params.py CHANGED
@@ -5,7 +5,6 @@ from __future__ import annotations
 from typing import Optional
 from typing_extensions import Literal, Required, TypedDict

-from ..llm_processing_param import LlmProcessingParam
 from ..chunk_processing_param import ChunkProcessingParam
 from ..segment_processing_param import SegmentProcessingParam

@@ -42,9 +41,6 @@ class ParseCreateParams(TypedDict, total=False):
     file_name: Optional[str]
     """The name of the file to be parsed. If not set a name will be generated."""

-    llm_processing: LlmProcessingParam
-    """Controls the LLM used for the task."""
-
     ocr_strategy: Literal["All", "Auto"]
     """Controls the Optical Character Recognition (OCR) strategy.

@@ -14,6 +14,12 @@ __all__ = ["ParseCreateResponse"]
14
14
 
15
15
 
16
16
  class ParseCreateResponse(BaseModel):
17
+ completed: bool
18
+ """True when the task reaches a terminal state i.e.
19
+
20
+ `status` is `Succeeded` or `Failed` or `Cancelled`
21
+ """
22
+
17
23
  configuration: ParseConfiguration
18
24
 
19
25
  created_at: datetime
@@ -14,6 +14,12 @@ __all__ = ["ParseGetResponse"]
14
14
 
15
15
 
16
16
  class ParseGetResponse(BaseModel):
17
+ completed: bool
18
+ """True when the task reaches a terminal state i.e.
19
+
20
+ `status` is `Succeeded` or `Failed` or `Cancelled`
21
+ """
22
+
17
23
  configuration: ParseConfiguration
18
24
 
19
25
  created_at: datetime
chunkr_ai/types/version_info.py CHANGED
@@ -20,7 +20,7 @@ class ClientVersionGeneratedSDK(BaseModel):
     """Version of the auto-generated SDK"""


-ClientVersion: TypeAlias = Union[Literal["Legacy"], ClientVersionManualSDK, ClientVersionGeneratedSDK]
+ClientVersion: TypeAlias = Union[Literal["Legacy", "Unspecified"], ClientVersionManualSDK, ClientVersionGeneratedSDK]


 class VersionInfo(BaseModel):
chunkr_ai-0.1.0a12.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: chunkr-ai
-Version: 0.1.0a11
+Version: 0.1.0a12
 Summary: The official Python library for the chunkr API
 Project-URL: Homepage, https://github.com/lumina-ai-inc/chunkr-python
 Project-URL: Repository, https://github.com/lumina-ai-inc/chunkr-python
chunkr_ai-0.1.0a12.dist-info/RECORD CHANGED
@@ -11,7 +11,7 @@ chunkr_ai/_resource.py,sha256=f5tiwjxcKdbeMor8idoHtMFTUhqD9yc2xXtq5rqeLLk,1100
 chunkr_ai/_response.py,sha256=xXNpF53hiYARmAW7npKuxQ5UHAEjgAzm7ME_L3eIstY,28800
 chunkr_ai/_streaming.py,sha256=ZmyrVWk7-AWkLAATR55WgNxnyFzYmaqJt2LthA_PTqQ,10100
 chunkr_ai/_types.py,sha256=nzD_EEP9CVutLcSeuKLO6Mpn5cd_R0Vo0dEM7AWa7yY,7239
-chunkr_ai/_version.py,sha256=O_LWqHa5Todq2XeeCDSW_cu4p1oubeyQ-SQUznjufns,170
+chunkr_ai/_version.py,sha256=qDyA1DMpmqGoQPNBAzGD_frtGPh6Bee1q-GXz3_l60c,170
 chunkr_ai/pagination.py,sha256=bT-ErcJ80YlKBV6tWq2s9uqg-wv7o66SKe_AgUAGrKc,3533
 chunkr_ai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 chunkr_ai/_utils/__init__.py,sha256=7fch0GT9zpNnErbciSpUNa-SjTxxjY6kxHxKMOM4AGs,2305
@@ -33,9 +33,9 @@ chunkr_ai/resources/health.py,sha256=sLA4PSAf-4JK1Lrqb0TApQ0Hc5Q8fZzbKQXzA3bNEdQ
 chunkr_ai/resources/webhooks.py,sha256=RhteI3ahE2rGSDEMUZH0HDBTOQqWS8sZ5D00ErKWnpE,7006
 chunkr_ai/resources/tasks/__init__.py,sha256=wDCnwtnpTfiaLg7NBxoLZYh2TtOw44_DSqtJa_TjmXU,1439
 chunkr_ai/resources/tasks/extract.py,sha256=LQJTmYItqUu60G0hYfdw_nyMLARyxqzjVO6ETIn8hDo,14980
-chunkr_ai/resources/tasks/parse.py,sha256=bQcA-zOQvTN9_OiqmzILsJLQqJtONU5XbNZQMAm-0yk,19374
+chunkr_ai/resources/tasks/parse.py,sha256=hbFEFu-tU6RWktA0Tv6KP0HyeLdS62kO4UoQfbSsAJc,18963
 chunkr_ai/resources/tasks/tasks.py,sha256=W9bXpfgE56MkM5buBYg5-dcPYGFo_CzgVkr1kOOpXtQ,23582
-chunkr_ai/types/__init__.py,sha256=vBUEzAt6cblUc44h8NivvVrua_CmaIujTvjvf8B0_S0,2499
+chunkr_ai/types/__init__.py,sha256=gO4mkpo_tYfe1PGSc0Uzlc8rZr9PTmHshGZFzmG98mM,2365
 chunkr_ai/types/bounding_box.py,sha256=JDZlhJJl4lg6RYGf8VpC46soQfQ10-K8YwHHA6XBFkM,431
 chunkr_ai/types/cell.py,sha256=D-S_XAzmOJs0Lo2RgY7T3h_ChdhSrRrI8IN4qo2sGOU,1143
 chunkr_ai/types/cell_style.py,sha256=VqSz6pZ7rjmHYrq_S63IOFPaWvXjWhNUIGc3V4UlF6U,873
@@ -47,7 +47,7 @@ chunkr_ai/types/extract_configuration.py,sha256=OCs3SnuS8qXWB926o8Gv1Y2AuNszplGm
 chunkr_ai/types/extract_output_response.py,sha256=kfkHbeEVl3x3t-7u4h4Cd4wC5KbrOjz4-joP5RV1WyA,1272
 chunkr_ai/types/file.py,sha256=kOxR0g-3A-qOxz2cjuTcq0wFMqPoph9uQuLYQ56zb-c,718
 chunkr_ai/types/file_create_params.py,sha256=_1Dr3FlO9BOv6gzhCN4g46_otCBqEdLe0mnxpdaRPaE,468
-chunkr_ai/types/file_info.py,sha256=78r0AITTY2nlZyW9pHy7dywVXyOsCK1Ysmmc-wfQBVM,499
+chunkr_ai/types/file_info.py,sha256=y5kVR3mPeiwsPBMc3IBAigQbkVRpGQN09IpDPdvCe7E,611
 chunkr_ai/types/file_list_params.py,sha256=oJGTf88aAxBhNfmQDbxGT63b95HdSbMXUubKjXM22_U,822
 chunkr_ai/types/file_url.py,sha256=L434WnOXkNmt59dJiaAgT1_3pN3BIsxm2q14zHQK6xY,365
 chunkr_ai/types/file_url_params.py,sha256=ZHfKiy_6B25StdDemulavGcsPggNNMKLWf6KN7xfPTY,413
@@ -55,34 +55,32 @@ chunkr_ai/types/files_list_response.py,sha256=ggSRWhTzZWjcDXxStyCzrYICXXB5TqnL2j
 chunkr_ai/types/generation_config.py,sha256=9gfwdd228x29jC1egxq3IreKwgkGZCjSWHCXIkzQwqE,958
 chunkr_ai/types/generation_config_param.py,sha256=9E0Mhee-NInwOzjXmq3gpd8G5drsPBpzFs0AA2ywTc0,960
 chunkr_ai/types/health_check_response.py,sha256=6Zn5YYHCQf2RgMjDlf39mtiTPqfaBfC9Vv599U_rKCI,200
-chunkr_ai/types/llm_processing.py,sha256=cTm5LBBCpnmA4u-nGQdO67JYCTizzf4lJAvUsG6BX2Q,1095
-chunkr_ai/types/llm_processing_param.py,sha256=CSnW4-5-32Pzoo-G7G3p_NUvljtCkNguj1dHVc2Y4cA,1135
-chunkr_ai/types/ocr_result.py,sha256=EdIvpuccQ_8A8ml7yVCOEOfBoewgwTBzVJZ_les9udM,740
+chunkr_ai/types/ocr_result.py,sha256=W3piXLotfmZ40FJrJYMuS72shaVYLsKeN6jCf26uZGI,743
 chunkr_ai/types/page.py,sha256=ADdGJisS-GxBD_wdu3q1pmikgJ7twFsP0choDEXw9ro,690
-chunkr_ai/types/parse_configuration.py,sha256=MOTypSv3kiwoLEcjAgsyc674CMG4xw6SxXEXweFU6V4,2567
-chunkr_ai/types/parse_configuration_param.py,sha256=hJWngWEtT3RYgHqkAvmS4RG7cODla6i-Cp8ubtZfmz0,2544
+chunkr_ai/types/parse_configuration.py,sha256=WcUUk7ai0sHTeWUQYHyDn1ZjYqT7dzLqRWjGx5zFfsg,2427
+chunkr_ai/types/parse_configuration_param.py,sha256=dl884XkCnloSS9YMk8UnUm7Z963HiAzNy5qgtCSIPH8,2405
 chunkr_ai/types/parse_output_response.py,sha256=KfRFY5PnchJfEWr4jy3Dd-3AWeImGE5BP_NMFC5I6_c,947
-chunkr_ai/types/segment.py,sha256=_QNYmAMSZOCNv67CwsmIOq7X0ZeYsj6jzQbKhbnoTrA,3096
-chunkr_ai/types/segment_processing.py,sha256=0-b4nSoLeGsMKSVw5LPQFXnn-PoVvIJ0wFfSFEOTpsw,12115
-chunkr_ai/types/segment_processing_param.py,sha256=fVbvFSzmxLPiQoPniFwqqXlbo6fRaZwzZ_TbnwYVOss,12245
+chunkr_ai/types/segment.py,sha256=KsGNynGQq7s55EHuPLY7glmvXunv2Wszhx5FhkhkN5U,3093
+chunkr_ai/types/segment_processing.py,sha256=uyNbxp7DsgTgBHoS9ELoyW8j-aTBbOshxYrb-TQ990E,17049
+chunkr_ai/types/segment_processing_param.py,sha256=a1Zk8NiaeFNSbHw5P8Usq-17mqENuZdCOQCn8nNN0o0,17199
 chunkr_ai/types/task_extract_updated_webhook_event.py,sha256=YYHDQEs4wg2bDgGXgHUgX_CwSLFxePJZrT5OV4J6Mhk,640
 chunkr_ai/types/task_get_params.py,sha256=Nx2luhebcoaiuRln4KP4FarWvBPd1OYi__efi56zHPM,460
 chunkr_ai/types/task_list_params.py,sha256=NySdOH1mIhZAJvcHr9xm2PeODsCO05lJMsrAiGGBKNE,1275
 chunkr_ai/types/task_parse_updated_webhook_event.py,sha256=3NsfEpJr_bfFB3Y66elraSxk0FS76c60BLUmhqmU9Vc,636
-chunkr_ai/types/task_response.py,sha256=I53d8JdZxbg3p-AUnigISmwFFt5DILnm0gyOc2wRu28,2191
+chunkr_ai/types/task_response.py,sha256=RgyRaZK8TKjVfMSvsI10LYAv61QY2-195Tg3op8yCGo,2339
 chunkr_ai/types/unwrap_webhook_event.py,sha256=G23CLp0__AMlfM2xE11ugnDxN82uiG0Xru0p-pI9VHQ,442
-chunkr_ai/types/version_info.py,sha256=1qYIitGOB7aeOI7gWFfX6cAp2yw5pMzARiTtNl5NNDc,902
+chunkr_ai/types/version_info.py,sha256=MVSU2Z9ATehyc1IgVExczvcP_yH7wYc1UV_BwXeF0UA,917
 chunkr_ai/types/webhook_url_response.py,sha256=q7VsWGOLqVfA_ctdcrbynQJVbfCGh1rHlXZsDc-9Sus,205
 chunkr_ai/types/tasks/__init__.py,sha256=AEF_lM5YdEvz8_7fcX0HHnVvsXdC8Hcsb2Cs-LzRBK4,711
 chunkr_ai/types/tasks/extract_create_params.py,sha256=IV5TrFqJAGFf4w_uH3hqWlbEySlAOC_2QzwKZ-3oM6o,1376
-chunkr_ai/types/tasks/extract_create_response.py,sha256=Umbm40i0Q6Oz3vX9WwTeOXwKU2g8ddZmFhRwl-5Rm5U,2147
+chunkr_ai/types/tasks/extract_create_response.py,sha256=goc8x-L3W0hJowb4PvXigc2o_p15JL0a2ESo9Geg9xc,2287
 chunkr_ai/types/tasks/extract_get_params.py,sha256=AsJvXHvdDnIcVOvTK9gCeiMFk4wckuv19IXIJcqpqso,466
-chunkr_ai/types/tasks/extract_get_response.py,sha256=FWXfS3Qobg3k6seWbbXJZ5y4N_mtJbHsN_FeZV5_ieM,2141
-chunkr_ai/types/tasks/parse_create_params.py,sha256=xzWA5UX183-Lk-JDtWoV37-4pXpFGmQmS4DwSa7K8Ug,3097
-chunkr_ai/types/tasks/parse_create_response.py,sha256=asqeI2l6QbdtPhaQkxkbpSwVLe2HEFfeTPG5OX_xfus,1679
+chunkr_ai/types/tasks/extract_get_response.py,sha256=sQgDLTwpE2w-xVcuXO7NAF_kjhLXv_0swdBJQRGQNSI,2281
+chunkr_ai/types/tasks/parse_create_params.py,sha256=8ctOPP2QT-q_8zN8Fl8ene74ZGOUnR6EAA9XcvA_0p4,2957
+chunkr_ai/types/tasks/parse_create_response.py,sha256=l9hj6qKs76-qKzjBtVeo9lRe9wRuYltsh2GFNPyfEDM,1820
 chunkr_ai/types/tasks/parse_get_params.py,sha256=Ca0C91k6ajNTMhtUkFMulgC6g8_wI7YLVGxsWiupiVA,462
-chunkr_ai/types/tasks/parse_get_response.py,sha256=-0_j1_5skabmAtmcK882jZGroVsBRxC_o5d6pg31bJY,1673
-chunkr_ai-0.1.0a11.dist-info/METADATA,sha256=-aO85KHXxUtKZc2UZdKBe_pIWnljQ8wOj02ug2oVGCE,16493
-chunkr_ai-0.1.0a11.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
-chunkr_ai-0.1.0a11.dist-info/licenses/LICENSE,sha256=3FDRL-L-DFkrFy8yJpb1Nxhuztm0PB2kawcCgK5utFg,11336
-chunkr_ai-0.1.0a11.dist-info/RECORD,,
+chunkr_ai/types/tasks/parse_get_response.py,sha256=2IoZeN8BAxQEtxRq6CCA_d9nWPvCQbp71zMzaaKPlug,1814
+chunkr_ai-0.1.0a12.dist-info/METADATA,sha256=HUM4LJTDsqX9zgI_0QgVqHbZbHlpxwENau7AVKmVslg,16493
+chunkr_ai-0.1.0a12.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
+chunkr_ai-0.1.0a12.dist-info/licenses/LICENSE,sha256=3FDRL-L-DFkrFy8yJpb1Nxhuztm0PB2kawcCgK5utFg,11336
+chunkr_ai-0.1.0a12.dist-info/RECORD,,
chunkr_ai/types/llm_processing.py DELETED
@@ -1,36 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Union, Optional
-from typing_extensions import Literal, TypeAlias
-
-from pydantic import Field as FieldInfo
-
-from .._models import BaseModel
-
-__all__ = ["LlmProcessing", "FallbackStrategy", "FallbackStrategyModel"]
-
-
-class FallbackStrategyModel(BaseModel):
-    model: str = FieldInfo(alias="Model")
-    """Use a specific model as fallback"""
-
-
-FallbackStrategy: TypeAlias = Union[Literal["None", "Default"], FallbackStrategyModel]
-
-
-class LlmProcessing(BaseModel):
-    fallback_strategy: Optional[FallbackStrategy] = None
-    """The fallback strategy to use for the LLMs in the task."""
-
-    llm_model_id: Optional[str] = None
-    """The ID of the model to use for the task.
-
-    If not provided, the default model will be used. Please check the documentation
-    for the model you want to use.
-    """
-
-    max_completion_tokens: Optional[int] = None
-    """The maximum number of tokens to generate."""
-
-    temperature: Optional[float] = None
-    """The temperature to use for the LLM."""
chunkr_ai/types/llm_processing_param.py DELETED
@@ -1,36 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Union, Optional
-from typing_extensions import Literal, Required, Annotated, TypeAlias, TypedDict
-
-from .._utils import PropertyInfo
-
-__all__ = ["LlmProcessingParam", "FallbackStrategy", "FallbackStrategyModel"]
-
-
-class FallbackStrategyModel(TypedDict, total=False):
-    model: Required[Annotated[str, PropertyInfo(alias="Model")]]
-    """Use a specific model as fallback"""
-
-
-FallbackStrategy: TypeAlias = Union[Literal["None", "Default"], FallbackStrategyModel]
-
-
-class LlmProcessingParam(TypedDict, total=False):
-    fallback_strategy: FallbackStrategy
-    """The fallback strategy to use for the LLMs in the task."""
-
-    llm_model_id: Optional[str]
-    """The ID of the model to use for the task.
-
-    If not provided, the default model will be used. Please check the documentation
-    for the model you want to use.
-    """
-
-    max_completion_tokens: Optional[int]
-    """The maximum number of tokens to generate."""
-
-    temperature: float
-    """The temperature to use for the LLM."""