chunkr-ai 0.1.0a10__py3-none-any.whl → 0.1.0a11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
chunkr_ai/_version.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
2
 
3
3
  __title__ = "chunkr_ai"
4
- __version__ = "0.1.0-alpha.10" # x-release-please-version
4
+ __version__ = "0.1.0-alpha.11" # x-release-please-version
@@ -19,6 +19,7 @@ from ..._response import (
19
19
  )
20
20
  from ...types.tasks import parse_get_params, parse_create_params
21
21
  from ..._base_client import make_request_options
22
+ from ...types.llm_processing_param import LlmProcessingParam
22
23
  from ...types.chunk_processing_param import ChunkProcessingParam
23
24
  from ...types.segment_processing_param import SegmentProcessingParam
24
25
  from ...types.tasks.parse_get_response import ParseGetResponse
@@ -55,6 +56,7 @@ class ParseResource(SyncAPIResource):
55
56
  error_handling: Literal["Fail", "Continue"] | Omit = omit,
56
57
  expires_in: Optional[int] | Omit = omit,
57
58
  file_name: Optional[str] | Omit = omit,
59
+ llm_processing: LlmProcessingParam | Omit = omit,
58
60
  ocr_strategy: Literal["All", "Auto"] | Omit = omit,
59
61
  pipeline: Literal["Azure", "Chunkr"] | Omit = omit,
60
62
  segment_processing: Optional[SegmentProcessingParam] | Omit = omit,
@@ -97,6 +99,8 @@ class ParseResource(SyncAPIResource):
97
99
 
98
100
  file_name: The name of the file to be parsed. If not set a name will be generated.
99
101
 
102
+ llm_processing: Controls the LLM used for the task.
103
+
100
104
  ocr_strategy: Controls the Optical Character Recognition (OCR) strategy.
101
105
 
102
106
  - `All`: Processes all pages with OCR. (Latency penalty: ~0.5 seconds per page)
@@ -146,6 +150,7 @@ class ParseResource(SyncAPIResource):
146
150
  "error_handling": error_handling,
147
151
  "expires_in": expires_in,
148
152
  "file_name": file_name,
153
+ "llm_processing": llm_processing,
149
154
  "ocr_strategy": ocr_strategy,
150
155
  "pipeline": pipeline,
151
156
  "segment_processing": segment_processing,
@@ -251,6 +256,7 @@ class AsyncParseResource(AsyncAPIResource):
251
256
  error_handling: Literal["Fail", "Continue"] | Omit = omit,
252
257
  expires_in: Optional[int] | Omit = omit,
253
258
  file_name: Optional[str] | Omit = omit,
259
+ llm_processing: LlmProcessingParam | Omit = omit,
254
260
  ocr_strategy: Literal["All", "Auto"] | Omit = omit,
255
261
  pipeline: Literal["Azure", "Chunkr"] | Omit = omit,
256
262
  segment_processing: Optional[SegmentProcessingParam] | Omit = omit,
@@ -293,6 +299,8 @@ class AsyncParseResource(AsyncAPIResource):
293
299
 
294
300
  file_name: The name of the file to be parsed. If not set a name will be generated.
295
301
 
302
+ llm_processing: Controls the LLM used for the task.
303
+
296
304
  ocr_strategy: Controls the Optical Character Recognition (OCR) strategy.
297
305
 
298
306
  - `All`: Processes all pages with OCR. (Latency penalty: ~0.5 seconds per page)
@@ -342,6 +350,7 @@ class AsyncParseResource(AsyncAPIResource):
342
350
  "error_handling": error_handling,
343
351
  "expires_in": expires_in,
344
352
  "file_name": file_name,
353
+ "llm_processing": llm_processing,
345
354
  "ocr_strategy": ocr_strategy,
346
355
  "pipeline": pipeline,
347
356
  "segment_processing": segment_processing,
@@ -15,6 +15,7 @@ from .ocr_result import OcrResult as OcrResult
15
15
  from .bounding_box import BoundingBox as BoundingBox
16
16
  from .version_info import VersionInfo as VersionInfo
17
17
  from .task_response import TaskResponse as TaskResponse
18
+ from .llm_processing import LlmProcessing as LlmProcessing
18
19
  from .file_url_params import FileURLParams as FileURLParams
19
20
  from .task_get_params import TaskGetParams as TaskGetParams
20
21
  from .chunk_processing import ChunkProcessing as ChunkProcessing
@@ -25,6 +26,7 @@ from .file_create_params import FileCreateParams as FileCreateParams
25
26
  from .segment_processing import SegmentProcessing as SegmentProcessing
26
27
  from .files_list_response import FilesListResponse as FilesListResponse
27
28
  from .parse_configuration import ParseConfiguration as ParseConfiguration
29
+ from .llm_processing_param import LlmProcessingParam as LlmProcessingParam
28
30
  from .unwrap_webhook_event import UnwrapWebhookEvent as UnwrapWebhookEvent
29
31
  from .webhook_url_response import WebhookURLResponse as WebhookURLResponse
30
32
  from .extract_configuration import ExtractConfiguration as ExtractConfiguration
@@ -19,6 +19,3 @@ class FileInfo(BaseModel):
19
19
 
20
20
  page_count: Optional[int] = None
21
21
  """The number of pages in the file."""
22
-
23
- ss_cell_count: Optional[int] = None
24
- """The number of cells in the file. Only used for spreadsheets."""
@@ -0,0 +1,36 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from typing import Union, Optional
4
+ from typing_extensions import Literal, TypeAlias
5
+
6
+ from pydantic import Field as FieldInfo
7
+
8
+ from .._models import BaseModel
9
+
10
+ __all__ = ["LlmProcessing", "FallbackStrategy", "FallbackStrategyModel"]
11
+
12
+
13
+ class FallbackStrategyModel(BaseModel):
14
+ model: str = FieldInfo(alias="Model")
15
+ """Use a specific model as fallback"""
16
+
17
+
18
+ FallbackStrategy: TypeAlias = Union[Literal["None", "Default"], FallbackStrategyModel]
19
+
20
+
21
+ class LlmProcessing(BaseModel):
22
+ fallback_strategy: Optional[FallbackStrategy] = None
23
+ """The fallback strategy to use for the LLMs in the task."""
24
+
25
+ llm_model_id: Optional[str] = None
26
+ """The ID of the model to use for the task.
27
+
28
+ If not provided, the default model will be used. Please check the documentation
29
+ for the model you want to use.
30
+ """
31
+
32
+ max_completion_tokens: Optional[int] = None
33
+ """The maximum number of tokens to generate."""
34
+
35
+ temperature: Optional[float] = None
36
+ """The temperature to use for the LLM."""
@@ -0,0 +1,36 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Union, Optional
6
+ from typing_extensions import Literal, Required, Annotated, TypeAlias, TypedDict
7
+
8
+ from .._utils import PropertyInfo
9
+
10
+ __all__ = ["LlmProcessingParam", "FallbackStrategy", "FallbackStrategyModel"]
11
+
12
+
13
+ class FallbackStrategyModel(TypedDict, total=False):
14
+ model: Required[Annotated[str, PropertyInfo(alias="Model")]]
15
+ """Use a specific model as fallback"""
16
+
17
+
18
+ FallbackStrategy: TypeAlias = Union[Literal["None", "Default"], FallbackStrategyModel]
19
+
20
+
21
+ class LlmProcessingParam(TypedDict, total=False):
22
+ fallback_strategy: FallbackStrategy
23
+ """The fallback strategy to use for the LLMs in the task."""
24
+
25
+ llm_model_id: Optional[str]
26
+ """The ID of the model to use for the task.
27
+
28
+ If not provided, the default model will be used. Please check the documentation
29
+ for the model you want to use.
30
+ """
31
+
32
+ max_completion_tokens: Optional[int]
33
+ """The maximum number of tokens to generate."""
34
+
35
+ temperature: float
36
+ """The temperature to use for the LLM."""
@@ -15,14 +15,14 @@ class OcrResult(BaseModel):
15
15
  text: str
16
16
  """The recognized text of the OCR result."""
17
17
 
18
+ cell_ref: Optional[str] = None
19
+ """
20
+ Excel-style cell reference (e.g., "A1" or "A1:B2") when OCR originates from a
21
+ spreadsheet cell
22
+ """
23
+
18
24
  confidence: Optional[float] = None
19
25
  """The confidence score of the recognized text."""
20
26
 
21
27
  ocr_id: Optional[str] = None
22
28
  """The unique identifier for the OCR result."""
23
-
24
- ss_cell_ref: Optional[str] = None
25
- """
26
- Excel-style cell reference (e.g., "A1" or "A1:B2") when OCR originates from a
27
- spreadsheet cell
28
- """
@@ -4,6 +4,7 @@ from typing import Optional
4
4
  from typing_extensions import Literal
5
5
 
6
6
  from .._models import BaseModel
7
+ from .llm_processing import LlmProcessing
7
8
  from .chunk_processing import ChunkProcessing
8
9
  from .segment_processing import SegmentProcessing
9
10
 
@@ -22,6 +23,9 @@ class ParseConfiguration(BaseModel):
22
23
  LLM refusals etc.)
23
24
  """
24
25
 
26
+ llm_processing: Optional[LlmProcessing] = None
27
+ """Controls the LLM used for the task."""
28
+
25
29
  ocr_strategy: Optional[Literal["All", "Auto"]] = None
26
30
  """Controls the Optical Character Recognition (OCR) strategy.
27
31
 
@@ -5,6 +5,7 @@ from __future__ import annotations
5
5
  from typing import Optional
6
6
  from typing_extensions import Literal, TypedDict
7
7
 
8
+ from .llm_processing_param import LlmProcessingParam
8
9
  from .chunk_processing_param import ChunkProcessingParam
9
10
  from .segment_processing_param import SegmentProcessingParam
10
11
 
@@ -23,6 +24,9 @@ class ParseConfigurationParam(TypedDict, total=False):
23
24
  LLM refusals etc.)
24
25
  """
25
26
 
27
+ llm_processing: LlmProcessingParam
28
+ """Controls the LLM used for the task."""
29
+
26
30
  ocr_strategy: Literal["All", "Auto"]
27
31
  """Controls the Optical Character Recognition (OCR) strategy.
28
32
 
@@ -31,23 +31,20 @@ class Segment(BaseModel):
31
31
  "Caption",
32
32
  "Footnote",
33
33
  "Formula",
34
- "FormRegion",
35
- "GraphicalItem",
36
- "Legend",
37
- "LineNumber",
38
34
  "ListItem",
39
35
  "Page",
40
36
  "PageFooter",
41
37
  "PageHeader",
42
- "PageNumber",
43
38
  "Picture",
39
+ "SectionHeader",
44
40
  "Table",
45
41
  "Text",
46
42
  "Title",
47
- "Unknown",
48
- "SectionHeader",
49
43
  ]
50
- """All the possible types for a segment."""
44
+ """
45
+ All the possible types for a segment. Note: Different configurations will
46
+ produce different types. Please refer to the documentation for more information.
47
+ """
51
48
 
52
49
  confidence: Optional[float] = None
53
50
  """Confidence score of the layout analysis model"""
@@ -47,24 +47,6 @@ class SegmentProcessing(BaseModel):
47
47
  - `extended_context` uses the full page image as context for LLM generation.
48
48
  """
49
49
 
50
- form_region: Optional[GenerationConfig] = FieldInfo(alias="FormRegion", default=None)
51
- """Controls the processing and generation for the segment.
52
-
53
- - `crop_image` controls whether to crop the file's images to the segment's
54
- bounding box. The cropped image will be stored in the segment's `image` field.
55
- Use `All` to always crop, or `Auto` to only crop when needed for
56
- post-processing.
57
- - `format` specifies the output format: `Html` or `Markdown`
58
- - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
59
- - `Auto`: Process content automatically
60
- - `LLM`: Use large language models for processing
61
- - `Ignore`: Exclude segments from final output
62
- - `description` enables LLM-generated descriptions for segments. **Note:** This
63
- uses chunkr's own VLM models and is not configurable via LLM processing
64
- configuration.
65
- - `extended_context` uses the full page image as context for LLM generation.
66
- """
67
-
68
50
  formula: Optional[GenerationConfig] = FieldInfo(alias="Formula", default=None)
69
51
  """Controls the processing and generation for the segment.
70
52
 
@@ -83,60 +65,6 @@ class SegmentProcessing(BaseModel):
83
65
  - `extended_context` uses the full page image as context for LLM generation.
84
66
  """
85
67
 
86
- graphical_item: Optional[GenerationConfig] = FieldInfo(alias="GraphicalItem", default=None)
87
- """Controls the processing and generation for the segment.
88
-
89
- - `crop_image` controls whether to crop the file's images to the segment's
90
- bounding box. The cropped image will be stored in the segment's `image` field.
91
- Use `All` to always crop, or `Auto` to only crop when needed for
92
- post-processing.
93
- - `format` specifies the output format: `Html` or `Markdown`
94
- - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
95
- - `Auto`: Process content automatically
96
- - `LLM`: Use large language models for processing
97
- - `Ignore`: Exclude segments from final output
98
- - `description` enables LLM-generated descriptions for segments. **Note:** This
99
- uses chunkr's own VLM models and is not configurable via LLM processing
100
- configuration.
101
- - `extended_context` uses the full page image as context for LLM generation.
102
- """
103
-
104
- legend: Optional[GenerationConfig] = FieldInfo(alias="Legend", default=None)
105
- """Controls the processing and generation for the segment.
106
-
107
- - `crop_image` controls whether to crop the file's images to the segment's
108
- bounding box. The cropped image will be stored in the segment's `image` field.
109
- Use `All` to always crop, or `Auto` to only crop when needed for
110
- post-processing.
111
- - `format` specifies the output format: `Html` or `Markdown`
112
- - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
113
- - `Auto`: Process content automatically
114
- - `LLM`: Use large language models for processing
115
- - `Ignore`: Exclude segments from final output
116
- - `description` enables LLM-generated descriptions for segments. **Note:** This
117
- uses chunkr's own VLM models and is not configurable via LLM processing
118
- configuration.
119
- - `extended_context` uses the full page image as context for LLM generation.
120
- """
121
-
122
- line_number: Optional[GenerationConfig] = FieldInfo(alias="LineNumber", default=None)
123
- """Controls the processing and generation for the segment.
124
-
125
- - `crop_image` controls whether to crop the file's images to the segment's
126
- bounding box. The cropped image will be stored in the segment's `image` field.
127
- Use `All` to always crop, or `Auto` to only crop when needed for
128
- post-processing.
129
- - `format` specifies the output format: `Html` or `Markdown`
130
- - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
131
- - `Auto`: Process content automatically
132
- - `LLM`: Use large language models for processing
133
- - `Ignore`: Exclude segments from final output
134
- - `description` enables LLM-generated descriptions for segments. **Note:** This
135
- uses chunkr's own VLM models and is not configurable via LLM processing
136
- configuration.
137
- - `extended_context` uses the full page image as context for LLM generation.
138
- """
139
-
140
68
  list_item: Optional[GenerationConfig] = FieldInfo(alias="ListItem", default=None)
141
69
  """Controls the processing and generation for the segment.
142
70
 
@@ -209,7 +137,7 @@ class SegmentProcessing(BaseModel):
209
137
  - `extended_context` uses the full page image as context for LLM generation.
210
138
  """
211
139
 
212
- page_number: Optional[GenerationConfig] = FieldInfo(alias="PageNumber", default=None)
140
+ picture: Optional[GenerationConfig] = FieldInfo(alias="Picture", default=None)
213
141
  """Controls the processing and generation for the segment.
214
142
 
215
143
  - `crop_image` controls whether to crop the file's images to the segment's
@@ -227,7 +155,7 @@ class SegmentProcessing(BaseModel):
227
155
  - `extended_context` uses the full page image as context for LLM generation.
228
156
  """
229
157
 
230
- picture: Optional[GenerationConfig] = FieldInfo(alias="Picture", default=None)
158
+ section_header: Optional[GenerationConfig] = FieldInfo(alias="SectionHeader", default=None)
231
159
  """Controls the processing and generation for the segment.
232
160
 
233
161
  - `crop_image` controls whether to crop the file's images to the segment's
@@ -298,21 +226,3 @@ class SegmentProcessing(BaseModel):
298
226
  configuration.
299
227
  - `extended_context` uses the full page image as context for LLM generation.
300
228
  """
301
-
302
- unknown: Optional[GenerationConfig] = FieldInfo(alias="Unknown", default=None)
303
- """Controls the processing and generation for the segment.
304
-
305
- - `crop_image` controls whether to crop the file's images to the segment's
306
- bounding box. The cropped image will be stored in the segment's `image` field.
307
- Use `All` to always crop, or `Auto` to only crop when needed for
308
- post-processing.
309
- - `format` specifies the output format: `Html` or `Markdown`
310
- - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
311
- - `Auto`: Process content automatically
312
- - `LLM`: Use large language models for processing
313
- - `Ignore`: Exclude segments from final output
314
- - `description` enables LLM-generated descriptions for segments. **Note:** This
315
- uses chunkr's own VLM models and is not configurable via LLM processing
316
- configuration.
317
- - `extended_context` uses the full page image as context for LLM generation.
318
- """
@@ -48,24 +48,6 @@ class SegmentProcessingParam(TypedDict, total=False):
48
48
  - `extended_context` uses the full page image as context for LLM generation.
49
49
  """
50
50
 
51
- form_region: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="FormRegion")]
52
- """Controls the processing and generation for the segment.
53
-
54
- - `crop_image` controls whether to crop the file's images to the segment's
55
- bounding box. The cropped image will be stored in the segment's `image` field.
56
- Use `All` to always crop, or `Auto` to only crop when needed for
57
- post-processing.
58
- - `format` specifies the output format: `Html` or `Markdown`
59
- - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
60
- - `Auto`: Process content automatically
61
- - `LLM`: Use large language models for processing
62
- - `Ignore`: Exclude segments from final output
63
- - `description` enables LLM-generated descriptions for segments. **Note:** This
64
- uses chunkr's own VLM models and is not configurable via LLM processing
65
- configuration.
66
- - `extended_context` uses the full page image as context for LLM generation.
67
- """
68
-
69
51
  formula: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Formula")]
70
52
  """Controls the processing and generation for the segment.
71
53
 
@@ -84,60 +66,6 @@ class SegmentProcessingParam(TypedDict, total=False):
84
66
  - `extended_context` uses the full page image as context for LLM generation.
85
67
  """
86
68
 
87
- graphical_item: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="GraphicalItem")]
88
- """Controls the processing and generation for the segment.
89
-
90
- - `crop_image` controls whether to crop the file's images to the segment's
91
- bounding box. The cropped image will be stored in the segment's `image` field.
92
- Use `All` to always crop, or `Auto` to only crop when needed for
93
- post-processing.
94
- - `format` specifies the output format: `Html` or `Markdown`
95
- - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
96
- - `Auto`: Process content automatically
97
- - `LLM`: Use large language models for processing
98
- - `Ignore`: Exclude segments from final output
99
- - `description` enables LLM-generated descriptions for segments. **Note:** This
100
- uses chunkr's own VLM models and is not configurable via LLM processing
101
- configuration.
102
- - `extended_context` uses the full page image as context for LLM generation.
103
- """
104
-
105
- legend: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Legend")]
106
- """Controls the processing and generation for the segment.
107
-
108
- - `crop_image` controls whether to crop the file's images to the segment's
109
- bounding box. The cropped image will be stored in the segment's `image` field.
110
- Use `All` to always crop, or `Auto` to only crop when needed for
111
- post-processing.
112
- - `format` specifies the output format: `Html` or `Markdown`
113
- - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
114
- - `Auto`: Process content automatically
115
- - `LLM`: Use large language models for processing
116
- - `Ignore`: Exclude segments from final output
117
- - `description` enables LLM-generated descriptions for segments. **Note:** This
118
- uses chunkr's own VLM models and is not configurable via LLM processing
119
- configuration.
120
- - `extended_context` uses the full page image as context for LLM generation.
121
- """
122
-
123
- line_number: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="LineNumber")]
124
- """Controls the processing and generation for the segment.
125
-
126
- - `crop_image` controls whether to crop the file's images to the segment's
127
- bounding box. The cropped image will be stored in the segment's `image` field.
128
- Use `All` to always crop, or `Auto` to only crop when needed for
129
- post-processing.
130
- - `format` specifies the output format: `Html` or `Markdown`
131
- - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
132
- - `Auto`: Process content automatically
133
- - `LLM`: Use large language models for processing
134
- - `Ignore`: Exclude segments from final output
135
- - `description` enables LLM-generated descriptions for segments. **Note:** This
136
- uses chunkr's own VLM models and is not configurable via LLM processing
137
- configuration.
138
- - `extended_context` uses the full page image as context for LLM generation.
139
- """
140
-
141
69
  list_item: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="ListItem")]
142
70
  """Controls the processing and generation for the segment.
143
71
 
@@ -210,7 +138,7 @@ class SegmentProcessingParam(TypedDict, total=False):
210
138
  - `extended_context` uses the full page image as context for LLM generation.
211
139
  """
212
140
 
213
- page_number: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="PageNumber")]
141
+ picture: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Picture")]
214
142
  """Controls the processing and generation for the segment.
215
143
 
216
144
  - `crop_image` controls whether to crop the file's images to the segment's
@@ -228,7 +156,7 @@ class SegmentProcessingParam(TypedDict, total=False):
228
156
  - `extended_context` uses the full page image as context for LLM generation.
229
157
  """
230
158
 
231
- picture: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Picture")]
159
+ section_header: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="SectionHeader")]
232
160
  """Controls the processing and generation for the segment.
233
161
 
234
162
  - `crop_image` controls whether to crop the file's images to the segment's
@@ -299,21 +227,3 @@ class SegmentProcessingParam(TypedDict, total=False):
299
227
  configuration.
300
228
  - `extended_context` uses the full page image as context for LLM generation.
301
229
  """
302
-
303
- unknown: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Unknown")]
304
- """Controls the processing and generation for the segment.
305
-
306
- - `crop_image` controls whether to crop the file's images to the segment's
307
- bounding box. The cropped image will be stored in the segment's `image` field.
308
- Use `All` to always crop, or `Auto` to only crop when needed for
309
- post-processing.
310
- - `format` specifies the output format: `Html` or `Markdown`
311
- - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
312
- - `Auto`: Process content automatically
313
- - `LLM`: Use large language models for processing
314
- - `Ignore`: Exclude segments from final output
315
- - `description` enables LLM-generated descriptions for segments. **Note:** This
316
- uses chunkr's own VLM models and is not configurable via LLM processing
317
- configuration.
318
- - `extended_context` uses the full page image as context for LLM generation.
319
- """
@@ -20,12 +20,6 @@ Output: TypeAlias = Union[ParseOutputResponse, ExtractOutputResponse, None]
20
20
 
21
21
 
22
22
  class TaskResponse(BaseModel):
23
- completed: bool
24
- """True when the task reaches a terminal state i.e.
25
-
26
- `status` is `Succeeded` or `Failed` or `Cancelled`
27
- """
28
-
29
23
  configuration: Configuration
30
24
  """
31
25
  Unified configuration type that can represent either parse or extract
@@ -64,8 +58,8 @@ class TaskResponse(BaseModel):
64
58
  output: Optional[Output] = None
65
59
  """Unified output type that can represent either parse or extract results"""
66
60
 
67
- parse_task_id: Optional[str] = None
68
- """The ID of the source `parse` task that was used for the task"""
61
+ source_task_id: Optional[str] = None
62
+ """The ID of the source task that was used for the task"""
69
63
 
70
64
  started_at: Optional[datetime] = None
71
65
  """The date and time when the task was started."""
@@ -14,12 +14,6 @@ __all__ = ["ExtractCreateResponse"]
14
14
 
15
15
 
16
16
  class ExtractCreateResponse(BaseModel):
17
- completed: bool
18
- """True when the task reaches a terminal state i.e.
19
-
20
- `status` is `Succeeded` or `Failed` or `Cancelled`
21
- """
22
-
23
17
  configuration: ExtractConfiguration
24
18
 
25
19
  created_at: datetime
@@ -63,7 +57,7 @@ class ExtractCreateResponse(BaseModel):
63
57
  for that field.
64
58
  """
65
59
 
66
- parse_task_id: Optional[str] = None
60
+ source_task_id: Optional[str] = None
67
61
  """The ID of the source `parse` task that was used for extraction"""
68
62
 
69
63
  started_at: Optional[datetime] = None
@@ -14,12 +14,6 @@ __all__ = ["ExtractGetResponse"]
14
14
 
15
15
 
16
16
  class ExtractGetResponse(BaseModel):
17
- completed: bool
18
- """True when the task reaches a terminal state i.e.
19
-
20
- `status` is `Succeeded` or `Failed` or `Cancelled`
21
- """
22
-
23
17
  configuration: ExtractConfiguration
24
18
 
25
19
  created_at: datetime
@@ -63,7 +57,7 @@ class ExtractGetResponse(BaseModel):
63
57
  for that field.
64
58
  """
65
59
 
66
- parse_task_id: Optional[str] = None
60
+ source_task_id: Optional[str] = None
67
61
  """The ID of the source `parse` task that was used for extraction"""
68
62
 
69
63
  started_at: Optional[datetime] = None
@@ -5,6 +5,7 @@ from __future__ import annotations
5
5
  from typing import Optional
6
6
  from typing_extensions import Literal, Required, TypedDict
7
7
 
8
+ from ..llm_processing_param import LlmProcessingParam
8
9
  from ..chunk_processing_param import ChunkProcessingParam
9
10
  from ..segment_processing_param import SegmentProcessingParam
10
11
 
@@ -41,6 +42,9 @@ class ParseCreateParams(TypedDict, total=False):
41
42
  file_name: Optional[str]
42
43
  """The name of the file to be parsed. If not set a name will be generated."""
43
44
 
45
+ llm_processing: LlmProcessingParam
46
+ """Controls the LLM used for the task."""
47
+
44
48
  ocr_strategy: Literal["All", "Auto"]
45
49
  """Controls the Optical Character Recognition (OCR) strategy.
46
50
 
@@ -14,12 +14,6 @@ __all__ = ["ParseCreateResponse"]
14
14
 
15
15
 
16
16
  class ParseCreateResponse(BaseModel):
17
- completed: bool
18
- """True when the task reaches a terminal state i.e.
19
-
20
- `status` is `Succeeded` or `Failed` or `Cancelled`
21
- """
22
-
23
17
  configuration: ParseConfiguration
24
18
 
25
19
  created_at: datetime
@@ -14,12 +14,6 @@ __all__ = ["ParseGetResponse"]
14
14
 
15
15
 
16
16
  class ParseGetResponse(BaseModel):
17
- completed: bool
18
- """True when the task reaches a terminal state i.e.
19
-
20
- `status` is `Succeeded` or `Failed` or `Cancelled`
21
- """
22
-
23
17
  configuration: ParseConfiguration
24
18
 
25
19
  created_at: datetime
@@ -20,7 +20,7 @@ class ClientVersionGeneratedSDK(BaseModel):
20
20
  """Version of the auto-generated SDK"""
21
21
 
22
22
 
23
- ClientVersion: TypeAlias = Union[Literal["Legacy", "Unspecified"], ClientVersionManualSDK, ClientVersionGeneratedSDK]
23
+ ClientVersion: TypeAlias = Union[Literal["Legacy"], ClientVersionManualSDK, ClientVersionGeneratedSDK]
24
24
 
25
25
 
26
26
  class VersionInfo(BaseModel):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: chunkr-ai
3
- Version: 0.1.0a10
3
+ Version: 0.1.0a11
4
4
  Summary: The official Python library for the chunkr API
5
5
  Project-URL: Homepage, https://github.com/lumina-ai-inc/chunkr-python
6
6
  Project-URL: Repository, https://github.com/lumina-ai-inc/chunkr-python
@@ -11,7 +11,7 @@ chunkr_ai/_resource.py,sha256=f5tiwjxcKdbeMor8idoHtMFTUhqD9yc2xXtq5rqeLLk,1100
11
11
  chunkr_ai/_response.py,sha256=xXNpF53hiYARmAW7npKuxQ5UHAEjgAzm7ME_L3eIstY,28800
12
12
  chunkr_ai/_streaming.py,sha256=ZmyrVWk7-AWkLAATR55WgNxnyFzYmaqJt2LthA_PTqQ,10100
13
13
  chunkr_ai/_types.py,sha256=nzD_EEP9CVutLcSeuKLO6Mpn5cd_R0Vo0dEM7AWa7yY,7239
14
- chunkr_ai/_version.py,sha256=DjItTmf7c0SXbPfiF1CUJ2pzf-qJNG2loWN0G4kVsqw,170
14
+ chunkr_ai/_version.py,sha256=O_LWqHa5Todq2XeeCDSW_cu4p1oubeyQ-SQUznjufns,170
15
15
  chunkr_ai/pagination.py,sha256=bT-ErcJ80YlKBV6tWq2s9uqg-wv7o66SKe_AgUAGrKc,3533
16
16
  chunkr_ai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  chunkr_ai/_utils/__init__.py,sha256=7fch0GT9zpNnErbciSpUNa-SjTxxjY6kxHxKMOM4AGs,2305
@@ -33,9 +33,9 @@ chunkr_ai/resources/health.py,sha256=sLA4PSAf-4JK1Lrqb0TApQ0Hc5Q8fZzbKQXzA3bNEdQ
33
33
  chunkr_ai/resources/webhooks.py,sha256=RhteI3ahE2rGSDEMUZH0HDBTOQqWS8sZ5D00ErKWnpE,7006
34
34
  chunkr_ai/resources/tasks/__init__.py,sha256=wDCnwtnpTfiaLg7NBxoLZYh2TtOw44_DSqtJa_TjmXU,1439
35
35
  chunkr_ai/resources/tasks/extract.py,sha256=LQJTmYItqUu60G0hYfdw_nyMLARyxqzjVO6ETIn8hDo,14980
36
- chunkr_ai/resources/tasks/parse.py,sha256=hbFEFu-tU6RWktA0Tv6KP0HyeLdS62kO4UoQfbSsAJc,18963
36
+ chunkr_ai/resources/tasks/parse.py,sha256=bQcA-zOQvTN9_OiqmzILsJLQqJtONU5XbNZQMAm-0yk,19374
37
37
  chunkr_ai/resources/tasks/tasks.py,sha256=W9bXpfgE56MkM5buBYg5-dcPYGFo_CzgVkr1kOOpXtQ,23582
38
- chunkr_ai/types/__init__.py,sha256=gO4mkpo_tYfe1PGSc0Uzlc8rZr9PTmHshGZFzmG98mM,2365
38
+ chunkr_ai/types/__init__.py,sha256=vBUEzAt6cblUc44h8NivvVrua_CmaIujTvjvf8B0_S0,2499
39
39
  chunkr_ai/types/bounding_box.py,sha256=JDZlhJJl4lg6RYGf8VpC46soQfQ10-K8YwHHA6XBFkM,431
40
40
  chunkr_ai/types/cell.py,sha256=D-S_XAzmOJs0Lo2RgY7T3h_ChdhSrRrI8IN4qo2sGOU,1143
41
41
  chunkr_ai/types/cell_style.py,sha256=VqSz6pZ7rjmHYrq_S63IOFPaWvXjWhNUIGc3V4UlF6U,873
@@ -47,7 +47,7 @@ chunkr_ai/types/extract_configuration.py,sha256=OCs3SnuS8qXWB926o8Gv1Y2AuNszplGm
47
47
  chunkr_ai/types/extract_output_response.py,sha256=kfkHbeEVl3x3t-7u4h4Cd4wC5KbrOjz4-joP5RV1WyA,1272
48
48
  chunkr_ai/types/file.py,sha256=kOxR0g-3A-qOxz2cjuTcq0wFMqPoph9uQuLYQ56zb-c,718
49
49
  chunkr_ai/types/file_create_params.py,sha256=_1Dr3FlO9BOv6gzhCN4g46_otCBqEdLe0mnxpdaRPaE,468
50
- chunkr_ai/types/file_info.py,sha256=y5kVR3mPeiwsPBMc3IBAigQbkVRpGQN09IpDPdvCe7E,611
50
+ chunkr_ai/types/file_info.py,sha256=78r0AITTY2nlZyW9pHy7dywVXyOsCK1Ysmmc-wfQBVM,499
51
51
  chunkr_ai/types/file_list_params.py,sha256=oJGTf88aAxBhNfmQDbxGT63b95HdSbMXUubKjXM22_U,822
52
52
  chunkr_ai/types/file_url.py,sha256=L434WnOXkNmt59dJiaAgT1_3pN3BIsxm2q14zHQK6xY,365
53
53
  chunkr_ai/types/file_url_params.py,sha256=ZHfKiy_6B25StdDemulavGcsPggNNMKLWf6KN7xfPTY,413
@@ -55,32 +55,34 @@ chunkr_ai/types/files_list_response.py,sha256=ggSRWhTzZWjcDXxStyCzrYICXXB5TqnL2j
55
55
  chunkr_ai/types/generation_config.py,sha256=9gfwdd228x29jC1egxq3IreKwgkGZCjSWHCXIkzQwqE,958
56
56
  chunkr_ai/types/generation_config_param.py,sha256=9E0Mhee-NInwOzjXmq3gpd8G5drsPBpzFs0AA2ywTc0,960
57
57
  chunkr_ai/types/health_check_response.py,sha256=6Zn5YYHCQf2RgMjDlf39mtiTPqfaBfC9Vv599U_rKCI,200
58
- chunkr_ai/types/ocr_result.py,sha256=W3piXLotfmZ40FJrJYMuS72shaVYLsKeN6jCf26uZGI,743
58
+ chunkr_ai/types/llm_processing.py,sha256=cTm5LBBCpnmA4u-nGQdO67JYCTizzf4lJAvUsG6BX2Q,1095
59
+ chunkr_ai/types/llm_processing_param.py,sha256=CSnW4-5-32Pzoo-G7G3p_NUvljtCkNguj1dHVc2Y4cA,1135
60
+ chunkr_ai/types/ocr_result.py,sha256=EdIvpuccQ_8A8ml7yVCOEOfBoewgwTBzVJZ_les9udM,740
59
61
  chunkr_ai/types/page.py,sha256=ADdGJisS-GxBD_wdu3q1pmikgJ7twFsP0choDEXw9ro,690
60
- chunkr_ai/types/parse_configuration.py,sha256=WcUUk7ai0sHTeWUQYHyDn1ZjYqT7dzLqRWjGx5zFfsg,2427
61
- chunkr_ai/types/parse_configuration_param.py,sha256=dl884XkCnloSS9YMk8UnUm7Z963HiAzNy5qgtCSIPH8,2405
62
+ chunkr_ai/types/parse_configuration.py,sha256=MOTypSv3kiwoLEcjAgsyc674CMG4xw6SxXEXweFU6V4,2567
63
+ chunkr_ai/types/parse_configuration_param.py,sha256=hJWngWEtT3RYgHqkAvmS4RG7cODla6i-Cp8ubtZfmz0,2544
62
64
  chunkr_ai/types/parse_output_response.py,sha256=KfRFY5PnchJfEWr4jy3Dd-3AWeImGE5BP_NMFC5I6_c,947
63
- chunkr_ai/types/segment.py,sha256=KsGNynGQq7s55EHuPLY7glmvXunv2Wszhx5FhkhkN5U,3093
64
- chunkr_ai/types/segment_processing.py,sha256=uyNbxp7DsgTgBHoS9ELoyW8j-aTBbOshxYrb-TQ990E,17049
65
- chunkr_ai/types/segment_processing_param.py,sha256=a1Zk8NiaeFNSbHw5P8Usq-17mqENuZdCOQCn8nNN0o0,17199
65
+ chunkr_ai/types/segment.py,sha256=_QNYmAMSZOCNv67CwsmIOq7X0ZeYsj6jzQbKhbnoTrA,3096
66
+ chunkr_ai/types/segment_processing.py,sha256=0-b4nSoLeGsMKSVw5LPQFXnn-PoVvIJ0wFfSFEOTpsw,12115
67
+ chunkr_ai/types/segment_processing_param.py,sha256=fVbvFSzmxLPiQoPniFwqqXlbo6fRaZwzZ_TbnwYVOss,12245
66
68
  chunkr_ai/types/task_extract_updated_webhook_event.py,sha256=YYHDQEs4wg2bDgGXgHUgX_CwSLFxePJZrT5OV4J6Mhk,640
67
69
  chunkr_ai/types/task_get_params.py,sha256=Nx2luhebcoaiuRln4KP4FarWvBPd1OYi__efi56zHPM,460
68
70
  chunkr_ai/types/task_list_params.py,sha256=NySdOH1mIhZAJvcHr9xm2PeODsCO05lJMsrAiGGBKNE,1275
69
71
  chunkr_ai/types/task_parse_updated_webhook_event.py,sha256=3NsfEpJr_bfFB3Y66elraSxk0FS76c60BLUmhqmU9Vc,636
70
- chunkr_ai/types/task_response.py,sha256=RgyRaZK8TKjVfMSvsI10LYAv61QY2-195Tg3op8yCGo,2339
72
+ chunkr_ai/types/task_response.py,sha256=I53d8JdZxbg3p-AUnigISmwFFt5DILnm0gyOc2wRu28,2191
71
73
  chunkr_ai/types/unwrap_webhook_event.py,sha256=G23CLp0__AMlfM2xE11ugnDxN82uiG0Xru0p-pI9VHQ,442
72
- chunkr_ai/types/version_info.py,sha256=MVSU2Z9ATehyc1IgVExczvcP_yH7wYc1UV_BwXeF0UA,917
74
+ chunkr_ai/types/version_info.py,sha256=1qYIitGOB7aeOI7gWFfX6cAp2yw5pMzARiTtNl5NNDc,902
73
75
  chunkr_ai/types/webhook_url_response.py,sha256=q7VsWGOLqVfA_ctdcrbynQJVbfCGh1rHlXZsDc-9Sus,205
74
76
  chunkr_ai/types/tasks/__init__.py,sha256=AEF_lM5YdEvz8_7fcX0HHnVvsXdC8Hcsb2Cs-LzRBK4,711
75
77
  chunkr_ai/types/tasks/extract_create_params.py,sha256=IV5TrFqJAGFf4w_uH3hqWlbEySlAOC_2QzwKZ-3oM6o,1376
76
- chunkr_ai/types/tasks/extract_create_response.py,sha256=goc8x-L3W0hJowb4PvXigc2o_p15JL0a2ESo9Geg9xc,2287
78
+ chunkr_ai/types/tasks/extract_create_response.py,sha256=Umbm40i0Q6Oz3vX9WwTeOXwKU2g8ddZmFhRwl-5Rm5U,2147
77
79
  chunkr_ai/types/tasks/extract_get_params.py,sha256=AsJvXHvdDnIcVOvTK9gCeiMFk4wckuv19IXIJcqpqso,466
78
- chunkr_ai/types/tasks/extract_get_response.py,sha256=sQgDLTwpE2w-xVcuXO7NAF_kjhLXv_0swdBJQRGQNSI,2281
79
- chunkr_ai/types/tasks/parse_create_params.py,sha256=8ctOPP2QT-q_8zN8Fl8ene74ZGOUnR6EAA9XcvA_0p4,2957
80
- chunkr_ai/types/tasks/parse_create_response.py,sha256=l9hj6qKs76-qKzjBtVeo9lRe9wRuYltsh2GFNPyfEDM,1820
80
+ chunkr_ai/types/tasks/extract_get_response.py,sha256=FWXfS3Qobg3k6seWbbXJZ5y4N_mtJbHsN_FeZV5_ieM,2141
81
+ chunkr_ai/types/tasks/parse_create_params.py,sha256=xzWA5UX183-Lk-JDtWoV37-4pXpFGmQmS4DwSa7K8Ug,3097
82
+ chunkr_ai/types/tasks/parse_create_response.py,sha256=asqeI2l6QbdtPhaQkxkbpSwVLe2HEFfeTPG5OX_xfus,1679
81
83
  chunkr_ai/types/tasks/parse_get_params.py,sha256=Ca0C91k6ajNTMhtUkFMulgC6g8_wI7YLVGxsWiupiVA,462
82
- chunkr_ai/types/tasks/parse_get_response.py,sha256=2IoZeN8BAxQEtxRq6CCA_d9nWPvCQbp71zMzaaKPlug,1814
83
- chunkr_ai-0.1.0a10.dist-info/METADATA,sha256=xTpeNw9Gsz28LK7DnbTqP7cw_VcaVifr2tHH2DepHho,16493
84
- chunkr_ai-0.1.0a10.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
85
- chunkr_ai-0.1.0a10.dist-info/licenses/LICENSE,sha256=3FDRL-L-DFkrFy8yJpb1Nxhuztm0PB2kawcCgK5utFg,11336
86
- chunkr_ai-0.1.0a10.dist-info/RECORD,,
84
+ chunkr_ai/types/tasks/parse_get_response.py,sha256=-0_j1_5skabmAtmcK882jZGroVsBRxC_o5d6pg31bJY,1673
85
+ chunkr_ai-0.1.0a11.dist-info/METADATA,sha256=-aO85KHXxUtKZc2UZdKBe_pIWnljQ8wOj02ug2oVGCE,16493
86
+ chunkr_ai-0.1.0a11.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
87
+ chunkr_ai-0.1.0a11.dist-info/licenses/LICENSE,sha256=3FDRL-L-DFkrFy8yJpb1Nxhuztm0PB2kawcCgK5utFg,11336
88
+ chunkr_ai-0.1.0a11.dist-info/RECORD,,