PyPI - chunkr-ai - Versions diffs - 0.1.0a10__tar.gz → 0.1.0a11__tar.gz - Mend

chunkr-ai 0.1.0a10__tar.gz → 0.1.0a11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (128)

chunkr_ai-0.1.0a11/.release-please-manifest.json ADDED Viewed

@@ -0,0 +1,3 @@
+{
+  ".": "0.1.0-alpha.11"
+}

{chunkr_ai-0.1.0a10 → chunkr_ai-0.1.0a11}/CHANGELOG.md RENAMED Viewed

@@ -1,5 +1,13 @@
 # Changelog
+## 0.1.0-alpha.11 (2025-10-02)
+Full Changelog: [v0.1.0-alpha.10...v0.1.0-alpha.11](https://github.com/lumina-ai-inc/chunkr-python/compare/v0.1.0-alpha.10...v0.1.0-alpha.11)
+### Features
+* **api:** api update ([335fcdd](https://github.com/lumina-ai-inc/chunkr-python/commit/335fcdd892d4d009ea966d12c5a3357fa38b595f))
 ## 0.1.0-alpha.10 (2025-10-02)
 Full Changelog: [v0.1.0-alpha.9...v0.1.0-alpha.10](https://github.com/lumina-ai-inc/chunkr-python/compare/v0.1.0-alpha.9...v0.1.0-alpha.10)

{chunkr_ai-0.1.0a10 → chunkr_ai-0.1.0a11}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: chunkr-ai
-Version: 0.1.0a10
+Version: 0.1.0a11
 Summary: The official Python library for the chunkr API
 Project-URL: Homepage, https://github.com/lumina-ai-inc/chunkr-python
 Project-URL: Repository, https://github.com/lumina-ai-inc/chunkr-python

{chunkr_ai-0.1.0a10 → chunkr_ai-0.1.0a11}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "chunkr-ai"
-version = "0.1.0-alpha.10"
+version = "0.1.0-alpha.11"
 description = "The official Python library for the chunkr API"
 dynamic = ["readme"]
 license = "Apache-2.0"

{chunkr_ai-0.1.0a10 → chunkr_ai-0.1.0a11}/src/chunkr_ai/_version.py RENAMED Viewed

@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 __title__ = "chunkr_ai"
-__version__ = "0.1.0-alpha.10"  # x-release-please-version
+__version__ = "0.1.0-alpha.11"  # x-release-please-version

{chunkr_ai-0.1.0a10 → chunkr_ai-0.1.0a11}/src/chunkr_ai/resources/tasks/parse.py RENAMED Viewed

@@ -19,6 +19,7 @@ from ..._response import (
 )
 from ...types.tasks import parse_get_params, parse_create_params
 from ..._base_client import make_request_options
+from ...types.llm_processing_param import LlmProcessingParam
 from ...types.chunk_processing_param import ChunkProcessingParam
 from ...types.segment_processing_param import SegmentProcessingParam
 from ...types.tasks.parse_get_response import ParseGetResponse
@@ -55,6 +56,7 @@ class ParseResource(SyncAPIResource):
         error_handling: Literal["Fail", "Continue"] | Omit = omit,
         expires_in: Optional[int] | Omit = omit,
         file_name: Optional[str] | Omit = omit,
+        llm_processing: LlmProcessingParam | Omit = omit,
         ocr_strategy: Literal["All", "Auto"] | Omit = omit,
         pipeline: Literal["Azure", "Chunkr"] | Omit = omit,
         segment_processing: Optional[SegmentProcessingParam] | Omit = omit,
@@ -97,6 +99,8 @@ class ParseResource(SyncAPIResource):
           file_name: The name of the file to be parsed. If not set a name will be generated.
+          llm_processing: Controls the LLM used for the task.
           ocr_strategy: Controls the Optical Character Recognition (OCR) strategy.
               - `All`: Processes all pages with OCR. (Latency penalty: ~0.5 seconds per page)
@@ -146,6 +150,7 @@ class ParseResource(SyncAPIResource):
                     "error_handling": error_handling,
                     "expires_in": expires_in,
                     "file_name": file_name,
+                    "llm_processing": llm_processing,
                     "ocr_strategy": ocr_strategy,
                     "pipeline": pipeline,
                     "segment_processing": segment_processing,
@@ -251,6 +256,7 @@ class AsyncParseResource(AsyncAPIResource):
         error_handling: Literal["Fail", "Continue"] | Omit = omit,
         expires_in: Optional[int] | Omit = omit,
         file_name: Optional[str] | Omit = omit,
+        llm_processing: LlmProcessingParam | Omit = omit,
         ocr_strategy: Literal["All", "Auto"] | Omit = omit,
         pipeline: Literal["Azure", "Chunkr"] | Omit = omit,
         segment_processing: Optional[SegmentProcessingParam] | Omit = omit,
@@ -293,6 +299,8 @@ class AsyncParseResource(AsyncAPIResource):
           file_name: The name of the file to be parsed. If not set a name will be generated.
+          llm_processing: Controls the LLM used for the task.
           ocr_strategy: Controls the Optical Character Recognition (OCR) strategy.
               - `All`: Processes all pages with OCR. (Latency penalty: ~0.5 seconds per page)
@@ -342,6 +350,7 @@ class AsyncParseResource(AsyncAPIResource):
                     "error_handling": error_handling,
                     "expires_in": expires_in,
                     "file_name": file_name,
+                    "llm_processing": llm_processing,
                     "ocr_strategy": ocr_strategy,
                     "pipeline": pipeline,
                     "segment_processing": segment_processing,

{chunkr_ai-0.1.0a10 → chunkr_ai-0.1.0a11}/src/chunkr_ai/types/__init__.py RENAMED Viewed

@@ -15,6 +15,7 @@ from .ocr_result import OcrResult as OcrResult
 from .bounding_box import BoundingBox as BoundingBox
 from .version_info import VersionInfo as VersionInfo
 from .task_response import TaskResponse as TaskResponse
+from .llm_processing import LlmProcessing as LlmProcessing
 from .file_url_params import FileURLParams as FileURLParams
 from .task_get_params import TaskGetParams as TaskGetParams
 from .chunk_processing import ChunkProcessing as ChunkProcessing
@@ -25,6 +26,7 @@ from .file_create_params import FileCreateParams as FileCreateParams
 from .segment_processing import SegmentProcessing as SegmentProcessing
 from .files_list_response import FilesListResponse as FilesListResponse
 from .parse_configuration import ParseConfiguration as ParseConfiguration
+from .llm_processing_param import LlmProcessingParam as LlmProcessingParam
 from .unwrap_webhook_event import UnwrapWebhookEvent as UnwrapWebhookEvent
 from .webhook_url_response import WebhookURLResponse as WebhookURLResponse
 from .extract_configuration import ExtractConfiguration as ExtractConfiguration

{chunkr_ai-0.1.0a10 → chunkr_ai-0.1.0a11}/src/chunkr_ai/types/file_info.py RENAMED Viewed

@@ -19,6 +19,3 @@ class FileInfo(BaseModel):
     page_count: Optional[int] = None
     """The number of pages in the file."""
-    ss_cell_count: Optional[int] = None
-    """The number of cells in the file. Only used for spreadsheets."""

chunkr_ai-0.1.0a11/src/chunkr_ai/types/llm_processing.py ADDED Viewed

@@ -0,0 +1,36 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+from typing import Union, Optional
+from typing_extensions import Literal, TypeAlias
+from pydantic import Field as FieldInfo
+from .._models import BaseModel
+__all__ = ["LlmProcessing", "FallbackStrategy", "FallbackStrategyModel"]
+class FallbackStrategyModel(BaseModel):
+    model: str = FieldInfo(alias="Model")
+    """Use a specific model as fallback"""
+FallbackStrategy: TypeAlias = Union[Literal["None", "Default"], FallbackStrategyModel]
+class LlmProcessing(BaseModel):
+    fallback_strategy: Optional[FallbackStrategy] = None
+    """The fallback strategy to use for the LLMs in the task."""
+    llm_model_id: Optional[str] = None
+    """The ID of the model to use for the task.
+    If not provided, the default model will be used. Please check the documentation
+    for the model you want to use.
+    """
+    max_completion_tokens: Optional[int] = None
+    """The maximum number of tokens to generate."""
+    temperature: Optional[float] = None
+    """The temperature to use for the LLM."""

chunkr_ai-0.1.0a11/src/chunkr_ai/types/llm_processing_param.py ADDED Viewed

@@ -0,0 +1,36 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+from __future__ import annotations
+from typing import Union, Optional
+from typing_extensions import Literal, Required, Annotated, TypeAlias, TypedDict
+from .._utils import PropertyInfo
+__all__ = ["LlmProcessingParam", "FallbackStrategy", "FallbackStrategyModel"]
+class FallbackStrategyModel(TypedDict, total=False):
+    model: Required[Annotated[str, PropertyInfo(alias="Model")]]
+    """Use a specific model as fallback"""
+FallbackStrategy: TypeAlias = Union[Literal["None", "Default"], FallbackStrategyModel]
+class LlmProcessingParam(TypedDict, total=False):
+    fallback_strategy: FallbackStrategy
+    """The fallback strategy to use for the LLMs in the task."""
+    llm_model_id: Optional[str]
+    """The ID of the model to use for the task.
+    If not provided, the default model will be used. Please check the documentation
+    for the model you want to use.
+    """
+    max_completion_tokens: Optional[int]
+    """The maximum number of tokens to generate."""
+    temperature: float
+    """The temperature to use for the LLM."""

{chunkr_ai-0.1.0a10 → chunkr_ai-0.1.0a11}/src/chunkr_ai/types/ocr_result.py RENAMED Viewed

@@ -15,14 +15,14 @@ class OcrResult(BaseModel):
     text: str
     """The recognized text of the OCR result."""
+    cell_ref: Optional[str] = None
+    """
+    Excel-style cell reference (e.g., "A1" or "A1:B2") when OCR originates from a
+    spreadsheet cell
+    """
     confidence: Optional[float] = None
     """The confidence score of the recognized text."""
     ocr_id: Optional[str] = None
     """The unique identifier for the OCR result."""
-    ss_cell_ref: Optional[str] = None
-    """
-    Excel-style cell reference (e.g., "A1" or "A1:B2") when OCR originates from a
-    spreadsheet cell
-    """

{chunkr_ai-0.1.0a10 → chunkr_ai-0.1.0a11}/src/chunkr_ai/types/parse_configuration.py RENAMED Viewed

@@ -4,6 +4,7 @@ from typing import Optional
 from typing_extensions import Literal
 from .._models import BaseModel
+from .llm_processing import LlmProcessing
 from .chunk_processing import ChunkProcessing
 from .segment_processing import SegmentProcessing
@@ -22,6 +23,9 @@ class ParseConfiguration(BaseModel):
       LLM refusals etc.)
     """
+    llm_processing: Optional[LlmProcessing] = None
+    """Controls the LLM used for the task."""
     ocr_strategy: Optional[Literal["All", "Auto"]] = None
     """Controls the Optical Character Recognition (OCR) strategy.

{chunkr_ai-0.1.0a10 → chunkr_ai-0.1.0a11}/src/chunkr_ai/types/parse_configuration_param.py RENAMED Viewed

@@ -5,6 +5,7 @@ from __future__ import annotations
 from typing import Optional
 from typing_extensions import Literal, TypedDict
+from .llm_processing_param import LlmProcessingParam
 from .chunk_processing_param import ChunkProcessingParam
 from .segment_processing_param import SegmentProcessingParam
@@ -23,6 +24,9 @@ class ParseConfigurationParam(TypedDict, total=False):
       LLM refusals etc.)
     """
+    llm_processing: LlmProcessingParam
+    """Controls the LLM used for the task."""
     ocr_strategy: Literal["All", "Auto"]
     """Controls the Optical Character Recognition (OCR) strategy.

{chunkr_ai-0.1.0a10 → chunkr_ai-0.1.0a11}/src/chunkr_ai/types/segment.py RENAMED Viewed

@@ -31,23 +31,20 @@ class Segment(BaseModel):
         "Caption",
         "Footnote",
         "Formula",
-        "FormRegion",
-        "GraphicalItem",
-        "Legend",
-        "LineNumber",
         "ListItem",
         "Page",
         "PageFooter",
         "PageHeader",
-        "PageNumber",
         "Picture",
+        "SectionHeader",
         "Table",
         "Text",
         "Title",
-        "Unknown",
-        "SectionHeader",
     ]
-    """All the possible types for a segment."""
+    """
+    All the possible types for a segment. Note: Different configurations will
+    produce different types. Please refer to the documentation for more information.
+    """
     confidence: Optional[float] = None
     """Confidence score of the layout analysis model"""

{chunkr_ai-0.1.0a10 → chunkr_ai-0.1.0a11}/src/chunkr_ai/types/segment_processing.py RENAMED Viewed

@@ -47,24 +47,6 @@ class SegmentProcessing(BaseModel):
     - `extended_context` uses the full page image as context for LLM generation.
     """
-    form_region: Optional[GenerationConfig] = FieldInfo(alias="FormRegion", default=None)
-    """Controls the processing and generation for the segment.
-    - `crop_image` controls whether to crop the file's images to the segment's
-      bounding box. The cropped image will be stored in the segment's `image` field.
-      Use `All` to always crop, or `Auto` to only crop when needed for
-      post-processing.
-    - `format` specifies the output format: `Html` or `Markdown`
-    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
-      - `Auto`: Process content automatically
-      - `LLM`: Use large language models for processing
-      - `Ignore`: Exclude segments from final output
-    - `description` enables LLM-generated descriptions for segments. **Note:** This
-      uses chunkr's own VLM models and is not configurable via LLM processing
-      configuration.
-    - `extended_context` uses the full page image as context for LLM generation.
-    """
     formula: Optional[GenerationConfig] = FieldInfo(alias="Formula", default=None)
     """Controls the processing and generation for the segment.
@@ -83,60 +65,6 @@ class SegmentProcessing(BaseModel):
     - `extended_context` uses the full page image as context for LLM generation.
     """
-    graphical_item: Optional[GenerationConfig] = FieldInfo(alias="GraphicalItem", default=None)
-    """Controls the processing and generation for the segment.
-    - `crop_image` controls whether to crop the file's images to the segment's
-      bounding box. The cropped image will be stored in the segment's `image` field.
-      Use `All` to always crop, or `Auto` to only crop when needed for
-      post-processing.
-    - `format` specifies the output format: `Html` or `Markdown`
-    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
-      - `Auto`: Process content automatically
-      - `LLM`: Use large language models for processing
-      - `Ignore`: Exclude segments from final output
-    - `description` enables LLM-generated descriptions for segments. **Note:** This
-      uses chunkr's own VLM models and is not configurable via LLM processing
-      configuration.
-    - `extended_context` uses the full page image as context for LLM generation.
-    """
-    legend: Optional[GenerationConfig] = FieldInfo(alias="Legend", default=None)
-    """Controls the processing and generation for the segment.
-    - `crop_image` controls whether to crop the file's images to the segment's
-      bounding box. The cropped image will be stored in the segment's `image` field.
-      Use `All` to always crop, or `Auto` to only crop when needed for
-      post-processing.
-    - `format` specifies the output format: `Html` or `Markdown`
-    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
-      - `Auto`: Process content automatically
-      - `LLM`: Use large language models for processing
-      - `Ignore`: Exclude segments from final output
-    - `description` enables LLM-generated descriptions for segments. **Note:** This
-      uses chunkr's own VLM models and is not configurable via LLM processing
-      configuration.
-    - `extended_context` uses the full page image as context for LLM generation.
-    """
-    line_number: Optional[GenerationConfig] = FieldInfo(alias="LineNumber", default=None)
-    """Controls the processing and generation for the segment.
-    - `crop_image` controls whether to crop the file's images to the segment's
-      bounding box. The cropped image will be stored in the segment's `image` field.
-      Use `All` to always crop, or `Auto` to only crop when needed for
-      post-processing.
-    - `format` specifies the output format: `Html` or `Markdown`
-    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
-      - `Auto`: Process content automatically
-      - `LLM`: Use large language models for processing
-      - `Ignore`: Exclude segments from final output
-    - `description` enables LLM-generated descriptions for segments. **Note:** This
-      uses chunkr's own VLM models and is not configurable via LLM processing
-      configuration.
-    - `extended_context` uses the full page image as context for LLM generation.
-    """
     list_item: Optional[GenerationConfig] = FieldInfo(alias="ListItem", default=None)
     """Controls the processing and generation for the segment.
@@ -209,7 +137,7 @@ class SegmentProcessing(BaseModel):
     - `extended_context` uses the full page image as context for LLM generation.
     """
-    page_number: Optional[GenerationConfig] = FieldInfo(alias="PageNumber", default=None)
+    picture: Optional[GenerationConfig] = FieldInfo(alias="Picture", default=None)
     """Controls the processing and generation for the segment.
     - `crop_image` controls whether to crop the file's images to the segment's
@@ -227,7 +155,7 @@ class SegmentProcessing(BaseModel):
     - `extended_context` uses the full page image as context for LLM generation.
     """
-    picture: Optional[GenerationConfig] = FieldInfo(alias="Picture", default=None)
+    section_header: Optional[GenerationConfig] = FieldInfo(alias="SectionHeader", default=None)
     """Controls the processing and generation for the segment.
     - `crop_image` controls whether to crop the file's images to the segment's
@@ -298,21 +226,3 @@ class SegmentProcessing(BaseModel):
       configuration.
     - `extended_context` uses the full page image as context for LLM generation.
     """
-    unknown: Optional[GenerationConfig] = FieldInfo(alias="Unknown", default=None)
-    """Controls the processing and generation for the segment.
-    - `crop_image` controls whether to crop the file's images to the segment's
-      bounding box. The cropped image will be stored in the segment's `image` field.
-      Use `All` to always crop, or `Auto` to only crop when needed for
-      post-processing.
-    - `format` specifies the output format: `Html` or `Markdown`
-    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
-      - `Auto`: Process content automatically
-      - `LLM`: Use large language models for processing
-      - `Ignore`: Exclude segments from final output
-    - `description` enables LLM-generated descriptions for segments. **Note:** This
-      uses chunkr's own VLM models and is not configurable via LLM processing
-      configuration.
-    - `extended_context` uses the full page image as context for LLM generation.
-    """

{chunkr_ai-0.1.0a10 → chunkr_ai-0.1.0a11}/src/chunkr_ai/types/segment_processing_param.py RENAMED Viewed

@@ -48,24 +48,6 @@ class SegmentProcessingParam(TypedDict, total=False):
     - `extended_context` uses the full page image as context for LLM generation.
     """
-    form_region: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="FormRegion")]
-    """Controls the processing and generation for the segment.
-    - `crop_image` controls whether to crop the file's images to the segment's
-      bounding box. The cropped image will be stored in the segment's `image` field.
-      Use `All` to always crop, or `Auto` to only crop when needed for
-      post-processing.
-    - `format` specifies the output format: `Html` or `Markdown`
-    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
-      - `Auto`: Process content automatically
-      - `LLM`: Use large language models for processing
-      - `Ignore`: Exclude segments from final output
-    - `description` enables LLM-generated descriptions for segments. **Note:** This
-      uses chunkr's own VLM models and is not configurable via LLM processing
-      configuration.
-    - `extended_context` uses the full page image as context for LLM generation.
-    """
     formula: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Formula")]
     """Controls the processing and generation for the segment.
@@ -84,60 +66,6 @@ class SegmentProcessingParam(TypedDict, total=False):
     - `extended_context` uses the full page image as context for LLM generation.
     """
-    graphical_item: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="GraphicalItem")]
-    """Controls the processing and generation for the segment.
-    - `crop_image` controls whether to crop the file's images to the segment's
-      bounding box. The cropped image will be stored in the segment's `image` field.
-      Use `All` to always crop, or `Auto` to only crop when needed for
-      post-processing.
-    - `format` specifies the output format: `Html` or `Markdown`
-    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
-      - `Auto`: Process content automatically
-      - `LLM`: Use large language models for processing
-      - `Ignore`: Exclude segments from final output
-    - `description` enables LLM-generated descriptions for segments. **Note:** This
-      uses chunkr's own VLM models and is not configurable via LLM processing
-      configuration.
-    - `extended_context` uses the full page image as context for LLM generation.
-    """
-    legend: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Legend")]
-    """Controls the processing and generation for the segment.
-    - `crop_image` controls whether to crop the file's images to the segment's
-      bounding box. The cropped image will be stored in the segment's `image` field.
-      Use `All` to always crop, or `Auto` to only crop when needed for
-      post-processing.
-    - `format` specifies the output format: `Html` or `Markdown`
-    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
-      - `Auto`: Process content automatically
-      - `LLM`: Use large language models for processing
-      - `Ignore`: Exclude segments from final output
-    - `description` enables LLM-generated descriptions for segments. **Note:** This
-      uses chunkr's own VLM models and is not configurable via LLM processing
-      configuration.
-    - `extended_context` uses the full page image as context for LLM generation.
-    """
-    line_number: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="LineNumber")]
-    """Controls the processing and generation for the segment.
-    - `crop_image` controls whether to crop the file's images to the segment's
-      bounding box. The cropped image will be stored in the segment's `image` field.
-      Use `All` to always crop, or `Auto` to only crop when needed for
-      post-processing.
-    - `format` specifies the output format: `Html` or `Markdown`
-    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
-      - `Auto`: Process content automatically
-      - `LLM`: Use large language models for processing
-      - `Ignore`: Exclude segments from final output
-    - `description` enables LLM-generated descriptions for segments. **Note:** This
-      uses chunkr's own VLM models and is not configurable via LLM processing
-      configuration.
-    - `extended_context` uses the full page image as context for LLM generation.
-    """
     list_item: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="ListItem")]
     """Controls the processing and generation for the segment.
@@ -210,7 +138,7 @@ class SegmentProcessingParam(TypedDict, total=False):
     - `extended_context` uses the full page image as context for LLM generation.
     """
-    page_number: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="PageNumber")]
+    picture: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Picture")]
     """Controls the processing and generation for the segment.
     - `crop_image` controls whether to crop the file's images to the segment's
@@ -228,7 +156,7 @@ class SegmentProcessingParam(TypedDict, total=False):
     - `extended_context` uses the full page image as context for LLM generation.
     """
-    picture: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Picture")]
+    section_header: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="SectionHeader")]
     """Controls the processing and generation for the segment.
     - `crop_image` controls whether to crop the file's images to the segment's
@@ -299,21 +227,3 @@ class SegmentProcessingParam(TypedDict, total=False):
       configuration.
     - `extended_context` uses the full page image as context for LLM generation.
     """
-    unknown: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Unknown")]
-    """Controls the processing and generation for the segment.
-    - `crop_image` controls whether to crop the file's images to the segment's
-      bounding box. The cropped image will be stored in the segment's `image` field.
-      Use `All` to always crop, or `Auto` to only crop when needed for
-      post-processing.
-    - `format` specifies the output format: `Html` or `Markdown`
-    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
-      - `Auto`: Process content automatically
-      - `LLM`: Use large language models for processing
-      - `Ignore`: Exclude segments from final output
-    - `description` enables LLM-generated descriptions for segments. **Note:** This
-      uses chunkr's own VLM models and is not configurable via LLM processing
-      configuration.
-    - `extended_context` uses the full page image as context for LLM generation.
-    """

{chunkr_ai-0.1.0a10 → chunkr_ai-0.1.0a11}/src/chunkr_ai/types/task_response.py RENAMED Viewed

@@ -20,12 +20,6 @@ Output: TypeAlias = Union[ParseOutputResponse, ExtractOutputResponse, None]
 class TaskResponse(BaseModel):
-    completed: bool
-    """True when the task reaches a terminal state i.e.
-    `status` is `Succeeded` or `Failed` or `Cancelled`
-    """
     configuration: Configuration
     """
     Unified configuration type that can represent either parse or extract
@@ -64,8 +58,8 @@ class TaskResponse(BaseModel):
     output: Optional[Output] = None
     """Unified output type that can represent either parse or extract results"""
-    parse_task_id: Optional[str] = None
-    """The ID of the source `parse` task that was used for the task"""
+    source_task_id: Optional[str] = None
+    """The ID of the source task that was used for the task"""
     started_at: Optional[datetime] = None
     """The date and time when the task was started."""

{chunkr_ai-0.1.0a10 → chunkr_ai-0.1.0a11}/src/chunkr_ai/types/tasks/extract_create_response.py RENAMED Viewed

@@ -14,12 +14,6 @@ __all__ = ["ExtractCreateResponse"]
 class ExtractCreateResponse(BaseModel):
-    completed: bool
-    """True when the task reaches a terminal state i.e.
-    `status` is `Succeeded` or `Failed` or `Cancelled`
-    """
     configuration: ExtractConfiguration
     created_at: datetime
@@ -63,7 +57,7 @@ class ExtractCreateResponse(BaseModel):
       for that field.
     """
-    parse_task_id: Optional[str] = None
+    source_task_id: Optional[str] = None
     """The ID of the source `parse` task that was used for extraction"""
     started_at: Optional[datetime] = None

{chunkr_ai-0.1.0a10 → chunkr_ai-0.1.0a11}/src/chunkr_ai/types/tasks/extract_get_response.py RENAMED Viewed

@@ -14,12 +14,6 @@ __all__ = ["ExtractGetResponse"]
 class ExtractGetResponse(BaseModel):
-    completed: bool
-    """True when the task reaches a terminal state i.e.
-    `status` is `Succeeded` or `Failed` or `Cancelled`
-    """
     configuration: ExtractConfiguration
     created_at: datetime
@@ -63,7 +57,7 @@ class ExtractGetResponse(BaseModel):
       for that field.
     """
-    parse_task_id: Optional[str] = None
+    source_task_id: Optional[str] = None
     """The ID of the source `parse` task that was used for extraction"""
     started_at: Optional[datetime] = None

{chunkr_ai-0.1.0a10 → chunkr_ai-0.1.0a11}/src/chunkr_ai/types/tasks/parse_create_params.py RENAMED Viewed

@@ -5,6 +5,7 @@ from __future__ import annotations
 from typing import Optional
 from typing_extensions import Literal, Required, TypedDict
+from ..llm_processing_param import LlmProcessingParam
 from ..chunk_processing_param import ChunkProcessingParam
 from ..segment_processing_param import SegmentProcessingParam
@@ -41,6 +42,9 @@ class ParseCreateParams(TypedDict, total=False):
     file_name: Optional[str]
     """The name of the file to be parsed. If not set a name will be generated."""
+    llm_processing: LlmProcessingParam
+    """Controls the LLM used for the task."""
     ocr_strategy: Literal["All", "Auto"]
     """Controls the Optical Character Recognition (OCR) strategy.

{chunkr_ai-0.1.0a10 → chunkr_ai-0.1.0a11}/src/chunkr_ai/types/tasks/parse_create_response.py RENAMED Viewed

@@ -14,12 +14,6 @@ __all__ = ["ParseCreateResponse"]
 class ParseCreateResponse(BaseModel):
-    completed: bool
-    """True when the task reaches a terminal state i.e.
-    `status` is `Succeeded` or `Failed` or `Cancelled`
-    """
     configuration: ParseConfiguration
     created_at: datetime

{chunkr_ai-0.1.0a10 → chunkr_ai-0.1.0a11}/src/chunkr_ai/types/tasks/parse_get_response.py RENAMED Viewed

@@ -14,12 +14,6 @@ __all__ = ["ParseGetResponse"]
 class ParseGetResponse(BaseModel):
-    completed: bool
-    """True when the task reaches a terminal state i.e.
-    `status` is `Succeeded` or `Failed` or `Cancelled`
-    """
     configuration: ParseConfiguration
     created_at: datetime

chunkr-ai 0.1.0a10__tar.gz → 0.1.0a11__tar.gz

chunkr-ai 0.1.0a10__tar.gz → 0.1.0a11__tar.gz