PyPI - chunkr-ai - Versions diffs - 0.1.0a8__py3-none-any.whl → 0.1.0a9__py3-none-any.whl - Mend

chunkr-ai 0.1.0a8__py3-none-any.whl → 0.1.0a9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

chunkr_ai/__init__.py +3 -1
chunkr_ai/_base_client.py +9 -9
chunkr_ai/_client.py +8 -8
chunkr_ai/_models.py +10 -4
chunkr_ai/_qs.py +7 -7
chunkr_ai/_types.py +18 -11
chunkr_ai/_utils/_transform.py +2 -2
chunkr_ai/_utils/_utils.py +4 -4
chunkr_ai/_version.py +1 -1
chunkr_ai/resources/files.py +29 -29
chunkr_ai/resources/health.py +3 -3
chunkr_ai/resources/tasks/extract.py +17 -17
chunkr_ai/resources/tasks/parse.py +25 -34
chunkr_ai/resources/tasks/tasks.py +31 -31
chunkr_ai/resources/webhooks.py +3 -3
chunkr_ai/types/__init__.py +0 -2
chunkr_ai/types/file_info.py +3 -0
chunkr_ai/types/ocr_result.py +6 -6
chunkr_ai/types/parse_configuration.py +0 -4
chunkr_ai/types/parse_configuration_param.py +0 -4
chunkr_ai/types/segment.py +8 -5
chunkr_ai/types/segment_processing.py +92 -2
chunkr_ai/types/segment_processing_param.py +92 -2
chunkr_ai/types/tasks/parse_create_params.py +0 -4
chunkr_ai/types/version_info.py +1 -1
{chunkr_ai-0.1.0a8.dist-info → chunkr_ai-0.1.0a9.dist-info}/METADATA +1 -1
{chunkr_ai-0.1.0a8.dist-info → chunkr_ai-0.1.0a9.dist-info}/RECORD +29 -31
chunkr_ai/types/llm_processing.py +0 -36
chunkr_ai/types/llm_processing_param.py +0 -36
{chunkr_ai-0.1.0a8.dist-info → chunkr_ai-0.1.0a9.dist-info}/WHEEL +0 -0
{chunkr_ai-0.1.0a8.dist-info → chunkr_ai-0.1.0a9.dist-info}/licenses/LICENSE +0 -0

chunkr_ai/resources/tasks/extract.py CHANGED Viewed

@@ -6,7 +6,7 @@ from typing import Optional
 import httpx
-from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
 from ..._utils import maybe_transform, async_maybe_transform
 from ..._compat import cached_property
 from ..._resource import SyncAPIResource, AsyncAPIResource
@@ -50,16 +50,16 @@ class ExtractResource(SyncAPIResource):
         *,
         file: str,
         schema: object,
-        expires_in: Optional[int] | NotGiven = NOT_GIVEN,
-        file_name: Optional[str] | NotGiven = NOT_GIVEN,
-        parse_configuration: Optional[ParseConfigurationParam] | NotGiven = NOT_GIVEN,
-        system_prompt: Optional[str] | NotGiven = NOT_GIVEN,
+        expires_in: Optional[int] | Omit = omit,
+        file_name: Optional[str] | Omit = omit,
+        parse_configuration: Optional[ParseConfigurationParam] | Omit = omit,
+        system_prompt: Optional[str] | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
         idempotency_key: str | None = None,
     ) -> ExtractCreateResponse:
         """
@@ -129,14 +129,14 @@ class ExtractResource(SyncAPIResource):
         self,
         task_id: Optional[str],
         *,
-        base64_urls: bool | NotGiven = NOT_GIVEN,
-        include_chunks: bool | NotGiven = NOT_GIVEN,
+        base64_urls: bool | Omit = omit,
+        include_chunks: bool | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> ExtractGetResponse:
         """
         Retrieves the current state of an extract task.
@@ -210,16 +210,16 @@ class AsyncExtractResource(AsyncAPIResource):
         *,
         file: str,
         schema: object,
-        expires_in: Optional[int] | NotGiven = NOT_GIVEN,
-        file_name: Optional[str] | NotGiven = NOT_GIVEN,
-        parse_configuration: Optional[ParseConfigurationParam] | NotGiven = NOT_GIVEN,
-        system_prompt: Optional[str] | NotGiven = NOT_GIVEN,
+        expires_in: Optional[int] | Omit = omit,
+        file_name: Optional[str] | Omit = omit,
+        parse_configuration: Optional[ParseConfigurationParam] | Omit = omit,
+        system_prompt: Optional[str] | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
         idempotency_key: str | None = None,
     ) -> ExtractCreateResponse:
         """
@@ -289,14 +289,14 @@ class AsyncExtractResource(AsyncAPIResource):
         self,
         task_id: Optional[str],
         *,
-        base64_urls: bool | NotGiven = NOT_GIVEN,
-        include_chunks: bool | NotGiven = NOT_GIVEN,
+        base64_urls: bool | Omit = omit,
+        include_chunks: bool | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> ExtractGetResponse:
         """
         Retrieves the current state of an extract task.

chunkr_ai/resources/tasks/parse.py CHANGED Viewed

@@ -7,7 +7,7 @@ from typing_extensions import Literal
 import httpx
-from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
 from ..._utils import maybe_transform, async_maybe_transform
 from ..._compat import cached_property
 from ..._resource import SyncAPIResource, AsyncAPIResource
@@ -19,7 +19,6 @@ from ..._response import (
 )
 from ...types.tasks import parse_get_params, parse_create_params
 from ..._base_client import make_request_options
-from ...types.llm_processing_param import LlmProcessingParam
 from ...types.chunk_processing_param import ChunkProcessingParam
 from ...types.segment_processing_param import SegmentProcessingParam
 from ...types.tasks.parse_get_response import ParseGetResponse
@@ -52,21 +51,20 @@ class ParseResource(SyncAPIResource):
         self,
         *,
         file: str,
-        chunk_processing: ChunkProcessingParam | NotGiven = NOT_GIVEN,
-        error_handling: Literal["Fail", "Continue"] | NotGiven = NOT_GIVEN,
-        expires_in: Optional[int] | NotGiven = NOT_GIVEN,
-        file_name: Optional[str] | NotGiven = NOT_GIVEN,
-        llm_processing: LlmProcessingParam | NotGiven = NOT_GIVEN,
-        ocr_strategy: Literal["All", "Auto"] | NotGiven = NOT_GIVEN,
-        pipeline: Literal["Azure", "Chunkr"] | NotGiven = NOT_GIVEN,
-        segment_processing: Optional[SegmentProcessingParam] | NotGiven = NOT_GIVEN,
-        segmentation_strategy: Literal["LayoutAnalysis", "Page"] | NotGiven = NOT_GIVEN,
+        chunk_processing: ChunkProcessingParam | Omit = omit,
+        error_handling: Literal["Fail", "Continue"] | Omit = omit,
+        expires_in: Optional[int] | Omit = omit,
+        file_name: Optional[str] | Omit = omit,
+        ocr_strategy: Literal["All", "Auto"] | Omit = omit,
+        pipeline: Literal["Azure", "Chunkr"] | Omit = omit,
+        segment_processing: Optional[SegmentProcessingParam] | Omit = omit,
+        segmentation_strategy: Literal["LayoutAnalysis", "Page"] | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
         idempotency_key: str | None = None,
     ) -> ParseCreateResponse:
         """
@@ -99,8 +97,6 @@ class ParseResource(SyncAPIResource):
           file_name: The name of the file to be parsed. If not set a name will be generated.
-          llm_processing: Controls the LLM used for the task.
           ocr_strategy: Controls the Optical Character Recognition (OCR) strategy.
               - `All`: Processes all pages with OCR. (Latency penalty: ~0.5 seconds per page)
@@ -150,7 +146,6 @@ class ParseResource(SyncAPIResource):
                     "error_handling": error_handling,
                     "expires_in": expires_in,
                     "file_name": file_name,
-                    "llm_processing": llm_processing,
                     "ocr_strategy": ocr_strategy,
                     "pipeline": pipeline,
                     "segment_processing": segment_processing,
@@ -172,14 +167,14 @@ class ParseResource(SyncAPIResource):
         self,
         task_id: Optional[str],
         *,
-        base64_urls: bool | NotGiven = NOT_GIVEN,
-        include_chunks: bool | NotGiven = NOT_GIVEN,
+        base64_urls: bool | Omit = omit,
+        include_chunks: bool | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> ParseGetResponse:
         """
         Retrieves the current state of a parse task.
@@ -252,21 +247,20 @@ class AsyncParseResource(AsyncAPIResource):
         self,
         *,
         file: str,
-        chunk_processing: ChunkProcessingParam | NotGiven = NOT_GIVEN,
-        error_handling: Literal["Fail", "Continue"] | NotGiven = NOT_GIVEN,
-        expires_in: Optional[int] | NotGiven = NOT_GIVEN,
-        file_name: Optional[str] | NotGiven = NOT_GIVEN,
-        llm_processing: LlmProcessingParam | NotGiven = NOT_GIVEN,
-        ocr_strategy: Literal["All", "Auto"] | NotGiven = NOT_GIVEN,
-        pipeline: Literal["Azure", "Chunkr"] | NotGiven = NOT_GIVEN,
-        segment_processing: Optional[SegmentProcessingParam] | NotGiven = NOT_GIVEN,
-        segmentation_strategy: Literal["LayoutAnalysis", "Page"] | NotGiven = NOT_GIVEN,
+        chunk_processing: ChunkProcessingParam | Omit = omit,
+        error_handling: Literal["Fail", "Continue"] | Omit = omit,
+        expires_in: Optional[int] | Omit = omit,
+        file_name: Optional[str] | Omit = omit,
+        ocr_strategy: Literal["All", "Auto"] | Omit = omit,
+        pipeline: Literal["Azure", "Chunkr"] | Omit = omit,
+        segment_processing: Optional[SegmentProcessingParam] | Omit = omit,
+        segmentation_strategy: Literal["LayoutAnalysis", "Page"] | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
         idempotency_key: str | None = None,
     ) -> ParseCreateResponse:
         """
@@ -299,8 +293,6 @@ class AsyncParseResource(AsyncAPIResource):
           file_name: The name of the file to be parsed. If not set a name will be generated.
-          llm_processing: Controls the LLM used for the task.
           ocr_strategy: Controls the Optical Character Recognition (OCR) strategy.
               - `All`: Processes all pages with OCR. (Latency penalty: ~0.5 seconds per page)
@@ -350,7 +342,6 @@ class AsyncParseResource(AsyncAPIResource):
                     "error_handling": error_handling,
                     "expires_in": expires_in,
                     "file_name": file_name,
-                    "llm_processing": llm_processing,
                     "ocr_strategy": ocr_strategy,
                     "pipeline": pipeline,
                     "segment_processing": segment_processing,
@@ -372,14 +363,14 @@ class AsyncParseResource(AsyncAPIResource):
         self,
         task_id: Optional[str],
         *,
-        base64_urls: bool | NotGiven = NOT_GIVEN,
-        include_chunks: bool | NotGiven = NOT_GIVEN,
+        base64_urls: bool | Omit = omit,
+        include_chunks: bool | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> ParseGetResponse:
         """
         Retrieves the current state of a parse task.

chunkr_ai/resources/tasks/tasks.py CHANGED Viewed

@@ -25,7 +25,7 @@ from .extract import (
     ExtractResourceWithStreamingResponse,
     AsyncExtractResourceWithStreamingResponse,
 )
-from ..._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven
+from ..._types import Body, Omit, Query, Headers, NoneType, NotGiven, omit, not_given
 from ..._utils import maybe_transform, async_maybe_transform
 from ..._compat import cached_property
 from ..._resource import SyncAPIResource, AsyncAPIResource
@@ -73,21 +73,21 @@ class TasksResource(SyncAPIResource):
     def list(
         self,
         *,
-        base64_urls: bool | NotGiven = NOT_GIVEN,
-        cursor: Union[str, datetime] | NotGiven = NOT_GIVEN,
-        end: Union[str, datetime] | NotGiven = NOT_GIVEN,
-        include_chunks: bool | NotGiven = NOT_GIVEN,
-        limit: int | NotGiven = NOT_GIVEN,
-        sort: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
-        start: Union[str, datetime] | NotGiven = NOT_GIVEN,
-        statuses: List[Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]] | NotGiven = NOT_GIVEN,
-        task_types: List[Literal["Parse", "Extract"]] | NotGiven = NOT_GIVEN,
+        base64_urls: bool | Omit = omit,
+        cursor: Union[str, datetime] | Omit = omit,
+        end: Union[str, datetime] | Omit = omit,
+        include_chunks: bool | Omit = omit,
+        limit: int | Omit = omit,
+        sort: Literal["asc", "desc"] | Omit = omit,
+        start: Union[str, datetime] | Omit = omit,
+        statuses: List[Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]] | Omit = omit,
+        task_types: List[Literal["Parse", "Extract"]] | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> SyncTasksPage[TaskResponse]:
         """
         Lists tasks for the authenticated user with cursor-based pagination and optional
@@ -157,7 +157,7 @@ class TasksResource(SyncAPIResource):
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
         idempotency_key: str | None = None,
     ) -> None:
         """
@@ -202,7 +202,7 @@ class TasksResource(SyncAPIResource):
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> None:
         """
         Cancel a task that hasn't started processing yet:
@@ -238,14 +238,14 @@ class TasksResource(SyncAPIResource):
         self,
         task_id: Optional[str],
         *,
-        base64_urls: bool | NotGiven = NOT_GIVEN,
-        include_chunks: bool | NotGiven = NOT_GIVEN,
+        base64_urls: bool | Omit = omit,
+        include_chunks: bool | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> TaskResponse:
         """
         Retrieves the current state of a task.
@@ -325,21 +325,21 @@ class AsyncTasksResource(AsyncAPIResource):
     def list(
         self,
         *,
-        base64_urls: bool | NotGiven = NOT_GIVEN,
-        cursor: Union[str, datetime] | NotGiven = NOT_GIVEN,
-        end: Union[str, datetime] | NotGiven = NOT_GIVEN,
-        include_chunks: bool | NotGiven = NOT_GIVEN,
-        limit: int | NotGiven = NOT_GIVEN,
-        sort: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
-        start: Union[str, datetime] | NotGiven = NOT_GIVEN,
-        statuses: List[Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]] | NotGiven = NOT_GIVEN,
-        task_types: List[Literal["Parse", "Extract"]] | NotGiven = NOT_GIVEN,
+        base64_urls: bool | Omit = omit,
+        cursor: Union[str, datetime] | Omit = omit,
+        end: Union[str, datetime] | Omit = omit,
+        include_chunks: bool | Omit = omit,
+        limit: int | Omit = omit,
+        sort: Literal["asc", "desc"] | Omit = omit,
+        start: Union[str, datetime] | Omit = omit,
+        statuses: List[Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]] | Omit = omit,
+        task_types: List[Literal["Parse", "Extract"]] | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> AsyncPaginator[TaskResponse, AsyncTasksPage[TaskResponse]]:
         """
         Lists tasks for the authenticated user with cursor-based pagination and optional
@@ -409,7 +409,7 @@ class AsyncTasksResource(AsyncAPIResource):
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
         idempotency_key: str | None = None,
     ) -> None:
         """
@@ -454,7 +454,7 @@ class AsyncTasksResource(AsyncAPIResource):
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> None:
         """
         Cancel a task that hasn't started processing yet:
@@ -490,14 +490,14 @@ class AsyncTasksResource(AsyncAPIResource):
         self,
         task_id: Optional[str],
         *,
-        base64_urls: bool | NotGiven = NOT_GIVEN,
-        include_chunks: bool | NotGiven = NOT_GIVEN,
+        base64_urls: bool | Omit = omit,
+        include_chunks: bool | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> TaskResponse:
         """
         Retrieves the current state of a task.

chunkr_ai/resources/webhooks.py CHANGED Viewed

@@ -7,7 +7,7 @@ from typing import Mapping, cast
 import httpx
-from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from .._types import Body, Query, Headers, NotGiven, not_given
 from .._compat import cached_property
 from .._models import construct_type
 from .._resource import SyncAPIResource, AsyncAPIResource
@@ -79,7 +79,7 @@ class WebhooksResource(SyncAPIResource):
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> WebhookURLResponse:
         """Get or create webhook for user and return dashboard URL"""
         return self._get(
@@ -145,7 +145,7 @@ class AsyncWebhooksResource(AsyncAPIResource):
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> WebhookURLResponse:
         """Get or create webhook for user and return dashboard URL"""
         return await self._get(

chunkr_ai/types/__init__.py CHANGED Viewed

@@ -15,7 +15,6 @@ from .ocr_result import OcrResult as OcrResult
 from .bounding_box import BoundingBox as BoundingBox
 from .version_info import VersionInfo as VersionInfo
 from .task_response import TaskResponse as TaskResponse
-from .llm_processing import LlmProcessing as LlmProcessing
 from .file_url_params import FileURLParams as FileURLParams
 from .task_get_params import TaskGetParams as TaskGetParams
 from .chunk_processing import ChunkProcessing as ChunkProcessing
@@ -26,7 +25,6 @@ from .file_create_params import FileCreateParams as FileCreateParams
 from .segment_processing import SegmentProcessing as SegmentProcessing
 from .files_list_response import FilesListResponse as FilesListResponse
 from .parse_configuration import ParseConfiguration as ParseConfiguration
-from .llm_processing_param import LlmProcessingParam as LlmProcessingParam
 from .unwrap_webhook_event import UnwrapWebhookEvent as UnwrapWebhookEvent
 from .webhook_url_response import WebhookURLResponse as WebhookURLResponse
 from .extract_configuration import ExtractConfiguration as ExtractConfiguration

chunkr_ai/types/file_info.py CHANGED Viewed

@@ -19,3 +19,6 @@ class FileInfo(BaseModel):
     page_count: Optional[int] = None
     """The number of pages in the file."""
+    ss_cell_count: Optional[int] = None
+    """The number of cells in the file. Only used for spreadsheets."""

chunkr_ai/types/ocr_result.py CHANGED Viewed

@@ -15,14 +15,14 @@ class OcrResult(BaseModel):
     text: str
     """The recognized text of the OCR result."""
-    cell_ref: Optional[str] = None
-    """
-    Excel-style cell reference (e.g., "A1" or "A1:B2") when OCR originates from a
-    spreadsheet cell
-    """
     confidence: Optional[float] = None
     """The confidence score of the recognized text."""
     ocr_id: Optional[str] = None
     """The unique identifier for the OCR result."""
+    ss_cell_ref: Optional[str] = None
+    """
+    Excel-style cell reference (e.g., "A1" or "A1:B2") when OCR originates from a
+    spreadsheet cell
+    """

chunkr_ai/types/parse_configuration.py CHANGED Viewed

@@ -4,7 +4,6 @@ from typing import Optional
 from typing_extensions import Literal
 from .._models import BaseModel
-from .llm_processing import LlmProcessing
 from .chunk_processing import ChunkProcessing
 from .segment_processing import SegmentProcessing
@@ -23,9 +22,6 @@ class ParseConfiguration(BaseModel):
       LLM refusals etc.)
     """
-    llm_processing: Optional[LlmProcessing] = None
-    """Controls the LLM used for the task."""
     ocr_strategy: Optional[Literal["All", "Auto"]] = None
     """Controls the Optical Character Recognition (OCR) strategy.

chunkr_ai/types/parse_configuration_param.py CHANGED Viewed

@@ -5,7 +5,6 @@ from __future__ import annotations
 from typing import Optional
 from typing_extensions import Literal, TypedDict
-from .llm_processing_param import LlmProcessingParam
 from .chunk_processing_param import ChunkProcessingParam
 from .segment_processing_param import SegmentProcessingParam
@@ -24,9 +23,6 @@ class ParseConfigurationParam(TypedDict, total=False):
       LLM refusals etc.)
     """
-    llm_processing: LlmProcessingParam
-    """Controls the LLM used for the task."""
     ocr_strategy: Literal["All", "Auto"]
     """Controls the Optical Character Recognition (OCR) strategy.

chunkr_ai/types/segment.py CHANGED Viewed

@@ -31,20 +31,23 @@ class Segment(BaseModel):
         "Caption",
         "Footnote",
         "Formula",
+        "FormRegion",
+        "GraphicalItem",
+        "Legend",
+        "LineNumber",
         "ListItem",
         "Page",
         "PageFooter",
         "PageHeader",
+        "PageNumber",
         "Picture",
-        "SectionHeader",
         "Table",
         "Text",
         "Title",
+        "Unknown",
+        "SectionHeader",
     ]
-    """
-    All the possible types for a segment. Note: Different configurations will
-    produce different types. Please refer to the documentation for more information.
-    """
+    """All the possible types for a segment."""
     confidence: Optional[float] = None
     """Confidence score of the layout analysis model"""

chunkr_ai/types/segment_processing.py CHANGED Viewed

@@ -47,6 +47,24 @@ class SegmentProcessing(BaseModel):
     - `extended_context` uses the full page image as context for LLM generation.
     """
+    form_region: Optional[GenerationConfig] = FieldInfo(alias="FormRegion", default=None)
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
     formula: Optional[GenerationConfig] = FieldInfo(alias="Formula", default=None)
     """Controls the processing and generation for the segment.
@@ -65,6 +83,60 @@ class SegmentProcessing(BaseModel):
     - `extended_context` uses the full page image as context for LLM generation.
     """
+    graphical_item: Optional[GenerationConfig] = FieldInfo(alias="GraphicalItem", default=None)
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+    legend: Optional[GenerationConfig] = FieldInfo(alias="Legend", default=None)
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+    line_number: Optional[GenerationConfig] = FieldInfo(alias="LineNumber", default=None)
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
     list_item: Optional[GenerationConfig] = FieldInfo(alias="ListItem", default=None)
     """Controls the processing and generation for the segment.
@@ -137,7 +209,7 @@ class SegmentProcessing(BaseModel):
     - `extended_context` uses the full page image as context for LLM generation.
     """
-    picture: Optional[GenerationConfig] = FieldInfo(alias="Picture", default=None)
+    page_number: Optional[GenerationConfig] = FieldInfo(alias="PageNumber", default=None)
     """Controls the processing and generation for the segment.
     - `crop_image` controls whether to crop the file's images to the segment's
@@ -155,7 +227,7 @@ class SegmentProcessing(BaseModel):
     - `extended_context` uses the full page image as context for LLM generation.
     """
-    section_header: Optional[GenerationConfig] = FieldInfo(alias="SectionHeader", default=None)
+    picture: Optional[GenerationConfig] = FieldInfo(alias="Picture", default=None)
     """Controls the processing and generation for the segment.
     - `crop_image` controls whether to crop the file's images to the segment's
@@ -226,3 +298,21 @@ class SegmentProcessing(BaseModel):
       configuration.
     - `extended_context` uses the full page image as context for LLM generation.
     """
+    unknown: Optional[GenerationConfig] = FieldInfo(alias="Unknown", default=None)
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """

chunkr-ai 0.1.0a8__py3-none-any.whl → 0.1.0a9__py3-none-any.whl

chunkr-ai 0.1.0a8__py3-none-any.whl → 0.1.0a9__py3-none-any.whl