PyPI - chunkr-ai - Versions diffs - 0.1.0a6__py3-none-any.whl → 0.1.0a7__py3-none-any.whl - Mend

chunkr-ai 0.1.0a6__py3-none-any.whl → 0.1.0a7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

chunkr_ai/__init__.py +2 -0
chunkr_ai/_client.py +31 -3
chunkr_ai/_constants.py +5 -5
chunkr_ai/_exceptions.py +4 -0
chunkr_ai/_models.py +1 -1
chunkr_ai/_types.py +35 -1
chunkr_ai/_utils/__init__.py +1 -0
chunkr_ai/_utils/_typing.py +5 -0
chunkr_ai/_version.py +1 -1
chunkr_ai/resources/__init__.py +14 -0
chunkr_ai/resources/files.py +3 -3
chunkr_ai/resources/tasks/__init__.py +14 -0
chunkr_ai/resources/tasks/extract.py +409 -0
chunkr_ai/resources/tasks/parse.py +124 -284
chunkr_ai/resources/tasks/tasks.py +62 -14
chunkr_ai/resources/webhooks.py +193 -0
chunkr_ai/types/__init__.py +27 -1
chunkr_ai/types/bounding_box.py +19 -0
chunkr_ai/types/cell.py +39 -0
chunkr_ai/types/cell_style.py +28 -0
chunkr_ai/types/chunk.py +40 -0
chunkr_ai/types/chunk_processing.py +40 -0
chunkr_ai/types/chunk_processing_param.py +42 -0
chunkr_ai/types/extract_configuration.py +24 -0
chunkr_ai/types/extract_output_response.py +19 -0
chunkr_ai/types/file_create_params.py +2 -1
chunkr_ai/types/file_info.py +21 -0
chunkr_ai/types/generation_config.py +29 -0
chunkr_ai/types/generation_config_param.py +29 -0
chunkr_ai/types/llm_processing.py +36 -0
chunkr_ai/types/llm_processing_param.py +36 -0
chunkr_ai/types/ocr_result.py +28 -0
chunkr_ai/types/page.py +27 -0
chunkr_ai/types/parse_configuration.py +64 -0
chunkr_ai/types/parse_configuration_param.py +65 -0
chunkr_ai/types/parse_output_response.py +29 -0
chunkr_ai/types/segment.py +109 -0
chunkr_ai/types/segment_processing.py +228 -0
chunkr_ai/types/segment_processing_param.py +229 -0
chunkr_ai/types/task_extract_updated_webhook_event.py +22 -0
chunkr_ai/types/task_list_params.py +7 -1
chunkr_ai/types/task_parse_updated_webhook_event.py +22 -0
chunkr_ai/types/task_response.py +68 -0
chunkr_ai/types/tasks/__init__.py +7 -1
chunkr_ai/types/tasks/extract_create_params.py +47 -0
chunkr_ai/types/tasks/extract_create_response.py +214 -0
chunkr_ai/types/tasks/extract_get_params.py +21 -0
chunkr_ai/types/tasks/extract_get_response.py +214 -0
chunkr_ai/types/tasks/parse_create_params.py +25 -793
chunkr_ai/types/tasks/parse_create_response.py +55 -0
chunkr_ai/types/tasks/parse_get_params.py +21 -0
chunkr_ai/types/tasks/parse_get_response.py +55 -0
chunkr_ai/types/unwrap_webhook_event.py +11 -0
chunkr_ai/types/version_info.py +31 -0
chunkr_ai/types/webhook_url_response.py +9 -0
{chunkr_ai-0.1.0a6.dist-info → chunkr_ai-0.1.0a7.dist-info}/METADATA +14 -13
chunkr_ai-0.1.0a7.dist-info/RECORD +86 -0
chunkr_ai/types/task.py +0 -1225
chunkr_ai/types/tasks/parse_update_params.py +0 -845
chunkr_ai-0.1.0a6.dist-info/RECORD +0 -52
{chunkr_ai-0.1.0a6.dist-info → chunkr_ai-0.1.0a7.dist-info}/WHEEL +0 -0
{chunkr_ai-0.1.0a6.dist-info → chunkr_ai-0.1.0a7.dist-info}/licenses/LICENSE +0 -0

chunkr_ai/resources/tasks/tasks.py CHANGED Viewed

@@ -2,7 +2,7 @@
 from __future__ import annotations
-from typing import Union, Optional
+from typing import List, Union, Optional
 from datetime import datetime
 from typing_extensions import Literal
@@ -17,6 +17,14 @@ from .parse import (
     AsyncParseResourceWithStreamingResponse,
 )
 from ...types import task_get_params, task_list_params
+from .extract import (
+    ExtractResource,
+    AsyncExtractResource,
+    ExtractResourceWithRawResponse,
+    AsyncExtractResourceWithRawResponse,
+    ExtractResourceWithStreamingResponse,
+    AsyncExtractResourceWithStreamingResponse,
+)
 from ..._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven
 from ..._utils import maybe_transform, async_maybe_transform
 from ..._compat import cached_property
@@ -28,13 +36,17 @@ from ..._response import (
     async_to_streamed_response_wrapper,
 )
 from ...pagination import SyncTasksPage, AsyncTasksPage
-from ...types.task import Task
 from ..._base_client import AsyncPaginator, make_request_options
+from ...types.task_response import TaskResponse
 __all__ = ["TasksResource", "AsyncTasksResource"]
 class TasksResource(SyncAPIResource):
+    @cached_property
+    def extract(self) -> ExtractResource:
+        return ExtractResource(self._client)
     @cached_property
     def parse(self) -> ParseResource:
         return ParseResource(self._client)
@@ -68,13 +80,15 @@ class TasksResource(SyncAPIResource):
         limit: int | NotGiven = NOT_GIVEN,
         sort: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
         start: Union[str, datetime] | NotGiven = NOT_GIVEN,
+        statuses: List[Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]] | NotGiven = NOT_GIVEN,
+        task_types: List[Literal["Parse", "Extract"]] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> SyncTasksPage[Task]:
+    ) -> SyncTasksPage[TaskResponse]:
         """
         Lists tasks for the authenticated user with cursor-based pagination and optional
         filtering by date range. Supports ascending or descending sort order and
@@ -96,6 +110,10 @@ class TasksResource(SyncAPIResource):
           start: Start date
+          statuses: Filter by one or more statuses
+          task_types: Filter by one or more task types
           extra_headers: Send extra headers
           extra_query: Add additional query parameters to the request
@@ -106,7 +124,7 @@ class TasksResource(SyncAPIResource):
         """
         return self._get_api_list(
             "/tasks",
-            page=SyncTasksPage[Task],
+            page=SyncTasksPage[TaskResponse],
             options=make_request_options(
                 extra_headers=extra_headers,
                 extra_query=extra_query,
@@ -121,11 +139,13 @@ class TasksResource(SyncAPIResource):
                         "limit": limit,
                         "sort": sort,
                         "start": start,
+                        "statuses": statuses,
+                        "task_types": task_types,
                     },
                     task_list_params.TaskListParams,
                 ),
             ),
-            model=Task,
+            model=TaskResponse,
         )
     def delete(
@@ -227,9 +247,9 @@ class TasksResource(SyncAPIResource):
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Task:
+    ) -> TaskResponse:
         """
-        Retrieves the current state of a task and, when requested, waits briefly for
+        Retrieves the current state of a task and, when requested, can wait for
         completion.
         Returns task details such as processing status, configuration, output (when
@@ -278,11 +298,15 @@ class TasksResource(SyncAPIResource):
                     task_get_params.TaskGetParams,
                 ),
             ),
-            cast_to=Task,
+            cast_to=TaskResponse,
         )
 class AsyncTasksResource(AsyncAPIResource):
+    @cached_property
+    def extract(self) -> AsyncExtractResource:
+        return AsyncExtractResource(self._client)
     @cached_property
     def parse(self) -> AsyncParseResource:
         return AsyncParseResource(self._client)
@@ -316,13 +340,15 @@ class AsyncTasksResource(AsyncAPIResource):
         limit: int | NotGiven = NOT_GIVEN,
         sort: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
         start: Union[str, datetime] | NotGiven = NOT_GIVEN,
+        statuses: List[Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]] | NotGiven = NOT_GIVEN,
+        task_types: List[Literal["Parse", "Extract"]] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncPaginator[Task, AsyncTasksPage[Task]]:
+    ) -> AsyncPaginator[TaskResponse, AsyncTasksPage[TaskResponse]]:
         """
         Lists tasks for the authenticated user with cursor-based pagination and optional
         filtering by date range. Supports ascending or descending sort order and
@@ -344,6 +370,10 @@ class AsyncTasksResource(AsyncAPIResource):
           start: Start date
+          statuses: Filter by one or more statuses
+          task_types: Filter by one or more task types
           extra_headers: Send extra headers
           extra_query: Add additional query parameters to the request
@@ -354,7 +384,7 @@ class AsyncTasksResource(AsyncAPIResource):
         """
         return self._get_api_list(
             "/tasks",
-            page=AsyncTasksPage[Task],
+            page=AsyncTasksPage[TaskResponse],
             options=make_request_options(
                 extra_headers=extra_headers,
                 extra_query=extra_query,
@@ -369,11 +399,13 @@ class AsyncTasksResource(AsyncAPIResource):
                         "limit": limit,
                         "sort": sort,
                         "start": start,
+                        "statuses": statuses,
+                        "task_types": task_types,
                     },
                     task_list_params.TaskListParams,
                 ),
             ),
-            model=Task,
+            model=TaskResponse,
         )
     async def delete(
@@ -475,9 +507,9 @@ class AsyncTasksResource(AsyncAPIResource):
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Task:
+    ) -> TaskResponse:
         """
-        Retrieves the current state of a task and, when requested, waits briefly for
+        Retrieves the current state of a task and, when requested, can wait for
         completion.
         Returns task details such as processing status, configuration, output (when
@@ -526,7 +558,7 @@ class AsyncTasksResource(AsyncAPIResource):
                     task_get_params.TaskGetParams,
                 ),
             ),
-            cast_to=Task,
+            cast_to=TaskResponse,
         )
@@ -547,6 +579,10 @@ class TasksResourceWithRawResponse:
             tasks.get,
         )
+    @cached_property
+    def extract(self) -> ExtractResourceWithRawResponse:
+        return ExtractResourceWithRawResponse(self._tasks.extract)
     @cached_property
     def parse(self) -> ParseResourceWithRawResponse:
         return ParseResourceWithRawResponse(self._tasks.parse)
@@ -569,6 +605,10 @@ class AsyncTasksResourceWithRawResponse:
             tasks.get,
         )
+    @cached_property
+    def extract(self) -> AsyncExtractResourceWithRawResponse:
+        return AsyncExtractResourceWithRawResponse(self._tasks.extract)
     @cached_property
     def parse(self) -> AsyncParseResourceWithRawResponse:
         return AsyncParseResourceWithRawResponse(self._tasks.parse)
@@ -591,6 +631,10 @@ class TasksResourceWithStreamingResponse:
             tasks.get,
         )
+    @cached_property
+    def extract(self) -> ExtractResourceWithStreamingResponse:
+        return ExtractResourceWithStreamingResponse(self._tasks.extract)
     @cached_property
     def parse(self) -> ParseResourceWithStreamingResponse:
         return ParseResourceWithStreamingResponse(self._tasks.parse)
@@ -613,6 +657,10 @@ class AsyncTasksResourceWithStreamingResponse:
             tasks.get,
         )
+    @cached_property
+    def extract(self) -> AsyncExtractResourceWithStreamingResponse:
+        return AsyncExtractResourceWithStreamingResponse(self._tasks.extract)
     @cached_property
     def parse(self) -> AsyncParseResourceWithStreamingResponse:
         return AsyncParseResourceWithStreamingResponse(self._tasks.parse)

chunkr_ai/resources/webhooks.py ADDED Viewed

@@ -0,0 +1,193 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+from __future__ import annotations
+import json
+from typing import Mapping, cast
+import httpx
+from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from .._compat import cached_property
+from .._models import construct_type
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import (
+    to_raw_response_wrapper,
+    to_streamed_response_wrapper,
+    async_to_raw_response_wrapper,
+    async_to_streamed_response_wrapper,
+)
+from .._exceptions import ChunkrError
+from .._base_client import make_request_options
+from ..types.unwrap_webhook_event import UnwrapWebhookEvent
+from ..types.webhook_url_response import WebhookURLResponse
+__all__ = ["WebhooksResource", "AsyncWebhooksResource"]
+class WebhooksResource(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> WebhooksResourceWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+        For more information, see https://www.github.com/lumina-ai-inc/chunkr-python#accessing-raw-response-data-eg-headers
+        """
+        return WebhooksResourceWithRawResponse(self)
+    @cached_property
+    def with_streaming_response(self) -> WebhooksResourceWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+        For more information, see https://www.github.com/lumina-ai-inc/chunkr-python#with_streaming_response
+        """
+        return WebhooksResourceWithStreamingResponse(self)
+    def unwrap(self, payload: str, *, headers: Mapping[str, str], key: str | bytes | None = None) -> UnwrapWebhookEvent:
+        try:
+            from standardwebhooks import Webhook
+        except ImportError as exc:
+            raise ChunkrError("You need to install `chunkr-ai[webhooks]` to use this method") from exc
+        if key is None:
+            key = self._client.webhook_key
+            if key is None:
+                raise ValueError(
+                    "Cannot verify a webhook without a key on either the client's webhook_key or passed in as an argument"
+                )
+        if not isinstance(headers, dict):
+            headers = dict(headers)
+        Webhook(key).verify(payload, headers)
+        return cast(
+            UnwrapWebhookEvent,
+            construct_type(
+                type_=UnwrapWebhookEvent,
+                value=json.loads(payload),
+            ),
+        )
+    def url(
+        self,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> WebhookURLResponse:
+        """Get or create webhook for user and return dashboard URL"""
+        return self._get(
+            "/webhook/url",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=WebhookURLResponse,
+        )
+class AsyncWebhooksResource(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncWebhooksResourceWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+        For more information, see https://www.github.com/lumina-ai-inc/chunkr-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncWebhooksResourceWithRawResponse(self)
+    @cached_property
+    def with_streaming_response(self) -> AsyncWebhooksResourceWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+        For more information, see https://www.github.com/lumina-ai-inc/chunkr-python#with_streaming_response
+        """
+        return AsyncWebhooksResourceWithStreamingResponse(self)
+    def unwrap(self, payload: str, *, headers: Mapping[str, str], key: str | bytes | None = None) -> UnwrapWebhookEvent:
+        try:
+            from standardwebhooks import Webhook
+        except ImportError as exc:
+            raise ChunkrError("You need to install `chunkr-ai[webhooks]` to use this method") from exc
+        if key is None:
+            key = self._client.webhook_key
+            if key is None:
+                raise ValueError(
+                    "Cannot verify a webhook without a key on either the client's webhook_key or passed in as an argument"
+                )
+        if not isinstance(headers, dict):
+            headers = dict(headers)
+        Webhook(key).verify(payload, headers)
+        return cast(
+            UnwrapWebhookEvent,
+            construct_type(
+                type_=UnwrapWebhookEvent,
+                value=json.loads(payload),
+            ),
+        )
+    async def url(
+        self,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> WebhookURLResponse:
+        """Get or create webhook for user and return dashboard URL"""
+        return await self._get(
+            "/webhook/url",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=WebhookURLResponse,
+        )
+class WebhooksResourceWithRawResponse:
+    def __init__(self, webhooks: WebhooksResource) -> None:
+        self._webhooks = webhooks
+        self.url = to_raw_response_wrapper(
+            webhooks.url,
+        )
+class AsyncWebhooksResourceWithRawResponse:
+    def __init__(self, webhooks: AsyncWebhooksResource) -> None:
+        self._webhooks = webhooks
+        self.url = async_to_raw_response_wrapper(
+            webhooks.url,
+        )
+class WebhooksResourceWithStreamingResponse:
+    def __init__(self, webhooks: WebhooksResource) -> None:
+        self._webhooks = webhooks
+        self.url = to_streamed_response_wrapper(
+            webhooks.url,
+        )
+class AsyncWebhooksResourceWithStreamingResponse:
+    def __init__(self, webhooks: AsyncWebhooksResource) -> None:
+        self._webhooks = webhooks
+        self.url = async_to_streamed_response_wrapper(
+            webhooks.url,
+        )

chunkr_ai/types/__init__.py CHANGED Viewed

@@ -2,14 +2,40 @@
 from __future__ import annotations
+from .cell import Cell as Cell
 from .file import File as File
-from .task import Task as Task
+from .page import Page as Page
+from .chunk import Chunk as Chunk
 from .delete import Delete as Delete
+from .segment import Segment as Segment
 from .file_url import FileURL as FileURL
+from .file_info import FileInfo as FileInfo
+from .cell_style import CellStyle as CellStyle
+from .ocr_result import OcrResult as OcrResult
+from .bounding_box import BoundingBox as BoundingBox
+from .version_info import VersionInfo as VersionInfo
+from .task_response import TaskResponse as TaskResponse
+from .llm_processing import LlmProcessing as LlmProcessing
 from .file_url_params import FileURLParams as FileURLParams
 from .task_get_params import TaskGetParams as TaskGetParams
+from .chunk_processing import ChunkProcessing as ChunkProcessing
 from .file_list_params import FileListParams as FileListParams
 from .task_list_params import TaskListParams as TaskListParams
+from .generation_config import GenerationConfig as GenerationConfig
 from .file_create_params import FileCreateParams as FileCreateParams
+from .segment_processing import SegmentProcessing as SegmentProcessing
 from .files_list_response import FilesListResponse as FilesListResponse
+from .parse_configuration import ParseConfiguration as ParseConfiguration
+from .llm_processing_param import LlmProcessingParam as LlmProcessingParam
+from .unwrap_webhook_event import UnwrapWebhookEvent as UnwrapWebhookEvent
+from .webhook_url_response import WebhookURLResponse as WebhookURLResponse
+from .extract_configuration import ExtractConfiguration as ExtractConfiguration
 from .health_check_response import HealthCheckResponse as HealthCheckResponse
+from .parse_output_response import ParseOutputResponse as ParseOutputResponse
+from .chunk_processing_param import ChunkProcessingParam as ChunkProcessingParam
+from .extract_output_response import ExtractOutputResponse as ExtractOutputResponse
+from .generation_config_param import GenerationConfigParam as GenerationConfigParam
+from .segment_processing_param import SegmentProcessingParam as SegmentProcessingParam
+from .parse_configuration_param import ParseConfigurationParam as ParseConfigurationParam
+from .task_parse_updated_webhook_event import TaskParseUpdatedWebhookEvent as TaskParseUpdatedWebhookEvent
+from .task_extract_updated_webhook_event import TaskExtractUpdatedWebhookEvent as TaskExtractUpdatedWebhookEvent

chunkr_ai/types/bounding_box.py ADDED Viewed

@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+from .._models import BaseModel
+__all__ = ["BoundingBox"]
+class BoundingBox(BaseModel):
+    height: float
+    """The height of the bounding box."""
+    left: float
+    """The left coordinate of the bounding box."""
+    top: float
+    """The top coordinate of the bounding box."""
+    width: float
+    """The width of the bounding box."""

chunkr_ai/types/cell.py ADDED Viewed

@@ -0,0 +1,39 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+from typing import Optional
+from .._models import BaseModel
+from .cell_style import CellStyle
+__all__ = ["Cell"]
+class Cell(BaseModel):
+    cell_id: str
+    """The cell ID."""
+    range: str
+    """Range of the cell."""
+    text: str
+    """Text content of the cell."""
+    formula: Optional[str] = None
+    """Formula of the cell."""
+    hyperlink: Optional[str] = None
+    """Hyperlink URL if the cell contains a link (e.g., "https://www.chunkr.ai")."""
+    style: Optional[CellStyle] = None
+    """Styling information for the cell including colors, fonts, and formatting."""
+    value: Optional[str] = None
+    """The computed/evaluated value of the cell.
+    This represents the actual result after evaluating any formulas, as opposed to
+    the raw text content. For cells with formulas, this is the calculated result;
+    for cells with static content, this is typically the same as the text field.
+    Example: text might show "3.14" (formatted to 2 decimal places) while value
+    could be "3.141592653589793" (full precision).
+    """

chunkr_ai/types/cell_style.py ADDED Viewed

@@ -0,0 +1,28 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+from typing import Optional
+from typing_extensions import Literal
+from .._models import BaseModel
+__all__ = ["CellStyle"]
+class CellStyle(BaseModel):
+    align: Optional[Literal["Left", "Center", "Right", "Justify"]] = None
+    """Alignment of the cell content."""
+    bg_color: Optional[str] = None
+    """Background color of the cell (e.g., "#FFFFFF" or "#DAE3F3")."""
+    font_face: Optional[str] = None
+    """Font face/family of the cell (e.g., "Arial", "Daytona")."""
+    is_bold: Optional[bool] = None
+    """Whether the cell content is bold."""
+    text_color: Optional[str] = None
+    """Text color of the cell (e.g., "#000000" or "red")."""
+    valign: Optional[Literal["Top", "Middle", "Bottom", "Baseline"]] = None
+    """Vertical alignment of the cell content."""

chunkr_ai/types/chunk.py ADDED Viewed

@@ -0,0 +1,40 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+from typing import List, Optional
+from .segment import Segment
+from .._models import BaseModel
+__all__ = ["Chunk"]
+class Chunk(BaseModel):
+    chunk_length: int
+    """The total number of tokens in the `embed` field of the chunk.
+    Calculated by the `tokenizer`.
+    """
+    segments: List[Segment]
+    """
+    Collection of document segments that form this chunk. When
+    `target_chunk_length` > 0, contains the maximum number of segments that fit
+    within that length (segments remain intact). Otherwise, contains exactly one
+    segment.
+    """
+    chunk_id: Optional[str] = None
+    """The unique identifier for the chunk."""
+    content: Optional[str] = None
+    """The content of the chunk.
+    This is the text that is generated by combining the `content` field from each
+    segment. Can be used provided as context to the LLM.
+    """
+    embed: Optional[str] = None
+    """Suggested text to be embedded for the chunk.
+    This text is generated by combining the `embed` field from each segment.
+    """

chunkr_ai/types/chunk_processing.py ADDED Viewed

@@ -0,0 +1,40 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+from typing import Union, Optional
+from typing_extensions import Literal, TypeAlias
+from pydantic import Field as FieldInfo
+from .._models import BaseModel
+__all__ = ["ChunkProcessing", "Tokenizer", "TokenizerEnum", "TokenizerString"]
+class TokenizerEnum(BaseModel):
+    enum: Literal["Word", "Cl100kBase", "XlmRobertaBase", "BertBaseUncased"] = FieldInfo(alias="Enum")
+    """Use one of the predefined tokenizer types"""
+class TokenizerString(BaseModel):
+    string: str = FieldInfo(alias="String")
+    """
+    Use any Hugging Face tokenizer by specifying its model ID Examples:
+    "Qwen/Qwen-tokenizer", "facebook/bart-large"
+    """
+Tokenizer: TypeAlias = Union[TokenizerEnum, TokenizerString]
+class ChunkProcessing(BaseModel):
+    ignore_headers_and_footers: Optional[bool] = None
+    """DEPRECATED: use `segment_processing.ignore` instead"""
+    target_length: Optional[int] = None
+    """The target number of words in each chunk.
+    If 0, each chunk will contain a single segment.
+    """
+    tokenizer: Optional[Tokenizer] = None
+    """The tokenizer to use for the chunking process."""

chunkr_ai/types/chunk_processing_param.py ADDED Viewed

@@ -0,0 +1,42 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+from __future__ import annotations
+from typing import Union, Optional
+from typing_extensions import Literal, Required, Annotated, TypeAlias, TypedDict
+from .._utils import PropertyInfo
+__all__ = ["ChunkProcessingParam", "Tokenizer", "TokenizerEnum", "TokenizerString"]
+class TokenizerEnum(TypedDict, total=False):
+    enum: Required[
+        Annotated[Literal["Word", "Cl100kBase", "XlmRobertaBase", "BertBaseUncased"], PropertyInfo(alias="Enum")]
+    ]
+    """Use one of the predefined tokenizer types"""
+class TokenizerString(TypedDict, total=False):
+    string: Required[Annotated[str, PropertyInfo(alias="String")]]
+    """
+    Use any Hugging Face tokenizer by specifying its model ID Examples:
+    "Qwen/Qwen-tokenizer", "facebook/bart-large"
+    """
+Tokenizer: TypeAlias = Union[TokenizerEnum, TokenizerString]
+class ChunkProcessingParam(TypedDict, total=False):
+    ignore_headers_and_footers: Optional[bool]
+    """DEPRECATED: use `segment_processing.ignore` instead"""
+    target_length: int
+    """The target number of words in each chunk.
+    If 0, each chunk will contain a single segment.
+    """
+    tokenizer: Tokenizer
+    """The tokenizer to use for the chunking process."""

chunkr-ai 0.1.0a6__py3-none-any.whl → 0.1.0a7__py3-none-any.whl

chunkr-ai 0.1.0a6__py3-none-any.whl → 0.1.0a7__py3-none-any.whl