chunkr-ai 0.1.0__py3-none-any.whl → 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chunkr_ai/__init__.py +89 -2
- chunkr_ai/_base_client.py +1995 -0
- chunkr_ai/_client.py +402 -0
- chunkr_ai/_compat.py +219 -0
- chunkr_ai/_constants.py +14 -0
- chunkr_ai/_exceptions.py +108 -0
- chunkr_ai/_files.py +123 -0
- chunkr_ai/_models.py +829 -0
- chunkr_ai/_qs.py +150 -0
- chunkr_ai/_resource.py +43 -0
- chunkr_ai/_response.py +830 -0
- chunkr_ai/_streaming.py +333 -0
- chunkr_ai/_types.py +219 -0
- chunkr_ai/_utils/__init__.py +57 -0
- chunkr_ai/_utils/_logs.py +25 -0
- chunkr_ai/_utils/_proxy.py +65 -0
- chunkr_ai/_utils/_reflection.py +42 -0
- chunkr_ai/_utils/_resources_proxy.py +24 -0
- chunkr_ai/_utils/_streams.py +12 -0
- chunkr_ai/_utils/_sync.py +86 -0
- chunkr_ai/_utils/_transform.py +447 -0
- chunkr_ai/_utils/_typing.py +151 -0
- chunkr_ai/_utils/_utils.py +422 -0
- chunkr_ai/_version.py +4 -0
- chunkr_ai/lib/.keep +4 -0
- chunkr_ai/pagination.py +71 -0
- chunkr_ai/resources/__init__.py +33 -0
- chunkr_ai/resources/health.py +136 -0
- chunkr_ai/resources/task.py +1166 -0
- chunkr_ai/types/__init__.py +27 -0
- chunkr_ai/types/auto_generation_config.py +39 -0
- chunkr_ai/types/auto_generation_config_param.py +39 -0
- chunkr_ai/types/bounding_box.py +19 -0
- chunkr_ai/types/chunk_processing.py +40 -0
- chunkr_ai/types/chunk_processing_param.py +42 -0
- chunkr_ai/types/health_check_response.py +7 -0
- chunkr_ai/types/ignore_generation_config.py +39 -0
- chunkr_ai/types/ignore_generation_config_param.py +39 -0
- chunkr_ai/types/llm_generation_config.py +39 -0
- chunkr_ai/types/llm_generation_config_param.py +39 -0
- chunkr_ai/types/llm_processing.py +36 -0
- chunkr_ai/types/llm_processing_param.py +36 -0
- chunkr_ai/types/picture_generation_config.py +39 -0
- chunkr_ai/types/picture_generation_config_param.py +39 -0
- chunkr_ai/types/segment_processing.py +280 -0
- chunkr_ai/types/segment_processing_param.py +281 -0
- chunkr_ai/types/table_generation_config.py +39 -0
- chunkr_ai/types/table_generation_config_param.py +39 -0
- chunkr_ai/types/task.py +379 -0
- chunkr_ai/types/task_get_params.py +18 -0
- chunkr_ai/types/task_list_params.py +37 -0
- chunkr_ai/types/task_parse_params.py +90 -0
- chunkr_ai/types/task_update_params.py +90 -0
- chunkr_ai-0.1.0a1.dist-info/METADATA +504 -0
- chunkr_ai-0.1.0a1.dist-info/RECORD +58 -0
- {chunkr_ai-0.1.0.dist-info → chunkr_ai-0.1.0a1.dist-info}/WHEEL +1 -2
- chunkr_ai-0.1.0a1.dist-info/licenses/LICENSE +201 -0
- chunkr_ai/api/auth.py +0 -13
- chunkr_ai/api/chunkr.py +0 -103
- chunkr_ai/api/chunkr_base.py +0 -185
- chunkr_ai/api/configuration.py +0 -313
- chunkr_ai/api/decorators.py +0 -101
- chunkr_ai/api/misc.py +0 -139
- chunkr_ai/api/protocol.py +0 -14
- chunkr_ai/api/task_response.py +0 -208
- chunkr_ai/models.py +0 -55
- chunkr_ai-0.1.0.dist-info/METADATA +0 -268
- chunkr_ai-0.1.0.dist-info/RECORD +0 -16
- chunkr_ai-0.1.0.dist-info/licenses/LICENSE +0 -21
- chunkr_ai-0.1.0.dist-info/top_level.txt +0 -1
- /chunkr_ai/{api/__init__.py → py.typed} +0 -0
@@ -0,0 +1,27 @@
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
from .task import Task as Task
|
6
|
+
from .bounding_box import BoundingBox as BoundingBox
|
7
|
+
from .llm_processing import LlmProcessing as LlmProcessing
|
8
|
+
from .task_get_params import TaskGetParams as TaskGetParams
|
9
|
+
from .chunk_processing import ChunkProcessing as ChunkProcessing
|
10
|
+
from .task_list_params import TaskListParams as TaskListParams
|
11
|
+
from .task_parse_params import TaskParseParams as TaskParseParams
|
12
|
+
from .segment_processing import SegmentProcessing as SegmentProcessing
|
13
|
+
from .task_update_params import TaskUpdateParams as TaskUpdateParams
|
14
|
+
from .llm_processing_param import LlmProcessingParam as LlmProcessingParam
|
15
|
+
from .health_check_response import HealthCheckResponse as HealthCheckResponse
|
16
|
+
from .llm_generation_config import LlmGenerationConfig as LlmGenerationConfig
|
17
|
+
from .auto_generation_config import AutoGenerationConfig as AutoGenerationConfig
|
18
|
+
from .chunk_processing_param import ChunkProcessingParam as ChunkProcessingParam
|
19
|
+
from .table_generation_config import TableGenerationConfig as TableGenerationConfig
|
20
|
+
from .ignore_generation_config import IgnoreGenerationConfig as IgnoreGenerationConfig
|
21
|
+
from .segment_processing_param import SegmentProcessingParam as SegmentProcessingParam
|
22
|
+
from .picture_generation_config import PictureGenerationConfig as PictureGenerationConfig
|
23
|
+
from .llm_generation_config_param import LlmGenerationConfigParam as LlmGenerationConfigParam
|
24
|
+
from .auto_generation_config_param import AutoGenerationConfigParam as AutoGenerationConfigParam
|
25
|
+
from .table_generation_config_param import TableGenerationConfigParam as TableGenerationConfigParam
|
26
|
+
from .ignore_generation_config_param import IgnoreGenerationConfigParam as IgnoreGenerationConfigParam
|
27
|
+
from .picture_generation_config_param import PictureGenerationConfigParam as PictureGenerationConfigParam
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
+
|
3
|
+
from typing import List, Optional
|
4
|
+
from typing_extensions import Literal
|
5
|
+
|
6
|
+
from .._models import BaseModel
|
7
|
+
|
8
|
+
__all__ = ["AutoGenerationConfig"]
|
9
|
+
|
10
|
+
|
11
|
+
class AutoGenerationConfig(BaseModel):
    """Content-generation settings for segments using the `Auto` strategy.

    Several fields are deprecated in favour of the `format` + `strategy`
    pair and are retained for backwards compatibility with older payloads.
    """

    crop_image: Optional[Literal["All", "Auto"]] = None
    """Controls the cropping strategy for an item (e.g. segment, chunk, etc.)

    - `All` crops all images in the item
    - `Auto` crops images only if required for post-processing
    """

    description: Optional[bool] = None
    """Generate LLM descriptions for this segment"""

    embed_sources: Optional[List[Literal["Content", "HTML", "Markdown", "LLM"]]] = None
    """**DEPRECATED**: `embed` field is auto populated"""

    extended_context: Optional[bool] = None
    """Use the full page image as context for LLM generation"""

    # Output representation of the generated content; together with `strategy`
    # this replaces the deprecated `html`/`markdown` fields.
    format: Optional[Literal["Html", "Markdown"]] = None

    html: Optional[Literal["LLM", "Auto", "Ignore"]] = None
    """**DEPRECATED**: Use `format: html` and `strategy` instead."""

    llm: Optional[str] = None
    """**DEPRECATED**: use description instead"""

    markdown: Optional[Literal["LLM", "Auto", "Ignore"]] = None
    """**DEPRECATED**: Use `format: markdown` and `strategy` instead."""

    # How content is produced: by an LLM, automatically, or skipped ("Ignore").
    strategy: Optional[Literal["LLM", "Auto", "Ignore"]] = None
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
from typing import List, Optional
|
6
|
+
from typing_extensions import Literal, TypedDict
|
7
|
+
|
8
|
+
__all__ = ["AutoGenerationConfigParam"]
|
9
|
+
|
10
|
+
|
11
|
+
class AutoGenerationConfigParam(TypedDict, total=False):
    """Request-side content-generation settings for `Auto`-strategy segments.

    All keys are optional (`total=False`). Several keys are deprecated in
    favour of the `format` + `strategy` pair.
    """

    crop_image: Literal["All", "Auto"]
    """Controls the cropping strategy for an item (e.g. segment, chunk, etc.)

    - `All` crops all images in the item
    - `Auto` crops images only if required for post-processing
    """

    description: bool
    """Generate LLM descriptions for this segment"""

    embed_sources: List[Literal["Content", "HTML", "Markdown", "LLM"]]
    """**DEPRECATED**: `embed` field is auto populated"""

    extended_context: bool
    """Use the full page image as context for LLM generation"""

    # Output representation of the generated content; together with `strategy`
    # this replaces the deprecated `html`/`markdown` keys.
    format: Literal["Html", "Markdown"]

    html: Optional[Literal["LLM", "Auto", "Ignore"]]
    """**DEPRECATED**: Use `format: html` and `strategy` instead."""

    llm: Optional[str]
    """**DEPRECATED**: use description instead"""

    markdown: Optional[Literal["LLM", "Auto", "Ignore"]]
    """**DEPRECATED**: Use `format: markdown` and `strategy` instead."""

    # How content is produced: by an LLM, automatically, or skipped ("Ignore").
    strategy: Literal["LLM", "Auto", "Ignore"]
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
+
|
3
|
+
from .._models import BaseModel
|
4
|
+
|
5
|
+
__all__ = ["BoundingBox"]
|
6
|
+
|
7
|
+
|
8
|
+
class BoundingBox(BaseModel):
    """Axis-aligned rectangle described by its top-left corner and size."""

    height: float
    """The height of the bounding box."""

    left: float
    """The left coordinate of the bounding box."""

    top: float
    """The top coordinate of the bounding box."""

    width: float
    """The width of the bounding box."""
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
+
|
3
|
+
from typing import Union, Optional
|
4
|
+
from typing_extensions import Literal, TypeAlias
|
5
|
+
|
6
|
+
from pydantic import Field as FieldInfo
|
7
|
+
|
8
|
+
from .._models import BaseModel
|
9
|
+
|
10
|
+
__all__ = ["ChunkProcessing", "Tokenizer", "TokenizerEnum", "TokenizerString"]
|
11
|
+
|
12
|
+
|
13
|
+
class TokenizerEnum(BaseModel):
    """Tokenizer variant selecting one of the predefined tokenizer types."""

    # Serialized with the capitalized key "Enum" on the wire.
    enum: Literal["Word", "Cl100kBase", "XlmRobertaBase", "BertBaseUncased"] = FieldInfo(alias="Enum")
    """Use one of the predefined tokenizer types"""
|
16
|
+
|
17
|
+
|
18
|
+
class TokenizerString(BaseModel):
    """Tokenizer variant selecting an arbitrary Hugging Face tokenizer by model ID."""

    # Serialized with the capitalized key "String" on the wire.
    string: str = FieldInfo(alias="String")
    """
    Use any Hugging Face tokenizer by specifying its model ID Examples:
    "Qwen/Qwen-tokenizer", "facebook/bart-large"
    """
|
24
|
+
|
25
|
+
|
26
|
+
# Either a predefined tokenizer type (TokenizerEnum) or a free-form
# Hugging Face model ID (TokenizerString).
Tokenizer: TypeAlias = Union[TokenizerEnum, TokenizerString]
|
27
|
+
|
28
|
+
|
29
|
+
class ChunkProcessing(BaseModel):
    """Settings controlling how segments are grouped into chunks."""

    ignore_headers_and_footers: Optional[bool] = None
    """DEPRECATED: use `segment_processing.ignore` This value will not be used"""

    target_length: Optional[int] = None
    """The target number of words in each chunk.

    If 0, each chunk will contain a single segment.
    """

    tokenizer: Optional[Tokenizer] = None
    """The tokenizer to use for the chunking process."""
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
from typing import Union
|
6
|
+
from typing_extensions import Literal, Required, Annotated, TypeAlias, TypedDict
|
7
|
+
|
8
|
+
from .._utils import PropertyInfo
|
9
|
+
|
10
|
+
__all__ = ["ChunkProcessingParam", "Tokenizer", "TokenizerEnum", "TokenizerString"]
|
11
|
+
|
12
|
+
|
13
|
+
class TokenizerEnum(TypedDict, total=False):
    """Request-side tokenizer variant selecting a predefined tokenizer type."""

    # Required key; serialized with the capitalized alias "Enum" on the wire.
    enum: Required[
        Annotated[Literal["Word", "Cl100kBase", "XlmRobertaBase", "BertBaseUncased"], PropertyInfo(alias="Enum")]
    ]
    """Use one of the predefined tokenizer types"""
|
18
|
+
|
19
|
+
|
20
|
+
class TokenizerString(TypedDict, total=False):
    """Request-side tokenizer variant naming a Hugging Face tokenizer by model ID."""

    # Required key; serialized with the capitalized alias "String" on the wire.
    string: Required[Annotated[str, PropertyInfo(alias="String")]]
    """
    Use any Hugging Face tokenizer by specifying its model ID Examples:
    "Qwen/Qwen-tokenizer", "facebook/bart-large"
    """
|
26
|
+
|
27
|
+
|
28
|
+
# Either a predefined tokenizer type (TokenizerEnum) or a free-form
# Hugging Face model ID (TokenizerString).
Tokenizer: TypeAlias = Union[TokenizerEnum, TokenizerString]
|
29
|
+
|
30
|
+
|
31
|
+
class ChunkProcessingParam(TypedDict, total=False):
    """Request payload controlling how segments are grouped into chunks."""

    ignore_headers_and_footers: bool
    """DEPRECATED: use `segment_processing.ignore` This value will not be used"""

    target_length: int
    """The target number of words in each chunk.

    If 0, each chunk will contain a single segment.
    """

    tokenizer: Tokenizer
    """The tokenizer to use for the chunking process."""
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
+
|
3
|
+
from typing import List, Optional
|
4
|
+
from typing_extensions import Literal
|
5
|
+
|
6
|
+
from .._models import BaseModel
|
7
|
+
|
8
|
+
__all__ = ["IgnoreGenerationConfig"]
|
9
|
+
|
10
|
+
|
11
|
+
class IgnoreGenerationConfig(BaseModel):
    """Content-generation settings for segments using the `Ignore` strategy.

    Several fields are deprecated in favour of the `format` + `strategy`
    pair and are retained for backwards compatibility with older payloads.
    """

    crop_image: Optional[Literal["All", "Auto"]] = None
    """Controls the cropping strategy for an item (e.g. segment, chunk, etc.)

    - `All` crops all images in the item
    - `Auto` crops images only if required for post-processing
    """

    description: Optional[bool] = None
    """Generate LLM descriptions for this segment"""

    embed_sources: Optional[List[Literal["Content", "HTML", "Markdown", "LLM"]]] = None
    """**DEPRECATED**: `embed` field is auto populated"""

    extended_context: Optional[bool] = None
    """Use the full page image as context for LLM generation"""

    # Output representation of the generated content; together with `strategy`
    # this replaces the deprecated `html`/`markdown` fields.
    format: Optional[Literal["Html", "Markdown"]] = None

    html: Optional[Literal["LLM", "Auto", "Ignore"]] = None
    """**DEPRECATED**: Use `format: html` and `strategy` instead."""

    llm: Optional[str] = None
    """**DEPRECATED**: use description instead"""

    markdown: Optional[Literal["LLM", "Auto", "Ignore"]] = None
    """**DEPRECATED**: Use `format: markdown` and `strategy` instead."""

    # How content is produced: by an LLM, automatically, or skipped ("Ignore").
    strategy: Optional[Literal["LLM", "Auto", "Ignore"]] = None
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
from typing import List, Optional
|
6
|
+
from typing_extensions import Literal, TypedDict
|
7
|
+
|
8
|
+
__all__ = ["IgnoreGenerationConfigParam"]
|
9
|
+
|
10
|
+
|
11
|
+
class IgnoreGenerationConfigParam(TypedDict, total=False):
    """Request-side content-generation settings for `Ignore`-strategy segments.

    All keys are optional (`total=False`). Several keys are deprecated in
    favour of the `format` + `strategy` pair.
    """

    crop_image: Literal["All", "Auto"]
    """Controls the cropping strategy for an item (e.g. segment, chunk, etc.)

    - `All` crops all images in the item
    - `Auto` crops images only if required for post-processing
    """

    description: bool
    """Generate LLM descriptions for this segment"""

    embed_sources: List[Literal["Content", "HTML", "Markdown", "LLM"]]
    """**DEPRECATED**: `embed` field is auto populated"""

    extended_context: bool
    """Use the full page image as context for LLM generation"""

    # Output representation of the generated content; together with `strategy`
    # this replaces the deprecated `html`/`markdown` keys.
    format: Literal["Html", "Markdown"]

    html: Optional[Literal["LLM", "Auto", "Ignore"]]
    """**DEPRECATED**: Use `format: html` and `strategy` instead."""

    llm: Optional[str]
    """**DEPRECATED**: use description instead"""

    markdown: Optional[Literal["LLM", "Auto", "Ignore"]]
    """**DEPRECATED**: Use `format: markdown` and `strategy` instead."""

    # How content is produced: by an LLM, automatically, or skipped ("Ignore").
    strategy: Literal["LLM", "Auto", "Ignore"]
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
+
|
3
|
+
from typing import List, Optional
|
4
|
+
from typing_extensions import Literal
|
5
|
+
|
6
|
+
from .._models import BaseModel
|
7
|
+
|
8
|
+
__all__ = ["LlmGenerationConfig"]
|
9
|
+
|
10
|
+
|
11
|
+
class LlmGenerationConfig(BaseModel):
    """Content-generation settings for segments using the `LLM` strategy.

    Several fields are deprecated in favour of the `format` + `strategy`
    pair and are retained for backwards compatibility with older payloads.
    """

    crop_image: Optional[Literal["All", "Auto"]] = None
    """Controls the cropping strategy for an item (e.g. segment, chunk, etc.)

    - `All` crops all images in the item
    - `Auto` crops images only if required for post-processing
    """

    description: Optional[bool] = None
    """Generate LLM descriptions for this segment"""

    embed_sources: Optional[List[Literal["Content", "HTML", "Markdown", "LLM"]]] = None
    """**DEPRECATED**: `embed` field is auto populated"""

    extended_context: Optional[bool] = None
    """Use the full page image as context for LLM generation"""

    # Output representation of the generated content; together with `strategy`
    # this replaces the deprecated `html`/`markdown` fields.
    format: Optional[Literal["Html", "Markdown"]] = None

    html: Optional[Literal["LLM", "Auto", "Ignore"]] = None
    """**DEPRECATED**: Use `format: html` and `strategy` instead."""

    llm: Optional[str] = None
    """**DEPRECATED**: use description instead"""

    markdown: Optional[Literal["LLM", "Auto", "Ignore"]] = None
    """**DEPRECATED**: Use `format: markdown` and `strategy` instead."""

    # How content is produced: by an LLM, automatically, or skipped ("Ignore").
    strategy: Optional[Literal["LLM", "Auto", "Ignore"]] = None
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
from typing import List, Optional
|
6
|
+
from typing_extensions import Literal, TypedDict
|
7
|
+
|
8
|
+
__all__ = ["LlmGenerationConfigParam"]
|
9
|
+
|
10
|
+
|
11
|
+
class LlmGenerationConfigParam(TypedDict, total=False):
    """Request-side content-generation settings for `LLM`-strategy segments.

    All keys are optional (`total=False`). Several keys are deprecated in
    favour of the `format` + `strategy` pair.
    """

    crop_image: Literal["All", "Auto"]
    """Controls the cropping strategy for an item (e.g. segment, chunk, etc.)

    - `All` crops all images in the item
    - `Auto` crops images only if required for post-processing
    """

    description: bool
    """Generate LLM descriptions for this segment"""

    embed_sources: List[Literal["Content", "HTML", "Markdown", "LLM"]]
    """**DEPRECATED**: `embed` field is auto populated"""

    extended_context: bool
    """Use the full page image as context for LLM generation"""

    # Output representation of the generated content; together with `strategy`
    # this replaces the deprecated `html`/`markdown` keys.
    format: Literal["Html", "Markdown"]

    html: Optional[Literal["LLM", "Auto", "Ignore"]]
    """**DEPRECATED**: Use `format: html` and `strategy` instead."""

    llm: Optional[str]
    """**DEPRECATED**: use description instead"""

    markdown: Optional[Literal["LLM", "Auto", "Ignore"]]
    """**DEPRECATED**: Use `format: markdown` and `strategy` instead."""

    # How content is produced: by an LLM, automatically, or skipped ("Ignore").
    strategy: Literal["LLM", "Auto", "Ignore"]
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
+
|
3
|
+
from typing import Union, Optional
|
4
|
+
from typing_extensions import Literal, TypeAlias
|
5
|
+
|
6
|
+
from pydantic import Field as FieldInfo
|
7
|
+
|
8
|
+
from .._models import BaseModel
|
9
|
+
|
10
|
+
__all__ = ["LlmProcessing", "FallbackStrategy", "FallbackStrategyModel"]
|
11
|
+
|
12
|
+
|
13
|
+
class FallbackStrategyModel(BaseModel):
    """Fallback-strategy variant that names a specific model to fall back to."""

    # Serialized with the capitalized key "Model" on the wire.
    model: str = FieldInfo(alias="Model")
    """Use a specific model as fallback"""
|
16
|
+
|
17
|
+
|
18
|
+
# Literal "None"/"Default" select built-in behaviour (presumably: no fallback /
# service default — confirm against API docs); FallbackStrategyModel names an
# explicit fallback model.
FallbackStrategy: TypeAlias = Union[Literal["None", "Default"], FallbackStrategyModel]
|
19
|
+
|
20
|
+
|
21
|
+
class LlmProcessing(BaseModel):
    """LLM settings applied to a task: model choice, fallback, and sampling."""

    fallback_strategy: Optional[FallbackStrategy] = None
    """The fallback strategy to use for the LLMs in the task."""

    max_completion_tokens: Optional[int] = None
    """The maximum number of tokens to generate."""

    # Attribute renamed from the wire key "model_id" (presumably to stay clear of
    # pydantic's protected `model_` namespace — confirm); serialized via the alias.
    api_model_id: Optional[str] = FieldInfo(alias="model_id", default=None)
    """The ID of the model to use for the task.

    If not provided, the default model will be used. Please check the documentation
    for the model you want to use.
    """

    temperature: Optional[float] = None
    """The temperature to use for the LLM."""
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
from typing import Union, Optional
|
6
|
+
from typing_extensions import Literal, Required, Annotated, TypeAlias, TypedDict
|
7
|
+
|
8
|
+
from .._utils import PropertyInfo
|
9
|
+
|
10
|
+
__all__ = ["LlmProcessingParam", "FallbackStrategy", "FallbackStrategyModel"]
|
11
|
+
|
12
|
+
|
13
|
+
class FallbackStrategyModel(TypedDict, total=False):
    """Request-side fallback-strategy variant naming a specific model."""

    # Required key; serialized with the capitalized alias "Model" on the wire.
    model: Required[Annotated[str, PropertyInfo(alias="Model")]]
    """Use a specific model as fallback"""
|
16
|
+
|
17
|
+
|
18
|
+
# Literal "None"/"Default" select built-in behaviour (presumably: no fallback /
# service default — confirm against API docs); FallbackStrategyModel names an
# explicit fallback model.
FallbackStrategy: TypeAlias = Union[Literal["None", "Default"], FallbackStrategyModel]
|
19
|
+
|
20
|
+
|
21
|
+
class LlmProcessingParam(TypedDict, total=False):
    """Request payload for per-task LLM settings (model, fallback, sampling)."""

    fallback_strategy: FallbackStrategy
    """The fallback strategy to use for the LLMs in the task."""

    max_completion_tokens: Optional[int]
    """The maximum number of tokens to generate."""

    model_id: Optional[str]
    """The ID of the model to use for the task.

    If not provided, the default model will be used. Please check the documentation
    for the model you want to use.
    """

    temperature: float
    """The temperature to use for the LLM."""
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
+
|
3
|
+
from typing import List, Optional
|
4
|
+
from typing_extensions import Literal
|
5
|
+
|
6
|
+
from .._models import BaseModel
|
7
|
+
|
8
|
+
__all__ = ["PictureGenerationConfig"]
|
9
|
+
|
10
|
+
|
11
|
+
class PictureGenerationConfig(BaseModel):
    """Content-generation settings for picture segments.

    Several fields are deprecated in favour of the `format` + `strategy`
    pair and are retained for backwards compatibility with older payloads.
    """

    crop_image: Optional[Literal["All", "Auto"]] = None
    """Controls the cropping strategy for an item (e.g. segment, chunk, etc.)

    - `All` crops all images in the item
    - `Auto` crops images only if required for post-processing
    """

    description: Optional[bool] = None
    """Generate LLM descriptions for this segment"""

    embed_sources: Optional[List[Literal["Content", "HTML", "Markdown", "LLM"]]] = None
    """**DEPRECATED**: `embed` field is auto populated"""

    extended_context: Optional[bool] = None
    """Use the full page image as context for LLM generation"""

    # Output representation of the generated content; together with `strategy`
    # this replaces the deprecated `html`/`markdown` fields.
    format: Optional[Literal["Html", "Markdown"]] = None

    html: Optional[Literal["LLM", "Auto", "Ignore"]] = None
    """**DEPRECATED**: Use `format: html` and `strategy` instead."""

    llm: Optional[str] = None
    """**DEPRECATED**: use description instead"""

    markdown: Optional[Literal["LLM", "Auto", "Ignore"]] = None
    """**DEPRECATED**: Use `format: markdown` and `strategy` instead."""

    # How content is produced: by an LLM, automatically, or skipped ("Ignore").
    strategy: Optional[Literal["LLM", "Auto", "Ignore"]] = None
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
from typing import List, Optional
|
6
|
+
from typing_extensions import Literal, TypedDict
|
7
|
+
|
8
|
+
__all__ = ["PictureGenerationConfigParam"]
|
9
|
+
|
10
|
+
|
11
|
+
class PictureGenerationConfigParam(TypedDict, total=False):
    """Request-side content-generation settings for picture segments.

    All keys are optional (`total=False`). Several keys are deprecated in
    favour of the `format` + `strategy` pair.
    """

    crop_image: Literal["All", "Auto"]
    """Controls the cropping strategy for an item (e.g. segment, chunk, etc.)

    - `All` crops all images in the item
    - `Auto` crops images only if required for post-processing
    """

    description: bool
    """Generate LLM descriptions for this segment"""

    embed_sources: List[Literal["Content", "HTML", "Markdown", "LLM"]]
    """**DEPRECATED**: `embed` field is auto populated"""

    extended_context: bool
    """Use the full page image as context for LLM generation"""

    # Output representation of the generated content; together with `strategy`
    # this replaces the deprecated `html`/`markdown` keys.
    format: Literal["Html", "Markdown"]

    html: Optional[Literal["LLM", "Auto", "Ignore"]]
    """**DEPRECATED**: Use `format: html` and `strategy` instead."""

    llm: Optional[str]
    """**DEPRECATED**: use description instead"""

    markdown: Optional[Literal["LLM", "Auto", "Ignore"]]
    """**DEPRECATED**: Use `format: markdown` and `strategy` instead."""

    # How content is produced: by an LLM, automatically, or skipped ("Ignore").
    strategy: Literal["LLM", "Auto", "Ignore"]
|