PyPI - chunkr-ai - Versions diffs - 0.1.0a1__py3-none-any.whl → 0.1.0a3__py3-none-any.whl - Mend

chunkr-ai 0.1.0a1__py3-none-any.whl → 0.1.0a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

chunkr_ai/_client.py +18 -9
chunkr_ai/_files.py +1 -1
chunkr_ai/_version.py +1 -1
chunkr_ai/pagination.py +61 -1
chunkr_ai/resources/__init__.py +27 -13
chunkr_ai/resources/files.py +712 -0
chunkr_ai/resources/tasks/__init__.py +33 -0
chunkr_ai/resources/tasks/parse.py +612 -0
chunkr_ai/resources/tasks/tasks.py +596 -0
chunkr_ai/types/__init__.py +7 -19
chunkr_ai/types/delete.py +10 -0
chunkr_ai/types/file.py +30 -0
chunkr_ai/types/file_create_params.py +17 -0
chunkr_ai/types/file_list_params.py +28 -0
chunkr_ai/types/file_url.py +15 -0
chunkr_ai/types/file_url_params.py +15 -0
chunkr_ai/types/files_page_response.py +20 -0
chunkr_ai/types/task.py +866 -27
chunkr_ai/types/tasks/__init__.py +6 -0
chunkr_ai/types/tasks/parse_create_params.py +844 -0
chunkr_ai/types/tasks/parse_update_params.py +838 -0
{chunkr_ai-0.1.0a1.dist-info → chunkr_ai-0.1.0a3.dist-info}/METADATA +39 -21
chunkr_ai-0.1.0a3.dist-info/RECORD +52 -0
chunkr_ai/resources/task.py +0 -1166
chunkr_ai/types/auto_generation_config.py +0 -39
chunkr_ai/types/auto_generation_config_param.py +0 -39
chunkr_ai/types/bounding_box.py +0 -19
chunkr_ai/types/chunk_processing.py +0 -40
chunkr_ai/types/chunk_processing_param.py +0 -42
chunkr_ai/types/ignore_generation_config.py +0 -39
chunkr_ai/types/ignore_generation_config_param.py +0 -39
chunkr_ai/types/llm_generation_config.py +0 -39
chunkr_ai/types/llm_generation_config_param.py +0 -39
chunkr_ai/types/llm_processing.py +0 -36
chunkr_ai/types/llm_processing_param.py +0 -36
chunkr_ai/types/picture_generation_config.py +0 -39
chunkr_ai/types/picture_generation_config_param.py +0 -39
chunkr_ai/types/segment_processing.py +0 -280
chunkr_ai/types/segment_processing_param.py +0 -281
chunkr_ai/types/table_generation_config.py +0 -39
chunkr_ai/types/table_generation_config_param.py +0 -39
chunkr_ai/types/task_parse_params.py +0 -90
chunkr_ai/types/task_update_params.py +0 -90
chunkr_ai-0.1.0a1.dist-info/RECORD +0 -58
{chunkr_ai-0.1.0a1.dist-info → chunkr_ai-0.1.0a3.dist-info}/WHEEL +0 -0
{chunkr_ai-0.1.0a1.dist-info → chunkr_ai-0.1.0a3.dist-info}/licenses/LICENSE +0 -0

chunkr_ai/types/auto_generation_config.py DELETED Viewed

@@ -1,39 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from typing import List, Optional
-from typing_extensions import Literal
-from .._models import BaseModel
-__all__ = ["AutoGenerationConfig"]
-class AutoGenerationConfig(BaseModel):
-    crop_image: Optional[Literal["All", "Auto"]] = None
-    """Controls the cropping strategy for an item (e.g. segment, chunk, etc.)
-    - `All` crops all images in the item
-    - `Auto` crops images only if required for post-processing
-    """
-    description: Optional[bool] = None
-    """Generate LLM descriptions for this segment"""
-    embed_sources: Optional[List[Literal["Content", "HTML", "Markdown", "LLM"]]] = None
-    """**DEPRECATED**: `embed` field is auto populated"""
-    extended_context: Optional[bool] = None
-    """Use the full page image as context for LLM generation"""
-    format: Optional[Literal["Html", "Markdown"]] = None
-    html: Optional[Literal["LLM", "Auto", "Ignore"]] = None
-    """**DEPRECATED**: Use `format: html` and `strategy` instead."""
-    llm: Optional[str] = None
-    """**DEPRECATED**: use description instead"""
-    markdown: Optional[Literal["LLM", "Auto", "Ignore"]] = None
-    """**DEPRECATED**: Use `format: markdown` and `strategy` instead."""
-    strategy: Optional[Literal["LLM", "Auto", "Ignore"]] = None

chunkr_ai/types/auto_generation_config_param.py DELETED Viewed

@@ -1,39 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from __future__ import annotations
-from typing import List, Optional
-from typing_extensions import Literal, TypedDict
-__all__ = ["AutoGenerationConfigParam"]
-class AutoGenerationConfigParam(TypedDict, total=False):
-    crop_image: Literal["All", "Auto"]
-    """Controls the cropping strategy for an item (e.g. segment, chunk, etc.)
-    - `All` crops all images in the item
-    - `Auto` crops images only if required for post-processing
-    """
-    description: bool
-    """Generate LLM descriptions for this segment"""
-    embed_sources: List[Literal["Content", "HTML", "Markdown", "LLM"]]
-    """**DEPRECATED**: `embed` field is auto populated"""
-    extended_context: bool
-    """Use the full page image as context for LLM generation"""
-    format: Literal["Html", "Markdown"]
-    html: Optional[Literal["LLM", "Auto", "Ignore"]]
-    """**DEPRECATED**: Use `format: html` and `strategy` instead."""
-    llm: Optional[str]
-    """**DEPRECATED**: use description instead"""
-    markdown: Optional[Literal["LLM", "Auto", "Ignore"]]
-    """**DEPRECATED**: Use `format: markdown` and `strategy` instead."""
-    strategy: Literal["LLM", "Auto", "Ignore"]

chunkr_ai/types/bounding_box.py DELETED Viewed

@@ -1,19 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from .._models import BaseModel
-__all__ = ["BoundingBox"]
-class BoundingBox(BaseModel):
-    height: float
-    """The height of the bounding box."""
-    left: float
-    """The left coordinate of the bounding box."""
-    top: float
-    """The top coordinate of the bounding box."""
-    width: float
-    """The width of the bounding box."""

chunkr_ai/types/chunk_processing.py DELETED Viewed

@@ -1,40 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from typing import Union, Optional
-from typing_extensions import Literal, TypeAlias
-from pydantic import Field as FieldInfo
-from .._models import BaseModel
-__all__ = ["ChunkProcessing", "Tokenizer", "TokenizerEnum", "TokenizerString"]
-class TokenizerEnum(BaseModel):
-    enum: Literal["Word", "Cl100kBase", "XlmRobertaBase", "BertBaseUncased"] = FieldInfo(alias="Enum")
-    """Use one of the predefined tokenizer types"""
-class TokenizerString(BaseModel):
-    string: str = FieldInfo(alias="String")
-    """
-    Use any Hugging Face tokenizer by specifying its model ID Examples:
-    "Qwen/Qwen-tokenizer", "facebook/bart-large"
-    """
-Tokenizer: TypeAlias = Union[TokenizerEnum, TokenizerString]
-class ChunkProcessing(BaseModel):
-    ignore_headers_and_footers: Optional[bool] = None
-    """DEPRECATED: use `segment_processing.ignore` This value will not be used"""
-    target_length: Optional[int] = None
-    """The target number of words in each chunk.
-    If 0, each chunk will contain a single segment.
-    """
-    tokenizer: Optional[Tokenizer] = None
-    """The tokenizer to use for the chunking process."""

chunkr_ai/types/chunk_processing_param.py DELETED Viewed

@@ -1,42 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from __future__ import annotations
-from typing import Union
-from typing_extensions import Literal, Required, Annotated, TypeAlias, TypedDict
-from .._utils import PropertyInfo
-__all__ = ["ChunkProcessingParam", "Tokenizer", "TokenizerEnum", "TokenizerString"]
-class TokenizerEnum(TypedDict, total=False):
-    enum: Required[
-        Annotated[Literal["Word", "Cl100kBase", "XlmRobertaBase", "BertBaseUncased"], PropertyInfo(alias="Enum")]
-    ]
-    """Use one of the predefined tokenizer types"""
-class TokenizerString(TypedDict, total=False):
-    string: Required[Annotated[str, PropertyInfo(alias="String")]]
-    """
-    Use any Hugging Face tokenizer by specifying its model ID Examples:
-    "Qwen/Qwen-tokenizer", "facebook/bart-large"
-    """
-Tokenizer: TypeAlias = Union[TokenizerEnum, TokenizerString]
-class ChunkProcessingParam(TypedDict, total=False):
-    ignore_headers_and_footers: bool
-    """DEPRECATED: use `segment_processing.ignore` This value will not be used"""
-    target_length: int
-    """The target number of words in each chunk.
-    If 0, each chunk will contain a single segment.
-    """
-    tokenizer: Tokenizer
-    """The tokenizer to use for the chunking process."""

chunkr_ai/types/ignore_generation_config.py DELETED Viewed

@@ -1,39 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from typing import List, Optional
-from typing_extensions import Literal
-from .._models import BaseModel
-__all__ = ["IgnoreGenerationConfig"]
-class IgnoreGenerationConfig(BaseModel):
-    crop_image: Optional[Literal["All", "Auto"]] = None
-    """Controls the cropping strategy for an item (e.g. segment, chunk, etc.)
-    - `All` crops all images in the item
-    - `Auto` crops images only if required for post-processing
-    """
-    description: Optional[bool] = None
-    """Generate LLM descriptions for this segment"""
-    embed_sources: Optional[List[Literal["Content", "HTML", "Markdown", "LLM"]]] = None
-    """**DEPRECATED**: `embed` field is auto populated"""
-    extended_context: Optional[bool] = None
-    """Use the full page image as context for LLM generation"""
-    format: Optional[Literal["Html", "Markdown"]] = None
-    html: Optional[Literal["LLM", "Auto", "Ignore"]] = None
-    """**DEPRECATED**: Use `format: html` and `strategy` instead."""
-    llm: Optional[str] = None
-    """**DEPRECATED**: use description instead"""
-    markdown: Optional[Literal["LLM", "Auto", "Ignore"]] = None
-    """**DEPRECATED**: Use `format: markdown` and `strategy` instead."""
-    strategy: Optional[Literal["LLM", "Auto", "Ignore"]] = None

chunkr_ai/types/ignore_generation_config_param.py DELETED Viewed

@@ -1,39 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from __future__ import annotations
-from typing import List, Optional
-from typing_extensions import Literal, TypedDict
-__all__ = ["IgnoreGenerationConfigParam"]
-class IgnoreGenerationConfigParam(TypedDict, total=False):
-    crop_image: Literal["All", "Auto"]
-    """Controls the cropping strategy for an item (e.g. segment, chunk, etc.)
-    - `All` crops all images in the item
-    - `Auto` crops images only if required for post-processing
-    """
-    description: bool
-    """Generate LLM descriptions for this segment"""
-    embed_sources: List[Literal["Content", "HTML", "Markdown", "LLM"]]
-    """**DEPRECATED**: `embed` field is auto populated"""
-    extended_context: bool
-    """Use the full page image as context for LLM generation"""
-    format: Literal["Html", "Markdown"]
-    html: Optional[Literal["LLM", "Auto", "Ignore"]]
-    """**DEPRECATED**: Use `format: html` and `strategy` instead."""
-    llm: Optional[str]
-    """**DEPRECATED**: use description instead"""
-    markdown: Optional[Literal["LLM", "Auto", "Ignore"]]
-    """**DEPRECATED**: Use `format: markdown` and `strategy` instead."""
-    strategy: Literal["LLM", "Auto", "Ignore"]

chunkr_ai/types/llm_generation_config.py DELETED Viewed

@@ -1,39 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from typing import List, Optional
-from typing_extensions import Literal
-from .._models import BaseModel
-__all__ = ["LlmGenerationConfig"]
-class LlmGenerationConfig(BaseModel):
-    crop_image: Optional[Literal["All", "Auto"]] = None
-    """Controls the cropping strategy for an item (e.g. segment, chunk, etc.)
-    - `All` crops all images in the item
-    - `Auto` crops images only if required for post-processing
-    """
-    description: Optional[bool] = None
-    """Generate LLM descriptions for this segment"""
-    embed_sources: Optional[List[Literal["Content", "HTML", "Markdown", "LLM"]]] = None
-    """**DEPRECATED**: `embed` field is auto populated"""
-    extended_context: Optional[bool] = None
-    """Use the full page image as context for LLM generation"""
-    format: Optional[Literal["Html", "Markdown"]] = None
-    html: Optional[Literal["LLM", "Auto", "Ignore"]] = None
-    """**DEPRECATED**: Use `format: html` and `strategy` instead."""
-    llm: Optional[str] = None
-    """**DEPRECATED**: use description instead"""
-    markdown: Optional[Literal["LLM", "Auto", "Ignore"]] = None
-    """**DEPRECATED**: Use `format: markdown` and `strategy` instead."""
-    strategy: Optional[Literal["LLM", "Auto", "Ignore"]] = None

chunkr_ai/types/llm_generation_config_param.py DELETED Viewed

@@ -1,39 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from __future__ import annotations
-from typing import List, Optional
-from typing_extensions import Literal, TypedDict
-__all__ = ["LlmGenerationConfigParam"]
-class LlmGenerationConfigParam(TypedDict, total=False):
-    crop_image: Literal["All", "Auto"]
-    """Controls the cropping strategy for an item (e.g. segment, chunk, etc.)
-    - `All` crops all images in the item
-    - `Auto` crops images only if required for post-processing
-    """
-    description: bool
-    """Generate LLM descriptions for this segment"""
-    embed_sources: List[Literal["Content", "HTML", "Markdown", "LLM"]]
-    """**DEPRECATED**: `embed` field is auto populated"""
-    extended_context: bool
-    """Use the full page image as context for LLM generation"""
-    format: Literal["Html", "Markdown"]
-    html: Optional[Literal["LLM", "Auto", "Ignore"]]
-    """**DEPRECATED**: Use `format: html` and `strategy` instead."""
-    llm: Optional[str]
-    """**DEPRECATED**: use description instead"""
-    markdown: Optional[Literal["LLM", "Auto", "Ignore"]]
-    """**DEPRECATED**: Use `format: markdown` and `strategy` instead."""
-    strategy: Literal["LLM", "Auto", "Ignore"]

chunkr_ai/types/llm_processing.py DELETED Viewed

@@ -1,36 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from typing import Union, Optional
-from typing_extensions import Literal, TypeAlias
-from pydantic import Field as FieldInfo
-from .._models import BaseModel
-__all__ = ["LlmProcessing", "FallbackStrategy", "FallbackStrategyModel"]
-class FallbackStrategyModel(BaseModel):
-    model: str = FieldInfo(alias="Model")
-    """Use a specific model as fallback"""
-FallbackStrategy: TypeAlias = Union[Literal["None", "Default"], FallbackStrategyModel]
-class LlmProcessing(BaseModel):
-    fallback_strategy: Optional[FallbackStrategy] = None
-    """The fallback strategy to use for the LLMs in the task."""
-    max_completion_tokens: Optional[int] = None
-    """The maximum number of tokens to generate."""
-    api_model_id: Optional[str] = FieldInfo(alias="model_id", default=None)
-    """The ID of the model to use for the task.
-    If not provided, the default model will be used. Please check the documentation
-    for the model you want to use.
-    """
-    temperature: Optional[float] = None
-    """The temperature to use for the LLM."""

chunkr_ai/types/llm_processing_param.py DELETED Viewed

@@ -1,36 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from __future__ import annotations
-from typing import Union, Optional
-from typing_extensions import Literal, Required, Annotated, TypeAlias, TypedDict
-from .._utils import PropertyInfo
-__all__ = ["LlmProcessingParam", "FallbackStrategy", "FallbackStrategyModel"]
-class FallbackStrategyModel(TypedDict, total=False):
-    model: Required[Annotated[str, PropertyInfo(alias="Model")]]
-    """Use a specific model as fallback"""
-FallbackStrategy: TypeAlias = Union[Literal["None", "Default"], FallbackStrategyModel]
-class LlmProcessingParam(TypedDict, total=False):
-    fallback_strategy: FallbackStrategy
-    """The fallback strategy to use for the LLMs in the task."""
-    max_completion_tokens: Optional[int]
-    """The maximum number of tokens to generate."""
-    model_id: Optional[str]
-    """The ID of the model to use for the task.
-    If not provided, the default model will be used. Please check the documentation
-    for the model you want to use.
-    """
-    temperature: float
-    """The temperature to use for the LLM."""

chunkr_ai/types/picture_generation_config.py DELETED Viewed

@@ -1,39 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from typing import List, Optional
-from typing_extensions import Literal
-from .._models import BaseModel
-__all__ = ["PictureGenerationConfig"]
-class PictureGenerationConfig(BaseModel):
-    crop_image: Optional[Literal["All", "Auto"]] = None
-    """Controls the cropping strategy for an item (e.g. segment, chunk, etc.)
-    - `All` crops all images in the item
-    - `Auto` crops images only if required for post-processing
-    """
-    description: Optional[bool] = None
-    """Generate LLM descriptions for this segment"""
-    embed_sources: Optional[List[Literal["Content", "HTML", "Markdown", "LLM"]]] = None
-    """**DEPRECATED**: `embed` field is auto populated"""
-    extended_context: Optional[bool] = None
-    """Use the full page image as context for LLM generation"""
-    format: Optional[Literal["Html", "Markdown"]] = None
-    html: Optional[Literal["LLM", "Auto", "Ignore"]] = None
-    """**DEPRECATED**: Use `format: html` and `strategy` instead."""
-    llm: Optional[str] = None
-    """**DEPRECATED**: use description instead"""
-    markdown: Optional[Literal["LLM", "Auto", "Ignore"]] = None
-    """**DEPRECATED**: Use `format: markdown` and `strategy` instead."""
-    strategy: Optional[Literal["LLM", "Auto", "Ignore"]] = None

chunkr_ai/types/picture_generation_config_param.py DELETED Viewed

@@ -1,39 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from __future__ import annotations
-from typing import List, Optional
-from typing_extensions import Literal, TypedDict
-__all__ = ["PictureGenerationConfigParam"]
-class PictureGenerationConfigParam(TypedDict, total=False):
-    crop_image: Literal["All", "Auto"]
-    """Controls the cropping strategy for an item (e.g. segment, chunk, etc.)
-    - `All` crops all images in the item
-    - `Auto` crops images only if required for post-processing
-    """
-    description: bool
-    """Generate LLM descriptions for this segment"""
-    embed_sources: List[Literal["Content", "HTML", "Markdown", "LLM"]]
-    """**DEPRECATED**: `embed` field is auto populated"""
-    extended_context: bool
-    """Use the full page image as context for LLM generation"""
-    format: Literal["Html", "Markdown"]
-    html: Optional[Literal["LLM", "Auto", "Ignore"]]
-    """**DEPRECATED**: Use `format: html` and `strategy` instead."""
-    llm: Optional[str]
-    """**DEPRECATED**: use description instead"""
-    markdown: Optional[Literal["LLM", "Auto", "Ignore"]]
-    """**DEPRECATED**: Use `format: markdown` and `strategy` instead."""
-    strategy: Literal["LLM", "Auto", "Ignore"]

chunkr-ai 0.1.0a1__py3-none-any.whl → 0.1.0a3__py3-none-any.whl

chunkr-ai 0.1.0a1__py3-none-any.whl → 0.1.0a3__py3-none-any.whl