PyPI - chunkr-ai - Versions diffs - 0.1.0a6__py3-none-any.whl → 0.1.0a8__py3-none-any.whl - Mend

chunkr-ai 0.1.0a6__py3-none-any.whl → 0.1.0a8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (69) hide show

chunkr_ai/__init__.py +2 -0
chunkr_ai/_base_client.py +3 -3
chunkr_ai/_client.py +31 -3
chunkr_ai/_compat.py +48 -48
chunkr_ai/_constants.py +5 -5
chunkr_ai/_exceptions.py +4 -0
chunkr_ai/_models.py +41 -41
chunkr_ai/_types.py +35 -1
chunkr_ai/_utils/__init__.py +9 -2
chunkr_ai/_utils/_compat.py +45 -0
chunkr_ai/_utils/_datetime_parse.py +136 -0
chunkr_ai/_utils/_transform.py +11 -1
chunkr_ai/_utils/_typing.py +6 -1
chunkr_ai/_utils/_utils.py +0 -1
chunkr_ai/_version.py +1 -1
chunkr_ai/resources/__init__.py +14 -0
chunkr_ai/resources/files.py +3 -3
chunkr_ai/resources/tasks/__init__.py +14 -0
chunkr_ai/resources/tasks/extract.py +393 -0
chunkr_ai/resources/tasks/parse.py +110 -286
chunkr_ai/resources/tasks/tasks.py +64 -32
chunkr_ai/resources/webhooks.py +193 -0
chunkr_ai/types/__init__.py +27 -1
chunkr_ai/types/bounding_box.py +19 -0
chunkr_ai/types/cell.py +39 -0
chunkr_ai/types/cell_style.py +28 -0
chunkr_ai/types/chunk.py +40 -0
chunkr_ai/types/chunk_processing.py +40 -0
chunkr_ai/types/chunk_processing_param.py +42 -0
chunkr_ai/types/extract_configuration.py +24 -0
chunkr_ai/types/extract_output_response.py +62 -0
chunkr_ai/types/file_create_params.py +2 -1
chunkr_ai/types/file_info.py +21 -0
chunkr_ai/types/generation_config.py +29 -0
chunkr_ai/types/generation_config_param.py +29 -0
chunkr_ai/types/llm_processing.py +36 -0
chunkr_ai/types/llm_processing_param.py +36 -0
chunkr_ai/types/ocr_result.py +28 -0
chunkr_ai/types/page.py +27 -0
chunkr_ai/types/parse_configuration.py +64 -0
chunkr_ai/types/parse_configuration_param.py +65 -0
chunkr_ai/types/parse_output_response.py +29 -0
chunkr_ai/types/segment.py +109 -0
chunkr_ai/types/segment_processing.py +228 -0
chunkr_ai/types/segment_processing_param.py +229 -0
chunkr_ai/types/task_extract_updated_webhook_event.py +22 -0
chunkr_ai/types/task_get_params.py +0 -3
chunkr_ai/types/task_list_params.py +7 -1
chunkr_ai/types/task_parse_updated_webhook_event.py +22 -0
chunkr_ai/types/task_response.py +68 -0
chunkr_ai/types/tasks/__init__.py +7 -1
chunkr_ai/types/tasks/extract_create_params.py +47 -0
chunkr_ai/types/tasks/extract_create_response.py +67 -0
chunkr_ai/types/tasks/extract_get_params.py +18 -0
chunkr_ai/types/tasks/extract_get_response.py +67 -0
chunkr_ai/types/tasks/parse_create_params.py +25 -793
chunkr_ai/types/tasks/parse_create_response.py +55 -0
chunkr_ai/types/tasks/parse_get_params.py +18 -0
chunkr_ai/types/tasks/parse_get_response.py +55 -0
chunkr_ai/types/unwrap_webhook_event.py +11 -0
chunkr_ai/types/version_info.py +31 -0
chunkr_ai/types/webhook_url_response.py +9 -0
{chunkr_ai-0.1.0a6.dist-info → chunkr_ai-0.1.0a8.dist-info}/METADATA +14 -13
chunkr_ai-0.1.0a8.dist-info/RECORD +88 -0
chunkr_ai/types/task.py +0 -1225
chunkr_ai/types/tasks/parse_update_params.py +0 -845
chunkr_ai-0.1.0a6.dist-info/RECORD +0 -52
{chunkr_ai-0.1.0a6.dist-info → chunkr_ai-0.1.0a8.dist-info}/WHEEL +0 -0
{chunkr_ai-0.1.0a6.dist-info → chunkr_ai-0.1.0a8.dist-info}/licenses/LICENSE +0 -0

chunkr_ai/types/segment_processing.py ADDED Viewed

@@ -0,0 +1,228 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+from typing import Optional
+from pydantic import Field as FieldInfo
+from .._models import BaseModel
+from .generation_config import GenerationConfig
+__all__ = ["SegmentProcessing"]
+class SegmentProcessing(BaseModel):
+    caption: Optional[GenerationConfig] = FieldInfo(alias="Caption", default=None)
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+    footnote: Optional[GenerationConfig] = FieldInfo(alias="Footnote", default=None)
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+    formula: Optional[GenerationConfig] = FieldInfo(alias="Formula", default=None)
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+    list_item: Optional[GenerationConfig] = FieldInfo(alias="ListItem", default=None)
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+    page: Optional[GenerationConfig] = FieldInfo(alias="Page", default=None)
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+    page_footer: Optional[GenerationConfig] = FieldInfo(alias="PageFooter", default=None)
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+    page_header: Optional[GenerationConfig] = FieldInfo(alias="PageHeader", default=None)
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+    picture: Optional[GenerationConfig] = FieldInfo(alias="Picture", default=None)
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+    section_header: Optional[GenerationConfig] = FieldInfo(alias="SectionHeader", default=None)
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+    table: Optional[GenerationConfig] = FieldInfo(alias="Table", default=None)
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+    text: Optional[GenerationConfig] = FieldInfo(alias="Text", default=None)
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+    title: Optional[GenerationConfig] = FieldInfo(alias="Title", default=None)
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """

chunkr_ai/types/segment_processing_param.py ADDED Viewed

@@ -0,0 +1,229 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+from __future__ import annotations
+from typing import Optional
+from typing_extensions import Annotated, TypedDict
+from .._utils import PropertyInfo
+from .generation_config_param import GenerationConfigParam
+__all__ = ["SegmentProcessingParam"]
+class SegmentProcessingParam(TypedDict, total=False):
+    caption: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Caption")]
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+    footnote: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Footnote")]
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+    formula: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Formula")]
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+    list_item: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="ListItem")]
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+    page: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Page")]
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+    page_footer: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="PageFooter")]
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+    page_header: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="PageHeader")]
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+    picture: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Picture")]
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+    section_header: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="SectionHeader")]
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+    table: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Table")]
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+    text: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Text")]
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+    title: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Title")]
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """

chunkr_ai/types/task_extract_updated_webhook_event.py ADDED Viewed

@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+from typing import Optional
+from typing_extensions import Literal
+from .._models import BaseModel
+__all__ = ["TaskExtractUpdatedWebhookEvent"]
+class TaskExtractUpdatedWebhookEvent(BaseModel):
+    event_type: Literal["task.parse.updated", "task.extract.updated"]
+    """Event type identifier"""
+    status: Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]
+    """Current status of the task"""
+    task_id: str
+    """Unique task identifier"""
+    message: Optional[str] = None
+    """Optional human-readable status message"""

chunkr_ai/types/task_get_params.py CHANGED Viewed

@@ -16,6 +16,3 @@ class TaskGetParams(TypedDict, total=False):
     include_chunks: bool
     """Whether to include chunks in the output response"""
-    wait_for_completion: bool
-    """Whether to wait for the task to complete"""

chunkr_ai/types/task_list_params.py CHANGED Viewed

@@ -2,7 +2,7 @@
 from __future__ import annotations
-from typing import Union
+from typing import List, Union
 from datetime import datetime
 from typing_extensions import Literal, Annotated, TypedDict
@@ -35,3 +35,9 @@ class TaskListParams(TypedDict, total=False):
     start: Annotated[Union[str, datetime], PropertyInfo(format="iso8601")]
     """Start date"""
+    statuses: List[Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]]
+    """Filter by one or more statuses"""
+    task_types: List[Literal["Parse", "Extract"]]
+    """Filter by one or more task types"""

chunkr_ai/types/task_parse_updated_webhook_event.py ADDED Viewed

@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+from typing import Optional
+from typing_extensions import Literal
+from .._models import BaseModel
+__all__ = ["TaskParseUpdatedWebhookEvent"]
+class TaskParseUpdatedWebhookEvent(BaseModel):
+    event_type: Literal["task.parse.updated", "task.extract.updated"]
+    """Event type identifier"""
+    status: Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]
+    """Current status of the task"""
+    task_id: str
+    """Unique task identifier"""
+    message: Optional[str] = None
+    """Optional human-readable status message"""

chunkr_ai/types/task_response.py ADDED Viewed

@@ -0,0 +1,68 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+from typing import Union, Optional
+from datetime import datetime
+from typing_extensions import Literal, TypeAlias
+from .._models import BaseModel
+from .file_info import FileInfo
+from .version_info import VersionInfo
+from .parse_configuration import ParseConfiguration
+from .extract_configuration import ExtractConfiguration
+from .parse_output_response import ParseOutputResponse
+from .extract_output_response import ExtractOutputResponse
+__all__ = ["TaskResponse", "Configuration", "Output"]
+Configuration: TypeAlias = Union[ParseConfiguration, ExtractConfiguration]
+Output: TypeAlias = Union[ParseOutputResponse, ExtractOutputResponse, None]
+class TaskResponse(BaseModel):
+    configuration: Configuration
+    """
+    Unified configuration type that can represent either parse or extract
+    configurations
+    """
+    created_at: datetime
+    """The date and time when the task was created and queued."""
+    file_info: FileInfo
+    """Information about the input file."""
+    message: str
+    """A message describing the task's status or any errors that occurred."""
+    status: Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]
+    """The status of the task."""
+    task_id: str
+    """The unique identifier for the task."""
+    task_type: Literal["Parse", "Extract"]
+    version_info: VersionInfo
+    """Version information for the task."""
+    expires_at: Optional[datetime] = None
+    """The date and time when the task will expire."""
+    finished_at: Optional[datetime] = None
+    """The date and time when the task was finished."""
+    input_file_url: Optional[str] = None
+    """The presigned URL of the input file. Deprecated: use `file_info.url` instead."""
+    output: Optional[Output] = None
+    """Unified output type that can represent either parse or extract results"""
+    source_task_id: Optional[str] = None
+    """The ID of the source task that was used for the task"""
+    started_at: Optional[datetime] = None
+    """The date and time when the task was started."""
+    task_url: Optional[str] = None
+    """The presigned URL of the task."""

chunkr_ai/types/tasks/__init__.py CHANGED Viewed

@@ -2,5 +2,11 @@
 from __future__ import annotations
+from .parse_get_params import ParseGetParams as ParseGetParams
+from .extract_get_params import ExtractGetParams as ExtractGetParams
+from .parse_get_response import ParseGetResponse as ParseGetResponse
 from .parse_create_params import ParseCreateParams as ParseCreateParams
-from .parse_update_params import ParseUpdateParams as ParseUpdateParams
+from .extract_get_response import ExtractGetResponse as ExtractGetResponse
+from .extract_create_params import ExtractCreateParams as ExtractCreateParams
+from .parse_create_response import ParseCreateResponse as ParseCreateResponse
+from .extract_create_response import ExtractCreateResponse as ExtractCreateResponse

chunkr_ai/types/tasks/extract_create_params.py ADDED Viewed

@@ -0,0 +1,47 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+from __future__ import annotations
+from typing import Optional
+from typing_extensions import Required, TypedDict
+from ..parse_configuration_param import ParseConfigurationParam
+__all__ = ["ExtractCreateParams"]
+class ExtractCreateParams(TypedDict, total=False):
+    file: Required[str]
+    """The file to be extracted. Supported inputs:
+    - `ch://files/{file_id}`: Reference to an existing file. Upload via the Files
+      API
+    - `http(s)://...`: Remote URL to fetch
+    - `data:*;base64,...` or raw base64 string
+    - `task_id`: Reference to an existing `parse` task.
+    """
+    schema: Required[object]
+    """The schema to be used for the extraction."""
+    expires_in: Optional[int]
+    """
+    The number of seconds until task is deleted. Expired tasks can **not** be
+    updated, polled or accessed via web interface.
+    """
+    file_name: Optional[str]
+    """The name of the file to be extracted.
+    If not set a name will be generated. Can not be provided if the `file` is a
+    `task_id`.
+    """
+    parse_configuration: Optional[ParseConfigurationParam]
+    """
+    Optional configuration for the `parse` task. Can not be used if `file` is a
+    `task_id`.
+    """
+    system_prompt: Optional[str]
+    """The system prompt to be used for the extraction."""

chunkr-ai 0.1.0a6__py3-none-any.whl → 0.1.0a8__py3-none-any.whl

chunkr-ai 0.1.0a6__py3-none-any.whl → 0.1.0a8__py3-none-any.whl