PyPI - chunkr-ai - Versions diffs - 0.1.0a7__py3-none-any.whl → 0.1.0a9__py3-none-any.whl - Mend

chunkr-ai 0.1.0a7__py3-none-any.whl → 0.1.0a9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

chunkr_ai/__init__.py +3 -1
chunkr_ai/_base_client.py +12 -12
chunkr_ai/_client.py +8 -8
chunkr_ai/_compat.py +48 -48
chunkr_ai/_models.py +50 -44
chunkr_ai/_qs.py +7 -7
chunkr_ai/_types.py +18 -11
chunkr_ai/_utils/__init__.py +8 -2
chunkr_ai/_utils/_compat.py +45 -0
chunkr_ai/_utils/_datetime_parse.py +136 -0
chunkr_ai/_utils/_transform.py +13 -3
chunkr_ai/_utils/_typing.py +1 -1
chunkr_ai/_utils/_utils.py +4 -5
chunkr_ai/_version.py +1 -1
chunkr_ai/resources/files.py +29 -29
chunkr_ai/resources/health.py +3 -3
chunkr_ai/resources/tasks/extract.py +21 -37
chunkr_ai/resources/tasks/parse.py +29 -54
chunkr_ai/resources/tasks/tasks.py +35 -51
chunkr_ai/resources/webhooks.py +3 -3
chunkr_ai/types/__init__.py +0 -2
chunkr_ai/types/extract_output_response.py +45 -2
chunkr_ai/types/file_info.py +3 -0
chunkr_ai/types/ocr_result.py +6 -6
chunkr_ai/types/parse_configuration.py +0 -4
chunkr_ai/types/parse_configuration_param.py +0 -4
chunkr_ai/types/segment.py +8 -5
chunkr_ai/types/segment_processing.py +92 -2
chunkr_ai/types/segment_processing_param.py +92 -2
chunkr_ai/types/task_get_params.py +0 -3
chunkr_ai/types/tasks/extract_create_response.py +0 -147
chunkr_ai/types/tasks/extract_get_params.py +0 -3
chunkr_ai/types/tasks/extract_get_response.py +0 -147
chunkr_ai/types/tasks/parse_create_params.py +0 -4
chunkr_ai/types/tasks/parse_get_params.py +0 -3
chunkr_ai/types/version_info.py +1 -1
{chunkr_ai-0.1.0a7.dist-info → chunkr_ai-0.1.0a9.dist-info}/METADATA +1 -1
{chunkr_ai-0.1.0a7.dist-info → chunkr_ai-0.1.0a9.dist-info}/RECORD +40 -40
chunkr_ai/types/llm_processing.py +0 -36
chunkr_ai/types/llm_processing_param.py +0 -36
{chunkr_ai-0.1.0a7.dist-info → chunkr_ai-0.1.0a9.dist-info}/WHEEL +0 -0
{chunkr_ai-0.1.0a7.dist-info → chunkr_ai-0.1.0a9.dist-info}/licenses/LICENSE +0 -0

chunkr_ai/types/file_info.py CHANGED Viewed

@@ -19,3 +19,6 @@ class FileInfo(BaseModel):
     page_count: Optional[int] = None
     """The number of pages in the file."""
+    ss_cell_count: Optional[int] = None
+    """The number of cells in the file. Only used for spreadsheets."""

chunkr_ai/types/ocr_result.py CHANGED Viewed

@@ -15,14 +15,14 @@ class OcrResult(BaseModel):
     text: str
     """The recognized text of the OCR result."""
-    cell_ref: Optional[str] = None
-    """
-    Excel-style cell reference (e.g., "A1" or "A1:B2") when OCR originates from a
-    spreadsheet cell
-    """
     confidence: Optional[float] = None
     """The confidence score of the recognized text."""
     ocr_id: Optional[str] = None
     """The unique identifier for the OCR result."""
+    ss_cell_ref: Optional[str] = None
+    """
+    Excel-style cell reference (e.g., "A1" or "A1:B2") when OCR originates from a
+    spreadsheet cell
+    """

chunkr_ai/types/parse_configuration.py CHANGED Viewed

@@ -4,7 +4,6 @@ from typing import Optional
 from typing_extensions import Literal
 from .._models import BaseModel
-from .llm_processing import LlmProcessing
 from .chunk_processing import ChunkProcessing
 from .segment_processing import SegmentProcessing
@@ -23,9 +22,6 @@ class ParseConfiguration(BaseModel):
       LLM refusals etc.)
     """
-    llm_processing: Optional[LlmProcessing] = None
-    """Controls the LLM used for the task."""
     ocr_strategy: Optional[Literal["All", "Auto"]] = None
     """Controls the Optical Character Recognition (OCR) strategy.

chunkr_ai/types/parse_configuration_param.py CHANGED Viewed

@@ -5,7 +5,6 @@ from __future__ import annotations
 from typing import Optional
 from typing_extensions import Literal, TypedDict
-from .llm_processing_param import LlmProcessingParam
 from .chunk_processing_param import ChunkProcessingParam
 from .segment_processing_param import SegmentProcessingParam
@@ -24,9 +23,6 @@ class ParseConfigurationParam(TypedDict, total=False):
       LLM refusals etc.)
     """
-    llm_processing: LlmProcessingParam
-    """Controls the LLM used for the task."""
     ocr_strategy: Literal["All", "Auto"]
     """Controls the Optical Character Recognition (OCR) strategy.

chunkr_ai/types/segment.py CHANGED Viewed

@@ -31,20 +31,23 @@ class Segment(BaseModel):
         "Caption",
         "Footnote",
         "Formula",
+        "FormRegion",
+        "GraphicalItem",
+        "Legend",
+        "LineNumber",
         "ListItem",
         "Page",
         "PageFooter",
         "PageHeader",
+        "PageNumber",
         "Picture",
-        "SectionHeader",
         "Table",
         "Text",
         "Title",
+        "Unknown",
+        "SectionHeader",
     ]
-    """
-    All the possible types for a segment. Note: Different configurations will
-    produce different types. Please refer to the documentation for more information.
-    """
+    """All the possible types for a segment."""
     confidence: Optional[float] = None
     """Confidence score of the layout analysis model"""

chunkr_ai/types/segment_processing.py CHANGED Viewed

@@ -47,6 +47,24 @@ class SegmentProcessing(BaseModel):
     - `extended_context` uses the full page image as context for LLM generation.
     """
+    form_region: Optional[GenerationConfig] = FieldInfo(alias="FormRegion", default=None)
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
     formula: Optional[GenerationConfig] = FieldInfo(alias="Formula", default=None)
     """Controls the processing and generation for the segment.
@@ -65,6 +83,60 @@ class SegmentProcessing(BaseModel):
     - `extended_context` uses the full page image as context for LLM generation.
     """
+    graphical_item: Optional[GenerationConfig] = FieldInfo(alias="GraphicalItem", default=None)
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+    legend: Optional[GenerationConfig] = FieldInfo(alias="Legend", default=None)
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+    line_number: Optional[GenerationConfig] = FieldInfo(alias="LineNumber", default=None)
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
     list_item: Optional[GenerationConfig] = FieldInfo(alias="ListItem", default=None)
     """Controls the processing and generation for the segment.
@@ -137,7 +209,7 @@ class SegmentProcessing(BaseModel):
     - `extended_context` uses the full page image as context for LLM generation.
     """
-    picture: Optional[GenerationConfig] = FieldInfo(alias="Picture", default=None)
+    page_number: Optional[GenerationConfig] = FieldInfo(alias="PageNumber", default=None)
     """Controls the processing and generation for the segment.
     - `crop_image` controls whether to crop the file's images to the segment's
@@ -155,7 +227,7 @@ class SegmentProcessing(BaseModel):
     - `extended_context` uses the full page image as context for LLM generation.
     """
-    section_header: Optional[GenerationConfig] = FieldInfo(alias="SectionHeader", default=None)
+    picture: Optional[GenerationConfig] = FieldInfo(alias="Picture", default=None)
     """Controls the processing and generation for the segment.
     - `crop_image` controls whether to crop the file's images to the segment's
@@ -226,3 +298,21 @@ class SegmentProcessing(BaseModel):
       configuration.
     - `extended_context` uses the full page image as context for LLM generation.
     """
+    unknown: Optional[GenerationConfig] = FieldInfo(alias="Unknown", default=None)
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """

chunkr_ai/types/segment_processing_param.py CHANGED Viewed

@@ -48,6 +48,24 @@ class SegmentProcessingParam(TypedDict, total=False):
     - `extended_context` uses the full page image as context for LLM generation.
     """
+    form_region: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="FormRegion")]
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
     formula: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Formula")]
     """Controls the processing and generation for the segment.
@@ -66,6 +84,60 @@ class SegmentProcessingParam(TypedDict, total=False):
     - `extended_context` uses the full page image as context for LLM generation.
     """
+    graphical_item: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="GraphicalItem")]
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+    legend: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Legend")]
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
+    line_number: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="LineNumber")]
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """
     list_item: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="ListItem")]
     """Controls the processing and generation for the segment.
@@ -138,7 +210,7 @@ class SegmentProcessingParam(TypedDict, total=False):
     - `extended_context` uses the full page image as context for LLM generation.
     """
-    picture: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Picture")]
+    page_number: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="PageNumber")]
     """Controls the processing and generation for the segment.
     - `crop_image` controls whether to crop the file's images to the segment's
@@ -156,7 +228,7 @@ class SegmentProcessingParam(TypedDict, total=False):
     - `extended_context` uses the full page image as context for LLM generation.
     """
-    section_header: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="SectionHeader")]
+    picture: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Picture")]
     """Controls the processing and generation for the segment.
     - `crop_image` controls whether to crop the file's images to the segment's
@@ -227,3 +299,21 @@ class SegmentProcessingParam(TypedDict, total=False):
       configuration.
     - `extended_context` uses the full page image as context for LLM generation.
     """
+    unknown: Annotated[Optional[GenerationConfigParam], PropertyInfo(alias="Unknown")]
+    """Controls the processing and generation for the segment.
+    - `crop_image` controls whether to crop the file's images to the segment's
+      bounding box. The cropped image will be stored in the segment's `image` field.
+      Use `All` to always crop, or `Auto` to only crop when needed for
+      post-processing.
+    - `format` specifies the output format: `Html` or `Markdown`
+    - `strategy` determines how the content is generated: `Auto`, `LLM`, or `Ignore`
+      - `Auto`: Process content automatically
+      - `LLM`: Use large language models for processing
+      - `Ignore`: Exclude segments from final output
+    - `description` enables LLM-generated descriptions for segments. **Note:** This
+      uses chunkr's own VLM models and is not configurable via LLM processing
+      configuration.
+    - `extended_context` uses the full page image as context for LLM generation.
+    """

chunkr_ai/types/task_get_params.py CHANGED Viewed

@@ -16,6 +16,3 @@ class TaskGetParams(TypedDict, total=False):
     include_chunks: bool
     """Whether to include chunks in the output response"""
-    wait_for_completion: bool
-    """Whether to wait for the task to complete"""

chunkr_ai/types/tasks/extract_create_response.py CHANGED Viewed

@@ -55,153 +55,6 @@ class ExtractCreateResponse(BaseModel):
       array-of-primitives) contain a `Vec<Citation>` supporting that field.
     - `metrics`: mirror of `results`; only leaf positions contain a `Metrics` object
       for that field.
-    Detailed shape:
-    - Shared structure: `results`, `citations`, and `metrics` have the same
-      object/array shape as the user schema. Non-leaf nodes (objects, arrays of
-      objects) are mirrored; only leaves carry values.
-    - Leaf definition:
-      - A leaf is either a JSON primitive (string, number, bool, or null) or an
-        array of primitives (including empty).
-      - Arrays of objects are not leaves; recurse into their elements (`items`
-        mirror index-by-index).
-    - Null handling:
-      - If a leaf in `results` is null, the corresponding position in `citations`
-        and `metrics` remains null.
-    - Arrays:
-      - Array of objects: `citations`/`metrics` are arrays whose elements mirror
-        each object and carry values at their own leaves.
-      - Array of primitives: treated as a single leaf. `citations[path]` is a list
-        of `Citation` supporting the array as a whole. `metrics[path]` is a
-        `Metrics` object for the array as a whole.
-    - Citations leaves:
-      - Type: JSON array of `Citation` objects.
-      - Each `Citation` has: `citation_id: string`, `citation_type: Segment|Word`,
-        `bbox: BoundingBox[]`, `content: string`, `segment_id?: string`,
-        `segment_type: SegmentType`, `ss_range?: string[]`.
-        - Segment citation: represents a full parsed segment; `segment_id` set,
-          `bbox` has one entry (segment box), `content` is the segment text. If the
-          segment is from a spreadsheet, `ss_range` contains the table range
-          (single-element array) or the underlying cell refs if available.
-        - Word citation: represents selected OCR words within a segment;
-          `segment_id` is null, `bbox` has one entry per word, `content` is the
-          whitespace-joined text of those words; `segment_type` is `Text`. If OCR
-          words came from spreadsheet cells, `ss_range` lists those cell refs.
-    - Metrics leaves:
-      - Type: `Metrics` object with `confidence: "High" | "Low"`, indicating whether
-        citations sufficiently support the item.
-    Example:
-    results
-    ```json
-    {
-      "invoice_id": "INV-001",
-      "seller": { "name": "Acme" },
-      "line_items": [{ "sku": "A1", "qty": 2 }],
-      "tags": ["urgent", "paid"],
-      "notes": null
-    }
-    ```
-    citations
-    ```json
-    {
-      "invoice_id": [
-        {
-          "citation_id": "abc1234",
-          "citation_type": "Segment",
-          "bbox": [{ "left": 10, "top": 20, "width": 100, "height": 18 }],
-          "content": "Invoice INV-001",
-          "segment_id": "seg_001",
-          "segment_type": "Text",
-          "ss_range": ["A1:C10"]
-        },
-        {
-          "citation_id": "pqr2345",
-          "citation_type": "Word",
-          "bbox": [
-            { "left": 12, "top": 24, "width": 36, "height": 18 },
-            { "left": 52, "top": 24, "width": 48, "height": 18 }
-          ],
-          "content": "INV-001",
-          "segment_id": null,
-          "segment_type": "Text",
-          "ss_range": ["B3", "C3"]
-        }
-      ],
-      "seller": {
-        "name": [
-          {
-            "citation_id": "def5678",
-            "citation_type": "Word",
-            "bbox": [
-              { "left": 45, "top": 80, "width": 30, "height": 12 },
-              { "left": 80, "top": 80, "width": 40, "height": 12 }
-            ],
-            "content": "Acme",
-            "segment_id": null,
-            "segment_type": "Text"
-          }
-        ]
-      },
-      "line_items": [
-        {
-          "sku": [
-            {
-              "citation_id": "ghi9012",
-              "citation_type": "Segment",
-              "bbox": [{ "left": 12, "top": 140, "width": 60, "height": 16 }],
-              "content": "A1",
-              "segment_id": "seg_010",
-              "segment_type": "Text",
-              "ss_range": ["D5:E12"]
-            }
-          ],
-          "qty": [
-            {
-              "citation_id": "jkl3456",
-              "citation_type": "Word",
-              "bbox": [{ "left": 85, "top": 140, "width": 12, "height": 16 }],
-              "content": "2",
-              "segment_id": null,
-              "segment_type": "Text",
-              "ss_range": ["E12"]
-            }
-          ]
-        }
-      ],
-      "tags": [
-        {
-          "citation_id": "mno7890",
-          "citation_type": "Segment",
-          "bbox": [{ "left": 12, "top": 200, "width": 100, "height": 16 }],
-          "content": "urgent paid",
-          "segment_id": "seg_020",
-          "segment_type": "Text",
-          "ss_range": ["A20:C25"]
-        }
-      ],
-      "notes": null
-    }
-    ```
-    metrics
-    ```json
-    {
-      "invoice_id": { "confidence": "High" },
-      "seller": { "name": { "confidence": "Low" } },
-      "line_items": [
-        { "sku": { "confidence": "High" }, "qty": { "confidence": "High" } }
-      ],
-      "tags": { "confidence": "Low" },
-      "notes": null
-    }
-    ```
     """
     source_task_id: Optional[str] = None

chunkr_ai/types/tasks/extract_get_params.py CHANGED Viewed

@@ -16,6 +16,3 @@ class ExtractGetParams(TypedDict, total=False):
     include_chunks: bool
     """Whether to include chunks in the output response"""
-    wait_for_completion: bool
-    """Whether to wait for the task to complete"""

chunkr_ai/types/tasks/extract_get_response.py CHANGED Viewed

@@ -55,153 +55,6 @@ class ExtractGetResponse(BaseModel):
       array-of-primitives) contain a `Vec<Citation>` supporting that field.
     - `metrics`: mirror of `results`; only leaf positions contain a `Metrics` object
       for that field.
-    Detailed shape:
-    - Shared structure: `results`, `citations`, and `metrics` have the same
-      object/array shape as the user schema. Non-leaf nodes (objects, arrays of
-      objects) are mirrored; only leaves carry values.
-    - Leaf definition:
-      - A leaf is either a JSON primitive (string, number, bool, or null) or an
-        array of primitives (including empty).
-      - Arrays of objects are not leaves; recurse into their elements (`items`
-        mirror index-by-index).
-    - Null handling:
-      - If a leaf in `results` is null, the corresponding position in `citations`
-        and `metrics` remains null.
-    - Arrays:
-      - Array of objects: `citations`/`metrics` are arrays whose elements mirror
-        each object and carry values at their own leaves.
-      - Array of primitives: treated as a single leaf. `citations[path]` is a list
-        of `Citation` supporting the array as a whole. `metrics[path]` is a
-        `Metrics` object for the array as a whole.
-    - Citations leaves:
-      - Type: JSON array of `Citation` objects.
-      - Each `Citation` has: `citation_id: string`, `citation_type: Segment|Word`,
-        `bbox: BoundingBox[]`, `content: string`, `segment_id?: string`,
-        `segment_type: SegmentType`, `ss_range?: string[]`.
-        - Segment citation: represents a full parsed segment; `segment_id` set,
-          `bbox` has one entry (segment box), `content` is the segment text. If the
-          segment is from a spreadsheet, `ss_range` contains the table range
-          (single-element array) or the underlying cell refs if available.
-        - Word citation: represents selected OCR words within a segment;
-          `segment_id` is null, `bbox` has one entry per word, `content` is the
-          whitespace-joined text of those words; `segment_type` is `Text`. If OCR
-          words came from spreadsheet cells, `ss_range` lists those cell refs.
-    - Metrics leaves:
-      - Type: `Metrics` object with `confidence: "High" | "Low"`, indicating whether
-        citations sufficiently support the item.
-    Example:
-    results
-    ```json
-    {
-      "invoice_id": "INV-001",
-      "seller": { "name": "Acme" },
-      "line_items": [{ "sku": "A1", "qty": 2 }],
-      "tags": ["urgent", "paid"],
-      "notes": null
-    }
-    ```
-    citations
-    ```json
-    {
-      "invoice_id": [
-        {
-          "citation_id": "abc1234",
-          "citation_type": "Segment",
-          "bbox": [{ "left": 10, "top": 20, "width": 100, "height": 18 }],
-          "content": "Invoice INV-001",
-          "segment_id": "seg_001",
-          "segment_type": "Text",
-          "ss_range": ["A1:C10"]
-        },
-        {
-          "citation_id": "pqr2345",
-          "citation_type": "Word",
-          "bbox": [
-            { "left": 12, "top": 24, "width": 36, "height": 18 },
-            { "left": 52, "top": 24, "width": 48, "height": 18 }
-          ],
-          "content": "INV-001",
-          "segment_id": null,
-          "segment_type": "Text",
-          "ss_range": ["B3", "C3"]
-        }
-      ],
-      "seller": {
-        "name": [
-          {
-            "citation_id": "def5678",
-            "citation_type": "Word",
-            "bbox": [
-              { "left": 45, "top": 80, "width": 30, "height": 12 },
-              { "left": 80, "top": 80, "width": 40, "height": 12 }
-            ],
-            "content": "Acme",
-            "segment_id": null,
-            "segment_type": "Text"
-          }
-        ]
-      },
-      "line_items": [
-        {
-          "sku": [
-            {
-              "citation_id": "ghi9012",
-              "citation_type": "Segment",
-              "bbox": [{ "left": 12, "top": 140, "width": 60, "height": 16 }],
-              "content": "A1",
-              "segment_id": "seg_010",
-              "segment_type": "Text",
-              "ss_range": ["D5:E12"]
-            }
-          ],
-          "qty": [
-            {
-              "citation_id": "jkl3456",
-              "citation_type": "Word",
-              "bbox": [{ "left": 85, "top": 140, "width": 12, "height": 16 }],
-              "content": "2",
-              "segment_id": null,
-              "segment_type": "Text",
-              "ss_range": ["E12"]
-            }
-          ]
-        }
-      ],
-      "tags": [
-        {
-          "citation_id": "mno7890",
-          "citation_type": "Segment",
-          "bbox": [{ "left": 12, "top": 200, "width": 100, "height": 16 }],
-          "content": "urgent paid",
-          "segment_id": "seg_020",
-          "segment_type": "Text",
-          "ss_range": ["A20:C25"]
-        }
-      ],
-      "notes": null
-    }
-    ```
-    metrics
-    ```json
-    {
-      "invoice_id": { "confidence": "High" },
-      "seller": { "name": { "confidence": "Low" } },
-      "line_items": [
-        { "sku": { "confidence": "High" }, "qty": { "confidence": "High" } }
-      ],
-      "tags": { "confidence": "Low" },
-      "notes": null
-    }
-    ```
     """
     source_task_id: Optional[str] = None

chunkr-ai 0.1.0a7__py3-none-any.whl → 0.1.0a9__py3-none-any.whl

chunkr-ai 0.1.0a7__py3-none-any.whl → 0.1.0a9__py3-none-any.whl