PyPI - chunkr-ai - Versions diffs - 0.1.0a5__py3-none-any.whl → 0.1.0a7__py3-none-any.whl - Mend

chunkr-ai 0.1.0a5__py3-none-any.whl → 0.1.0a7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

chunkr_ai/__init__.py +2 -0
chunkr_ai/_client.py +31 -3
chunkr_ai/_constants.py +5 -5
chunkr_ai/_exceptions.py +4 -0
chunkr_ai/_models.py +1 -1
chunkr_ai/_types.py +35 -1
chunkr_ai/_utils/__init__.py +1 -0
chunkr_ai/_utils/_typing.py +5 -0
chunkr_ai/_version.py +1 -1
chunkr_ai/resources/__init__.py +14 -0
chunkr_ai/resources/files.py +3 -3
chunkr_ai/resources/tasks/__init__.py +14 -0
chunkr_ai/resources/tasks/extract.py +409 -0
chunkr_ai/resources/tasks/parse.py +102 -346
chunkr_ai/resources/tasks/tasks.py +62 -14
chunkr_ai/resources/webhooks.py +193 -0
chunkr_ai/types/__init__.py +27 -1
chunkr_ai/types/bounding_box.py +19 -0
chunkr_ai/types/cell.py +39 -0
chunkr_ai/types/cell_style.py +28 -0
chunkr_ai/types/chunk.py +40 -0
chunkr_ai/types/chunk_processing.py +40 -0
chunkr_ai/types/chunk_processing_param.py +42 -0
chunkr_ai/types/extract_configuration.py +24 -0
chunkr_ai/types/extract_output_response.py +19 -0
chunkr_ai/types/file_create_params.py +2 -1
chunkr_ai/types/file_info.py +21 -0
chunkr_ai/types/generation_config.py +29 -0
chunkr_ai/types/generation_config_param.py +29 -0
chunkr_ai/types/llm_processing.py +36 -0
chunkr_ai/types/llm_processing_param.py +36 -0
chunkr_ai/types/ocr_result.py +28 -0
chunkr_ai/types/page.py +27 -0
chunkr_ai/types/parse_configuration.py +64 -0
chunkr_ai/types/parse_configuration_param.py +65 -0
chunkr_ai/types/parse_output_response.py +29 -0
chunkr_ai/types/segment.py +109 -0
chunkr_ai/types/segment_processing.py +228 -0
chunkr_ai/types/segment_processing_param.py +229 -0
chunkr_ai/types/task_extract_updated_webhook_event.py +22 -0
chunkr_ai/types/task_list_params.py +7 -1
chunkr_ai/types/task_parse_updated_webhook_event.py +22 -0
chunkr_ai/types/task_response.py +68 -0
chunkr_ai/types/tasks/__init__.py +7 -1
chunkr_ai/types/tasks/extract_create_params.py +47 -0
chunkr_ai/types/tasks/extract_create_response.py +214 -0
chunkr_ai/types/tasks/extract_get_params.py +21 -0
chunkr_ai/types/tasks/extract_get_response.py +214 -0
chunkr_ai/types/tasks/parse_create_params.py +25 -805
chunkr_ai/types/tasks/parse_create_response.py +55 -0
chunkr_ai/types/tasks/parse_get_params.py +21 -0
chunkr_ai/types/tasks/parse_get_response.py +55 -0
chunkr_ai/types/unwrap_webhook_event.py +11 -0
chunkr_ai/types/version_info.py +31 -0
chunkr_ai/types/webhook_url_response.py +9 -0
{chunkr_ai-0.1.0a5.dist-info → chunkr_ai-0.1.0a7.dist-info}/METADATA +14 -13
chunkr_ai-0.1.0a7.dist-info/RECORD +86 -0
chunkr_ai/types/task.py +0 -1225
chunkr_ai/types/tasks/parse_update_params.py +0 -857
chunkr_ai-0.1.0a5.dist-info/RECORD +0 -52
{chunkr_ai-0.1.0a5.dist-info → chunkr_ai-0.1.0a7.dist-info}/WHEEL +0 -0
{chunkr_ai-0.1.0a5.dist-info → chunkr_ai-0.1.0a7.dist-info}/licenses/LICENSE +0 -0

chunkr_ai/types/tasks/extract_create_response.py ADDED Viewed

@@ -0,0 +1,214 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+from typing import Optional
+from datetime import datetime
+from typing_extensions import Literal
+from ..._models import BaseModel
+from ..file_info import FileInfo
+from ..version_info import VersionInfo
+from ..extract_configuration import ExtractConfiguration
+from ..extract_output_response import ExtractOutputResponse
+__all__ = ["ExtractCreateResponse"]
+class ExtractCreateResponse(BaseModel):
+    configuration: ExtractConfiguration
+    created_at: datetime
+    """The date and time when the task was created and queued."""
+    file_info: FileInfo
+    """Information about the input file."""
+    message: str
+    """A message describing the task's status or any errors that occurred."""
+    status: Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]
+    """The status of the task."""
+    task_id: str
+    """The unique identifier for the task."""
+    task_type: Literal["Parse", "Extract"]
+    version_info: VersionInfo
+    """Version information for the task."""
+    expires_at: Optional[datetime] = None
+    """The date and time when the task will expire."""
+    finished_at: Optional[datetime] = None
+    """The date and time when the task was finished."""
+    input_file_url: Optional[str] = None
+    """The presigned URL of the input file. Deprecated use `file_info.url` instead."""
+    output: Optional[ExtractOutputResponse] = None
+    """The processed results of a document extraction task.
+    Shapes:
+    - `results`: JSON matching the user-provided schema.
+    - `citations`: mirror of `results`; only leaf positions (primitive or
+      array-of-primitives) contain a `Vec<Citation>` supporting that field.
+    - `metrics`: mirror of `results`; only leaf positions contain a `Metrics` object
+      for that field.
+    Detailed shape:
+    - Shared structure: `results`, `citations`, and `metrics` have the same
+      object/array shape as the user schema. Non-leaf nodes (objects, arrays of
+      objects) are mirrored; only leaves carry values.
+    - Leaf definition:
+      - A leaf is either a JSON primitive (string, number, bool, or null) or an
+        array of primitives (including empty).
+      - Arrays of objects are not leaves; recurse into their elements (`items`
+        mirror index-by-index).
+    - Null handling:
+      - If a leaf in `results` is null, the corresponding position in `citations`
+        and `metrics` remains null.
+    - Arrays:
+      - Array of objects: `citations`/`metrics` are arrays whose elements mirror
+        each object and carry values at their own leaves.
+      - Array of primitives: treated as a single leaf. `citations[path]` is a list
+        of `Citation` supporting the array as a whole. `metrics[path]` is a
+        `Metrics` object for the array as a whole.
+    - Citations leaves:
+      - Type: JSON array of `Citation` objects.
+      - Each `Citation` has: `citation_id: string`, `citation_type: Segment|Word`,
+        `bbox: BoundingBox[]`, `content: string`, `segment_id?: string`,
+        `segment_type: SegmentType`, `ss_range?: string[]`.
+        - Segment citation: represents a full parsed segment; `segment_id` set,
+          `bbox` has one entry (segment box), `content` is the segment text. If the
+          segment is from a spreadsheet, `ss_range` contains the table range
+          (single-element array) or the underlying cell refs if available.
+        - Word citation: represents selected OCR words within a segment;
+          `segment_id` is null, `bbox` has one entry per word, `content` is the
+          whitespace-joined text of those words; `segment_type` is `Text`. If OCR
+          words came from spreadsheet cells, `ss_range` lists those cell refs.
+    - Metrics leaves:
+      - Type: `Metrics` object with `confidence: "High" | "Low"`, indicating whether
+        citations sufficiently support the item.
+    Example:
+    results
+    ```json
+    {
+      "invoice_id": "INV-001",
+      "seller": { "name": "Acme" },
+      "line_items": [{ "sku": "A1", "qty": 2 }],
+      "tags": ["urgent", "paid"],
+      "notes": null
+    }
+    ```
+    citations
+    ```json
+    {
+      "invoice_id": [
+        {
+          "citation_id": "abc1234",
+          "citation_type": "Segment",
+          "bbox": [{ "left": 10, "top": 20, "width": 100, "height": 18 }],
+          "content": "Invoice INV-001",
+          "segment_id": "seg_001",
+          "segment_type": "Text",
+          "ss_range": ["A1:C10"]
+        },
+        {
+          "citation_id": "pqr2345",
+          "citation_type": "Word",
+          "bbox": [
+            { "left": 12, "top": 24, "width": 36, "height": 18 },
+            { "left": 52, "top": 24, "width": 48, "height": 18 }
+          ],
+          "content": "INV-001",
+          "segment_id": null,
+          "segment_type": "Text",
+          "ss_range": ["B3", "C3"]
+        }
+      ],
+      "seller": {
+        "name": [
+          {
+            "citation_id": "def5678",
+            "citation_type": "Word",
+            "bbox": [
+              { "left": 45, "top": 80, "width": 30, "height": 12 },
+              { "left": 80, "top": 80, "width": 40, "height": 12 }
+            ],
+            "content": "Acme",
+            "segment_id": null,
+            "segment_type": "Text"
+          }
+        ]
+      },
+      "line_items": [
+        {
+          "sku": [
+            {
+              "citation_id": "ghi9012",
+              "citation_type": "Segment",
+              "bbox": [{ "left": 12, "top": 140, "width": 60, "height": 16 }],
+              "content": "A1",
+              "segment_id": "seg_010",
+              "segment_type": "Text",
+              "ss_range": ["D5:E12"]
+            }
+          ],
+          "qty": [
+            {
+              "citation_id": "jkl3456",
+              "citation_type": "Word",
+              "bbox": [{ "left": 85, "top": 140, "width": 12, "height": 16 }],
+              "content": "2",
+              "segment_id": null,
+              "segment_type": "Text",
+              "ss_range": ["E12"]
+            }
+          ]
+        }
+      ],
+      "tags": [
+        {
+          "citation_id": "mno7890",
+          "citation_type": "Segment",
+          "bbox": [{ "left": 12, "top": 200, "width": 100, "height": 16 }],
+          "content": "urgent paid",
+          "segment_id": "seg_020",
+          "segment_type": "Text",
+          "ss_range": ["A20:C25"]
+        }
+      ],
+      "notes": null
+    }
+    ```
+    metrics
+    ```json
+    {
+      "invoice_id": { "confidence": "High" },
+      "seller": { "name": { "confidence": "Low" } },
+      "line_items": [
+        { "sku": { "confidence": "High" }, "qty": { "confidence": "High" } }
+      ],
+      "tags": { "confidence": "Low" },
+      "notes": null
+    }
+    ```
+    """
+    source_task_id: Optional[str] = None
+    """The ID of the source `parse` task that was used for extraction"""
+    started_at: Optional[datetime] = None
+    """The date and time when the task was started."""
+    task_url: Optional[str] = None
+    """The presigned URL of the task."""

chunkr_ai/types/tasks/extract_get_params.py ADDED Viewed

@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+from __future__ import annotations
+from typing_extensions import TypedDict
+__all__ = ["ExtractGetParams"]
+class ExtractGetParams(TypedDict, total=False):
+    base64_urls: bool
+    """Whether to return base64 encoded URLs.
+    If false, the URLs will be returned as presigned URLs.
+    """
+    include_chunks: bool
+    """Whether to include chunks in the output response"""
+    wait_for_completion: bool
+    """Whether to wait for the task to complete"""

chunkr_ai/types/tasks/extract_get_response.py ADDED Viewed

@@ -0,0 +1,214 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+from typing import Optional
+from datetime import datetime
+from typing_extensions import Literal
+from ..._models import BaseModel
+from ..file_info import FileInfo
+from ..version_info import VersionInfo
+from ..extract_configuration import ExtractConfiguration
+from ..extract_output_response import ExtractOutputResponse
+__all__ = ["ExtractGetResponse"]
+class ExtractGetResponse(BaseModel):
+    configuration: ExtractConfiguration
+    created_at: datetime
+    """The date and time when the task was created and queued."""
+    file_info: FileInfo
+    """Information about the input file."""
+    message: str
+    """A message describing the task's status or any errors that occurred."""
+    status: Literal["Starting", "Processing", "Succeeded", "Failed", "Cancelled"]
+    """The status of the task."""
+    task_id: str
+    """The unique identifier for the task."""
+    task_type: Literal["Parse", "Extract"]
+    version_info: VersionInfo
+    """Version information for the task."""
+    expires_at: Optional[datetime] = None
+    """The date and time when the task will expire."""
+    finished_at: Optional[datetime] = None
+    """The date and time when the task was finished."""
+    input_file_url: Optional[str] = None
+    """The presigned URL of the input file. Deprecated use `file_info.url` instead."""
+    output: Optional[ExtractOutputResponse] = None
+    """The processed results of a document extraction task.
+    Shapes:
+    - `results`: JSON matching the user-provided schema.
+    - `citations`: mirror of `results`; only leaf positions (primitive or
+      array-of-primitives) contain a `Vec<Citation>` supporting that field.
+    - `metrics`: mirror of `results`; only leaf positions contain a `Metrics` object
+      for that field.
+    Detailed shape:
+    - Shared structure: `results`, `citations`, and `metrics` have the same
+      object/array shape as the user schema. Non-leaf nodes (objects, arrays of
+      objects) are mirrored; only leaves carry values.
+    - Leaf definition:
+      - A leaf is either a JSON primitive (string, number, bool, or null) or an
+        array of primitives (including empty).
+      - Arrays of objects are not leaves; recurse into their elements (`items`
+        mirror index-by-index).
+    - Null handling:
+      - If a leaf in `results` is null, the corresponding position in `citations`
+        and `metrics` remains null.
+    - Arrays:
+      - Array of objects: `citations`/`metrics` are arrays whose elements mirror
+        each object and carry values at their own leaves.
+      - Array of primitives: treated as a single leaf. `citations[path]` is a list
+        of `Citation` supporting the array as a whole. `metrics[path]` is a
+        `Metrics` object for the array as a whole.
+    - Citations leaves:
+      - Type: JSON array of `Citation` objects.
+      - Each `Citation` has: `citation_id: string`, `citation_type: Segment|Word`,
+        `bbox: BoundingBox[]`, `content: string`, `segment_id?: string`,
+        `segment_type: SegmentType`, `ss_range?: string[]`.
+        - Segment citation: represents a full parsed segment; `segment_id` set,
+          `bbox` has one entry (segment box), `content` is the segment text. If the
+          segment is from a spreadsheet, `ss_range` contains the table range
+          (single-element array) or the underlying cell refs if available.
+        - Word citation: represents selected OCR words within a segment;
+          `segment_id` is null, `bbox` has one entry per word, `content` is the
+          whitespace-joined text of those words; `segment_type` is `Text`. If OCR
+          words came from spreadsheet cells, `ss_range` lists those cell refs.
+    - Metrics leaves:
+      - Type: `Metrics` object with `confidence: "High" | "Low"`, indicating whether
+        citations sufficiently support the item.
+    Example:
+    results
+    ```json
+    {
+      "invoice_id": "INV-001",
+      "seller": { "name": "Acme" },
+      "line_items": [{ "sku": "A1", "qty": 2 }],
+      "tags": ["urgent", "paid"],
+      "notes": null
+    }
+    ```
+    citations
+    ```json
+    {
+      "invoice_id": [
+        {
+          "citation_id": "abc1234",
+          "citation_type": "Segment",
+          "bbox": [{ "left": 10, "top": 20, "width": 100, "height": 18 }],
+          "content": "Invoice INV-001",
+          "segment_id": "seg_001",
+          "segment_type": "Text",
+          "ss_range": ["A1:C10"]
+        },
+        {
+          "citation_id": "pqr2345",
+          "citation_type": "Word",
+          "bbox": [
+            { "left": 12, "top": 24, "width": 36, "height": 18 },
+            { "left": 52, "top": 24, "width": 48, "height": 18 }
+          ],
+          "content": "INV-001",
+          "segment_id": null,
+          "segment_type": "Text",
+          "ss_range": ["B3", "C3"]
+        }
+      ],
+      "seller": {
+        "name": [
+          {
+            "citation_id": "def5678",
+            "citation_type": "Word",
+            "bbox": [
+              { "left": 45, "top": 80, "width": 30, "height": 12 },
+              { "left": 80, "top": 80, "width": 40, "height": 12 }
+            ],
+            "content": "Acme",
+            "segment_id": null,
+            "segment_type": "Text"
+          }
+        ]
+      },
+      "line_items": [
+        {
+          "sku": [
+            {
+              "citation_id": "ghi9012",
+              "citation_type": "Segment",
+              "bbox": [{ "left": 12, "top": 140, "width": 60, "height": 16 }],
+              "content": "A1",
+              "segment_id": "seg_010",
+              "segment_type": "Text",
+              "ss_range": ["D5:E12"]
+            }
+          ],
+          "qty": [
+            {
+              "citation_id": "jkl3456",
+              "citation_type": "Word",
+              "bbox": [{ "left": 85, "top": 140, "width": 12, "height": 16 }],
+              "content": "2",
+              "segment_id": null,
+              "segment_type": "Text",
+              "ss_range": ["E12"]
+            }
+          ]
+        }
+      ],
+      "tags": [
+        {
+          "citation_id": "mno7890",
+          "citation_type": "Segment",
+          "bbox": [{ "left": 12, "top": 200, "width": 100, "height": 16 }],
+          "content": "urgent paid",
+          "segment_id": "seg_020",
+          "segment_type": "Text",
+          "ss_range": ["A20:C25"]
+        }
+      ],
+      "notes": null
+    }
+    ```
+    metrics
+    ```json
+    {
+      "invoice_id": { "confidence": "High" },
+      "seller": { "name": { "confidence": "Low" } },
+      "line_items": [
+        { "sku": { "confidence": "High" }, "qty": { "confidence": "High" } }
+      ],
+      "tags": { "confidence": "Low" },
+      "notes": null
+    }
+    ```
+    """
+    source_task_id: Optional[str] = None
+    """The ID of the source `parse` task that was used for extraction"""
+    started_at: Optional[datetime] = None
+    """The date and time when the task was started."""
+    task_url: Optional[str] = None
+    """The presigned URL of the task."""

chunkr-ai 0.1.0a5__py3-none-any.whl → 0.1.0a7__py3-none-any.whl

chunkr-ai 0.1.0a5__py3-none-any.whl → 0.1.0a7__py3-none-any.whl