PyPI - chunkr-ai - Versions diffs - 0.0.17__py3-none-any.whl → 0.0.19__py3-none-any.whl - Mend

chunkr-ai 0.0.17__py3-none-any.whl → 0.0.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

chunkr_ai/__init__.py +1 -2
chunkr_ai/api/chunkr.py +46 -39
chunkr_ai/api/chunkr_base.py +142 -8
chunkr_ai/api/config.py +18 -45
chunkr_ai/api/decorators.py +58 -0
chunkr_ai/api/misc.py +0 -2
chunkr_ai/api/protocol.py +0 -2
chunkr_ai/api/task_response.py +119 -0
chunkr_ai/models.py +3 -12
{chunkr_ai-0.0.17.dist-info → chunkr_ai-0.0.19.dist-info}/METADATA +89 -40
chunkr_ai-0.0.19.dist-info/RECORD +17 -0
chunkr_ai/api/base.py +0 -183
chunkr_ai/api/chunkr_async.py +0 -120
chunkr_ai/api/schema.py +0 -136
chunkr_ai/api/task.py +0 -66
chunkr_ai/api/task_async.py +0 -69
chunkr_ai/api/task_base.py +0 -85
chunkr_ai-0.0.17.dist-info/RECORD +0 -21
{chunkr_ai-0.0.17.dist-info → chunkr_ai-0.0.19.dist-info}/LICENSE +0 -0
{chunkr_ai-0.0.17.dist-info → chunkr_ai-0.0.19.dist-info}/WHEEL +0 -0
{chunkr_ai-0.0.17.dist-info → chunkr_ai-0.0.19.dist-info}/top_level.txt +0 -0

chunkr_ai/api/schema.py DELETED Viewed

@@ -1,136 +0,0 @@
-from pydantic import BaseModel
-from typing import Optional, List, Union, Type
-import json
-class Property(BaseModel):
-    name: str
-    prop_type: str
-    description: Optional[str] = None
-    default: Optional[str] = None
-class JsonSchema(BaseModel):
-    title: str
-    properties: List[Property]
-def from_pydantic(
-    pydantic: Union[BaseModel, Type[BaseModel]], current_depth: int = 0
-) -> dict:
-    """Convert a Pydantic model to a Chunk json schema."""
-    MAX_DEPTH = 5
-    model = pydantic if isinstance(pydantic, type) else pydantic.__class__
-    schema = model.model_json_schema()
-    properties = []
-    def get_enum_description(details: dict) -> str:
-        """Get description including enum values if they exist"""
-        description = details.get("description", "")
-        # First check if this is a direct enum
-        if "enum" in details:
-            enum_values = details["enum"]
-            enum_str = "\nAllowed values:\n" + "\n".join(
-                f"- {val}" for val in enum_values
-            )
-            return f"{description}{enum_str}"
-        # Then check if it's a reference to an enum
-        if "$ref" in details:
-            ref_schema = resolve_ref(details["$ref"], schema.get("$defs", {}))
-            if "enum" in ref_schema:
-                enum_values = ref_schema["enum"]
-                enum_str = "\nAllowed values:\n" + "\n".join(
-                    f"- {val}" for val in enum_values
-                )
-                return f"{description}{enum_str}"
-        return description
-    def resolve_ref(ref: str, definitions: dict) -> dict:
-        """Resolve a $ref reference to its actual schema"""
-        if not ref.startswith("#/$defs/"):
-            return {}
-        ref_name = ref[len("#/$defs/") :]
-        return definitions.get(ref_name, {})
-    def get_nested_schema(field_schema: dict, depth: int) -> dict:
-        if depth >= MAX_DEPTH:
-            return {}
-        # If there's a $ref, resolve it first
-        if "$ref" in field_schema:
-            field_schema = resolve_ref(field_schema["$ref"], schema.get("$defs", {}))
-        nested_props = {}
-        if field_schema.get("type") == "object":
-            for name, details in field_schema.get("properties", {}).items():
-                if details.get("type") == "object" or "$ref" in details:
-                    ref_schema = details
-                    if "$ref" in details:
-                        ref_schema = resolve_ref(
-                            details["$ref"], schema.get("$defs", {})
-                        )
-                    nested_schema = get_nested_schema(ref_schema, depth + 1)
-                    nested_props[name] = {
-                        "type": "object",
-                        "description": get_enum_description(details),
-                        "properties": nested_schema,
-                    }
-                else:
-                    nested_props[name] = {
-                        "type": details.get("type", "string"),
-                        "description": get_enum_description(details),
-                    }
-        return nested_props
-    for name, details in schema.get("properties", {}).items():
-        # Handle arrays
-        if details.get("type") == "array":
-            items = details.get("items", {})
-            if "$ref" in items:
-                items = resolve_ref(items["$ref"], schema.get("$defs", {}))
-            # Get nested schema for array items
-            item_schema = get_nested_schema(items, current_depth)
-            description = get_enum_description(details)
-            if item_schema:
-                description = f"{description}\nList items schema:\n{json.dumps(item_schema, indent=2)}"
-            prop = Property(name=name, prop_type="list", description=description)
-        # Handle objects and references
-        elif details.get("type") == "object" or "$ref" in details:
-            prop_type = "object"
-            ref_schema = details
-            if "$ref" in details:
-                ref_schema = resolve_ref(details["$ref"], schema.get("$defs", {}))
-            nested_schema = get_nested_schema(ref_schema, current_depth)
-            prop = Property(
-                name=name,
-                prop_type=prop_type,
-                description=get_enum_description(details),
-                properties=nested_schema,
-            )
-        # Handle primitive types
-        else:
-            prop = Property(
-                name=name,
-                prop_type=details.get("type", "string"),
-                description=get_enum_description(details),
-                default=str(details.get("default"))
-                if details.get("default") is not None
-                else None,
-            )
-        properties.append(prop)
-    json_schema = JsonSchema(
-        title=schema.get("title", model.__name__), properties=properties
-    )
-    return json_schema.model_dump(mode="json", exclude_none=True)

chunkr_ai/api/task.py DELETED Viewed

@@ -1,66 +0,0 @@
-from .config import Configuration
-from .misc import prepare_upload_data
-from .task_base import TaskBase
-import time
-class TaskResponse(TaskBase):
-    def _poll_request(self) -> dict:
-        while True:
-            try:
-                if not self.task_url:
-                    raise ValueError("Task URL not found in response")
-                if not self._client._session:
-                    raise ValueError("Client session not found")
-                r = self._client._session.get(
-                    self.task_url, headers=self._client._headers()
-                )
-                r.raise_for_status()
-                return r.json()
-            except (ConnectionError, TimeoutError) as _:
-                print("Connection error while polling the task, retrying...")
-                time.sleep(0.5)
-            except Exception:
-                raise
-    def poll(self) -> "TaskResponse":
-        while True:
-            response = self._poll_request()
-            updated_task = TaskResponse(**response).with_client(self._client)
-            self.__dict__.update(updated_task.__dict__)
-            if result := self._check_status():
-                return result
-            time.sleep(0.5)
-    def update(self, config: Configuration) -> "TaskResponse":
-        if not self.task_url:
-            raise ValueError("Task URL not found")
-        if not self._client._session:
-            raise ValueError("Client session not found")
-        files = prepare_upload_data(None, config)
-        r = self._client._session.patch(
-            self.task_url, files=files, headers=self._client._headers()
-        )
-        r.raise_for_status()
-        updated = TaskResponse(**r.json()).with_client(self._client)
-        self.__dict__.update(updated.__dict__)
-        return self.poll()
-    def cancel(self):
-        if not self.task_url:
-            raise ValueError("Task URL not found")
-        if not self._client._session:
-            raise ValueError("Client session not found")
-        r = self._client._session.get(
-            f"{self.task_url}/cancel", headers=self._client._headers()
-        )
-        r.raise_for_status()
-        self.poll()
-    def delete(self):
-        if not self.task_url:
-            raise ValueError("Task URL not found")
-        if not self._client._session:
-            raise ValueError("Client session not found")
-        r = self._client._session.delete(self.task_url, headers=self._client._headers())
-        r.raise_for_status()

chunkr_ai/api/task_async.py DELETED Viewed

@@ -1,69 +0,0 @@
-from .config import Configuration
-from .misc import prepare_upload_data
-from .task_base import TaskBase
-import asyncio
-class TaskResponseAsync(TaskBase):
-    async def _poll_request(self) -> dict:
-        try:
-            if not self._client._client:
-                raise ValueError("Client not found")
-            r = await self._client._client.get(
-                self.task_url, headers=self._client._headers()
-            )
-            r.raise_for_status()
-            return r.json()
-        except (ConnectionError, TimeoutError) as _:
-            print("Connection error while polling the task, retrying...")
-            await asyncio.sleep(0.5)
-        except Exception:
-            raise
-    async def poll(self) -> "TaskResponseAsync":
-        if not self.task_url:
-            raise ValueError("Task URL not found")
-        if not self._client._client:
-            raise ValueError("Client not found")
-        while True:
-            j = await self._poll_request()
-            updated = TaskResponseAsync(**j).with_client(self._client)
-            self.__dict__.update(updated.__dict__)
-            if res := self._check_status():
-                return res
-            await asyncio.sleep(0.5)
-    async def update(self, config: Configuration) -> "TaskResponseAsync":
-        if not self.task_url:
-            raise ValueError("Task URL not found")
-        if not self._client._client:
-            raise ValueError("Client not found")
-        f = prepare_upload_data(None, config)
-        r = await self._client._client.patch(
-            self.task_url, files=f, headers=self._client._headers()
-        )
-        r.raise_for_status()
-        updated = TaskResponseAsync(**r.json()).with_client(self._client)
-        self.__dict__.update(updated.__dict__)
-        return await self.poll()
-    async def cancel(self):
-        if not self.task_url:
-            raise ValueError("Task URL not found")
-        if not self._client._client:
-            raise ValueError("Client not found")
-        r = await self._client._client.get(
-            f"{self.task_url}/cancel", headers=self._client._headers()
-        )
-        r.raise_for_status()
-        return await self.poll()
-    async def delete(self):
-        if not self.task_url:
-            raise ValueError("Task URL not found")
-        if not self._client._client:
-            raise ValueError("Client not found")
-        r = await self._client._client.delete(
-            self.task_url, headers=self._client._headers()
-        )
-        r.raise_for_status()

chunkr_ai/api/task_base.py DELETED Viewed

@@ -1,85 +0,0 @@
-from .config import Configuration, Status, OutputResponse
-from .protocol import ChunkrClientProtocol
-from abc import ABC, abstractmethod
-from typing import TypeVar, Optional, Generic
-from pydantic import BaseModel, PrivateAttr
-from datetime import datetime
-T = TypeVar("T", bound="TaskBase")
-class TaskBase(BaseModel, ABC, Generic[T]):
-    configuration: Configuration
-    created_at: datetime
-    expires_at: Optional[datetime]
-    file_name: Optional[str]
-    finished_at: Optional[datetime]
-    input_file_url: Optional[str]
-    message: str
-    output: Optional[OutputResponse]
-    page_count: Optional[int]
-    pdf_url: Optional[str]
-    started_at: Optional[datetime]
-    status: Status
-    task_id: str
-    task_url: Optional[str]
-    _client: Optional[ChunkrClientProtocol] = PrivateAttr(default=None)
-    @abstractmethod
-    def _poll_request(self) -> dict:
-        """Helper method to make polling request with retry logic (synchronous)"""
-        pass
-    @abstractmethod
-    def poll(self) -> T:
-        """Poll the task for completion."""
-        pass
-    @abstractmethod
-    def update(self, config: Configuration) -> T:
-        """Update the task configuration."""
-        pass
-    @abstractmethod
-    def cancel(self) -> T:
-        """Cancel the task."""
-        pass
-    @abstractmethod
-    def delete(self) -> T:
-        """Delete the task."""
-        pass
-    def with_client(self, client: ChunkrClientProtocol) -> T:
-        self._client = client
-        return self
-    def _check_status(self) -> Optional[T]:
-        """Helper method to check task status and handle completion/failure"""
-        if self.status == "Failed":
-            raise ValueError(self.message)
-        if self.status not in ("Starting", "Processing"):
-            return self
-        return None
-    def html(self) -> str:
-        """Get the full HTML of the task"""
-        return self._get_content("html")
-    def markdown(self) -> str:
-        """Get the full markdown of the task"""
-        return self._get_content("markdown")
-    def content(self) -> str:
-        """Get the full content of the task"""
-        return self._get_content("content")
-    def _get_content(self, t: str) -> str:
-        if not self.output:
-            return ""
-        parts = []
-        for c in self.output.chunks:
-            for s in c.segments:
-                v = getattr(s, t)
-                if v:
-                    parts.append(v)
-        return "\n".join(parts)

chunkr_ai-0.0.17.dist-info/RECORD DELETED Viewed

@@ -1,21 +0,0 @@
-chunkr_ai/__init__.py,sha256=q5YosvCNXPNGjV10pZY1gcvdosqUh38nVQTQA9g8EuM,110
-chunkr_ai/models.py,sha256=hahbtxtTyzE_ygFgmlZwbfM6Vj2k5uSDEP02psxDOSQ,924
-chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-chunkr_ai/api/api.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-chunkr_ai/api/auth.py,sha256=hlv0GiUmlsbFO1wLL9sslqOnsBSoBqkL_6Mk2SDvxgE,413
-chunkr_ai/api/base.py,sha256=QvHl8FInKHYKPLWDeEPpCchB1uktzOwTW7iPnyXccUc,6449
-chunkr_ai/api/chunkr.py,sha256=0extAWVeZtI7B-g14smTfFZD_csdJNCcVNXx2_L69OQ,2617
-chunkr_ai/api/chunkr_async.py,sha256=aa0s_tnYoujHBsfe8uLiPpVEnb2l9A3CXwPP34w9Mk8,4127
-chunkr_ai/api/chunkr_base.py,sha256=k34Dyt1f21NBWZvZJ3w6Svvpg4SKnzr2ldGQ4ib96Wc,4951
-chunkr_ai/api/config.py,sha256=TWl0Az6acKQCS1LIpKD4qr_lQ_63wqQ5M6calpLOlDM,5040
-chunkr_ai/api/misc.py,sha256=bQpURc7soT5GL2ZpY7EiYyvPYWEzDM9qaX-UHa-oFeI,4909
-chunkr_ai/api/protocol.py,sha256=lxIR_qoCA2a1OXjpq3LrWMdS0jRHct1bEmBlUzV8gvE,526
-chunkr_ai/api/schema.py,sha256=yYesvueGgtmRa7Fi_Tpdv8A2bzHlx-B-5DxRAPlaDHo,4926
-chunkr_ai/api/task.py,sha256=28J4dR8BDjvtkh3CQjW_YUEkgPXhCHBGu0wH6AQKKuE,2474
-chunkr_ai/api/task_async.py,sha256=K5hTEOnmD42snPZg_JtJsVWg6QBUFZ1aBz1Abwv58-A,2529
-chunkr_ai/api/task_base.py,sha256=KLiMhvvbCgcilguQKrtEPMlNs8oaatfQUtn8pYt9t6g,2467
-chunkr_ai-0.0.17.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-chunkr_ai-0.0.17.dist-info/METADATA,sha256=giy1xeKYXk18W5U-baNoAAlvXciJldhA_EBi87NqKpA,4839
-chunkr_ai-0.0.17.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-chunkr_ai-0.0.17.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
-chunkr_ai-0.0.17.dist-info/RECORD,,

{chunkr_ai-0.0.17.dist-info → chunkr_ai-0.0.19.dist-info}/LICENSE RENAMED Viewed

File without changes

{chunkr_ai-0.0.17.dist-info → chunkr_ai-0.0.19.dist-info}/WHEEL RENAMED Viewed

File without changes

{chunkr_ai-0.0.17.dist-info → chunkr_ai-0.0.19.dist-info}/top_level.txt RENAMED Viewed

File without changes

chunkr-ai 0.0.17__py3-none-any.whl → 0.0.19__py3-none-any.whl

chunkr-ai 0.0.17__py3-none-any.whl → 0.0.19__py3-none-any.whl