chunkr-ai 0.0.45__tar.gz → 0.0.47__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22)
  1. {chunkr_ai-0.0.45/src/chunkr_ai.egg-info → chunkr_ai-0.0.47}/PKG-INFO +1 -1
  2. {chunkr_ai-0.0.45 → chunkr_ai-0.0.47}/pyproject.toml +1 -1
  3. {chunkr_ai-0.0.45 → chunkr_ai-0.0.47}/src/chunkr_ai/api/auth.py +1 -0
  4. {chunkr_ai-0.0.45 → chunkr_ai-0.0.47}/src/chunkr_ai/api/chunkr.py +17 -11
  5. {chunkr_ai-0.0.45 → chunkr_ai-0.0.47}/src/chunkr_ai/api/chunkr_base.py +12 -6
  6. {chunkr_ai-0.0.45 → chunkr_ai-0.0.47}/src/chunkr_ai/api/configuration.py +98 -9
  7. {chunkr_ai-0.0.45 → chunkr_ai-0.0.47}/src/chunkr_ai/api/decorators.py +7 -10
  8. {chunkr_ai-0.0.45 → chunkr_ai-0.0.47}/src/chunkr_ai/api/misc.py +10 -6
  9. {chunkr_ai-0.0.45 → chunkr_ai-0.0.47}/src/chunkr_ai/api/task_response.py +41 -18
  10. {chunkr_ai-0.0.45 → chunkr_ai-0.0.47}/src/chunkr_ai/models.py +4 -0
  11. {chunkr_ai-0.0.45 → chunkr_ai-0.0.47/src/chunkr_ai.egg-info}/PKG-INFO +1 -1
  12. {chunkr_ai-0.0.45 → chunkr_ai-0.0.47}/tests/test_chunkr.py +163 -8
  13. {chunkr_ai-0.0.45 → chunkr_ai-0.0.47}/LICENSE +0 -0
  14. {chunkr_ai-0.0.45 → chunkr_ai-0.0.47}/README.md +0 -0
  15. {chunkr_ai-0.0.45 → chunkr_ai-0.0.47}/setup.cfg +0 -0
  16. {chunkr_ai-0.0.45 → chunkr_ai-0.0.47}/src/chunkr_ai/__init__.py +0 -0
  17. {chunkr_ai-0.0.45 → chunkr_ai-0.0.47}/src/chunkr_ai/api/__init__.py +0 -0
  18. {chunkr_ai-0.0.45 → chunkr_ai-0.0.47}/src/chunkr_ai/api/protocol.py +0 -0
  19. {chunkr_ai-0.0.45 → chunkr_ai-0.0.47}/src/chunkr_ai.egg-info/SOURCES.txt +0 -0
  20. {chunkr_ai-0.0.45 → chunkr_ai-0.0.47}/src/chunkr_ai.egg-info/dependency_links.txt +0 -0
  21. {chunkr_ai-0.0.45 → chunkr_ai-0.0.47}/src/chunkr_ai.egg-info/requires.txt +0 -0
  22. {chunkr_ai-0.0.45 → chunkr_ai-0.0.47}/src/chunkr_ai.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: chunkr-ai
- Version: 0.0.45
+ Version: 0.0.47
  Summary: Python client for Chunkr: open source document intelligence
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
  License: MIT License
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "chunkr-ai"
- version = "0.0.45"
+ version = "0.0.47"
  authors = [{"name" = "Ishaan Kapoor", "email" = "ishaan@lumina.sh"}]
  description = "Python client for Chunkr: open source document intelligence"
  readme = "README.md"
@@ -1,5 +1,6 @@
  class HeadersMixin:
      """Mixin class for handling authorization headers"""
+     _api_key: str = ""

      def get_api_key(self) -> str:
          """Get the API key"""
@@ -1,12 +1,13 @@
  from pathlib import Path
  from PIL import Image
- from typing import Union, BinaryIO, Optional
+ from typing import Union, BinaryIO, Optional, cast, Awaitable

  from .configuration import Configuration
  from .decorators import anywhere, ensure_client, retry_on_429
  from .misc import prepare_upload_data
  from .task_response import TaskResponse
  from .chunkr_base import ChunkrBase
+ from .protocol import ChunkrClientProtocol

  class Chunkr(ChunkrBase):
      """Chunkr API client that works in both sync and async contexts"""
@@ -16,17 +17,17 @@ class Chunkr(ChunkrBase):
      async def upload(
          self,
          file: Union[str, Path, BinaryIO, Image.Image],
-         config: Configuration = None,
+         config: Optional[Configuration] = None,
          filename: Optional[str] = None,
      ) -> TaskResponse:
-         task = await self.create_task(file, config, filename)
-         return await task.poll()
+         task = await cast(Awaitable[TaskResponse], self.create_task(file, config, filename))
+         return await cast(Awaitable[TaskResponse], task.poll())

      @anywhere()
      @ensure_client()
      async def update(self, task_id: str, config: Configuration) -> TaskResponse:
-         task = await self.update_task(task_id, config)
-         return await task.poll()
+         task = await cast(Awaitable[TaskResponse], self.update_task(task_id, config))
+         return await cast(Awaitable[TaskResponse], task.poll())

      @anywhere()
      @ensure_client()
@@ -34,30 +35,32 @@ class Chunkr(ChunkrBase):
      async def create_task(
          self,
          file: Union[str, Path, BinaryIO, Image.Image],
-         config: Configuration = None,
+         config: Optional[Configuration] = None,
          filename: Optional[str] = None,
      ) -> TaskResponse:
          """Create a new task with the given file and configuration."""
          data = await prepare_upload_data(file, filename, config)
+         assert self._client is not None
          r = await self._client.post(
              f"{self.url}/api/v1/task/parse", json=data, headers=self._headers()
          )
          r.raise_for_status()
-         return TaskResponse(**r.json()).with_client(self, True, False)
+         return TaskResponse(**r.json()).with_client(cast(ChunkrClientProtocol, self), True, False)

      @anywhere()
      @ensure_client()
      @retry_on_429()
-     async def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
+     async def update_task(self, task_id: str, config: Optional[Configuration] = None) -> TaskResponse:
          """Update an existing task with new configuration."""
          data = await prepare_upload_data(None, None, config)
+         assert self._client is not None
          r = await self._client.patch(
              f"{self.url}/api/v1/task/{task_id}/parse",
              json=data,
              headers=self._headers(),
          )
          r.raise_for_status()
-         return TaskResponse(**r.json()).with_client(self, True, False)
+         return TaskResponse(**r.json()).with_client(cast(ChunkrClientProtocol, self), True, False)

      @anywhere()
      @ensure_client()
@@ -66,17 +69,19 @@ class Chunkr(ChunkrBase):
              "base64_urls": str(base64_urls).lower(),
              "include_chunks": str(include_chunks).lower()
          }
+         assert self._client is not None
          r = await self._client.get(
              f"{self.url}/api/v1/task/{task_id}",
              params=params,
              headers=self._headers()
          )
          r.raise_for_status()
-         return TaskResponse(**r.json()).with_client(self, include_chunks, base64_urls)
+         return TaskResponse(**r.json()).with_client(cast(ChunkrClientProtocol, self), include_chunks, base64_urls)

      @anywhere()
      @ensure_client()
      async def delete_task(self, task_id: str) -> None:
+         assert self._client is not None
          r = await self._client.delete(
              f"{self.url}/api/v1/task/{task_id}", headers=self._headers()
          )
@@ -85,6 +90,7 @@ class Chunkr(ChunkrBase):
      @anywhere()
      @ensure_client()
      async def cancel_task(self, task_id: str) -> None:
+         assert self._client is not None
          r = await self._client.get(
              f"{self.url}/api/v1/task/{task_id}/cancel", headers=self._headers()
          )
@@ -18,17 +18,23 @@ class ChunkrBase(HeadersMixin):
          raise_on_failure: Whether to raise an exception if the task fails. Defaults to False.
      """

-     def __init__(self, url: str = None, api_key: str = None, raise_on_failure: bool = False):
+     url: str
+     _api_key: str
+     raise_on_failure: bool
+     _client: Optional[httpx.AsyncClient]
+
+     def __init__(self, url: Optional[str] = None, api_key: Optional[str] = None, raise_on_failure: bool = False):
          load_dotenv(override=True)
          self.url = url or os.getenv("CHUNKR_URL") or "https://api.chunkr.ai"
-         self._api_key = api_key or os.getenv("CHUNKR_API_KEY")
+         _api_key = api_key or os.getenv("CHUNKR_API_KEY")
          self.raise_on_failure = raise_on_failure

-         if not self._api_key:
+         if not _api_key:
              raise ValueError(
                  "API key must be provided either directly, in .env file, or as CHUNKR_API_KEY environment variable. You can get an api key at: https://www.chunkr.ai"
              )

+         self._api_key = _api_key
          self.url = self.url.rstrip("/")
          self._client = httpx.AsyncClient()

@@ -36,7 +42,7 @@ class ChunkrBase(HeadersMixin):
      def upload(
          self,
          file: Union[str, Path, BinaryIO, Image.Image],
-         config: Configuration = None,
+         config: Optional[Configuration] = None,
          filename: Optional[str] = None,
      ) -> TaskResponse:
          """Upload a file and wait for processing to complete.
@@ -90,7 +96,7 @@
      def create_task(
          self,
          file: Union[str, Path, BinaryIO, Image.Image],
-         config: Configuration = None,
+         config: Optional[Configuration] = None,
          filename: Optional[str] = None,
      ) -> TaskResponse:
          """Upload a file for processing and immediately return the task response. It will not wait for processing to complete. To wait for the full processing to complete, use `task.poll()`.
@@ -127,7 +133,7 @@

      @abstractmethod
      def update_task(
-         self, task_id: str, config: Configuration
+         self, task_id: str, config: Optional[Configuration] = None
      ) -> TaskResponse:
          """Update a task by its ID and immediately return the task response. It will not wait for processing to complete. To wait for the full processing to complete, use `task.poll()`.

@@ -1,7 +1,7 @@
  from pydantic import BaseModel, Field, ConfigDict
  from enum import Enum
  from typing import Any, List, Optional, Union
- from pydantic import field_validator
+ from pydantic import field_validator, field_serializer

  class GenerationStrategy(str, Enum):
      LLM = "LLM"
@@ -65,11 +65,7 @@ class TokenizerType(BaseModel):
              return f"string:{self.string_value}"
          return ""

-     model_config = ConfigDict(
-         json_encoders={
-             'TokenizerType': lambda v: v.model_dump()
-         }
-     )
+     model_config = ConfigDict()

      def model_dump(self, **kwargs):
          if self.enum_value is not None:
@@ -85,10 +81,13 @@ class ChunkProcessing(BaseModel):

      model_config = ConfigDict(
          arbitrary_types_allowed=True,
-         json_encoders={
-             TokenizerType: lambda v: v.model_dump()
-         }
      )
+
+     @field_serializer('tokenizer')
+     def serialize_tokenizer(self, tokenizer: Optional[TokenizerType], _info):
+         if tokenizer is None:
+             return None
+         return tokenizer.model_dump()

      @field_validator('tokenizer', mode='before')
      def validate_tokenizer(cls, v):
@@ -130,6 +129,95 @@ class ErrorHandlingStrategy(str, Enum):
      FAIL = "Fail"
      CONTINUE = "Continue"

+ class FallbackStrategy(BaseModel):
+     type: str
+     model_id: Optional[str] = None
+
+     @classmethod
+     def none(cls) -> "FallbackStrategy":
+         return cls(type="None")
+
+     @classmethod
+     def default(cls) -> "FallbackStrategy":
+         return cls(type="Default")
+
+     @classmethod
+     def model(cls, model_id: str) -> "FallbackStrategy":
+         return cls(type="Model", model_id=model_id)
+
+     def __str__(self) -> str:
+         if self.type == "Model":
+             return f"Model({self.model_id})"
+         return self.type
+
+     def model_dump(self, **kwargs):
+         if self.type == "Model":
+             return {"Model": self.model_id}
+         return self.type
+
+     @field_validator('type')
+     def validate_type(cls, v):
+         if v not in ["None", "Default", "Model"]:
+             raise ValueError(f"Invalid fallback strategy: {v}")
+         return v
+
+     model_config = ConfigDict()
+
+     @classmethod
+     def model_validate(cls, obj):
+         # Handle string values like "None" or "Default"
+         if isinstance(obj, str):
+             if obj in ["None", "Default"]:
+                 return cls(type=obj)
+             # Try to parse as Enum value if it's not a direct match
+             try:
+                 return cls(type=obj)
+             except ValueError:
+                 pass  # Let it fall through to normal validation
+
+         # Handle dictionary format like {"Model": "model-id"}
+         elif isinstance(obj, dict) and len(obj) == 1:
+             if "Model" in obj:
+                 return cls(type="Model", model_id=obj["Model"])
+
+         # Fall back to normal validation
+         return super().model_validate(obj)
+
+ class LlmProcessing(BaseModel):
+     model_id: Optional[str] = None
+     fallback_strategy: FallbackStrategy = Field(default_factory=FallbackStrategy.default)
+     max_completion_tokens: Optional[int] = None
+     temperature: float = 0.0
+
+     model_config = ConfigDict()
+
+     @field_serializer('fallback_strategy')
+     def serialize_fallback_strategy(self, fallback_strategy: FallbackStrategy, _info):
+         return fallback_strategy.model_dump()
+
+     @field_validator('fallback_strategy', mode='before')
+     def validate_fallback_strategy(cls, v):
+         if isinstance(v, str):
+             if v == "None":
+                 return FallbackStrategy.none()
+             elif v == "Default":
+                 return FallbackStrategy.default()
+             # Try to parse as a model ID if it's not None or Default
+             try:
+                 return FallbackStrategy.model(v)
+             except ValueError:
+                 pass  # Let it fall through to normal validation
+         # Handle dictionary format like {"Model": "model-id"}
+         elif isinstance(v, dict) and len(v) == 1:
+             if "Model" in v:
+                 return FallbackStrategy.model(v["Model"])
+             elif "None" in v or v.get("None") is None:
+                 return FallbackStrategy.none()
+             elif "Default" in v or v.get("Default") is None:
+                 return FallbackStrategy.default()
+
+         return v
+
  class BoundingBox(BaseModel):
      left: float
      top: float
@@ -199,6 +287,7 @@ class Configuration(BaseModel):
      segment_processing: Optional[SegmentProcessing] = None
      segmentation_strategy: Optional[SegmentationStrategy] = None
      pipeline: Optional[Pipeline] = None
+     llm_processing: Optional[LlmProcessing] = None

  class OutputConfiguration(Configuration):
      input_file_url: Optional[str] = None
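
For reference, a minimal usage sketch of the `llm_processing` option introduced in 0.0.47, assuming the exports added in models.py and exercised in the test suite below; the model IDs are illustrative:

```python
from chunkr_ai.models import Configuration, FallbackStrategy, LlmProcessing

# Prefer a specific model and fall back to another model if it fails.
config = Configuration(
    llm_processing=LlmProcessing(
        model_id="gemini-pro-2.5",  # illustrative model ID
        fallback_strategy=FallbackStrategy.model("claude-3.7-sonnet"),
        max_completion_tokens=1500,
        temperature=0.3,
    ),
)

# FallbackStrategy serializes to the API's tagged format (see test_fallback_strategy_serialization):
assert FallbackStrategy.none().model_dump() == "None"
assert FallbackStrategy.default().model_dump() == "Default"
assert FallbackStrategy.model("gpt-4.1").model_dump() == {"Model": "gpt-4.1"}
```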
@@ -13,10 +13,7 @@ P = ParamSpec('P')

  _sync_loop = None

- @overload
- def anywhere() -> Callable[[Callable[P, Awaitable[T]]], Callable[P, Union[Awaitable[T], T]]]: ...
-
- def anywhere():
+ def anywhere() -> Callable[[Callable[P, Awaitable[T]]], Callable[P, Union[Awaitable[T], T]]]:
      """Decorator that allows an async function to run anywhere - sync or async context."""
      def decorator(async_func: Callable[P, Awaitable[T]]) -> Callable[P, Union[Awaitable[T], T]]:
          @functools.wraps(async_func)
@@ -42,22 +39,22 @@ def anywhere():
          return wrapper
      return decorator

- def ensure_client() -> Callable[[Callable[P, Awaitable[T]]], Callable[P, Awaitable[T]]]:
+ def ensure_client() -> Callable[[Callable[..., Awaitable[T]]], Callable[..., Awaitable[T]]]:
      """Decorator that ensures a valid httpx.AsyncClient exists before executing the method"""
-     def decorator(async_func: Callable[P, Awaitable[T]]) -> Callable[P, Awaitable[T]]:
+     def decorator(async_func: Callable[..., Awaitable[T]]) -> Callable[..., Awaitable[T]]:
          @functools.wraps(async_func)
-         async def wrapper(self: Any, *args: P.args, **kwargs: P.kwargs) -> T:
+         async def wrapper(self: Any, *args: Any, **kwargs: Any) -> T:
              if not self._client or self._client.is_closed:
                  self._client = httpx.AsyncClient()
              return await async_func(self, *args, **kwargs)
          return wrapper
      return decorator

- def require_task() -> Callable[[Callable[P, Awaitable[T]]], Callable[P, Awaitable[T]]]:
+ def require_task() -> Callable[[Callable[..., Awaitable[T]]], Callable[..., Awaitable[T]]]:
      """Decorator that ensures task has required attributes and valid client before execution"""
-     def decorator(async_func: Callable[P, Awaitable[T]]) -> Callable[P, Awaitable[T]]:
+     def decorator(async_func: Callable[..., Awaitable[T]]) -> Callable[..., Awaitable[T]]:
          @functools.wraps(async_func)
-         async def wrapper(self: Any, *args: P.args, **kwargs: P.kwargs) -> T:
+         async def wrapper(self: Any, *args: Any, **kwargs: Any) -> T:
              if not self.task_url:
                  raise ValueError("Task URL not found")
              if not self._client:
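
A rough sketch of what the `anywhere()` decorator provides for client methods such as `Chunkr.upload`: outside an event loop the call blocks and returns the result, while inside a running loop it returns an awaitable. The import path and file path below are illustrative, and the sketch assumes `CHUNKR_API_KEY` is set in the environment:

```python
from chunkr_ai.api.chunkr import Chunkr

# Synchronous context: no event loop is running, so the decorated coroutine
# is driven to completion and the TaskResponse is returned directly.
def sync_example():
    chunkr = Chunkr()  # reads CHUNKR_URL / CHUNKR_API_KEY from the environment
    task = chunkr.upload("tests/files/test.pdf")
    print(task.status)

# Asynchronous context: the same call returns an awaitable instead.
async def async_example():
    chunkr = Chunkr()
    task = await chunkr.upload("tests/files/test.pdf")
    print(task.status)
```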
@@ -30,14 +30,18 @@ async def prepare_file(file: Union[str, Path, BinaryIO, Image.Image]) -> Tuple[O
      if isinstance(file, str):
          if file.startswith(('http://', 'https://')):
              return None, file
-         try:
-             base64.b64decode(file)
-             return None, file
-         except:
+         # Try to handle as a file path first
+         path = Path(file)
+         if path.exists():
+             # It's a valid file path, convert to Path object and continue processing
+             file = path
+         else:
+             # If not a valid file path, try treating as base64
              try:
-                 file = Path(file)
+                 base64.b64decode(file)
+                 return None, file
              except:
-                 raise ValueError("File must be a valid path, URL, or base64 string")
+                 raise ValueError(f"File not found: {file} and it's not a valid base64 string")

      # Handle file paths - convert to base64
      if isinstance(file, Path):
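
The new string handling in `prepare_file` resolves inputs in the order URL, then existing file path, then base64 payload. A simplified sketch of that resolution order (the helper name is hypothetical; the real function also converts paths and readers into upload payloads):

```python
import base64
from pathlib import Path

def classify_string_input(file: str) -> str:
    """Hypothetical helper mirroring the 0.0.47 resolution order in prepare_file."""
    if file.startswith(("http://", "https://")):
        return "url"
    if Path(file).exists():
        return "path"  # as of 0.0.47, the filesystem is checked before base64
    try:
        base64.b64decode(file)
        return "base64"
    except Exception:
        raise ValueError(f"File not found: {file} and it's not a valid base64 string")
```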
@@ -1,5 +1,5 @@
  from datetime import datetime
- from typing import TypeVar, Optional, Generic
+ from typing import Optional, cast, Awaitable, Union
  from pydantic import BaseModel, PrivateAttr
  import asyncio
  import json
@@ -11,9 +11,7 @@ from .protocol import ChunkrClientProtocol
  from .misc import prepare_upload_data
  from .decorators import anywhere, require_task, retry_on_429

- T = TypeVar("T", bound="TaskResponse")
-
- class TaskResponse(BaseModel, Generic[T]):
+ class TaskResponse(BaseModel):
      configuration: OutputConfiguration
      created_at: datetime
      expires_at: Optional[datetime] = None
@@ -28,13 +26,13 @@ class TaskResponse(BaseModel, Generic[T]):
      _base64_urls: bool = False
      _client: Optional[ChunkrClientProtocol] = PrivateAttr(default=None)

-     def with_client(self, client: ChunkrClientProtocol, include_chunks: bool = False, base64_urls: bool = False) -> T:
+     def with_client(self, client: ChunkrClientProtocol, include_chunks: bool = False, base64_urls: bool = False) -> "TaskResponse":
          self._client = client
          self.include_chunks = include_chunks
          self._base64_urls = base64_urls
          return self

-     def _check_status(self) -> Optional[T]:
+     def _check_status(self) -> Optional["TaskResponse"]:
          """Helper method to check task status and handle completion/failure"""
          if self.status == "Failed":
              if getattr(self._client, 'raise_on_failure', True):
@@ -47,6 +45,11 @@ class TaskResponse(BaseModel, Generic[T]):
      @require_task()
      async def _poll_request(self) -> dict:
          try:
+             if not self._client:
+                 raise ValueError("Chunkr client protocol is not initialized")
+             if not self._client._client or self._client._client.is_closed:
+                 raise ValueError("httpx client is not open")
+             assert self.task_url is not None
              r = await self._client._client.get(
                  self.task_url, headers=self._client._headers()
              )
@@ -64,10 +67,12 @@ class TaskResponse(BaseModel, Generic[T]):
              raise e

      @anywhere()
-     async def poll(self) -> T:
+     async def poll(self) -> "TaskResponse":
          """Poll the task for completion."""
          while True:
              j = await self._poll_request()
+             if not self._client:
+                 raise ValueError("Chunkr client protocol is not initialized")
              updated = TaskResponse(**j).with_client(self._client)
              self.__dict__.update(updated.__dict__)
              if res := self._check_status():
@@ -77,9 +82,14 @@
      @anywhere()
      @require_task()
      @retry_on_429()
-     async def update(self, config: Configuration) -> T:
+     async def update(self, config: Configuration) -> "TaskResponse":
          """Update the task configuration."""
          data = await prepare_upload_data(None, None, config)
+         if not self._client:
+             raise ValueError("Chunkr client protocol is not initialized")
+         if not self._client._client or self._client._client.is_closed:
+             raise ValueError("httpx client is not open")
+         assert self.task_url is not None
          r = await self._client._client.patch(
              f"{self.task_url}/parse",
              json=data,
@@ -88,12 +98,17 @@
          r.raise_for_status()
          updated = TaskResponse(**r.json()).with_client(self._client)
          self.__dict__.update(updated.__dict__)
-         return await self.poll()
+         return cast(TaskResponse, self.poll())

      @anywhere()
      @require_task()
-     async def delete(self) -> T:
+     async def delete(self) -> "TaskResponse":
          """Delete the task."""
+         if not self._client:
+             raise ValueError("Chunkr client protocol is not initialized")
+         if not self._client._client or self._client._client.is_closed:
+             raise ValueError("httpx client is not open")
+         assert self.task_url is not None
          r = await self._client._client.delete(
              self.task_url, headers=self._client._headers()
          )
@@ -102,15 +117,20 @@

      @anywhere()
      @require_task()
-     async def cancel(self) -> T:
+     async def cancel(self) -> "TaskResponse":
          """Cancel the task."""
+         if not self._client:
+             raise ValueError("Chunkr client protocol is not initialized")
+         if not self._client._client or self._client._client.is_closed:
+             raise ValueError("httpx client is not open")
+         assert self.task_url is not None
          r = await self._client._client.get(
              f"{self.task_url}/cancel", headers=self._client._headers()
          )
          r.raise_for_status()
-         return await self.poll()
+         return cast(TaskResponse, self.poll())

-     def _write_to_file(self, content: str | dict, output_file: str, is_json: bool = False) -> None:
+     def _write_to_file(self, content: Union[str, dict], output_file: Optional[str], is_json: bool = False) -> None:
          """Helper method to write content to a file

          Args:
@@ -131,9 +151,12 @@
              if is_json:
                  json.dump(content, f, cls=DateTimeEncoder, indent=2)
              else:
-                 f.write(content)
+                 if isinstance(content, str):
+                     f.write(content)
+                 else:
+                     raise ValueError("Content is not a string")

-     def html(self, output_file: str = None) -> str:
+     def html(self, output_file: Optional[str] = None) -> str:
          """Get the full HTML of the task

          Args:
@@ -143,7 +166,7 @@
          self._write_to_file(content, output_file)
          return content

-     def markdown(self, output_file: str = None) -> str:
+     def markdown(self, output_file: Optional[str] = None) -> str:
          """Get the full markdown of the task

          Args:
@@ -153,7 +176,7 @@
          self._write_to_file(content, output_file)
          return content

-     def content(self, output_file: str = None) -> str:
+     def content(self, output_file: Optional[str] = None) -> str:
          """Get the full content of the task

          Args:
@@ -163,7 +186,7 @@
          self._write_to_file(content, output_file)
          return content

-     def json(self, output_file: str = None) -> dict:
+     def json(self, output_file: Optional[str] = None) -> dict:
          """Get the full task data as JSON

          Args:
@@ -6,8 +6,10 @@ from .api.configuration import (
      CroppingStrategy,
      EmbedSource,
      ErrorHandlingStrategy,
+     FallbackStrategy,
      GenerationStrategy,
      GenerationConfig,
+     LlmProcessing,
      Model,
      OCRResult,
      OcrStrategy,
@@ -31,8 +33,10 @@ __all__ = [
      "CroppingStrategy",
      "EmbedSource",
      "ErrorHandlingStrategy",
+     "FallbackStrategy",
      "GenerationConfig",
      "GenerationStrategy",
+     "LlmProcessing",
      "Model",
      "OCRResult",
      "OcrStrategy",
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: chunkr-ai
- Version: 0.0.45
+ Version: 0.0.47
  Summary: Python client for Chunkr: open source document intelligence
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
  License: MIT License
@@ -18,12 +18,22 @@ from chunkr_ai.models import (
      EmbedSource,
      ErrorHandlingStrategy,
      Tokenizer,
+     LlmProcessing,
+     FallbackStrategy,
  )

  @pytest.fixture
  def sample_path():
      return Path("tests/files/test.pdf")

+ @pytest.fixture
+ def sample_absolute_path_str():
+     return "tests/files/test.pdf"
+
+ @pytest.fixture
+ def sample_relative_path_str():
+     return "./tests/files/test.pdf"
+
  @pytest.fixture
  def sample_image():
      return Image.open("tests/files/test.jpg")
@@ -41,7 +51,7 @@ def client():
  def markdown_embed_config():
      return Configuration(
          segment_processing=SegmentProcessing(
-             page=GenerationConfig(
+             Page=GenerationConfig(
                  html=GenerationStrategy.LLM,
                  markdown=GenerationStrategy.LLM,
                  embed_sources=[EmbedSource.MARKDOWN]
@@ -53,7 +63,7 @@ def html_embed_config():
  def html_embed_config():
      return Configuration(
          segment_processing=SegmentProcessing(
-             page=GenerationConfig(
+             Page=GenerationConfig(
                  html=GenerationStrategy.LLM,
                  markdown=GenerationStrategy.LLM,
                  embed_sources=[EmbedSource.HTML]
@@ -65,7 +75,7 @@ def multiple_embed_config():
  def multiple_embed_config():
      return Configuration(
          segment_processing=SegmentProcessing(
-             page=GenerationConfig(
+             Page=GenerationConfig(
                  html=GenerationStrategy.LLM,
                  markdown=GenerationStrategy.LLM,
                  llm="Generate a summary of this content",
@@ -113,7 +123,7 @@ def xlm_roberta_with_html_content_config():
              tokenizer=Tokenizer.XLM_ROBERTA_BASE
          ),
          segment_processing=SegmentProcessing(
-             page=GenerationConfig(
+             Page=GenerationConfig(
                  html=GenerationStrategy.LLM,
                  markdown=GenerationStrategy.LLM,
                  embed_sources=[EmbedSource.HTML, EmbedSource.CONTENT]
@@ -121,6 +131,39 @@ def xlm_roberta_with_html_content_config():
          ),
      )

+ @pytest.fixture
+ def none_fallback_config():
+     return Configuration(
+         llm_processing=LlmProcessing(
+             model_id="gemini-pro-2.5",
+             fallback_strategy=FallbackStrategy.none(),
+             max_completion_tokens=500,
+             temperature=0.2
+         ),
+     )
+
+ @pytest.fixture
+ def default_fallback_config():
+     return Configuration(
+         llm_processing=LlmProcessing(
+             model_id="gemini-pro-2.5",
+             fallback_strategy=FallbackStrategy.default(),
+             max_completion_tokens=1000,
+             temperature=0.5
+         ),
+     )
+
+ @pytest.fixture
+ def model_fallback_config():
+     return Configuration(
+         llm_processing=LlmProcessing(
+             model_id="gemini-pro-2.5",
+             fallback_strategy=FallbackStrategy.model("claude-3.7-sonnet"),
+             max_completion_tokens=2000,
+             temperature=0.7
+         ),
+     )
+
  @pytest.mark.asyncio
  async def test_send_file_path(client, sample_path):
      response = await client.upload(sample_path)
@@ -128,6 +171,20 @@ async def test_send_file_path(client, sample_path):
      assert response.status == "Succeeded"
      assert response.output is not None

+ @pytest.mark.asyncio
+ async def test_send_file_path_str(client, sample_absolute_path_str):
+     response = await client.upload(sample_absolute_path_str)
+     assert response.task_id is not None
+     assert response.status == "Succeeded"
+     assert response.output is not None
+
+ @pytest.mark.asyncio
+ async def test_send_file_relative_path_str(client, sample_relative_path_str):
+     response = await client.upload(sample_relative_path_str)
+     assert response.task_id is not None
+     assert response.status == "Succeeded"
+     assert response.output is not None
+
  @pytest.mark.asyncio
  async def test_send_file_url(client, sample_url):
      response = await client.upload(sample_url)
@@ -136,7 +193,7 @@ async def test_send_file_url(client, sample_url):
      assert response.output is not None

  @pytest.mark.asyncio
- async def test_send_file_path_str(client, sample_path):
+ async def test_send_file_path_as_str(client, sample_path):
      response = await client.upload(str(sample_path))
      assert response.task_id is not None
      assert response.status == "Succeeded"
@@ -205,7 +262,7 @@ async def test_page_llm_html(client, sample_path):
          Configuration(
              segmentation_strategy=SegmentationStrategy.PAGE,
              segment_processing=SegmentProcessing(
-                 page=GenerationConfig(html=GenerationStrategy.LLM)
+                 Page=GenerationConfig(html=GenerationStrategy.LLM)
              ),
          ),
      )
@@ -218,7 +275,7 @@ async def test_page_llm(client, sample_path):
      configuration = Configuration(
          segmentation_strategy=SegmentationStrategy.PAGE,
          segment_processing=SegmentProcessing(
-             page=GenerationConfig(
+             Page=GenerationConfig(
                  html=GenerationStrategy.LLM, markdown=GenerationStrategy.LLM
              )
          ),
@@ -297,7 +354,7 @@ async def test_pipeline_type_azure(client, sample_path):
      assert response.output is not None

  @pytest.mark.asyncio
- async def test_pipeline_type_azure(client, sample_path):
+ async def test_pipeline_type_chunkr(client, sample_path):
      response = await client.upload(sample_path, Configuration(pipeline=Pipeline.CHUNKR))
      assert response.task_id is not None
      assert response.status == "Succeeded"
@@ -451,3 +508,101 @@ async def test_error_handling_continue(client, sample_path):
      assert response.task_id is not None
      assert response.status == "Succeeded"
      assert response.output is not None
+
+ @pytest.mark.asyncio
+ async def test_llm_processing_none_fallback(client, sample_path, none_fallback_config):
+     response = await client.upload(sample_path, none_fallback_config)
+     assert response.task_id is not None
+     assert response.status == "Succeeded"
+     assert response.output is not None
+     assert response.configuration.llm_processing is not None
+     assert response.configuration.llm_processing.model_id == "gemini-pro-2.5"
+     assert str(response.configuration.llm_processing.fallback_strategy) == "None"
+     assert response.configuration.llm_processing.max_completion_tokens == 500
+     assert response.configuration.llm_processing.temperature == 0.2
+
+ @pytest.mark.asyncio
+ async def test_llm_processing_default_fallback(client, sample_path, default_fallback_config):
+     response = await client.upload(sample_path, default_fallback_config)
+     assert response.task_id is not None
+     assert response.status == "Succeeded"
+     assert response.output is not None
+     assert response.configuration.llm_processing is not None
+     assert response.configuration.llm_processing.model_id == "gemini-pro-2.5"
+     # The service may resolve Default to an actual model
+     assert response.configuration.llm_processing.fallback_strategy is not None
+     assert response.configuration.llm_processing.max_completion_tokens == 1000
+     assert response.configuration.llm_processing.temperature == 0.5
+
+ @pytest.mark.asyncio
+ async def test_llm_processing_model_fallback(client, sample_path, model_fallback_config):
+     response = await client.upload(sample_path, model_fallback_config)
+     assert response.task_id is not None
+     assert response.status == "Succeeded"
+     assert response.output is not None
+     assert response.configuration.llm_processing is not None
+     assert response.configuration.llm_processing.model_id == "gemini-pro-2.5"
+     assert str(response.configuration.llm_processing.fallback_strategy) == "Model(claude-3.7-sonnet)"
+     assert response.configuration.llm_processing.max_completion_tokens == 2000
+     assert response.configuration.llm_processing.temperature == 0.7
+
+ @pytest.mark.asyncio
+ async def test_llm_custom_model(client, sample_path):
+     config = Configuration(
+         llm_processing=LlmProcessing(
+             model_id="claude-3.7-sonnet",  # Using a model from models.yaml
+             fallback_strategy=FallbackStrategy.none(),
+             max_completion_tokens=1500,
+             temperature=0.3
+         ),
+     )
+     response = await client.upload(sample_path, config)
+     assert response.task_id is not None
+     assert response.status == "Succeeded"
+     assert response.output is not None
+     assert response.configuration.llm_processing is not None
+     assert response.configuration.llm_processing.model_id == "claude-3.7-sonnet"
+
+ @pytest.mark.asyncio
+ async def test_fallback_strategy_serialization():
+     # Test that FallbackStrategy objects serialize correctly
+     none_strategy = FallbackStrategy.none()
+     default_strategy = FallbackStrategy.default()
+     model_strategy = FallbackStrategy.model("gpt-4.1")
+
+     assert none_strategy.model_dump() == "None"
+     assert default_strategy.model_dump() == "Default"
+     assert model_strategy.model_dump() == {"Model": "gpt-4.1"}
+
+     # Test string representation
+     assert str(none_strategy) == "None"
+     assert str(default_strategy) == "Default"
+     assert str(model_strategy) == "Model(gpt-4.1)"
+
+ @pytest.mark.asyncio
+ async def test_combined_config_with_llm_and_other_settings(client, sample_path):
+     # Test combining LLM settings with other configuration options
+     config = Configuration(
+         llm_processing=LlmProcessing(
+             model_id="qwen-2.5-vl-7b-instruct",
+             fallback_strategy=FallbackStrategy.model("gemini-flash-2.0"),
+             temperature=0.4
+         ),
+         segmentation_strategy=SegmentationStrategy.PAGE,
+         segment_processing=SegmentProcessing(
+             Page=GenerationConfig(
+                 html=GenerationStrategy.LLM,
+                 markdown=GenerationStrategy.LLM
+             )
+         ),
+         chunk_processing=ChunkProcessing(target_length=1024)
+     )
+
+     response = await client.upload(sample_path, config)
+     assert response.task_id is not None
+     assert response.status == "Succeeded"
+     assert response.output is not None
+     assert response.configuration.llm_processing is not None
+     assert response.configuration.llm_processing.model_id == "qwen-2.5-vl-7b-instruct"
+     assert response.configuration.segmentation_strategy == SegmentationStrategy.PAGE
+     assert response.configuration.chunk_processing.target_length == 1024