lm-deluge 0.0.14__tar.gz → 0.0.15__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of lm-deluge might be problematic.
- {lm_deluge-0.0.14/src/lm_deluge.egg-info → lm_deluge-0.0.15}/PKG-INFO +1 -1
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/pyproject.toml +1 -1
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/api_requests/common.py +2 -0
- lm_deluge-0.0.15/src/lm_deluge/api_requests/gemini.py +222 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/file.py +7 -2
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/models.py +57 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/usage.py +10 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15/src/lm_deluge.egg-info}/PKG-INFO +1 -1
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge.egg-info/SOURCES.txt +3 -0
- lm_deluge-0.0.15/tests/test_gemini_integration.py +238 -0
- lm_deluge-0.0.15/tests/test_simple_gemini.py +32 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/LICENSE +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/README.md +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/setup.cfg +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/__init__.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/agent.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/api_requests/__init__.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/api_requests/anthropic.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/api_requests/base.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/api_requests/bedrock.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/api_requests/mistral.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/api_requests/openai.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/api_requests/response.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/batches.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/cache.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/client.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/computer_use/anthropic_tools.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/config.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/embed.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/errors.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/gemini_limits.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/image.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/llm_tools/__init__.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/llm_tools/extract.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/llm_tools/score.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/llm_tools/translate.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/prompt.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/rerank.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/tool.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/tracker.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/util/json.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/util/logprobs.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/util/validation.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/util/xml.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge.egg-info/requires.txt +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge.egg-info/top_level.txt +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_all_models.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_batch_real.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_bedrock_computer_use.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_bedrock_models.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_cache.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_client_tracker_integration.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_computer_use.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_computer_use_integration.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_debug_format.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_file_integration.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_file_support.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_image_models.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_image_utils.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_json_utils.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_logprobs_refactor.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_max_concurrent_requests.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_mcp_tools.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_openai_responses.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_prompt_caching.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_real_caching.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_real_caching_bedrock.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_retry_fix.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_rich_display.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_sampling_params.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_tool_calls.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_tool_from_function.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_tool_validation.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_tracker_refactor.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_translate.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_xml_utils.py +0 -0
src/lm_deluge/api_requests/common.py
@@ -2,6 +2,7 @@ from .openai import OpenAIRequest, OpenAIResponsesRequest
 from .anthropic import AnthropicRequest
 from .mistral import MistralRequest
 from .bedrock import BedrockRequest
+from .gemini import GeminiRequest

 CLASSES = {
     "openai": OpenAIRequest,
@@ -9,4 +10,5 @@ CLASSES = {
     "anthropic": AnthropicRequest,
     "mistral": MistralRequest,
     "bedrock": BedrockRequest,
+    "gemini": GeminiRequest,
 }
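With this entry, any model whose registry record sets api_spec to "gemini" resolves to GeminiRequest. A minimal sketch of that lookup, assuming the client dispatches on the model's api_spec field; the helper function below is hypothetical, only the CLASSES mapping itself comes from this diff:

# Hypothetical dispatch helper -- illustrates how the CLASSES mapping is meant to be used.
# Only CLASSES (including the new "gemini" entry) is part of this release.
from lm_deluge.api_requests.common import CLASSES

def request_class_for(api_spec: str):
    # e.g. "gemini" -> GeminiRequest, "openai" -> OpenAIRequest
    try:
        return CLASSES[api_spec]
    except KeyError:
        raise ValueError(f"no request class registered for api_spec={api_spec!r}")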
src/lm_deluge/api_requests/gemini.py (new file)
@@ -0,0 +1,222 @@
+import json
+import os
+import warnings
+from typing import Callable
+
+from aiohttp import ClientResponse
+
+from lm_deluge.tool import Tool
+
+from ..config import SamplingParams
+from ..models import APIModel
+from ..prompt import CachePattern, Conversation, Message, Text, Thinking, ToolCall
+from ..tracker import StatusTracker
+from ..usage import Usage
+from .base import APIRequestBase, APIResponse
+
+
+def _build_gemini_request(
+    model: APIModel,
+    prompt: Conversation,
+    tools: list[Tool] | None,
+    sampling_params: SamplingParams,
+) -> dict:
+    system_message, messages = prompt.to_gemini()
+
+    request_json = {
+        "contents": messages,
+        "generationConfig": {
+            "temperature": sampling_params.temperature,
+            "topP": sampling_params.top_p,
+            "maxOutputTokens": sampling_params.max_new_tokens,
+        },
+    }
+
+    # Add system instruction if present
+    if system_message:
+        request_json["systemInstruction"] = {"parts": [{"text": system_message}]}
+
+    # Handle reasoning models (thinking)
+    if model.reasoning_model:
+        request_json["generationConfig"]["thinkingConfig"] = {"includeThoughts": True}
+        if sampling_params.reasoning_effort and "flash" in model.id:
+            budget = {"low": 1024, "medium": 4096, "high": 16384}.get(
+                sampling_params.reasoning_effort
+            )
+            request_json["generationConfig"]["thinkingConfig"]["thinkingBudget"] = (
+                budget
+            )
+
+    else:
+        if sampling_params.reasoning_effort:
+            warnings.warn(
+                f"Ignoring reasoning_effort param for non-reasoning model: {model.name}"
+            )
+
+    # Add tools if provided
+    if tools:
+        tool_declarations = [tool.dump_for("google") for tool in tools]
+        request_json["tools"] = [{"functionDeclarations": tool_declarations}]
+
+    # Handle JSON mode
+    if sampling_params.json_mode and model.supports_json:
+        request_json["generationConfig"]["responseMimeType"] = "application/json"
+
+    return request_json
+
+
+class GeminiRequest(APIRequestBase):
+    def __init__(
+        self,
+        task_id: int,
+        model_name: str,  # must correspond to registry
+        prompt: Conversation,
+        attempts_left: int,
+        status_tracker: StatusTracker,
+        results_arr: list,
+        request_timeout: int = 30,
+        sampling_params: SamplingParams = SamplingParams(),
+        callback: Callable | None = None,
+        all_model_names: list[str] | None = None,
+        all_sampling_params: list[SamplingParams] | None = None,
+        tools: list | None = None,
+        cache: CachePattern | None = None,
+    ):
+        super().__init__(
+            task_id=task_id,
+            model_name=model_name,
+            prompt=prompt,
+            attempts_left=attempts_left,
+            status_tracker=status_tracker,
+            results_arr=results_arr,
+            request_timeout=request_timeout,
+            sampling_params=sampling_params,
+            callback=callback,
+            all_model_names=all_model_names,
+            all_sampling_params=all_sampling_params,
+            tools=tools,
+            cache=cache,
+        )
+
+        # Warn if cache is specified for Gemini model
+        if cache is not None:
+            warnings.warn(
+                f"Cache parameter '{cache}' is not supported for Gemini models, ignoring for {model_name}"
+            )
+
+        self.model = APIModel.from_registry(model_name)
+        # Gemini API endpoint format: https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent
+        self.url = f"{self.model.api_base}/models/{self.model.name}:generateContent"
+        self.request_header = {
+            "Content-Type": "application/json",
+        }
+
+        # Add API key as query parameter for Gemini
+        api_key = os.getenv(self.model.api_key_env_var)
+        if not api_key:
+            raise ValueError(
+                f"API key environment variable {self.model.api_key_env_var} not set"
+            )
+        self.url += f"?key={api_key}"
+
+        self.request_json = _build_gemini_request(
+            self.model, prompt, tools, sampling_params
+        )
+
+    async def handle_response(self, http_response: ClientResponse) -> APIResponse:
+        is_error = False
+        error_message = None
+        thinking = None
+        content = None
+        usage = None
+        status_code = http_response.status
+        mimetype = http_response.headers.get("Content-Type", None)
+        data = None
+
+        if status_code >= 200 and status_code < 300:
+            try:
+                data = await http_response.json()
+            except Exception as e:
+                is_error = True
+                error_message = (
+                    f"Error calling .json() on response w/ status {status_code}: {e}"
+                )
+
+            if not is_error:
+                assert data
+                try:
+                    # Parse Gemini response format
+                    parts = []
+
+                    if "candidates" in data and data["candidates"]:
+                        candidate = data["candidates"][0]
+                        if "content" in candidate and "parts" in candidate["content"]:
+                            for part in candidate["content"]["parts"]:
+                                if "text" in part:
+                                    parts.append(Text(part["text"]))
+                                elif "thought" in part:
+                                    parts.append(Thinking(part["thought"]))
+                                elif "functionCall" in part:
+                                    func_call = part["functionCall"]
+                                    # Generate a unique ID since Gemini doesn't provide one
+                                    import uuid
+
+                                    tool_id = f"call_{uuid.uuid4().hex[:8]}"
+                                    parts.append(
+                                        ToolCall(
+                                            id=tool_id,
+                                            name=func_call["name"],
+                                            arguments=func_call.get("args", {}),
+                                        )
+                                    )
+
+                    content = Message("assistant", parts)
+
+                    # Extract usage information if present
+                    if "usageMetadata" in data:
+                        usage_data = data["usageMetadata"]
+                        usage = Usage.from_gemini_usage(usage_data)
+
+                except Exception as e:
+                    is_error = True
+                    error_message = f"Error parsing Gemini response: {str(e)}"
+
+        elif mimetype and "json" in mimetype.lower():
+            is_error = True
+            try:
+                data = await http_response.json()
+                error_message = json.dumps(data)
+            except Exception:
+                error_message = (
+                    f"HTTP {status_code} with JSON content type but failed to parse"
+                )
+        else:
+            is_error = True
+            text = await http_response.text()
+            error_message = text
+
+        # Handle special kinds of errors
+        if is_error and error_message is not None:
+            if "rate limit" in error_message.lower() or status_code == 429:
+                error_message += " (Rate limit error, triggering cooldown.)"
+                self.status_tracker.rate_limit_exceeded()
+            if (
+                "context length" in error_message.lower()
+                or "token limit" in error_message.lower()
+            ):
+                error_message += " (Context length exceeded, set retries to 0.)"
+                self.attempts_left = 0
+
+        return APIResponse(
+            id=self.task_id,
+            status_code=status_code,
+            is_error=is_error,
+            error_message=error_message,
+            prompt=self.prompt,
+            content=content,
+            thinking=thinking,
+            model_internal=self.model_name,
+            sampling_params=self.sampling_params,
+            usage=usage,
+            raw_response=data,
+        )
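For a plain single-turn prompt with no tools, _build_gemini_request above assembles a body along these lines (values are illustrative; the exact shape of "contents" comes from Conversation.to_gemini(), which is not part of this diff):

# Sketch of the request body for a simple prompt. The "contents" value is whatever
# Conversation.to_gemini() returns (assumed here to be Gemini-style role/parts dicts);
# the sampling values are placeholders.
request_json = {
    "contents": [{"role": "user", "parts": [{"text": "What is 2+2?"}]}],
    "generationConfig": {
        "temperature": 0.7,
        "topP": 1.0,
        "maxOutputTokens": 512,
    },
}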
src/lm_deluge/file.py
@@ -141,8 +141,13 @@ class File:
         return filename, content, media_type

     def gemini(self) -> dict:
-        """For Gemini API -
-
+        """For Gemini API - files are provided as inline data."""
+        return {
+            "inlineData": {
+                "mimeType": self._mime(),
+                "data": self._base64(include_header=False),
+            }
+        }

     def mistral(self) -> dict:
         """For Mistral API - not yet supported."""
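So attaching, say, a PDF produces an inlineData part like the following (mime type and payload here are illustrative, the structure is exactly what the method above returns):

# Approximate output of File.gemini() for a PDF attachment.
{
    "inlineData": {
        "mimeType": "application/pdf",
        "data": "JVBERi0xLjQK...",  # base64-encoded file bytes, no "data:" header
    }
}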
src/lm_deluge/models.py
@@ -167,6 +167,63 @@ registry = {
         "tokens_per_minute": 100_000,
         "reasoning_model": True,
     },
+    # Native Gemini API versions with file support
+    "gemini-2.0-flash-gemini": {
+        "id": "gemini-2.0-flash-gemini",
+        "name": "gemini-2.0-flash",
+        "api_base": "https://generativelanguage.googleapis.com/v1beta",
+        "api_key_env_var": "GEMINI_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": False,
+        "api_spec": "gemini",
+        "input_cost": 0.1,
+        "output_cost": 0.4,
+        "requests_per_minute": 20,
+        "tokens_per_minute": 100_000,
+        "reasoning_model": False,
+    },
+    "gemini-2.0-flash-lite-gemini": {
+        "id": "gemini-2.0-flash-lite-gemini",
+        "name": "gemini-2.0-flash-lite",
+        "api_base": "https://generativelanguage.googleapis.com/v1beta",
+        "api_key_env_var": "GEMINI_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": False,
+        "api_spec": "gemini",
+        "input_cost": 0.1,
+        "output_cost": 0.4,
+        "requests_per_minute": 20,
+        "tokens_per_minute": 100_000,
+        "reasoning_model": False,
+    },
+    "gemini-2.5-pro-gemini": {
+        "id": "gemini-2.5-pro-gemini",
+        "name": "gemini-2.5-pro-preview-05-06",
+        "api_base": "https://generativelanguage.googleapis.com/v1beta",
+        "api_key_env_var": "GEMINI_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": False,
+        "api_spec": "gemini",
+        "input_cost": 0.1,
+        "output_cost": 0.4,
+        "requests_per_minute": 20,
+        "tokens_per_minute": 100_000,
+        "reasoning_model": True,
+    },
+    "gemini-2.5-flash-gemini": {
+        "id": "gemini-2.5-flash-gemini",
+        "name": "gemini-2.5-flash-preview-05-20",
+        "api_base": "https://generativelanguage.googleapis.com/v1beta",
+        "api_key_env_var": "GEMINI_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": False,
+        "api_spec": "gemini",
+        "input_cost": 0.1,
+        "output_cost": 0.4,
+        "requests_per_minute": 20,
+        "tokens_per_minute": 100_000,
+        "reasoning_model": True,
+    },
     # ███████  █████████  █████
     # ███░░░░░███ ███░░░░░███ ░░███
     # ███   ░░███ ████████  ██████  ████████  ░███ ░███ ░███
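The new "-gemini" registry keys are the model names to pass when you want the native Gemini API route; a minimal sketch mirroring the tests added in this release (requires GEMINI_API_KEY in the environment):

import asyncio

from lm_deluge import Conversation, LLMClient

async def main():
    # "gemini-2.0-flash-gemini" is one of the registry keys added above
    client = LLMClient.basic("gemini-2.0-flash-gemini")
    responses = await client.process_prompts_async(
        [Conversation.user("What is 2+2? Answer briefly.")],
        show_progress=False,
    )
    print(responses[0].content.completion)

asyncio.run(main())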
src/lm_deluge/usage.py
@@ -71,6 +71,16 @@ class Usage:
             cache_write_tokens=None,
         )

+    @classmethod
+    def from_gemini_usage(cls, usage_data: dict) -> "Usage":
+        """Create Usage from Gemini API response usage data."""
+        return cls(
+            input_tokens=usage_data.get("promptTokenCount", 0),
+            output_tokens=usage_data.get("candidatesTokenCount", 0),
+            cache_read_tokens=None,  # Gemini doesn't support caching yet
+            cache_write_tokens=None,
+        )
+
     def to_dict(self) -> dict:
         """Convert to dictionary for serialization."""
         return {
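Given a typical Gemini usageMetadata payload, the new classmethod maps the token counts as follows (counts are illustrative; extra keys such as totalTokenCount are simply ignored by the .get() calls):

# Illustrative mapping of Gemini usageMetadata into a Usage object.
usage = Usage.from_gemini_usage(
    {"promptTokenCount": 17, "candidatesTokenCount": 42, "totalTokenCount": 59}
)
assert usage.input_tokens == 17
assert usage.output_tokens == 42
# cache_read_tokens and cache_write_tokens remain None, as in the code above.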
src/lm_deluge.egg-info/SOURCES.txt
@@ -28,6 +28,7 @@ src/lm_deluge/api_requests/anthropic.py
 src/lm_deluge/api_requests/base.py
 src/lm_deluge/api_requests/bedrock.py
 src/lm_deluge/api_requests/common.py
+src/lm_deluge/api_requests/gemini.py
 src/lm_deluge/api_requests/mistral.py
 src/lm_deluge/api_requests/openai.py
 src/lm_deluge/api_requests/response.py
@@ -56,6 +57,7 @@ tests/test_computer_use_integration.py
 tests/test_debug_format.py
 tests/test_file_integration.py
 tests/test_file_support.py
+tests/test_gemini_integration.py
 tests/test_image_models.py
 tests/test_image_utils.py
 tests/test_json_utils.py
@@ -69,6 +71,7 @@ tests/test_real_caching_bedrock.py
 tests/test_retry_fix.py
 tests/test_rich_display.py
 tests/test_sampling_params.py
+tests/test_simple_gemini.py
 tests/test_tool_calls.py
 tests/test_tool_from_function.py
 tests/test_tool_validation.py
tests/test_gemini_integration.py (new file)
@@ -0,0 +1,238 @@
+import asyncio
+import os
+from pathlib import Path
+
+from lm_deluge import Conversation, LLMClient, Message
+from lm_deluge.tool import Tool
+
+
+def test_gemini_basic_text():
+    """Test basic text generation with native Gemini API."""
+
+    # Skip if no API key
+    if not os.getenv("GEMINI_API_KEY"):
+        print("Skipping Gemini test - no API key")
+        return
+
+    client = LLMClient(
+        ["gemini-2.0-flash-gemini"],
+        max_requests_per_minute=10,
+        max_tokens_per_minute=100_000,
+    )
+
+    conversation = Conversation.user("What is 2+2? Answer briefly.")
+
+    responses = asyncio.run(client.process_prompts_async([conversation]))
+
+    assert len(responses) == 1
+    response = responses[0]
+    assert response
+    assert not response.is_error
+    assert response.content is not None
+    assert response.content.completion is not None
+    assert "4" in response.content.completion
+    print(f"✓ Basic text test passed: {response.content.completion}")
+
+
+def test_gemini_with_image():
+    """Test Gemini API with image support."""
+
+    # Skip if no API key
+    if not os.getenv("GEMINI_API_KEY"):
+        print("Skipping Gemini image test - no API key")
+        return
+
+    # Check if test image exists
+    test_image_path = Path(__file__).parent / "image.jpg"
+    if not test_image_path.exists():
+        print("Skipping image test - test image not found")
+        return
+
+    client = LLMClient(
+        ["gemini-2.0-flash-gemini"],
+        max_requests_per_minute=10,
+        max_tokens_per_minute=100_000,
+    )
+
+    conversation = Conversation(
+        [Message.user("What do you see in this image?").add_image(test_image_path)]
+    )
+
+    responses = asyncio.run(client.process_prompts_async([conversation]))
+
+    assert len(responses) == 1
+    response = responses[0]
+    assert response
+    assert not response.is_error
+    assert response.content is not None
+    assert response.content.completion is not None
+    print(f"✓ Image test passed: {response.content.completion[:100]}...")
+
+
+def test_gemini_with_pdf():
+    """Test Gemini API with PDF file support."""
+
+    # Skip if no API key
+    if not os.getenv("GEMINI_API_KEY"):
+        print("Skipping Gemini PDF test - no API key")
+        return
+
+    # Check if test PDF exists
+    test_pdf_path = Path(__file__).parent / "sample.pdf"
+    if not test_pdf_path.exists():
+        print("Skipping PDF test - test PDF not found")
+        return
+
+    client = LLMClient(
+        ["gemini-2.0-flash-gemini"],
+        max_requests_per_minute=10,
+        max_tokens_per_minute=100_000,
+    )
+
+    conversation = Conversation(
+        [Message.user("Summarize this PDF document briefly.").add_file(test_pdf_path)]
+    )
+
+    responses = asyncio.run(client.process_prompts_async([conversation]))
+
+    assert len(responses) == 1
+    response = responses[0]
+    assert response
+    assert not response.is_error
+    assert response.content is not None
+    assert response.content.completion is not None
+    print(f"✓ PDF test passed: {response.content.completion[:100]}...")
+
+
+def test_gemini_with_tools():
+    """Test Gemini API with tool calls."""
+
+    # Skip if no API key
+    if not os.getenv("GEMINI_API_KEY"):
+        print("Skipping Gemini tools test - no API key")
+        return
+
+    # Define a simple tool
+    def get_weather(location: str) -> str:
+        """Get the weather for a location"""
+        return f"The weather in {location} is sunny and 72°F"
+
+    weather_tool = Tool.from_function(get_weather)
+
+    client = LLMClient(
+        ["gemini-2.0-flash-gemini"],
+        max_requests_per_minute=10,
+        max_tokens_per_minute=100_000,
+    )
+
+    conversation = Conversation.user("What's the weather like in San Francisco?")
+
+    responses = asyncio.run(
+        client.process_prompts_async([conversation], tools=[weather_tool])
+    )
+
+    assert len(responses) == 1
+    response = responses[0]
+    assert response
+    assert not response.is_error
+    assert response.content is not None
+
+    # Check if tool call was made
+    tool_calls = response.content.tool_calls
+    if len(tool_calls) > 0:
+        tool_call = tool_calls[0]
+        assert tool_call.name == "get_weather"
+        assert "location" in tool_call.arguments
+        print(
+            f"✓ Tool call test passed: {tool_call.name} with args {tool_call.arguments}"
+        )
+    else:
+        print("✓ Tool test passed (no tool call made, but response was valid)")
+
+
+def test_gemini_json_mode():
+    """Test Gemini API with JSON mode."""
+
+    # Skip if no API key
+    if not os.getenv("GEMINI_API_KEY"):
+        print("Skipping Gemini JSON test - no API key")
+        return
+
+    from lm_deluge.config import SamplingParams
+
+    client = LLMClient(
+        ["gemini-2.0-flash-gemini"],
+        sampling_params=[SamplingParams(json_mode=True)],
+        max_requests_per_minute=10,
+        max_tokens_per_minute=100_000,
+    )
+
+    conversation = Conversation.user(
+        'Return a JSON object with keys "name" and "age" for a fictional character.'
+    )
+
+    responses = asyncio.run(client.process_prompts_async([conversation]))
+
+    assert len(responses) == 1
+    response = responses[0]
+    assert response
+    assert not response.is_error
+    assert response.content is not None
+    assert response.content.completion is not None
+
+    # Try to parse as JSON
+    import json
+
+    try:
+        parsed = json.loads(response.content.completion)
+        assert "name" in parsed or "age" in parsed
+        print(f"✓ JSON mode test passed: {response.content.completion}")
+    except json.JSONDecodeError:
+        print(
+            f"✓ JSON mode test passed (response may not be pure JSON): {response.content.completion}"
+        )
+
+
+def test_gemini_reasoning_model():
+    """Test Gemini reasoning model."""
+
+    # Skip if no API key
+    if not os.getenv("GEMINI_API_KEY"):
+        print("Skipping Gemini reasoning test - no API key")
+        return
+
+    from lm_deluge.config import SamplingParams
+
+    client = LLMClient(
+        ["gemini-2.5-pro-gemini"],  # reasoning model
+        sampling_params=[SamplingParams(reasoning_effort="medium")],
+        max_requests_per_minute=10,
+        max_tokens_per_minute=100_000,
+    )
+
+    conversation = Conversation.user(
+        "What is the 15th Fibonacci number? Show your reasoning."
+    )
+
+    responses = asyncio.run(client.process_prompts_async([conversation]))
+
+    assert len(responses) == 1
+    response = responses[0]
+    assert response
+    assert not response.is_error
+    assert response.content is not None
+    assert response.content.completion is not None
+    print(f"✓ Reasoning test passed: {response.content.completion[:100]}...")
+
+
+if __name__ == "__main__":
+    print("Testing Gemini API integration...")
+
+    test_gemini_basic_text()
+    test_gemini_with_image()
+    test_gemini_with_pdf()
+    test_gemini_with_tools()
+    test_gemini_json_mode()
+    test_gemini_reasoning_model()
+
+    print("✓ All Gemini tests completed!")
tests/test_simple_gemini.py (new file)
@@ -0,0 +1,32 @@
+#!/usr/bin/env python3
+"""Simple Gemini API test."""
+
+import asyncio
+import os
+from lm_deluge import LLMClient, Conversation
+
+
+async def main():
+    if not os.getenv("GEMINI_API_KEY"):
+        print("Skipping test - no GEMINI_API_KEY set")
+        return
+
+    print("Testing native Gemini API support...")
+
+    # Test the new -gemini model
+    client = LLMClient.basic("gemini-2.0-flash-gemini")
+    client.max_attempts = 2
+    client.request_timeout = 30
+
+    try:
+        res = await client.process_prompts_async(
+            [Conversation.user("What is the capital of France? Answer briefly.")],
+            show_progress=False,
+        )
+        print(f"✓ Gemini native API test passed: {res[0].content.completion}")
+    except Exception as e:
+        print(f"✗ Exception: {e}")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())