lm-deluge 0.0.35__tar.gz → 0.0.36__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lm-deluge might be problematic; see the registry page for details.
- {lm_deluge-0.0.35/src/lm_deluge.egg-info → lm_deluge-0.0.36}/PKG-INFO +1 -1
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/pyproject.toml +6 -2
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/anthropic.py +3 -3
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/gemini.py +3 -1
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/openai.py +3 -1
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/response.py +4 -3
- lm_deluge-0.0.36/src/lm_deluge/cli.py +300 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/client.py +7 -4
- lm_deluge-0.0.36/src/lm_deluge/models/__init__.py +144 -0
- lm_deluge-0.0.36/src/lm_deluge/models/anthropic.py +124 -0
- lm_deluge-0.0.36/src/lm_deluge/models/bedrock.py +99 -0
- lm_deluge-0.0.36/src/lm_deluge/models/cerebras.py +57 -0
- lm_deluge-0.0.36/src/lm_deluge/models/cohere.py +98 -0
- lm_deluge-0.0.36/src/lm_deluge/models/deepseek.py +27 -0
- lm_deluge-0.0.36/src/lm_deluge/models/fireworks.py +16 -0
- lm_deluge-0.0.36/src/lm_deluge/models/google.py +153 -0
- lm_deluge-0.0.36/src/lm_deluge/models/grok.py +38 -0
- lm_deluge-0.0.36/src/lm_deluge/models/groq.py +74 -0
- lm_deluge-0.0.36/src/lm_deluge/models/meta.py +65 -0
- lm_deluge-0.0.36/src/lm_deluge/models/mistral.py +110 -0
- lm_deluge-0.0.36/src/lm_deluge/models/openai.py +318 -0
- lm_deluge-0.0.36/src/lm_deluge/models/openrouter.py +1 -0
- lm_deluge-0.0.36/src/lm_deluge/models/together.py +112 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/prompt.py +2 -2
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/util/harmony.py +6 -4
- {lm_deluge-0.0.35 → lm_deluge-0.0.36/src/lm_deluge.egg-info}/PKG-INFO +1 -1
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge.egg-info/SOURCES.txt +15 -0
- lm_deluge-0.0.35/src/lm_deluge/models/__init__.py +0 -1390
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/LICENSE +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/README.md +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/setup.cfg +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/__init__.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/agent.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/__init__.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/base.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/bedrock.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/common.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/mistral.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/batches.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/built_in_tools/base.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/built_in_tools/openai.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/cache.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/config.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/embed.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/errors.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/file.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/gemini_limits.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/image.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/__init__.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/classify.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/extract.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/locate.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/ocr.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/score.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/translate.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/request_context.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/rerank.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/tool.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/tracker.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/usage.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/util/json.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/util/logprobs.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/util/spatial.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/util/validation.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/util/xml.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge.egg-info/requires.txt +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge.egg-info/top_level.txt +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/tests/test_builtin_tools.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/tests/test_native_mcp_server.py +0 -0
pyproject.toml
@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]
 
 [project]
 name = "lm_deluge"
-version = "0.0.35"
+version = "0.0.36"
 authors = [{ name = "Benjamin Anderson", email = "ben@trytaylor.ai" }]
 description = "Python utility for using LLM API models."
 readme = "README.md"
@@ -28,5 +28,9 @@ dependencies = [
     "pdf2image",
     "pillow",
     "fastmcp>=2.4",
-    "rich"
+    "rich",
+    # "textual>=0.58.0"
 ]
+
+# [project.scripts]
+# deluge = "lm_deluge.cli:main"
src/lm_deluge/api_requests/anthropic.py
@@ -57,9 +57,9 @@ def _build_anthropic_request(
     # handle thinking
     if model.reasoning_model and sampling_params.reasoning_effort:
         # translate reasoning effort of low, medium, high to budget tokens
-        budget = {
-
-
+        budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}.get(
+            sampling_params.reasoning_effort
+        )
         request_json["thinking"] = {
             "type": "enabled",
             "budget_tokens": budget,
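Note: the Anthropic hunk above swaps the multi-line dict for a one-line mapping with `.get()` when translating reasoning effort into a thinking budget. A minimal, self-contained sketch of that mapping (budget values copied from the diff; the `budget_for` helper is illustrative and not part of the package):

```python
# Budget values copied from the hunk above.
BUDGETS = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}


def budget_for(effort: str) -> int | None:
    # .get() mirrors the new code path: an unrecognized effort yields None
    # instead of raising KeyError the way plain indexing would.
    return BUDGETS.get(effort)


print(budget_for("medium"))  # 4096
print(budget_for("turbo"))   # None
```

The Gemini hunk below keeps plain indexing but guards it with an explicit membership check on the same four values.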
src/lm_deluge/api_requests/gemini.py
@@ -46,7 +46,9 @@ async def _build_gemini_request(
     else:
         thinking_config = {"includeThoughts": True}
     if effort in {"minimal", "low", "medium", "high"} and "flash" in model.id:
-        budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}[
+        budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}[
+            effort
+        ]
         thinking_config["thinkingBudget"] = budget
     request_json["generationConfig"]["thinkingConfig"] = thinking_config
 
src/lm_deluge/api_requests/openai.py
@@ -47,7 +47,9 @@ async def _build_oa_chat_request(
         else:
             effort = "low"
         if effort == "minimal" and "gpt-5" not in model.id:
-            print(
+            print(
+                "WARNING: 'minimal' reasoning effort only allowed for gpt-5. setting to 'low'."
+            )
             effort = "low"
         request_json["reasoning_effort"] = effort
     else:
src/lm_deluge/api_requests/response.py
@@ -89,9 +89,10 @@ class APIResponse:
                 + self.usage.output_tokens * api_model.output_cost / 1e6
             )
         elif self.content is not None and self.completion is not None:
-
-
-
+            pass
+            # print(
+            #     f"Warning: Completion provided without token counts for model {self.model_internal}."
+            # )
         if isinstance(self.prompt, Conversation):
             self.prompt = self.prompt.to_log()  # avoid keeping images in memory
 
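Note: the context lines above show the cost calculation in `APIResponse`; per the new models registry, `input_cost` and `output_cost` are expressed in dollars per million tokens, so each term is `tokens * cost / 1e6`. A tiny worked sketch with made-up prices:

```python
# Hypothetical prices in $ per million tokens (registry convention from this diff).
input_cost, output_cost = 3.0, 15.0
input_tokens, output_tokens = 1_200, 450

cost = input_tokens * input_cost / 1e6 + output_tokens * output_cost / 1e6
print(f"${cost:.6f}")  # $0.010350
```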
src/lm_deluge/cli.py (new file)
@@ -0,0 +1,300 @@
+# import argparse
+# import asyncio
+# import os
+# import sys
+# from typing import Optional
+
+# from .client import LLMClient
+# from .models import registry, APIModel
+# from .prompt import Conversation, Message
+
+
+# def _ensure_api_key_for_model(model_id: str, passed_api_key: Optional[str] = None):
+#     model: APIModel = APIModel.from_registry(model_id)
+#     env_var = model.api_key_env_var or ""
+#     if not env_var:
+#         return # Some providers (e.g., Bedrock entries) don't use a single key
+#     if os.getenv(env_var):
+#         return
+#     if passed_api_key:
+#         os.environ[env_var] = passed_api_key
+#         return
+#     # If we get here, interactive prompting should occur at the UI layer.
+#     # In non-interactive contexts, we will error before calling this without key.
+
+
+# def run_non_interactive(model_id: str, prompt_text: str, api_key: Optional[str]):
+#     _ensure_api_key_for_model(model_id, api_key)
+#     client = LLMClient(model_names=[model_id], progress="manual")
+#     # Single round, print completion only to stdout
+#     completions = asyncio.run(
+#         client.process_prompts_async(
+#             [Conversation.user(prompt_text)],
+#             return_completions_only=True,
+#             show_progress=False,
+#         )
+#     )
+#     out = completions[0] if completions and completions[0] is not None else ""
+#     # Write raw completion to stdout with no extra decoration
+#     sys.stdout.write(out)
+#     if out and not out.endswith("\n"):
+#         sys.stdout.write("\n")
+
+
+# # -------- Textual UI (interactive chat) --------
+# try:
+#     from textual.app import App, ComposeResult
+#     from textual.containers import Container, Horizontal
+#     from textual.widgets import Footer, Header, Input, Static, Button, ListView, ListItem, Label
+#     from textual.widgets._rich_log import RichLog
+#     from textual.reactive import reactive
+#     TEXTUAL_AVAILABLE = True
+# except Exception: # pragma: no cover - textual may not be installed in some dev envs
+#     TEXTUAL_AVAILABLE = False
+
+
+# if TEXTUAL_AVAILABLE:
+#     class ModelPicker(Static):
+#         """Minimal model picker: arrows to move, Enter to select."""
+
+#         def __init__(self, preselected: Optional[str] = None):
+#             super().__init__()
+#             self.preselected = preselected
+
+#         def compose(self) -> ComposeResult: # type: ignore[override]
+#             # Keep it terminal-y: one-line hint + list. No buttons.
+#             yield Static("Pick a model (Enter)", classes="hint")
+#             list_items: list[ListItem] = []
+#             # Curated small set to avoid scrollbars
+#             preferred = [
+#                 "gpt-5",
+#                 "gpt-5-chat",
+#                 "gpt-5-mini",
+#                 "claude-4-sonnet",
+#                 "gemini-2.5-pro",
+#                 "gemini-2.5-flash",
+#                 "gemini-2.0-flash",
+#             ]
+#             for mid in preferred:
+#                 if mid in registry:
+#                     list_items.append(ListItem(Label(mid)))
+#             yield ListView(*list_items, classes="model-list")
+
+#         def on_mount(self) -> None: # type: ignore[override]
+#             # Focus the list so Enter works immediately
+#             self.query_one(ListView).focus()
+
+#         def get_selected(self) -> Optional[str]:
+#             listview = self.query_one(ListView)
+#             if not listview.index is None and 0 <= listview.index < len(listview.children):
+#                 label = listview.children[listview.index].query_one(Label)
+#                 return label.renderable if isinstance(label.renderable, str) else str(label.renderable)
+#             return None
+
+#         def on_key(self, event): # type: ignore[override]
+#             # Select current item on Enter
+#             try:
+#                 key = getattr(event, "key", None)
+#             except Exception:
+#                 key = None
+#             if key == "enter":
+#                 sel = self.get_selected()
+#                 if sel:
+#                     # Ask app to proceed with the chosen model
+#                     getattr(self.app, "model_chosen", lambda *_: None)(sel) # type: ignore[attr-defined]
+
+
+#     class ApiKeyPrompt(Static):
+#         def __init__(self, env_var: str):
+#             super().__init__()
+#             self.env_var = env_var
+#             self.input = Input(password=True, placeholder=f"Enter {env_var}")
+
+#         def compose(self) -> ComposeResult: # type: ignore[override]
+#             yield Static(f"API key required: set {self.env_var}", classes="title")
+#             yield self.input
+#             yield Button("Save", id="save-key", variant="primary")
+
+#         def value(self) -> str:
+#             return self.input.value
+
+
+#     class MessagesView(RichLog):
+#         def __init__(self, **kwargs):
+#             # Terminal-like log with markup and auto-scroll
+#             super().__init__(wrap=True, markup=True, auto_scroll=True, **kwargs)
+
+#         def append_user(self, text: str):
+#             self.write(f"[bold cyan]You:[/bold cyan] {text}")
+
+#         def append_assistant(self, text: str):
+#             self.write(f"[bold magenta]Model:[/bold magenta] {text}")
+
+
+#     class ChatInput(Horizontal):
+#         def compose(self) -> ComposeResult: # type: ignore[override]
+#             self.input = Input(placeholder="Type message, Enter to send")
+#             yield self.input
+
+
+#     class DelugeApp(App):
+#         CSS = """
+#         #screen { height: 100%; }
+#         .chat { height: 1fr; padding: 0 1; }
+#         .composer { dock: bottom; height: 3; }
+#         """
+
+#         BINDINGS = [
+#             ("ctrl+c", "quit", "Quit"),
+#         ]
+
+#         model_id = reactive("")
+#         api_env_var = reactive("")
+
+#         def __init__(self, model_arg: Optional[str], api_key_arg: Optional[str]):
+#             super().__init__()
+#             self._model_arg = model_arg
+#             self._api_key_arg = api_key_arg
+#             self._conversation = Conversation.system("You are a helpful assistant.")
+#             self._client = None
+
+#         def compose(self) -> ComposeResult: # type: ignore[override]
+#             yield Header(show_clock=True)
+#             self.body = Container(id="screen")
+#             yield self.body
+#             yield Footer()
+
+#         def on_mount(self): # type: ignore[override]
+#             # Step 1: pick model if not provided
+#             if not self._model_arg:
+#                 self.model_picker = ModelPicker()
+#                 self.body.mount(self.model_picker)
+#             else:
+#                 self.model_id = self._model_arg
+#                 self._after_model_selected()
+
+#         def action_quit(self) -> None: # type: ignore[override]
+#             self.exit()
+
+#         def _after_model_selected(self):
+#             # Resolve API requirement
+#             model = APIModel.from_registry(self.model_id)
+#             self.api_env_var = model.api_key_env_var or ""
+#             if self.api_env_var and not os.getenv(self.api_env_var):
+#                 if self._api_key_arg:
+#                     os.environ[self.api_env_var] = self._api_key_arg
+#                     self._show_chat()
+#                 else:
+#                     # Prompt for key
+#                     self.body.remove_children()
+#                     self.key_prompt = ApiKeyPrompt(self.api_env_var)
+#                     self.body.mount(self.key_prompt)
+#             else:
+#                 self._show_chat()
+
+#         def model_chosen(self, sel: str) -> None:
+#             """Called by ModelPicker when Enter is pressed on a selection."""
+#             self.model_id = sel
+#             self._after_model_selected()
+
+#         def _show_chat(self):
+#             self.body.remove_children()
+#             # Build UI
+#             self.messages = MessagesView(classes="chat")
+#             self.composer = ChatInput(classes="composer")
+#             self.body.mount(self.messages)
+#             self.body.mount(self.composer)
+#             # Focus input after mounting
+#             self.set_focus(self.composer.input)
+#             # Init client
+#             self._client = LLMClient(model_names=[self.model_id], progress="manual")
+#             # Update header subtitle
+#             self.query_one(Header).sub_title = f"Model: {self.model_id}"
+
+#         async def _send_and_receive(self, text: str):
+#             # Append user message
+#             self._conversation.add(Message.user(text))
+#             self.messages.append_user(text)
+#             # Call model (non-streaming for simplicity across providers)
+#             responses = await self._client.process_prompts_async(
+#                 [self._conversation], return_completions_only=False, show_progress=False
+#             )
+#             resp = responses[0]
+#             if resp and resp.completion:
+#                 self._conversation.add(Message.ai(resp.completion))
+#                 self.messages.append_assistant(resp.completion)
+#             else:
+#                 self.messages.append_assistant("<no response>")
+
+#         async def on_button_pressed(self, event): # type: ignore[override]
+#             if hasattr(event.button, "id"):
+#                 if event.button.id == "save-key":
+#                     key = self.key_prompt.value().strip()
+#                     if self.api_env_var and key:
+#                         os.environ[self.api_env_var] = key
+#                     self._show_chat()
+#                 elif event.button.id == "send":
+#                     text = self.composer.input.value.strip()
+#                     if text:
+#                         self.composer.input.value = ""
+#                         await self._send_and_receive(text)
+
+#         async def on_input_submitted(self, event: Input.Submitted): # type: ignore[override]
+#             if isinstance(event.input.parent, ChatInput):
+#                 text = event.value.strip()
+#                 if text:
+#                     self.composer.input.value = ""
+#                     await self._send_and_receive(text)
+
+
+# def run_interactive(model: Optional[str], api_key: Optional[str]):
+#     if not TEXTUAL_AVAILABLE:
+#         sys.stderr.write(
+#             "Textual is not installed. Please install with `pip install textual` or reinstall lm_deluge.\n"
+#         )
+#         sys.exit(2)
+#     app = DelugeApp(model, api_key) # type: ignore[name-defined]
+#     app.run()
+
+
+# def main():
+#     parser = argparse.ArgumentParser(prog="deluge", description="Deluge CLI")
+#     parser.add_argument("prompt", nargs="*", help="Prompt text (non-interactive -p only)")
+#     parser.add_argument("--model", dest="model", help="Model ID to use")
+#     parser.add_argument("--api-key", dest="api_key", help="API key for chosen model provider")
+#     parser.add_argument(
+#         "-p",
+#         dest="print_mode",
+#         action="store_true",
+#         help="Print single completion to stdout (non-interactive)",
+#     )
+
+#     args = parser.parse_args()
+
+#     if args.print_mode:
+#         # Determine prompt text
+#         prompt_text = " ".join(args.prompt).strip()
+#         if not prompt_text and not sys.stdin.isatty():
+#             prompt_text = sys.stdin.read()
+#         if not prompt_text:
+#             sys.stderr.write("No prompt provided. Pass text or pipe input.\n")
+#             sys.exit(2)
+
+#         # Determine model
+#         model_id = args.model or os.getenv("DELUGE_DEFAULT_MODEL") or "gpt-4o-mini"
+#         # Require API key non-interactively if provider needs it and not set
+#         env_var = APIModel.from_registry(model_id).api_key_env_var or ""
+#         if env_var and not (os.getenv(env_var) or args.api_key):
+#             sys.stderr.write(
+#                 f"Missing API key. Set {env_var} or pass --api-key.\n"
+#             )
+#             sys.exit(2)
+#         run_non_interactive(model_id, prompt_text, args.api_key)
+#         return
+
+#     # Interactive Textual chat
+#     run_interactive(args.model, args.api_key)
+
+
+# if __name__ == "__main__":
+#     main()
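Note: all of cli.py ships commented out, and the matching `deluge` console script in pyproject.toml is commented out too, so no CLI is actually wired up in 0.0.36. For orientation only, here is a sketch of the non-interactive flow the commented `run_non_interactive` describes, written against API names that appear elsewhere in this diff (`LLMClient`, `Conversation.user`, `process_prompts_async`); the top-level import path is an assumption:

```python
import asyncio

from lm_deluge import LLMClient  # assumed export; cli.py itself uses relative imports
from lm_deluge.prompt import Conversation


def run_once(model_id: str, prompt_text: str) -> str:
    # One prompt in, one completion out, mirroring the commented-out run_non_interactive().
    client = LLMClient(model_names=[model_id], progress="manual")
    completions = asyncio.run(
        client.process_prompts_async(
            [Conversation.user(prompt_text)],
            return_completions_only=True,
            show_progress=False,
        )
    )
    return completions[0] or ""


if __name__ == "__main__":
    print(run_once("gpt-4o-mini", "Say hello in one short sentence."))
```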
src/lm_deluge/client.py
@@ -22,6 +22,7 @@ from .models import APIModel, registry
 from .request_context import RequestContext
 from .tracker import StatusTracker
 
+
 # TODO: add optional max_input_tokens to client so we can reject long prompts to prevent abuse
 class _LLMClient(BaseModel):
     """
@@ -246,6 +247,7 @@ class _LLMClient(BaseModel):
         self, context: RequestContext, retry_queue: asyncio.Queue | None = None
     ) -> APIResponse:
         """Handle caching and single HTTP call for a request. Failed requests go to retry queue."""
+
         # Check cache first
         def _maybe_postprocess(response: APIResponse):
             if self.postprocess:
@@ -712,6 +714,7 @@ class _LLMClient(BaseModel):
             batch_ids, provider, poll_interval=30
         )
 
+
 # factory function -- allows positional model names,
 # keeps pydantic validation, without sacrificing IDE support
 @overload
@@ -736,7 +739,7 @@ def LLMClient(
     top_logprobs: int | None = None,
     force_local_mcp: bool = False,
     progress: Literal["rich", "tqdm", "manual"] = "rich",
-    postprocess: Callable[[APIResponse], APIResponse] | None = None
+    postprocess: Callable[[APIResponse], APIResponse] | None = None,
 ) -> _LLMClient: ...
 
 
@@ -762,7 +765,7 @@ def LLMClient(
     top_logprobs: int | None = None,
     force_local_mcp: bool = False,
     progress: Literal["rich", "tqdm", "manual"] = "rich",
-    postprocess: Callable[[APIResponse], APIResponse] | None = None
+    postprocess: Callable[[APIResponse], APIResponse] | None = None,
 ) -> _LLMClient: ...
 
 
@@ -787,7 +790,7 @@ def LLMClient(
     top_logprobs: int | None = None,
     force_local_mcp: bool = False,
     progress: Literal["rich", "tqdm", "manual"] = "rich",
-    postprocess: Callable[[APIResponse], APIResponse] | None = None
+    postprocess: Callable[[APIResponse], APIResponse] | None = None,
 ) -> _LLMClient:
     """
     Create an LLMClient with model_names as a positional argument.
@@ -824,5 +827,5 @@ def LLMClient(
         top_logprobs=top_logprobs,
        force_local_mcp=force_local_mcp,
         progress=progress,
-        postprocess=postprocess
+        postprocess=postprocess,
     )
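Note: the client.py hunks above are mostly trailing-comma fixes, but they surface the factory signature, including `postprocess: Callable[[APIResponse], APIResponse] | None`. A hedged sketch of passing such a hook (the whitespace-stripping transform is invented for illustration):

```python
from lm_deluge import LLMClient  # assumed export
from lm_deluge.api_requests.response import APIResponse  # module path taken from this diff


def strip_whitespace(resp: APIResponse) -> APIResponse:
    # Illustrative post-processing step: trim the completion, then hand the response back.
    if resp.completion:
        resp.completion = resp.completion.strip()
    return resp


client = LLMClient("gpt-4o-mini", postprocess=strip_whitespace)
```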
src/lm_deluge/models/__init__.py (new file)
@@ -0,0 +1,144 @@
+from __future__ import annotations
+
+import random
+from dataclasses import dataclass, field
+
+from ..request_context import RequestContext
+
+# Import and register all provider models
+from .anthropic import ANTHROPIC_MODELS
+from .bedrock import BEDROCK_MODELS
+from .cerebras import CEREBRAS_MODELS
+from .cohere import COHERE_MODELS
+from .deepseek import DEEPSEEK_MODELS
+from .fireworks import FIREWORKS_MODELS
+from .google import GOOGLE_MODELS
+from .grok import XAI_MODELS
+from .groq import GROQ_MODELS
+from .meta import META_MODELS
+from .mistral import MISTRAL_MODELS
+from .openai import OPENAI_MODELS
+from .openrouter import OPENROUTER_MODELS
+from .together import TOGETHER_MODELS
+
+
+@dataclass
+class APIModel:
+    id: str
+    name: str
+    api_base: str
+    api_key_env_var: str
+    api_spec: str
+    cached_input_cost: float | None = 0
+    input_cost: float | None = 0 # $ per million input tokens
+    output_cost: float | None = 0 # $ per million output tokens
+    supports_json: bool = False
+    supports_logprobs: bool = False
+    supports_responses: bool = False
+    reasoning_model: bool = False
+    regions: list[str] | dict[str, int] = field(default_factory=list)
+    tokens_per_minute: int | None = None
+    requests_per_minute: int | None = None
+    gpus: list[str] | None = None
+
+    @classmethod
+    def from_registry(cls, name: str):
+        if name not in registry:
+            raise ValueError(f"Model {name} not found in registry")
+        cfg = registry[name]
+        if isinstance(cfg, APIModel):
+            return cfg
+        return cls(**cfg)
+
+    def sample_region(self):
+        if isinstance(self.regions, list):
+            regions = self.regions
+            weights = [1] * len(regions)
+        elif isinstance(self.regions, dict):
+            regions = list(self.regions.keys())
+            weights = self.regions.values()
+        else:
+            raise ValueError("no regions to sample")
+        random.sample(regions, 1, counts=weights)[0]
+
+    def make_request(self, context: RequestContext): # -> "APIRequestBase"
+        from ..api_requests.common import CLASSES
+
+        api_spec = self.api_spec
+        if (
+            context.use_responses_api
+            and self.supports_responses
+            and api_spec == "openai"
+        ):
+            api_spec = "openai-responses"
+
+        request_class = CLASSES.get(api_spec, None)
+        if request_class is None:
+            raise ValueError(f"Unsupported API spec: {api_spec}")
+        return request_class(context=context)
+
+
+registry: dict[str, APIModel] = {}
+
+
+def register_model(
+    id: str,
+    name: str,
+    api_base: str,
+    api_key_env_var: str,
+    api_spec: str,
+    input_cost: float | None = 0, # $ per million input tokens
+    cached_input_cost: float | None = 0,
+    output_cost: float | None = 0, # $ per million output tokens
+    supports_json: bool = False,
+    supports_logprobs: bool = False,
+    supports_responses: bool = False,
+    reasoning_model: bool = False,
+    regions: list[str] | dict[str, int] = field(default_factory=list),
+    tokens_per_minute: int | None = None,
+    requests_per_minute: int | None = None,
+) -> APIModel:
+    """Register a model configuration and return the created APIModel."""
+    model = APIModel(
+        id=id,
+        name=name,
+        api_base=api_base,
+        api_key_env_var=api_key_env_var,
+        api_spec=api_spec,
+        cached_input_cost=cached_input_cost,
+        input_cost=input_cost,
+        output_cost=output_cost,
+        supports_json=supports_json,
+        supports_logprobs=supports_logprobs,
+        supports_responses=supports_responses,
+        reasoning_model=reasoning_model,
+        regions=regions,
+        tokens_per_minute=tokens_per_minute,
+        requests_per_minute=requests_per_minute,
+    )
+    registry[model.id] = model
+    return model
+
+
+# Register all models from all providers
+for model_dict in [
+    ANTHROPIC_MODELS,
+    BEDROCK_MODELS,
+    COHERE_MODELS,
+    DEEPSEEK_MODELS,
+    FIREWORKS_MODELS,
+    GOOGLE_MODELS,
+    XAI_MODELS,
+    META_MODELS,
+    MISTRAL_MODELS,
+    OPENAI_MODELS,
+    OPENROUTER_MODELS,
+    TOGETHER_MODELS,
+    GROQ_MODELS,
+    CEREBRAS_MODELS,
+]:
+    for cfg in model_dict.values():
+        register_model(**cfg)
+
+
+# print("Valid models:", registry.keys())