lm-deluge 0.0.34__tar.gz → 0.0.36__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lm-deluge might be problematic.
- {lm_deluge-0.0.34/src/lm_deluge.egg-info → lm_deluge-0.0.36}/PKG-INFO +1 -1
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/pyproject.toml +6 -2
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/anthropic.py +1 -1
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/gemini.py +4 -2
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/openai.py +17 -4
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/response.py +4 -3
- lm_deluge-0.0.36/src/lm_deluge/cli.py +300 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/client.py +22 -73
- lm_deluge-0.0.36/src/lm_deluge/models/__init__.py +144 -0
- lm_deluge-0.0.36/src/lm_deluge/models/anthropic.py +124 -0
- lm_deluge-0.0.36/src/lm_deluge/models/bedrock.py +99 -0
- lm_deluge-0.0.36/src/lm_deluge/models/cerebras.py +57 -0
- lm_deluge-0.0.36/src/lm_deluge/models/cohere.py +98 -0
- lm_deluge-0.0.36/src/lm_deluge/models/deepseek.py +27 -0
- lm_deluge-0.0.36/src/lm_deluge/models/fireworks.py +16 -0
- lm_deluge-0.0.36/src/lm_deluge/models/google.py +153 -0
- lm_deluge-0.0.36/src/lm_deluge/models/grok.py +38 -0
- lm_deluge-0.0.36/src/lm_deluge/models/groq.py +74 -0
- lm_deluge-0.0.36/src/lm_deluge/models/meta.py +65 -0
- lm_deluge-0.0.36/src/lm_deluge/models/mistral.py +110 -0
- lm_deluge-0.0.36/src/lm_deluge/models/openai.py +318 -0
- lm_deluge-0.0.36/src/lm_deluge/models/openrouter.py +1 -0
- lm_deluge-0.0.36/src/lm_deluge/models/together.py +112 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/prompt.py +2 -2
- lm_deluge-0.0.36/src/lm_deluge/util/harmony.py +47 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36/src/lm_deluge.egg-info}/PKG-INFO +1 -1
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge.egg-info/SOURCES.txt +17 -1
- lm_deluge-0.0.34/src/lm_deluge/models.py +0 -1305
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/LICENSE +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/README.md +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/setup.cfg +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/__init__.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/agent.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/__init__.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/base.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/bedrock.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/common.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/mistral.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/batches.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/built_in_tools/base.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/built_in_tools/openai.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/cache.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/config.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/embed.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/errors.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/file.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/gemini_limits.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/image.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/__init__.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/classify.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/extract.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/locate.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/ocr.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/score.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/translate.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/request_context.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/rerank.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/tool.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/tracker.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/usage.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/util/json.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/util/logprobs.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/util/spatial.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/util/validation.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/util/xml.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge.egg-info/requires.txt +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge.egg-info/top_level.txt +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/tests/test_builtin_tools.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/tests/test_native_mcp_server.py +0 -0
{lm_deluge-0.0.34 → lm_deluge-0.0.36}/pyproject.toml
@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]
 
 [project]
 name = "lm_deluge"
-version = "0.0.34"
+version = "0.0.36"
 authors = [{ name = "Benjamin Anderson", email = "ben@trytaylor.ai" }]
 description = "Python utility for using LLM API models."
 readme = "README.md"
@@ -28,5 +28,9 @@ dependencies = [
     "pdf2image",
     "pillow",
     "fastmcp>=2.4",
-    "rich"
+    "rich",
+    # "textual>=0.58.0"
 ]
+
+# [project.scripts]
+# deluge = "lm_deluge.cli:main"
{lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/anthropic.py
@@ -57,7 +57,7 @@ def _build_anthropic_request(
     # handle thinking
     if model.reasoning_model and sampling_params.reasoning_effort:
         # translate reasoning effort of low, medium, high to budget tokens
-        budget = {"low": 1024, "medium": 4096, "high": 16384}.get(
+        budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}.get(
             sampling_params.reasoning_effort
         )
         request_json["thinking"] = {
{lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/gemini.py
@@ -45,8 +45,10 @@ async def _build_gemini_request(
         thinking_config = {"includeThoughts": False, "thinkingBudget": budget}
     else:
         thinking_config = {"includeThoughts": True}
-        if effort in {"low", "medium", "high"} and "flash" in model.id:
-            budget = {"low": 1024, "medium": 4096, "high": 16384}[effort]
+        if effort in {"minimal", "low", "medium", "high"} and "flash" in model.id:
+            budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}[
+                effort
+            ]
             thinking_config["thinkingBudget"] = budget
     request_json["generationConfig"]["thinkingConfig"] = thinking_config
 
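Both request builders above share the same effort-to-budget table, with the new "minimal" level mapped to a 256-token thinking budget. Below is a minimal standalone sketch of that mapping; the helper name is illustrative and not part of lm_deluge, which keeps the dict inline in each request builder.

EFFORT_TO_BUDGET = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}

def thinking_budget(reasoning_effort: str | None) -> int | None:
    # Mirrors the inline dicts in anthropic.py and gemini.py above.
    if reasoning_effort is None:
        return None
    return EFFORT_TO_BUDGET.get(reasoning_effort)

assert thinking_budget("minimal") == 256
assert thinking_budget("high") == 16384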
{lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/openai.py
@@ -42,8 +42,15 @@ async def _build_oa_chat_request(
         # Disable reasoning for Gemini models when no effort requested
         if "gemini" in model.id:
             effort = "none"
+        elif "gpt-5" in model.id:
+            effort = "minimal"
         else:
             effort = "low"
+        if effort == "minimal" and "gpt-5" not in model.id:
+            print(
+                "WARNING: 'minimal' reasoning effort only allowed for gpt-5. setting to 'low'."
+            )
+            effort = "low"
         request_json["reasoning_effort"] = effort
     else:
         if sampling_params.reasoning_effort:
@@ -122,15 +129,21 @@ class OpenAIRequest(APIRequestBase):
             message = data["choices"][0]["message"]
             finish_reason = data["choices"][0]["finish_reason"]
 
-            # Add text content if present
-            if message.get("content"):
-                parts.append(Text(message["content"]))
-
             # Add thinking content if present (reasoning models)
             if "reasoning_content" in message:
                 thinking = message["reasoning_content"]
                 parts.append(Thinking(thinking))
 
+            # Together AI returns reasoning in a "reasoning"
+            # field which is not correct but whatever
+            if message.get("reasoning"):
+                thinking = message["reasoning"]
+                parts.append(Thinking(thinking))
+
+            # Add text content if present
+            if message.get("content"):
+                parts.append(Text(message["content"]))
+
             # Add tool calls if present
             if "tool_calls" in message:
                 for tool_call in message["tool_calls"]:
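The response-parsing change above moves text extraction after the reasoning checks, so any Thinking part (from reasoning_content, or from Together AI's nonstandard reasoning field) is emitted before the Text part. A self-contained sketch of that ordering, using stand-in dataclasses rather than lm_deluge's own Text/Thinking types:

from dataclasses import dataclass

@dataclass
class Thinking:  # stand-in for lm_deluge's Thinking part
    content: str

@dataclass
class Text:  # stand-in for lm_deluge's Text part
    content: str

def parse_parts(message: dict) -> list:
    parts = []
    if "reasoning_content" in message:  # standard reasoning field
        parts.append(Thinking(message["reasoning_content"]))
    if message.get("reasoning"):  # Together AI's nonstandard field
        parts.append(Thinking(message["reasoning"]))
    if message.get("content"):  # plain text now comes last
        parts.append(Text(message["content"]))
    return parts

parts = parse_parts({"reasoning": "think step by step...", "content": "42"})
assert isinstance(parts[0], Thinking) and isinstance(parts[1], Text)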
{lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/response.py
@@ -89,9 +89,10 @@ class APIResponse:
                 + self.usage.output_tokens * api_model.output_cost / 1e6
             )
         elif self.content is not None and self.completion is not None:
-            print(
-                f"Warning: Completion provided without token counts for model {self.model_internal}."
-            )
+            pass
+            # print(
+            #     f"Warning: Completion provided without token counts for model {self.model_internal}."
+            # )
         if isinstance(self.prompt, Conversation):
             self.prompt = self.prompt.to_log()  # avoid keeping images in memory
 
lm_deluge-0.0.36/src/lm_deluge/cli.py (new file)
@@ -0,0 +1,300 @@
+# import argparse
+# import asyncio
+# import os
+# import sys
+# from typing import Optional
+
+# from .client import LLMClient
+# from .models import registry, APIModel
+# from .prompt import Conversation, Message
+
+
+# def _ensure_api_key_for_model(model_id: str, passed_api_key: Optional[str] = None):
+#     model: APIModel = APIModel.from_registry(model_id)
+#     env_var = model.api_key_env_var or ""
+#     if not env_var:
+#         return  # Some providers (e.g., Bedrock entries) don't use a single key
+#     if os.getenv(env_var):
+#         return
+#     if passed_api_key:
+#         os.environ[env_var] = passed_api_key
+#         return
+#     # If we get here, interactive prompting should occur at the UI layer.
+#     # In non-interactive contexts, we will error before calling this without key.
+
+
+# def run_non_interactive(model_id: str, prompt_text: str, api_key: Optional[str]):
+#     _ensure_api_key_for_model(model_id, api_key)
+#     client = LLMClient(model_names=[model_id], progress="manual")
+#     # Single round, print completion only to stdout
+#     completions = asyncio.run(
+#         client.process_prompts_async(
+#             [Conversation.user(prompt_text)],
+#             return_completions_only=True,
+#             show_progress=False,
+#         )
+#     )
+#     out = completions[0] if completions and completions[0] is not None else ""
+#     # Write raw completion to stdout with no extra decoration
+#     sys.stdout.write(out)
+#     if out and not out.endswith("\n"):
+#         sys.stdout.write("\n")
+
+
+# # -------- Textual UI (interactive chat) --------
+# try:
+#     from textual.app import App, ComposeResult
+#     from textual.containers import Container, Horizontal
+#     from textual.widgets import Footer, Header, Input, Static, Button, ListView, ListItem, Label
+#     from textual.widgets._rich_log import RichLog
+#     from textual.reactive import reactive
+#     TEXTUAL_AVAILABLE = True
+# except Exception:  # pragma: no cover - textual may not be installed in some dev envs
+#     TEXTUAL_AVAILABLE = False
+
+
+# if TEXTUAL_AVAILABLE:
+#     class ModelPicker(Static):
+#         """Minimal model picker: arrows to move, Enter to select."""
+
+#         def __init__(self, preselected: Optional[str] = None):
+#             super().__init__()
+#             self.preselected = preselected
+
+#         def compose(self) -> ComposeResult:  # type: ignore[override]
+#             # Keep it terminal-y: one-line hint + list. No buttons.
+#             yield Static("Pick a model (Enter)", classes="hint")
+#             list_items: list[ListItem] = []
+#             # Curated small set to avoid scrollbars
+#             preferred = [
+#                 "gpt-5",
+#                 "gpt-5-chat",
+#                 "gpt-5-mini",
+#                 "claude-4-sonnet",
+#                 "gemini-2.5-pro",
+#                 "gemini-2.5-flash",
+#                 "gemini-2.0-flash",
+#             ]
+#             for mid in preferred:
+#                 if mid in registry:
+#                     list_items.append(ListItem(Label(mid)))
+#             yield ListView(*list_items, classes="model-list")
+
+#         def on_mount(self) -> None:  # type: ignore[override]
+#             # Focus the list so Enter works immediately
+#             self.query_one(ListView).focus()
+
+#         def get_selected(self) -> Optional[str]:
+#             listview = self.query_one(ListView)
+#             if not listview.index is None and 0 <= listview.index < len(listview.children):
+#                 label = listview.children[listview.index].query_one(Label)
+#                 return label.renderable if isinstance(label.renderable, str) else str(label.renderable)
+#             return None
+
+#         def on_key(self, event):  # type: ignore[override]
+#             # Select current item on Enter
+#             try:
+#                 key = getattr(event, "key", None)
+#             except Exception:
+#                 key = None
+#             if key == "enter":
+#                 sel = self.get_selected()
+#                 if sel:
+#                     # Ask app to proceed with the chosen model
+#                     getattr(self.app, "model_chosen", lambda *_: None)(sel)  # type: ignore[attr-defined]
+
+
+#     class ApiKeyPrompt(Static):
+#         def __init__(self, env_var: str):
+#             super().__init__()
+#             self.env_var = env_var
+#             self.input = Input(password=True, placeholder=f"Enter {env_var}")
+
+#         def compose(self) -> ComposeResult:  # type: ignore[override]
+#             yield Static(f"API key required: set {self.env_var}", classes="title")
+#             yield self.input
+#             yield Button("Save", id="save-key", variant="primary")
+
+#         def value(self) -> str:
+#             return self.input.value
+
+
+#     class MessagesView(RichLog):
+#         def __init__(self, **kwargs):
+#             # Terminal-like log with markup and auto-scroll
+#             super().__init__(wrap=True, markup=True, auto_scroll=True, **kwargs)
+
+#         def append_user(self, text: str):
+#             self.write(f"[bold cyan]You:[/bold cyan] {text}")
+
+#         def append_assistant(self, text: str):
+#             self.write(f"[bold magenta]Model:[/bold magenta] {text}")
+
+
+#     class ChatInput(Horizontal):
+#         def compose(self) -> ComposeResult:  # type: ignore[override]
+#             self.input = Input(placeholder="Type message, Enter to send")
+#             yield self.input
+
+
+#     class DelugeApp(App):
+#         CSS = """
+#         #screen { height: 100%; }
+#         .chat { height: 1fr; padding: 0 1; }
+#         .composer { dock: bottom; height: 3; }
+#         """
+
+#         BINDINGS = [
+#             ("ctrl+c", "quit", "Quit"),
+#         ]
+
+#         model_id = reactive("")
+#         api_env_var = reactive("")
+
+#         def __init__(self, model_arg: Optional[str], api_key_arg: Optional[str]):
+#             super().__init__()
+#             self._model_arg = model_arg
+#             self._api_key_arg = api_key_arg
+#             self._conversation = Conversation.system("You are a helpful assistant.")
+#             self._client = None
+
+#         def compose(self) -> ComposeResult:  # type: ignore[override]
+#             yield Header(show_clock=True)
+#             self.body = Container(id="screen")
+#             yield self.body
+#             yield Footer()
+
+#         def on_mount(self):  # type: ignore[override]
+#             # Step 1: pick model if not provided
+#             if not self._model_arg:
+#                 self.model_picker = ModelPicker()
+#                 self.body.mount(self.model_picker)
+#             else:
+#                 self.model_id = self._model_arg
+#                 self._after_model_selected()
+
+#         def action_quit(self) -> None:  # type: ignore[override]
+#             self.exit()
+
+#         def _after_model_selected(self):
+#             # Resolve API requirement
+#             model = APIModel.from_registry(self.model_id)
+#             self.api_env_var = model.api_key_env_var or ""
+#             if self.api_env_var and not os.getenv(self.api_env_var):
+#                 if self._api_key_arg:
+#                     os.environ[self.api_env_var] = self._api_key_arg
+#                     self._show_chat()
+#                 else:
+#                     # Prompt for key
+#                     self.body.remove_children()
+#                     self.key_prompt = ApiKeyPrompt(self.api_env_var)
+#                     self.body.mount(self.key_prompt)
+#             else:
+#                 self._show_chat()
+
+#         def model_chosen(self, sel: str) -> None:
+#             """Called by ModelPicker when Enter is pressed on a selection."""
+#             self.model_id = sel
+#             self._after_model_selected()
+
+#         def _show_chat(self):
+#             self.body.remove_children()
+#             # Build UI
+#             self.messages = MessagesView(classes="chat")
+#             self.composer = ChatInput(classes="composer")
+#             self.body.mount(self.messages)
+#             self.body.mount(self.composer)
+#             # Focus input after mounting
+#             self.set_focus(self.composer.input)
+#             # Init client
+#             self._client = LLMClient(model_names=[self.model_id], progress="manual")
+#             # Update header subtitle
+#             self.query_one(Header).sub_title = f"Model: {self.model_id}"
+
+#         async def _send_and_receive(self, text: str):
+#             # Append user message
+#             self._conversation.add(Message.user(text))
+#             self.messages.append_user(text)
+#             # Call model (non-streaming for simplicity across providers)
+#             responses = await self._client.process_prompts_async(
+#                 [self._conversation], return_completions_only=False, show_progress=False
+#             )
+#             resp = responses[0]
+#             if resp and resp.completion:
+#                 self._conversation.add(Message.ai(resp.completion))
+#                 self.messages.append_assistant(resp.completion)
+#             else:
+#                 self.messages.append_assistant("<no response>")
+
+#         async def on_button_pressed(self, event):  # type: ignore[override]
+#             if hasattr(event.button, "id"):
+#                 if event.button.id == "save-key":
+#                     key = self.key_prompt.value().strip()
+#                     if self.api_env_var and key:
+#                         os.environ[self.api_env_var] = key
+#                         self._show_chat()
+#                 elif event.button.id == "send":
+#                     text = self.composer.input.value.strip()
+#                     if text:
+#                         self.composer.input.value = ""
+#                         await self._send_and_receive(text)
+
+#         async def on_input_submitted(self, event: Input.Submitted):  # type: ignore[override]
+#             if isinstance(event.input.parent, ChatInput):
+#                 text = event.value.strip()
+#                 if text:
+#                     self.composer.input.value = ""
+#                     await self._send_and_receive(text)
+
+
+# def run_interactive(model: Optional[str], api_key: Optional[str]):
+#     if not TEXTUAL_AVAILABLE:
+#         sys.stderr.write(
+#             "Textual is not installed. Please install with `pip install textual` or reinstall lm_deluge.\n"
+#         )
+#         sys.exit(2)
+#     app = DelugeApp(model, api_key)  # type: ignore[name-defined]
+#     app.run()
+
+
+# def main():
+#     parser = argparse.ArgumentParser(prog="deluge", description="Deluge CLI")
+#     parser.add_argument("prompt", nargs="*", help="Prompt text (non-interactive -p only)")
+#     parser.add_argument("--model", dest="model", help="Model ID to use")
+#     parser.add_argument("--api-key", dest="api_key", help="API key for chosen model provider")
+#     parser.add_argument(
+#         "-p",
+#         dest="print_mode",
+#         action="store_true",
+#         help="Print single completion to stdout (non-interactive)",
+#     )
+
+#     args = parser.parse_args()
+
+#     if args.print_mode:
+#         # Determine prompt text
+#         prompt_text = " ".join(args.prompt).strip()
+#         if not prompt_text and not sys.stdin.isatty():
+#             prompt_text = sys.stdin.read()
+#         if not prompt_text:
+#             sys.stderr.write("No prompt provided. Pass text or pipe input.\n")
+#             sys.exit(2)
+
+#         # Determine model
+#         model_id = args.model or os.getenv("DELUGE_DEFAULT_MODEL") or "gpt-4o-mini"
+#         # Require API key non-interactively if provider needs it and not set
+#         env_var = APIModel.from_registry(model_id).api_key_env_var or ""
+#         if env_var and not (os.getenv(env_var) or args.api_key):
+#             sys.stderr.write(
+#                 f"Missing API key. Set {env_var} or pass --api-key.\n"
+#             )
+#             sys.exit(2)
+#         run_non_interactive(model_id, prompt_text, args.api_key)
+#         return
+
+#     # Interactive Textual chat
+#     run_interactive(args.model, args.api_key)
+
+
+# if __name__ == "__main__":
+#     main()
{lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/client.py
@@ -1,6 +1,6 @@
 import asyncio
 import random
-from typing import Any, Literal, Self, Sequence, overload
+from typing import Any, Literal, Self, Sequence, Callable, overload
 
 import numpy as np
 import yaml
@@ -23,7 +23,6 @@ from .request_context import RequestContext
 from .tracker import StatusTracker
 
 
-# TODO: get completions as they finish, not all at once at the end.
 # TODO: add optional max_input_tokens to client so we can reject long prompts to prevent abuse
 class _LLMClient(BaseModel):
     """
@@ -55,6 +54,9 @@ class _LLMClient(BaseModel):
     # Progress configuration
     progress: Literal["rich", "tqdm", "manual"] = "rich"
 
+    # Postprocessing - run on every APIResponse
+    postprocess: Callable[[APIResponse], APIResponse] | None = None
+
     # Internal state for async task handling
     _next_task_id: int = PrivateAttr(default=0)
     _tasks: dict[int, asyncio.Task] = PrivateAttr(default_factory=dict)
@@ -196,14 +198,6 @@ class _LLMClient(BaseModel):
         config_dict = yaml.safe_load(open(file_path))
         return cls.from_dict(config_dict)
 
-    @classmethod
-    def basic(cls, model: str | list[str], **kwargs):
-        """
-        Doesn't do anything differently now, kept for backwards compat.
-        """
-        kwargs["model_names"] = model
-        return cls(**kwargs)
-
     def _select_model(self):
         assert isinstance(self.model_weights, list)
         model_idx = np.random.choice(range(len(self.models)), p=self.model_weights)
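The basic() classmethod removed above was only a backwards-compatibility shim; the module-level LLMClient factory (see the overloads near the end of this diff) already accepts model names positionally. A likely migration, assuming the factory behaves as those overloads suggest; the model id is just an example:

from lm_deluge.client import LLMClient

# 0.0.34: client = LLMClient.basic("gpt-4o-mini")
# 0.0.36: pass the model name(s) positionally to the factory instead
client = LLMClient("gpt-4o-mini")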
@@ -253,14 +247,20 @@
         self, context: RequestContext, retry_queue: asyncio.Queue | None = None
     ) -> APIResponse:
         """Handle caching and single HTTP call for a request. Failed requests go to retry queue."""
+
         # Check cache first
+        def _maybe_postprocess(response: APIResponse):
+            if self.postprocess:
+                return self.postprocess(response)
+            return response
+
         if self.cache:
             cached = self.cache.get(context.prompt)
             if cached:
                 cached.local_cache_hit = True
                 if context.status_tracker:
                     context.status_tracker.task_succeeded(context.task_id)
-                return cached
+                return _maybe_postprocess(cached)
 
         # Execute single request
         assert context.status_tracker
@@ -275,7 +275,7 @@
                 self.cache.put(context.prompt, response)
             # Call callback if provided
             context.maybe_callback(response, context.status_tracker)
-            return response
+            return _maybe_postprocess(response)
 
         # Handle error response - add to retry queue if available
         if retry_queue and context.attempts_left > 1:
@@ -303,7 +303,7 @@
 
             # Add to retry queue for later processing
             await retry_queue.put(retry_context)
-            return response  # Return the error response for now
+            return _maybe_postprocess(response)  # Return the error response for now
 
         # No retries left or no retry queue - final failure
         context.status_tracker.task_failed(context.task_id)
@@ -316,7 +316,7 @@
             error_msg += f" Message: {response.error_message}. Giving up."
         print(error_msg)
 
-        return response
+        return _maybe_postprocess(response)
 
     @overload
     async def process_prompts_async(
@@ -570,6 +570,8 @@
                 print(item, end="", flush=True)
             else:
                 # final item
+                if self.postprocess:
+                    return self.postprocess(item)
                 return item
 
     async def run_agent_loop(
@@ -713,65 +715,8 @@
     )
 
 
-#
-#
-#     prompts: list[Conversation],
-#     models: str | list[str],
-#     model_weights: list[float],
-#     sampling_params: list[SamplingParams],
-#     max_tokens_per_minute: int = 500_000,
-#     max_requests_per_minute: int = 1_000,
-# ):
-#     """
-#     Count tokens and estimate costs for a batch of prompts.
-#     """
-#     results = []
-#     for i, prompt in zip(ids, prompts):
-#         # choose a model
-#         model_idx = np.random.choice(range(len(models)), p=model_weights)
-#         model = models[model_idx]
-
-#         # dry run
-#         input_tokens, output_tokens, min_cost, max_cost = prompt.dry_run(
-#             model, sampling_params[model_idx].max_new_tokens
-#         )
-#         results.append(
-#             {
-#                 "id": i,
-#                 "input_tokens": input_tokens,
-#                 "output_tokens": output_tokens,
-#                 "min_cost": min_cost,
-#                 "max_cost": max_cost,
-#             }
-#         )
-
-#     combined_results: dict[str, Any] = {
-#         "total_input_tokens": sum([r["input_tokens"] for r in results]),
-#         "total_output_tokens": sum([r["output_tokens"] for r in results]),
-#         "total_min_cost": sum([r["min_cost"] for r in results]),
-#         "total_max_cost": sum([r["max_cost"] for r in results]),
-#     }
-#     minimum_time_tpm = combined_results["total_input_tokens"] / max_tokens_per_minute
-#     maximum_time_tpm = (
-#         combined_results["total_input_tokens"] + combined_results["total_output_tokens"]
-#     ) / max_tokens_per_minute
-#     minimum_time_rpm = len(prompts) / max_requests_per_minute
-
-#     combined_results["minimum_time"] = max(minimum_time_tpm, minimum_time_rpm)
-#     combined_results["maximum_time"] = max(maximum_time_tpm, minimum_time_rpm)
-#     limiting_factor = None
-#     if minimum_time_rpm > maximum_time_tpm:
-#         limiting_factor = "requests"
-#     elif minimum_time_rpm < minimum_time_tpm:
-#         limiting_factor = "tokens"
-#     else:
-#         limiting_factor = "depends"
-#     combined_results["limiting_factor"] = limiting_factor
-
-#     return combined_results
-
-
-# Clean factory function with perfect IDE support
+# factory function -- allows positional model names,
+# keeps pydantic validation, without sacrificing IDE support
 @overload
 def LLMClient(
     model_names: str,
@@ -794,6 +739,7 @@
     top_logprobs: int | None = None,
     force_local_mcp: bool = False,
    progress: Literal["rich", "tqdm", "manual"] = "rich",
+    postprocess: Callable[[APIResponse], APIResponse] | None = None,
 ) -> _LLMClient: ...
 
 
@@ -819,6 +765,7 @@
     top_logprobs: int | None = None,
     force_local_mcp: bool = False,
     progress: Literal["rich", "tqdm", "manual"] = "rich",
+    postprocess: Callable[[APIResponse], APIResponse] | None = None,
 ) -> _LLMClient: ...
 
 
@@ -843,6 +790,7 @@
     top_logprobs: int | None = None,
    force_local_mcp: bool = False,
     progress: Literal["rich", "tqdm", "manual"] = "rich",
+    postprocess: Callable[[APIResponse], APIResponse] | None = None,
 ) -> _LLMClient:
     """
     Create an LLMClient with model_names as a positional argument.
@@ -879,4 +827,5 @@
         top_logprobs=top_logprobs,
         force_local_mcp=force_local_mcp,
         progress=progress,
+        postprocess=postprocess,
     )
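Taken together, the client.py changes thread the new postprocess hook through every return path (cache hits, successes, retried errors, final failures, and the final streamed item), so one callable can inspect or normalize every APIResponse. A rough usage sketch based on the signatures shown in the hunks above; the model id and the log_usage helper are illustrative:

import asyncio

from lm_deluge.client import LLMClient
from lm_deluge.prompt import Conversation

def log_usage(response):
    # Runs on every APIResponse the client hands back, per the patched return paths.
    print(response.model_internal, response.usage)
    return response

client = LLMClient("gpt-4o-mini", postprocess=log_usage)
responses = asyncio.run(
    client.process_prompts_async([Conversation.user("Say hi.")], show_progress=False)
)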