lm-deluge 0.0.89-py3-none-any.whl → 0.0.91-py3-none-any.whl

This diff shows the changes between publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
Files changed (88)
  1. lm_deluge/__init__.py +3 -27
  2. lm_deluge/api_requests/anthropic.py +29 -7
  3. lm_deluge/api_requests/base.py +38 -1
  4. lm_deluge/api_requests/bedrock.py +29 -3
  5. lm_deluge/{request_context.py → api_requests/context.py} +4 -4
  6. lm_deluge/api_requests/gemini.py +30 -14
  7. lm_deluge/api_requests/mistral.py +1 -1
  8. lm_deluge/api_requests/openai.py +34 -5
  9. lm_deluge/batches.py +19 -49
  10. lm_deluge/cache.py +1 -1
  11. lm_deluge/cli.py +672 -300
  12. lm_deluge/{client.py → client/__init__.py} +42 -13
  13. lm_deluge/config.py +9 -31
  14. lm_deluge/embed.py +2 -6
  15. lm_deluge/models/__init__.py +138 -29
  16. lm_deluge/models/anthropic.py +32 -24
  17. lm_deluge/models/bedrock.py +9 -0
  18. lm_deluge/models/cerebras.py +2 -0
  19. lm_deluge/models/cohere.py +2 -0
  20. lm_deluge/models/google.py +13 -0
  21. lm_deluge/models/grok.py +4 -0
  22. lm_deluge/models/groq.py +2 -0
  23. lm_deluge/models/meta.py +2 -0
  24. lm_deluge/models/minimax.py +9 -1
  25. lm_deluge/models/openai.py +24 -1
  26. lm_deluge/models/openrouter.py +155 -1
  27. lm_deluge/models/together.py +3 -0
  28. lm_deluge/models/zai.py +50 -1
  29. lm_deluge/pipelines/extract.py +4 -5
  30. lm_deluge/pipelines/gepa/__init__.py +1 -1
  31. lm_deluge/pipelines/gepa/docs/samples.py +19 -10
  32. lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +1 -1
  33. lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +1 -1
  34. lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +1 -1
  35. lm_deluge/pipelines/gepa/examples/04_batch_classification.py +1 -1
  36. lm_deluge/pipelines/gepa/examples/simple_qa.py +1 -1
  37. lm_deluge/prompt/__init__.py +45 -0
  38. lm_deluge/{prompt.py → prompt/conversation.py} +165 -869
  39. lm_deluge/{image.py → prompt/image.py} +0 -10
  40. lm_deluge/prompt/message.py +571 -0
  41. lm_deluge/prompt/serialization.py +21 -0
  42. lm_deluge/prompt/signatures.py +77 -0
  43. lm_deluge/prompt/text.py +47 -0
  44. lm_deluge/prompt/thinking.py +55 -0
  45. lm_deluge/prompt/tool_calls.py +245 -0
  46. lm_deluge/server/__init__.py +24 -0
  47. lm_deluge/server/__main__.py +144 -0
  48. lm_deluge/server/adapters.py +369 -0
  49. lm_deluge/server/app.py +388 -0
  50. lm_deluge/server/auth.py +71 -0
  51. lm_deluge/server/model_policy.py +215 -0
  52. lm_deluge/server/models_anthropic.py +172 -0
  53. lm_deluge/server/models_openai.py +175 -0
  54. lm_deluge/skills/anthropic.py +0 -0
  55. lm_deluge/skills/compat.py +0 -0
  56. lm_deluge/tool/__init__.py +78 -19
  57. lm_deluge/tool/builtin/anthropic/__init__.py +1 -1
  58. lm_deluge/tool/cua/actions.py +26 -26
  59. lm_deluge/tool/cua/batch.py +1 -2
  60. lm_deluge/tool/cua/kernel.py +1 -1
  61. lm_deluge/tool/prefab/filesystem.py +2 -2
  62. lm_deluge/tool/prefab/full_text_search/__init__.py +3 -2
  63. lm_deluge/tool/prefab/memory.py +3 -1
  64. lm_deluge/tool/prefab/otc/executor.py +3 -3
  65. lm_deluge/tool/prefab/random.py +30 -54
  66. lm_deluge/tool/prefab/rlm/__init__.py +2 -2
  67. lm_deluge/tool/prefab/rlm/executor.py +1 -1
  68. lm_deluge/tool/prefab/sandbox/__init__.py +19 -0
  69. lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +483 -0
  70. lm_deluge/tool/prefab/sandbox/docker_sandbox.py +609 -0
  71. lm_deluge/tool/prefab/sandbox/fargate_sandbox.py +546 -0
  72. lm_deluge/tool/prefab/sandbox/modal_sandbox.py +469 -0
  73. lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +829 -0
  74. lm_deluge/tool/prefab/skills.py +0 -0
  75. lm_deluge/tool/prefab/subagents.py +1 -1
  76. lm_deluge/util/logprobs.py +4 -4
  77. lm_deluge/util/schema.py +6 -6
  78. lm_deluge/util/validation.py +14 -9
  79. {lm_deluge-0.0.89.dist-info → lm_deluge-0.0.91.dist-info}/METADATA +12 -12
  80. lm_deluge-0.0.91.dist-info/RECORD +140 -0
  81. lm_deluge-0.0.91.dist-info/entry_points.txt +3 -0
  82. lm_deluge/mock_openai.py +0 -643
  83. lm_deluge/tool/prefab/sandbox.py +0 -1621
  84. lm_deluge-0.0.89.dist-info/RECORD +0 -117
  85. /lm_deluge/{file.py → prompt/file.py} +0 -0
  86. {lm_deluge-0.0.89.dist-info → lm_deluge-0.0.91.dist-info}/WHEEL +0 -0
  87. {lm_deluge-0.0.89.dist-info → lm_deluge-0.0.91.dist-info}/licenses/LICENSE +0 -0
  88. {lm_deluge-0.0.89.dist-info → lm_deluge-0.0.91.dist-info}/top_level.txt +0 -0
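Beyond the rewritten CLI, the most visible structural changes are the split of lm_deluge/prompt.py into a prompt/ package (conversation.py, message.py, text.py, thinking.py, tool_calls.py, image.py, file.py, serialization.py, signatures.py), the move of client.py to client/__init__.py, and the replacement of the monolithic tool/prefab/sandbox.py with per-backend modules (daytona, docker, fargate, modal, seatbelt). Judging from the imports used by the new cli.py below, the familiar top-level names still resolve; the following import sketch is inferred from this diff, not from the package's documentation:

    # Paths implied by the 0.0.91 layout (inferred from the new cli.py, not from docs)
    from lm_deluge.client import LLMClient             # client.py -> client/__init__.py
    from lm_deluge.prompt import Conversation          # prompt.py -> prompt/ package
    from lm_deluge.prompt.text import Text             # new prompt/text.py
    from lm_deluge.prompt.tool_calls import ToolCall   # new prompt/tool_calls.py
    from lm_deluge.prompt.thinking import Thinking     # new prompt/thinking.py
    from lm_deluge.tool.prefab.sandbox import DockerSandbox, SeatbeltSandbox

    # Builder-style construction, as used by cmd_run below
    conv = Conversation().system("You are terse.").user("What is 2+2?")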
lm_deluge/cli.py CHANGED
@@ -1,300 +1,672 @@
1
- # import argparse
2
- # import asyncio
3
- # import os
4
- # import sys
5
- # from typing import Optional
6
-
7
- # from .client import LLMClient
8
- # from .models import registry, APIModel
9
- # from .prompt import Conversation, Message
10
-
11
-
12
- # def _ensure_api_key_for_model(model_id: str, passed_api_key: Optional[str] = None):
13
- # model: APIModel = APIModel.from_registry(model_id)
14
- # env_var = model.api_key_env_var or ""
15
- # if not env_var:
16
- # return # Some providers (e.g., Bedrock entries) don't use a single key
17
- # if os.getenv(env_var):
18
- # return
19
- # if passed_api_key:
20
- # os.environ[env_var] = passed_api_key
21
- # return
22
- # # If we get here, interactive prompting should occur at the UI layer.
23
- # # In non-interactive contexts, we will error before calling this without key.
24
-
25
-
26
- # def run_non_interactive(model_id: str, prompt_text: str, api_key: Optional[str]):
27
- # _ensure_api_key_for_model(model_id, api_key)
28
- # client = LLMClient(model_names=[model_id], progress="manual")
29
- # # Single round, print completion only to stdout
30
- # completions = asyncio.run(
31
- # client.process_prompts_async(
32
- # [Conversation.user(prompt_text)],
33
- # return_completions_only=True,
34
- # show_progress=False,
35
- # )
36
- # )
37
- # out = completions[0] if completions and completions[0] is not None else ""
38
- # # Write raw completion to stdout with no extra decoration
39
- # sys.stdout.write(out)
40
- # if out and not out.endswith("\n"):
41
- # sys.stdout.write("\n")
42
-
43
-
44
- # # -------- Textual UI (interactive chat) --------
45
- # try:
46
- # from textual.app import App, ComposeResult
47
- # from textual.containers import Container, Horizontal
48
- # from textual.widgets import Footer, Header, Input, Static, Button, ListView, ListItem, Label
49
- # from textual.widgets._rich_log import RichLog
50
- # from textual.reactive import reactive
51
- # TEXTUAL_AVAILABLE = True
52
- # except Exception: # pragma: no cover - textual may not be installed in some dev envs
53
- # TEXTUAL_AVAILABLE = False
54
-
55
-
56
- # if TEXTUAL_AVAILABLE:
57
- # class ModelPicker(Static):
58
- # """Minimal model picker: arrows to move, Enter to select."""
59
-
60
- # def __init__(self, preselected: Optional[str] = None):
61
- # super().__init__()
62
- # self.preselected = preselected
63
-
64
- # def compose(self) -> ComposeResult: # type: ignore[override]
65
- # # Keep it terminal-y: one-line hint + list. No buttons.
66
- # yield Static("Pick a model (Enter)", classes="hint")
67
- # list_items: list[ListItem] = []
68
- # # Curated small set to avoid scrollbars
69
- # preferred = [
70
- # "gpt-5",
71
- # "gpt-5-chat",
72
- # "gpt-5-mini",
73
- # "claude-4-sonnet",
74
- # "gemini-2.5-pro",
75
- # "gemini-2.5-flash",
76
- # "gemini-2.0-flash",
77
- # ]
78
- # for mid in preferred:
79
- # if mid in registry:
80
- # list_items.append(ListItem(Label(mid)))
81
- # yield ListView(*list_items, classes="model-list")
82
-
83
- # def on_mount(self) -> None: # type: ignore[override]
84
- # # Focus the list so Enter works immediately
85
- # self.query_one(ListView).focus()
86
-
87
- # def get_selected(self) -> Optional[str]:
88
- # listview = self.query_one(ListView)
89
- # if not listview.index is None and 0 <= listview.index < len(listview.children):
90
- # label = listview.children[listview.index].query_one(Label)
91
- # return label.renderable if isinstance(label.renderable, str) else str(label.renderable)
92
- # return None
93
-
94
- # def on_key(self, event): # type: ignore[override]
95
- # # Select current item on Enter
96
- # try:
97
- # key = getattr(event, "key", None)
98
- # except Exception:
99
- # key = None
100
- # if key == "enter":
101
- # sel = self.get_selected()
102
- # if sel:
103
- # # Ask app to proceed with the chosen model
104
- # getattr(self.app, "model_chosen", lambda *_: None)(sel) # type: ignore[attr-defined]
105
-
106
-
107
- # class ApiKeyPrompt(Static):
108
- # def __init__(self, env_var: str):
109
- # super().__init__()
110
- # self.env_var = env_var
111
- # self.input = Input(password=True, placeholder=f"Enter {env_var}")
112
-
113
- # def compose(self) -> ComposeResult: # type: ignore[override]
114
- # yield Static(f"API key required: set {self.env_var}", classes="title")
115
- # yield self.input
116
- # yield Button("Save", id="save-key", variant="primary")
117
-
118
- # def value(self) -> str:
119
- # return self.input.value
120
-
121
-
122
- # class MessagesView(RichLog):
123
- # def __init__(self, **kwargs):
124
- # # Terminal-like log with markup and auto-scroll
125
- # super().__init__(wrap=True, markup=True, auto_scroll=True, **kwargs)
126
-
127
- # def append_user(self, text: str):
128
- # self.write(f"[bold cyan]You:[/bold cyan] {text}")
129
-
130
- # def append_assistant(self, text: str):
131
- # self.write(f"[bold magenta]Model:[/bold magenta] {text}")
132
-
133
-
134
- # class ChatInput(Horizontal):
135
- # def compose(self) -> ComposeResult: # type: ignore[override]
136
- # self.input = Input(placeholder="Type message, Enter to send")
137
- # yield self.input
138
-
139
-
140
- # class DelugeApp(App):
141
- # CSS = """
142
- # #screen { height: 100%; }
143
- # .chat { height: 1fr; padding: 0 1; }
144
- # .composer { dock: bottom; height: 3; }
145
- # """
146
-
147
- # BINDINGS = [
148
- # ("ctrl+c", "quit", "Quit"),
149
- # ]
150
-
151
- # model_id = reactive("")
152
- # api_env_var = reactive("")
153
-
154
- # def __init__(self, model_arg: Optional[str], api_key_arg: Optional[str]):
155
- # super().__init__()
156
- # self._model_arg = model_arg
157
- # self._api_key_arg = api_key_arg
158
- # self._conversation = Conversation.system("You are a helpful assistant.")
159
- # self._client = None
160
-
161
- # def compose(self) -> ComposeResult: # type: ignore[override]
162
- # yield Header(show_clock=True)
163
- # self.body = Container(id="screen")
164
- # yield self.body
165
- # yield Footer()
166
-
167
- # def on_mount(self): # type: ignore[override]
168
- # # Step 1: pick model if not provided
169
- # if not self._model_arg:
170
- # self.model_picker = ModelPicker()
171
- # self.body.mount(self.model_picker)
172
- # else:
173
- # self.model_id = self._model_arg
174
- # self._after_model_selected()
175
-
176
- # def action_quit(self) -> None: # type: ignore[override]
177
- # self.exit()
178
-
179
- # def _after_model_selected(self):
180
- # # Resolve API requirement
181
- # model = APIModel.from_registry(self.model_id)
182
- # self.api_env_var = model.api_key_env_var or ""
183
- # if self.api_env_var and not os.getenv(self.api_env_var):
184
- # if self._api_key_arg:
185
- # os.environ[self.api_env_var] = self._api_key_arg
186
- # self._show_chat()
187
- # else:
188
- # # Prompt for key
189
- # self.body.remove_children()
190
- # self.key_prompt = ApiKeyPrompt(self.api_env_var)
191
- # self.body.mount(self.key_prompt)
192
- # else:
193
- # self._show_chat()
194
-
195
- # def model_chosen(self, sel: str) -> None:
196
- # """Called by ModelPicker when Enter is pressed on a selection."""
197
- # self.model_id = sel
198
- # self._after_model_selected()
199
-
200
- # def _show_chat(self):
201
- # self.body.remove_children()
202
- # # Build UI
203
- # self.messages = MessagesView(classes="chat")
204
- # self.composer = ChatInput(classes="composer")
205
- # self.body.mount(self.messages)
206
- # self.body.mount(self.composer)
207
- # # Focus input after mounting
208
- # self.set_focus(self.composer.input)
209
- # # Init client
210
- # self._client = LLMClient(model_names=[self.model_id], progress="manual")
211
- # # Update header subtitle
212
- # self.query_one(Header).sub_title = f"Model: {self.model_id}"
213
-
214
- # async def _send_and_receive(self, text: str):
215
- # # Append user message
216
- # self._conversation.add(Message.user(text))
217
- # self.messages.append_user(text)
218
- # # Call model (non-streaming for simplicity across providers)
219
- # responses = await self._client.process_prompts_async(
220
- # [self._conversation], return_completions_only=False, show_progress=False
221
- # )
222
- # resp = responses[0]
223
- # if resp and resp.completion:
224
- # self._conversation.add(Message.ai(resp.completion))
225
- # self.messages.append_assistant(resp.completion)
226
- # else:
227
- # self.messages.append_assistant("<no response>")
228
-
229
- # async def on_button_pressed(self, event): # type: ignore[override]
230
- # if hasattr(event.button, "id"):
231
- # if event.button.id == "save-key":
232
- # key = self.key_prompt.value().strip()
233
- # if self.api_env_var and key:
234
- # os.environ[self.api_env_var] = key
235
- # self._show_chat()
236
- # elif event.button.id == "send":
237
- # text = self.composer.input.value.strip()
238
- # if text:
239
- # self.composer.input.value = ""
240
- # await self._send_and_receive(text)
241
-
242
- # async def on_input_submitted(self, event: Input.Submitted): # type: ignore[override]
243
- # if isinstance(event.input.parent, ChatInput):
244
- # text = event.value.strip()
245
- # if text:
246
- # self.composer.input.value = ""
247
- # await self._send_and_receive(text)
248
-
249
-
250
- # def run_interactive(model: Optional[str], api_key: Optional[str]):
251
- # if not TEXTUAL_AVAILABLE:
252
- # sys.stderr.write(
253
- # "Textual is not installed. Please install with `pip install textual` or reinstall lm_deluge.\n"
254
- # )
255
- # sys.exit(2)
256
- # app = DelugeApp(model, api_key) # type: ignore[name-defined]
257
- # app.run()
258
-
259
-
260
- # def main():
261
- # parser = argparse.ArgumentParser(prog="deluge", description="Deluge CLI")
262
- # parser.add_argument("prompt", nargs="*", help="Prompt text (non-interactive -p only)")
263
- # parser.add_argument("--model", dest="model", help="Model ID to use")
264
- # parser.add_argument("--api-key", dest="api_key", help="API key for chosen model provider")
265
- # parser.add_argument(
266
- # "-p",
267
- # dest="print_mode",
268
- # action="store_true",
269
- # help="Print single completion to stdout (non-interactive)",
270
- # )
271
-
272
- # args = parser.parse_args()
273
-
274
- # if args.print_mode:
275
- # # Determine prompt text
276
- # prompt_text = " ".join(args.prompt).strip()
277
- # if not prompt_text and not sys.stdin.isatty():
278
- # prompt_text = sys.stdin.read()
279
- # if not prompt_text:
280
- # sys.stderr.write("No prompt provided. Pass text or pipe input.\n")
281
- # sys.exit(2)
282
-
283
- # # Determine model
284
- # model_id = args.model or os.getenv("DELUGE_DEFAULT_MODEL") or "gpt-4o-mini"
285
- # # Require API key non-interactively if provider needs it and not set
286
- # env_var = APIModel.from_registry(model_id).api_key_env_var or ""
287
- # if env_var and not (os.getenv(env_var) or args.api_key):
288
- # sys.stderr.write(
289
- # f"Missing API key. Set {env_var} or pass --api-key.\n"
290
- # )
291
- # sys.exit(2)
292
- # run_non_interactive(model_id, prompt_text, args.api_key)
293
- # return
294
-
295
- # # Interactive Textual chat
296
- # run_interactive(args.model, args.api_key)
297
-
298
-
299
- # if __name__ == "__main__":
300
- # main()
1
+ """
2
+ LM-Deluge CLI
3
+
4
+ Usage:
5
+ deluge list [--provider PROVIDER] [--name NAME] [--json] ...
6
+ deluge run MODEL [--input INPUT | --file FILE] [--max-tokens N] [--temperature T] ...
7
+ deluge agent MODEL [--mcp-config FILE] [--prefab TOOLS] [--input INPUT] ...
8
+
9
+ Examples:
10
+ deluge list
11
+ deluge list --provider anthropic --reasoning
12
+ deluge list --name claude --json
13
+ deluge run claude-3.5-haiku -i "What is 2+2?"
14
+ echo "Hello" | deluge run gpt-4.1-mini
15
+ deluge run claude-4-sonnet --file prompt.txt --max-tokens 4096
16
+ deluge agent claude-3.5-haiku --mcp-config mcp.json -i "Search for AI news"
17
+ deluge agent claude-4-sonnet --prefab todo,memory -i "Create a task list"
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import argparse
23
+ import asyncio
24
+ import json
25
+ import sys
26
+ from typing import Any
27
+
28
+ from .models import find_models, APIModel
29
+ from .client import LLMClient
30
+ from .prompt import Conversation
31
+
32
+
33
+ def _model_to_dict(model: APIModel) -> dict[str, Any]:
34
+ """Convert APIModel to a JSON-serializable dict."""
35
+ return {
36
+ "id": model.id,
37
+ "name": model.name,
38
+ "provider": model.provider,
39
+ "api_spec": model.api_spec,
40
+ "input_cost": model.input_cost,
41
+ "output_cost": model.output_cost,
42
+ "supports_json": model.supports_json,
43
+ "supports_images": model.supports_images,
44
+ "supports_logprobs": model.supports_logprobs,
45
+ "reasoning_model": model.reasoning_model,
46
+ }
47
+
48
+
49
+ def cmd_list(args: argparse.Namespace) -> int:
50
+ """List models matching the given criteria."""
51
+ # Convert boolean flags: only pass True if set, None otherwise
52
+ models = find_models(
53
+ provider=args.provider,
54
+ supports_json=True if args.json_mode else None,
55
+ supports_images=True if args.images else None,
56
+ supports_logprobs=True if args.logprobs else None,
57
+ reasoning_model=True if args.reasoning else None,
58
+ min_input_cost=args.min_input_cost,
59
+ max_input_cost=args.max_input_cost,
60
+ min_output_cost=args.min_output_cost,
61
+ max_output_cost=args.max_output_cost,
62
+ name_contains=args.name,
63
+ sort_by=args.sort,
64
+ limit=args.limit,
65
+ )
66
+
67
+ if args.json:
68
+ output = [_model_to_dict(m) for m in models]
69
+ print(json.dumps(output, indent=2))
70
+ else:
71
+ if not models:
72
+ print("No models found matching criteria.", file=sys.stderr)
73
+ return 0
74
+
75
+ # Calculate column widths
76
+ id_width = max(len(m.id) for m in models)
77
+ provider_width = max(len(m.provider) for m in models)
78
+
79
+ # Header
80
+ print(
81
+ f"{'MODEL':<{id_width}} {'PROVIDER':<{provider_width}} {'INPUT $/M':>10} {'OUTPUT $/M':>10} FLAGS"
82
+ )
83
+ print("-" * (id_width + provider_width + 40))
84
+
85
+ for m in models:
86
+ flags = []
87
+ if m.supports_json:
88
+ flags.append("json")
89
+ if m.supports_images:
90
+ flags.append("img")
91
+ if m.supports_logprobs:
92
+ flags.append("logp")
93
+ if m.reasoning_model:
94
+ flags.append("reason")
95
+
96
+ input_cost = f"${m.input_cost:.2f}" if m.input_cost is not None else "N/A"
97
+ output_cost = (
98
+ f"${m.output_cost:.2f}" if m.output_cost is not None else "N/A"
99
+ )
100
+
101
+ print(
102
+ f"{m.id:<{id_width}} {m.provider:<{provider_width}} {input_cost:>10} {output_cost:>10} {','.join(flags)}"
103
+ )
104
+
105
+ print(f"\nTotal: {len(models)} models")
106
+
107
+ return 0
108
+
109
+
110
+ def cmd_run(args: argparse.Namespace) -> int:
111
+ """Run a model on input and output JSON to stdout."""
112
+ # Determine input text
113
+ if args.input:
114
+ prompt_text = args.input
115
+ elif args.file:
116
+ try:
117
+ with open(args.file, "r") as f:
118
+ prompt_text = f.read()
119
+ except FileNotFoundError:
120
+ print(
121
+ json.dumps({"error": f"File not found: {args.file}"}), file=sys.stdout
122
+ )
123
+ return 1
124
+ except Exception as e:
125
+ print(json.dumps({"error": f"Failed to read file: {e}"}), file=sys.stdout)
126
+ return 1
127
+ elif not sys.stdin.isatty():
128
+ prompt_text = sys.stdin.read()
129
+ else:
130
+ print(
131
+ json.dumps(
132
+ {"error": "No input provided. Use --input, --file, or pipe to stdin."}
133
+ ),
134
+ file=sys.stdout,
135
+ )
136
+ return 1
137
+
138
+ if not prompt_text.strip():
139
+ print(json.dumps({"error": "Empty input provided."}), file=sys.stdout)
140
+ return 1
141
+
142
+ # Build conversation
143
+ image = args.image if hasattr(args, "image") else None
144
+ if args.system:
145
+ conv = Conversation().system(args.system).user(prompt_text, image=image)
146
+ else:
147
+ conv = Conversation().user(prompt_text, image=image)
148
+
149
+ # Build client params
150
+ client_kwargs: dict[str, Any] = {
151
+ "model_names": args.model,
152
+ "max_new_tokens": args.max_tokens,
153
+ }
154
+ if args.temperature is not None:
155
+ client_kwargs["temperature"] = args.temperature
156
+
157
+ try:
158
+ client = LLMClient(**client_kwargs)
159
+ client.open(show_progress=False)
160
+ response = asyncio.run(client.start(conv))
161
+ except ValueError as e:
162
+ print(json.dumps({"error": str(e)}), file=sys.stdout)
163
+ return 1
164
+ except Exception as e:
165
+ print(json.dumps({"error": f"Request failed: {e}"}), file=sys.stdout)
166
+ return 1
167
+
168
+ # Build output
169
+ output: dict[str, Any] = {
170
+ "model": args.model,
171
+ "completion": response.completion if response.completion else None,
172
+ "is_error": response.is_error,
173
+ }
174
+
175
+ if response.is_error:
176
+ output["error_message"] = response.error_message
177
+
178
+ if response.usage:
179
+ output["usage"] = {
180
+ "input_tokens": response.usage.input_tokens,
181
+ "output_tokens": response.usage.output_tokens,
182
+ }
183
+
184
+ if response.cost is not None:
185
+ output["cost"] = response.cost
186
+
187
+ if args.verbose and response.finish_reason:
188
+ output["finish_reason"] = response.finish_reason
189
+
190
+ print(json.dumps(output, indent=2 if args.pretty else None))
191
+ return 0 if not response.is_error else 1
192
+
193
+
194
+ def _print_json(obj: dict[str, Any]) -> None:
195
+ """Print JSON and flush immediately for streaming."""
196
+ print(json.dumps(obj), flush=True)
197
+
198
+
199
+ def cmd_agent(args: argparse.Namespace) -> int:
200
+ """Run an agent loop with tools and output JSON blocks for each content piece."""
201
+ from .tool import Tool, MCPServer
202
+ from .prompt.text import Text
203
+ from .prompt.tool_calls import ToolCall
204
+ from .prompt.thinking import Thinking
205
+
206
+ # Determine input text
207
+ if args.input:
208
+ prompt_text = args.input
209
+ elif args.file:
210
+ try:
211
+ with open(args.file, "r") as f:
212
+ prompt_text = f.read()
213
+ except FileNotFoundError:
214
+ _print_json({"type": "error", "error": f"File not found: {args.file}"})
215
+ return 1
216
+ except Exception as e:
217
+ _print_json({"type": "error", "error": f"Failed to read file: {e}"})
218
+ return 1
219
+ elif not sys.stdin.isatty():
220
+ prompt_text = sys.stdin.read()
221
+ else:
222
+ _print_json(
223
+ {
224
+ "type": "error",
225
+ "error": "No input provided. Use --input, --file, or pipe to stdin.",
226
+ }
227
+ )
228
+ return 1
229
+
230
+ if not prompt_text.strip():
231
+ _print_json({"type": "error", "error": "Empty input provided."})
232
+ return 1
233
+
234
+ def print_message_parts(msg_role: str, parts: list) -> None:
235
+ """Print JSON for each part of a message."""
236
+ for part in parts:
237
+ if isinstance(part, Text):
238
+ _print_json({"type": "text", "role": msg_role, "content": part.text})
239
+ elif isinstance(part, ToolCall):
240
+ _print_json(
241
+ {
242
+ "type": "tool_call",
243
+ "id": part.id,
244
+ "name": part.name,
245
+ "arguments": part.arguments,
246
+ }
247
+ )
248
+ elif isinstance(part, Thinking):
249
+ _print_json({"type": "thinking", "content": part.content})
250
+
251
+ async def run_agent() -> int:
252
+ tools: list[Any] = []
253
+ tool_map: dict[str, Tool] = {}
254
+
255
+ # Load MCP tools from config
256
+ if args.mcp_config:
257
+ try:
258
+ import json5
259
+
260
+ with open(args.mcp_config, "r") as f:
261
+ mcp_config = json5.load(f)
262
+ # URL-based servers -> MCPServer objects (provider-native)
263
+ mcp_servers = MCPServer.from_mcp_config(mcp_config)
264
+ tools.extend(mcp_servers)
265
+ # Expand MCP servers to tools for local execution
266
+ for server in mcp_servers:
267
+ server_tools = await server.to_tools()
268
+ for t in server_tools:
269
+ tool_map[t.name] = t
270
+ # Command-based servers -> Tool objects (local execution)
271
+ cmd_tools = await Tool.from_mcp_config(mcp_config)
272
+ tools.extend(cmd_tools)
273
+ for t in cmd_tools:
274
+ tool_map[t.name] = t
275
+ except FileNotFoundError:
276
+ _print_json(
277
+ {
278
+ "type": "error",
279
+ "error": f"MCP config not found: {args.mcp_config}",
280
+ }
281
+ )
282
+ return 1
283
+ except Exception as e:
284
+ _print_json(
285
+ {"type": "error", "error": f"Failed to load MCP config: {e}"}
286
+ )
287
+ return 1
288
+
289
+ # Load prefab tools
290
+ if args.prefab:
291
+ prefab_names = [p.strip() for p in args.prefab.split(",")]
292
+ for name in prefab_names:
293
+ try:
294
+ prefab_tools: list[Tool] = []
295
+ if name == "todo":
296
+ from .tool.prefab import TodoManager
297
+
298
+ prefab_tools = TodoManager().get_tools()
299
+ elif name == "memory":
300
+ from .tool.prefab.memory import MemoryManager
301
+
302
+ prefab_tools = MemoryManager().get_tools()
303
+ elif name == "filesystem":
304
+ from .tool.prefab import FilesystemManager
305
+
306
+ prefab_tools = FilesystemManager().get_tools()
307
+ elif name == "sandbox":
308
+ import platform
309
+
310
+ if platform.system() == "Darwin":
311
+ from .tool.prefab.sandbox import SeatbeltSandbox
312
+
313
+ sandbox = SeatbeltSandbox()
314
+ await sandbox.__aenter__()
315
+ prefab_tools = sandbox.get_tools()
316
+ else:
317
+ from .tool.prefab.sandbox import DockerSandbox
318
+
319
+ sandbox = DockerSandbox()
320
+ await sandbox.__aenter__()
321
+ prefab_tools = sandbox.get_tools()
322
+ else:
323
+ _print_json(
324
+ {
325
+ "type": "error",
326
+ "error": f"Unknown prefab tool: {name}. Available: todo, memory, filesystem, sandbox",
327
+ }
328
+ )
329
+ return 1
330
+ tools.extend(prefab_tools)
331
+ for t in prefab_tools:
332
+ tool_map[t.name] = t
333
+ except ImportError as e:
334
+ _print_json(
335
+ {
336
+ "type": "error",
337
+ "error": f"Failed to load prefab '{name}': {e}",
338
+ }
339
+ )
340
+ return 1
341
+
342
+ # Build conversation
343
+ image = args.image if hasattr(args, "image") else None
344
+ if args.system:
345
+ conv = Conversation().system(args.system).user(prompt_text, image=image)
346
+ else:
347
+ conv = Conversation().user(prompt_text, image=image)
348
+
349
+ # Print initial user message
350
+ _print_json({"type": "text", "role": "user", "content": prompt_text})
351
+
352
+ # Build client
353
+ client_kwargs: dict[str, Any] = {
354
+ "model_names": args.model,
355
+ "max_new_tokens": args.max_tokens,
356
+ }
357
+ if args.temperature is not None:
358
+ client_kwargs["temperature"] = args.temperature
359
+
360
+ try:
361
+ client = LLMClient(**client_kwargs)
362
+ client.open(show_progress=False)
363
+
364
+ # Manual agent loop with streaming output
365
+ total_usage = {"input_tokens": 0, "output_tokens": 0}
366
+ total_cost = 0.0
367
+ last_response = None
368
+ round_num = 0
369
+
370
+ for round_num in range(args.max_rounds):
371
+ # Get model response
372
+ response = await client.start(conv, tools=tools)
373
+ last_response = response
374
+
375
+ if response.is_error:
376
+ _print_json({"type": "error", "error": response.error_message})
377
+ break
378
+
379
+ # Track usage
380
+ if response.usage:
381
+ total_usage["input_tokens"] += response.usage.input_tokens or 0
382
+ total_usage["output_tokens"] += response.usage.output_tokens or 0
383
+ if response.cost:
384
+ total_cost += response.cost
385
+
386
+ # Print assistant response parts
387
+ if response.content:
388
+ print_message_parts("assistant", response.content.parts)
389
+
390
+ # Check for tool calls
391
+ tool_calls = response.content.tool_calls
392
+ if not tool_calls:
393
+ # No tool calls, we're done
394
+ break
395
+
396
+ # Add assistant message to conversation
397
+ conv = conv.add(response.content)
398
+
399
+ # Execute tool calls and print results
400
+ for call in tool_calls:
401
+ tool_obj = tool_map.get(call.name)
402
+ if tool_obj:
403
+ try:
404
+ result = await tool_obj.acall(**call.arguments)
405
+ result_str = (
406
+ result
407
+ if isinstance(result, str)
408
+ else json.dumps(result)
409
+ )
410
+ except Exception as e:
411
+ result_str = f"Error: {e}"
412
+ else:
413
+ result_str = f"Error: Unknown tool '{call.name}'"
414
+
415
+ _print_json(
416
+ {
417
+ "type": "tool_result",
418
+ "tool_call_id": call.id,
419
+ "name": call.name,
420
+ "result": result_str,
421
+ }
422
+ )
423
+
424
+ # Add tool result to conversation
425
+ conv = conv.with_tool_result(call.id, result_str)
426
+ else:
427
+ # No content, we're done
428
+ break
429
+
430
+ # Final summary
431
+ done_output: dict[str, Any] = {"type": "done", "rounds": round_num + 1}
432
+ if total_usage["input_tokens"] or total_usage["output_tokens"]:
433
+ done_output["usage"] = total_usage
434
+ if total_cost > 0:
435
+ done_output["cost"] = total_cost
436
+ if last_response and last_response.is_error:
437
+ done_output["error"] = last_response.error_message
438
+ _print_json(done_output)
439
+
440
+ return 0 if (last_response and not last_response.is_error) else 1
441
+
442
+ except ValueError as e:
443
+ _print_json({"type": "error", "error": str(e)})
444
+ return 1
445
+ except Exception as e:
446
+ _print_json({"type": "error", "error": f"Agent loop failed: {e}"})
447
+ return 1
448
+
449
+ return asyncio.run(run_agent())
450
+
451
+
452
+ def main():
453
+ parser = argparse.ArgumentParser(
454
+ prog="deluge",
455
+ description="LM-Deluge CLI - Run and manage LLM models",
456
+ )
457
+ subparsers = parser.add_subparsers(dest="command", help="Available commands")
458
+
459
+ # ---- list command ----
460
+ list_parser = subparsers.add_parser(
461
+ "list",
462
+ help="List available models",
463
+ description="List and filter available models in the registry",
464
+ )
465
+ list_parser.add_argument(
466
+ "--provider",
467
+ type=str,
468
+ help="Filter by provider/api_spec (e.g., openai, anthropic, google)",
469
+ )
470
+ list_parser.add_argument(
471
+ "--name",
472
+ type=str,
473
+ help="Filter by substring in model ID (case-insensitive)",
474
+ )
475
+ list_parser.add_argument(
476
+ "--json-mode",
477
+ action="store_true",
478
+ dest="json_mode",
479
+ help="Only show models that support JSON mode",
480
+ )
481
+ list_parser.add_argument(
482
+ "--images",
483
+ action="store_true",
484
+ help="Only show models that support image inputs",
485
+ )
486
+ list_parser.add_argument(
487
+ "--logprobs",
488
+ action="store_true",
489
+ help="Only show models that support logprobs",
490
+ )
491
+ list_parser.add_argument(
492
+ "--reasoning",
493
+ action="store_true",
494
+ help="Only show reasoning models",
495
+ )
496
+ list_parser.add_argument(
497
+ "--min-input-cost",
498
+ type=float,
499
+ help="Minimum input cost ($ per million tokens)",
500
+ )
501
+ list_parser.add_argument(
502
+ "--max-input-cost",
503
+ type=float,
504
+ help="Maximum input cost ($ per million tokens)",
505
+ )
506
+ list_parser.add_argument(
507
+ "--min-output-cost",
508
+ type=float,
509
+ help="Minimum output cost ($ per million tokens)",
510
+ )
511
+ list_parser.add_argument(
512
+ "--max-output-cost",
513
+ type=float,
514
+ help="Maximum output cost ($ per million tokens)",
515
+ )
516
+ list_parser.add_argument(
517
+ "--sort",
518
+ type=str,
519
+ choices=["input_cost", "output_cost", "-input_cost", "-output_cost"],
520
+ help="Sort by cost (prefix with - for descending)",
521
+ )
522
+ list_parser.add_argument(
523
+ "--limit",
524
+ type=int,
525
+ help="Maximum number of results",
526
+ )
527
+ list_parser.add_argument(
528
+ "--json",
529
+ action="store_true",
530
+ help="Output as JSON",
531
+ )
532
+ list_parser.set_defaults(func=cmd_list)
533
+
534
+ # ---- run command ----
535
+ run_parser = subparsers.add_parser(
536
+ "run",
537
+ help="Run a model on input",
538
+ description="Run a model on input and output JSON to stdout",
539
+ )
540
+ run_parser.add_argument(
541
+ "model",
542
+ type=str,
543
+ help="Model ID to use (e.g., claude-3.5-haiku, gpt-4.1-mini)",
544
+ )
545
+ input_group = run_parser.add_mutually_exclusive_group()
546
+ input_group.add_argument(
547
+ "--input",
548
+ "-i",
549
+ type=str,
550
+ help="Input text (inline)",
551
+ )
552
+ input_group.add_argument(
553
+ "--file",
554
+ "-f",
555
+ type=str,
556
+ help="Read input from file",
557
+ )
558
+ run_parser.add_argument(
559
+ "--system",
560
+ "-s",
561
+ type=str,
562
+ help="System prompt",
563
+ )
564
+ run_parser.add_argument(
565
+ "--image",
566
+ type=str,
567
+ help="Path to image file to include with the prompt",
568
+ )
569
+ run_parser.add_argument(
570
+ "--max-tokens",
571
+ "-m",
572
+ type=int,
573
+ default=1024,
574
+ help="Maximum tokens to generate (default: 1024)",
575
+ )
576
+ run_parser.add_argument(
577
+ "--temperature",
578
+ "-t",
579
+ type=float,
580
+ help="Sampling temperature",
581
+ )
582
+ run_parser.add_argument(
583
+ "--pretty",
584
+ "-p",
585
+ action="store_true",
586
+ help="Pretty-print JSON output",
587
+ )
588
+ run_parser.add_argument(
589
+ "--verbose",
590
+ "-v",
591
+ action="store_true",
592
+ help="Include additional response metadata",
593
+ )
594
+ run_parser.set_defaults(func=cmd_run)
595
+
596
+ # ---- agent command ----
597
+ agent_parser = subparsers.add_parser(
598
+ "agent",
599
+ help="Run an agent loop with tools",
600
+ description="Run an agent loop with MCP servers and/or prefab tools",
601
+ )
602
+ agent_parser.add_argument(
603
+ "model",
604
+ type=str,
605
+ help="Model ID to use (e.g., claude-3.5-haiku, gpt-4.1-mini)",
606
+ )
607
+ agent_input_group = agent_parser.add_mutually_exclusive_group()
608
+ agent_input_group.add_argument(
609
+ "--input",
610
+ "-i",
611
+ type=str,
612
+ help="Input text (inline)",
613
+ )
614
+ agent_input_group.add_argument(
615
+ "--file",
616
+ "-f",
617
+ type=str,
618
+ help="Read input from file",
619
+ )
620
+ agent_parser.add_argument(
621
+ "--system",
622
+ "-s",
623
+ type=str,
624
+ help="System prompt",
625
+ )
626
+ agent_parser.add_argument(
627
+ "--image",
628
+ type=str,
629
+ help="Path to image file to include with the prompt",
630
+ )
631
+ agent_parser.add_argument(
632
+ "--mcp-config",
633
+ type=str,
634
+ help="Path to MCP config file (Claude Desktop format JSON)",
635
+ )
636
+ agent_parser.add_argument(
637
+ "--prefab",
638
+ type=str,
639
+ help="Comma-separated prefab tools: todo,memory,filesystem,sandbox",
640
+ )
641
+ agent_parser.add_argument(
642
+ "--max-rounds",
643
+ type=int,
644
+ default=10,
645
+ help="Maximum agent loop iterations (default: 10)",
646
+ )
647
+ agent_parser.add_argument(
648
+ "--max-tokens",
649
+ "-m",
650
+ type=int,
651
+ default=4096,
652
+ help="Maximum tokens to generate per response (default: 4096)",
653
+ )
654
+ agent_parser.add_argument(
655
+ "--temperature",
656
+ "-t",
657
+ type=float,
658
+ help="Sampling temperature",
659
+ )
660
+ agent_parser.set_defaults(func=cmd_agent)
661
+
662
+ args = parser.parse_args()
663
+
664
+ if not args.command:
665
+ parser.print_help()
666
+ return 0
667
+
668
+ return args.func(args)
669
+
670
+
671
+ if __name__ == "__main__":
672
+ sys.exit(main())
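The wheel also gains an entry_points.txt, so the rewritten CLI is presumably installed as a `deluge` console script (the argparse prog name above). A minimal sketch of driving the `run` subcommand from Python and parsing its JSON output, assuming `deluge` is on PATH and the provider's API key is already set in the environment:

    import json
    import subprocess

    # Invoke the new `deluge run` subcommand; cmd_run prints a single JSON
    # object ("model", "completion", "is_error", plus optional "usage"/"cost").
    proc = subprocess.run(
        ["deluge", "run", "claude-3.5-haiku", "-i", "What is 2+2?"],
        capture_output=True,
        text=True,
    )
    result = json.loads(proc.stdout)
    if result.get("is_error"):
        print("error:", result.get("error_message"))
    else:
        print(result["completion"])

The `agent` subcommand instead streams one JSON object per line ("text", "thinking", "tool_call", "tool_result", "error", and a final "done" record carrying usage and cost), so its output can be consumed line by line with json.loads.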