lm-deluge 0.0.85__tar.gz → 0.0.87__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. {lm_deluge-0.0.85/src/lm_deluge.egg-info → lm_deluge-0.0.87}/PKG-INFO +1 -1
  2. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/pyproject.toml +1 -1
  3. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/anthropic.py +5 -1
  4. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/gemini.py +11 -3
  5. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/openai.py +22 -4
  6. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/client.py +24 -10
  7. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/config.py +3 -1
  8. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/__init__.py +5 -0
  9. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/grok.py +24 -0
  10. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/openai.py +31 -1
  11. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/openrouter.py +66 -8
  12. lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/__init__.py +95 -0
  13. lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/core.py +354 -0
  14. lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/docs/samples.py +696 -0
  15. lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +140 -0
  16. lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +261 -0
  17. lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +300 -0
  18. lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/examples/04_batch_classification.py +271 -0
  19. lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/examples/simple_qa.py +129 -0
  20. lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/optimizer.py +435 -0
  21. lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/proposer.py +235 -0
  22. lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/util.py +165 -0
  23. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/web_search.py +2 -2
  24. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/warnings.py +1 -0
  25. {lm_deluge-0.0.85 → lm_deluge-0.0.87/src/lm_deluge.egg-info}/PKG-INFO +1 -1
  26. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge.egg-info/SOURCES.txt +11 -0
  27. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/LICENSE +0 -0
  28. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/README.md +0 -0
  29. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/setup.cfg +0 -0
  30. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/__init__.py +0 -0
  31. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/__init__.py +0 -0
  32. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/base.py +0 -0
  33. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/bedrock.py +0 -0
  34. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/chat_reasoning.py +0 -0
  35. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/common.py +0 -0
  36. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
  37. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
  38. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
  39. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
  40. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
  41. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/mistral.py +0 -0
  42. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/response.py +0 -0
  43. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/batches.py +0 -0
  44. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/cache.py +0 -0
  45. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/cli.py +0 -0
  46. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/embed.py +0 -0
  47. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/errors.py +0 -0
  48. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/file.py +0 -0
  49. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/image.py +0 -0
  50. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/mock_openai.py +0 -0
  51. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/anthropic.py +0 -0
  52. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/arcee.py +0 -0
  53. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/bedrock.py +0 -0
  54. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/cerebras.py +0 -0
  55. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/cohere.py +0 -0
  56. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/deepseek.py +0 -0
  57. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/fireworks.py +0 -0
  58. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/google.py +0 -0
  59. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/groq.py +0 -0
  60. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/kimi.py +0 -0
  61. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/meta.py +0 -0
  62. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/minimax.py +0 -0
  63. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/mistral.py +0 -0
  64. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/together.py +0 -0
  65. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/zai.py +0 -0
  66. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/pipelines/__init__.py +0 -0
  67. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/pipelines/classify.py +0 -0
  68. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/pipelines/extract.py +0 -0
  69. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/pipelines/locate.py +0 -0
  70. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/pipelines/ocr.py +0 -0
  71. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/pipelines/score.py +0 -0
  72. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/pipelines/translate.py +0 -0
  73. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/prompt.py +0 -0
  74. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/request_context.py +0 -0
  75. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/rerank.py +0 -0
  76. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/__init__.py +0 -0
  77. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/builtin/anthropic/__init__.py +0 -0
  78. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/builtin/anthropic/bash.py +0 -0
  79. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/builtin/anthropic/computer_use.py +0 -0
  80. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/builtin/anthropic/editor.py +0 -0
  81. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/builtin/base.py +0 -0
  82. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/builtin/gemini.py +0 -0
  83. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/builtin/openai.py +0 -0
  84. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/cua/__init__.py +0 -0
  85. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/cua/actions.py +0 -0
  86. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/cua/base.py +0 -0
  87. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/cua/batch.py +0 -0
  88. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/cua/converters.py +0 -0
  89. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/cua/kernel.py +0 -0
  90. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/cua/trycua.py +0 -0
  91. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/__init__.py +0 -0
  92. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/batch_tool.py +0 -0
  93. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/docs.py +0 -0
  94. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/email.py +0 -0
  95. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/filesystem.py +0 -0
  96. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/memory.py +0 -0
  97. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/otc/__init__.py +0 -0
  98. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/otc/executor.py +0 -0
  99. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/otc/parse.py +0 -0
  100. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/random.py +0 -0
  101. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/sandbox.py +0 -0
  102. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/sheets.py +0 -0
  103. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/subagents.py +0 -0
  104. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/todos.py +0 -0
  105. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/tool_search.py +0 -0
  106. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tracker.py +0 -0
  107. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/usage.py +0 -0
  108. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/util/harmony.py +0 -0
  109. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/util/json.py +0 -0
  110. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/util/logprobs.py +0 -0
  111. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/util/schema.py +0 -0
  112. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/util/spatial.py +0 -0
  113. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/util/validation.py +0 -0
  114. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/util/xml.py +0 -0
  115. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
  116. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge.egg-info/requires.txt +0 -0
  117. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge.egg-info/top_level.txt +0 -0

PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.85
+Version: 0.0.87
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10

pyproject.toml
@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]

 [project]
 name = "lm_deluge"
-version = "0.0.85"
+version = "0.0.87"
 authors = [{ name = "Benjamin Anderson", email = "ben@trytaylor.ai" }]
 description = "Python utility for using LLM API models."
 readme = "README.md"

src/lm_deluge/api_requests/anthropic.py
@@ -78,6 +78,10 @@ def _build_anthropic_request(
     if sampling_params.thinking_budget is not None:
         budget = sampling_params.thinking_budget
     elif sampling_params.reasoning_effort is not None:
+        effort = sampling_params.reasoning_effort
+        if effort == "xhigh":
+            maybe_warn("WARN_XHIGH_TO_HIGH", model_name=context.model_name)
+            effort = "high"
         # translate reasoning effort of low, medium, high to budget tokens
         budget = {
             "none": 0,
@@ -85,7 +89,7 @@ def _build_anthropic_request(
             "low": 1024,
             "medium": 4096,
             "high": 16384,
-        }.get(sampling_params.reasoning_effort)
+        }.get(effort)
         assert isinstance(budget, int)
     else:
         budget = 0
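
The net effect of the anthropic.py change is that the string reasoning effort is translated into an Anthropic thinking budget, with the new "xhigh" level folded into "high". A minimal sketch of that translation (illustrative only, not the library's exact code; the mapping lists just the levels visible in this diff):

EFFORT_TO_BUDGET = {"none": 0, "low": 1024, "medium": 4096, "high": 16384}

def effort_to_thinking_budget(effort: str) -> int:
    # Anthropic has no level above "high", so "xhigh" is downgraded
    # (the library also emits WARN_XHIGH_TO_HIGH at this point).
    if effort == "xhigh":
        effort = "high"
    return EFFORT_TO_BUDGET[effort]

assert effort_to_thinking_budget("xhigh") == 16384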

src/lm_deluge/api_requests/gemini.py
@@ -58,6 +58,10 @@ async def _build_gemini_request(
             maybe_warn("WARN_GEMINI3_NO_REASONING")
             effort = "low"
         else:
+            effort_key = sampling_params.reasoning_effort
+            if effort_key == "xhigh":
+                maybe_warn("WARN_XHIGH_TO_HIGH", model_name=model.name)
+                effort_key = "high"
             level_map = {
                 "none": "low",
                 "minimal": "low",
@@ -65,7 +69,7 @@ async def _build_gemini_request(
                 "medium": "high",  # change when supported
                 "high": "high",
             }
-            effort = level_map[sampling_params.reasoning_effort]
+            effort = level_map[effort_key]
             thinking_config = {"thinkingLevel": effort}
             request_json["generationConfig"]["thinkingConfig"] = thinking_config

@@ -88,14 +92,18 @@ async def _build_gemini_request(
         # dynamic thinking
         thinking_config = {"includeThoughts": True, "thinkingBudget": -1}
     elif sampling_params.reasoning_effort not in [None, "none"]:
+        effort_key = sampling_params.reasoning_effort
+        if effort_key == "xhigh":
+            maybe_warn("WARN_XHIGH_TO_HIGH", model_name=model.name)
+            effort_key = "high"
         level_map = {
             "minimal": 256,
             "low": 1024,
             "medium": 4096,
             "high": 16384,
         }
-        assert sampling_params.reasoning_effort in level_map
-        budget = level_map[sampling_params.reasoning_effort]
+        assert effort_key in level_map
+        budget = level_map[effort_key]
         if "flash-lite" in model.id:
             budget = max(budget, 512)
         thinking_config = {"includeThoughts": True, "thinkingBudget": budget}

src/lm_deluge/api_requests/openai.py
@@ -61,8 +61,6 @@ async def _build_oa_chat_request(
     else:
         request_json["max_completion_tokens"] = sampling_params.max_new_tokens
     if model.reasoning_model:
-        request_json["temperature"] = 1.0
-        request_json["top_p"] = 1.0
         effort = sampling_params.reasoning_effort
         if effort in [None, "none"]:
             # Disable reasoning for Gemini models when no effort requested
@@ -79,6 +77,17 @@ async def _build_oa_chat_request(
         elif effort == "minimal" and "gpt-5" not in model.id:
             maybe_warn("WARN_MINIMAL_TO_LOW", model_name=context.model_name)
             effort = "low"
+        # xhigh only supported for specific models (gpt-5.2, gpt-5.1-codex-max)
+        if effort == "xhigh" and not model.supports_xhigh:
+            maybe_warn("WARN_XHIGH_TO_HIGH", model_name=context.model_name)
+            effort = "high"
+        # GPT-5.2 and gpt-5.1-codex-max don't support temperature/top_p when reasoning is enabled
+        if model.supports_xhigh and effort != "none":
+            del request_json["temperature"]
+            del request_json["top_p"]
+        else:
+            request_json["temperature"] = 1.0
+            request_json["top_p"] = 1.0
         request_json["reasoning_effort"] = effort
     else:
         if sampling_params.reasoning_effort:
@@ -323,8 +332,17 @@ async def _build_oa_responses_request(
         elif effort == "minimal" and "gpt-5" not in model.id:
             maybe_warn("WARN_MINIMAL_TO_LOW", model_name=context.model_name)
             effort = "low"
-        request_json["temperature"] = 1.0
-        request_json["top_p"] = 1.0
+        # xhigh only supported for specific models (gpt-5.2, gpt-5.1-codex-max)
+        if effort == "xhigh" and not model.supports_xhigh:
+            maybe_warn("WARN_XHIGH_TO_HIGH", model_name=context.model_name)
+            effort = "high"
+        # GPT-5.2 and gpt-5.1-codex-max don't support temperature/top_p when reasoning is enabled
+        if model.supports_xhigh and effort != "none":
+            del request_json["temperature"]
+            del request_json["top_p"]
+        else:
+            request_json["temperature"] = 1.0
+            request_json["top_p"] = 1.0
         request_json["reasoning"] = {
             "effort": effort,
             "summary": "auto",

src/lm_deluge/client.py
@@ -54,11 +54,12 @@ class _LLMClient(BaseModel):
     """

     _REASONING_SUFFIXES: ClassVar[
-        dict[str, Literal["low", "medium", "high", "minimal", "none"]]
+        dict[str, Literal["low", "medium", "high", "xhigh", "minimal", "none"]]
     ] = {
         "-low": "low",
         "-medium": "medium",
         "-high": "high",
+        "-xhigh": "xhigh",
         "-minimal": "minimal",
         "-none": "none",
     }
@@ -83,7 +84,9 @@ class _LLMClient(BaseModel):
     top_p: float = 1.0
     json_mode: bool = False
     max_new_tokens: int = 512
-    reasoning_effort: Literal["low", "medium", "high", "minimal", "none", None] = None
+    reasoning_effort: Literal[
+        "low", "medium", "high", "xhigh", "minimal", "none", None
+    ] = None
     global_effort: Literal["low", "medium", "high"] | None = None
     thinking_budget: int | None = None
     logprobs: bool = False
@@ -172,10 +175,13 @@ class _LLMClient(BaseModel):
     def _normalize_model_names(
         self, models: list[str]
     ) -> tuple[
-        list[str], list[Literal["low", "medium", "high", "minimal", "none"] | None]
+        list[str],
+        list[Literal["low", "medium", "high", "xhigh", "minimal", "none"] | None],
     ]:
         normalized: list[str] = []
-        efforts: list[Literal["low", "medium", "high", "minimal", "none"] | None] = []
+        efforts: list[
+            Literal["low", "medium", "high", "xhigh", "minimal", "none"] | None
+        ] = []

         for name in models:
             base_name = self._preprocess_openrouter_model(name)
@@ -190,7 +196,7 @@ class _LLMClient(BaseModel):
     def _align_sampling_params(
         self,
         per_model_efforts: list[
-            Literal["low", "medium", "high", "minimal", "none"] | None
+            Literal["low", "medium", "high", "xhigh", "minimal", "none"] | None
         ],
     ) -> None:
         if len(per_model_efforts) < len(self.model_names):
@@ -364,7 +370,9 @@ class _LLMClient(BaseModel):
     @classmethod
     def _strip_reasoning_suffix_if_registered(
         cls, model_name: str
-    ) -> tuple[str, Literal["low", "medium", "high", "minimal", "none"] | None]:
+    ) -> tuple[
+        str, Literal["low", "medium", "high", "xhigh", "minimal", "none"] | None
+    ]:
         """Remove reasoning suffix only when the trimmed model already exists."""
         for suffix, effort in cls._REASONING_SUFFIXES.items():
             if model_name.endswith(suffix) and len(model_name) > len(suffix):
@@ -918,7 +926,7 @@ class _LLMClient(BaseModel):
                 if not isinstance(result, (str, dict, list)):
                     result = str(result)

-                conversation.with_tool_result(call.id, result)  # type: ignore
+                conversation = conversation.with_tool_result(call.id, result)  # type: ignore

         if response is None:
             raise RuntimeError("model did not return a response")
@@ -1168,7 +1176,9 @@ def LLMClient(
     top_p: float = 1.0,
     json_mode: bool = False,
     max_new_tokens: int = 512,
-    reasoning_effort: Literal["low", "medium", "high", "minimal", "none", None] = None,
+    reasoning_effort: Literal[
+        "low", "medium", "high", "xhigh", "minimal", "none", None
+    ] = None,
     global_effort: Literal["low", "medium", "high"] | None = None,
     thinking_budget: int | None = None,
     logprobs: bool = False,
@@ -1199,7 +1209,9 @@ def LLMClient(
     top_p: float = 1.0,
     json_mode: bool = False,
     max_new_tokens: int = 512,
-    reasoning_effort: Literal["low", "medium", "high", "minimal", "none", None] = None,
+    reasoning_effort: Literal[
+        "low", "medium", "high", "xhigh", "minimal", "none", None
+    ] = None,
     global_effort: Literal["low", "medium", "high"] | None = None,
     thinking_budget: int | None = None,
     logprobs: bool = False,
@@ -1229,7 +1241,9 @@ def LLMClient(
     top_p: float = 1.0,
     json_mode: bool = False,
     max_new_tokens: int = 512,
-    reasoning_effort: Literal["low", "medium", "high", "minimal", "none", None] = None,
+    reasoning_effort: Literal[
+        "low", "medium", "high", "xhigh", "minimal", "none", None
+    ] = None,
     global_effort: Literal["low", "medium", "high"] | None = None,
     thinking_budget: int | None = None,
     logprobs: bool = False,
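
Because "-xhigh" is now in _REASONING_SUFFIXES and the suffix is stripped only when the trimmed name is a registered model, the new effort level can also be requested through a model-name suffix. A hedged sketch (assuming gpt-5.2, added in this release, as the registered base model):

from lm_deluge import LLMClient

# resolves to model "gpt-5.2" with per-model reasoning effort "xhigh"
client = LLMClient("gpt-5.2-xhigh")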

src/lm_deluge/config.py
@@ -9,7 +9,9 @@ class SamplingParams(BaseModel):
     json_mode: bool = False
     max_new_tokens: int = 2_048
     global_effort: Literal["low", "medium", "high"] = "high"  # for opus-4.5
-    reasoning_effort: Literal["low", "medium", "high", "minimal", "none", None] = None
+    reasoning_effort: Literal[
+        "low", "medium", "high", "xhigh", "minimal", "none", None
+    ] = None
     thinking_budget: int | None = None
     logprobs: bool = False
     top_logprobs: int | None = None

src/lm_deluge/models/__init__.py
@@ -40,6 +40,9 @@ class APIModel:
     supports_logprobs: bool = False
     supports_responses: bool = False
     reasoning_model: bool = False
+    supports_xhigh: bool = (
+        False  # supports xhigh reasoning_effort (gpt-5.2, gpt-5.1-codex-max)
+    )
     regions: list[str] | dict[str, int] = field(default_factory=list)
     # tokens_per_minute: int | None = None
     # requests_per_minute: int | None = None
@@ -99,6 +102,7 @@ def register_model(
     supports_logprobs: bool = False,
     supports_responses: bool = False,
     reasoning_model: bool = False,
+    supports_xhigh: bool = False,
     regions: list[str] | dict[str, int] = field(default_factory=list),
     # tokens_per_minute: int | None = None,
     # requests_per_minute: int | None = None,
@@ -118,6 +122,7 @@
         supports_logprobs=supports_logprobs,
         supports_responses=supports_responses,
         reasoning_model=reasoning_model,
+        supports_xhigh=supports_xhigh,
         regions=regions,
         # tokens_per_minute=tokens_per_minute,
         # requests_per_minute=requests_per_minute,

src/lm_deluge/models/grok.py
@@ -7,6 +7,30 @@ XAI_MODELS = {
     # 888 888 888 888 888 888888K
     # Y88b d88P 888 Y88..88P 888 "88b
     # "Y8888P88 888 "Y88P" 888 888
+    "grok-4.1-fast-reasoning": {
+        "id": "grok-4.1-fast-reasoning",
+        "name": "grok-4-1-fast-reasoning",
+        "api_base": "https://api.x.ai/v1",
+        "api_key_env_var": "GROK_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": True,
+        "api_spec": "openai",
+        "input_cost": 0.2,
+        "output_cost": 1.5,
+        "reasoning_model": False,
+    },
+    "grok-4.1-fast": {
+        "id": "grok-4.1-fast",
+        "name": "grok-4-1-fast-non-reasoning",
+        "api_base": "https://api.x.ai/v1",
+        "api_key_env_var": "GROK_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": True,
+        "api_spec": "openai",
+        "input_cost": 0.2,
+        "output_cost": 1.5,
+        "reasoning_model": False,
+    },
     "grok-code-fast-1": {
         "id": "grok-code-fast-1",
         "name": "grok-code-fast-1",

src/lm_deluge/models/openai.py
@@ -10,12 +10,42 @@ OPENAI_MODELS = {
     # ░███
     # █████
     # ░░░░░
+    "gpt-5.2": {
+        "id": "gpt-5.2",
+        "name": "gpt-5.2",
+        "api_base": "https://api.openai.com/v1",
+        "api_key_env_var": "OPENAI_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": False,
+        "supports_responses": True,
+        "api_spec": "openai",
+        "input_cost": 1.75,
+        "cached_input_cost": 0.175,
+        "output_cost": 14.0,
+        "reasoning_model": True,
+        "supports_xhigh": True,
+    },
+    "gpt-5.1-codex-max": {
+        "id": "gpt-5.1-codex-max",
+        "name": "gpt-5.1-codex-max",
+        "api_base": "https://api.openai.com/v1",
+        "api_key_env_var": "OPENAI_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": False,
+        "supports_responses": True,
+        "api_spec": "openai",
+        "input_cost": 1.25,
+        "cached_input_cost": 0.125,
+        "output_cost": 10.0,
+        "reasoning_model": True,
+        "supports_xhigh": True,
+    },
     "gpt-5.1": {
         "id": "gpt-5.1",
         "name": "gpt-5.1",
         "api_base": "https://api.openai.com/v1",
         "api_key_env_var": "OPENAI_API_KEY",
-        "supports_json": False,
+        "supports_json": True,
         "supports_logprobs": True,
         "supports_responses": True,
         "api_spec": "openai",

src/lm_deluge/models/openrouter.py
@@ -1,13 +1,25 @@
 OPENROUTER_MODELS = {
+    "intellect-3-openrouter": {
+        "id": "intellect-3-openrouter",
+        "name": "prime-intellect/intellect-3",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key_env_var": "OPENROUTER_API_KEY",
+        "supports_json": True,
+        "api_spec": "openai",
+        "input_cost": 0.2,
+        "cached_input_cost": 0.2,
+        "cache_write_cost": 0.2,
+        "output_cost": 1.10,
+    },
     "glm-4.6-openrouter": {
         "id": "glm-4.6-openrouter",
-        "name": "z-ai/glm-4.6",
+        "name": "z-ai/glm-4.6:exacto",
         "api_base": "https://openrouter.ai/api/v1",
         "api_key_env_var": "OPENROUTER_API_KEY",
         "supports_json": True,
         "api_spec": "openai",
         "input_cost": 0.6,
-        "cached_input_cost": 0.11,
+        "cached_input_cost": 0.6,
         "cache_write_cost": 0.6,
         "output_cost": 2.20,
     },
@@ -35,9 +47,21 @@ OPENROUTER_MODELS = {
         "cache_write_cost": 0.23,
         "output_cost": 0.9,
     },
+    "deepseek-3.2-exp-openrouter": {
+        "id": "deepseek-3.2-exp-openrouter",
+        "name": "deepseek/deepseek-v3.2-exp",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key_env_var": "OPENROUTER_API_KEY",
+        "supports_json": True,
+        "api_spec": "openai",
+        "input_cost": 0.27,
+        "cached_input_cost": 0.27,
+        "cache_write_cost": 0.27,
+        "output_cost": 0.4,
+    },
     "deepseek-3.2-openrouter": {
         "id": "deepseek-3.2-openrouter",
-        "name": "deepseek/deepseek-v3.2-exp",
+        "name": "deepseek/deepseek-v3.2",
         "api_base": "https://openrouter.ai/api/v1",
         "api_key_env_var": "OPENROUTER_API_KEY",
         "supports_json": True,
@@ -47,19 +71,53 @@ OPENROUTER_MODELS = {
         "cache_write_cost": 0.27,
         "output_cost": 0.4,
     },
-    # "gpt-oss-20b-openrouter": {},
-    # "gpt-oss-120b-openrouter": {},
+    "gpt-oss-20b-openrouter": {
+        "id": "gpt-oss-20b-openrouter",
+        "name": "openai/gpt-oss-20b",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key_env_var": "OPENROUTER_API_KEY",
+        "supports_json": True,
+        "api_spec": "openai",
+        "input_cost": 0.04,
+        "cached_input_cost": 0.04,
+        "cache_write_cost": 0.04,
+        "output_cost": 0.18,
+    },
+    "gpt-oss-120b-openrouter": {
+        "id": "gpt-oss-120b-openrouter",
+        "name": "openai/gpt-oss-120b",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key_env_var": "OPENROUTER_API_KEY",
+        "supports_json": True,
+        "api_spec": "openai",
+        "input_cost": 0.05,
+        "cached_input_cost": 0.05,
+        "cache_write_cost": 0.05,
+        "output_cost": 0.45,
+    },
     "kimi-k2-openrouter": {
         "id": "kimi-k2-openrouter",
-        "name": "z-ai/glm-4.6",
+        "name": "moonshotai/kimi-k2-0905:exacto",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key_env_var": "OPENROUTER_API_KEY",
+        "supports_json": True,
+        "api_spec": "openai",
+        "input_cost": 1,
+        "cached_input_cost": 1,
+        "cache_write_cost": 1,
+        "output_cost": 3,
+    },
+    "kimi-k2-thinking-openrouter": {
+        "id": "kimi-k2-thinking-openrouter",
+        "name": "moonshotai/kimi-k2-thinking",
         "api_base": "https://openrouter.ai/api/v1",
         "api_key_env_var": "OPENROUTER_API_KEY",
         "supports_json": True,
         "api_spec": "openai",
         "input_cost": 0.6,
-        "cached_input_cost": 0.11,
+        "cached_input_cost": 0.6,
         "cache_write_cost": 0.6,
-        "output_cost": 2.20,
+        "output_cost": 2.5,
     },
     "olmo-3-32b-think-openrouter": {
         "id": "olmo-3-32b-think-openrouter",

src/lm_deluge/pipelines/gepa/__init__.py (new file)
@@ -0,0 +1,95 @@
+"""
+GEPA (Genetic Pareto) prompt optimizer for lm-deluge.
+
+This module provides an evolutionary optimizer for text components in AI systems.
+It analyzes whole trajectories to propose improvements to prompts, tool descriptions,
+and other text-based configuration.
+
+Example usage:
+    from lm_deluge import LLMClient
+    from lm_deluge.prompt import Conversation, Message
+    from lm_deluge.pipelines.gepa import Component, EvalResult, optimize
+
+    # Define components to optimize
+    components = {
+        "system_prompt": Component(
+            description="Instructions given to the model",
+            value="You are a helpful assistant.",
+        ),
+    }
+
+    # Define how to evaluate one example
+    def evaluate(client: LLMClient, values: dict[str, str], example: dict) -> EvalResult:
+        # Build prompt with current component values
+        conv = Conversation.system(values["system_prompt"])
+        conv = conv.add(Message.user(example["question"]))
+
+        # Run inference
+        response = client.process_prompts_sync([conv], show_progress=False)[0]
+        answer = response.completion
+
+        # Score the result
+        correct = example["answer"].lower() in answer.lower()
+        score = 1.0 if correct else 0.0
+
+        # Build feedback for the proposer
+        feedback = f"Score: {score}. Expected: {example['answer']}"
+
+        # Return full trajectory
+        full_conv = conv.add(Message.ai(answer))
+        return EvalResult(conversation=full_conv, score=score, feedback=feedback)
+
+    # Run optimization
+    result = optimize(
+        components=components,
+        evaluate_fn=evaluate,
+        dataset=train_examples,
+        task_client=LLMClient("gpt-4o-mini"),
+        proposer_client=LLMClient("gpt-4o"),
+        max_iterations=50,
+    )
+
+    print(f"Best score: {result.best_score}")
+    print(f"Best prompt: {result.best_candidate['system_prompt']}")
+"""
+
+from lm_deluge.pipelines.gepa.core import (
+    Component,
+    EvalResult,
+    GEPAResult,
+    GEPAState,
+    Proposal,
+)
+from lm_deluge.pipelines.gepa.optimizer import GEPAEngine, optimize
+from lm_deluge.pipelines.gepa.proposer import (
+    DEFAULT_PROPOSAL_PROMPT,
+    build_proposal_prompt,
+    parse_proposal_response,
+    propose_improvement_sync,
+)
+from lm_deluge.pipelines.gepa.util import (
+    extract_text_from_response,
+    format_components_for_prompt,
+    format_conversation_compact,
+)
+
+__all__ = [
+    # Core types
+    "Component",
+    "EvalResult",
+    "Proposal",
+    "GEPAState",
+    "GEPAResult",
+    # Main API
+    "optimize",
+    "GEPAEngine",
+    # Proposer utilities
+    "DEFAULT_PROPOSAL_PROMPT",
+    "build_proposal_prompt",
+    "parse_proposal_response",
+    "propose_improvement_sync",
+    # Formatting utilities
+    "format_conversation_compact",
+    "format_components_for_prompt",
+    "extract_text_from_response",
+]