lm-deluge 0.0.79__py3-none-any.whl → 0.0.80__py3-none-any.whl

This diff compares the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
lm_deluge/api_requests/anthropic.py CHANGED
@@ -16,6 +16,7 @@ from lm_deluge.util.schema import (
     prepare_output_schema,
     transform_schema_for_anthropic,
 )
+from lm_deluge.warnings import maybe_warn
 
 from ..models import APIModel
 from .base import APIRequestBase, APIResponse
@@ -62,20 +63,45 @@ def _build_anthropic_request(
         "max_tokens": sampling_params.max_new_tokens,
     }
 
+    if model.id == "claude-4.5-opus" and sampling_params.global_effort:
+        request_json["effort"] = sampling_params.global_effort
+        _add_beta(base_headers, "effort-2025-11-24")
+
     # handle thinking
-    if model.reasoning_model and sampling_params.reasoning_effort:
-        # translate reasoning effort of low, medium, high to budget tokens
-        budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}.get(
-            sampling_params.reasoning_effort
-        )
-        request_json["thinking"] = {
-            "type": "enabled",
-            "budget_tokens": budget,
-        }
-        if "top_p" in request_json:
-            request_json["top_p"] = max(request_json["top_p"], 0.95)
-        request_json["temperature"] = 1.0
-        request_json["max_tokens"] += budget
+    if model.reasoning_model:
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.reasoning_effort is not None
+        ):
+            maybe_warn("WARN_THINKING_BUDGET_AND_REASONING_EFFORT")
+
+        if sampling_params.thinking_budget is not None:
+            budget = sampling_params.thinking_budget
+        elif sampling_params.reasoning_effort is not None:
+            # translate reasoning effort of low, medium, high to budget tokens
+            budget = {
+                "none": 0,
+                "minimal": 256,
+                "low": 1024,
+                "medium": 4096,
+                "high": 16384,
+            }.get(sampling_params.reasoning_effort)
+            assert isinstance(budget, int)
+        else:
+            budget = 0
+
+        if budget > 0:
+            request_json["thinking"] = {
+                "type": "enabled",
+                "budget_tokens": budget,
+            }
+            if "top_p" in request_json:
+                request_json["top_p"] = max(request_json["top_p"], 0.95)
+            request_json["temperature"] = 1.0
+            request_json["max_tokens"] += budget
+        else:
+            request_json["thinking"] = {"type": "disabled"}
+
     else:
         request_json["thinking"] = {"type": "disabled"}
         if sampling_params.reasoning_effort:
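
Taken together, the new Anthropic logic gives an explicit thinking_budget priority over reasoning_effort and only enables thinking for a positive budget. A standalone sketch of that precedence (not the package's actual helper, which mutates request_json in place):

    # Sketch: how 0.0.80 derives the Anthropic "thinking" block.
    EFFORT_TO_BUDGET = {"none": 0, "minimal": 256, "low": 1024, "medium": 4096, "high": 16384}

    def resolve_thinking(thinking_budget: int | None, reasoning_effort: str | None) -> dict:
        if thinking_budget is not None:
            budget = thinking_budget          # explicit budget wins
        elif reasoning_effort is not None:
            budget = EFFORT_TO_BUDGET[reasoning_effort]
        else:
            budget = 0
        if budget > 0:
            # the real builder also raises top_p to >= 0.95, forces temperature=1.0,
            # and adds the budget to max_tokens
            return {"type": "enabled", "budget_tokens": budget}
        return {"type": "disabled"}

    assert resolve_thinking(2048, "high") == {"type": "enabled", "budget_tokens": 2048}
    assert resolve_thinking(None, "none") == {"type": "disabled"}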
@@ -83,10 +109,11 @@ def _build_anthropic_request(
     if system_message is not None:
         request_json["system"] = system_message
 
-    # handle temp + top_p for opus 4.1/sonnet 4.5
+    # handle temp + top_p for opus 4.1/sonnet 4.5.
+    # TODO: make clearer / more user-friendly so there can be NotGiven
+    # and user can control which one they want to use
     if "4-1" in model.name or "4-5" in model.name:
-        if "temperature" in request_json and "top_p" in request_json:
-            request_json.pop("top_p")
+        request_json.pop("top_p")
 
     # Handle structured outputs (output_format)
     if context.output_schema:
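
The top_p hunk is a behavior change, not just cleanup: 0.0.79 dropped top_p only when temperature was also set, while 0.0.80 pops it unconditionally for 4-1/4-5 models. Since dict.pop without a default raises KeyError, this relies on top_p always being present on request_json by that point; a short illustration of the difference:

    params = {"temperature": 1.0, "top_p": 1.0}
    params.pop("top_p")        # 0.0.80 behavior: fine while the key is present
    params.pop("top_p", None)  # the defensive variant, if presence were not guaranteed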
lm_deluge/api_requests/gemini.py CHANGED
@@ -1,6 +1,5 @@
 import json
 import os
-from typing import Any
 
 from aiohttp import ClientResponse
 
@@ -52,47 +51,61 @@ async def _build_gemini_request(
         request_json["systemInstruction"] = {"parts": [{"text": system_message}]}
 
     # Handle reasoning models (thinking)
-    if model.reasoning_model:
-        thinking_config: dict[str, Any] | None = None
-        effort = sampling_params.reasoning_effort
-        is_gemini_3 = "gemini-3" in model.name.lower()
+    is_gemini_3 = "gemini-3" in model.name.lower()
+    if is_gemini_3:
+        # gemini3 MUST think
+        if not sampling_params.reasoning_effort:
+            maybe_warn("WARN_GEMINI3_NO_REASONING")
+            effort = "low"
+        else:
+            level_map = {
+                "none": "low",
+                "minimal": "low",
+                "low": "low",
+                "medium": "high",  # change when supported
+                "high": "high",
+            }
+            effort = level_map[sampling_params.reasoning_effort]
+        thinking_config = {"thinkingLevel": effort}
+        request_json["generationConfig"]["thinkingConfig"] = thinking_config
 
-        if is_gemini_3:
-            # Gemini 3 uses thinkingLevel instead of thinkingBudget
-            if effort in {"none", "minimal"}:
-                thinking_config = {"thinkingLevel": "low"}
-            elif effort is None:
-                # Default to high when reasoning is enabled but no preference was provided
-                thinking_config = {"thinkingLevel": "high"}
-            else:
-                # Map reasoning_effort to thinkingLevel
-                level_map = {
-                    "minimal": "low",
-                    "low": "low",
-                    "medium": "medium",  # Will work when supported
-                    "high": "high",
-                }
-                thinking_level = level_map.get(effort, "high")
-                thinking_config = {"thinkingLevel": thinking_level}
+    elif model.reasoning_model:
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.reasoning_effort is not None
+        ):
+            maybe_warn("WARN_THINKING_BUDGET_AND_REASONING_EFFORT")
+
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.thinking_budget > 0
+        ):
+            thinking_config = {
+                "includeThoughts": True,
+                "thinkingBudget": sampling_params.thinking_budget,
+            }
+        elif sampling_params.thinking_budget == -1:
+            # dynamic thinking
+            thinking_config = {"includeThoughts": True, "thinkingBudget": -1}
+        elif sampling_params.reasoning_effort not in [None, "none"]:
+            level_map = {
+                "minimal": 256,
+                "low": 1024,
+                "medium": 4096,
+                "high": 16384,
+            }
+            assert sampling_params.reasoning_effort in level_map
+            budget = level_map[sampling_params.reasoning_effort]
+            if "flash-lite" in model.id:
+                budget = max(budget, 512)
+            thinking_config = {"includeThoughts": True, "thinkingBudget": budget}
+        elif "2.5-pro" in model.id:
+            # 2.5 pro must think.
+            thinking_config = {"includeThoughts": True, "thinkingBudget": 128}
         else:
-            # Gemini 2.5 uses thinkingBudget (legacy)
-            if effort is None or effort == "none":
-                budget = 128 if "2.5-pro" in model.id else 0
-                # Explicitly disable thoughts when no effort is requested
-                thinking_config = {"includeThoughts": False, "thinkingBudget": budget}
-            else:
-                thinking_config = {"includeThoughts": True}
-                if (
-                    effort in {"minimal", "low", "medium", "high"}
-                    and "flash" in model.id
-                ):
-                    budget = {
-                        "minimal": 256,
-                        "low": 1024,
-                        "medium": 4096,
-                        "high": 16384,
-                    }[effort]
-                    thinking_config["thinkingBudget"] = budget
+            # no thoughts head empty
+            thinking_config = {"includeThoughts": False, "thinkingBudget": 0}
 
         request_json["generationConfig"]["thinkingConfig"] = thinking_config
 
     else:
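
The restructured Gemini branch splits on model generation first: Gemini 3 always thinks and takes a discrete thinkingLevel, while earlier reasoning models take a numeric thinkingBudget with the same thinking_budget-over-reasoning_effort precedence as the Anthropic builder. A simplified standalone sketch (it folds the warnings away and assumes a reasoning-capable model):

    def resolve_gemini_thinking(model_name, model_id, reasoning_effort, thinking_budget):
        if "gemini-3" in model_name.lower():
            # Gemini 3 must think; a missing effort falls back to "low" (with a warning)
            levels = {"none": "low", "minimal": "low", "low": "low",
                      "medium": "high", "high": "high"}
            return {"thinkingLevel": levels[reasoning_effort or "low"]}
        if thinking_budget is not None and thinking_budget > 0:
            return {"includeThoughts": True, "thinkingBudget": thinking_budget}
        if thinking_budget == -1:  # dynamic thinking
            return {"includeThoughts": True, "thinkingBudget": -1}
        if reasoning_effort not in (None, "none"):
            budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}[reasoning_effort]
            if "flash-lite" in model_id:
                budget = max(budget, 512)  # flash-lite enforces a 512-token floor
            return {"includeThoughts": True, "thinkingBudget": budget}
        if "2.5-pro" in model_id:
            return {"includeThoughts": True, "thinkingBudget": 128}  # 2.5 Pro must think
        return {"includeThoughts": False, "thinkingBudget": 0}

    assert resolve_gemini_thinking("gemini-3-pro", "gemini-3-pro", "medium", None) == {"thinkingLevel": "high"}
    assert resolve_gemini_thinking("gemini-2.5-flash", "gemini-2.5-flash", "low", None) == {
        "includeThoughts": True, "thinkingBudget": 1024
    }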
lm_deluge/client.py CHANGED
@@ -79,7 +79,7 @@ class _LLMClient(BaseModel):
     background: bool = False
     # sampling params - if provided, and sampling_params is not,
    # these override the defaults
-    temperature: float = 0.75
+    temperature: float = 1.0
     top_p: float = 1.0
     json_mode: bool = False
     max_new_tokens: int = 512
@@ -337,7 +337,7 @@ class _LLMClient(BaseModel):
         if "sampling_params" not in data or len(data.get("sampling_params", [])) == 0:
             data["sampling_params"] = [
                 SamplingParams(
-                    temperature=data.get("temperature", 0.75),
+                    temperature=data.get("temperature", 1.0),
                     top_p=data.get("top_p", 1.0),
                     json_mode=data.get("json_mode", False),
                     max_new_tokens=data.get("max_new_tokens", 512),
@@ -1067,7 +1067,7 @@ def LLMClient(
     extra_headers: dict[str, str] | None = None,
     use_responses_api: bool = False,
     background: bool = False,
-    temperature: float = 0.75,
+    temperature: float = 1.0,
     top_p: float = 1.0,
     json_mode: bool = False,
     max_new_tokens: int = 512,
@@ -1096,7 +1096,7 @@ def LLMClient(
     extra_headers: dict[str, str] | None = None,
     use_responses_api: bool = False,
     background: bool = False,
-    temperature: float = 0.75,
+    temperature: float = 1.0,
     top_p: float = 1.0,
     json_mode: bool = False,
     max_new_tokens: int = 512,
@@ -1124,7 +1124,7 @@ def LLMClient(
     extra_headers: dict[str, str] | None = None,
     use_responses_api: bool = False,
     background: bool = False,
-    temperature: float = 0.75,
+    temperature: float = 1.0,
     top_p: float = 1.0,
     json_mode: bool = False,
     max_new_tokens: int = 512,
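
For callers, the practical effect of these four hunks is that omitting temperature now yields 1.0 instead of 0.75. A usage sketch (the positional model argument shown here is illustrative; check the factory's actual signature):

    from lm_deluge.client import LLMClient

    # Pin the old 0.0.79 default explicitly if your prompts were tuned for it.
    client = LLMClient("claude-4.5-sonnet", temperature=0.75)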
lm_deluge/config.py CHANGED
@@ -4,11 +4,13 @@ from pydantic import BaseModel
 
 
 class SamplingParams(BaseModel):
-    temperature: float = 0.0
+    temperature: float = 1.0  # more typical for new models
     top_p: float = 1.0
     json_mode: bool = False
     max_new_tokens: int = 2_048
+    global_effort: Literal["low", "medium", "high"] = "high"  # for opus-4.5
     reasoning_effort: Literal["low", "medium", "high", "minimal", "none", None] = None
+    thinking_budget: int | None = None
     logprobs: bool = False
     top_logprobs: int | None = None
     strict_tools: bool = True
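
The two new SamplingParams fields wire up the builder behavior above; a quick usage sketch:

    from lm_deluge.config import SamplingParams

    # thinking_budget is the new explicit control; when both are set, the request
    # builders warn (WARN_THINKING_BUDGET_AND_REASONING_EFFORT) and the budget wins.
    sp = SamplingParams(reasoning_effort="high", thinking_budget=2048)
    assert sp.temperature == 1.0  # new default, up from 0.0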
lm_deluge/models/anthropic.py CHANGED
@@ -10,6 +10,19 @@ ANTHROPIC_MODELS = {
     # ░███
     # █████
     #
+    "claude-4.5-opus": {
+        "id": "claude-4.5-opus",
+        "name": "claude-opus-4-5-20251101",
+        "api_base": "https://api.anthropic.com/v1",
+        "api_key_env_var": "ANTHROPIC_API_KEY",
+        "supports_json": False,
+        "api_spec": "anthropic",
+        "input_cost": 5.0,
+        "cached_input_cost": 0.50,
+        "cache_write_cost": 6.25,
+        "output_cost": 25.0,
+        "reasoning_model": True,
+    },
     "claude-4.5-haiku": {
         "id": "claude-4.5-haiku",
         "name": "claude-haiku-4-5-20251001",
@@ -21,6 +34,7 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 0.10,
         "cache_write_cost": 1.25,
         "output_cost": 3.0,
+        "reasoning_model": True,
     },
     "claude-4.5-sonnet": {
         "id": "claude-4.5-sonnet",
@@ -33,6 +47,7 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 0.30,
         "cache_write_cost": 3.75,
         "output_cost": 15.0,
+        "reasoning_model": True,
     },
     "claude-4.1-opus": {
         "id": "claude-4.1-opus",
lm_deluge/warnings.py CHANGED
@@ -11,6 +11,8 @@ WARNINGS: dict[str, str] = {
     "WARN_MINIMAL_TO_NONE": "GPT-5.1 models don't support 'minimal' reasoning effort. Converting to 'none' for {model_name}.",
     "WARN_MEDIA_RESOLUTION_UNSUPPORTED": "media_resolution parameter is only supported for Gemini 3 models, ignoring for {model_name}.",
     "WARN_GEMINI3_MISSING_SIGNATURE": "Gemini 3 thought signature missing in {part_type}, injecting dummy signature 'context_engineering_is_the_way_to_go' to avoid API error.",
+    "WARN_GEMINI3_NO_REASONING": "Gemini 3 requires reasoning (thinkingConfig). Setting thinkingConfig to low.",
+    "WARN_THINKING_BUDGET_AND_REASONING_EFFORT": "`reasoning_effort` and `thinking_budget` both provided. `thinking_budget` will take priority.",
 }
 
 
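Both builders call these by key, e.g. maybe_warn("WARN_THINKING_BUDGET_AND_REASONING_EFFORT"). A minimal sketch of how a registry-backed, fire-once helper like this could work (the real implementation in lm_deluge/warnings.py may differ):

    import warnings

    WARNINGS = {"WARN_GEMINI3_NO_REASONING":
                "Gemini 3 requires reasoning (thinkingConfig). Setting thinkingConfig to low."}
    _seen: set[str] = set()

    def maybe_warn(key: str, **fmt) -> None:
        # look up the message by key, format any {placeholders}, warn once per process
        if key not in _seen:
            _seen.add(key)
            warnings.warn(WARNINGS[key].format(**fmt))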
lm_deluge-0.0.80.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.79
+Version: 0.0.80
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10
lm_deluge-0.0.80.dist-info/RECORD CHANGED
@@ -2,8 +2,8 @@ lm_deluge/__init__.py,sha256=zF5lAitfgJ8A28IXJ5BE9OUCqGOqSnGOWn3ZIlizNyY,822
 lm_deluge/batches.py,sha256=Km6QM5_7BlF2qEyo4WPlhkaZkpzrLqf50AaveHXQOoY,25127
 lm_deluge/cache.py,sha256=xO2AIYvP3tUpTMKQjwQQYfGRJSRi6e7sMlRhLjsS-u4,4873
 lm_deluge/cli.py,sha256=Ilww5gOw3J5v0NReq_Ra4hhxU4BCIJBl1oTGxJZKedc,12065
-lm_deluge/client.py,sha256=ZwDD4qkPFJsPxDMCijD6lz2s5ULL-hW58tGFN00BmSI,44796
-lm_deluge/config.py,sha256=7pTfqlg4qHf68qpckr21deVtCuao9b0ypiXT2k-nHUE,1210
+lm_deluge/client.py,sha256=VqCuFXM_ylO4v-lev85HMPFRHeU69tZo70favz-I2Uk,44791
+lm_deluge/config.py,sha256=C-_rVwAFL5sivLfKSkaa2ANMqqxKbyDCW86KfQB_Lck,1357
 lm_deluge/embed.py,sha256=CO-TOlC5kOTAM8lcnicoG4u4K664vCBwHF1vHa-nAGg,13382
 lm_deluge/errors.py,sha256=oHjt7YnxWbh-eXMScIzov4NvpJMo0-2r5J6Wh5DQ1tk,209
 lm_deluge/file.py,sha256=PTmlJQ-IaYcYUFun9V0bJ1NPVP84edJrR0hvCMWFylY,19697
@@ -15,14 +15,14 @@ lm_deluge/rerank.py,sha256=-NBAJdHz9OB-SWWJnHzkFmeVO4wR6lFV7Vw-SxG7aVo,11457
 lm_deluge/tool.py,sha256=ipgNy4OpfH3CA9OPQq5zfn1xO8H08GMvDynB8ZPQ5mA,30617
 lm_deluge/tracker.py,sha256=aeS9GUJpgOSQRVXAnGDvlMO8qYpSxpTNLYj2hrMg0m8,14757
 lm_deluge/usage.py,sha256=xz9tAw2hqaJvv9aAVhnQ6N1Arn7fS8Shb28VwCW26wI,5136
-lm_deluge/warnings.py,sha256=bAG9UXPnppk_oWGIsWpY3k5lWin4tganYFw0U7OEvJQ,2062
+lm_deluge/warnings.py,sha256=12RseSa9mYAFkbY783FQTP0x9RapRBErIQt4o7hzVnM,2321
 lm_deluge/api_requests/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-lm_deluge/api_requests/anthropic.py,sha256=OvkciXTHyrG1cFyC1vv6nYyCFTqtMgt1r15Q-pbHiUQ,10411
+lm_deluge/api_requests/anthropic.py,sha256=ytNeADgGeflmlm5gVQ0cJ5bgchJ_EZvKJIIt7Imxf2A,11338
 lm_deluge/api_requests/base.py,sha256=mXEM85mcU_5LD-ugELpCl28tv-tpHKcaxerTIVLQZVo,10436
 lm_deluge/api_requests/bedrock.py,sha256=mY1xTvgfCLyqLlfFFmu_baKgkVq1Df1_MJXeN_G1jWQ,15597
 lm_deluge/api_requests/chat_reasoning.py,sha256=sJvstvKFqsSBUjYcwxzGt2_FH4cEp3Z6gKcBPyPjGwk,236
 lm_deluge/api_requests/common.py,sha256=BZ3vRO5TB669_UsNKugkkuFSzoLHOYJIKt4nV4sf4vc,422
-lm_deluge/api_requests/gemini.py,sha256=gHmIfEY48B-MYlJYxYc8hT8ojmK16XSETcvfljRKAH0,10813
+lm_deluge/api_requests/gemini.py,sha256=FjYKisAjD6rW2fA6WyXnnRn3oqJBXMod1_8HtGWIyEU,11099
 lm_deluge/api_requests/mistral.py,sha256=8JZP2CDf1XZfaPcTk0WS4q-VfYYj58ptpoH8LD3MQG4,4528
 lm_deluge/api_requests/openai.py,sha256=E0oakhcb2T5Swfn6ATMjRZKuLyRrx4Zj5SREo1JILfc,28841
 lm_deluge/api_requests/response.py,sha256=vG194gAH5p7ulpNy4qy5Pryfb1p3ZV21-YGoj__ru3E,7436
@@ -49,7 +49,7 @@ lm_deluge/llm_tools/subagents.py,sha256=srJ7On7YR0Y8WuNvf5TJl_7IUfEtG3zlxZeLgmn_
 lm_deluge/llm_tools/todos.py,sha256=doKJZWLZlh4J_k6HkdwonWHfZTZaxEI9_XHAoNFnfQo,14906
 lm_deluge/llm_tools/translate.py,sha256=iXyYvQZ8bC44FWhBk4qpdqjKM1WFF7Shq-H2PxhPgg4,1452
 lm_deluge/models/__init__.py,sha256=54H24K_eADbfdEH9aNORrNEXvDLZCQ4TEekeLiWljSE,4619
-lm_deluge/models/anthropic.py,sha256=sFkS-g0OWgRnVoFMKxWkSUt0qy2LVrcO5KtbYAG26iY,6283
+lm_deluge/models/anthropic.py,sha256=X92EYIapos-8LXnIYiypPJcFhI0tqmXja_w8e9H4CF8,6781
 lm_deluge/models/bedrock.py,sha256=g1PbfceSRH2lWST3ja0mUlF3oTq4e4T-si6RMe7qXgg,4888
 lm_deluge/models/cerebras.py,sha256=u2FMXJF6xMr0euDRKLKMo_NVTOcvSrrEpehbHr8sSeE,2050
 lm_deluge/models/cohere.py,sha256=iXjYtM6jy_YL73Op8OfNsrMNopwae9y-Sw-4vF9cEBw,3406
@@ -74,8 +74,8 @@ lm_deluge/util/schema.py,sha256=q6uwhA4s1lM2dHT1Kwc46E7OY1VecMOtTEI0PTFn6tA,1320
 lm_deluge/util/spatial.py,sha256=BsF_UKhE-x0xBirc-bV1xSKZRTUhsOBdGqsMKme20C8,4099
 lm_deluge/util/validation.py,sha256=hz5dDb3ebvZrZhnaWxOxbNSVMI6nmaOODBkk0htAUhs,1575
 lm_deluge/util/xml.py,sha256=Ft4zajoYBJR3HHCt2oHwGfymGLdvp_gegVmJ-Wqk4Ck,10547
-lm_deluge-0.0.79.dist-info/licenses/LICENSE,sha256=uNNXGXPCw2TC7CUs7SEBkA-Mz6QBQFWUUEWDMgEs1dU,1058
-lm_deluge-0.0.79.dist-info/METADATA,sha256=wqNdfbJ_BIJT-uZMOvwX9RWgqqzUFM4rZ_a4KblAFus,13705
-lm_deluge-0.0.79.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-lm_deluge-0.0.79.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
-lm_deluge-0.0.79.dist-info/RECORD,,
+lm_deluge-0.0.80.dist-info/licenses/LICENSE,sha256=uNNXGXPCw2TC7CUs7SEBkA-Mz6QBQFWUUEWDMgEs1dU,1058
+lm_deluge-0.0.80.dist-info/METADATA,sha256=LJ2nPTs9WzdiP3kU5KPKUdOy_SuuiHRJCz9PINHEvZk,13705
+lm_deluge-0.0.80.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lm_deluge-0.0.80.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
+lm_deluge-0.0.80.dist-info/RECORD,,