lm-deluge 0.0.86__tar.gz → 0.0.87__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lm_deluge-0.0.86/src/lm_deluge.egg-info → lm_deluge-0.0.87}/PKG-INFO +1 -1
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/pyproject.toml +1 -1
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/anthropic.py +5 -1
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/gemini.py +11 -3
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/openai.py +22 -4
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/client.py +23 -9
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/config.py +3 -1
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/models/__init__.py +5 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/models/grok.py +24 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/models/openai.py +31 -1
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/models/openrouter.py +66 -8
- lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/__init__.py +95 -0
- lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/core.py +354 -0
- lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/docs/samples.py +696 -0
- lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +140 -0
- lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +261 -0
- lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +300 -0
- lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/examples/04_batch_classification.py +271 -0
- lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/examples/simple_qa.py +129 -0
- lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/optimizer.py +435 -0
- lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/proposer.py +235 -0
- lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/util.py +165 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/warnings.py +1 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87/src/lm_deluge.egg-info}/PKG-INFO +1 -1
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge.egg-info/SOURCES.txt +11 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/LICENSE +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/README.md +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/setup.cfg +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/__init__.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/__init__.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/base.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/bedrock.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/chat_reasoning.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/common.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/mistral.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/response.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/batches.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/cache.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/cli.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/embed.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/errors.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/file.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/image.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/mock_openai.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/models/anthropic.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/models/arcee.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/models/bedrock.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/models/cerebras.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/models/cohere.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/models/deepseek.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/models/fireworks.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/models/google.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/models/groq.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/models/kimi.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/models/meta.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/models/minimax.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/models/mistral.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/models/together.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/models/zai.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/pipelines/__init__.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/pipelines/classify.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/pipelines/extract.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/pipelines/locate.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/pipelines/ocr.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/pipelines/score.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/pipelines/translate.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/prompt.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/request_context.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/rerank.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tool/__init__.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tool/builtin/anthropic/__init__.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tool/builtin/anthropic/bash.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tool/builtin/anthropic/computer_use.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tool/builtin/anthropic/editor.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tool/builtin/base.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tool/builtin/gemini.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tool/builtin/openai.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tool/cua/__init__.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tool/cua/actions.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tool/cua/base.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tool/cua/batch.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tool/cua/converters.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tool/cua/kernel.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tool/cua/trycua.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/__init__.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/batch_tool.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/docs.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/email.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/filesystem.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/memory.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/otc/__init__.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/otc/executor.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/otc/parse.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/random.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/sandbox.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/sheets.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/subagents.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/todos.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/tool_search.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/web_search.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/tracker.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/usage.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/util/harmony.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/util/json.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/util/logprobs.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/util/schema.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/util/spatial.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/util/validation.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge/util/xml.py +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge.egg-info/requires.txt +0 -0
- {lm_deluge-0.0.86 → lm_deluge-0.0.87}/src/lm_deluge.egg-info/top_level.txt +0 -0
src/lm_deluge/api_requests/anthropic.py

@@ -78,6 +78,10 @@ def _build_anthropic_request(
     if sampling_params.thinking_budget is not None:
         budget = sampling_params.thinking_budget
     elif sampling_params.reasoning_effort is not None:
+        effort = sampling_params.reasoning_effort
+        if effort == "xhigh":
+            maybe_warn("WARN_XHIGH_TO_HIGH", model_name=context.model_name)
+            effort = "high"
         # translate reasoning effort of low, medium, high to budget tokens
         budget = {
             "none": 0,
@@ -85,7 +89,7 @@ def _build_anthropic_request(
             "low": 1024,
             "medium": 4096,
             "high": 16384,
-        }.get(
+        }.get(effort)
         assert isinstance(budget, int)
     else:
         budget = 0
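
Both Anthropic hunks fold the new "xhigh" effort into the existing "high" thinking budget before the dict lookup. A minimal sketch of the resulting mapping (illustrative only; the real table lives in _build_anthropic_request above):

    def resolve_anthropic_budget(effort: str) -> int:
        # "xhigh" is downgraded to "high" (the library also emits WARN_XHIGH_TO_HIGH)
        if effort == "xhigh":
            effort = "high"
        return {"none": 0, "low": 1024, "medium": 4096, "high": 16384}[effort]

    assert resolve_anthropic_budget("xhigh") == resolve_anthropic_budget("high") == 16384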
src/lm_deluge/api_requests/gemini.py

@@ -58,6 +58,10 @@ async def _build_gemini_request(
             maybe_warn("WARN_GEMINI3_NO_REASONING")
             effort = "low"
         else:
+            effort_key = sampling_params.reasoning_effort
+            if effort_key == "xhigh":
+                maybe_warn("WARN_XHIGH_TO_HIGH", model_name=model.name)
+                effort_key = "high"
             level_map = {
                 "none": "low",
                 "minimal": "low",
@@ -65,7 +69,7 @@ async def _build_gemini_request(
                 "medium": "high",  # change when supported
                 "high": "high",
             }
-            effort = level_map[
+            effort = level_map[effort_key]
         thinking_config = {"thinkingLevel": effort}
         request_json["generationConfig"]["thinkingConfig"] = thinking_config

@@ -88,14 +92,18 @@ async def _build_gemini_request(
             # dynamic thinking
             thinking_config = {"includeThoughts": True, "thinkingBudget": -1}
         elif sampling_params.reasoning_effort not in [None, "none"]:
+            effort_key = sampling_params.reasoning_effort
+            if effort_key == "xhigh":
+                maybe_warn("WARN_XHIGH_TO_HIGH", model_name=model.name)
+                effort_key = "high"
             level_map = {
                 "minimal": 256,
                 "low": 1024,
                 "medium": 4096,
                 "high": 16384,
             }
-            assert
-            budget = level_map[
+            assert effort_key in level_map
+            budget = level_map[effort_key]
             if "flash-lite" in model.id:
                 budget = max(budget, 512)
             thinking_config = {"includeThoughts": True, "thinkingBudget": budget}
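
Gemini applies the same downgrade on both the thinkingLevel path and the token-budget path, and the flash-lite 512-token floor is preserved. A small sketch of the budget path under those assumptions (illustrative, not the package's code; the model id is only there to contain "flash-lite"):

    def gemini_thinking_budget(effort: str, model_id: str) -> int:
        if effort == "xhigh":
            effort = "high"  # downgraded, mirroring WARN_XHIGH_TO_HIGH above
        budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}[effort]
        if "flash-lite" in model_id:
            budget = max(budget, 512)  # flash-lite models enforce a minimum budget
        return budget

    assert gemini_thinking_budget("minimal", "gemini-2.5-flash-lite") == 512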
src/lm_deluge/api_requests/openai.py

@@ -61,8 +61,6 @@ async def _build_oa_chat_request(
     else:
         request_json["max_completion_tokens"] = sampling_params.max_new_tokens
     if model.reasoning_model:
-        request_json["temperature"] = 1.0
-        request_json["top_p"] = 1.0
         effort = sampling_params.reasoning_effort
         if effort in [None, "none"]:
             # Disable reasoning for Gemini models when no effort requested
@@ -79,6 +77,17 @@ async def _build_oa_chat_request(
         elif effort == "minimal" and "gpt-5" not in model.id:
             maybe_warn("WARN_MINIMAL_TO_LOW", model_name=context.model_name)
             effort = "low"
+        # xhigh only supported for specific models (gpt-5.2, gpt-5.1-codex-max)
+        if effort == "xhigh" and not model.supports_xhigh:
+            maybe_warn("WARN_XHIGH_TO_HIGH", model_name=context.model_name)
+            effort = "high"
+        # GPT-5.2 and gpt-5.1-codex-max don't support temperature/top_p when reasoning is enabled
+        if model.supports_xhigh and effort != "none":
+            del request_json["temperature"]
+            del request_json["top_p"]
+        else:
+            request_json["temperature"] = 1.0
+            request_json["top_p"] = 1.0
         request_json["reasoning_effort"] = effort
     else:
         if sampling_params.reasoning_effort:

@@ -323,8 +332,17 @@ async def _build_oa_responses_request(
         elif effort == "minimal" and "gpt-5" not in model.id:
             maybe_warn("WARN_MINIMAL_TO_LOW", model_name=context.model_name)
             effort = "low"
-
-
+        # xhigh only supported for specific models (gpt-5.2, gpt-5.1-codex-max)
+        if effort == "xhigh" and not model.supports_xhigh:
+            maybe_warn("WARN_XHIGH_TO_HIGH", model_name=context.model_name)
+            effort = "high"
+        # GPT-5.2 and gpt-5.1-codex-max don't support temperature/top_p when reasoning is enabled
+        if model.supports_xhigh and effort != "none":
+            del request_json["temperature"]
+            del request_json["top_p"]
+        else:
+            request_json["temperature"] = 1.0
+            request_json["top_p"] = 1.0
         request_json["reasoning"] = {
             "effort": effort,
             "summary": "auto",
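
For OpenAI-style requests, "xhigh" is passed through only when the model advertises supports_xhigh (gpt-5.2 and gpt-5.1-codex-max, registered later in this diff); for those models temperature and top_p are dropped from the request rather than pinned to 1.0. A hedged usage sketch built from the public API shown elsewhere in this release:

    from lm_deluge import LLMClient
    from lm_deluge.prompt import Conversation, Message

    # xhigh is honored on gpt-5.2; other reasoning models fall back to "high"
    # with a warning.
    conv = Conversation.system("You are a careful mathematician.")
    conv = conv.add(Message.user("Prove that sqrt(2) is irrational."))

    client = LLMClient("gpt-5.2", reasoning_effort="xhigh")
    resp = client.process_prompts_sync([conv], show_progress=False)[0]
    print(resp.completion)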
src/lm_deluge/client.py

@@ -54,11 +54,12 @@ class _LLMClient(BaseModel):
     """

     _REASONING_SUFFIXES: ClassVar[
-        dict[str, Literal["low", "medium", "high", "minimal", "none"]]
+        dict[str, Literal["low", "medium", "high", "xhigh", "minimal", "none"]]
     ] = {
         "-low": "low",
         "-medium": "medium",
         "-high": "high",
+        "-xhigh": "xhigh",
         "-minimal": "minimal",
         "-none": "none",
     }
@@ -83,7 +84,9 @@ class _LLMClient(BaseModel):
     top_p: float = 1.0
     json_mode: bool = False
     max_new_tokens: int = 512
-    reasoning_effort: Literal[
+    reasoning_effort: Literal[
+        "low", "medium", "high", "xhigh", "minimal", "none", None
+    ] = None
     global_effort: Literal["low", "medium", "high"] | None = None
     thinking_budget: int | None = None
     logprobs: bool = False
@@ -172,10 +175,13 @@ class _LLMClient(BaseModel):
     def _normalize_model_names(
         self, models: list[str]
     ) -> tuple[
-        list[str],
+        list[str],
+        list[Literal["low", "medium", "high", "xhigh", "minimal", "none"] | None],
     ]:
         normalized: list[str] = []
-        efforts: list[
+        efforts: list[
+            Literal["low", "medium", "high", "xhigh", "minimal", "none"] | None
+        ] = []

         for name in models:
             base_name = self._preprocess_openrouter_model(name)
@@ -190,7 +196,7 @@ class _LLMClient(BaseModel):
     def _align_sampling_params(
         self,
         per_model_efforts: list[
-            Literal["low", "medium", "high", "minimal", "none"] | None
+            Literal["low", "medium", "high", "xhigh", "minimal", "none"] | None
         ],
     ) -> None:
         if len(per_model_efforts) < len(self.model_names):
@@ -364,7 +370,9 @@ class _LLMClient(BaseModel):
     @classmethod
     def _strip_reasoning_suffix_if_registered(
         cls, model_name: str
-    ) -> tuple[
+    ) -> tuple[
+        str, Literal["low", "medium", "high", "xhigh", "minimal", "none"] | None
+    ]:
         """Remove reasoning suffix only when the trimmed model already exists."""
         for suffix, effort in cls._REASONING_SUFFIXES.items():
             if model_name.endswith(suffix) and len(model_name) > len(suffix):
@@ -1168,7 +1176,9 @@ def LLMClient(
     top_p: float = 1.0,
     json_mode: bool = False,
     max_new_tokens: int = 512,
-    reasoning_effort: Literal[
+    reasoning_effort: Literal[
+        "low", "medium", "high", "xhigh", "minimal", "none", None
+    ] = None,
     global_effort: Literal["low", "medium", "high"] | None = None,
     thinking_budget: int | None = None,
     logprobs: bool = False,
@@ -1199,7 +1209,9 @@ def LLMClient(
     top_p: float = 1.0,
     json_mode: bool = False,
     max_new_tokens: int = 512,
-    reasoning_effort: Literal[
+    reasoning_effort: Literal[
+        "low", "medium", "high", "xhigh", "minimal", "none", None
+    ] = None,
     global_effort: Literal["low", "medium", "high"] | None = None,
     thinking_budget: int | None = None,
     logprobs: bool = False,
@@ -1229,7 +1241,9 @@ def LLMClient(
     top_p: float = 1.0,
     json_mode: bool = False,
     max_new_tokens: int = 512,
-    reasoning_effort: Literal[
+    reasoning_effort: Literal[
+        "low", "medium", "high", "xhigh", "minimal", "none", None
+    ] = None,
     global_effort: Literal["low", "medium", "high"] | None = None,
     thinking_budget: int | None = None,
     logprobs: bool = False,
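
The client also registers a "-xhigh" model-name suffix alongside the existing "-low"/"-medium"/"-high" ones, so the effort can be encoded directly in the model name. A brief sketch, assuming the new suffix is stripped exactly like the existing ones:

    from lm_deluge import LLMClient

    # These two clients should request the same effort level: the "-xhigh"
    # suffix is stripped and mapped to reasoning_effort="xhigh".
    a = LLMClient("gpt-5.2-xhigh")
    b = LLMClient("gpt-5.2", reasoning_effort="xhigh")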
src/lm_deluge/config.py

@@ -9,7 +9,9 @@ class SamplingParams(BaseModel):
     json_mode: bool = False
     max_new_tokens: int = 2_048
     global_effort: Literal["low", "medium", "high"] = "high"  # for opus-4.5
-    reasoning_effort: Literal[
+    reasoning_effort: Literal[
+        "low", "medium", "high", "xhigh", "minimal", "none", None
+    ] = None
     thinking_budget: int | None = None
     logprobs: bool = False
     top_logprobs: int | None = None
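
SamplingParams accepts the new literal as well, so per-request overrides can ask for "xhigh" directly. A one-line sketch, assuming SamplingParams is imported from lm_deluge.config:

    from lm_deluge.config import SamplingParams

    params = SamplingParams(reasoning_effort="xhigh", max_new_tokens=2_048)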
src/lm_deluge/models/__init__.py

@@ -40,6 +40,9 @@ class APIModel:
     supports_logprobs: bool = False
     supports_responses: bool = False
     reasoning_model: bool = False
+    supports_xhigh: bool = (
+        False  # supports xhigh reasoning_effort (gpt-5.2, gpt-5.1-codex-max)
+    )
     regions: list[str] | dict[str, int] = field(default_factory=list)
     # tokens_per_minute: int | None = None
     # requests_per_minute: int | None = None
@@ -99,6 +102,7 @@ def register_model(
     supports_logprobs: bool = False,
     supports_responses: bool = False,
     reasoning_model: bool = False,
+    supports_xhigh: bool = False,
     regions: list[str] | dict[str, int] = field(default_factory=list),
     # tokens_per_minute: int | None = None,
     # requests_per_minute: int | None = None,
@@ -118,6 +122,7 @@ def register_model(
         supports_logprobs=supports_logprobs,
         supports_responses=supports_responses,
         reasoning_model=reasoning_model,
+        supports_xhigh=supports_xhigh,
         regions=regions,
         # tokens_per_minute=tokens_per_minute,
         # requests_per_minute=requests_per_minute,
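
APIModel and register_model gain a supports_xhigh capability flag that the request builders consult before passing "xhigh" through. A self-contained sketch of that check, using a hypothetical stand-in class rather than the package's APIModel:

    from dataclasses import dataclass

    @dataclass
    class Caps:  # hypothetical stand-in for APIModel's new field
        supports_xhigh: bool = False

    def effective_effort(effort: str, caps: Caps) -> str:
        # same downgrade rule as the anthropic.py/openai.py hunks above
        return "high" if effort == "xhigh" and not caps.supports_xhigh else effort

    assert effective_effort("xhigh", Caps()) == "high"
    assert effective_effort("xhigh", Caps(supports_xhigh=True)) == "xhigh"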
src/lm_deluge/models/grok.py

@@ -7,6 +7,30 @@ XAI_MODELS = {
     # 888 888 888 888 888 888888K
     # Y88b d88P 888 Y88..88P 888 "88b
     # "Y8888P88 888 "Y88P" 888 888
+    "grok-4.1-fast-reasoning": {
+        "id": "grok-4.1-fast-reasoning",
+        "name": "grok-4-1-fast-reasoning",
+        "api_base": "https://api.x.ai/v1",
+        "api_key_env_var": "GROK_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": True,
+        "api_spec": "openai",
+        "input_cost": 0.2,
+        "output_cost": 1.5,
+        "reasoning_model": False,
+    },
+    "grok-4.1-fast": {
+        "id": "grok-4.1-fast",
+        "name": "grok-4-1-fast-non-reasoning",
+        "api_base": "https://api.x.ai/v1",
+        "api_key_env_var": "GROK_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": True,
+        "api_spec": "openai",
+        "input_cost": 0.2,
+        "output_cost": 1.5,
+        "reasoning_model": False,
+    },
     "grok-code-fast-1": {
         "id": "grok-code-fast-1",
         "name": "grok-code-fast-1",
src/lm_deluge/models/openai.py

@@ -10,12 +10,42 @@ OPENAI_MODELS = {
     # ░███
     # █████
     # ░░░░░
+    "gpt-5.2": {
+        "id": "gpt-5.2",
+        "name": "gpt-5.2",
+        "api_base": "https://api.openai.com/v1",
+        "api_key_env_var": "OPENAI_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": False,
+        "supports_responses": True,
+        "api_spec": "openai",
+        "input_cost": 1.75,
+        "cached_input_cost": 0.175,
+        "output_cost": 14.0,
+        "reasoning_model": True,
+        "supports_xhigh": True,
+    },
+    "gpt-5.1-codex-max": {
+        "id": "gpt-5.1-codex-max",
+        "name": "gpt-5.1-codex-max",
+        "api_base": "https://api.openai.com/v1",
+        "api_key_env_var": "OPENAI_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": False,
+        "supports_responses": True,
+        "api_spec": "openai",
+        "input_cost": 1.25,
+        "cached_input_cost": 0.125,
+        "output_cost": 10.0,
+        "reasoning_model": True,
+        "supports_xhigh": True,
+    },
     "gpt-5.1": {
         "id": "gpt-5.1",
         "name": "gpt-5.1",
         "api_base": "https://api.openai.com/v1",
         "api_key_env_var": "OPENAI_API_KEY",
-        "supports_json":
+        "supports_json": True,
         "supports_logprobs": True,
         "supports_responses": True,
         "api_spec": "openai",
src/lm_deluge/models/openrouter.py

@@ -1,13 +1,25 @@
 OPENROUTER_MODELS = {
+    "intellect-3-openrouter": {
+        "id": "intellect-3-openrouter",
+        "name": "prime-intellect/intellect-3",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key_env_var": "OPENROUTER_API_KEY",
+        "supports_json": True,
+        "api_spec": "openai",
+        "input_cost": 0.2,
+        "cached_input_cost": 0.2,
+        "cache_write_cost": 0.2,
+        "output_cost": 1.10,
+    },
     "glm-4.6-openrouter": {
         "id": "glm-4.6-openrouter",
-        "name": "z-ai/glm-4.6",
+        "name": "z-ai/glm-4.6:exacto",
         "api_base": "https://openrouter.ai/api/v1",
         "api_key_env_var": "OPENROUTER_API_KEY",
         "supports_json": True,
         "api_spec": "openai",
         "input_cost": 0.6,
-        "cached_input_cost": 0.
+        "cached_input_cost": 0.6,
         "cache_write_cost": 0.6,
         "output_cost": 2.20,
     },
@@ -35,9 +47,21 @@ OPENROUTER_MODELS = {
         "cache_write_cost": 0.23,
         "output_cost": 0.9,
     },
+    "deepseek-3.2-exp-openrouter": {
+        "id": "deepseek-3.2-exp-openrouter",
+        "name": "deepseek/deepseek-v3.2-exp",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key_env_var": "OPENROUTER_API_KEY",
+        "supports_json": True,
+        "api_spec": "openai",
+        "input_cost": 0.27,
+        "cached_input_cost": 0.27,
+        "cache_write_cost": 0.27,
+        "output_cost": 0.4,
+    },
     "deepseek-3.2-openrouter": {
         "id": "deepseek-3.2-openrouter",
-        "name": "deepseek/deepseek-v3.2
+        "name": "deepseek/deepseek-v3.2",
         "api_base": "https://openrouter.ai/api/v1",
         "api_key_env_var": "OPENROUTER_API_KEY",
         "supports_json": True,
@@ -47,19 +71,53 @@ OPENROUTER_MODELS = {
         "cache_write_cost": 0.27,
         "output_cost": 0.4,
     },
-
-
+    "gpt-oss-20b-openrouter": {
+        "id": "gpt-oss-20b-openrouter",
+        "name": "openai/gpt-oss-20b",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key_env_var": "OPENROUTER_API_KEY",
+        "supports_json": True,
+        "api_spec": "openai",
+        "input_cost": 0.04,
+        "cached_input_cost": 0.04,
+        "cache_write_cost": 0.04,
+        "output_cost": 0.18,
+    },
+    "gpt-oss-120b-openrouter": {
+        "id": "gpt-oss-120b-openrouter",
+        "name": "openai/gpt-oss-120b",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key_env_var": "OPENROUTER_API_KEY",
+        "supports_json": True,
+        "api_spec": "openai",
+        "input_cost": 0.05,
+        "cached_input_cost": 0.05,
+        "cache_write_cost": 0.05,
+        "output_cost": 0.45,
+    },
     "kimi-k2-openrouter": {
         "id": "kimi-k2-openrouter",
-        "name": "
+        "name": "moonshotai/kimi-k2-0905:exacto",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key_env_var": "OPENROUTER_API_KEY",
+        "supports_json": True,
+        "api_spec": "openai",
+        "input_cost": 1,
+        "cached_input_cost": 1,
+        "cache_write_cost": 1,
+        "output_cost": 3,
+    },
+    "kimi-k2-thinking-openrouter": {
+        "id": "kimi-k2-thinking-openrouter",
+        "name": "moonshotai/kimi-k2-thinking",
         "api_base": "https://openrouter.ai/api/v1",
         "api_key_env_var": "OPENROUTER_API_KEY",
         "supports_json": True,
         "api_spec": "openai",
         "input_cost": 0.6,
-        "cached_input_cost": 0.
+        "cached_input_cost": 0.6,
         "cache_write_cost": 0.6,
-        "output_cost": 2.
+        "output_cost": 2.5,
     },
     "olmo-3-32b-think-openrouter": {
         "id": "olmo-3-32b-think-openrouter",
src/lm_deluge/pipelines/gepa/__init__.py (new file)

@@ -0,0 +1,95 @@
+"""
+GEPA (Genetic Pareto) prompt optimizer for lm-deluge.
+
+This module provides an evolutionary optimizer for text components in AI systems.
+It analyzes whole trajectories to propose improvements to prompts, tool descriptions,
+and other text-based configuration.
+
+Example usage:
+    from lm_deluge import LLMClient
+    from lm_deluge.prompt import Conversation, Message
+    from lm_deluge.pipelines.gepa import Component, EvalResult, optimize
+
+    # Define components to optimize
+    components = {
+        "system_prompt": Component(
+            description="Instructions given to the model",
+            value="You are a helpful assistant.",
+        ),
+    }
+
+    # Define how to evaluate one example
+    def evaluate(client: LLMClient, values: dict[str, str], example: dict) -> EvalResult:
+        # Build prompt with current component values
+        conv = Conversation.system(values["system_prompt"])
+        conv = conv.add(Message.user(example["question"]))
+
+        # Run inference
+        response = client.process_prompts_sync([conv], show_progress=False)[0]
+        answer = response.completion
+
+        # Score the result
+        correct = example["answer"].lower() in answer.lower()
+        score = 1.0 if correct else 0.0
+
+        # Build feedback for the proposer
+        feedback = f"Score: {score}. Expected: {example['answer']}"
+
+        # Return full trajectory
+        full_conv = conv.add(Message.ai(answer))
+        return EvalResult(conversation=full_conv, score=score, feedback=feedback)
+
+    # Run optimization
+    result = optimize(
+        components=components,
+        evaluate_fn=evaluate,
+        dataset=train_examples,
+        task_client=LLMClient("gpt-4o-mini"),
+        proposer_client=LLMClient("gpt-4o"),
+        max_iterations=50,
+    )
+
+    print(f"Best score: {result.best_score}")
+    print(f"Best prompt: {result.best_candidate['system_prompt']}")
+"""
+
+from lm_deluge.pipelines.gepa.core import (
+    Component,
+    EvalResult,
+    GEPAResult,
+    GEPAState,
+    Proposal,
+)
+from lm_deluge.pipelines.gepa.optimizer import GEPAEngine, optimize
+from lm_deluge.pipelines.gepa.proposer import (
+    DEFAULT_PROPOSAL_PROMPT,
+    build_proposal_prompt,
+    parse_proposal_response,
+    propose_improvement_sync,
+)
+from lm_deluge.pipelines.gepa.util import (
+    extract_text_from_response,
+    format_components_for_prompt,
+    format_conversation_compact,
+)
+
+__all__ = [
+    # Core types
+    "Component",
+    "EvalResult",
+    "Proposal",
+    "GEPAState",
+    "GEPAResult",
+    # Main API
+    "optimize",
+    "GEPAEngine",
+    # Proposer utilities
+    "DEFAULT_PROPOSAL_PROMPT",
+    "build_proposal_prompt",
+    "parse_proposal_response",
+    "propose_improvement_sync",
+    # Formatting utilities
+    "format_conversation_compact",
+    "format_components_for_prompt",
+    "extract_text_from_response",
+]