lm-deluge 0.0.85__tar.gz → 0.0.87__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. {lm_deluge-0.0.85/src/lm_deluge.egg-info → lm_deluge-0.0.87}/PKG-INFO +1 -1
  2. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/pyproject.toml +1 -1
  3. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/anthropic.py +5 -1
  4. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/gemini.py +11 -3
  5. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/openai.py +22 -4
  6. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/client.py +24 -10
  7. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/config.py +3 -1
  8. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/__init__.py +5 -0
  9. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/grok.py +24 -0
  10. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/openai.py +31 -1
  11. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/openrouter.py +66 -8
  12. lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/__init__.py +95 -0
  13. lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/core.py +354 -0
  14. lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/docs/samples.py +696 -0
  15. lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +140 -0
  16. lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +261 -0
  17. lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +300 -0
  18. lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/examples/04_batch_classification.py +271 -0
  19. lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/examples/simple_qa.py +129 -0
  20. lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/optimizer.py +435 -0
  21. lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/proposer.py +235 -0
  22. lm_deluge-0.0.87/src/lm_deluge/pipelines/gepa/util.py +165 -0
  23. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/web_search.py +2 -2
  24. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/warnings.py +1 -0
  25. {lm_deluge-0.0.85 → lm_deluge-0.0.87/src/lm_deluge.egg-info}/PKG-INFO +1 -1
  26. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge.egg-info/SOURCES.txt +11 -0
  27. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/LICENSE +0 -0
  28. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/README.md +0 -0
  29. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/setup.cfg +0 -0
  30. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/__init__.py +0 -0
  31. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/__init__.py +0 -0
  32. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/base.py +0 -0
  33. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/bedrock.py +0 -0
  34. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/chat_reasoning.py +0 -0
  35. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/common.py +0 -0
  36. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
  37. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
  38. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
  39. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
  40. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
  41. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/mistral.py +0 -0
  42. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/api_requests/response.py +0 -0
  43. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/batches.py +0 -0
  44. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/cache.py +0 -0
  45. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/cli.py +0 -0
  46. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/embed.py +0 -0
  47. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/errors.py +0 -0
  48. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/file.py +0 -0
  49. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/image.py +0 -0
  50. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/mock_openai.py +0 -0
  51. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/anthropic.py +0 -0
  52. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/arcee.py +0 -0
  53. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/bedrock.py +0 -0
  54. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/cerebras.py +0 -0
  55. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/cohere.py +0 -0
  56. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/deepseek.py +0 -0
  57. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/fireworks.py +0 -0
  58. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/google.py +0 -0
  59. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/groq.py +0 -0
  60. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/kimi.py +0 -0
  61. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/meta.py +0 -0
  62. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/minimax.py +0 -0
  63. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/mistral.py +0 -0
  64. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/together.py +0 -0
  65. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/models/zai.py +0 -0
  66. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/pipelines/__init__.py +0 -0
  67. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/pipelines/classify.py +0 -0
  68. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/pipelines/extract.py +0 -0
  69. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/pipelines/locate.py +0 -0
  70. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/pipelines/ocr.py +0 -0
  71. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/pipelines/score.py +0 -0
  72. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/pipelines/translate.py +0 -0
  73. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/prompt.py +0 -0
  74. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/request_context.py +0 -0
  75. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/rerank.py +0 -0
  76. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/__init__.py +0 -0
  77. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/builtin/anthropic/__init__.py +0 -0
  78. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/builtin/anthropic/bash.py +0 -0
  79. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/builtin/anthropic/computer_use.py +0 -0
  80. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/builtin/anthropic/editor.py +0 -0
  81. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/builtin/base.py +0 -0
  82. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/builtin/gemini.py +0 -0
  83. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/builtin/openai.py +0 -0
  84. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/cua/__init__.py +0 -0
  85. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/cua/actions.py +0 -0
  86. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/cua/base.py +0 -0
  87. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/cua/batch.py +0 -0
  88. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/cua/converters.py +0 -0
  89. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/cua/kernel.py +0 -0
  90. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/cua/trycua.py +0 -0
  91. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/__init__.py +0 -0
  92. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/batch_tool.py +0 -0
  93. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/docs.py +0 -0
  94. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/email.py +0 -0
  95. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/filesystem.py +0 -0
  96. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/memory.py +0 -0
  97. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/otc/__init__.py +0 -0
  98. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/otc/executor.py +0 -0
  99. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/otc/parse.py +0 -0
  100. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/random.py +0 -0
  101. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/sandbox.py +0 -0
  102. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/sheets.py +0 -0
  103. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/subagents.py +0 -0
  104. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/todos.py +0 -0
  105. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tool/prefab/tool_search.py +0 -0
  106. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/tracker.py +0 -0
  107. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/usage.py +0 -0
  108. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/util/harmony.py +0 -0
  109. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/util/json.py +0 -0
  110. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/util/logprobs.py +0 -0
  111. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/util/schema.py +0 -0
  112. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/util/spatial.py +0 -0
  113. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/util/validation.py +0 -0
  114. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge/util/xml.py +0 -0
  115. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
  116. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge.egg-info/requires.txt +0 -0
  117. {lm_deluge-0.0.85 → lm_deluge-0.0.87}/src/lm_deluge.egg-info/top_level.txt +0 -0

PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.85
+Version: 0.0.87
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10

pyproject.toml
@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]

 [project]
 name = "lm_deluge"
-version = "0.0.85"
+version = "0.0.87"
 authors = [{ name = "Benjamin Anderson", email = "ben@trytaylor.ai" }]
 description = "Python utility for using LLM API models."
 readme = "README.md"

src/lm_deluge/api_requests/anthropic.py
@@ -78,6 +78,10 @@ def _build_anthropic_request(
     if sampling_params.thinking_budget is not None:
         budget = sampling_params.thinking_budget
     elif sampling_params.reasoning_effort is not None:
+        effort = sampling_params.reasoning_effort
+        if effort == "xhigh":
+            maybe_warn("WARN_XHIGH_TO_HIGH", model_name=context.model_name)
+            effort = "high"
         # translate reasoning effort of low, medium, high to budget tokens
         budget = {
             "none": 0,
@@ -85,7 +89,7 @@ def _build_anthropic_request(
             "low": 1024,
             "medium": 4096,
             "high": 16384,
-        }.get(sampling_params.reasoning_effort)
+        }.get(effort)
         assert isinstance(budget, int)
     else:
         budget = 0
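
The net effect of the anthropic.py change is that the string reasoning effort is translated into an Anthropic thinking budget, with the new "xhigh" level folded into "high". A minimal sketch of that translation (illustrative only, not the library's exact code; the mapping lists just the levels visible in this diff):

EFFORT_TO_BUDGET = {"none": 0, "low": 1024, "medium": 4096, "high": 16384}

def effort_to_thinking_budget(effort: str) -> int:
    # Anthropic has no level above "high", so "xhigh" is downgraded
    # (the library also emits WARN_XHIGH_TO_HIGH at this point).
    if effort == "xhigh":
        effort = "high"
    return EFFORT_TO_BUDGET[effort]

assert effort_to_thinking_budget("xhigh") == 16384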

src/lm_deluge/api_requests/gemini.py
@@ -58,6 +58,10 @@ async def _build_gemini_request(
             maybe_warn("WARN_GEMINI3_NO_REASONING")
             effort = "low"
         else:
+            effort_key = sampling_params.reasoning_effort
+            if effort_key == "xhigh":
+                maybe_warn("WARN_XHIGH_TO_HIGH", model_name=model.name)
+                effort_key = "high"
             level_map = {
                 "none": "low",
                 "minimal": "low",
@@ -65,7 +69,7 @@ async def _build_gemini_request(
                 "medium": "high",  # change when supported
                 "high": "high",
             }
-            effort = level_map[sampling_params.reasoning_effort]
+            effort = level_map[effort_key]
             thinking_config = {"thinkingLevel": effort}
             request_json["generationConfig"]["thinkingConfig"] = thinking_config

@@ -88,14 +92,18 @@ async def _build_gemini_request(
         # dynamic thinking
         thinking_config = {"includeThoughts": True, "thinkingBudget": -1}
     elif sampling_params.reasoning_effort not in [None, "none"]:
+        effort_key = sampling_params.reasoning_effort
+        if effort_key == "xhigh":
+            maybe_warn("WARN_XHIGH_TO_HIGH", model_name=model.name)
+            effort_key = "high"
         level_map = {
             "minimal": 256,
             "low": 1024,
             "medium": 4096,
             "high": 16384,
         }
-        assert sampling_params.reasoning_effort in level_map
-        budget = level_map[sampling_params.reasoning_effort]
+        assert effort_key in level_map
+        budget = level_map[effort_key]
         if "flash-lite" in model.id:
             budget = max(budget, 512)
         thinking_config = {"includeThoughts": True, "thinkingBudget": budget}

src/lm_deluge/api_requests/openai.py
@@ -61,8 +61,6 @@ async def _build_oa_chat_request(
     else:
         request_json["max_completion_tokens"] = sampling_params.max_new_tokens
     if model.reasoning_model:
-        request_json["temperature"] = 1.0
-        request_json["top_p"] = 1.0
         effort = sampling_params.reasoning_effort
         if effort in [None, "none"]:
             # Disable reasoning for Gemini models when no effort requested
@@ -79,6 +77,17 @@ async def _build_oa_chat_request(
         elif effort == "minimal" and "gpt-5" not in model.id:
             maybe_warn("WARN_MINIMAL_TO_LOW", model_name=context.model_name)
             effort = "low"
+        # xhigh only supported for specific models (gpt-5.2, gpt-5.1-codex-max)
+        if effort == "xhigh" and not model.supports_xhigh:
+            maybe_warn("WARN_XHIGH_TO_HIGH", model_name=context.model_name)
+            effort = "high"
+        # GPT-5.2 and gpt-5.1-codex-max don't support temperature/top_p when reasoning is enabled
+        if model.supports_xhigh and effort != "none":
+            del request_json["temperature"]
+            del request_json["top_p"]
+        else:
+            request_json["temperature"] = 1.0
+            request_json["top_p"] = 1.0
         request_json["reasoning_effort"] = effort
     else:
         if sampling_params.reasoning_effort:
@@ -323,8 +332,17 @@ async def _build_oa_responses_request(
         elif effort == "minimal" and "gpt-5" not in model.id:
             maybe_warn("WARN_MINIMAL_TO_LOW", model_name=context.model_name)
             effort = "low"
-        request_json["temperature"] = 1.0
-        request_json["top_p"] = 1.0
+        # xhigh only supported for specific models (gpt-5.2, gpt-5.1-codex-max)
+        if effort == "xhigh" and not model.supports_xhigh:
+            maybe_warn("WARN_XHIGH_TO_HIGH", model_name=context.model_name)
+            effort = "high"
+        # GPT-5.2 and gpt-5.1-codex-max don't support temperature/top_p when reasoning is enabled
+        if model.supports_xhigh and effort != "none":
+            del request_json["temperature"]
+            del request_json["top_p"]
+        else:
+            request_json["temperature"] = 1.0
+            request_json["top_p"] = 1.0
         request_json["reasoning"] = {
             "effort": effort,
             "summary": "auto",

src/lm_deluge/client.py
@@ -54,11 +54,12 @@ class _LLMClient(BaseModel):
     """

     _REASONING_SUFFIXES: ClassVar[
-        dict[str, Literal["low", "medium", "high", "minimal", "none"]]
+        dict[str, Literal["low", "medium", "high", "xhigh", "minimal", "none"]]
     ] = {
         "-low": "low",
         "-medium": "medium",
         "-high": "high",
+        "-xhigh": "xhigh",
         "-minimal": "minimal",
         "-none": "none",
     }
@@ -83,7 +84,9 @@ class _LLMClient(BaseModel):
     top_p: float = 1.0
     json_mode: bool = False
     max_new_tokens: int = 512
-    reasoning_effort: Literal["low", "medium", "high", "minimal", "none", None] = None
+    reasoning_effort: Literal[
+        "low", "medium", "high", "xhigh", "minimal", "none", None
+    ] = None
     global_effort: Literal["low", "medium", "high"] | None = None
     thinking_budget: int | None = None
     logprobs: bool = False
@@ -172,10 +175,13 @@ class _LLMClient(BaseModel):
     def _normalize_model_names(
         self, models: list[str]
     ) -> tuple[
-        list[str], list[Literal["low", "medium", "high", "minimal", "none"] | None]
+        list[str],
+        list[Literal["low", "medium", "high", "xhigh", "minimal", "none"] | None],
     ]:
         normalized: list[str] = []
-        efforts: list[Literal["low", "medium", "high", "minimal", "none"] | None] = []
+        efforts: list[
+            Literal["low", "medium", "high", "xhigh", "minimal", "none"] | None
+        ] = []

         for name in models:
             base_name = self._preprocess_openrouter_model(name)
@@ -190,7 +196,7 @@ class _LLMClient(BaseModel):
     def _align_sampling_params(
         self,
         per_model_efforts: list[
-            Literal["low", "medium", "high", "minimal", "none"] | None
+            Literal["low", "medium", "high", "xhigh", "minimal", "none"] | None
         ],
     ) -> None:
         if len(per_model_efforts) < len(self.model_names):
@@ -364,7 +370,9 @@ class _LLMClient(BaseModel):
     @classmethod
     def _strip_reasoning_suffix_if_registered(
         cls, model_name: str
-    ) -> tuple[str, Literal["low", "medium", "high", "minimal", "none"] | None]:
+    ) -> tuple[
+        str, Literal["low", "medium", "high", "xhigh", "minimal", "none"] | None
+    ]:
         """Remove reasoning suffix only when the trimmed model already exists."""
         for suffix, effort in cls._REASONING_SUFFIXES.items():
             if model_name.endswith(suffix) and len(model_name) > len(suffix):
@@ -918,7 +926,7 @@ class _LLMClient(BaseModel):
                 if not isinstance(result, (str, dict, list)):
                     result = str(result)

-                conversation.with_tool_result(call.id, result)  # type: ignore
+                conversation = conversation.with_tool_result(call.id, result)  # type: ignore

         if response is None:
             raise RuntimeError("model did not return a response")
@@ -1168,7 +1176,9 @@ def LLMClient(
     top_p: float = 1.0,
     json_mode: bool = False,
     max_new_tokens: int = 512,
-    reasoning_effort: Literal["low", "medium", "high", "minimal", "none", None] = None,
+    reasoning_effort: Literal[
+        "low", "medium", "high", "xhigh", "minimal", "none", None
+    ] = None,
     global_effort: Literal["low", "medium", "high"] | None = None,
     thinking_budget: int | None = None,
     logprobs: bool = False,
@@ -1199,7 +1209,9 @@ def LLMClient(
     top_p: float = 1.0,
     json_mode: bool = False,
     max_new_tokens: int = 512,
-    reasoning_effort: Literal["low", "medium", "high", "minimal", "none", None] = None,
+    reasoning_effort: Literal[
+        "low", "medium", "high", "xhigh", "minimal", "none", None
+    ] = None,
     global_effort: Literal["low", "medium", "high"] | None = None,
     thinking_budget: int | None = None,
     logprobs: bool = False,
@@ -1229,7 +1241,9 @@ def LLMClient(
     top_p: float = 1.0,
     json_mode: bool = False,
     max_new_tokens: int = 512,
-    reasoning_effort: Literal["low", "medium", "high", "minimal", "none", None] = None,
+    reasoning_effort: Literal[
+        "low", "medium", "high", "xhigh", "minimal", "none", None
+    ] = None,
     global_effort: Literal["low", "medium", "high"] | None = None,
     thinking_budget: int | None = None,
     logprobs: bool = False,
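
Because "-xhigh" is now in _REASONING_SUFFIXES and the suffix is stripped only when the trimmed name is a registered model, the new effort level can also be requested through a model-name suffix. A hedged sketch (assuming gpt-5.2, added in this release, as the registered base model):

from lm_deluge import LLMClient

# resolves to model "gpt-5.2" with per-model reasoning effort "xhigh"
client = LLMClient("gpt-5.2-xhigh")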

src/lm_deluge/config.py
@@ -9,7 +9,9 @@ class SamplingParams(BaseModel):
     json_mode: bool = False
     max_new_tokens: int = 2_048
     global_effort: Literal["low", "medium", "high"] = "high"  # for opus-4.5
-    reasoning_effort: Literal["low", "medium", "high", "minimal", "none", None] = None
+    reasoning_effort: Literal[
+        "low", "medium", "high", "xhigh", "minimal", "none", None
+    ] = None
     thinking_budget: int | None = None
     logprobs: bool = False
     top_logprobs: int | None = None

src/lm_deluge/models/__init__.py
@@ -40,6 +40,9 @@ class APIModel:
     supports_logprobs: bool = False
     supports_responses: bool = False
     reasoning_model: bool = False
+    supports_xhigh: bool = (
+        False  # supports xhigh reasoning_effort (gpt-5.2, gpt-5.1-codex-max)
+    )
     regions: list[str] | dict[str, int] = field(default_factory=list)
     # tokens_per_minute: int | None = None
     # requests_per_minute: int | None = None
@@ -99,6 +102,7 @@ def register_model(
     supports_logprobs: bool = False,
     supports_responses: bool = False,
     reasoning_model: bool = False,
+    supports_xhigh: bool = False,
     regions: list[str] | dict[str, int] = field(default_factory=list),
     # tokens_per_minute: int | None = None,
     # requests_per_minute: int | None = None,
@@ -118,6 +122,7 @@
         supports_logprobs=supports_logprobs,
         supports_responses=supports_responses,
         reasoning_model=reasoning_model,
+        supports_xhigh=supports_xhigh,
         regions=regions,
         # tokens_per_minute=tokens_per_minute,
         # requests_per_minute=requests_per_minute,

src/lm_deluge/models/grok.py
@@ -7,6 +7,30 @@ XAI_MODELS = {
     # 888 888 888 888 888 888888K
     # Y88b d88P 888 Y88..88P 888 "88b
     # "Y8888P88 888 "Y88P" 888 888
+    "grok-4.1-fast-reasoning": {
+        "id": "grok-4.1-fast-reasoning",
+        "name": "grok-4-1-fast-reasoning",
+        "api_base": "https://api.x.ai/v1",
+        "api_key_env_var": "GROK_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": True,
+        "api_spec": "openai",
+        "input_cost": 0.2,
+        "output_cost": 1.5,
+        "reasoning_model": False,
+    },
+    "grok-4.1-fast": {
+        "id": "grok-4.1-fast",
+        "name": "grok-4-1-fast-non-reasoning",
+        "api_base": "https://api.x.ai/v1",
+        "api_key_env_var": "GROK_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": True,
+        "api_spec": "openai",
+        "input_cost": 0.2,
+        "output_cost": 1.5,
+        "reasoning_model": False,
+    },
     "grok-code-fast-1": {
         "id": "grok-code-fast-1",
         "name": "grok-code-fast-1",

src/lm_deluge/models/openai.py
@@ -10,12 +10,42 @@ OPENAI_MODELS = {
     # ░███
     # █████
     # ░░░░░
+    "gpt-5.2": {
+        "id": "gpt-5.2",
+        "name": "gpt-5.2",
+        "api_base": "https://api.openai.com/v1",
+        "api_key_env_var": "OPENAI_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": False,
+        "supports_responses": True,
+        "api_spec": "openai",
+        "input_cost": 1.75,
+        "cached_input_cost": 0.175,
+        "output_cost": 14.0,
+        "reasoning_model": True,
+        "supports_xhigh": True,
+    },
+    "gpt-5.1-codex-max": {
+        "id": "gpt-5.1-codex-max",
+        "name": "gpt-5.1-codex-max",
+        "api_base": "https://api.openai.com/v1",
+        "api_key_env_var": "OPENAI_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": False,
+        "supports_responses": True,
+        "api_spec": "openai",
+        "input_cost": 1.25,
+        "cached_input_cost": 0.125,
+        "output_cost": 10.0,
+        "reasoning_model": True,
+        "supports_xhigh": True,
+    },
     "gpt-5.1": {
         "id": "gpt-5.1",
         "name": "gpt-5.1",
         "api_base": "https://api.openai.com/v1",
         "api_key_env_var": "OPENAI_API_KEY",
-        "supports_json": False,
+        "supports_json": True,
         "supports_logprobs": True,
         "supports_responses": True,
         "api_spec": "openai",

src/lm_deluge/models/openrouter.py
@@ -1,13 +1,25 @@
 OPENROUTER_MODELS = {
+    "intellect-3-openrouter": {
+        "id": "intellect-3-openrouter",
+        "name": "prime-intellect/intellect-3",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key_env_var": "OPENROUTER_API_KEY",
+        "supports_json": True,
+        "api_spec": "openai",
+        "input_cost": 0.2,
+        "cached_input_cost": 0.2,
+        "cache_write_cost": 0.2,
+        "output_cost": 1.10,
+    },
     "glm-4.6-openrouter": {
         "id": "glm-4.6-openrouter",
-        "name": "z-ai/glm-4.6",
+        "name": "z-ai/glm-4.6:exacto",
         "api_base": "https://openrouter.ai/api/v1",
         "api_key_env_var": "OPENROUTER_API_KEY",
         "supports_json": True,
         "api_spec": "openai",
         "input_cost": 0.6,
-        "cached_input_cost": 0.11,
+        "cached_input_cost": 0.6,
         "cache_write_cost": 0.6,
         "output_cost": 2.20,
     },
@@ -35,9 +47,21 @@ OPENROUTER_MODELS = {
         "cache_write_cost": 0.23,
         "output_cost": 0.9,
     },
+    "deepseek-3.2-exp-openrouter": {
+        "id": "deepseek-3.2-exp-openrouter",
+        "name": "deepseek/deepseek-v3.2-exp",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key_env_var": "OPENROUTER_API_KEY",
+        "supports_json": True,
+        "api_spec": "openai",
+        "input_cost": 0.27,
+        "cached_input_cost": 0.27,
+        "cache_write_cost": 0.27,
+        "output_cost": 0.4,
+    },
     "deepseek-3.2-openrouter": {
         "id": "deepseek-3.2-openrouter",
-        "name": "deepseek/deepseek-v3.2-exp",
+        "name": "deepseek/deepseek-v3.2",
         "api_base": "https://openrouter.ai/api/v1",
         "api_key_env_var": "OPENROUTER_API_KEY",
         "supports_json": True,
@@ -47,19 +71,53 @@ OPENROUTER_MODELS = {
         "cache_write_cost": 0.27,
         "output_cost": 0.4,
     },
-    # "gpt-oss-20b-openrouter": {},
-    # "gpt-oss-120b-openrouter": {},
+    "gpt-oss-20b-openrouter": {
+        "id": "gpt-oss-20b-openrouter",
+        "name": "openai/gpt-oss-20b",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key_env_var": "OPENROUTER_API_KEY",
+        "supports_json": True,
+        "api_spec": "openai",
+        "input_cost": 0.04,
+        "cached_input_cost": 0.04,
+        "cache_write_cost": 0.04,
+        "output_cost": 0.18,
+    },
+    "gpt-oss-120b-openrouter": {
+        "id": "gpt-oss-120b-openrouter",
+        "name": "openai/gpt-oss-120b",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key_env_var": "OPENROUTER_API_KEY",
+        "supports_json": True,
+        "api_spec": "openai",
+        "input_cost": 0.05,
+        "cached_input_cost": 0.05,
+        "cache_write_cost": 0.05,
+        "output_cost": 0.45,
+    },
     "kimi-k2-openrouter": {
         "id": "kimi-k2-openrouter",
-        "name": "z-ai/glm-4.6",
+        "name": "moonshotai/kimi-k2-0905:exacto",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key_env_var": "OPENROUTER_API_KEY",
+        "supports_json": True,
+        "api_spec": "openai",
+        "input_cost": 1,
+        "cached_input_cost": 1,
+        "cache_write_cost": 1,
+        "output_cost": 3,
+    },
+    "kimi-k2-thinking-openrouter": {
+        "id": "kimi-k2-thinking-openrouter",
+        "name": "moonshotai/kimi-k2-thinking",
         "api_base": "https://openrouter.ai/api/v1",
         "api_key_env_var": "OPENROUTER_API_KEY",
         "supports_json": True,
         "api_spec": "openai",
         "input_cost": 0.6,
-        "cached_input_cost": 0.11,
+        "cached_input_cost": 0.6,
         "cache_write_cost": 0.6,
-        "output_cost": 2.20,
+        "output_cost": 2.5,
     },
     "olmo-3-32b-think-openrouter": {
         "id": "olmo-3-32b-think-openrouter",

src/lm_deluge/pipelines/gepa/__init__.py (new file)
@@ -0,0 +1,95 @@
+"""
+GEPA (Genetic Pareto) prompt optimizer for lm-deluge.
+
+This module provides an evolutionary optimizer for text components in AI systems.
+It analyzes whole trajectories to propose improvements to prompts, tool descriptions,
+and other text-based configuration.
+
+Example usage:
+    from lm_deluge import LLMClient
+    from lm_deluge.prompt import Conversation, Message
+    from lm_deluge.pipelines.gepa import Component, EvalResult, optimize
+
+    # Define components to optimize
+    components = {
+        "system_prompt": Component(
+            description="Instructions given to the model",
+            value="You are a helpful assistant.",
+        ),
+    }
+
+    # Define how to evaluate one example
+    def evaluate(client: LLMClient, values: dict[str, str], example: dict) -> EvalResult:
+        # Build prompt with current component values
+        conv = Conversation.system(values["system_prompt"])
+        conv = conv.add(Message.user(example["question"]))
+
+        # Run inference
+        response = client.process_prompts_sync([conv], show_progress=False)[0]
+        answer = response.completion
+
+        # Score the result
+        correct = example["answer"].lower() in answer.lower()
+        score = 1.0 if correct else 0.0
+
+        # Build feedback for the proposer
+        feedback = f"Score: {score}. Expected: {example['answer']}"
+
+        # Return full trajectory
+        full_conv = conv.add(Message.ai(answer))
+        return EvalResult(conversation=full_conv, score=score, feedback=feedback)
+
+    # Run optimization
+    result = optimize(
+        components=components,
+        evaluate_fn=evaluate,
+        dataset=train_examples,
+        task_client=LLMClient("gpt-4o-mini"),
+        proposer_client=LLMClient("gpt-4o"),
+        max_iterations=50,
+    )
+
+    print(f"Best score: {result.best_score}")
+    print(f"Best prompt: {result.best_candidate['system_prompt']}")
+"""
+
+from lm_deluge.pipelines.gepa.core import (
+    Component,
+    EvalResult,
+    GEPAResult,
+    GEPAState,
+    Proposal,
+)
+from lm_deluge.pipelines.gepa.optimizer import GEPAEngine, optimize
+from lm_deluge.pipelines.gepa.proposer import (
+    DEFAULT_PROPOSAL_PROMPT,
+    build_proposal_prompt,
+    parse_proposal_response,
+    propose_improvement_sync,
+)
+from lm_deluge.pipelines.gepa.util import (
+    extract_text_from_response,
+    format_components_for_prompt,
+    format_conversation_compact,
+)
+
+__all__ = [
+    # Core types
+    "Component",
+    "EvalResult",
+    "Proposal",
+    "GEPAState",
+    "GEPAResult",
+    # Main API
+    "optimize",
+    "GEPAEngine",
+    # Proposer utilities
+    "DEFAULT_PROPOSAL_PROMPT",
+    "build_proposal_prompt",
+    "parse_proposal_response",
+    "propose_improvement_sync",
+    # Formatting utilities
+    "format_conversation_compact",
+    "format_components_for_prompt",
+    "extract_text_from_response",
+]