lm-deluge: lm_deluge-0.0.89-py3-none-any.whl → lm_deluge-0.0.91-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. lm_deluge/__init__.py +3 -27
  2. lm_deluge/api_requests/anthropic.py +29 -7
  3. lm_deluge/api_requests/base.py +38 -1
  4. lm_deluge/api_requests/bedrock.py +29 -3
  5. lm_deluge/{request_context.py → api_requests/context.py} +4 -4
  6. lm_deluge/api_requests/gemini.py +30 -14
  7. lm_deluge/api_requests/mistral.py +1 -1
  8. lm_deluge/api_requests/openai.py +34 -5
  9. lm_deluge/batches.py +19 -49
  10. lm_deluge/cache.py +1 -1
  11. lm_deluge/cli.py +672 -300
  12. lm_deluge/{client.py → client/__init__.py} +42 -13
  13. lm_deluge/config.py +9 -31
  14. lm_deluge/embed.py +2 -6
  15. lm_deluge/models/__init__.py +138 -29
  16. lm_deluge/models/anthropic.py +32 -24
  17. lm_deluge/models/bedrock.py +9 -0
  18. lm_deluge/models/cerebras.py +2 -0
  19. lm_deluge/models/cohere.py +2 -0
  20. lm_deluge/models/google.py +13 -0
  21. lm_deluge/models/grok.py +4 -0
  22. lm_deluge/models/groq.py +2 -0
  23. lm_deluge/models/meta.py +2 -0
  24. lm_deluge/models/minimax.py +9 -1
  25. lm_deluge/models/openai.py +24 -1
  26. lm_deluge/models/openrouter.py +155 -1
  27. lm_deluge/models/together.py +3 -0
  28. lm_deluge/models/zai.py +50 -1
  29. lm_deluge/pipelines/extract.py +4 -5
  30. lm_deluge/pipelines/gepa/__init__.py +1 -1
  31. lm_deluge/pipelines/gepa/docs/samples.py +19 -10
  32. lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +1 -1
  33. lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +1 -1
  34. lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +1 -1
  35. lm_deluge/pipelines/gepa/examples/04_batch_classification.py +1 -1
  36. lm_deluge/pipelines/gepa/examples/simple_qa.py +1 -1
  37. lm_deluge/prompt/__init__.py +45 -0
  38. lm_deluge/{prompt.py → prompt/conversation.py} +165 -869
  39. lm_deluge/{image.py → prompt/image.py} +0 -10
  40. lm_deluge/prompt/message.py +571 -0
  41. lm_deluge/prompt/serialization.py +21 -0
  42. lm_deluge/prompt/signatures.py +77 -0
  43. lm_deluge/prompt/text.py +47 -0
  44. lm_deluge/prompt/thinking.py +55 -0
  45. lm_deluge/prompt/tool_calls.py +245 -0
  46. lm_deluge/server/__init__.py +24 -0
  47. lm_deluge/server/__main__.py +144 -0
  48. lm_deluge/server/adapters.py +369 -0
  49. lm_deluge/server/app.py +388 -0
  50. lm_deluge/server/auth.py +71 -0
  51. lm_deluge/server/model_policy.py +215 -0
  52. lm_deluge/server/models_anthropic.py +172 -0
  53. lm_deluge/server/models_openai.py +175 -0
  54. lm_deluge/skills/anthropic.py +0 -0
  55. lm_deluge/skills/compat.py +0 -0
  56. lm_deluge/tool/__init__.py +78 -19
  57. lm_deluge/tool/builtin/anthropic/__init__.py +1 -1
  58. lm_deluge/tool/cua/actions.py +26 -26
  59. lm_deluge/tool/cua/batch.py +1 -2
  60. lm_deluge/tool/cua/kernel.py +1 -1
  61. lm_deluge/tool/prefab/filesystem.py +2 -2
  62. lm_deluge/tool/prefab/full_text_search/__init__.py +3 -2
  63. lm_deluge/tool/prefab/memory.py +3 -1
  64. lm_deluge/tool/prefab/otc/executor.py +3 -3
  65. lm_deluge/tool/prefab/random.py +30 -54
  66. lm_deluge/tool/prefab/rlm/__init__.py +2 -2
  67. lm_deluge/tool/prefab/rlm/executor.py +1 -1
  68. lm_deluge/tool/prefab/sandbox/__init__.py +19 -0
  69. lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +483 -0
  70. lm_deluge/tool/prefab/sandbox/docker_sandbox.py +609 -0
  71. lm_deluge/tool/prefab/sandbox/fargate_sandbox.py +546 -0
  72. lm_deluge/tool/prefab/sandbox/modal_sandbox.py +469 -0
  73. lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +829 -0
  74. lm_deluge/tool/prefab/skills.py +0 -0
  75. lm_deluge/tool/prefab/subagents.py +1 -1
  76. lm_deluge/util/logprobs.py +4 -4
  77. lm_deluge/util/schema.py +6 -6
  78. lm_deluge/util/validation.py +14 -9
  79. {lm_deluge-0.0.89.dist-info → lm_deluge-0.0.91.dist-info}/METADATA +12 -12
  80. lm_deluge-0.0.91.dist-info/RECORD +140 -0
  81. lm_deluge-0.0.91.dist-info/entry_points.txt +3 -0
  82. lm_deluge/mock_openai.py +0 -643
  83. lm_deluge/tool/prefab/sandbox.py +0 -1621
  84. lm_deluge-0.0.89.dist-info/RECORD +0 -117
  85. lm_deluge/{file.py → prompt/file.py} +0 -0
  86. {lm_deluge-0.0.89.dist-info → lm_deluge-0.0.91.dist-info}/WHEEL +0 -0
  87. {lm_deluge-0.0.89.dist-info → lm_deluge-0.0.91.dist-info}/licenses/LICENSE +0 -0
  88. {lm_deluge-0.0.89.dist-info → lm_deluge-0.0.91.dist-info}/top_level.txt +0 -0
lm_deluge/models/google.py CHANGED
@@ -21,6 +21,7 @@ GOOGLE_MODELS = {
21
21
  "cached_input_cost": 0.025,
22
22
  "output_cost": 0.4,
23
23
  "reasoning_model": False,
24
+ "supports_images": True,
24
25
  },
25
26
  "gemini-2.0-flash-lite-compat": {
26
27
  "id": "gemini-2.0-flash-lite-compat",
@@ -33,6 +34,7 @@ GOOGLE_MODELS = {
33
34
  "input_cost": 0.075,
34
35
  "output_cost": 0.3,
35
36
  "reasoning_model": False,
37
+ "supports_images": True,
36
38
  },
37
39
  "gemini-2.5-pro-compat": {
38
40
  "id": "gemini-2.5-pro-compat",
@@ -46,6 +48,7 @@ GOOGLE_MODELS = {
46
48
  "cached_input_cost": 0.31,
47
49
  "output_cost": 10.0,
48
50
  "reasoning_model": True,
51
+ "supports_images": True,
49
52
  },
50
53
  "gemini-2.5-flash-compat": {
51
54
  "id": "gemini-2.5-flash-compat",
@@ -59,6 +62,7 @@ GOOGLE_MODELS = {
59
62
  "cached_input_cost": 0.075,
60
63
  "output_cost": 2.5,
61
64
  "reasoning_model": True,
65
+ "supports_images": True,
62
66
  },
63
67
  "gemini-2.5-flash-lite-compat": {
64
68
  "id": "gemini-2.5-flash-lite-compat",
@@ -72,6 +76,7 @@ GOOGLE_MODELS = {
72
76
  "cached_input_cost": 0.025,
73
77
  "output_cost": 0.4,
74
78
  "reasoning_model": True,
79
+ "supports_images": True,
75
80
  },
76
81
  # Native Gemini API versions with file support
77
82
  "gemini-2.0-flash": {
@@ -86,6 +91,7 @@ GOOGLE_MODELS = {
86
91
  "cached_input_cost": 0.025,
87
92
  "output_cost": 0.4,
88
93
  "reasoning_model": False,
94
+ "supports_images": True,
89
95
  },
90
96
  "gemini-2.0-flash-lite": {
91
97
  "id": "gemini-2.0-flash-lite",
@@ -98,6 +104,7 @@ GOOGLE_MODELS = {
98
104
  "input_cost": 0.075,
99
105
  "output_cost": 0.3,
100
106
  "reasoning_model": False,
107
+ "supports_images": True,
101
108
  },
102
109
  "gemini-2.5-pro": {
103
110
  "id": "gemini-2.5-pro",
@@ -111,6 +118,7 @@ GOOGLE_MODELS = {
111
118
  "cached_input_cost": 0.31,
112
119
  "output_cost": 10.0,
113
120
  "reasoning_model": True,
121
+ "supports_images": True,
114
122
  },
115
123
  "gemini-2.5-flash": {
116
124
  "id": "gemini-2.5-flash",
@@ -124,6 +132,7 @@ GOOGLE_MODELS = {
124
132
  "cached_input_cost": 0.075,
125
133
  "output_cost": 2.5,
126
134
  "reasoning_model": True,
135
+ "supports_images": True,
127
136
  },
128
137
  "gemini-2.5-flash-lite": {
129
138
  "id": "gemini-2.5-flash-lite",
@@ -137,6 +146,7 @@ GOOGLE_MODELS = {
137
146
  "cached_input_cost": 0.025,
138
147
  "output_cost": 0.4,
139
148
  "reasoning_model": True,
149
+ "supports_images": True,
140
150
  },
141
151
  # Gemini 3 models - advanced reasoning with thought signatures
142
152
  "gemini-3-pro-preview": {
@@ -152,6 +162,7 @@ GOOGLE_MODELS = {
152
162
  "output_cost": 12.0, # <200k tokens
153
163
  # Note: >200k tokens pricing is $4/$18 per million
154
164
  "reasoning_model": True,
165
+ "supports_images": True,
155
166
  },
156
167
  "gemini-3-flash-preview": {
157
168
  "id": "gemini-3-flash-preview",
@@ -165,6 +176,7 @@ GOOGLE_MODELS = {
165
176
  "cached_input_cost": 0.125, # estimated
166
177
  "output_cost": 3.0,
167
178
  "reasoning_model": True,
179
+ "supports_images": True,
168
180
  },
169
181
  # Gemini 2.5 Computer Use model
170
182
  "gemini-2.5-computer-use": {
@@ -179,5 +191,6 @@ GOOGLE_MODELS = {
179
191
  "cached_input_cost": 0.31,
180
192
  "output_cost": 10.0,
181
193
  "reasoning_model": True,
194
+ "supports_images": True,
182
195
  },
183
196
  }
lm_deluge/models/grok.py CHANGED
@@ -18,6 +18,7 @@ XAI_MODELS = {
18
18
  "input_cost": 0.2,
19
19
  "output_cost": 1.5,
20
20
  "reasoning_model": False,
21
+ "supports_images": True,
21
22
  },
22
23
  "grok-4.1-fast": {
23
24
  "id": "grok-4.1-fast",
@@ -30,6 +31,7 @@ XAI_MODELS = {
30
31
  "input_cost": 0.2,
31
32
  "output_cost": 1.5,
32
33
  "reasoning_model": False,
34
+ "supports_images": True,
33
35
  },
34
36
  "grok-code-fast-1": {
35
37
  "id": "grok-code-fast-1",
@@ -54,6 +56,7 @@ XAI_MODELS = {
54
56
  "input_cost": 0.2,
55
57
  "output_cost": 0.5,
56
58
  "reasoning_model": False,
59
+ "supports_images": True,
57
60
  },
58
61
  "grok-4-fast-non-reasoning": {
59
62
  "id": "grok-4-fast-non-reasoning",
@@ -66,6 +69,7 @@ XAI_MODELS = {
66
69
  "input_cost": 0.2,
67
70
  "output_cost": 0.5,
68
71
  "reasoning_model": False,
72
+ "supports_images": True,
69
73
  },
70
74
  "grok-4": {
71
75
  "id": "grok-4",
lm_deluge/models/groq.py CHANGED
@@ -30,6 +30,7 @@ GROQ_MODELS = {
30
30
  "api_key_env_var": "GROQ_API_KEY",
31
31
  "supports_json": False,
32
32
  "api_spec": "openai",
33
+ "supports_images": True,
33
34
  },
34
35
  "llama-4-scout-groq": {
35
36
  "id": "llama-4-scout-groq",
@@ -38,6 +39,7 @@ GROQ_MODELS = {
38
39
  "api_key_env_var": "GROQ_API_KEY",
39
40
  "supports_json": False,
40
41
  "api_spec": "openai",
42
+ "supports_images": True,
41
43
  },
42
44
  "kimi-k2-groq": {
43
45
  "id": "kimi-k2-groq",
lm_deluge/models/meta.py CHANGED
@@ -17,6 +17,7 @@ META_MODELS = {
17
17
  "input_cost": 0.0,
18
18
  "output_cost": 0.0,
19
19
  "reasoning_model": False,
20
+ "supports_images": True,
20
21
  },
21
22
  "llama-4-maverick": {
22
23
  "id": "llama-4-maverick",
@@ -29,6 +30,7 @@ META_MODELS = {
29
30
  "input_cost": 0.0,
30
31
  "output_cost": 0.0,
31
32
  "reasoning_model": False,
33
+ "supports_images": True,
32
34
  },
33
35
  "llama-3.3-70b": {
34
36
  "id": "llama-3.3-70b",
lm_deluge/models/minimax.py CHANGED
@@ -1,4 +1,12 @@
1
1
  MINIMAX_MODELS = {
2
+ "minimax-m2.1": {
3
+ "id": "minimax-m2.1",
4
+ "name": "MiniMax-M2.1",
5
+ "api_base": "https://api.minimax.io/anthropic/v1",
6
+ "api_key_env_var": "MINIMAX_API_KEY",
7
+ "supports_json": False,
8
+ "api_spec": "anthropic",
9
+ },
2
10
  "minimax-m2": {
3
11
  "id": "minimax-m2",
4
12
  "name": "MiniMax-M2",
@@ -6,5 +14,5 @@ MINIMAX_MODELS = {
6
14
  "api_key_env_var": "MINIMAX_API_KEY",
7
15
  "supports_json": False,
8
16
  "api_spec": "anthropic",
9
- }
17
+ },
10
18
  }
lm_deluge/models/openai.py CHANGED
@@ -24,6 +24,7 @@ OPENAI_MODELS = {
24
24
  "output_cost": 14.0,
25
25
  "reasoning_model": True,
26
26
  "supports_xhigh": True,
27
+ "supports_images": True,
27
28
  },
28
29
  "gpt-5.1-codex-max": {
29
30
  "id": "gpt-5.1-codex-max",
@@ -31,7 +32,6 @@ OPENAI_MODELS = {
31
32
  "api_base": "https://api.openai.com/v1",
32
33
  "api_key_env_var": "OPENAI_API_KEY",
33
34
  "supports_json": True,
34
- "supports_logprobs": False,
35
35
  "supports_responses": True,
36
36
  "api_spec": "openai",
37
37
  "input_cost": 1.25,
@@ -39,6 +39,7 @@ OPENAI_MODELS = {
39
39
  "output_cost": 10.0,
40
40
  "reasoning_model": True,
41
41
  "supports_xhigh": True,
42
+ "supports_images": True,
42
43
  },
43
44
  "gpt-5.1": {
44
45
  "id": "gpt-5.1",
@@ -53,6 +54,7 @@ OPENAI_MODELS = {
53
54
  "cached_input_cost": 0.125,
54
55
  "output_cost": 10.0,
55
56
  "reasoning_model": True,
57
+ "supports_images": True,
56
58
  },
57
59
  "gpt-5.1-codex": {
58
60
  "id": "gpt-5.1-codex",
@@ -67,6 +69,7 @@ OPENAI_MODELS = {
67
69
  "cached_input_cost": 0.125,
68
70
  "output_cost": 10.0,
69
71
  "reasoning_model": True,
72
+ "supports_images": True,
70
73
  },
71
74
  "gpt-5.1-codex-mini": {
72
75
  "id": "gpt-5.1-codex-mini",
@@ -95,6 +98,7 @@ OPENAI_MODELS = {
95
98
  "cached_input_cost": 0.125,
96
99
  "output_cost": 10.0,
97
100
  "reasoning_model": True,
101
+ "supports_images": True,
98
102
  },
99
103
  "gpt-5": {
100
104
  "id": "gpt-5",
@@ -109,6 +113,7 @@ OPENAI_MODELS = {
109
113
  "cached_input_cost": 0.125,
110
114
  "output_cost": 10.0,
111
115
  "reasoning_model": True,
116
+ "supports_images": True,
112
117
  },
113
118
  "gpt-5-chat": {
114
119
  "id": "gpt-5-chat",
@@ -123,6 +128,7 @@ OPENAI_MODELS = {
123
128
  "cached_input_cost": 0.125,
124
129
  "output_cost": 10.0,
125
130
  "reasoning_model": False,
131
+ "supports_images": True,
126
132
  },
127
133
  "gpt-5-mini": {
128
134
  "id": "gpt-5-mini",
@@ -137,6 +143,7 @@ OPENAI_MODELS = {
137
143
  "cached_input_cost": 0.025,
138
144
  "output_cost": 2.0,
139
145
  "reasoning_model": True,
146
+ "supports_images": True,
140
147
  },
141
148
  "gpt-5-nano": {
142
149
  "id": "gpt-5-nano",
@@ -151,6 +158,7 @@ OPENAI_MODELS = {
151
158
  "cached_input_cost": 0.005,
152
159
  "output_cost": 0.40,
153
160
  "reasoning_model": True,
161
+ "supports_images": True,
154
162
  },
155
163
  "openai-computer-use-preview": {
156
164
  "id": "openai-computer-use-preview",
@@ -178,6 +186,7 @@ OPENAI_MODELS = {
178
186
  "cached_input_cost": 0.375,
179
187
  "output_cost": 6.0,
180
188
  "reasoning_model": True,
189
+ "supports_images": True,
181
190
  },
182
191
  "o4-mini-deep-research": {
183
192
  "id": "o4-mini-deep-research",
@@ -192,6 +201,7 @@ OPENAI_MODELS = {
192
201
  "cached_input_cost": 0.5,
193
202
  "output_cost": 8.0,
194
203
  "reasoning_model": True,
204
+ "supports_images": True,
195
205
  },
196
206
  "o3-deep-research": {
197
207
  "id": "o3-deep-research",
@@ -206,6 +216,7 @@ OPENAI_MODELS = {
206
216
  "cached_input_cost": 2.50,
207
217
  "output_cost": 40.0,
208
218
  "reasoning_model": True,
219
+ "supports_images": True,
209
220
  },
210
221
  "o3": {
211
222
  "id": "o3",
@@ -220,6 +231,7 @@ OPENAI_MODELS = {
220
231
  "cached_input_cost": 0.50,
221
232
  "output_cost": 8.0,
222
233
  "reasoning_model": True,
234
+ "supports_images": True,
223
235
  },
224
236
  "o4-mini": {
225
237
  "id": "o4-mini",
@@ -234,6 +246,7 @@ OPENAI_MODELS = {
234
246
  "cached_input_cost": 0.275,
235
247
  "output_cost": 4.4,
236
248
  "reasoning_model": True,
249
+ "supports_images": True,
237
250
  },
238
251
  "gpt-4.1": {
239
252
  "id": "gpt-4.1",
@@ -248,6 +261,7 @@ OPENAI_MODELS = {
248
261
  "cached_input_cost": 0.50,
249
262
  "output_cost": 8.0,
250
263
  "reasoning_model": False,
264
+ "supports_images": True,
251
265
  },
252
266
  "gpt-4.1-mini": {
253
267
  "id": "gpt-4.1-mini",
@@ -262,6 +276,7 @@ OPENAI_MODELS = {
262
276
  "cached_input_cost": 0.10,
263
277
  "output_cost": 1.6,
264
278
  "reasoning_model": False,
279
+ "supports_images": True,
265
280
  },
266
281
  "gpt-4.1-nano": {
267
282
  "id": "gpt-4.1-nano",
@@ -276,6 +291,7 @@ OPENAI_MODELS = {
276
291
  "cached_input_cost": 0.025,
277
292
  "output_cost": 0.4,
278
293
  "reasoning_model": False,
294
+ "supports_images": True,
279
295
  },
280
296
  "gpt-4.5": {
281
297
  "id": "gpt-4.5",
@@ -289,6 +305,7 @@ OPENAI_MODELS = {
289
305
  "input_cost": 75.0,
290
306
  "output_cost": 150.0,
291
307
  "reasoning_model": False,
308
+ "supports_images": True,
292
309
  },
293
310
  "o3-mini": {
294
311
  "id": "o3-mini",
@@ -317,6 +334,7 @@ OPENAI_MODELS = {
317
334
  "cached_input_cost": 7.50,
318
335
  "output_cost": 60.0,
319
336
  "reasoning_model": True,
337
+ "supports_images": True,
320
338
  },
321
339
  "o1-preview": {
322
340
  "id": "o1-preview",
@@ -357,6 +375,7 @@ OPENAI_MODELS = {
357
375
  "input_cost": 2.50,
358
376
  "cached_input_cost": 1.25,
359
377
  "output_cost": 10.0,
378
+ "supports_images": True,
360
379
  },
361
380
  "gpt-4o-mini": {
362
381
  "id": "gpt-4o-mini",
@@ -370,6 +389,7 @@ OPENAI_MODELS = {
370
389
  "input_cost": 0.15,
371
390
  "cached_input_cost": 0.075,
372
391
  "output_cost": 0.6,
392
+ "supports_images": True,
373
393
  },
374
394
  "gpt-3.5-turbo": {
375
395
  "id": "gpt-3.5-turbo",
@@ -394,6 +414,7 @@ OPENAI_MODELS = {
394
414
  "api_spec": "openai",
395
415
  "input_cost": 10.0,
396
416
  "output_cost": 30.0,
417
+ "supports_images": True,
397
418
  },
398
419
  "gpt-4": {
399
420
  "id": "gpt-4",
@@ -406,6 +427,7 @@ OPENAI_MODELS = {
406
427
  "api_spec": "openai",
407
428
  "input_cost": 30.0,
408
429
  "output_cost": 60.0,
430
+ "supports_images": True,
409
431
  },
410
432
  "gpt-4-32k": {
411
433
  "id": "gpt-4-32k",
@@ -418,5 +440,6 @@ OPENAI_MODELS = {
418
440
  "api_spec": "openai",
419
441
  "input_cost": 60.0,
420
442
  "output_cost": 120.0,
443
+ "supports_images": True,
421
444
  },
422
445
  }
lm_deluge/models/openrouter.py CHANGED
@@ -1,4 +1,90 @@
1
1
  OPENROUTER_MODELS = {
2
+ "nemotron-3-nano-30b-a3b-free": {
3
+ "id": "nemotron-3-nano-30b-a3b-free",
4
+ "name": "nvidia/nemotron-3-nano-30b-a3b:free",
5
+ "api_base": "https://openrouter.ai/api/v1",
6
+ "api_key_env_var": "OPENROUTER_API_KEY",
7
+ "supports_json": True,
8
+ "api_spec": "openai",
9
+ "input_cost": 0.0,
10
+ "cached_input_cost": 0.0,
11
+ "cache_write_cost": 0.0,
12
+ "output_cost": 0.0,
13
+ },
14
+ "nemotron-3-nano-30b-a3b": {
15
+ "id": "nemotron-3-nano-30b-a3b",
16
+ "name": "nvidia/nemotron-3-nano-30b-a3b",
17
+ "api_base": "https://openrouter.ai/api/v1",
18
+ "api_key_env_var": "OPENROUTER_API_KEY",
19
+ "supports_json": True,
20
+ "api_spec": "openai",
21
+ "input_cost": 0.06,
22
+ "cached_input_cost": 0.06,
23
+ "cache_write_cost": 0.06,
24
+ "output_cost": 0.24,
25
+ },
26
+ "nemotron-nano-12b-2-vl-free": {
27
+ "id": "nemotron-nano-12b-2-vl-free",
28
+ "name": "nvidia/nemotron-nano-12b-v2-vl:free",
29
+ "api_base": "https://openrouter.ai/api/v1",
30
+ "api_key_env_var": "OPENROUTER_API_KEY",
31
+ "supports_json": True,
32
+ "api_spec": "openai",
33
+ "input_cost": 0.0,
34
+ "cached_input_cost": 0.0,
35
+ "cache_write_cost": 0.0,
36
+ "output_cost": 0.0,
37
+ "supports_images": True,
38
+ },
39
+ "nemotron-nano-12b-2-vl": {
40
+ "id": "nemotron-nano-12b-2-vl",
41
+ "name": "nvidia/nemotron-nano-12b-v2-vl",
42
+ "api_base": "https://openrouter.ai/api/v1",
43
+ "api_key_env_var": "OPENROUTER_API_KEY",
44
+ "supports_json": True,
45
+ "api_spec": "openai",
46
+ "input_cost": 0.0,
47
+ "cached_input_cost": 0.0,
48
+ "cache_write_cost": 0.0,
49
+ "output_cost": 0.0,
50
+ "supports_images": True,
51
+ },
52
+ "devstral-2-free-openrouter": {
53
+ "id": "devstral-2-free-openrouter",
54
+ "name": "mistralai/devstral-2512:free",
55
+ "api_base": "https://openrouter.ai/api/v1",
56
+ "api_key_env_var": "OPENROUTER_API_KEY",
57
+ "supports_json": True,
58
+ "api_spec": "openai",
59
+ "input_cost": 0.0,
60
+ "cached_input_cost": 0.0,
61
+ "cache_write_cost": 0.0,
62
+ "output_cost": 0.0,
63
+ },
64
+ "devstral-2-openrouter": {
65
+ "id": "devstral-2-openrouter",
66
+ "name": "mistralai/devstral-2512",
67
+ "api_base": "https://openrouter.ai/api/v1",
68
+ "api_key_env_var": "OPENROUTER_API_KEY",
69
+ "supports_json": True,
70
+ "api_spec": "openai",
71
+ "input_cost": 0.05,
72
+ "cached_input_cost": 0.05,
73
+ "cache_write_cost": 0.05,
74
+ "output_cost": 0.22,
75
+ },
76
+ "mimo-flash-v2-free-openrouter": {
77
+ "id": "mimo-flash-v2-free-openrouter",
78
+ "name": "xiaomi/mimo-v2-flash:free",
79
+ "api_base": "https://openrouter.ai/api/v1",
80
+ "api_key_env_var": "OPENROUTER_API_KEY",
81
+ "supports_json": True,
82
+ "api_spec": "openai",
83
+ "input_cost": 0.0,
84
+ "cached_input_cost": 0.0,
85
+ "cache_write_cost": 0.0,
86
+ "output_cost": 0.0,
87
+ },
2
88
  "intellect-3-openrouter": {
3
89
  "id": "intellect-3-openrouter",
4
90
  "name": "prime-intellect/intellect-3",
@@ -83,6 +169,18 @@ OPENROUTER_MODELS = {
83
169
  "cache_write_cost": 0.04,
84
170
  "output_cost": 0.18,
85
171
  },
172
+ "gpt-oss-20b-free-openrouter": {
173
+ "id": "gpt-oss-20b-openrouter",
174
+ "name": "openai/gpt-oss-20b:free",
175
+ "api_base": "https://openrouter.ai/api/v1",
176
+ "api_key_env_var": "OPENROUTER_API_KEY",
177
+ "supports_json": True,
178
+ "api_spec": "openai",
179
+ "input_cost": 0.0,
180
+ "cached_input_cost": 0.0,
181
+ "cache_write_cost": 0.0,
182
+ "output_cost": 0.0,
183
+ },
86
184
  "gpt-oss-120b-openrouter": {
87
185
  "id": "gpt-oss-120b-openrouter",
88
186
  "name": "openai/gpt-oss-120b",
@@ -95,6 +193,18 @@ OPENROUTER_MODELS = {
95
193
  "cache_write_cost": 0.05,
96
194
  "output_cost": 0.45,
97
195
  },
196
+ "gpt-oss-120b-free-openrouter": {
197
+ "id": "gpt-oss-120b-free-openrouter",
198
+ "name": "openai/gpt-oss-120b:free",
199
+ "api_base": "https://openrouter.ai/api/v1",
200
+ "api_key_env_var": "OPENROUTER_API_KEY",
201
+ "supports_json": True,
202
+ "api_spec": "openai",
203
+ "input_cost": 0.00,
204
+ "cached_input_cost": 0.00,
205
+ "cache_write_cost": 0.00,
206
+ "output_cost": 0.0,
207
+ },
98
208
  "kimi-k2-openrouter": {
99
209
  "id": "kimi-k2-openrouter",
100
210
  "name": "moonshotai/kimi-k2-0905:exacto",
@@ -129,9 +239,29 @@ OPENROUTER_MODELS = {
129
239
  "input_cost": 0.2,
130
240
  "output_cost": 35,
131
241
  },
242
+ "olmo-3.1-32b-think-openrouter": {
243
+ "id": "olmo-3.1-32b-think-openrouter",
244
+ "name": "allenai/olmo-3.1-32b-think:free",
245
+ "api_base": "https://openrouter.ai/api/v1",
246
+ "api_key_env_var": "OPENROUTER_API_KEY",
247
+ "supports_json": True,
248
+ "api_spec": "openai",
249
+ "input_cost": 0.2,
250
+ "output_cost": 35,
251
+ },
252
+ "trinity-mini-free-openrouter": {
253
+ "id": "trinity-mini-free-openrouter",
254
+ "name": "arcee-ai/trinity-mini:free",
255
+ "api_base": "https://openrouter.ai/api/v1",
256
+ "api_key_env_var": "OPENROUTER_API_KEY",
257
+ "supports_json": True,
258
+ "api_spec": "openai",
259
+ "input_cost": 0.045,
260
+ "output_cost": 0.15,
261
+ },
132
262
  "trinity-mini-openrouter": {
133
263
  "id": "trinity-mini-openrouter",
134
- "name": "arcee-ai/trinity-mini:free",
264
+ "name": "arcee-ai/trinity-mini",
135
265
  "api_base": "https://openrouter.ai/api/v1",
136
266
  "api_key_env_var": "OPENROUTER_API_KEY",
137
267
  "supports_json": True,
@@ -139,4 +269,28 @@ OPENROUTER_MODELS = {
139
269
  "input_cost": 0.045,
140
270
  "output_cost": 0.15,
141
271
  },
272
+ "glm-4.7-openrouter": {
273
+ "id": "glm-4.7-openrouter",
274
+ "name": "z-ai/glm-4.7",
275
+ "api_base": "https://openrouter.ai/api/v1",
276
+ "api_key_env_var": "OPENROUTER_API_KEY",
277
+ "supports_json": True,
278
+ "api_spec": "openai",
279
+ "input_cost": 0.6,
280
+ "cached_input_cost": 0.6,
281
+ "cache_write_cost": 0.6,
282
+ "output_cost": 2.20,
283
+ },
284
+ "minimax-m2.1-openrouter": {
285
+ "id": "minimax-m2.1-openrouter",
286
+ "name": "minimax/minimax-m2.1",
287
+ "api_base": "https://openrouter.ai/api/v1",
288
+ "api_key_env_var": "OPENROUTER_API_KEY",
289
+ "supports_json": True,
290
+ "api_spec": "openai",
291
+ "input_cost": 0.3,
292
+ "cached_input_cost": 0.3,
293
+ "cache_write_cost": 0.3,
294
+ "output_cost": 1.20,
295
+ },
142
296
  }
lm_deluge/models/together.py CHANGED
@@ -50,6 +50,7 @@ TOGETHER_MODELS = {
50
50
  "api_spec": "openai",
51
51
  "input_cost": 1.95,
52
52
  "output_cost": 8.0,
53
+ "supports_images": True,
53
54
  },
54
55
  "llama-4-maverick-together": {
55
56
  "id": "llama-4-maverick-together",
@@ -60,6 +61,7 @@ TOGETHER_MODELS = {
60
61
  "api_spec": "openai",
61
62
  "input_cost": 0.27,
62
63
  "output_cost": 0.85,
64
+ "supports_images": True,
63
65
  },
64
66
  "llama-4-scout-together": {
65
67
  "id": "llama-4-scout-together",
@@ -70,6 +72,7 @@ TOGETHER_MODELS = {
70
72
  "api_spec": "openai",
71
73
  "input_cost": 0.18,
72
74
  "output_cost": 0.59,
75
+ "supports_images": True,
73
76
  },
74
77
  "gpt-oss-120b-together": {
75
78
  "id": "gpt-oss-120b-together",
lm_deluge/models/zai.py CHANGED
@@ -1 +1,50 @@
1
- ZAI_MODELS = {}
1
+ ZAI_MODELS = {
2
+ "glm-4.7": {
3
+ "id": "glm-4.7",
4
+ "name": "glm-4.7",
5
+ "api_base": "https://api.z.ai/api/anthropic/v1",
6
+ "api_key_env_var": "ZAI_API_KEY",
7
+ "supports_json": True,
8
+ "api_spec": "anthropic",
9
+ "input_cost": 0.6,
10
+ "cached_input_cost": 0.6,
11
+ "cache_write_cost": 0.6,
12
+ "output_cost": 2.20,
13
+ },
14
+ "glm-4.6": {
15
+ "id": "glm-4.6",
16
+ "name": "glm-4.6",
17
+ "api_base": "https://api.z.ai/api/anthropic/v1",
18
+ "api_key_env_var": "ZAI_API_KEY",
19
+ "supports_json": True,
20
+ "api_spec": "anthropic",
21
+ "input_cost": 0.6,
22
+ "cached_input_cost": 0.6,
23
+ "cache_write_cost": 0.6,
24
+ "output_cost": 2.20,
25
+ },
26
+ "glm-4.5": {
27
+ "id": "glm-4.5",
28
+ "name": "glm-4.5",
29
+ "api_base": "https://api.z.ai/api/anthropic/v1",
30
+ "api_key_env_var": "ZAI_API_KEY",
31
+ "supports_json": True,
32
+ "api_spec": "anthropic",
33
+ "input_cost": 0.6,
34
+ "cached_input_cost": 0.6,
35
+ "cache_write_cost": 0.6,
36
+ "output_cost": 2.20,
37
+ },
38
+ "glm-4.5-air": {
39
+ "id": "glm-4.5-air",
40
+ "name": "glm-4.5-air",
41
+ "api_base": "https://api.z.ai/api/anthropic/v1",
42
+ "api_key_env_var": "ZAI_API_KEY",
43
+ "supports_json": True,
44
+ "api_spec": "anthropic",
45
+ "input_cost": 0.6,
46
+ "cached_input_cost": 0.6,
47
+ "cache_write_cost": 0.6,
48
+ "output_cost": 2.20,
49
+ },
50
+ }
lm_deluge/pipelines/extract.py CHANGED
@@ -5,15 +5,14 @@ import os
5
5
  from typing import Any
6
6
 
7
7
  from lm_deluge.client import _LLMClient
8
- from lm_deluge.file import File
9
8
 
10
- from ..prompt import Conversation
9
+ from ..prompt import Conversation, File
11
10
  from ..util.json import load_json
12
11
 
13
12
  try:
14
13
  from PIL import Image as PILImage
15
14
  except ImportError:
16
- PILImage = None
15
+ PILImage: Any = None
17
16
 
18
17
 
19
18
  async def extract_async(
@@ -86,13 +85,13 @@ async def extract_async(
86
85
  buffer = io.BytesIO()
87
86
  input.save(buffer, format="PNG")
88
87
  prompts.append(
89
- Conversation.user(text=image_only_prompt, image=buffer.getvalue())
88
+ Conversation().user(text=image_only_prompt, image=buffer.getvalue())
90
89
  )
91
90
  elif isinstance(input, File):
92
91
  data = input.data
93
92
  if isinstance(data, io.BytesIO):
94
93
  data = data.getvalue()
95
- prompts.append(Conversation.user(text=file_prompt, file=data))
94
+ prompts.append(Conversation().user(text=file_prompt, file=data))
96
95
  else:
97
96
  raise ValueError(
98
97
  "inputs must be a list of strings or PIL images or a File object."
lm_deluge/pipelines/gepa/__init__.py CHANGED
@@ -21,7 +21,7 @@ Example usage:
21
21
  # Define how to evaluate one example
22
22
  def evaluate(client: LLMClient, values: dict[str, str], example: dict) -> EvalResult:
23
23
  # Build prompt with current component values
24
- conv = Conversation.system(values["system_prompt"])
24
+ conv = Conversation().system(values["system_prompt"])
25
25
  conv = conv.add(Message.user(example["question"]))
26
26
 
27
27
  # Run inference