mrmd-ai 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mrmd_ai/custom_programs.py +215 -0
- mrmd_ai/juice.py +403 -61
- mrmd_ai/modules/__init__.py +11 -0
- mrmd_ai/modules/edit.py +102 -0
- mrmd_ai/server.py +275 -23
- mrmd_ai/signatures/__init__.py +15 -0
- mrmd_ai/signatures/edit.py +173 -0
- {mrmd_ai-0.1.0.dist-info → mrmd_ai-0.1.2.dist-info}/METADATA +2 -1
- {mrmd_ai-0.1.0.dist-info → mrmd_ai-0.1.2.dist-info}/RECORD +12 -8
- mrmd_ai-0.1.2.dist-info/licenses/LICENSE +21 -0
- {mrmd_ai-0.1.0.dist-info → mrmd_ai-0.1.2.dist-info}/WHEEL +0 -0
- {mrmd_ai-0.1.0.dist-info → mrmd_ai-0.1.2.dist-info}/entry_points.txt +0 -0
mrmd_ai/juice.py
CHANGED
@@ -34,6 +34,85 @@ class JuiceLevel(IntEnum):
     ULTIMATE = 4


+class ReasoningLevel(IntEnum):
+    """Independent reasoning/thinking budget control.
+
+    This is separate from JuiceLevel and controls how much "thinking"
+    the model does, independent of which model is selected.
+    """
+
+    # No extended thinking - fastest responses
+    OFF = 0
+
+    # Minimal reasoning
+    MINIMAL = 1
+
+    # Low reasoning effort
+    LOW = 2
+
+    # Medium reasoning effort
+    MEDIUM = 3
+
+    # High reasoning effort
+    HIGH = 4
+
+    # Maximum reasoning budget
+    MAXIMUM = 5
+
+
+# Map reasoning levels to thinking budgets and reasoning_effort values
+# For Anthropic: uses `thinking={"type": "enabled", "budget_tokens": X}`
+# For others: uses `reasoning_effort` ("low", "medium", "high")
+# Note: Anthropic requires max_tokens > thinking.budget_tokens
+REASONING_CONFIGS: dict[ReasoningLevel, dict] = {
+    ReasoningLevel.OFF: {
+        "budget_tokens": None,  # No thinking
+        "reasoning_effort": None,
+        "temperature": None,  # None means use model default
+    },
+    ReasoningLevel.MINIMAL: {
+        "budget_tokens": 1024,  # Minimum thinking budget
+        "reasoning_effort": "low",
+        "temperature": 1.0,  # Required for Anthropic extended thinking
+        "max_tokens": 4096,  # Must be > budget_tokens
+    },
+    ReasoningLevel.LOW: {
+        "budget_tokens": 4096,
+        "reasoning_effort": "low",
+        "temperature": 1.0,
+        "max_tokens": 8192,
+    },
+    ReasoningLevel.MEDIUM: {
+        "budget_tokens": 8192,
+        "reasoning_effort": "medium",
+        "temperature": 1.0,
+        "max_tokens": 16000,
+    },
+    ReasoningLevel.HIGH: {
+        "budget_tokens": 16384,
+        "reasoning_effort": "high",
+        "temperature": 1.0,
+        "max_tokens": 24000,
+    },
+    ReasoningLevel.MAXIMUM: {
+        "budget_tokens": 32768,  # Maximum thinking budget
+        "reasoning_effort": "high",
+        "temperature": 1.0,
+        "max_tokens": 48000,  # Must be > budget_tokens
+    },
+}
+
+
+REASONING_DESCRIPTIONS = {
+    ReasoningLevel.OFF: "Off - No extended thinking",
+    ReasoningLevel.MINIMAL: "Minimal - Light reasoning",
+    ReasoningLevel.LOW: "Low - Some reasoning",
+    ReasoningLevel.MEDIUM: "Medium - Moderate reasoning",
+    ReasoningLevel.HIGH: "High - Deep reasoning",
+    ReasoningLevel.MAXIMUM: "Maximum - Full reasoning budget",
+}
+
+
 @dataclass
 class ModelConfig:
     """Configuration for a model at a specific juice level."""
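Example (not part of the package diff): a minimal sketch of how a REASONING_CONFIGS entry appears to translate into provider kwargs, mirroring the branching added later in get_lm. The config values are copied from the diff above; the helper name pick_reasoning_kwargs is illustrative only and does not exist in the package.

    # Assumes the MEDIUM entry from REASONING_CONFIGS above.
    medium = {"budget_tokens": 8192, "reasoning_effort": "medium", "temperature": 1.0, "max_tokens": 16000}

    def pick_reasoning_kwargs(model: str, cfg: dict) -> dict:
        # Anthropic models get an explicit thinking budget; other providers get reasoning_effort.
        if "anthropic/" in model or "claude" in model:
            return {"thinking": {"type": "enabled", "budget_tokens": cfg["budget_tokens"]},
                    "temperature": cfg["temperature"], "max_tokens": cfg["max_tokens"]}
        return {"reasoning_effort": cfg["reasoning_effort"],
                "temperature": cfg["temperature"], "max_tokens": cfg["max_tokens"]}

    print(pick_reasoning_kwargs("anthropic/claude-sonnet-4-5", medium))
    print(pick_reasoning_kwargs("gemini/gemini-3-pro-preview", medium))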
@@ -42,6 +121,7 @@ class ModelConfig:
     max_tokens: int = 4096
     reasoning_effort: str | None = None
     thinking: dict | None = None
+    supports_reasoning: bool = True  # Whether the model supports reasoning_effort
     extra_kwargs: dict = field(default_factory=dict)

     def to_lm_kwargs(self) -> dict:
@@ -60,28 +140,33 @@ class ModelConfig:


 # Model configurations for each juice level
+# supports_reasoning indicates if the model/provider supports reasoning_effort parameter
 JUICE_MODELS: dict[JuiceLevel, ModelConfig] = {
     JuiceLevel.QUICK: ModelConfig(
         model="groq/moonshotai/kimi-k2-instruct-0905",
         temperature=0.7,
         max_tokens=4096,
+        supports_reasoning=False,  # Groq does NOT support reasoning_effort
     ),
     JuiceLevel.BALANCED: ModelConfig(
         model="anthropic/claude-sonnet-4-5",
         temperature=0.7,
         max_tokens=4096,
+        supports_reasoning=True,  # Anthropic supports reasoning_effort
     ),
     JuiceLevel.DEEP: ModelConfig(
         model="gemini/gemini-3-pro-preview",
         temperature=1.0,
         max_tokens=16000,
         reasoning_effort="high",
+        supports_reasoning=True,  # Gemini supports reasoning_effort
     ),
     JuiceLevel.MAXIMUM: ModelConfig(
         model="anthropic/claude-opus-4-5",
         temperature=1.0,
         max_tokens=16000,
         reasoning_effort="high",
+        supports_reasoning=True,  # Anthropic supports reasoning_effort
     ),
 }

@@ -93,24 +178,28 @@ ULTIMATE_MODELS: list[ModelConfig] = [
         model="openrouter/x-ai/grok-4",
         temperature=0.7,
         max_tokens=8192,
+        supports_reasoning=True,  # Grok 4 supports reasoning
     ),
     ModelConfig(
-        model="openai/gpt-5.
+        model="openai/gpt-5.2",
         temperature=1.0,
         max_tokens=16000,
         reasoning_effort="high",
+        supports_reasoning=True,  # OpenAI supports reasoning
     ),
     ModelConfig(
         model="gemini/gemini-3-pro-preview",
         temperature=1.0,
         max_tokens=16000,
         reasoning_effort="high",
+        supports_reasoning=True,  # Gemini supports reasoning
     ),
     ModelConfig(
         model="anthropic/claude-opus-4-5",
         temperature=1.0,  # Must be 1 for extended thinking
         max_tokens=16000,
         reasoning_effort="high",
+        supports_reasoning=True,  # Anthropic supports reasoning
     ),
 ]

@@ -120,14 +209,77 @@ SYNTHESIZER_MODEL = ModelConfig(
     temperature=0.7,
     max_tokens=32000,
     reasoning_effort="high",
+    supports_reasoning=True,
 )


-def get_lm(juice: JuiceLevel | int = JuiceLevel.QUICK) -> dspy.LM:
-    """Get
+def get_api_key_for_model(model: str, api_keys: dict | None) -> str | None:
+    """Get the appropriate API key for a model based on its provider.
+
+    Uses LiteLLM model naming convention: provider/model-name
+    Supports any provider that the user has configured in settings.
+
+    Args:
+        model: Model identifier (e.g., "anthropic/claude-sonnet-4-5")
+        api_keys: Dict of provider -> API key
+
+    Returns:
+        API key string or None if not found/provided.
+    """
+    if not api_keys:
+        return None
+
+    model_lower = model.lower()
+
+    # Extract provider from model name (LiteLLM format: provider/model-name)
+    if "/" in model:
+        provider = model.split("/")[0].lower()
+        # Check for direct provider match
+        if provider in api_keys and api_keys[provider]:
+            return api_keys[provider]
+
+    # Fallback: Check for known provider patterns in model name
+    # This handles cases like "claude-3-sonnet" without prefix
+    provider_patterns = {
+        "anthropic": ["anthropic/", "claude"],
+        "openai": ["openai/", "gpt-", "o1-", "o3-"],
+        "groq": ["groq/"],
+        "gemini": ["gemini/", "gemini-"],
+        "openrouter": ["openrouter/"],
+        "together_ai": ["together_ai/", "together/"],
+        "fireworks_ai": ["fireworks_ai/", "fireworks/"],
+        "mistral": ["mistral/"],
+        "cohere": ["cohere/"],
+        "deepseek": ["deepseek/"],
+        "ollama": ["ollama/"],
+        "azure": ["azure/"],
+        "bedrock": ["bedrock/"],
+        "vertex_ai": ["vertex_ai/"],
+    }
+
+    for provider, patterns in provider_patterns.items():
+        for pattern in patterns:
+            if pattern in model_lower:
+                if provider in api_keys and api_keys[provider]:
+                    return api_keys[provider]
+                break
+
+    return None
+
+
+def get_lm(
+    juice: JuiceLevel | int = JuiceLevel.QUICK,
+    reasoning: ReasoningLevel | int | None = None,
+    api_keys: dict | None = None,
+    model_override: str | None = None,
+) -> dspy.LM:
+    """Get a dspy.LM configured for the specified juice and reasoning levels.

     Args:
         juice: Juice level (0-3). Level 4 (ULTIMATE) requires special handling.
+        reasoning: Optional reasoning level (0-5). If None, uses juice level's default.
+        api_keys: Optional dict of provider -> API key. If provided, overrides env vars.
+        model_override: Optional model to use instead of the default for this juice level.

     Returns:
         Configured dspy.LM instance.
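Example (not part of the package diff): a short usage sketch of the new provider-key lookup added above, assuming the function is importable from this file's module path. The key strings are placeholders.

    from mrmd_ai.juice import get_api_key_for_model

    keys = {"anthropic": "sk-ant-example", "groq": "gsk-example"}
    # Direct match on the LiteLLM provider prefix.
    print(get_api_key_for_model("anthropic/claude-sonnet-4-5", keys))  # -> "sk-ant-example"
    # Falls back to pattern matching when there is no provider prefix.
    print(get_api_key_for_model("claude-3-sonnet", keys))              # -> "sk-ant-example"
    # Returns None when no key is configured for the provider.
    print(get_api_key_for_model("mistral/mistral-large", keys))        # -> None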
@@ -139,20 +291,81 @@ def get_lm(juice: JuiceLevel | int = JuiceLevel.QUICK) -> dspy.LM:
         raise ValueError("ULTIMATE juice level requires multi-model merger. Use JuicedProgram instead.")

     config = JUICE_MODELS[juice]
-
+    kwargs = config.to_lm_kwargs()
+
+    # Apply model override if provided
+    if model_override:
+        kwargs["model"] = model_override
+
+    # Get API key for this model's provider
+    api_key = get_api_key_for_model(kwargs["model"], api_keys)
+    if api_key:
+        kwargs["api_key"] = api_key
+
+    # Apply reasoning level overrides if specified AND model supports reasoning
+    if reasoning is not None and config.supports_reasoning:
+        if isinstance(reasoning, int):
+            reasoning = ReasoningLevel(reasoning)
+
+        # Skip if reasoning is OFF
+        if reasoning == ReasoningLevel.OFF:
+            # Remove any existing reasoning params
+            kwargs.pop("reasoning_effort", None)
+            kwargs.pop("thinking", None)
+            return dspy.LM(**kwargs)
+
+        reasoning_config = REASONING_CONFIGS[reasoning]
+        model = config.model.lower()
+
+        # Determine provider and use appropriate parameter format
+        is_anthropic = "anthropic/" in model or "claude" in model
+        is_gemini = "gemini" in model
+        is_openai = "openai/" in model or "gpt" in model
+
+        # Apply temperature (required for Anthropic extended thinking)
+        if reasoning_config.get("temperature") is not None:
+            kwargs["temperature"] = reasoning_config["temperature"]
+
+        # Apply max_tokens
+        if reasoning_config.get("max_tokens") is not None:
+            kwargs["max_tokens"] = reasoning_config["max_tokens"]
+
+        if is_anthropic:
+            # Anthropic uses explicit thinking parameter with budget_tokens
+            budget = reasoning_config.get("budget_tokens", 1024)
+            kwargs["thinking"] = {"type": "enabled", "budget_tokens": budget}
+            # Remove reasoning_effort if present (not used for thinking)
+            kwargs.pop("reasoning_effort", None)
+        else:
+            # Other providers use reasoning_effort
+            if reasoning_config["reasoning_effort"] is not None:
+                kwargs["reasoning_effort"] = reasoning_config["reasoning_effort"]

+    return dspy.LM(**kwargs)

-class SynthesizeResponses(dspy.Signature):
-    """Synthesize multiple AI model responses into an optimal final answer.

-
-
-
+class SynthesizeResponses(dspy.Signature):
+    """Synthesize multiple AI model responses into one optimal final answer.
+
+    You are given responses from multiple AI models for the same task.
+    Your job is to create the BEST possible response by:
+    1. Identifying the strongest elements from each model's response
+    2. Resolving any contradictions (prefer the most accurate/well-reasoned answer)
+    3. Combining complementary insights that don't conflict
+    4. Maintaining the original format and style expected for the task
+    5. Being concise - don't add unnecessary elaboration
+
+    For grammar/spelling fixes: Pick the most correct version, don't over-correct.
+    For text completion: Choose the most natural, coherent continuation.
+    For code: Select the cleanest, most idiomatic solution.
+    For lists: You may combine unique items if appropriate.
+
+    Output ONLY the synthesized response - no explanations or meta-commentary.
     """

-    original_input: str = dspy.InputField(desc="The original input/
-    model_responses: str = dspy.InputField(desc="Responses from multiple AI models, labeled
-    synthesized_response: str = dspy.OutputField(desc="The
+    original_input: str = dspy.InputField(desc="The original input/task that was given to all models")
+    model_responses: str = dspy.InputField(desc="Responses from multiple AI models, each labeled with model name")
+    synthesized_response: str = dspy.OutputField(desc="The single best response, synthesized from all model outputs. Output ONLY the response content.")


 class JuicedProgram:
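Example (not part of the package diff): a sketch of how the extended get_lm signature reads at a call site, using the enums and keyword arguments introduced above. The API key string is a placeholder.

    from mrmd_ai.juice import JuiceLevel, ReasoningLevel, get_lm

    lm = get_lm(
        juice=JuiceLevel.BALANCED,
        reasoning=ReasoningLevel.MEDIUM,           # mapped to a thinking budget or reasoning_effort
        api_keys={"anthropic": "sk-ant-example"},  # overrides environment variables
        model_override=None,                       # keep the default model for this juice level
    )
    # lm is a configured dspy.LM; it is used via dspy.context(lm=lm) elsewhere in this file.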
@@ -166,22 +379,31 @@ class JuicedProgram:
         self,
         program: dspy.Module,
         juice: JuiceLevel | int = JuiceLevel.QUICK,
-
+        reasoning: ReasoningLevel | int | None = None,
+        progress_callback: Callable[[str, dict], None] | None = None,
+        api_keys: dict | None = None,
+        model_override: str | None = None,
     ):
         """Initialize a juiced program.

         Args:
             program: The DSPy program/module to wrap.
             juice: Juice level (0-4).
+            reasoning: Optional reasoning level (0-5). If None, uses juice level's default.
             progress_callback: Optional callback for progress events.
                 Called with (event_type, data) where event_type is:
                 - "status": General status update
                 - "model_start": A model is starting (ultimate mode)
                 - "model_complete": A model finished (ultimate mode)
+            api_keys: Optional dict of provider -> API key. Overrides env vars.
+            model_override: Optional model to use instead of the default for this juice level.
         """
         self.program = program
         self.juice = JuiceLevel(juice) if isinstance(juice, int) else juice
+        self.reasoning = ReasoningLevel(reasoning) if isinstance(reasoning, int) else reasoning
         self.progress_callback = progress_callback
+        self.api_keys = api_keys
+        self.model_override = model_override

     def _emit(self, event_type: str, data: dict):
         """Emit a progress event if callback is set."""
@@ -198,15 +420,23 @@ class JuicedProgram:
     def _run_single(self, **kwargs) -> Any:
         """Run with a single model at the specified juice level."""
         config = JUICE_MODELS[self.juice]
-
+
+        # Use model override if provided, otherwise use default for juice level
+        actual_model = self.model_override if self.model_override else config.model
+        model_name = actual_model.split("/")[-1]
+
+        reasoning_desc = ""
+        if self.reasoning is not None:
+            reasoning_desc = f" (reasoning={self.reasoning.name})"

         self._emit("status", {
             "step": "calling_model",
             "model": model_name,
-            "model_full":
+            "model_full": actual_model,
+            "reasoning_level": self.reasoning.value if self.reasoning else None,
         })

-        lm = get_lm(self.juice)
+        lm = get_lm(self.juice, self.reasoning, api_keys=self.api_keys, model_override=self.model_override)
         with dspy.context(lm=lm):
             result = self.program(**kwargs)

@@ -230,12 +460,44 @@ class JuicedProgram:
         self._emit("status", {
             "step": "starting_multi_model",
             "models": model_names,
-            "total": len(model_names)
+            "total": len(model_names),
+            "reasoning_level": self.reasoning.value if self.reasoning else None,
         })

         def run_model(config):
             """Run a single model - called in parallel."""
-
+            lm_kwargs = config.to_lm_kwargs()
+
+            # Apply reasoning level overrides if specified AND model supports reasoning
+            if self.reasoning is not None and self.reasoning != ReasoningLevel.OFF and config.supports_reasoning:
+                reasoning_config = REASONING_CONFIGS[self.reasoning]
+                model = config.model.lower()
+
+                # Determine provider
+                is_anthropic = "anthropic/" in model or "claude" in model
+
+                # Apply temperature and max_tokens
+                if reasoning_config.get("temperature") is not None:
+                    lm_kwargs["temperature"] = reasoning_config["temperature"]
+                if reasoning_config.get("max_tokens") is not None:
+                    lm_kwargs["max_tokens"] = reasoning_config["max_tokens"]
+
+                if is_anthropic:
+                    # Anthropic uses thinking parameter with budget_tokens
+                    budget = reasoning_config.get("budget_tokens", 1024)
+                    lm_kwargs["thinking"] = {"type": "enabled", "budget_tokens": budget}
+                    lm_kwargs.pop("reasoning_effort", None)
+                else:
+                    # Other providers use reasoning_effort
+                    if reasoning_config["reasoning_effort"] is not None:
+                        lm_kwargs["reasoning_effort"] = reasoning_config["reasoning_effort"]
+
+            # Apply API key for this model's provider
+            api_key = get_api_key_for_model(config.model, self.api_keys)
+            if api_key:
+                lm_kwargs["api_key"] = api_key
+
+            lm = dspy.LM(**lm_kwargs)
             model_name = config.model.split("/")[-1]

             # Emit model start
@@ -250,13 +512,19 @@ class JuicedProgram:
             with dspy.context(lm=lm):
                 result = self.program(**kwargs)

-            #
+            # Extract response text from DSPy Prediction for streaming
+            response_data = {}
+            if hasattr(result, "_store") and result._store:
+                response_data = dict(result._store)
+
+            # Emit model complete WITH the actual response
             with status_lock:
                 models_status[model_name] = "complete"
                 self._emit("model_complete", {
                     "model": model_name,
                     "success": True,
-                    "models_status": dict(models_status)
+                    "models_status": dict(models_status),
+                    "response": response_data,  # Include actual response!
                 })

             return {"model": model_name, "result": result, "error": None}
@@ -268,7 +536,8 @@ class JuicedProgram:
                     "model": model_name,
                     "success": False,
                     "error": str(e),
-                    "models_status": dict(models_status)
+                    "models_status": dict(models_status),
+                    "response": None,
                 })
                 return {"model": model_name, "result": None, "error": str(e)}

@@ -285,15 +554,13 @@ class JuicedProgram:
             "models_completed": len([r for r in model_results if r["result"] is not None])
         })

-        # Merge results
-        return self._merge_results(model_results)
+        # Merge results using AI synthesis
+        return self._merge_results(model_results, kwargs)

-    def _merge_results(self, model_results: list) -> Any:
-        """Merge results from multiple models
+    def _merge_results(self, model_results: list, original_input: dict) -> Any:
+        """Merge results from multiple models using AI synthesis.

-
-        For string fields, uses the first successful result.
-        Also includes individual model responses for transparency.
+        Uses SYNTHESIZER_MODEL to intelligently combine responses from all models.
         """
         # Get successful results
         successful = [r for r in model_results if r["result"] is not None]
@@ -302,23 +569,27 @@ class JuicedProgram:
             errors = [r["error"] for r in model_results if r["error"]]
             raise RuntimeError(f"All models failed: {errors}")

-        #
-
-
-
-
-
-
+        # If only one model succeeded, just return its result
+        if len(successful) == 1:
+            result = successful[0]["result"]
+            if hasattr(result, "_store"):
+                result._individual_responses = [{
+                    "model": successful[0]["model"],
+                    "response": str(result._store),
+                    "error": None
+                }]
+            return result

-        # Collect individual responses
+        # Collect individual responses
         individual_responses = []
+        model_outputs = {}  # model_name -> {field: value}
+
         for r in model_results:
             model_name = r["model"]
             if r["result"] is not None and hasattr(r["result"], "_store"):
-                # Extract the main output field (usually 'response', 'completion', etc.)
                 store = r["result"]._store
-
+                model_outputs[model_name] = dict(store)
+                # Get main output text for display
                 output_text = None
                 for key, value in store.items():
                     if isinstance(value, str) and len(value) > 10:
@@ -336,26 +607,90 @@ class JuicedProgram:
                     "error": r["error"]
                 })

-        #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # Use first result as template for output fields
+        base_result = successful[0]["result"]
+        base_store = base_result._store if hasattr(base_result, "_store") else {}
+
+        # Format original input for synthesizer
+        input_text = self._format_input(original_input)
+
+        # Create synthesized result
+        merged = {}
+
+        # Configure synthesizer LM with API key if provided
+        synth_kwargs = SYNTHESIZER_MODEL.to_lm_kwargs()
+        api_key = get_api_key_for_model(SYNTHESIZER_MODEL.model, self.api_keys)
+        if api_key:
+            synth_kwargs["api_key"] = api_key
+        synth_lm = dspy.LM(**synth_kwargs)
+
+        # Synthesize each output field
+        for field_name, base_value in base_store.items():
+            # Collect this field's values from all models
+            field_values = {}
+            for model_name, outputs in model_outputs.items():
+                if field_name in outputs:
+                    field_values[model_name] = outputs[field_name]
+
+            if not field_values:
+                merged[field_name] = base_value
+                continue
+
+            # Check if it's a list field (like synonyms)
+            if isinstance(base_value, list):
+                # For lists, combine unique values from all models
+                combined = []
+                seen = set()
+                for model_name, values in field_values.items():
+                    if isinstance(values, list):
+                        for item in values:
+                            # Get hashable key for deduplication
+                            # Pydantic models aren't hashable, so convert to JSON
+                            try:
+                                if hasattr(item, 'model_dump_json'):
+                                    # Pydantic v2 model
+                                    item_key = item.model_dump_json()
+                                elif hasattr(item, 'json'):
+                                    # Pydantic v1 model
+                                    item_key = item.json()
+                                else:
+                                    # Regular hashable item
+                                    item_key = item
+                            except TypeError:
+                                # Fallback: convert to string representation
+                                item_key = str(item)
+
+                            if item_key not in seen:
+                                combined.append(item)
+                                seen.add(item_key)
+                merged[field_name] = combined
+            else:
+                # For string/text fields, use AI synthesis
+                responses_text = "\n\n".join([
+                    f"=== {model_name} ===\n{value}"
+                    for model_name, value in field_values.items()
+                ])
+
+                self._emit("status", {
+                    "step": "synthesizing_field",
+                    "field": field_name,
+                    "model": SYNTHESIZER_MODEL.model.split("/")[-1]
+                })
+
+                try:
+                    with dspy.context(lm=synth_lm):
+                        predictor = dspy.Predict(SynthesizeResponses)
+                        synth_result = predictor(
+                            original_input=input_text,
+                            model_responses=responses_text
+                        )
+                    merged[field_name] = synth_result.synthesized_response
+                except Exception as e:
+                    # Fallback to first model's response on synthesis error
+                    print(f"[Synthesis] Error synthesizing {field_name}: {e}")
+                    merged[field_name] = base_value
+
+        # Return a result object with merged data
         class MergedResult:
             pass

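Example (not part of the package diff): the list-merging branch above deduplicates items across models by a JSON/string key. A minimal standalone sketch of that behaviour, with plain string lists standing in for model outputs; the model names are taken from the diff for flavour only.

    field_values = {
        "kimi-k2-instruct-0905": ["fast", "quick", "rapid"],
        "claude-opus-4-5": ["quick", "swift", "rapid"],
    }
    combined, seen = [], set()
    for _model, values in field_values.items():
        for item in values:
            key = item if isinstance(item, str) else str(item)  # Pydantic items would use model_dump_json()
            if key not in seen:
                combined.append(item)
                seen.add(key)
    print(combined)  # ['fast', 'quick', 'rapid', 'swift']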
@@ -364,6 +699,7 @@ class JuicedProgram:
             setattr(result, key, value)
         result._store = merged  # For extract_result in server.py
         result._individual_responses = individual_responses  # For UI display
+        result._synthesized = True  # Mark as AI-synthesized

         return result

@@ -391,18 +727,24 @@ def juiced(juice: JuiceLevel | int = JuiceLevel.QUICK):
     return decorator


-def run_with_juice(
+def run_with_juice(
+    program: dspy.Module,
+    juice: JuiceLevel | int,
+    reasoning: ReasoningLevel | int | None = None,
+    **kwargs
+) -> Any:
     """Convenience function to run a program with a specific juice level.

     Args:
         program: The DSPy program to run.
         juice: Juice level (0-4).
+        reasoning: Optional reasoning level (0-5). If None, uses juice level's default.
         **kwargs: Arguments to pass to the program.

     Returns:
         The program result.
     """
-    juiced_program = JuicedProgram(program, juice)
+    juiced_program = JuicedProgram(program, juice, reasoning=reasoning)
     return juiced_program(**kwargs)

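Example (not part of the package diff): a sketch of the updated convenience API, assuming a caller already has a DSPy module. The FixGrammar signature and the sample text are illustrative only and do not exist in the package.

    import dspy
    from mrmd_ai.juice import JuiceLevel, ReasoningLevel, run_with_juice

    class FixGrammar(dspy.Signature):
        """Fix grammar and spelling in the given text."""
        text: str = dspy.InputField()
        fixed: str = dspy.OutputField()

    result = run_with_juice(
        dspy.Predict(FixGrammar),
        juice=JuiceLevel.QUICK,
        reasoning=ReasoningLevel.LOW,  # ignored for models with supports_reasoning=False
        text="Their going to the libary tommorow.",
    )
    print(result.fixed)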