mrmd-ai 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mrmd_ai/juice.py CHANGED
@@ -34,6 +34,85 @@ class JuiceLevel(IntEnum):
     ULTIMATE = 4
 
 
+class ReasoningLevel(IntEnum):
+    """Independent reasoning/thinking budget control.
+
+    This is separate from JuiceLevel and controls how much "thinking"
+    the model does, independent of which model is selected.
+    """
+
+    # No extended thinking - fastest responses
+    OFF = 0
+
+    # Minimal reasoning
+    MINIMAL = 1
+
+    # Low reasoning effort
+    LOW = 2
+
+    # Medium reasoning effort
+    MEDIUM = 3
+
+    # High reasoning effort
+    HIGH = 4
+
+    # Maximum reasoning budget
+    MAXIMUM = 5
+
+
+# Map reasoning levels to thinking budgets and reasoning_effort values
+# For Anthropic: uses `thinking={"type": "enabled", "budget_tokens": X}`
+# For others: uses `reasoning_effort` ("low", "medium", "high")
+# Note: Anthropic requires max_tokens > thinking.budget_tokens
+REASONING_CONFIGS: dict[ReasoningLevel, dict] = {
+    ReasoningLevel.OFF: {
+        "budget_tokens": None,  # No thinking
+        "reasoning_effort": None,
+        "temperature": None,  # None means use model default
+    },
+    ReasoningLevel.MINIMAL: {
+        "budget_tokens": 1024,  # Minimum thinking budget
+        "reasoning_effort": "low",
+        "temperature": 1.0,  # Required for Anthropic extended thinking
+        "max_tokens": 4096,  # Must be > budget_tokens
+    },
+    ReasoningLevel.LOW: {
+        "budget_tokens": 4096,
+        "reasoning_effort": "low",
+        "temperature": 1.0,
+        "max_tokens": 8192,
+    },
+    ReasoningLevel.MEDIUM: {
+        "budget_tokens": 8192,
+        "reasoning_effort": "medium",
+        "temperature": 1.0,
+        "max_tokens": 16000,
+    },
+    ReasoningLevel.HIGH: {
+        "budget_tokens": 16384,
+        "reasoning_effort": "high",
+        "temperature": 1.0,
+        "max_tokens": 24000,
+    },
+    ReasoningLevel.MAXIMUM: {
+        "budget_tokens": 32768,  # Maximum thinking budget
+        "reasoning_effort": "high",
+        "temperature": 1.0,
+        "max_tokens": 48000,  # Must be > budget_tokens
+    },
+}
+
+
+REASONING_DESCRIPTIONS = {
+    ReasoningLevel.OFF: "Off - No extended thinking",
+    ReasoningLevel.MINIMAL: "Minimal - Light reasoning",
+    ReasoningLevel.LOW: "Low - Some reasoning",
+    ReasoningLevel.MEDIUM: "Medium - Moderate reasoning",
+    ReasoningLevel.HIGH: "High - Deep reasoning",
+    ReasoningLevel.MAXIMUM: "Maximum - Full reasoning budget",
+}
+
+
 @dataclass
 class ModelConfig:
     """Configuration for a model at a specific juice level."""
@@ -42,6 +121,7 @@ class ModelConfig:
     max_tokens: int = 4096
     reasoning_effort: str | None = None
     thinking: dict | None = None
+    supports_reasoning: bool = True  # Whether the model supports reasoning_effort
     extra_kwargs: dict = field(default_factory=dict)
 
     def to_lm_kwargs(self) -> dict:
@@ -60,28 +140,33 @@ class ModelConfig:
 
 
 # Model configurations for each juice level
+# supports_reasoning indicates if the model/provider supports reasoning_effort parameter
 JUICE_MODELS: dict[JuiceLevel, ModelConfig] = {
     JuiceLevel.QUICK: ModelConfig(
         model="groq/moonshotai/kimi-k2-instruct-0905",
         temperature=0.7,
         max_tokens=4096,
+        supports_reasoning=False,  # Groq does NOT support reasoning_effort
     ),
     JuiceLevel.BALANCED: ModelConfig(
         model="anthropic/claude-sonnet-4-5",
         temperature=0.7,
         max_tokens=4096,
+        supports_reasoning=True,  # Anthropic supports reasoning_effort
     ),
     JuiceLevel.DEEP: ModelConfig(
         model="gemini/gemini-3-pro-preview",
         temperature=1.0,
         max_tokens=16000,
         reasoning_effort="high",
+        supports_reasoning=True,  # Gemini supports reasoning_effort
     ),
     JuiceLevel.MAXIMUM: ModelConfig(
         model="anthropic/claude-opus-4-5",
         temperature=1.0,
         max_tokens=16000,
         reasoning_effort="high",
+        supports_reasoning=True,  # Anthropic supports reasoning_effort
     ),
 }
 
@@ -93,24 +178,28 @@ ULTIMATE_MODELS: list[ModelConfig] = [
         model="openrouter/x-ai/grok-4",
         temperature=0.7,
         max_tokens=8192,
+        supports_reasoning=True,  # Grok 4 supports reasoning
     ),
     ModelConfig(
-        model="openai/gpt-5.1",
+        model="openai/gpt-5.2",
         temperature=1.0,
         max_tokens=16000,
         reasoning_effort="high",
+        supports_reasoning=True,  # OpenAI supports reasoning
     ),
     ModelConfig(
         model="gemini/gemini-3-pro-preview",
         temperature=1.0,
         max_tokens=16000,
         reasoning_effort="high",
+        supports_reasoning=True,  # Gemini supports reasoning
     ),
     ModelConfig(
         model="anthropic/claude-opus-4-5",
         temperature=1.0,  # Must be 1 for extended thinking
         max_tokens=16000,
         reasoning_effort="high",
+        supports_reasoning=True,  # Anthropic supports reasoning
     ),
 ]
 
@@ -120,14 +209,77 @@ SYNTHESIZER_MODEL = ModelConfig(
     temperature=0.7,
     max_tokens=32000,
     reasoning_effort="high",
+    supports_reasoning=True,
 )
 
 
-def get_lm(juice: JuiceLevel | int = JuiceLevel.QUICK) -> dspy.LM:
-    """Get a dspy.LM configured for the specified juice level.
+def get_api_key_for_model(model: str, api_keys: dict | None) -> str | None:
+    """Get the appropriate API key for a model based on its provider.
+
+    Uses LiteLLM model naming convention: provider/model-name
+    Supports any provider that the user has configured in settings.
+
+    Args:
+        model: Model identifier (e.g., "anthropic/claude-sonnet-4-5")
+        api_keys: Dict of provider -> API key
+
+    Returns:
+        API key string or None if not found/provided.
+    """
+    if not api_keys:
+        return None
+
+    model_lower = model.lower()
+
+    # Extract provider from model name (LiteLLM format: provider/model-name)
+    if "/" in model:
+        provider = model.split("/")[0].lower()
+        # Check for direct provider match
+        if provider in api_keys and api_keys[provider]:
+            return api_keys[provider]
+
+    # Fallback: Check for known provider patterns in model name
+    # This handles cases like "claude-3-sonnet" without prefix
+    provider_patterns = {
+        "anthropic": ["anthropic/", "claude"],
+        "openai": ["openai/", "gpt-", "o1-", "o3-"],
+        "groq": ["groq/"],
+        "gemini": ["gemini/", "gemini-"],
+        "openrouter": ["openrouter/"],
+        "together_ai": ["together_ai/", "together/"],
+        "fireworks_ai": ["fireworks_ai/", "fireworks/"],
+        "mistral": ["mistral/"],
+        "cohere": ["cohere/"],
+        "deepseek": ["deepseek/"],
+        "ollama": ["ollama/"],
+        "azure": ["azure/"],
+        "bedrock": ["bedrock/"],
+        "vertex_ai": ["vertex_ai/"],
+    }
+
+    for provider, patterns in provider_patterns.items():
+        for pattern in patterns:
+            if pattern in model_lower:
+                if provider in api_keys and api_keys[provider]:
+                    return api_keys[provider]
+                break
+
+    return None
+
+
+def get_lm(
+    juice: JuiceLevel | int = JuiceLevel.QUICK,
+    reasoning: ReasoningLevel | int | None = None,
+    api_keys: dict | None = None,
+    model_override: str | None = None,
+) -> dspy.LM:
+    """Get a dspy.LM configured for the specified juice and reasoning levels.
 
     Args:
         juice: Juice level (0-3). Level 4 (ULTIMATE) requires special handling.
+        reasoning: Optional reasoning level (0-5). If None, uses juice level's default.
+        api_keys: Optional dict of provider -> API key. If provided, overrides env vars.
+        model_override: Optional model to use instead of the default for this juice level.
 
     Returns:
         Configured dspy.LM instance.
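As a quick illustration of the key-resolution rules documented above (direct `provider/` prefix first, then pattern fallback), here is a small check against a hypothetical settings dict; the key values are placeholders and the import path is assumed.

from mrmd_ai.juice import get_api_key_for_model  # assumed import path

api_keys = {"anthropic": "key-anthropic", "groq": "key-groq"}

assert get_api_key_for_model("anthropic/claude-sonnet-4-5", api_keys) == "key-anthropic"
assert get_api_key_for_model("claude-3-sonnet", api_keys) == "key-anthropic"  # pattern fallback, no prefix
assert get_api_key_for_model("groq/moonshotai/kimi-k2-instruct-0905", api_keys) == "key-groq"
assert get_api_key_for_model("openai/gpt-5.2", api_keys) is None  # provider not configured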
@@ -139,20 +291,81 @@ def get_lm(juice: JuiceLevel | int = JuiceLevel.QUICK) -> dspy.LM:
         raise ValueError("ULTIMATE juice level requires multi-model merger. Use JuicedProgram instead.")
 
     config = JUICE_MODELS[juice]
-    return dspy.LM(**config.to_lm_kwargs())
+    kwargs = config.to_lm_kwargs()
+
+    # Apply model override if provided
+    if model_override:
+        kwargs["model"] = model_override
+
+    # Get API key for this model's provider
+    api_key = get_api_key_for_model(kwargs["model"], api_keys)
+    if api_key:
+        kwargs["api_key"] = api_key
+
+    # Apply reasoning level overrides if specified AND model supports reasoning
+    if reasoning is not None and config.supports_reasoning:
+        if isinstance(reasoning, int):
+            reasoning = ReasoningLevel(reasoning)
+
+        # Skip if reasoning is OFF
+        if reasoning == ReasoningLevel.OFF:
+            # Remove any existing reasoning params
+            kwargs.pop("reasoning_effort", None)
+            kwargs.pop("thinking", None)
+            return dspy.LM(**kwargs)
+
+        reasoning_config = REASONING_CONFIGS[reasoning]
+        model = config.model.lower()
+
+        # Determine provider and use appropriate parameter format
+        is_anthropic = "anthropic/" in model or "claude" in model
+        is_gemini = "gemini" in model
+        is_openai = "openai/" in model or "gpt" in model
+
+        # Apply temperature (required for Anthropic extended thinking)
+        if reasoning_config.get("temperature") is not None:
+            kwargs["temperature"] = reasoning_config["temperature"]
+
+        # Apply max_tokens
+        if reasoning_config.get("max_tokens") is not None:
+            kwargs["max_tokens"] = reasoning_config["max_tokens"]
+
+        if is_anthropic:
+            # Anthropic uses explicit thinking parameter with budget_tokens
+            budget = reasoning_config.get("budget_tokens", 1024)
+            kwargs["thinking"] = {"type": "enabled", "budget_tokens": budget}
+            # Remove reasoning_effort if present (not used for thinking)
+            kwargs.pop("reasoning_effort", None)
+        else:
+            # Other providers use reasoning_effort
+            if reasoning_config["reasoning_effort"] is not None:
+                kwargs["reasoning_effort"] = reasoning_config["reasoning_effort"]
 
+    return dspy.LM(**kwargs)
 
-class SynthesizeResponses(dspy.Signature):
-    """Synthesize multiple AI model responses into an optimal final answer.
 
-    You are given the original input and responses from multiple AI models.
-    Analyze all responses, identify the best insights from each, resolve
-    any contradictions, and produce the ultimate synthesized response.
+class SynthesizeResponses(dspy.Signature):
+    """Synthesize multiple AI model responses into one optimal final answer.
+
+    You are given responses from multiple AI models for the same task.
+    Your job is to create the BEST possible response by:
+    1. Identifying the strongest elements from each model's response
+    2. Resolving any contradictions (prefer the most accurate/well-reasoned answer)
+    3. Combining complementary insights that don't conflict
+    4. Maintaining the original format and style expected for the task
+    5. Being concise - don't add unnecessary elaboration
+
+    For grammar/spelling fixes: Pick the most correct version, don't over-correct.
+    For text completion: Choose the most natural, coherent continuation.
+    For code: Select the cleanest, most idiomatic solution.
+    For lists: You may combine unique items if appropriate.
+
+    Output ONLY the synthesized response - no explanations or meta-commentary.
     """
 
-    original_input: str = dspy.InputField(desc="The original input/question")
-    model_responses: str = dspy.InputField(desc="Responses from multiple AI models, labeled by model name")
-    synthesized_response: str = dspy.OutputField(desc="The optimal synthesized response combining the best from all models")
+    original_input: str = dspy.InputField(desc="The original input/task that was given to all models")
+    model_responses: str = dspy.InputField(desc="Responses from multiple AI models, each labeled with model name")
+    synthesized_response: str = dspy.OutputField(desc="The single best response, synthesized from all model outputs. Output ONLY the response content.")
 
 
 class JuicedProgram:
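A usage sketch of the extended `get_lm` signature from the hunk above; the import path, placeholder key, and the `dspy.configure` call are assumptions rather than part of the diff.

import dspy
from mrmd_ai.juice import JuiceLevel, ReasoningLevel, get_lm  # assumed import path

lm = get_lm(
    juice=JuiceLevel.BALANCED,                # anthropic/claude-sonnet-4-5 by default
    reasoning=ReasoningLevel.MEDIUM,          # 8192-token thinking budget per REASONING_CONFIGS
    api_keys={"anthropic": "key-anthropic"},  # placeholder; env vars are used if omitted
)
dspy.configure(lm=lm)  # make it the default LM for subsequent dspy calls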
@@ -166,22 +379,31 @@ class JuicedProgram:
         self,
         program: dspy.Module,
         juice: JuiceLevel | int = JuiceLevel.QUICK,
-        progress_callback: Callable[[str, dict], None] | None = None
+        reasoning: ReasoningLevel | int | None = None,
+        progress_callback: Callable[[str, dict], None] | None = None,
+        api_keys: dict | None = None,
+        model_override: str | None = None,
     ):
         """Initialize a juiced program.
 
         Args:
             program: The DSPy program/module to wrap.
             juice: Juice level (0-4).
+            reasoning: Optional reasoning level (0-5). If None, uses juice level's default.
             progress_callback: Optional callback for progress events.
                 Called with (event_type, data) where event_type is:
                 - "status": General status update
                 - "model_start": A model is starting (ultimate mode)
                 - "model_complete": A model finished (ultimate mode)
+            api_keys: Optional dict of provider -> API key. Overrides env vars.
+            model_override: Optional model to use instead of the default for this juice level.
         """
         self.program = program
         self.juice = JuiceLevel(juice) if isinstance(juice, int) else juice
+        self.reasoning = ReasoningLevel(reasoning) if isinstance(reasoning, int) else reasoning
         self.progress_callback = progress_callback
+        self.api_keys = api_keys
+        self.model_override = model_override
 
     def _emit(self, event_type: str, data: dict):
         """Emit a progress event if callback is set."""
@@ -198,15 +420,23 @@ class JuicedProgram:
     def _run_single(self, **kwargs) -> Any:
         """Run with a single model at the specified juice level."""
         config = JUICE_MODELS[self.juice]
-        model_name = config.model.split("/")[-1]
+
+        # Use model override if provided, otherwise use default for juice level
+        actual_model = self.model_override if self.model_override else config.model
+        model_name = actual_model.split("/")[-1]
+
+        reasoning_desc = ""
+        if self.reasoning is not None:
+            reasoning_desc = f" (reasoning={self.reasoning.name})"
 
         self._emit("status", {
             "step": "calling_model",
             "model": model_name,
-            "model_full": config.model
+            "model_full": actual_model,
+            "reasoning_level": self.reasoning.value if self.reasoning else None,
         })
 
-        lm = get_lm(self.juice)
+        lm = get_lm(self.juice, self.reasoning, api_keys=self.api_keys, model_override=self.model_override)
         with dspy.context(lm=lm):
             result = self.program(**kwargs)
 
@@ -230,12 +460,44 @@ class JuicedProgram:
         self._emit("status", {
             "step": "starting_multi_model",
             "models": model_names,
-            "total": len(model_names)
+            "total": len(model_names),
+            "reasoning_level": self.reasoning.value if self.reasoning else None,
         })
 
         def run_model(config):
             """Run a single model - called in parallel."""
-            lm = dspy.LM(**config.to_lm_kwargs())
+            lm_kwargs = config.to_lm_kwargs()
+
+            # Apply reasoning level overrides if specified AND model supports reasoning
+            if self.reasoning is not None and self.reasoning != ReasoningLevel.OFF and config.supports_reasoning:
+                reasoning_config = REASONING_CONFIGS[self.reasoning]
+                model = config.model.lower()
+
+                # Determine provider
+                is_anthropic = "anthropic/" in model or "claude" in model
+
+                # Apply temperature and max_tokens
+                if reasoning_config.get("temperature") is not None:
+                    lm_kwargs["temperature"] = reasoning_config["temperature"]
+                if reasoning_config.get("max_tokens") is not None:
+                    lm_kwargs["max_tokens"] = reasoning_config["max_tokens"]
+
+                if is_anthropic:
+                    # Anthropic uses thinking parameter with budget_tokens
+                    budget = reasoning_config.get("budget_tokens", 1024)
+                    lm_kwargs["thinking"] = {"type": "enabled", "budget_tokens": budget}
+                    lm_kwargs.pop("reasoning_effort", None)
+                else:
+                    # Other providers use reasoning_effort
+                    if reasoning_config["reasoning_effort"] is not None:
+                        lm_kwargs["reasoning_effort"] = reasoning_config["reasoning_effort"]
+
+            # Apply API key for this model's provider
+            api_key = get_api_key_for_model(config.model, self.api_keys)
+            if api_key:
+                lm_kwargs["api_key"] = api_key
+
+            lm = dspy.LM(**lm_kwargs)
             model_name = config.model.split("/")[-1]
 
             # Emit model start
@@ -250,13 +512,19 @@ class JuicedProgram:
                 with dspy.context(lm=lm):
                     result = self.program(**kwargs)
 
-                # Emit model complete
+                # Extract response text from DSPy Prediction for streaming
+                response_data = {}
+                if hasattr(result, "_store") and result._store:
+                    response_data = dict(result._store)
+
+                # Emit model complete WITH the actual response
                 with status_lock:
                     models_status[model_name] = "complete"
                     self._emit("model_complete", {
                         "model": model_name,
                         "success": True,
-                        "models_status": dict(models_status)
+                        "models_status": dict(models_status),
+                        "response": response_data,  # Include actual response!
                     })
 
                 return {"model": model_name, "result": result, "error": None}
@@ -268,7 +536,8 @@ class JuicedProgram:
                         "model": model_name,
                         "success": False,
                         "error": str(e),
-                        "models_status": dict(models_status)
+                        "models_status": dict(models_status),
+                        "response": None,
                     })
                 return {"model": model_name, "result": None, "error": str(e)}
 
@@ -285,15 +554,13 @@ class JuicedProgram:
             "models_completed": len([r for r in model_results if r["result"] is not None])
         })
 
-        # Merge results - combine outputs from all successful models
-        return self._merge_results(model_results)
+        # Merge results using AI synthesis
+        return self._merge_results(model_results, kwargs)
 
-    def _merge_results(self, model_results: list) -> Any:
-        """Merge results from multiple models into a single response.
+    def _merge_results(self, model_results: list, original_input: dict) -> Any:
+        """Merge results from multiple models using AI synthesis.
 
-        For list fields (like synonyms), combines unique values from all models.
-        For string fields, uses the first successful result.
-        Also includes individual model responses for transparency.
+        Uses SYNTHESIZER_MODEL to intelligently combine responses from all models.
         """
         # Get successful results
         successful = [r for r in model_results if r["result"] is not None]
@@ -302,23 +569,27 @@ class JuicedProgram:
             errors = [r["error"] for r in model_results if r["error"]]
             raise RuntimeError(f"All models failed: {errors}")
 
-        # Use first successful result as base
-        base_result = successful[0]["result"]
-
-        # Get the _store dict from the result (DSPy stores outputs there)
-        if hasattr(base_result, "_store"):
-            merged = dict(base_result._store)
-        else:
-            merged = {}
+        # If only one model succeeded, just return its result
+        if len(successful) == 1:
+            result = successful[0]["result"]
+            if hasattr(result, "_store"):
+                result._individual_responses = [{
+                    "model": successful[0]["model"],
+                    "response": str(result._store),
+                    "error": None
+                }]
+            return result
 
-        # Collect individual responses for display
+        # Collect individual responses
         individual_responses = []
+        model_outputs = {}  # model_name -> {field: value}
+
         for r in model_results:
             model_name = r["model"]
             if r["result"] is not None and hasattr(r["result"], "_store"):
-                # Extract the main output field (usually 'response', 'completion', etc.)
                 store = r["result"]._store
-                # Get the first string output field
+                model_outputs[model_name] = dict(store)
+                # Get main output text for display
                 output_text = None
                 for key, value in store.items():
                     if isinstance(value, str) and len(value) > 10:
@@ -336,26 +607,90 @@ class JuicedProgram:
                 "error": r["error"]
             })
 
-        # Merge fields from other models
-        for r in successful[1:]:
-            result = r["result"]
-            if hasattr(result, "_store"):
-                store = result._store
-                for key, value in store.items():
-                    if key in merged:
-                        # Merge lists by combining unique values
-                        if isinstance(value, list) and isinstance(merged[key], list):
-                            # Combine and dedupe while preserving order
-                            seen = set(merged[key])
-                            for item in value:
-                                if item not in seen:
-                                    merged[key].append(item)
-                                    seen.add(item)
-                        # For strings, keep the first (base) value
-                    else:
-                        merged[key] = value
-
-        # Return a simple object with the merged data + individual responses
+        # Use first result as template for output fields
+        base_result = successful[0]["result"]
+        base_store = base_result._store if hasattr(base_result, "_store") else {}
+
+        # Format original input for synthesizer
+        input_text = self._format_input(original_input)
+
+        # Create synthesized result
+        merged = {}
+
+        # Configure synthesizer LM with API key if provided
+        synth_kwargs = SYNTHESIZER_MODEL.to_lm_kwargs()
+        api_key = get_api_key_for_model(SYNTHESIZER_MODEL.model, self.api_keys)
+        if api_key:
+            synth_kwargs["api_key"] = api_key
+        synth_lm = dspy.LM(**synth_kwargs)
+
+        # Synthesize each output field
+        for field_name, base_value in base_store.items():
+            # Collect this field's values from all models
+            field_values = {}
+            for model_name, outputs in model_outputs.items():
+                if field_name in outputs:
+                    field_values[model_name] = outputs[field_name]
+
+            if not field_values:
+                merged[field_name] = base_value
+                continue
+
+            # Check if it's a list field (like synonyms)
+            if isinstance(base_value, list):
+                # For lists, combine unique values from all models
+                combined = []
+                seen = set()
+                for model_name, values in field_values.items():
+                    if isinstance(values, list):
+                        for item in values:
+                            # Get hashable key for deduplication
+                            # Pydantic models aren't hashable, so convert to JSON
+                            try:
+                                if hasattr(item, 'model_dump_json'):
+                                    # Pydantic v2 model
+                                    item_key = item.model_dump_json()
+                                elif hasattr(item, 'json'):
+                                    # Pydantic v1 model
+                                    item_key = item.json()
+                                else:
+                                    # Regular hashable item
+                                    item_key = item
+                            except TypeError:
+                                # Fallback: convert to string representation
+                                item_key = str(item)
+
+                            if item_key not in seen:
+                                combined.append(item)
+                                seen.add(item_key)
+                merged[field_name] = combined
+            else:
+                # For string/text fields, use AI synthesis
+                responses_text = "\n\n".join([
+                    f"=== {model_name} ===\n{value}"
+                    for model_name, value in field_values.items()
+                ])
+
+                self._emit("status", {
+                    "step": "synthesizing_field",
+                    "field": field_name,
+                    "model": SYNTHESIZER_MODEL.model.split("/")[-1]
+                })
+
+                try:
+                    with dspy.context(lm=synth_lm):
+                        predictor = dspy.Predict(SynthesizeResponses)
+                        synth_result = predictor(
+                            original_input=input_text,
+                            model_responses=responses_text
+                        )
+                    merged[field_name] = synth_result.synthesized_response
+                except Exception as e:
+                    # Fallback to first model's response on synthesis error
+                    print(f"[Synthesis] Error synthesizing {field_name}: {e}")
+                    merged[field_name] = base_value
+
+        # Return a result object with merged data
         class MergedResult:
             pass
 
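The list-field branch above dedupes items across models by deriving a hashable key per item. A standalone sketch of that rule follows; the `Synonym` model is hypothetical, and the helper probes `hash()` directly instead of deferring to the set lookup as the package code does.

from pydantic import BaseModel

class Synonym(BaseModel):
    word: str

def dedup_key(item):
    if hasattr(item, "model_dump_json"):  # Pydantic v2
        return item.model_dump_json()
    if hasattr(item, "json"):             # Pydantic v1
        return item.json()
    try:
        hash(item)
        return item
    except TypeError:
        return str(item)  # last-resort string representation

items = [Synonym(word="fast"), Synonym(word="fast"), "quick", "quick"]
seen, combined = set(), []
for item in items:
    key = dedup_key(item)
    if key not in seen:
        seen.add(key)
        combined.append(item)
# combined == [Synonym(word="fast"), "quick"]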
@@ -364,6 +699,7 @@ class JuicedProgram:
             setattr(result, key, value)
         result._store = merged  # For extract_result in server.py
         result._individual_responses = individual_responses  # For UI display
+        result._synthesized = True  # Mark as AI-synthesized
 
         return result
 
@@ -391,18 +727,24 @@ def juiced(juice: JuiceLevel | int = JuiceLevel.QUICK):
     return decorator
 
 
-def run_with_juice(program: dspy.Module, juice: JuiceLevel | int, **kwargs) -> Any:
+def run_with_juice(
+    program: dspy.Module,
+    juice: JuiceLevel | int,
+    reasoning: ReasoningLevel | int | None = None,
+    **kwargs
+) -> Any:
     """Convenience function to run a program with a specific juice level.
 
     Args:
         program: The DSPy program to run.
        juice: Juice level (0-4).
+        reasoning: Optional reasoning level (0-5). If None, uses juice level's default.
        **kwargs: Arguments to pass to the program.
 
     Returns:
        The program result.
    """
-    juiced_program = JuicedProgram(program, juice)
+    juiced_program = JuicedProgram(program, juice, reasoning=reasoning)
     return juiced_program(**kwargs)
 
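Putting the new parameters together, an illustrative end-to-end sketch; the `Completion` signature, import path, and placeholder keys are assumptions, not part of the package.

import dspy
from mrmd_ai.juice import JuiceLevel, JuicedProgram, ReasoningLevel, run_with_juice

class Completion(dspy.Signature):
    """Continue the given text naturally."""
    text: str = dspy.InputField()
    completion: str = dspy.OutputField()

program = dspy.Predict(Completion)

# Single model at DEEP juice with an explicit reasoning budget
result = run_with_juice(program, JuiceLevel.DEEP, reasoning=ReasoningLevel.HIGH, text="Once upon a time")
print(result.completion)

# ULTIMATE mode: parallel models plus synthesis, with progress events and per-provider keys
juiced = JuicedProgram(
    program,
    juice=JuiceLevel.ULTIMATE,
    reasoning=ReasoningLevel.MEDIUM,
    api_keys={"anthropic": "key-a", "openai": "key-o", "gemini": "key-g", "openrouter": "key-r"},
    progress_callback=lambda event, data: print(event, data.get("model")),
)
merged = juiced(text="Once upon a time")
print(merged.completion)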