mrmd-ai 0.1.0__tar.gz → 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/PKG-INFO +1 -1
  2. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/pyproject.toml +1 -1
  3. mrmd_ai-0.1.1/src/mrmd_ai/juice.py +673 -0
  4. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/src/mrmd_ai/modules/__init__.py +11 -0
  5. mrmd_ai-0.1.1/src/mrmd_ai/modules/edit.py +102 -0
  6. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/src/mrmd_ai/server.py +97 -19
  7. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/src/mrmd_ai/signatures/__init__.py +15 -0
  8. mrmd_ai-0.1.1/src/mrmd_ai/signatures/edit.py +173 -0
  9. mrmd_ai-0.1.0/src/mrmd_ai/juice.py +0 -416
  10. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/.gitignore +0 -0
  11. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/README.md +0 -0
  12. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/dspy.config.yaml +0 -0
  13. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/logs/AddTypeHintsPredict.log +0 -0
  14. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/logs/CorrectAndFinishLinePredict.log +0 -0
  15. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/logs/DocumentCodePredict.log +0 -0
  16. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/logs/ExplainCodePredict.log +0 -0
  17. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/logs/FinishCodeLinePredict.log +0 -0
  18. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/logs/FinishCodeSectionPredict.log +0 -0
  19. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/logs/FinishParagraphPredict.log +0 -0
  20. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/logs/FinishSentencePredict.log +0 -0
  21. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/logs/FixGrammarPredict.log +0 -0
  22. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/logs/FixTranscriptionPredict.log +0 -0
  23. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/logs/FormatCodePredict.log +0 -0
  24. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/logs/GetSynonymsPredict.log +0 -0
  25. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/logs/IdentifyReplacementPredict.log +0 -0
  26. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/logs/ImproveNamesPredict.log +0 -0
  27. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/logs/RefactorCodePredict.log +0 -0
  28. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/logs/ReformatMarkdownPredict.log +0 -0
  29. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/logs/server.log +0 -0
  30. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/openapi.json +0 -0
  31. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/src/mrmd_ai/__init__.py +0 -0
  32. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/src/mrmd_ai/metrics/__init__.py +0 -0
  33. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/src/mrmd_ai/modules/code.py +0 -0
  34. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/src/mrmd_ai/modules/correct.py +0 -0
  35. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/src/mrmd_ai/modules/document.py +0 -0
  36. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/src/mrmd_ai/modules/finish.py +0 -0
  37. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/src/mrmd_ai/modules/fix.py +0 -0
  38. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/src/mrmd_ai/modules/notebook.py +0 -0
  39. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/src/mrmd_ai/modules/text.py +0 -0
  40. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/src/mrmd_ai/optimizers/__init__.py +0 -0
  41. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/src/mrmd_ai/signatures/code.py +0 -0
  42. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/src/mrmd_ai/signatures/correct.py +0 -0
  43. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/src/mrmd_ai/signatures/document.py +0 -0
  44. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/src/mrmd_ai/signatures/finish.py +0 -0
  45. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/src/mrmd_ai/signatures/fix.py +0 -0
  46. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/src/mrmd_ai/signatures/notebook.py +0 -0
  47. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/src/mrmd_ai/signatures/text.py +0 -0
  48. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/src/mrmd_ai/utils/__init__.py +0 -0
  49. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/tests/__init__.py +0 -0
  50. {mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/uv.lock +0 -0
{mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mrmd-ai
- Version: 0.1.0
+ Version: 0.1.1
  Summary: AI programs for MRMD editor - completions, fixes, and corrections
  Requires-Python: >=3.11
  Requires-Dist: dspy>=2.6

{mrmd_ai-0.1.0 → mrmd_ai-0.1.1}/pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "mrmd-ai"
- version = "0.1.0"
+ version = "0.1.1"
  description = "AI programs for MRMD editor - completions, fixes, and corrections"
  readme = "README.md"
  requires-python = ">=3.11"
mrmd_ai-0.1.1/src/mrmd_ai/juice.py
@@ -0,0 +1,673 @@
+ """
+ Juice Level System for MRMD AI Programs.
+
+ Juice levels control the quality/cost tradeoff of AI responses:
+ - Level 0: Kimi K2 on Groq (fast, cheap, default)
+ - Level 1: Claude Sonnet 4.5 (better quality)
+ - Level 2: Gemini 3 Pro with thinking (deep reasoning)
+ - Level 3: Claude Opus 4.5 with high thinking (maximum single-model quality)
+ - Level 4: Multi-model merger (Grok 4 + GPT-5.2 + Gemini 3 + Opus 4.5, synthesized by Gemini 3)
+ """
+
+ from enum import IntEnum
+ from typing import Any, Callable
+ from dataclasses import dataclass, field
+ import dspy
+
+
+ class JuiceLevel(IntEnum):
+     """Progressive quality levels for AI responses."""
+
+     # Fast & cheap - Kimi K2 on Groq
+     QUICK = 0
+
+     # Better quality - Sonnet 4.5
+     BALANCED = 1
+
+     # Deep reasoning - Gemini 3 with thinking
+     DEEP = 2
+
+     # Maximum single-model - Opus 4.5 with high thinking
+     MAXIMUM = 3
+
+     # Multi-model merger - all models synthesized
+     ULTIMATE = 4
+
+
+ class ReasoningLevel(IntEnum):
+     """Independent reasoning/thinking budget control.
+
+     This is separate from JuiceLevel and controls how much "thinking"
+     the model does, independent of which model is selected.
+     """
+
+     # No extended thinking - fastest responses
+     OFF = 0
+
+     # Minimal reasoning
+     MINIMAL = 1
+
+     # Low reasoning effort
+     LOW = 2
+
+     # Medium reasoning effort
+     MEDIUM = 3
+
+     # High reasoning effort
+     HIGH = 4
+
+     # Maximum reasoning budget
+     MAXIMUM = 5
+
+
+ # Map reasoning levels to thinking budgets and reasoning_effort values
+ # For Anthropic: uses `thinking={"type": "enabled", "budget_tokens": X}`
+ # For others: uses `reasoning_effort` ("low", "medium", "high")
+ # Note: Anthropic requires max_tokens > thinking.budget_tokens
+ REASONING_CONFIGS: dict[ReasoningLevel, dict] = {
+     ReasoningLevel.OFF: {
+         "budget_tokens": None,  # No thinking
+         "reasoning_effort": None,
+         "temperature": None,  # None means use model default
+     },
+     ReasoningLevel.MINIMAL: {
+         "budget_tokens": 1024,  # Minimum thinking budget
+         "reasoning_effort": "low",
+         "temperature": 1.0,  # Required for Anthropic extended thinking
+         "max_tokens": 4096,  # Must be > budget_tokens
+     },
+     ReasoningLevel.LOW: {
+         "budget_tokens": 4096,
+         "reasoning_effort": "low",
+         "temperature": 1.0,
+         "max_tokens": 8192,
+     },
+     ReasoningLevel.MEDIUM: {
+         "budget_tokens": 8192,
+         "reasoning_effort": "medium",
+         "temperature": 1.0,
+         "max_tokens": 16000,
+     },
+     ReasoningLevel.HIGH: {
+         "budget_tokens": 16384,
+         "reasoning_effort": "high",
+         "temperature": 1.0,
+         "max_tokens": 24000,
+     },
+     ReasoningLevel.MAXIMUM: {
+         "budget_tokens": 32768,  # Maximum thinking budget
+         "reasoning_effort": "high",
+         "temperature": 1.0,
+         "max_tokens": 48000,  # Must be > budget_tokens
+     },
+ }
+
+
+ REASONING_DESCRIPTIONS = {
+     ReasoningLevel.OFF: "Off - No extended thinking",
+     ReasoningLevel.MINIMAL: "Minimal - Light reasoning",
+     ReasoningLevel.LOW: "Low - Some reasoning",
+     ReasoningLevel.MEDIUM: "Medium - Moderate reasoning",
+     ReasoningLevel.HIGH: "High - Deep reasoning",
+     ReasoningLevel.MAXIMUM: "Maximum - Full reasoning budget",
+ }
+
+
+ @dataclass
+ class ModelConfig:
+     """Configuration for a model at a specific juice level."""
+     model: str
+     temperature: float = 0.7
+     max_tokens: int = 4096
+     reasoning_effort: str | None = None
+     thinking: dict | None = None
+     supports_reasoning: bool = True  # Whether the model supports reasoning_effort
+     extra_kwargs: dict = field(default_factory=dict)
+
+     def to_lm_kwargs(self) -> dict:
+         """Convert to dspy.LM kwargs."""
+         kwargs = {
+             "model": self.model,
+             "temperature": self.temperature,
+             "max_tokens": self.max_tokens,
+             **self.extra_kwargs,
+         }
+         if self.reasoning_effort:
+             kwargs["reasoning_effort"] = self.reasoning_effort
+         if self.thinking:
+             kwargs["thinking"] = self.thinking
+         return kwargs
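For orientation (a sketch added for this diff view, not code shipped in the package): `ModelConfig.to_lm_kwargs()` simply flattens the dataclass into keyword arguments for `dspy.LM`, so any of the configs defined below can be turned into a usable LM in one call.

# Illustration only - not part of mrmd_ai; assumes mrmd-ai is installed and provider API keys are set
import dspy
from mrmd_ai.juice import ModelConfig

cfg = ModelConfig(model="anthropic/claude-sonnet-4-5", temperature=0.7, max_tokens=4096)
lm = dspy.LM(**cfg.to_lm_kwargs())  # same as dspy.LM(model="anthropic/claude-sonnet-4-5", temperature=0.7, max_tokens=4096)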
+
+
+ # Model configurations for each juice level
+ # supports_reasoning indicates if the model/provider supports reasoning_effort parameter
+ JUICE_MODELS: dict[JuiceLevel, ModelConfig] = {
+     JuiceLevel.QUICK: ModelConfig(
+         model="groq/moonshotai/kimi-k2-instruct-0905",
+         temperature=0.7,
+         max_tokens=4096,
+         supports_reasoning=False,  # Groq does NOT support reasoning_effort
+     ),
+     JuiceLevel.BALANCED: ModelConfig(
+         model="anthropic/claude-sonnet-4-5",
+         temperature=0.7,
+         max_tokens=4096,
+         supports_reasoning=True,  # Anthropic supports reasoning_effort
+     ),
+     JuiceLevel.DEEP: ModelConfig(
+         model="gemini/gemini-3-pro-preview",
+         temperature=1.0,
+         max_tokens=16000,
+         reasoning_effort="high",
+         supports_reasoning=True,  # Gemini supports reasoning_effort
+     ),
+     JuiceLevel.MAXIMUM: ModelConfig(
+         model="anthropic/claude-opus-4-5",
+         temperature=1.0,
+         max_tokens=16000,
+         reasoning_effort="high",
+         supports_reasoning=True,  # Anthropic supports reasoning_effort
+     ),
+ }
+
+ # For ULTIMATE level, we use all 4 models with highest thinking
+ # Grok 4, GPT-5.2, Gemini 3, Opus 4.5
+ # NOTE: Anthropic requires temperature=1 when using extended thinking
+ ULTIMATE_MODELS: list[ModelConfig] = [
+     ModelConfig(
+         model="openrouter/x-ai/grok-4",
+         temperature=0.7,
+         max_tokens=8192,
+         supports_reasoning=True,  # Grok 4 supports reasoning
+     ),
+     ModelConfig(
+         model="openai/gpt-5.2",
+         temperature=1.0,
+         max_tokens=16000,
+         reasoning_effort="high",
+         supports_reasoning=True,  # OpenAI supports reasoning
+     ),
+     ModelConfig(
+         model="gemini/gemini-3-pro-preview",
+         temperature=1.0,
+         max_tokens=16000,
+         reasoning_effort="high",
+         supports_reasoning=True,  # Gemini supports reasoning
+     ),
+     ModelConfig(
+         model="anthropic/claude-opus-4-5",
+         temperature=1.0,  # Must be 1 for extended thinking
+         max_tokens=16000,
+         reasoning_effort="high",
+         supports_reasoning=True,  # Anthropic supports reasoning
+     ),
+ ]
+
+ # Synthesizer model for ULTIMATE level (Gemini 3 synthesizes all responses)
+ SYNTHESIZER_MODEL = ModelConfig(
+     model="gemini/gemini-3-pro-preview",
+     temperature=0.7,
+     max_tokens=32000,
+     reasoning_effort="high",
+     supports_reasoning=True,
+ )
+
+
+ def get_lm(
+     juice: JuiceLevel | int = JuiceLevel.QUICK,
+     reasoning: ReasoningLevel | int | None = None
+ ) -> dspy.LM:
+     """Get a dspy.LM configured for the specified juice and reasoning levels.
+
+     Args:
+         juice: Juice level (0-3). Level 4 (ULTIMATE) requires special handling.
+         reasoning: Optional reasoning level (0-5). If None, uses juice level's default.
+
+     Returns:
+         Configured dspy.LM instance.
+     """
+     if isinstance(juice, int):
+         juice = JuiceLevel(juice)
+
+     if juice == JuiceLevel.ULTIMATE:
+         raise ValueError("ULTIMATE juice level requires multi-model merger. Use JuicedProgram instead.")
+
+     config = JUICE_MODELS[juice]
+     kwargs = config.to_lm_kwargs()
+
+     # Apply reasoning level overrides if specified AND model supports reasoning
+     if reasoning is not None and config.supports_reasoning:
+         if isinstance(reasoning, int):
+             reasoning = ReasoningLevel(reasoning)
+
+         # Skip if reasoning is OFF
+         if reasoning == ReasoningLevel.OFF:
+             # Remove any existing reasoning params
+             kwargs.pop("reasoning_effort", None)
+             kwargs.pop("thinking", None)
+             return dspy.LM(**kwargs)
+
+         reasoning_config = REASONING_CONFIGS[reasoning]
+         model = config.model.lower()
+
+         # Determine provider and use appropriate parameter format
+         is_anthropic = "anthropic/" in model or "claude" in model
+         is_gemini = "gemini" in model
+         is_openai = "openai/" in model or "gpt" in model
+
+         # Apply temperature (required for Anthropic extended thinking)
+         if reasoning_config.get("temperature") is not None:
+             kwargs["temperature"] = reasoning_config["temperature"]
+
+         # Apply max_tokens
+         if reasoning_config.get("max_tokens") is not None:
+             kwargs["max_tokens"] = reasoning_config["max_tokens"]
+
+         if is_anthropic:
+             # Anthropic uses explicit thinking parameter with budget_tokens
+             budget = reasoning_config.get("budget_tokens", 1024)
+             kwargs["thinking"] = {"type": "enabled", "budget_tokens": budget}
+             # Remove reasoning_effort if present (not used for thinking)
+             kwargs.pop("reasoning_effort", None)
+         else:
+             # Other providers use reasoning_effort
+             if reasoning_config["reasoning_effort"] is not None:
+                 kwargs["reasoning_effort"] = reasoning_config["reasoning_effort"]
+
+     return dspy.LM(**kwargs)
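A minimal usage sketch for `get_lm` (added for this diff view, not part of the package): pick a juice level, optionally override the reasoning budget, and run any DSPy predictor under that LM via `dspy.context`.

# Illustration only - assumes mrmd-ai is installed and provider API keys are configured
import dspy
from mrmd_ai.juice import get_lm, JuiceLevel, ReasoningLevel

lm = get_lm(JuiceLevel.BALANCED, reasoning=ReasoningLevel.MEDIUM)  # Sonnet 4.5 with an 8192-token thinking budget
with dspy.context(lm=lm):
    answer = dspy.Predict("question -> answer")(question="Summarize the juice level system.")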
+
+
+ class SynthesizeResponses(dspy.Signature):
+     """Synthesize multiple AI model responses into one optimal final answer.
+
+     You are given responses from multiple AI models for the same task.
+     Your job is to create the BEST possible response by:
+     1. Identifying the strongest elements from each model's response
+     2. Resolving any contradictions (prefer the most accurate/well-reasoned answer)
+     3. Combining complementary insights that don't conflict
+     4. Maintaining the original format and style expected for the task
+     5. Being concise - don't add unnecessary elaboration
+
+     For grammar/spelling fixes: Pick the most correct version, don't over-correct.
+     For text completion: Choose the most natural, coherent continuation.
+     For code: Select the cleanest, most idiomatic solution.
+     For lists: You may combine unique items if appropriate.
+
+     Output ONLY the synthesized response - no explanations or meta-commentary.
+     """
+
+     original_input: str = dspy.InputField(desc="The original input/task that was given to all models")
+     model_responses: str = dspy.InputField(desc="Responses from multiple AI models, each labeled with model name")
+     synthesized_response: str = dspy.OutputField(desc="The single best response, synthesized from all model outputs. Output ONLY the response content.")
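This signature is what `_merge_results` (further down) feeds to `dspy.Predict`; a standalone sketch of the expected input shape, added for this diff view and not shipped in the package:

# Illustration only - mirrors the "=== model ===" block format built in _merge_results
import dspy
from mrmd_ai.juice import SynthesizeResponses, SYNTHESIZER_MODEL

responses_text = (
    "=== grok-4 ===\nThe quick brown fox jumps over the lazy dog.\n\n"
    "=== gemini-3-pro-preview ===\nA quick brown fox jumps over the lazy dog."
)
with dspy.context(lm=dspy.LM(**SYNTHESIZER_MODEL.to_lm_kwargs())):
    best = dspy.Predict(SynthesizeResponses)(
        original_input="Fix the grammar: teh quick brown fox ...",
        model_responses=responses_text,
    ).synthesized_response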
+
+
+ class JuicedProgram:
+     """Wrapper that runs any DSPy program with configurable juice levels.
+
+     For levels 0-3, uses a single model with increasing capability.
+     For level 4 (ULTIMATE), runs all models in parallel and synthesizes.
+     """
+
+     def __init__(
+         self,
+         program: dspy.Module,
+         juice: JuiceLevel | int = JuiceLevel.QUICK,
+         reasoning: ReasoningLevel | int | None = None,
+         progress_callback: Callable[[str, dict], None] | None = None
+     ):
+         """Initialize a juiced program.
+
+         Args:
+             program: The DSPy program/module to wrap.
+             juice: Juice level (0-4).
+             reasoning: Optional reasoning level (0-5). If None, uses juice level's default.
+             progress_callback: Optional callback for progress events.
+                 Called with (event_type, data) where event_type is:
+                 - "status": General status update
+                 - "model_start": A model is starting (ultimate mode)
+                 - "model_complete": A model finished (ultimate mode)
+         """
+         self.program = program
+         self.juice = JuiceLevel(juice) if isinstance(juice, int) else juice
+         self.reasoning = ReasoningLevel(reasoning) if isinstance(reasoning, int) else reasoning
+         self.progress_callback = progress_callback
+
+     def _emit(self, event_type: str, data: dict):
+         """Emit a progress event if callback is set."""
+         if self.progress_callback:
+             self.progress_callback(event_type, data)
+
+     def __call__(self, **kwargs) -> Any:
+         """Run the program with the configured juice level."""
+         if self.juice == JuiceLevel.ULTIMATE:
+             return self._run_ultimate(**kwargs)
+         else:
+             return self._run_single(**kwargs)
+
+     def _run_single(self, **kwargs) -> Any:
+         """Run with a single model at the specified juice level."""
+         config = JUICE_MODELS[self.juice]
+         model_name = config.model.split("/")[-1]
+
+         reasoning_desc = ""
+         if self.reasoning is not None:
+             reasoning_desc = f" (reasoning={self.reasoning.name})"
+
+         self._emit("status", {
+             "step": "calling_model",
+             "model": model_name,
+             "model_full": config.model,
+             "reasoning_level": self.reasoning.value if self.reasoning else None,
+         })
+
+         lm = get_lm(self.juice, self.reasoning)
+         with dspy.context(lm=lm):
+             result = self.program(**kwargs)
+
+         self._emit("status", {
+             "step": "model_complete",
+             "model": model_name
+         })
+
+         return result
+
+     def _run_ultimate(self, **kwargs) -> Any:
+         """Run with all models in PARALLEL and merge results."""
+         from concurrent.futures import ThreadPoolExecutor, as_completed
+         import threading
+
+         # Track which models are running
+         model_names = [cfg.model.split("/")[-1] for cfg in ULTIMATE_MODELS]
+         models_status = {name: "pending" for name in model_names}
+         status_lock = threading.Lock()
+
+         self._emit("status", {
+             "step": "starting_multi_model",
+             "models": model_names,
+             "total": len(model_names),
+             "reasoning_level": self.reasoning.value if self.reasoning else None,
+         })
+
+         def run_model(config):
+             """Run a single model - called in parallel."""
+             lm_kwargs = config.to_lm_kwargs()
+
+             # Apply reasoning level overrides if specified AND model supports reasoning
+             if self.reasoning is not None and self.reasoning != ReasoningLevel.OFF and config.supports_reasoning:
+                 reasoning_config = REASONING_CONFIGS[self.reasoning]
+                 model = config.model.lower()
+
+                 # Determine provider
+                 is_anthropic = "anthropic/" in model or "claude" in model
+
+                 # Apply temperature and max_tokens
+                 if reasoning_config.get("temperature") is not None:
+                     lm_kwargs["temperature"] = reasoning_config["temperature"]
+                 if reasoning_config.get("max_tokens") is not None:
+                     lm_kwargs["max_tokens"] = reasoning_config["max_tokens"]
+
+                 if is_anthropic:
+                     # Anthropic uses thinking parameter with budget_tokens
+                     budget = reasoning_config.get("budget_tokens", 1024)
+                     lm_kwargs["thinking"] = {"type": "enabled", "budget_tokens": budget}
+                     lm_kwargs.pop("reasoning_effort", None)
+                 else:
+                     # Other providers use reasoning_effort
+                     if reasoning_config["reasoning_effort"] is not None:
+                         lm_kwargs["reasoning_effort"] = reasoning_config["reasoning_effort"]
+
+             lm = dspy.LM(**lm_kwargs)
+             model_name = config.model.split("/")[-1]
+
+             # Emit model start
+             with status_lock:
+                 models_status[model_name] = "running"
+                 self._emit("model_start", {
+                     "model": model_name,
+                     "models_status": dict(models_status)
+                 })
+
+             try:
+                 with dspy.context(lm=lm):
+                     result = self.program(**kwargs)
+
+                 # Extract response text from DSPy Prediction for streaming
+                 response_data = {}
+                 if hasattr(result, "_store") and result._store:
+                     response_data = dict(result._store)
+
+                 # Emit model complete WITH the actual response
+                 with status_lock:
+                     models_status[model_name] = "complete"
+                     self._emit("model_complete", {
+                         "model": model_name,
+                         "success": True,
+                         "models_status": dict(models_status),
+                         "response": response_data,  # Include actual response!
+                     })
+
+                 return {"model": model_name, "result": result, "error": None}
+             except Exception as e:
+                 # Emit model error
+                 with status_lock:
+                     models_status[model_name] = "error"
+                     self._emit("model_complete", {
+                         "model": model_name,
+                         "success": False,
+                         "error": str(e),
+                         "models_status": dict(models_status),
+                         "response": None,
+                     })
+                 return {"model": model_name, "result": None, "error": str(e)}
+
+         # Run all 4 models in parallel
+         model_results = []
+         with ThreadPoolExecutor(max_workers=4) as executor:
+             futures = [executor.submit(run_model, config) for config in ULTIMATE_MODELS]
+             for future in as_completed(futures):
+                 model_results.append(future.result())
+
+         # Emit synthesizing status
+         self._emit("status", {
+             "step": "synthesizing",
+             "models_completed": len([r for r in model_results if r["result"] is not None])
+         })
+
+         # Merge results using AI synthesis
+         return self._merge_results(model_results, kwargs)
+
+     def _merge_results(self, model_results: list, original_input: dict) -> Any:
+         """Merge results from multiple models using AI synthesis.
+
+         Uses SYNTHESIZER_MODEL to intelligently combine responses from all models.
+         """
+         # Get successful results
+         successful = [r for r in model_results if r["result"] is not None]
+         if not successful:
+             # All failed - return error
+             errors = [r["error"] for r in model_results if r["error"]]
+             raise RuntimeError(f"All models failed: {errors}")
+
+         # If only one model succeeded, just return its result
+         if len(successful) == 1:
+             result = successful[0]["result"]
+             if hasattr(result, "_store"):
+                 result._individual_responses = [{
+                     "model": successful[0]["model"],
+                     "response": str(result._store),
+                     "error": None
+                 }]
+             return result
+
+         # Collect individual responses
+         individual_responses = []
+         model_outputs = {}  # model_name -> {field: value}
+
+         for r in model_results:
+             model_name = r["model"]
+             if r["result"] is not None and hasattr(r["result"], "_store"):
+                 store = r["result"]._store
+                 model_outputs[model_name] = dict(store)
+                 # Get main output text for display
+                 output_text = None
+                 for key, value in store.items():
+                     if isinstance(value, str) and len(value) > 10:
+                         output_text = value
+                         break
+                 individual_responses.append({
+                     "model": model_name,
+                     "response": output_text or str(store),
+                     "error": None
+                 })
+             elif r["error"]:
+                 individual_responses.append({
+                     "model": model_name,
+                     "response": None,
+                     "error": r["error"]
+                 })
+
+         # Use first result as template for output fields
+         base_result = successful[0]["result"]
+         base_store = base_result._store if hasattr(base_result, "_store") else {}
+
+         # Format original input for synthesizer
+         input_text = self._format_input(original_input)
+
+         # Create synthesized result
+         merged = {}
+
+         # Configure synthesizer LM
+         synth_lm = dspy.LM(**SYNTHESIZER_MODEL.to_lm_kwargs())
+
+         # Synthesize each output field
+         for field_name, base_value in base_store.items():
+             # Collect this field's values from all models
+             field_values = {}
+             for model_name, outputs in model_outputs.items():
+                 if field_name in outputs:
+                     field_values[model_name] = outputs[field_name]
+
+             if not field_values:
+                 merged[field_name] = base_value
+                 continue
+
+             # Check if it's a list field (like synonyms)
+             if isinstance(base_value, list):
+                 # For lists, combine unique values from all models
+                 combined = []
+                 seen = set()
+                 for model_name, values in field_values.items():
+                     if isinstance(values, list):
+                         for item in values:
+                             # Get hashable key for deduplication
+                             # Pydantic models aren't hashable, so convert to JSON
+                             try:
+                                 if hasattr(item, 'model_dump_json'):
+                                     # Pydantic v2 model
+                                     item_key = item.model_dump_json()
+                                 elif hasattr(item, 'json'):
+                                     # Pydantic v1 model
+                                     item_key = item.json()
+                                 else:
+                                     # Regular hashable item
+                                     item_key = item
+                             except TypeError:
+                                 # Fallback: convert to string representation
+                                 item_key = str(item)
+
+                             if item_key not in seen:
+                                 combined.append(item)
+                                 seen.add(item_key)
+                 merged[field_name] = combined
+             else:
+                 # For string/text fields, use AI synthesis
+                 responses_text = "\n\n".join([
+                     f"=== {model_name} ===\n{value}"
+                     for model_name, value in field_values.items()
+                 ])
+
+                 self._emit("status", {
+                     "step": "synthesizing_field",
+                     "field": field_name,
+                     "model": SYNTHESIZER_MODEL.model.split("/")[-1]
+                 })
+
+                 try:
+                     with dspy.context(lm=synth_lm):
+                         predictor = dspy.Predict(SynthesizeResponses)
+                         synth_result = predictor(
+                             original_input=input_text,
+                             model_responses=responses_text
+                         )
+                     merged[field_name] = synth_result.synthesized_response
+                 except Exception as e:
+                     # Fallback to first model's response on synthesis error
+                     print(f"[Synthesis] Error synthesizing {field_name}: {e}")
+                     merged[field_name] = base_value
+
+         # Return a result object with merged data
+         class MergedResult:
+             pass
+
+         result = MergedResult()
+         for key, value in merged.items():
+             setattr(result, key, value)
+         result._store = merged  # For extract_result in server.py
+         result._individual_responses = individual_responses  # For UI display
+         result._synthesized = True  # Mark as AI-synthesized
+
+         return result
+
+     def _format_input(self, kwargs: dict) -> str:
+         """Format input kwargs as a readable string."""
+         parts = []
+         for key, value in kwargs.items():
+             parts.append(f"{key}: {value}")
+         return "\n".join(parts)
+
+
+ def juiced(juice: JuiceLevel | int = JuiceLevel.QUICK):
+     """Decorator to run a DSPy program with a specific juice level.
+
+     Usage:
+         @juiced(JuiceLevel.DEEP)
+         def my_program():
+             return dspy.ChainOfThought(MySignature)
+     """
+     def decorator(func: Callable) -> Callable:
+         def wrapper(*args, **kwargs):
+             program = func(*args, **kwargs)
+             return JuicedProgram(program, juice)
+         return wrapper
+     return decorator
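Worth noting about the decorator (a sketch for illustration only, not in the package): the decorated factory returns a `JuicedProgram`, so it is invoked in two steps, build then call.

# Illustration only - two-step call pattern produced by @juiced
import dspy
from mrmd_ai.juice import juiced, JuiceLevel

@juiced(JuiceLevel.DEEP)
def summarizer():
    return dspy.ChainOfThought("text -> summary")

prediction = summarizer()(text="Long draft to summarize ...")  # summarizer() builds the JuicedProgram, the second call runs it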
+
+
+ def run_with_juice(
+     program: dspy.Module,
+     juice: JuiceLevel | int,
+     reasoning: ReasoningLevel | int | None = None,
+     **kwargs
+ ) -> Any:
+     """Convenience function to run a program with a specific juice level.
+
+     Args:
+         program: The DSPy program to run.
+         juice: Juice level (0-4).
+         reasoning: Optional reasoning level (0-5). If None, uses juice level's default.
+         **kwargs: Arguments to pass to the program.
+
+     Returns:
+         The program result.
+     """
+     juiced_program = JuicedProgram(program, juice, reasoning=reasoning)
+     return juiced_program(**kwargs)
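And the one-shot convenience path, as a final sketch (illustration only, not part of the package):

# Illustration only - same behavior as building a JuicedProgram by hand
import dspy
from mrmd_ai.juice import run_with_juice, JuiceLevel

fixer = dspy.Predict("text -> corrected_text")
result = run_with_juice(fixer, JuiceLevel.QUICK, text="teh quick brown fox")
print(result.corrected_text)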
+
+
+ # Juice level descriptions for CLI/UI
+ JUICE_DESCRIPTIONS = {
+     JuiceLevel.QUICK: "Quick (Kimi K2) - Fast & cheap",
+     JuiceLevel.BALANCED: "Balanced (Sonnet 4.5) - Good quality",
+     JuiceLevel.DEEP: "Deep (Gemini 3 thinking) - Thorough reasoning",
+     JuiceLevel.MAXIMUM: "Maximum (Opus 4.5 thinking) - Best single model",
+     JuiceLevel.ULTIMATE: "Ultimate (Multi-model merger) - All models synthesized",
+ }