caudate-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- api/__init__.py +5 -0
- api/anthropic_compat.py +1518 -0
- api/artifact_viewer.py +366 -0
- api/caudate_middleware.py +618 -0
- api/forge_bootstrapper_routes.py +377 -0
- api/forge_routes.py +630 -0
- api/forge_system_routes.py +294 -0
- api/openai_compat.py +1993 -0
- api/server.py +667 -0
- api/storyboard_page.py +677 -0
- caudate_cli-0.1.0.dist-info/METADATA +354 -0
- caudate_cli-0.1.0.dist-info/RECORD +153 -0
- caudate_cli-0.1.0.dist-info/WHEEL +5 -0
- caudate_cli-0.1.0.dist-info/entry_points.txt +2 -0
- caudate_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
- caudate_cli-0.1.0.dist-info/top_level.txt +14 -0
- cognos_mcp/__init__.py +4 -0
- cognos_mcp/bridge.py +41 -0
- cognos_mcp/client.py +70 -0
- cognos_mcp/config.py +49 -0
- cognos_mcp/server.py +66 -0
- config.py +82 -0
- core/__init__.py +0 -0
- core/agent.py +468 -0
- core/agentic_loop.py +731 -0
- core/anthropic_auth.py +91 -0
- core/background.py +113 -0
- core/banner.py +134 -0
- core/bootstrap.py +292 -0
- core/citations.py +131 -0
- core/compaction.py +109 -0
- core/constitution.py +198 -0
- core/diff_viewer.py +87 -0
- core/export.py +85 -0
- core/file_refs.py +119 -0
- core/files.py +199 -0
- core/hooks.py +209 -0
- core/image.py +599 -0
- core/input.py +91 -0
- core/loop.py +238 -0
- core/memory_md.py +147 -0
- core/notifications.py +99 -0
- core/ownership.py +181 -0
- core/paste.py +81 -0
- core/permissions.py +210 -0
- core/plan_mode.py +215 -0
- core/sandbox_prompt.py +185 -0
- core/scheduler.py +195 -0
- core/schemas.py +202 -0
- core/session.py +90 -0
- core/settings.py +132 -0
- core/skills.py +398 -0
- core/slash_commands.py +977 -0
- core/statusline.py +61 -0
- core/subagent.py +300 -0
- core/thinking.py +50 -0
- core/updater.py +122 -0
- core/usage.py +109 -0
- core/worktree.py +93 -0
- execution/__init__.py +0 -0
- execution/executor.py +329 -0
- execution/plugins.py +108 -0
- execution/tools/__init__.py +0 -0
- execution/tools/agent_tool.py +107 -0
- execution/tools/agentic_tool.py +297 -0
- execution/tools/artifact_tool.py +191 -0
- execution/tools/ask_user_question_tool.py +137 -0
- execution/tools/base.py +81 -0
- execution/tools/calculator_tool.py +137 -0
- execution/tools/cognos_card_tool.py +124 -0
- execution/tools/cron_tool.py +215 -0
- execution/tools/datetime_tool.py +215 -0
- execution/tools/describe_image_tool.py +161 -0
- execution/tools/draw_tool.py +164 -0
- execution/tools/edit_image_tool.py +262 -0
- execution/tools/edit_tool.py +245 -0
- execution/tools/file_tool.py +90 -0
- execution/tools/find_anywhere_tool.py +255 -0
- execution/tools/forge_feature_tools.py +377 -0
- execution/tools/glob_tool.py +59 -0
- execution/tools/grep_tool.py +89 -0
- execution/tools/http_request_tool.py +224 -0
- execution/tools/load_skill_tool.py +104 -0
- execution/tools/longcat_avatar_tool.py +384 -0
- execution/tools/mcp_tool.py +100 -0
- execution/tools/notebook_tool.py +279 -0
- execution/tools/openapi_tool.py +440 -0
- execution/tools/plan_mode_tool.py +95 -0
- execution/tools/push_notification_tool.py +157 -0
- execution/tools/python_tool.py +61 -0
- execution/tools/respond_tool.py +40 -0
- execution/tools/sandbox_tool.py +378 -0
- execution/tools/search_tool.py +153 -0
- execution/tools/semantic_search_tool.py +106 -0
- execution/tools/shell_tool.py +283 -0
- execution/tools/speak_tool.py +134 -0
- execution/tools/storyboard_tool.py +727 -0
- execution/tools/system_info_tool.py +212 -0
- execution/tools/task_tool.py +323 -0
- execution/tools/think_tool.py +49 -0
- execution/tools/transcribe_audio_tool.py +86 -0
- execution/tools/update_memory_tool.py +92 -0
- execution/tools/web_fetch_tool.py +82 -0
- execution/tools/worktree_tool.py +174 -0
- llm/__init__.py +0 -0
- llm/fallback.py +116 -0
- llm/models.py +320 -0
- llm/provider.py +1356 -0
- llm/router.py +373 -0
- main.py +1889 -0
- memory/__init__.py +0 -0
- memory/episodic.py +99 -0
- memory/procedural.py +145 -0
- memory/semantic.py +71 -0
- memory/working.py +64 -0
- nn/__init__.py +43 -0
- nn/auto_evolve.py +245 -0
- nn/caudate.py +136 -0
- nn/config.py +141 -0
- nn/consolidator.py +81 -0
- nn/data.py +1635 -0
- nn/encoder.py +258 -0
- nn/forge_advisor.py +303 -0
- nn/format.py +235 -0
- nn/heads.py +432 -0
- nn/observer.py +994 -0
- nn/policy.py +214 -0
- nn/runtime.py +343 -0
- nn/scorer.py +175 -0
- nn/trainer.py +515 -0
- nn/vision.py +352 -0
- personality/__init__.py +23 -0
- personality/engine.py +129 -0
- personality/identity.py +144 -0
- personality/inner_voice.py +100 -0
- personality/mood.py +205 -0
- planning/__init__.py +0 -0
- planning/dev_server.py +221 -0
- planning/forge_models.py +718 -0
- planning/orchestrator.py +1363 -0
- planning/planner.py +451 -0
- planning/task_graph.py +61 -0
- reflection/__init__.py +0 -0
- reflection/meta_learner.py +156 -0
- reflection/reflector.py +127 -0
- ui/__init__.py +5 -0
- ui/display.py +88 -0
- voice/__init__.py +0 -0
- voice/conversation.py +125 -0
- voice/listener.py +111 -0
- voice/speaker.py +59 -0
- voice/stt.py +126 -0
- voice/tts.py +214 -0
llm/router.py
ADDED
|
@@ -0,0 +1,373 @@
|
|
|
1
|
+
"""Dual-process model router — System 1 (fast) vs System 2 (slow).
|
|
2
|
+
|
|
3
|
+
Cognos runs a tiered LLM stack inspired by Kahneman's *Thinking, Fast and Slow*:
|
|
4
|
+
|
|
5
|
+
- System 1: a small, fast Ollama model. Handles routine tool dispatch,
|
|
6
|
+
short answers, pattern-matching — the "autopilot" of the agent.
|
|
7
|
+
- System 2: a large, capable model. Handles planning, reflection,
|
|
8
|
+
meta-learning, complex synthesis, anything that benefits from depth.
|
|
9
|
+
|
|
10
|
+
The `Router` scores every LLM call against a policy and picks the right tier.
|
|
11
|
+
The `DualLLMProvider` wraps two `LLMProvider`s and exposes the same interface
|
|
12
|
+
as a single provider, so agentic loops and tools don't know or care about
|
|
13
|
+
routing.
|
|
14
|
+
|
|
15
|
+
Caller tags (passed via the `caller` kwarg that LLMProvider already accepts)
|
|
16
|
+
let deliberate-reasoning modules (planning, reflection, meta, compaction)
|
|
17
|
+
always take System 2, while the agentic loop itself routes heuristically.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import logging
|
|
23
|
+
from dataclasses import dataclass, field
|
|
24
|
+
from typing import Any, AsyncIterator
|
|
25
|
+
|
|
26
|
+
from pydantic import BaseModel
|
|
27
|
+
|
|
28
|
+
from core.schemas import StreamEvent
|
|
29
|
+
from llm.provider import LLMProvider, LLMResponse
|
|
30
|
+
|
|
31
|
+
logger = logging.getLogger(__name__)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# Weights sum to 1.0; tuning note: raising `keyword` weight makes the router
# escalate on intent more aggressively; raising `depth` escalates on
# long conversations (synthesis time).
# Each key names one of the four features that `Router._score` computes in
# the range [0, 1]; the final score is the weighted sum of those features.
_WEIGHTS = {"length": 0.25, "depth": 0.10, "keyword": 0.45, "mood": 0.20}

# Substrings looked up in the lower-cased text of the latest user message.
# A single hit sets the keyword feature to 1.0. Matching is plain substring
# containment, so a keyword also matches inside longer words.
_COMPLEX_KEYWORDS = (
    "why", "decide", "strategy", "compare", "evaluate", "analyze",
    "refactor", "design", "architecture", "debug", "explain",
    "plan", "reason", "approach", "trade-off", "tradeoff", "rationale",
    "critique", "assess", "diagnose",
)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass
class RoutingPolicy:
    """Policy knobs for the Router. Defaults favor fast when unsure."""

    # Heuristic scores at or above this value route to the slow tier.
    complexity_threshold: float = 0.40
    # Caller tags that always route to the slow tier (checked first).
    slow_caller_tags: set[str] = field(default_factory=lambda: {
        "planning", "reflection", "meta", "compaction",
    })
    # Caller tags that always route to the fast tier (checked after the
    # slow tags, so a tag present in both sets routes slow).
    fast_caller_tags: set[str] = field(default_factory=set)
    # When False, the router ignores the mood object entirely: no absolute
    # "stuck" escalation and a mood feature of 0.0 in the score.
    escalate_on_stuck: bool = True
    # The length feature reaches 1.0 once the total message text reaches
    # this value times 4 characters.
    length_tokens_saturation: int = 4000
    # The depth feature reaches 1.0 once the conversation holds this many
    # messages.
    depth_messages_saturation: int = 20
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@dataclass
class RoutingDecision:
    """Outcome of one `Router.choose` call."""

    # Which tier was selected.
    tier: str  # "fast" | "slow"
    # The provider object the call should be sent to.
    provider: LLMProvider
    # Heuristic score; 1.0 / 0.0 for absolute caller-tag or mood overrides.
    score: float
    # Short human-readable tags explaining the decision (used in the log line).
    reasons: list[str]
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class Router:
    """Pick a provider for an LLM call based on context + policy.

    `choose` applies these rules in order and stops at the first that fires:

    1. the caller tag is listed in `policy.slow_caller_tags` (slow) or
       `policy.fast_caller_tags` (fast);
    2. the mood object reports `should_defer_to_user()` and
       `policy.escalate_on_stuck` is set (slow);
    3. a Caudate observer that `can_advise()` holds a prediction with a
       valid tier and enough confidence (that tier);
    4. the weighted heuristic score versus `policy.complexity_threshold`.

    Attributes:
        fast: provider used for the "fast" tier.
        slow: provider used for the "slow" tier.
        policy: the active `RoutingPolicy`.
        mood: optional object exposing `should_defer_to_user()` and
            `should_slow_down()`.
        caudate: optional observer exposing `can_advise()` and
            `_last_prediction` (with `tier` and `tier_confidence`).
        stats: number of decisions recorded per tier.
        last_decision: the most recently recorded `RoutingDecision`.
    """

    # Tiers the router knows how to map to a provider.
    _VALID_TIERS = ("fast", "slow")
    # Minimum `tier_confidence` for a Caudate prediction to override the
    # heuristic.
    _CAUDATE_MIN_CONFIDENCE = 0.55

    def __init__(
        self,
        fast: LLMProvider,
        slow: LLMProvider,
        policy: RoutingPolicy | None = None,
        mood=None,
    ):
        self.fast = fast
        self.slow = slow
        self.policy = policy or RoutingPolicy()
        self.mood = mood  # optional MoodState for escalation
        self.caudate = None  # optional CaudateObserver for ADVISOR+ override
        self.stats = {"fast": 0, "slow": 0}
        self.last_decision: RoutingDecision | None = None

    def set_mood(self, mood) -> None:
        """Replace the mood object consulted for escalation."""
        self.mood = mood

    def set_caudate(self, caudate) -> None:
        """Wire a CaudateObserver in. Once set, her tier prediction
        overrides the heuristic at ADVISOR level or higher."""
        self.caudate = caudate

    def choose(
        self,
        messages: list[dict[str, Any]] | None = None,
        tools: list[dict] | None = None,
        caller: str | None = None,
    ) -> RoutingDecision:
        """Return the provider to use for this call.

        Args:
            messages: chat messages for the call; `None` is treated as empty.
            tools: tool definitions for the call; `None` is treated as empty.
            caller: optional tag identifying the calling module.

        Returns:
            The recorded `RoutingDecision` (also stored in `last_decision`
            and counted in `stats`).
        """
        messages = messages or []
        tools = tools or []
        policy = self.policy

        # Caller-tag overrides are absolute.
        if caller and caller in policy.slow_caller_tags:
            return self._record(RoutingDecision(
                tier="slow", provider=self.slow, score=1.0,
                reasons=[f"caller={caller}"],
            ))
        if caller and caller in policy.fast_caller_tags:
            return self._record(RoutingDecision(
                tier="fast", provider=self.fast, score=0.0,
                reasons=[f"caller={caller}"],
            ))

        # Stuck mood is an absolute escalation — when frustration/failure
        # streak is high, the fast model has already proven insufficient.
        if self.mood is not None and policy.escalate_on_stuck:
            try:
                stuck = bool(self.mood.should_defer_to_user())
            except Exception:
                # Best effort: a broken mood object must not block routing,
                # but leave a trace instead of failing silently.
                logger.debug("Router: mood check failed; ignoring", exc_info=True)
                stuck = False
            if stuck:
                return self._record(RoutingDecision(
                    tier="slow", provider=self.slow, score=1.0,
                    reasons=["mood=stuck"],
                ))

        score, reasons = self._score(messages, tools)

        # Caudate override at ADVISOR+ trust. She's earned the right to
        # pick the tier; the heuristic becomes a fallback if she's silent.
        override = self._caudate_override()
        if override is not None:
            tier, confidence = override
            reasons.append(f"caudate={tier}@{confidence:.2f}")
        else:
            tier = "slow" if score >= policy.complexity_threshold else "fast"

        return self._record(RoutingDecision(
            tier=tier,
            provider=self.slow if tier == "slow" else self.fast,
            score=score,
            reasons=reasons,
        ))

    def _caudate_override(self) -> tuple[str, float] | None:
        """Return `(tier, confidence)` from Caudate, or None to use the heuristic.

        None is returned when no observer is wired, it cannot advise, it has
        no prediction, the prediction is malformed, its confidence is below
        `_CAUDATE_MIN_CONFIDENCE`, or its tier is not one of `_VALID_TIERS`.
        """
        if self.caudate is None:
            return None
        try:
            if not self.caudate.can_advise():
                return None
            prediction = self.caudate._last_prediction
            if prediction is None:
                return None
            tier = prediction.tier
            confidence = float(prediction.tier_confidence)
        except Exception:
            logger.debug("Router: caudate prediction unusable; ignoring", exc_info=True)
            return None
        # Written as `not >=` so a NaN confidence is rejected as well.
        if not confidence >= self._CAUDATE_MIN_CONFIDENCE:
            return None
        if tier not in self._VALID_TIERS:
            # An unknown tier would be counted under a new stats key while
            # silently running on the fast provider; fall back instead.
            logger.debug("Router: caudate predicted unknown tier %r; ignoring", tier)
            return None
        return tier, confidence

    def _record(self, decision: RoutingDecision) -> RoutingDecision:
        """Store `decision` as the latest one, count it, log it, return it."""
        self.last_decision = decision
        self.stats[decision.tier] = self.stats.get(decision.tier, 0) + 1
        logger.info(
            "Router -> %s (score=%.2f, reasons=%s)",
            decision.tier, decision.score, ", ".join(decision.reasons),
        )
        return decision

    @staticmethod
    def _saturating_ratio(value: float, saturation: float) -> float:
        """Return `value / saturation` capped at 1.0.

        A non-positive `saturation` saturates immediately: any positive
        `value` yields 1.0 and a zero `value` yields 0.0.
        """
        if saturation <= 0:
            return 1.0 if value > 0 else 0.0
        return min(1.0, value / saturation)

    def _score(
        self,
        messages: list[dict[str, Any]],
        tools: list[dict],
    ) -> tuple[float, list[str]]:
        """Compute the heuristic complexity score for a call.

        Args:
            messages: chat messages for the call.
            tools: tool definitions; accepted for symmetry with `choose`
                but not used by the current features.

        Returns:
            `(score, reasons)` where `score` is the `_WEIGHTS`-weighted sum
            of the length, depth, keyword and mood features and `reasons`
            lists each feature value formatted to two decimals.
        """
        # Length feature — total chars across all messages, saturating.
        total_chars = sum(len(_message_text(m)) for m in messages)
        length = self._saturating_ratio(
            total_chars, self.policy.length_tokens_saturation * 4,
        )

        # Depth feature — how many messages deep are we.
        depth = self._saturating_ratio(
            len(messages), self.policy.depth_messages_saturation,
        )

        # Keyword feature — does the latest user message hint at deep reasoning.
        latest_user = next(
            (_message_text(m) for m in reversed(messages)
             if m.get("role") == "user"),
            "",
        ).lower()
        keyword = 1.0 if any(k in latest_user for k in _COMPLEX_KEYWORDS) else 0.0

        # Mood feature — slow down when the agent is uncertain or stuck.
        mood_score = 0.0
        if self.mood is not None and self.policy.escalate_on_stuck:
            try:
                if self.mood.should_defer_to_user() or self.mood.should_slow_down():
                    mood_score = 1.0
            except Exception:
                logger.debug("Router: mood scoring failed; using 0.0", exc_info=True)
                mood_score = 0.0

        score = (
            _WEIGHTS["length"] * length
            + _WEIGHTS["depth"] * depth
            + _WEIGHTS["keyword"] * keyword
            + _WEIGHTS["mood"] * mood_score
        )
        reasons = [
            f"len={length:.2f}",
            f"depth={depth:.2f}",
            f"kw={keyword:.2f}",
            f"mood={mood_score:.2f}",
        ]
        return score, reasons
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
class DualLLMProvider:
    """Drop-in replacement for LLMProvider that routes per call.

    Exposes the same async surface (chat, complete, stream, structured_output).
    Each method hands its `caller` tag to the router to pick a tier; the tag
    is used for routing only and is not forwarded to the chosen provider.
    """

    def __init__(
        self,
        fast: LLMProvider,
        slow: LLMProvider,
        policy: RoutingPolicy | None = None,
        mood=None,
    ):
        self._fast = fast
        self._slow = slow
        self.router = Router(fast=fast, slow=slow, policy=policy, mood=mood)

    # ------------------------------------------------------------------
    # Compatibility surface (mirrors LLMProvider)
    # ------------------------------------------------------------------

    @property
    def model(self) -> str:
        """A synthetic id so sessions/logs show the tiered setup."""
        return f"dual[fast={self._fast.model},slow={self._slow.model}]"

    @property
    def temperature(self) -> float:
        """Temperature of the fast-tier provider."""
        return self._fast.temperature

    @property
    def max_tokens(self) -> int:
        """Token limit of the slow-tier provider."""
        return self._slow.max_tokens

    def switch_model(self, model: str) -> None:
        """Switch the slow (System 2) tier — treat that as the primary target."""
        self._slow.switch_model(model)

    def set_mood(self, mood) -> None:
        """Forward the mood object to the router."""
        self.router.set_mood(mood)

    # ------------------------------------------------------------------
    # Hot-swap support — used by /system1 and /system2 slash commands
    # ------------------------------------------------------------------

    @property
    def fast_model(self) -> str:
        """Model id currently configured on the fast tier."""
        return self._fast.model

    @property
    def slow_model(self) -> str:
        """Model id currently configured on the slow tier."""
        return self._slow.model

    @property
    def last_tier(self) -> str | None:
        """Which tier was used on the most recent call ('fast' / 'slow' / None)."""
        decision = self.router.last_decision
        return decision.tier if decision else None

    @property
    def last_provider_model(self) -> str | None:
        """The actual model id that ran on the most recent call.

        Read from the provider stored on the recorded decision rather than
        re-derived from the tier string, so the answer always matches the
        provider the router handed out.
        """
        decision = self.router.last_decision
        if decision is None:
            return None
        return decision.provider.model

    def set_fast(self, model: str) -> None:
        """Hot-swap the fast tier without rebuilding the agent."""
        self._fast.switch_model(model)
        # Router holds a reference to the same _fast object, so it
        # picks up the new model id automatically on next call.

    def set_slow(self, model: str) -> None:
        """Hot-swap the slow tier without rebuilding the agent."""
        self._slow.switch_model(model)

    # ------------------------------------------------------------------
    # Calls
    # ------------------------------------------------------------------

    async def complete(
        self,
        prompt: str,
        system: str | None = None,
        temperature: float | None = None,
        max_tokens: int | None = None,
        response_format: dict | None = None,
        caller: str | None = None,
    ) -> LLMResponse:
        """Route a single-prompt completion and delegate to the chosen tier.

        The prompt (and optional system text) is wrapped as chat messages
        only so the router can score it; the provider receives the original
        `prompt` / `system` arguments.
        """
        routing_msgs: list[dict[str, Any]] = []
        if system:
            routing_msgs.append({"role": "system", "content": system})
        routing_msgs.append({"role": "user", "content": prompt})
        provider = self.router.choose(routing_msgs, None, caller).provider
        return await provider.complete(
            prompt=prompt,
            system=system,
            temperature=temperature,
            max_tokens=max_tokens,
            response_format=response_format,
        )

    async def chat(
        self,
        messages: list[dict[str, Any]],
        temperature: float | None = None,
        max_tokens: int | None = None,
        response_format: dict | None = None,
        tools: list[dict] | None = None,
        tool_choice: str | None = None,
        caller: str | None = None,
    ) -> LLMResponse:
        """Route a chat call and delegate to the chosen tier."""
        provider = self.router.choose(messages, tools, caller).provider
        return await provider.chat(
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            response_format=response_format,
            tools=tools,
            tool_choice=tool_choice,
        )

    async def stream(
        self,
        messages: list[dict[str, Any]],
        temperature: float | None = None,
        max_tokens: int | None = None,
        tools: list[dict] | None = None,
        tool_choice: str | None = None,
        caller: str | None = None,
    ) -> AsyncIterator[StreamEvent]:
        """Route a streaming chat call and re-yield the chosen tier's events.

        The tier is chosen once, when iteration starts, and is kept for the
        whole stream.
        """
        provider = self.router.choose(messages, tools, caller).provider
        async for event in provider.stream(
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            tools=tools,
            tool_choice=tool_choice,
        ):
            yield event

    async def structured_output(
        self,
        prompt: str,
        system: str | None = None,
        schema_hint: str = "",
        response_model: type[BaseModel] | None = None,
        caller: str | None = None,
    ) -> Any:
        """Route a structured-output call and delegate to the chosen tier."""
        # Structured output almost always implies deliberate reasoning.
        # Default to "structured" caller so the router's policy can map it.
        # The tag only forces a tier if the policy lists it in one of its
        # caller-tag sets; otherwise the router's remaining rules decide.
        tag = caller or "structured"
        routing_msgs: list[dict[str, Any]] = []
        if system:
            routing_msgs.append({"role": "system", "content": system})
        routing_msgs.append({"role": "user", "content": prompt})
        provider = self.router.choose(routing_msgs, None, tag).provider
        return await provider.structured_output(
            prompt=prompt,
            system=system,
            schema_hint=schema_hint,
            response_model=response_model,
        )
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
def _message_text(message: dict[str, Any]) -> str:
|
|
368
|
+
"""Extract a single text chunk from a chat message (handling tool_calls)."""
|
|
369
|
+
out = str(message.get("content") or "")
|
|
370
|
+
for tc in message.get("tool_calls", []) or []:
|
|
371
|
+
fn = tc.get("function", {})
|
|
372
|
+
out += f" {fn.get('name', '')} {fn.get('arguments', '')}"
|
|
373
|
+
return out
|