caudate-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. api/__init__.py +5 -0
  2. api/anthropic_compat.py +1518 -0
  3. api/artifact_viewer.py +366 -0
  4. api/caudate_middleware.py +618 -0
  5. api/forge_bootstrapper_routes.py +377 -0
  6. api/forge_routes.py +630 -0
  7. api/forge_system_routes.py +294 -0
  8. api/openai_compat.py +1993 -0
  9. api/server.py +667 -0
  10. api/storyboard_page.py +677 -0
  11. caudate_cli-0.1.0.dist-info/METADATA +354 -0
  12. caudate_cli-0.1.0.dist-info/RECORD +153 -0
  13. caudate_cli-0.1.0.dist-info/WHEEL +5 -0
  14. caudate_cli-0.1.0.dist-info/entry_points.txt +2 -0
  15. caudate_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
  16. caudate_cli-0.1.0.dist-info/top_level.txt +14 -0
  17. cognos_mcp/__init__.py +4 -0
  18. cognos_mcp/bridge.py +41 -0
  19. cognos_mcp/client.py +70 -0
  20. cognos_mcp/config.py +49 -0
  21. cognos_mcp/server.py +66 -0
  22. config.py +82 -0
  23. core/__init__.py +0 -0
  24. core/agent.py +468 -0
  25. core/agentic_loop.py +731 -0
  26. core/anthropic_auth.py +91 -0
  27. core/background.py +113 -0
  28. core/banner.py +134 -0
  29. core/bootstrap.py +292 -0
  30. core/citations.py +131 -0
  31. core/compaction.py +109 -0
  32. core/constitution.py +198 -0
  33. core/diff_viewer.py +87 -0
  34. core/export.py +85 -0
  35. core/file_refs.py +119 -0
  36. core/files.py +199 -0
  37. core/hooks.py +209 -0
  38. core/image.py +599 -0
  39. core/input.py +91 -0
  40. core/loop.py +238 -0
  41. core/memory_md.py +147 -0
  42. core/notifications.py +99 -0
  43. core/ownership.py +181 -0
  44. core/paste.py +81 -0
  45. core/permissions.py +210 -0
  46. core/plan_mode.py +215 -0
  47. core/sandbox_prompt.py +185 -0
  48. core/scheduler.py +195 -0
  49. core/schemas.py +202 -0
  50. core/session.py +90 -0
  51. core/settings.py +132 -0
  52. core/skills.py +398 -0
  53. core/slash_commands.py +977 -0
  54. core/statusline.py +61 -0
  55. core/subagent.py +300 -0
  56. core/thinking.py +50 -0
  57. core/updater.py +122 -0
  58. core/usage.py +109 -0
  59. core/worktree.py +93 -0
  60. execution/__init__.py +0 -0
  61. execution/executor.py +329 -0
  62. execution/plugins.py +108 -0
  63. execution/tools/__init__.py +0 -0
  64. execution/tools/agent_tool.py +107 -0
  65. execution/tools/agentic_tool.py +297 -0
  66. execution/tools/artifact_tool.py +191 -0
  67. execution/tools/ask_user_question_tool.py +137 -0
  68. execution/tools/base.py +81 -0
  69. execution/tools/calculator_tool.py +137 -0
  70. execution/tools/cognos_card_tool.py +124 -0
  71. execution/tools/cron_tool.py +215 -0
  72. execution/tools/datetime_tool.py +215 -0
  73. execution/tools/describe_image_tool.py +161 -0
  74. execution/tools/draw_tool.py +164 -0
  75. execution/tools/edit_image_tool.py +262 -0
  76. execution/tools/edit_tool.py +245 -0
  77. execution/tools/file_tool.py +90 -0
  78. execution/tools/find_anywhere_tool.py +255 -0
  79. execution/tools/forge_feature_tools.py +377 -0
  80. execution/tools/glob_tool.py +59 -0
  81. execution/tools/grep_tool.py +89 -0
  82. execution/tools/http_request_tool.py +224 -0
  83. execution/tools/load_skill_tool.py +104 -0
  84. execution/tools/longcat_avatar_tool.py +384 -0
  85. execution/tools/mcp_tool.py +100 -0
  86. execution/tools/notebook_tool.py +279 -0
  87. execution/tools/openapi_tool.py +440 -0
  88. execution/tools/plan_mode_tool.py +95 -0
  89. execution/tools/push_notification_tool.py +157 -0
  90. execution/tools/python_tool.py +61 -0
  91. execution/tools/respond_tool.py +40 -0
  92. execution/tools/sandbox_tool.py +378 -0
  93. execution/tools/search_tool.py +153 -0
  94. execution/tools/semantic_search_tool.py +106 -0
  95. execution/tools/shell_tool.py +283 -0
  96. execution/tools/speak_tool.py +134 -0
  97. execution/tools/storyboard_tool.py +727 -0
  98. execution/tools/system_info_tool.py +212 -0
  99. execution/tools/task_tool.py +323 -0
  100. execution/tools/think_tool.py +49 -0
  101. execution/tools/transcribe_audio_tool.py +86 -0
  102. execution/tools/update_memory_tool.py +92 -0
  103. execution/tools/web_fetch_tool.py +82 -0
  104. execution/tools/worktree_tool.py +174 -0
  105. llm/__init__.py +0 -0
  106. llm/fallback.py +116 -0
  107. llm/models.py +320 -0
  108. llm/provider.py +1356 -0
  109. llm/router.py +373 -0
  110. main.py +1889 -0
  111. memory/__init__.py +0 -0
  112. memory/episodic.py +99 -0
  113. memory/procedural.py +145 -0
  114. memory/semantic.py +71 -0
  115. memory/working.py +64 -0
  116. nn/__init__.py +43 -0
  117. nn/auto_evolve.py +245 -0
  118. nn/caudate.py +136 -0
  119. nn/config.py +141 -0
  120. nn/consolidator.py +81 -0
  121. nn/data.py +1635 -0
  122. nn/encoder.py +258 -0
  123. nn/forge_advisor.py +303 -0
  124. nn/format.py +235 -0
  125. nn/heads.py +432 -0
  126. nn/observer.py +994 -0
  127. nn/policy.py +214 -0
  128. nn/runtime.py +343 -0
  129. nn/scorer.py +175 -0
  130. nn/trainer.py +515 -0
  131. nn/vision.py +352 -0
  132. personality/__init__.py +23 -0
  133. personality/engine.py +129 -0
  134. personality/identity.py +144 -0
  135. personality/inner_voice.py +100 -0
  136. personality/mood.py +205 -0
  137. planning/__init__.py +0 -0
  138. planning/dev_server.py +221 -0
  139. planning/forge_models.py +718 -0
  140. planning/orchestrator.py +1363 -0
  141. planning/planner.py +451 -0
  142. planning/task_graph.py +61 -0
  143. reflection/__init__.py +0 -0
  144. reflection/meta_learner.py +156 -0
  145. reflection/reflector.py +127 -0
  146. ui/__init__.py +5 -0
  147. ui/display.py +88 -0
  148. voice/__init__.py +0 -0
  149. voice/conversation.py +125 -0
  150. voice/listener.py +111 -0
  151. voice/speaker.py +59 -0
  152. voice/stt.py +126 -0
  153. voice/tts.py +214 -0
llm/router.py ADDED
@@ -0,0 +1,373 @@
1
+ """Dual-process model router — System 1 (fast) vs System 2 (slow).
2
+
3
+ Cognos runs a tiered LLM stack inspired by Kahneman's Thinking, Fast and Slow:
4
+
5
+ - System 1: a small, fast Ollama model. Handles routine tool dispatch,
6
+ short answers, pattern-matching — the "autopilot" of the agent.
7
+ - System 2: a large, capable model. Handles planning, reflection,
8
+ meta-learning, complex synthesis, anything that benefits from depth.
9
+
10
+ The `Router` scores every LLM call against a policy and picks the right tier.
11
+ The `DualLLMProvider` wraps two `LLMProvider`s and exposes the same interface
12
+ as a single provider, so agentic loops and tools don't know or care about
13
+ routing.
14
+
15
+ Caller tags (passed via the `caller` kwarg that LLMProvider already accepts)
16
+ let deliberate-reasoning modules (planning, reflection, meta, compaction)
17
+ always take System 2, while the agentic loop itself routes heuristically.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import logging
23
+ from dataclasses import dataclass, field
24
+ from typing import Any, AsyncIterator
25
+
26
+ from pydantic import BaseModel
27
+
28
+ from core.schemas import StreamEvent
29
+ from llm.provider import LLMProvider, LLMResponse
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
# Relative influence of each routing feature on the complexity score; the
# four weights add up to 1.0. Tuning note: a larger `keyword` weight makes the
# router escalate on intent more aggressively, while a larger `depth` weight
# escalates on long conversations (synthesis time).
_WEIGHTS = {
    "length": 0.25,
    "depth": 0.10,
    "keyword": 0.45,
    "mood": 0.20,
}

# Substrings that, when found in the latest user message, mark the request
# as one that calls for deeper reasoning.
_COMPLEX_KEYWORDS = (
    "why",
    "decide",
    "strategy",
    "compare",
    "evaluate",
    "analyze",
    "refactor",
    "design",
    "architecture",
    "debug",
    "explain",
    "plan",
    "reason",
    "approach",
    "trade-off",
    "tradeoff",
    "rationale",
    "critique",
    "assess",
    "diagnose",
)
45
+
46
+
47
@dataclass
class RoutingPolicy:
    """Tunable settings read by the Router. The defaults lean toward the fast tier."""

    # Heuristic scores at or above this value select the slow tier.
    complexity_threshold: float = 0.40
    # Caller tags that always route to the slow tier.
    slow_caller_tags: set[str] = field(
        default_factory=lambda: set(
            ("planning", "reflection", "meta", "compaction")
        )
    )
    # Caller tags that always route to the fast tier (none by default).
    fast_caller_tags: set[str] = field(default_factory=set)
    # When False, the router never consults the mood state.
    escalate_on_stuck: bool = True
    # Token budget at which the length feature saturates; the router
    # multiplies it by 4 to compare against a character count.
    length_tokens_saturation: int = 4000
    # Message count at which the depth feature saturates.
    depth_messages_saturation: int = 20
59
+
60
+
61
@dataclass
class RoutingDecision:
    """Outcome of one routing choice: which tier, which provider, and why."""

    # Tier label: "fast" | "slow".
    tier: str
    # Provider instance selected to serve the call.
    provider: LLMProvider
    # Complexity score attached to the decision.
    score: float
    # Short explanation strings; the router joins them into its log line.
    reasons: list[str]
67
+
68
+
69
class Router:
    """Pick a provider for an LLM call based on context + policy.

    `choose` applies these rules in order and stops at the first that fires:

    1. Caller-tag overrides from the policy (slow tags first, then fast tags).
    2. Escalation to the slow tier when the policy enables
       `escalate_on_stuck` and the mood reports `should_defer_to_user()`.
    3. A Caudate tier prediction, when the observer reports `can_advise()`,
       names a known tier, and its confidence reaches
       `_CAUDATE_MIN_CONFIDENCE`.
    4. The weighted heuristic score compared with
       `policy.complexity_threshold`.

    The mood and Caudate objects are optional, best-effort collaborators:
    any exception they raise is logged at debug level and the router falls
    through to the next rule instead of failing the LLM call.
    """

    # Minimum Caudate confidence required to override the heuristic.
    _CAUDATE_MIN_CONFIDENCE = 0.55
    # Tier labels the router knows how to map to a provider.
    _TIERS = ("fast", "slow")

    def __init__(
        self,
        fast: LLMProvider,
        slow: LLMProvider,
        policy: RoutingPolicy | None = None,
        mood=None,
    ):
        """Store both providers and the routing policy.

        Args:
            fast: Provider used for the "fast" tier.
            slow: Provider used for the "slow" tier.
            policy: Routing knobs; a default `RoutingPolicy` when omitted.
            mood: Optional mood state consulted for escalation.
        """
        self.fast = fast
        self.slow = slow
        self.policy = policy or RoutingPolicy()
        self.mood = mood  # optional MoodState for escalation
        self.caudate = None  # optional CaudateObserver for ADVISOR+ override
        self.stats = {"fast": 0, "slow": 0}  # number of decisions per tier
        self.last_decision: RoutingDecision | None = None

    def set_mood(self, mood) -> None:
        """Replace the mood state consulted for escalation."""
        self.mood = mood

    def set_caudate(self, caudate) -> None:
        """Wire a CaudateObserver in. Once set, her tier prediction
        overrides the heuristic at ADVISOR level or higher."""
        self.caudate = caudate

    def choose(
        self,
        messages: list[dict[str, Any]] | None = None,
        tools: list[dict] | None = None,
        caller: str | None = None,
    ) -> RoutingDecision:
        """Return the provider to use for this call.

        Args:
            messages: Chat messages for the call; `None` is treated as empty.
            tools: Tool definitions for the call; `None` is treated as empty.
            caller: Optional tag naming the calling module.

        Returns:
            The recorded `RoutingDecision` (also stored in `last_decision`
            and counted in `stats`).
        """
        messages = messages or []
        tools = tools or []
        policy = self.policy

        # Caller-tag overrides are absolute.
        if caller and caller in policy.slow_caller_tags:
            return self._record(RoutingDecision(
                tier="slow", provider=self.slow, score=1.0,
                reasons=[f"caller={caller}"],
            ))
        if caller and caller in policy.fast_caller_tags:
            return self._record(RoutingDecision(
                tier="fast", provider=self.fast, score=0.0,
                reasons=[f"caller={caller}"],
            ))

        # Stuck mood is an absolute escalation — when frustration/failure
        # streak is high, the fast model has already proven insufficient.
        if self._mood_says("should_defer_to_user"):
            return self._record(RoutingDecision(
                tier="slow", provider=self.slow, score=1.0,
                reasons=["mood=stuck"],
            ))

        score, reasons = self._score(messages, tools)

        # Caudate override at ADVISOR+ trust. She's earned the right to
        # pick the tier; the heuristic becomes a fallback if she's silent.
        override = self._caudate_override()
        if override is not None:
            tier, confidence = override
            reasons.append(f"caudate={tier}@{confidence:.2f}")
        else:
            tier = "slow" if score >= policy.complexity_threshold else "fast"

        return self._record(RoutingDecision(
            tier=tier,
            provider=self.slow if tier == "slow" else self.fast,
            score=score,
            reasons=reasons,
        ))

    def _mood_says(self, *predicates: str) -> bool:
        """Return True if any named zero-argument mood method returns truthy.

        Returns False when there is no mood, when the policy disables
        `escalate_on_stuck`, or when a mood method raises. Predicates are
        evaluated in order and evaluation stops at the first truthy one.
        """
        if self.mood is None or not self.policy.escalate_on_stuck:
            return False
        try:
            return any(getattr(self.mood, name)() for name in predicates)
        except Exception:
            # Best-effort: a broken mood object must not fail the LLM call.
            logger.debug("Router: mood check failed; ignoring mood", exc_info=True)
            return False

    def _caudate_override(self) -> tuple[str, float] | None:
        """Return `(tier, confidence)` from Caudate, or None to use the heuristic.

        None is returned when no observer is wired in, it may not advise,
        it has no prediction, reading the prediction raises, the predicted
        tier is not one of `_TIERS`, or the confidence is below
        `_CAUDATE_MIN_CONFIDENCE`.
        """
        if self.caudate is None:
            return None
        try:
            if not self.caudate.can_advise():
                return None
            prediction = self.caudate._last_prediction
            if prediction is None:
                return None
            tier = prediction.tier
            confidence = float(prediction.tier_confidence)
        except Exception:
            # Best-effort: a malformed prediction falls back to the heuristic.
            logger.debug(
                "Router: caudate prediction unavailable; using heuristic",
                exc_info=True,
            )
            return None
        # An unknown tier label would otherwise be routed to the fast
        # provider under a bogus name and pollute `stats`.
        if tier not in self._TIERS:
            return None
        # Written as `not (>=)` so a NaN confidence never overrides.
        if not (confidence >= self._CAUDATE_MIN_CONFIDENCE):
            return None
        return tier, confidence

    def _record(self, decision: RoutingDecision) -> RoutingDecision:
        """Remember *decision*, bump its tier counter, log it, and return it."""
        self.last_decision = decision
        self.stats[decision.tier] = self.stats.get(decision.tier, 0) + 1
        logger.info(
            "Router -> %s (score=%.2f, reasons=%s)",
            decision.tier, decision.score, ", ".join(decision.reasons),
        )
        return decision

    @staticmethod
    def _saturate(value: float, limit: float) -> float:
        """Return `value / limit` capped at 1.0.

        A non-positive *limit* cannot be divided by meaningfully; it is
        treated as "saturates immediately": 1.0 for any positive value,
        otherwise 0.0.
        """
        if limit <= 0:
            return 1.0 if value > 0 else 0.0
        return min(1.0, value / limit)

    def _score(
        self,
        messages: list[dict[str, Any]],
        tools: list[dict],
    ) -> tuple[float, list[str]]:
        """Compute the weighted complexity score for a call.

        Args:
            messages: Chat messages for the call.
            tools: Tool definitions; accepted for interface symmetry and
                not used by the current features.

        Returns:
            `(score, reasons)` where `reasons` lists each feature value
            formatted for the log line.
        """
        # Length feature — total chars across all messages, saturating.
        # The token budget is multiplied by 4 to compare against characters.
        total_chars = sum(len(_message_text(m)) for m in messages)
        length = self._saturate(
            total_chars, self.policy.length_tokens_saturation * 4
        )

        # Depth feature — how many messages deep are we.
        depth = self._saturate(
            len(messages), self.policy.depth_messages_saturation
        )

        # Keyword feature — does the latest user message hint at deep
        # reasoning. Matching is by case-insensitive substring.
        latest_user = next(
            (_message_text(m) for m in reversed(messages)
             if m.get("role") == "user"),
            "",
        ).lower()
        keyword = 1.0 if any(k in latest_user for k in _COMPLEX_KEYWORDS) else 0.0

        # Mood feature — slow down when the agent is uncertain or stuck.
        mood_score = (
            1.0
            if self._mood_says("should_defer_to_user", "should_slow_down")
            else 0.0
        )

        score = (
            _WEIGHTS["length"] * length
            + _WEIGHTS["depth"] * depth
            + _WEIGHTS["keyword"] * keyword
            + _WEIGHTS["mood"] * mood_score
        )
        reasons = [
            f"len={length:.2f}",
            f"depth={depth:.2f}",
            f"kw={keyword:.2f}",
            f"mood={mood_score:.2f}",
        ]
        return score, reasons
207
+
208
+
209
class DualLLMProvider:
    """Drop-in replacement for LLMProvider that routes per call.

    Exposes the same async surface (chat, complete, stream, structured_output).
    Each method passes the `caller` tag through to the router, then delegates
    to the provider the router picked. The tag is used for routing only; it
    is not forwarded to the underlying provider call.
    """

    def __init__(
        self,
        fast: LLMProvider,
        slow: LLMProvider,
        policy: RoutingPolicy | None = None,
        mood=None,
    ):
        """Wrap a fast and a slow provider behind one routing facade.

        Args:
            fast: Provider for the fast (System 1) tier.
            slow: Provider for the slow (System 2) tier.
            policy: Optional routing policy handed to the `Router`.
            mood: Optional mood state handed to the `Router`.
        """
        self._fast = fast
        self._slow = slow
        self.router = Router(fast=fast, slow=slow, policy=policy, mood=mood)

    # ------------------------------------------------------------------
    # Compatibility surface (mirrors LLMProvider)
    # ------------------------------------------------------------------

    @property
    def model(self) -> str:
        """A synthetic id so sessions/logs show the tiered setup."""
        return f"dual[fast={self._fast.model},slow={self._slow.model}]"

    @property
    def temperature(self) -> float:
        """Temperature reported by the fast provider."""
        return self._fast.temperature

    @property
    def max_tokens(self) -> int:
        """Token limit reported by the slow provider."""
        return self._slow.max_tokens

    def switch_model(self, model: str) -> None:
        """Switch the slow (System 2) tier — treat that as the primary target."""
        self._slow.switch_model(model)

    def set_mood(self, mood) -> None:
        """Forward a new mood state to the router."""
        self.router.set_mood(mood)

    def set_caudate(self, caudate) -> None:
        """Forward a CaudateObserver to the router.

        Mirrors `set_mood` so callers holding only the provider facade do
        not have to reach through `.router`.
        """
        self.router.set_caudate(caudate)

    # ------------------------------------------------------------------
    # Hot-swap support — used by /system1 and /system2 slash commands
    # ------------------------------------------------------------------

    @property
    def fast_model(self) -> str:
        """Model id currently configured on the fast provider."""
        return self._fast.model

    @property
    def slow_model(self) -> str:
        """Model id currently configured on the slow provider."""
        return self._slow.model

    @property
    def last_tier(self) -> str | None:
        """Which tier was used on the most recent call ('fast' / 'slow' / None)."""
        decision = self.router.last_decision
        return decision.tier if decision else None

    @property
    def last_provider_model(self) -> str | None:
        """The actual model id that ran on the most recent call."""
        decision = self.router.last_decision
        if decision is None:
            return None
        return self._fast.model if decision.tier == "fast" else self._slow.model

    def set_fast(self, model: str) -> None:
        """Hot-swap the fast tier without rebuilding the agent."""
        self._fast.switch_model(model)
        # Router holds a reference to the same _fast object, so it
        # picks up the new model id automatically on next call.

    def set_slow(self, model: str) -> None:
        """Hot-swap the slow tier without rebuilding the agent."""
        self._slow.switch_model(model)

    # ------------------------------------------------------------------
    # Calls
    # ------------------------------------------------------------------

    @staticmethod
    def _as_messages(prompt: str, system: str | None) -> list[dict[str, Any]]:
        """Build the message list the router scores for a prompt-style call."""
        messages: list[dict[str, Any]] = []
        if system:
            messages.append({"role": "system", "content": system})
        messages.append({"role": "user", "content": prompt})
        return messages

    async def complete(
        self,
        prompt: str,
        system: str | None = None,
        temperature: float | None = None,
        max_tokens: int | None = None,
        response_format: dict | None = None,
        caller: str | None = None,
    ) -> LLMResponse:
        """Route a single-prompt completion and delegate to the chosen provider.

        The prompt (and system text, if any) is wrapped as chat messages
        only so the router can score it; the provider receives the original
        `prompt` / `system` arguments.
        """
        routed = self.router.choose(
            self._as_messages(prompt, system), None, caller
        )
        return await routed.provider.complete(
            prompt=prompt, system=system,
            temperature=temperature, max_tokens=max_tokens,
            response_format=response_format,
        )

    async def chat(
        self,
        messages: list[dict[str, Any]],
        temperature: float | None = None,
        max_tokens: int | None = None,
        response_format: dict | None = None,
        tools: list[dict] | None = None,
        tool_choice: str | None = None,
        caller: str | None = None,
    ) -> LLMResponse:
        """Route a chat call and delegate to the chosen provider."""
        routed = self.router.choose(messages, tools, caller)
        return await routed.provider.chat(
            messages=messages,
            temperature=temperature, max_tokens=max_tokens,
            response_format=response_format,
            tools=tools, tool_choice=tool_choice,
        )

    async def stream(
        self,
        messages: list[dict[str, Any]],
        temperature: float | None = None,
        max_tokens: int | None = None,
        tools: list[dict] | None = None,
        tool_choice: str | None = None,
        caller: str | None = None,
    ) -> AsyncIterator[StreamEvent]:
        """Route a streaming call and re-yield the chosen provider's events.

        The tier is chosen once, when iteration starts; every event comes
        from that single provider.
        """
        routed = self.router.choose(messages, tools, caller)
        async for event in routed.provider.stream(
            messages=messages,
            temperature=temperature, max_tokens=max_tokens,
            tools=tools, tool_choice=tool_choice,
        ):
            yield event

    async def structured_output(
        self,
        prompt: str,
        system: str | None = None,
        schema_hint: str = "",
        response_model: type[BaseModel] | None = None,
        caller: str | None = None,
    ) -> Any:
        """Route a structured-output call and delegate to the chosen provider.

        When no `caller` is given, the call is tagged "structured" so a
        routing policy can map that tag to a tier.
        """
        # Structured output almost always implies deliberate reasoning.
        # Default to "structured" caller so the router's policy can map it.
        tag = caller or "structured"
        routed = self.router.choose(
            self._as_messages(prompt, system), None, tag
        )
        return await routed.provider.structured_output(
            prompt=prompt, system=system,
            schema_hint=schema_hint,
            response_model=response_model,
        )
365
+
366
+
367
def _message_text(message: dict[str, Any]) -> str:
    """Extract a single text chunk from a chat message (handling tool_calls).

    Args:
        message: A chat message dict. `content` may be a string, `None`, or
            a list of content parts; `tool_calls` may be missing or `None`.

    Returns:
        The message text followed by `" <name> <arguments>"` for each tool
        call. For list-form content only textual parts contribute (bare
        strings and dict parts with a string `"text"` value), joined by
        single spaces, so non-text parts such as image payloads do not
        inflate the result.
    """
    content = message.get("content")
    if isinstance(content, (list, tuple)):
        # Multi-part content: keep only the text, never the repr of the
        # whole list (which would include any embedded binary/URL data).
        text_parts = []
        for part in content:
            if isinstance(part, str):
                text_parts.append(part)
            elif isinstance(part, dict) and isinstance(part.get("text"), str):
                text_parts.append(part["text"])
        text = " ".join(text_parts)
    else:
        text = str(content or "")

    chunks = [text]
    for call in message.get("tool_calls") or []:
        if not isinstance(call, dict):
            continue
        fn = call.get("function")
        if not isinstance(fn, dict):
            # Covers a missing key as well as an explicit `None`.
            fn = {}
        chunks.append(f" {fn.get('name', '')} {fn.get('arguments', '')}")
    return "".join(chunks)