caudate-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. api/__init__.py +5 -0
  2. api/anthropic_compat.py +1518 -0
  3. api/artifact_viewer.py +366 -0
  4. api/caudate_middleware.py +618 -0
  5. api/forge_bootstrapper_routes.py +377 -0
  6. api/forge_routes.py +630 -0
  7. api/forge_system_routes.py +294 -0
  8. api/openai_compat.py +1993 -0
  9. api/server.py +667 -0
  10. api/storyboard_page.py +677 -0
  11. caudate_cli-0.1.0.dist-info/METADATA +354 -0
  12. caudate_cli-0.1.0.dist-info/RECORD +153 -0
  13. caudate_cli-0.1.0.dist-info/WHEEL +5 -0
  14. caudate_cli-0.1.0.dist-info/entry_points.txt +2 -0
  15. caudate_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
  16. caudate_cli-0.1.0.dist-info/top_level.txt +14 -0
  17. cognos_mcp/__init__.py +4 -0
  18. cognos_mcp/bridge.py +41 -0
  19. cognos_mcp/client.py +70 -0
  20. cognos_mcp/config.py +49 -0
  21. cognos_mcp/server.py +66 -0
  22. config.py +82 -0
  23. core/__init__.py +0 -0
  24. core/agent.py +468 -0
  25. core/agentic_loop.py +731 -0
  26. core/anthropic_auth.py +91 -0
  27. core/background.py +113 -0
  28. core/banner.py +134 -0
  29. core/bootstrap.py +292 -0
  30. core/citations.py +131 -0
  31. core/compaction.py +109 -0
  32. core/constitution.py +198 -0
  33. core/diff_viewer.py +87 -0
  34. core/export.py +85 -0
  35. core/file_refs.py +119 -0
  36. core/files.py +199 -0
  37. core/hooks.py +209 -0
  38. core/image.py +599 -0
  39. core/input.py +91 -0
  40. core/loop.py +238 -0
  41. core/memory_md.py +147 -0
  42. core/notifications.py +99 -0
  43. core/ownership.py +181 -0
  44. core/paste.py +81 -0
  45. core/permissions.py +210 -0
  46. core/plan_mode.py +215 -0
  47. core/sandbox_prompt.py +185 -0
  48. core/scheduler.py +195 -0
  49. core/schemas.py +202 -0
  50. core/session.py +90 -0
  51. core/settings.py +132 -0
  52. core/skills.py +398 -0
  53. core/slash_commands.py +977 -0
  54. core/statusline.py +61 -0
  55. core/subagent.py +300 -0
  56. core/thinking.py +50 -0
  57. core/updater.py +122 -0
  58. core/usage.py +109 -0
  59. core/worktree.py +93 -0
  60. execution/__init__.py +0 -0
  61. execution/executor.py +329 -0
  62. execution/plugins.py +108 -0
  63. execution/tools/__init__.py +0 -0
  64. execution/tools/agent_tool.py +107 -0
  65. execution/tools/agentic_tool.py +297 -0
  66. execution/tools/artifact_tool.py +191 -0
  67. execution/tools/ask_user_question_tool.py +137 -0
  68. execution/tools/base.py +81 -0
  69. execution/tools/calculator_tool.py +137 -0
  70. execution/tools/cognos_card_tool.py +124 -0
  71. execution/tools/cron_tool.py +215 -0
  72. execution/tools/datetime_tool.py +215 -0
  73. execution/tools/describe_image_tool.py +161 -0
  74. execution/tools/draw_tool.py +164 -0
  75. execution/tools/edit_image_tool.py +262 -0
  76. execution/tools/edit_tool.py +245 -0
  77. execution/tools/file_tool.py +90 -0
  78. execution/tools/find_anywhere_tool.py +255 -0
  79. execution/tools/forge_feature_tools.py +377 -0
  80. execution/tools/glob_tool.py +59 -0
  81. execution/tools/grep_tool.py +89 -0
  82. execution/tools/http_request_tool.py +224 -0
  83. execution/tools/load_skill_tool.py +104 -0
  84. execution/tools/longcat_avatar_tool.py +384 -0
  85. execution/tools/mcp_tool.py +100 -0
  86. execution/tools/notebook_tool.py +279 -0
  87. execution/tools/openapi_tool.py +440 -0
  88. execution/tools/plan_mode_tool.py +95 -0
  89. execution/tools/push_notification_tool.py +157 -0
  90. execution/tools/python_tool.py +61 -0
  91. execution/tools/respond_tool.py +40 -0
  92. execution/tools/sandbox_tool.py +378 -0
  93. execution/tools/search_tool.py +153 -0
  94. execution/tools/semantic_search_tool.py +106 -0
  95. execution/tools/shell_tool.py +283 -0
  96. execution/tools/speak_tool.py +134 -0
  97. execution/tools/storyboard_tool.py +727 -0
  98. execution/tools/system_info_tool.py +212 -0
  99. execution/tools/task_tool.py +323 -0
  100. execution/tools/think_tool.py +49 -0
  101. execution/tools/transcribe_audio_tool.py +86 -0
  102. execution/tools/update_memory_tool.py +92 -0
  103. execution/tools/web_fetch_tool.py +82 -0
  104. execution/tools/worktree_tool.py +174 -0
  105. llm/__init__.py +0 -0
  106. llm/fallback.py +116 -0
  107. llm/models.py +320 -0
  108. llm/provider.py +1356 -0
  109. llm/router.py +373 -0
  110. main.py +1889 -0
  111. memory/__init__.py +0 -0
  112. memory/episodic.py +99 -0
  113. memory/procedural.py +145 -0
  114. memory/semantic.py +71 -0
  115. memory/working.py +64 -0
  116. nn/__init__.py +43 -0
  117. nn/auto_evolve.py +245 -0
  118. nn/caudate.py +136 -0
  119. nn/config.py +141 -0
  120. nn/consolidator.py +81 -0
  121. nn/data.py +1635 -0
  122. nn/encoder.py +258 -0
  123. nn/forge_advisor.py +303 -0
  124. nn/format.py +235 -0
  125. nn/heads.py +432 -0
  126. nn/observer.py +994 -0
  127. nn/policy.py +214 -0
  128. nn/runtime.py +343 -0
  129. nn/scorer.py +175 -0
  130. nn/trainer.py +515 -0
  131. nn/vision.py +352 -0
  132. personality/__init__.py +23 -0
  133. personality/engine.py +129 -0
  134. personality/identity.py +144 -0
  135. personality/inner_voice.py +100 -0
  136. personality/mood.py +205 -0
  137. planning/__init__.py +0 -0
  138. planning/dev_server.py +221 -0
  139. planning/forge_models.py +718 -0
  140. planning/orchestrator.py +1363 -0
  141. planning/planner.py +451 -0
  142. planning/task_graph.py +61 -0
  143. reflection/__init__.py +0 -0
  144. reflection/meta_learner.py +156 -0
  145. reflection/reflector.py +127 -0
  146. ui/__init__.py +5 -0
  147. ui/display.py +88 -0
  148. voice/__init__.py +0 -0
  149. voice/conversation.py +125 -0
  150. voice/listener.py +111 -0
  151. voice/speaker.py +59 -0
  152. voice/stt.py +126 -0
  153. voice/tts.py +214 -0
llm/models.py ADDED
@@ -0,0 +1,320 @@
1
+ """Model registry — detect Ollama models and tag their capabilities.
2
+
3
+ Cognos runs local-first on Ollama but LiteLLM lets us also talk to cloud
4
+ models. The registry exposes:
5
+ - available models (from the Ollama API, plus any cloud models known)
6
+ - per-model capability flags (tool calling, JSON mode, context window)
7
+ - presets: `fast`, `balanced`, `powerful` — resolved against what's installed
8
+
9
+ Callers pass a preset name (e.g. `--model fast`) or a concrete model ID
10
+ (e.g. `ollama/gemma3:27b`). The registry resolves presets but passes concrete
11
+ IDs through untouched.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import asyncio
17
+ import logging
18
+ import re
19
+ import subprocess
20
+ from dataclasses import dataclass, field
21
+ from typing import Literal
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
+ # ──────────────────────── VRAM estimation ─────────────────────────────
27
+ #
28
+ # Ported from LocalForge's lib/models/vram-estimates.ts. Conservative
29
+ # Q4-quantization figures with a small overhead for context. Used by:
30
+ # - `cognos models` CLI to flag ✅ / ⚠️ / ❌ next to each entry
31
+ # - `cognos forge` to refuse to start orchestration when no installed
32
+ # model fits the local GPU
33
+ # - the model selector preset logic below
34
+
35
# Parameter-count buckets mapped to an estimated VRAM footprint.
# Each row is (exclusive lower bound, inclusive upper bound, VRAM in MB);
# the bounds are parameter counts in billions.
_SIZE_TO_VRAM_MB: list[tuple[float, float, int]] = [
    (0, 2, 1500),
    (2, 4, 2500),
    (4, 9, 5500),
    (9, 13, 8500),
    (13, 16, 10500),
    (16, 24, 15000),
    (24, 35, 21000),
    (35, 50, 30000),
    (50, 80, 44000),
    (80, 200, 100000),
    (200, 500, 240000),
]

# Boundary-anchored param-count match (e.g. "-31b", ":7b", "_8b", " 70b").
# The number must be delimited by a non-alphanumeric character (or the
# string edge) on both sides, so the "2.5" in "qwen2.5" is not picked up.
# Same regex as the TS port.
_PARAM_RE = re.compile(
    r"(?:^|[^a-zA-Z0-9])(\d+(?:\.\d+)?)b(?:[^a-zA-Z0-9]|$)",
    re.IGNORECASE,
)


@dataclass
class VramEstimate:
    """Parsed parameter count and the VRAM bucket it maps to."""

    # Parameter count in billions, as parsed from the model id.
    params_b: float
    # Estimated VRAM requirement in MB, taken from ``_SIZE_TO_VRAM_MB``.
    vram_mb: int


FitStatus = Literal["fits", "tight", "wont-fit"]


def estimate_vram(model_id: str) -> VramEstimate | None:
    """Estimate the Q4 VRAM requirement for a model id.

    The first parameter-count token in ``model_id`` (e.g. the ``14b`` in
    ``ollama/qwen2.5:14b``) is looked up in ``_SIZE_TO_VRAM_MB``.

    Returns ``None`` when the id is empty, carries no parameter count
    (cloud-only models, embedding models, etc.), or the count is
    non-positive or beyond the largest bucket in the table.
    """
    match = _PARAM_RE.search(model_id) if model_id else None
    if match is None:
        return None

    try:
        billions = float(match.group(1))
    except ValueError:
        return None
    if billions <= 0:
        return None

    # Buckets are half-open (low, high]; at most one of them can match.
    vram_mb = next(
        (mb for low, high, mb in _SIZE_TO_VRAM_MB if low < billions <= high),
        None,
    )
    if vram_mb is None:
        return None
    return VramEstimate(params_b=billions, vram_mb=vram_mb)
85
+
86
+
87
def compare_to_available(estimate_mb: int, available_mb: int) -> FitStatus:
    """Classify an estimated footprint against the available VRAM.

    ≤ 70 % of ``available_mb`` is ``fits``; above that and up to 100 % is
    ``tight``; anything larger is ``wont-fit``. A non-positive
    ``available_mb`` is always ``wont-fit``.
    """
    if available_mb > 0:
        usage = estimate_mb / available_mb
        # Thresholds are checked in ascending order; the first one met wins.
        for ceiling, status in ((0.7, "fits"), (1.0, "tight")):
            if usage <= ceiling:
                return status
    return "wont-fit"
97
+
98
+
99
def pick_best_fit(
    model_ids: list[str], available_mb: int,
) -> tuple[str, VramEstimate, FitStatus] | None:
    """Choose the largest model that can run in ``available_mb`` of VRAM.

    Prefers the largest comfortable fit (≤ 70 %); falls back to the
    largest tight fit if there is none. Never returns a model that won't
    fit. Ids without a parseable param count (e.g. cloud-only models)
    are skipped. Returns ``(model_id, estimate, status)`` or ``None``
    when nothing qualifies. Among equally sized models the one listed
    first in ``model_ids`` wins.
    """
    by_status: dict[str, list[tuple[str, VramEstimate, FitStatus]]] = {
        "fits": [],
        "tight": [],
    }
    for model_id in model_ids:
        estimate = estimate_vram(model_id)
        if estimate is None:
            continue
        status = compare_to_available(estimate.vram_mb, available_mb)
        # "wont-fit" entries are never eligible, so they are not kept.
        if status in by_status:
            by_status[status].append((model_id, estimate, status))

    for status in ("fits", "tight"):
        ranked = by_status[status]
        if ranked:
            # max() keeps the first of several equal maxima.
            return max(ranked, key=lambda entry: entry[1].params_b)
    return None
124
+
125
+
126
def detect_available_vram_mb() -> int | None:
    """Best-effort VRAM detection via ``nvidia-smi``.

    Returns the ``memory.total`` figure of the first GPU that
    ``nvidia-smi`` reports, truncated to an ``int``, or ``None`` if it is
    unavailable (CPU-only host, AMD, etc.). This function never raises
    for a missing, non-executable, failing, or slow ``nvidia-smi``, nor
    for output that is not a finite number.
    """
    try:
        raw = subprocess.check_output(
            ["nvidia-smi", "--query-gpu=memory.total",
             "--format=csv,noheader,nounits"],
            stderr=subprocess.DEVNULL, timeout=2,
        )
    except (OSError, subprocess.SubprocessError):
        # OSError covers a missing binary as well as one that exists but
        # cannot be executed; SubprocessError covers non-zero exit and
        # the 2-second timeout.
        return None

    rows = raw.decode(errors="replace").strip().splitlines()
    if not rows:
        return None
    try:
        return int(float(rows[0].strip()))
    except (ValueError, OverflowError):
        # Non-numeric output such as "[N/A]", or a non-finite value.
        return None
140
+
141
+
142
# CLI fit-indicator labels, keyed by fit status (plain ASCII text).
FIT_GLYPH = {
    "fits": "OK",
    "tight": "TIGHT",
    "wont-fit": "NO-FIT",
}
144
+
145
+
146
@dataclass
class ModelInfo:
    """Identity and capability flags for one model known to the registry."""

    # LiteLLM-compatible model id, e.g. "ollama/gemma3:27b"
    id: str
    # Bare name, e.g. "gemma3:27b"
    name: str
    # "ollama" | "anthropic" | "openai" | …
    provider: str
    supports_tool_calling: bool = False
    supports_json_mode: bool = False
    # Context window size; 8192 unless stated otherwise.
    context_window: int = 8192
    # Size in bytes; left at 0 when no size is supplied.
    size_bytes: int = 0
    # Each instance gets its own list.
    tags: list[str] = field(default_factory=list)
156
+
157
+
158
# Heuristic capability flags, based on model name substrings.
# Conservative defaults — override per-id as needed.
_NATIVE_TOOL_CALLING = (
    "qwen3-coder", "qwen2.5-coder", "llama3.1", "llama3.2",
    "llama3", "mistral", "nemotron", "kimi", "glm", "qwen3-vl",
)
_NO_NATIVE_TOOL_CALLING = ("gemma", "gemma2", "gemma3", "gemma4")
_LARGE_CONTEXT = ("qwen3", "kimi", "nemotron", "gemma3", "gemma4")


def _classify(name: str) -> tuple[bool, bool, int]:
    """Return (supports_tool_calling, supports_json_mode, context_window).

    Matching is by case-insensitive substring of ``name``. Tool calling is
    reported only for names on the allow-list that are not also on the
    deny-list, so unknown models are conservatively treated as lacking it.
    JSON mode is always reported as supported. The context window is 32000
    for names on the large-context list and 8192 otherwise.
    """
    lowered = name.lower()

    def mentions(fragments: tuple[str, ...]) -> bool:
        return any(fragment in lowered for fragment in fragments)

    # The deny-list is consulted first; an allow-list hit cannot override it.
    tool_calling = (
        not mentions(_NO_NATIVE_TOOL_CALLING)
        and mentions(_NATIVE_TOOL_CALLING)
    )
    # Ollama supports JSON output via response_format for most models
    json_mode = True
    context = 32000 if mentions(_LARGE_CONTEXT) else 8192
    return tool_calling, json_mode, context
182
+
183
+
184
class ModelRegistry:
    """Detect local + cloud models and expose capability info.

    The registry starts empty; call :meth:`refresh` to populate it.
    """

    def __init__(self):
        # Keyed by model id.
        self._models: dict[str, ModelInfo] = {}

    async def refresh(self) -> None:
        """Re-query Ollama for its installed models and re-add cloud entries.

        The new snapshot is assembled completely before the live mapping is
        touched. The registry is therefore never observed empty while the
        Ollama query is in flight, and a refresh that is cancelled or fails
        midway leaves the previous contents intact.
        """
        fresh: dict[str, ModelInfo] = {}
        for m in await _ollama_list():
            fresh[m.id] = m
        # Cloud entries are added last, so they win on an id collision.
        for m in _known_cloud_models():
            fresh[m.id] = m

        # No await between these two calls: the swap is atomic with respect
        # to other coroutines on the same event loop.
        self._models.clear()
        self._models.update(fresh)

    def models(self) -> list[ModelInfo]:
        """Return a new list holding every registered model."""
        return list(self._models.values())

    def get(self, model_id: str) -> ModelInfo | None:
        """Return the entry registered under ``model_id``, or ``None``."""
        return self._models.get(model_id)

    def resolve(self, name_or_preset: str) -> str:
        """Resolve a preset name or pass through a concrete model id.

        ``fast`` / ``balanced`` / ``powerful`` (case-insensitive) map to the
        smallest / median / largest registered Ollama model. If no Ollama
        model is registered, or the argument is not a preset, the argument
        is returned unchanged.
        """
        pickers = {
            "fast": self._pick_smallest_ollama,
            "balanced": self._pick_balanced_ollama,
            "powerful": self._pick_largest_ollama,
        }
        picker = pickers.get(name_or_preset.lower())
        if picker is None:
            return name_or_preset
        return picker() or name_or_preset

    # ------------------------------------------------------------------

    def _ollama_candidates(self) -> list[ModelInfo]:
        """Return the registered models whose provider is ``"ollama"``."""
        return [m for m in self._models.values() if m.provider == "ollama"]

    def _pick_smallest_ollama(self) -> str | None:
        """Return the id of the smallest Ollama model, or ``None`` if none."""
        candidates = self._ollama_candidates()
        if not candidates:
            return None
        # A size of 0 means "unknown"; rank it last rather than first.
        return min(candidates, key=lambda m: m.size_bytes or float("inf")).id

    def _pick_largest_ollama(self) -> str | None:
        """Return the id of the largest Ollama model, or ``None`` if none."""
        candidates = self._ollama_candidates()
        if not candidates:
            return None
        return max(candidates, key=lambda m: m.size_bytes or 0).id

    def _pick_balanced_ollama(self) -> str | None:
        """Return the id of a mid-size Ollama model, or ``None`` if none.

        Prefers the median (by size) of the models that support tool
        calling; falls back to the median of all Ollama models.
        """
        candidates = sorted(
            self._ollama_candidates(), key=lambda m: m.size_bytes or 0,
        )
        if not candidates:
            return None
        tooled = [m for m in candidates if m.supports_tool_calling]
        pool = tooled or candidates
        return pool[len(pool) // 2].id
241
+
242
+
243
+ # --- Ollama API via CLI (no extra deps) ---
244
+
245
+
246
async def _ollama_list() -> list[ModelInfo]:
    """Query `ollama list` and return parsed ModelInfo entries.

    Best-effort: returns an empty list when the command cannot be started,
    does not finish within 10 seconds, exits non-zero, or prints nothing
    beyond a header row. Each remaining row is split on whitespace; the
    first column is taken as the model name and the third and fourth as the
    size (e.g. "17 GB").
    """
    try:
        # argv form instead of a shell string: no shell is spawned, and a
        # missing binary surfaces here as an exception.
        proc = await asyncio.create_subprocess_exec(
            "ollama", "list",
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
    except Exception as e:
        logger.debug("ollama list failed: %s", e)
        return []

    try:
        stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=10)
    except Exception as e:
        logger.debug("ollama list failed: %s", e)
        return []
    finally:
        # On timeout or cancellation the child is still running; kill it
        # rather than leaving it behind. After a normal communicate() the
        # return code is already set and this is a no-op.
        if proc.returncode is None:
            try:
                proc.kill()
            except ProcessLookupError:
                pass

    if proc.returncode != 0:
        logger.debug("ollama list exited with status %s", proc.returncode)
        return []

    # Undecodable bytes must not turn a best-effort probe into a crash.
    lines = stdout.decode(errors="replace").splitlines()
    if len(lines) < 2:
        return []

    models: list[ModelInfo] = []
    # Skip header row
    for line in lines[1:]:
        parts = line.split()
        if len(parts) < 3:
            continue
        name = parts[0]
        # size is usually parts[2] + parts[3] (e.g. "17 GB")
        size_str = parts[2] + " " + parts[3] if len(parts) >= 4 else "0 MB"
        size_bytes = _parse_size(size_str)

        tool_calling, json_mode, context = _classify(name)
        models.append(ModelInfo(
            id=f"ollama/{name}",
            name=name,
            provider="ollama",
            supports_tool_calling=tool_calling,
            supports_json_mode=json_mode,
            context_window=context,
            size_bytes=size_bytes,
        ))
    return models
287
+
288
+
289
def _parse_size(s: str) -> int:
    """Parse '17 GB' / '986 MB' into bytes.

    The input must be exactly two whitespace-separated tokens: a number and
    a unit. Units are case-insensitive and use 1024-based multipliers; an
    unrecognised unit is treated as plain bytes. Returns 0 when the input
    cannot be parsed, including non-string input and non-finite numbers
    such as "nan" or "inf".
    """
    try:
        num_str, unit = s.split()
        num = float(num_str)
    except (ValueError, AttributeError):
        return 0

    multipliers = {
        "B": 1,
        "KB": 1024,
        "MB": 1024**2,
        "GB": 1024**3,
        "TB": 1024**4,
    }
    mult = multipliers.get(unit.upper(), 1)
    try:
        return int(num * mult)
    except (ValueError, OverflowError):
        # float() accepts "nan" / "inf", but int() rejects them.
        return 0
299
+
300
+
301
def _known_cloud_models() -> list[ModelInfo]:
    """Hard-coded entries for common cloud models so callers can pick them.

    Every entry uses its model id as its name as well, has provider
    ``"anthropic"``, supports tool calling and JSON mode, and has a
    200000-token context window. A fresh list is built on each call.
    """
    anthropic_ids = ("claude-opus-4-7", "claude-sonnet-4-6")
    return [
        ModelInfo(
            id=model_id,
            name=model_id,
            provider="anthropic",
            supports_tool_calling=True,
            supports_json_mode=True,
            context_window=200000,
        )
        for model_id in anthropic_ids
    ]