caudate-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- api/__init__.py +5 -0
- api/anthropic_compat.py +1518 -0
- api/artifact_viewer.py +366 -0
- api/caudate_middleware.py +618 -0
- api/forge_bootstrapper_routes.py +377 -0
- api/forge_routes.py +630 -0
- api/forge_system_routes.py +294 -0
- api/openai_compat.py +1993 -0
- api/server.py +667 -0
- api/storyboard_page.py +677 -0
- caudate_cli-0.1.0.dist-info/METADATA +354 -0
- caudate_cli-0.1.0.dist-info/RECORD +153 -0
- caudate_cli-0.1.0.dist-info/WHEEL +5 -0
- caudate_cli-0.1.0.dist-info/entry_points.txt +2 -0
- caudate_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
- caudate_cli-0.1.0.dist-info/top_level.txt +14 -0
- cognos_mcp/__init__.py +4 -0
- cognos_mcp/bridge.py +41 -0
- cognos_mcp/client.py +70 -0
- cognos_mcp/config.py +49 -0
- cognos_mcp/server.py +66 -0
- config.py +82 -0
- core/__init__.py +0 -0
- core/agent.py +468 -0
- core/agentic_loop.py +731 -0
- core/anthropic_auth.py +91 -0
- core/background.py +113 -0
- core/banner.py +134 -0
- core/bootstrap.py +292 -0
- core/citations.py +131 -0
- core/compaction.py +109 -0
- core/constitution.py +198 -0
- core/diff_viewer.py +87 -0
- core/export.py +85 -0
- core/file_refs.py +119 -0
- core/files.py +199 -0
- core/hooks.py +209 -0
- core/image.py +599 -0
- core/input.py +91 -0
- core/loop.py +238 -0
- core/memory_md.py +147 -0
- core/notifications.py +99 -0
- core/ownership.py +181 -0
- core/paste.py +81 -0
- core/permissions.py +210 -0
- core/plan_mode.py +215 -0
- core/sandbox_prompt.py +185 -0
- core/scheduler.py +195 -0
- core/schemas.py +202 -0
- core/session.py +90 -0
- core/settings.py +132 -0
- core/skills.py +398 -0
- core/slash_commands.py +977 -0
- core/statusline.py +61 -0
- core/subagent.py +300 -0
- core/thinking.py +50 -0
- core/updater.py +122 -0
- core/usage.py +109 -0
- core/worktree.py +93 -0
- execution/__init__.py +0 -0
- execution/executor.py +329 -0
- execution/plugins.py +108 -0
- execution/tools/__init__.py +0 -0
- execution/tools/agent_tool.py +107 -0
- execution/tools/agentic_tool.py +297 -0
- execution/tools/artifact_tool.py +191 -0
- execution/tools/ask_user_question_tool.py +137 -0
- execution/tools/base.py +81 -0
- execution/tools/calculator_tool.py +137 -0
- execution/tools/cognos_card_tool.py +124 -0
- execution/tools/cron_tool.py +215 -0
- execution/tools/datetime_tool.py +215 -0
- execution/tools/describe_image_tool.py +161 -0
- execution/tools/draw_tool.py +164 -0
- execution/tools/edit_image_tool.py +262 -0
- execution/tools/edit_tool.py +245 -0
- execution/tools/file_tool.py +90 -0
- execution/tools/find_anywhere_tool.py +255 -0
- execution/tools/forge_feature_tools.py +377 -0
- execution/tools/glob_tool.py +59 -0
- execution/tools/grep_tool.py +89 -0
- execution/tools/http_request_tool.py +224 -0
- execution/tools/load_skill_tool.py +104 -0
- execution/tools/longcat_avatar_tool.py +384 -0
- execution/tools/mcp_tool.py +100 -0
- execution/tools/notebook_tool.py +279 -0
- execution/tools/openapi_tool.py +440 -0
- execution/tools/plan_mode_tool.py +95 -0
- execution/tools/push_notification_tool.py +157 -0
- execution/tools/python_tool.py +61 -0
- execution/tools/respond_tool.py +40 -0
- execution/tools/sandbox_tool.py +378 -0
- execution/tools/search_tool.py +153 -0
- execution/tools/semantic_search_tool.py +106 -0
- execution/tools/shell_tool.py +283 -0
- execution/tools/speak_tool.py +134 -0
- execution/tools/storyboard_tool.py +727 -0
- execution/tools/system_info_tool.py +212 -0
- execution/tools/task_tool.py +323 -0
- execution/tools/think_tool.py +49 -0
- execution/tools/transcribe_audio_tool.py +86 -0
- execution/tools/update_memory_tool.py +92 -0
- execution/tools/web_fetch_tool.py +82 -0
- execution/tools/worktree_tool.py +174 -0
- llm/__init__.py +0 -0
- llm/fallback.py +116 -0
- llm/models.py +320 -0
- llm/provider.py +1356 -0
- llm/router.py +373 -0
- main.py +1889 -0
- memory/__init__.py +0 -0
- memory/episodic.py +99 -0
- memory/procedural.py +145 -0
- memory/semantic.py +71 -0
- memory/working.py +64 -0
- nn/__init__.py +43 -0
- nn/auto_evolve.py +245 -0
- nn/caudate.py +136 -0
- nn/config.py +141 -0
- nn/consolidator.py +81 -0
- nn/data.py +1635 -0
- nn/encoder.py +258 -0
- nn/forge_advisor.py +303 -0
- nn/format.py +235 -0
- nn/heads.py +432 -0
- nn/observer.py +994 -0
- nn/policy.py +214 -0
- nn/runtime.py +343 -0
- nn/scorer.py +175 -0
- nn/trainer.py +515 -0
- nn/vision.py +352 -0
- personality/__init__.py +23 -0
- personality/engine.py +129 -0
- personality/identity.py +144 -0
- personality/inner_voice.py +100 -0
- personality/mood.py +205 -0
- planning/__init__.py +0 -0
- planning/dev_server.py +221 -0
- planning/forge_models.py +718 -0
- planning/orchestrator.py +1363 -0
- planning/planner.py +451 -0
- planning/task_graph.py +61 -0
- reflection/__init__.py +0 -0
- reflection/meta_learner.py +156 -0
- reflection/reflector.py +127 -0
- ui/__init__.py +5 -0
- ui/display.py +88 -0
- voice/__init__.py +0 -0
- voice/conversation.py +125 -0
- voice/listener.py +111 -0
- voice/speaker.py +59 -0
- voice/stt.py +126 -0
- voice/tts.py +214 -0
llm/models.py
ADDED
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
"""Model registry — detect Ollama models and tag their capabilities.
|
|
2
|
+
|
|
3
|
+
Cognos runs local-first on Ollama but LiteLLM lets us also talk to cloud
|
|
4
|
+
models. The registry exposes:
|
|
5
|
+
- available models (from the Ollama API, plus any cloud models known)
|
|
6
|
+
- per-model capability flags (tool calling, JSON mode, context window)
|
|
7
|
+
- presets: `fast`, `balanced`, `powerful` — resolved against what's installed
|
|
8
|
+
|
|
9
|
+
Callers pass a preset name (e.g. `--model fast`) or a concrete model ID
|
|
10
|
+
(e.g. `ollama/gemma3:27b`). The registry resolves presets but passes concrete
|
|
11
|
+
IDs through untouched.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import asyncio
|
|
17
|
+
import logging
|
|
18
|
+
import re
|
|
19
|
+
import subprocess
|
|
20
|
+
from dataclasses import dataclass, field
|
|
21
|
+
from typing import Literal
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# ──────────────────────── VRAM estimation ─────────────────────────────
|
|
27
|
+
#
|
|
28
|
+
# Ported from LocalForge's lib/models/vram-estimates.ts. Conservative
|
|
29
|
+
# Q4-quantization figures with a small overhead for context. Used by:
|
|
30
|
+
#   - `cognos models` CLI to flag each entry as OK / TIGHT / NO-FIT (see FIT_GLYPH)
|
|
31
|
+
# - `cognos forge` to refuse to start orchestration when no installed
|
|
32
|
+
# model fits the local GPU
|
|
33
|
+
# - the model selector preset logic below
|
|
34
|
+
|
|
35
|
+
# Lookup table used by estimate_vram(). Each row is
# (lower bound, upper bound, estimated VRAM in MB), where the bounds are
# parameter counts in billions. estimate_vram() treats the lower bound as
# exclusive and the upper bound as inclusive, so e.g. a 4B model lands in
# the (2, 4] row. Parameter counts above 500B have no row and therefore
# produce no estimate.
_SIZE_TO_VRAM_MB: list[tuple[float, float, int]] = [
    (0, 2, 1500),
    (2, 4, 2500),
    (4, 9, 5500),
    (9, 13, 8500),
    (13, 16, 10500),
    (16, 24, 15000),
    (24, 35, 21000),
    (35, 50, 30000),
    (50, 80, 44000),
    (80, 200, 100000),
    (200, 500, 240000),
]
|
|
48
|
+
|
|
49
|
+
# Boundary-anchored param-count match (e.g. "-31b", ":7b", "_8b", " 70b").
# Same regex as the TS port.
#
# Group 1 captures the number (integer or decimal) that precedes the "b".
# The token must be delimited on both sides by a non-alphanumeric character
# or the start/end of the string, so "qwen2.5:14b" yields "14" while a
# mixture-of-experts tag such as "8x7b" does not match at all (the "7" is
# preceded by the letter "x"). IGNORECASE lets "7B" match as well.
_PARAM_RE = re.compile(
    r"(?:^|[^a-zA-Z0-9])(\d+(?:\.\d+)?)b(?:[^a-zA-Z0-9]|$)",
    re.IGNORECASE,
)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass
class VramEstimate:
    """Result of estimate_vram(): the parsed size of a model and the
    VRAM figure looked up for it."""

    # Parameter count in billions, as parsed from the model id.
    params_b: float
    # Estimated VRAM requirement in MB, taken from _SIZE_TO_VRAM_MB.
    vram_mb: int
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# Verdict returned by compare_to_available() when an estimate is compared
# against the available VRAM.
FitStatus = Literal["fits", "tight", "wont-fit"]
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def estimate_vram(model_id: str) -> VramEstimate | None:
    """Estimate the Q4 VRAM requirement for a model id.

    The parameter count (in billions) is extracted from ids such as
    ``ollama/qwen2.5:14b`` and mapped onto the matching size bucket.

    Returns ``None`` when the id is empty, carries no parseable
    parameter count (cloud-only models, embedding models, etc.), or the
    count falls outside every bucket.
    """
    match = _PARAM_RE.search(model_id) if model_id else None
    if match is None:
        return None

    try:
        billions = float(match.group(1))
    except ValueError:
        return None
    if billions <= 0:
        return None

    # Buckets are (exclusive lower bound, inclusive upper bound, MB).
    bucket_mb = next(
        (mb for lo, hi, mb in _SIZE_TO_VRAM_MB if lo < billions <= hi),
        None,
    )
    if bucket_mb is None:
        return None
    return VramEstimate(params_b=billions, vram_mb=bucket_mb)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def compare_to_available(estimate_mb: int, available_mb: int) -> FitStatus:
    """Classify how well an estimate fits into the available VRAM.

    Usage of at most 70 % of ``available_mb`` is ``fits``; above that and
    up to 100 % is ``tight``; anything beyond is ``wont-fit``. A
    non-positive ``available_mb`` always yields ``wont-fit``.
    """
    if available_mb > 0:
        usage = estimate_mb / available_mb
        if usage <= 0.7:
            return "fits"
        if usage <= 1.0:
            return "tight"
    return "wont-fit"
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def pick_best_fit(
    model_ids: list[str], available_mb: int,
) -> tuple[str, VramEstimate, FitStatus] | None:
    """Choose the biggest model that can run in ``available_mb`` of VRAM.

    Models that fit comfortably (``fits``) are preferred; only when none
    exist is the largest ``tight`` model returned. A ``wont-fit`` model
    is never returned, and ids without a parseable parameter count
    (e.g. cloud-only models) are ignored. Returns ``None`` when nothing
    qualifies. Among equally sized models the one listed first wins.
    """
    by_status: dict[str, list[tuple[str, VramEstimate, FitStatus]]] = {}
    for model_id in model_ids:
        estimate = estimate_vram(model_id)
        if estimate is None:
            continue
        status = compare_to_available(estimate.vram_mb, available_mb)
        by_status.setdefault(status, []).append((model_id, estimate, status))

    # Preference order: comfortable fits first, then tight fits.
    for wanted in ("fits", "tight"):
        bucket = by_status.get(wanted)
        if bucket:
            # max() keeps the first of several equally large entries.
            return max(bucket, key=lambda entry: entry[1].params_b)
    return None
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def detect_available_vram_mb() -> int | None:
    """Best-effort detection of GPU memory via ``nvidia-smi``.

    Runs ``nvidia-smi --query-gpu=memory.total`` with a 2 second timeout
    and parses the first line of its output.

    Returns:
        The value reported on the first output line (total memory of
        the first listed GPU, in MB) as an ``int``, or ``None`` when the
        tool is missing, cannot be executed, fails, times out, or prints
        something that is not a number (CPU-only host, AMD, etc.).
    """
    try:
        raw = subprocess.check_output(
            ["nvidia-smi", "--query-gpu=memory.total",
             "--format=csv,noheader,nounits"],
            stderr=subprocess.DEVNULL, timeout=2,
        )
        lines = raw.decode().strip().splitlines()
        if not lines:
            return None
        return int(float(lines[0].strip()))
    except (OSError, subprocess.SubprocessError, ValueError, OverflowError):
        # OSError (not just FileNotFoundError) so that a non-executable or
        # otherwise unlaunchable nvidia-smi does not crash a best-effort
        # probe; ValueError also covers undecodable or non-numeric output.
        return None
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
# Glyphs for CLI fit indicators
# Maps each FitStatus value to a short plain-text label.
FIT_GLYPH = {"fits": "OK", "tight": "TIGHT", "wont-fit": "NO-FIT"}
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
@dataclass
class ModelInfo:
    """Description of one model known to the registry: its identifiers,
    provider, capability flags and on-disk size."""

    id: str                    # LiteLLM-compatible model id, e.g. "ollama/gemma3:27b"
    name: str                  # bare name, e.g. "gemma3:27b"
    provider: str              # "ollama" | "anthropic" | "openai" | …
    supports_tool_calling: bool = False  # whether native tool calling is assumed to work
    supports_json_mode: bool = False     # whether JSON output mode is assumed to work
    context_window: int = 8192           # context size; 8192 unless stated otherwise
    size_bytes: int = 0                  # size in bytes; 0 means unknown / not applicable
    tags: list[str] = field(default_factory=list)  # free-form tags; not set by the visible code
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
# Heuristic capability flags, based on model name substrings.
# Conservative defaults — override per-id as needed.
#
# All three tuples are matched by _classify() as substrings of the
# lower-cased model name.

# Families treated as supporting native tool calling.
_NATIVE_TOOL_CALLING = ("qwen3-coder", "qwen2.5-coder", "llama3.1", "llama3.2",
                        "llama3", "mistral", "nemotron", "kimi", "glm", "qwen3-vl")
# Families treated as NOT supporting native tool calling; checked first, so
# a match here wins over _NATIVE_TOOL_CALLING.
_NO_NATIVE_TOOL_CALLING = ("gemma", "gemma2", "gemma3", "gemma4")
# Families assigned the larger (32000) context window by _classify().
_LARGE_CONTEXT = ("qwen3", "kimi", "nemotron", "gemma3", "gemma4")
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _classify(name: str) -> tuple[bool, bool, int]:
    """Derive capability flags from a model name.

    Returns a ``(supports_tool_calling, supports_json_mode,
    context_window)`` tuple. Tool calling is only reported for names
    that match a known-good family and no known-bad family, so unknown
    models are treated conservatively.
    """
    lowered = name.lower()

    denied = any(marker in lowered for marker in _NO_NATIVE_TOOL_CALLING)
    allowed = any(marker in lowered for marker in _NATIVE_TOOL_CALLING)
    supports_tools = allowed and not denied

    # Ollama supports JSON output via response_format for most models
    supports_json = True

    if any(marker in lowered for marker in _LARGE_CONTEXT):
        window = 32000
    else:
        window = 8192
    return supports_tools, supports_json, window
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
class ModelRegistry:
    """Detect local + cloud models and expose capability info.

    The registry starts empty; call :meth:`refresh` to populate it.
    Presets (``fast``, ``balanced``, ``powerful``) are resolved against
    the Ollama models currently in the registry, ranked by
    ``size_bytes``.
    """

    def __init__(self) -> None:
        # Keyed by ModelInfo.id; insertion order is Ollama models first,
        # then the hard-coded cloud models.
        self._models: dict[str, ModelInfo] = {}

    async def refresh(self) -> None:
        """Re-query Ollama for its installed models.

        The new mapping is built completely before the existing one is
        replaced, so the registry is never observed empty while the
        ``ollama list`` subprocess is awaited, and a cancelled refresh
        leaves the previous contents intact.
        """
        fresh: dict[str, ModelInfo] = {}
        for info in await _ollama_list():
            fresh[info.id] = info
        for info in _known_cloud_models():
            fresh[info.id] = info
        # Swap in place (no await between these two calls) so the dict
        # object itself keeps its identity.
        self._models.clear()
        self._models.update(fresh)

    def models(self) -> list[ModelInfo]:
        """Return all known models as a new list."""
        return list(self._models.values())

    def get(self, model_id: str) -> ModelInfo | None:
        """Return the entry for ``model_id``, or ``None`` if unknown."""
        return self._models.get(model_id)

    def resolve(self, name_or_preset: str) -> str:
        """Resolve a preset name or pass through a concrete model id.

        Preset names are matched case-insensitively. When a preset
        cannot be resolved (no Ollama model in the registry) the input
        is returned unchanged, exactly like a concrete model id.
        """
        pickers = {
            "fast": self._pick_smallest_ollama,
            "balanced": self._pick_balanced_ollama,
            "powerful": self._pick_largest_ollama,
        }
        picker = pickers.get(name_or_preset.lower())
        if picker is None:
            return name_or_preset
        return picker() or name_or_preset

    # ------------------------------------------------------------------

    def _ollama_models(self) -> list[ModelInfo]:
        """Return the registry entries whose provider is ``ollama``."""
        return [m for m in self._models.values() if m.provider == "ollama"]

    def _pick_smallest_ollama(self) -> str | None:
        """Id of the smallest Ollama model, or ``None`` if there is none."""
        candidates = self._ollama_models()
        if not candidates:
            return None
        # Unknown sizes (0) rank as infinitely large so they are picked last.
        return min(candidates, key=lambda m: m.size_bytes or float("inf")).id

    def _pick_largest_ollama(self) -> str | None:
        """Id of the largest Ollama model, or ``None`` if there is none."""
        candidates = self._ollama_models()
        if not candidates:
            return None
        return max(candidates, key=lambda m: m.size_bytes or 0).id

    def _pick_balanced_ollama(self) -> str | None:
        """Id of the median-sized Ollama model, or ``None`` if there is none.

        Prefers a mid-size model with tool calling if available;
        otherwise falls back to the median of all Ollama models.
        """
        candidates = sorted(self._ollama_models(), key=lambda m: m.size_bytes or 0)
        if not candidates:
            return None
        tooled = [m for m in candidates if m.supports_tool_calling]
        pool = tooled or candidates
        return pool[len(pool) // 2].id
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
# --- Ollama API via CLI (no extra deps) ---
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
async def _ollama_list() -> list[ModelInfo]:
    """Query `ollama list` and return parsed ModelInfo entries.

    The command is run directly (no shell) with a 10 second timeout.
    Any failure to launch, a timeout, or a non-zero exit status yields
    an empty list; a child process that is still running when the
    failure happens is killed so it is not left behind.

    Each data row is split on whitespace: the first column is taken as
    the model name and the third and fourth columns as the size
    (e.g. ``17 GB``). Rows with fewer than three columns are skipped.
    """
    proc = None
    try:
        proc = await asyncio.create_subprocess_exec(
            "ollama", "list",
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=10)
    except Exception as e:
        if proc is not None and proc.returncode is None:
            # Timed out (or failed) mid-flight: do not leak the child.
            try:
                proc.kill()
            except ProcessLookupError:
                pass  # it exited on its own in the meantime
            else:
                await proc.wait()
        logger.debug("ollama list failed: %s", e)
        return []

    if proc.returncode != 0:
        return []

    # Tolerate stray undecodable bytes instead of raising.
    lines = stdout.decode(errors="replace").splitlines()

    models: list[ModelInfo] = []
    # Skip header row
    for line in lines[1:]:
        parts = line.split()
        if len(parts) < 3:
            continue
        name = parts[0]
        # size is usually parts[2] + parts[3] (e.g. "17 GB")
        size_str = parts[2] + " " + parts[3] if len(parts) >= 4 else "0 MB"
        size_bytes = _parse_size(size_str)

        tool_calling, json_mode, context = _classify(name)
        models.append(ModelInfo(
            id=f"ollama/{name}",
            name=name,
            provider="ollama",
            supports_tool_calling=tool_calling,
            supports_json_mode=json_mode,
            context_window=context,
            size_bytes=size_bytes,
        ))
    return models
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def _parse_size(s: str) -> int:
|
|
290
|
+
"""Parse '17 GB' / '986 MB' into bytes."""
|
|
291
|
+
try:
|
|
292
|
+
num_str, unit = s.split()
|
|
293
|
+
num = float(num_str)
|
|
294
|
+
except (ValueError, AttributeError):
|
|
295
|
+
return 0
|
|
296
|
+
unit = unit.upper()
|
|
297
|
+
mult = {"B": 1, "KB": 1024, "MB": 1024**2, "GB": 1024**3, "TB": 1024**4}.get(unit, 1)
|
|
298
|
+
return int(num * mult)
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def _known_cloud_models() -> list[ModelInfo]:
    """Return the built-in list of cloud models callers may select.

    Every entry is an Anthropic model whose id doubles as its name, with
    tool calling and JSON mode enabled and a 200000-token context window.
    """
    anthropic_ids = ("claude-opus-4-7", "claude-sonnet-4-6")
    return [
        ModelInfo(
            id=model_id,
            name=model_id,
            provider="anthropic",
            supports_tool_calling=True,
            supports_json_mode=True,
            context_window=200000,
        )
        for model_id in anthropic_ids
    ]
|