caudate-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. api/__init__.py +5 -0
  2. api/anthropic_compat.py +1518 -0
  3. api/artifact_viewer.py +366 -0
  4. api/caudate_middleware.py +618 -0
  5. api/forge_bootstrapper_routes.py +377 -0
  6. api/forge_routes.py +630 -0
  7. api/forge_system_routes.py +294 -0
  8. api/openai_compat.py +1993 -0
  9. api/server.py +667 -0
  10. api/storyboard_page.py +677 -0
  11. caudate_cli-0.1.0.dist-info/METADATA +354 -0
  12. caudate_cli-0.1.0.dist-info/RECORD +153 -0
  13. caudate_cli-0.1.0.dist-info/WHEEL +5 -0
  14. caudate_cli-0.1.0.dist-info/entry_points.txt +2 -0
  15. caudate_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
  16. caudate_cli-0.1.0.dist-info/top_level.txt +14 -0
  17. cognos_mcp/__init__.py +4 -0
  18. cognos_mcp/bridge.py +41 -0
  19. cognos_mcp/client.py +70 -0
  20. cognos_mcp/config.py +49 -0
  21. cognos_mcp/server.py +66 -0
  22. config.py +82 -0
  23. core/__init__.py +0 -0
  24. core/agent.py +468 -0
  25. core/agentic_loop.py +731 -0
  26. core/anthropic_auth.py +91 -0
  27. core/background.py +113 -0
  28. core/banner.py +134 -0
  29. core/bootstrap.py +292 -0
  30. core/citations.py +131 -0
  31. core/compaction.py +109 -0
  32. core/constitution.py +198 -0
  33. core/diff_viewer.py +87 -0
  34. core/export.py +85 -0
  35. core/file_refs.py +119 -0
  36. core/files.py +199 -0
  37. core/hooks.py +209 -0
  38. core/image.py +599 -0
  39. core/input.py +91 -0
  40. core/loop.py +238 -0
  41. core/memory_md.py +147 -0
  42. core/notifications.py +99 -0
  43. core/ownership.py +181 -0
  44. core/paste.py +81 -0
  45. core/permissions.py +210 -0
  46. core/plan_mode.py +215 -0
  47. core/sandbox_prompt.py +185 -0
  48. core/scheduler.py +195 -0
  49. core/schemas.py +202 -0
  50. core/session.py +90 -0
  51. core/settings.py +132 -0
  52. core/skills.py +398 -0
  53. core/slash_commands.py +977 -0
  54. core/statusline.py +61 -0
  55. core/subagent.py +300 -0
  56. core/thinking.py +50 -0
  57. core/updater.py +122 -0
  58. core/usage.py +109 -0
  59. core/worktree.py +93 -0
  60. execution/__init__.py +0 -0
  61. execution/executor.py +329 -0
  62. execution/plugins.py +108 -0
  63. execution/tools/__init__.py +0 -0
  64. execution/tools/agent_tool.py +107 -0
  65. execution/tools/agentic_tool.py +297 -0
  66. execution/tools/artifact_tool.py +191 -0
  67. execution/tools/ask_user_question_tool.py +137 -0
  68. execution/tools/base.py +81 -0
  69. execution/tools/calculator_tool.py +137 -0
  70. execution/tools/cognos_card_tool.py +124 -0
  71. execution/tools/cron_tool.py +215 -0
  72. execution/tools/datetime_tool.py +215 -0
  73. execution/tools/describe_image_tool.py +161 -0
  74. execution/tools/draw_tool.py +164 -0
  75. execution/tools/edit_image_tool.py +262 -0
  76. execution/tools/edit_tool.py +245 -0
  77. execution/tools/file_tool.py +90 -0
  78. execution/tools/find_anywhere_tool.py +255 -0
  79. execution/tools/forge_feature_tools.py +377 -0
  80. execution/tools/glob_tool.py +59 -0
  81. execution/tools/grep_tool.py +89 -0
  82. execution/tools/http_request_tool.py +224 -0
  83. execution/tools/load_skill_tool.py +104 -0
  84. execution/tools/longcat_avatar_tool.py +384 -0
  85. execution/tools/mcp_tool.py +100 -0
  86. execution/tools/notebook_tool.py +279 -0
  87. execution/tools/openapi_tool.py +440 -0
  88. execution/tools/plan_mode_tool.py +95 -0
  89. execution/tools/push_notification_tool.py +157 -0
  90. execution/tools/python_tool.py +61 -0
  91. execution/tools/respond_tool.py +40 -0
  92. execution/tools/sandbox_tool.py +378 -0
  93. execution/tools/search_tool.py +153 -0
  94. execution/tools/semantic_search_tool.py +106 -0
  95. execution/tools/shell_tool.py +283 -0
  96. execution/tools/speak_tool.py +134 -0
  97. execution/tools/storyboard_tool.py +727 -0
  98. execution/tools/system_info_tool.py +212 -0
  99. execution/tools/task_tool.py +323 -0
  100. execution/tools/think_tool.py +49 -0
  101. execution/tools/transcribe_audio_tool.py +86 -0
  102. execution/tools/update_memory_tool.py +92 -0
  103. execution/tools/web_fetch_tool.py +82 -0
  104. execution/tools/worktree_tool.py +174 -0
  105. llm/__init__.py +0 -0
  106. llm/fallback.py +116 -0
  107. llm/models.py +320 -0
  108. llm/provider.py +1356 -0
  109. llm/router.py +373 -0
  110. main.py +1889 -0
  111. memory/__init__.py +0 -0
  112. memory/episodic.py +99 -0
  113. memory/procedural.py +145 -0
  114. memory/semantic.py +71 -0
  115. memory/working.py +64 -0
  116. nn/__init__.py +43 -0
  117. nn/auto_evolve.py +245 -0
  118. nn/caudate.py +136 -0
  119. nn/config.py +141 -0
  120. nn/consolidator.py +81 -0
  121. nn/data.py +1635 -0
  122. nn/encoder.py +258 -0
  123. nn/forge_advisor.py +303 -0
  124. nn/format.py +235 -0
  125. nn/heads.py +432 -0
  126. nn/observer.py +994 -0
  127. nn/policy.py +214 -0
  128. nn/runtime.py +343 -0
  129. nn/scorer.py +175 -0
  130. nn/trainer.py +515 -0
  131. nn/vision.py +352 -0
  132. personality/__init__.py +23 -0
  133. personality/engine.py +129 -0
  134. personality/identity.py +144 -0
  135. personality/inner_voice.py +100 -0
  136. personality/mood.py +205 -0
  137. planning/__init__.py +0 -0
  138. planning/dev_server.py +221 -0
  139. planning/forge_models.py +718 -0
  140. planning/orchestrator.py +1363 -0
  141. planning/planner.py +451 -0
  142. planning/task_graph.py +61 -0
  143. reflection/__init__.py +0 -0
  144. reflection/meta_learner.py +156 -0
  145. reflection/reflector.py +127 -0
  146. ui/__init__.py +5 -0
  147. ui/display.py +88 -0
  148. voice/__init__.py +0 -0
  149. voice/conversation.py +125 -0
  150. voice/listener.py +111 -0
  151. voice/speaker.py +59 -0
  152. voice/stt.py +126 -0
  153. voice/tts.py +214 -0
nn/encoder.py ADDED
@@ -0,0 +1,258 @@
1
+ """State encoder — turns Cognos's per-turn state into a tensor.
2
+
3
+ Three channels are fused into a sequence the controller can attend over:
4
+
5
+ 1. Text channel — recent N messages embedded with sentence-transformers.
6
+ Frozen during training (kept lightweight) — we learn on top of it
7
+ rather than fine-tuning the embedder.
8
+
9
+ 2. Tool history channel — the last K tool calls as integer ids,
10
+ embedded with a learned `nn.Embedding`.
11
+
12
+ 3. Mood channel — 4 continuous floats projected into d_model space.
13
+
14
+ The output of the encoder is a (B, L, d_model) tensor that flows into the
15
+ transformer controller. L = 1 ([CLS]) + msg_window + history_window + 1 (mood token),
16
+ plus image_window when the vision channel is enabled. The single learned [CLS] token is prepended so heads can read off a fixed
17
+ position regardless of how the channels are weighted at attention time.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import logging
23
+ import os
24
+ from typing import Any
25
+
26
+ # Defensive — block transformers' TF import path before sentence-transformers loads.
27
+ os.environ.setdefault("USE_TF", "0")
28
+
29
+ import torch
30
+ import torch.nn as nn
31
+
32
+ from nn.config import NNConfig
33
+ from nn.vision import VisionEncoder
34
+
35
+ logger = logging.getLogger(__name__)
36
+
37
+
38
class _SentenceEmbedder:
    """Lazy wrapper around sentence-transformers.

    The real model is loaded on the first ``encode`` call. If loading fails
    for any reason (library not installed, model unavailable, ...) the
    wrapper falls back to a deterministic hash-based fake embedder — so the
    rest of the pipeline (data loading, training loop wiring) still
    exercises end-to-end in an offline test environment.
    """

    def __init__(self, model_name: str, embed_dim: int):
        self.model_name = model_name
        self.embed_dim = embed_dim
        self._model = None
        self._fake = False
        # Device string of the loaded model ("cuda" / "cpu"). Initialised
        # here so the attribute exists before loading and on the fallback
        # path, instead of only after a successful load.
        self._device = None

    def _load(self) -> None:
        """Load the sentence-transformers model once.

        No-op when the model is already loaded or the fake embedder is
        active. Any exception raised while importing or constructing the
        model switches this instance to the hash embedder.
        """
        if self._model is not None or self._fake:
            return
        try:
            from sentence_transformers import SentenceTransformer
            # Place the embedder on CUDA when available. Without this the
            # MiniLM forward runs on CPU and the numpy round-trip dominates
            # step time (was ~3 s/step with a 5.5M trunk on a 3090).
            device = "cuda" if torch.cuda.is_available() else "cpu"
            self._model = SentenceTransformer(self.model_name, device=device)
            self._device = device
            real_dim = self._model.get_sentence_embedding_dimension()
            if real_dim != self.embed_dim:
                # Only a warning: the configured dim is what downstream
                # projections were sized with.
                logger.warning(
                    f"Embedder dim {real_dim} != configured {self.embed_dim} — "
                    "you'll need to retrain if you change models"
                )
        except Exception as e:
            logger.warning(
                f"sentence-transformers unavailable ({e}); "
                "falling back to deterministic hash embedder"
            )
            self._fake = True

    def encode(self, texts: list[str]) -> torch.Tensor:
        """Embed ``texts`` and return one float row per input string.

        Uses the hash embedder (CPU tensor of width ``embed_dim``) when the
        real model is unavailable; otherwise returns normalized
        sentence-transformers embeddings on the embedder's device.
        """
        self._load()
        if self._fake or self._model is None:
            return _hash_embed(texts, self.embed_dim)
        with torch.no_grad():
            # Keep the output as a tensor on the embedder's device — no
            # CPU/numpy round-trip. Caller (.to(self.device) in encoder.py)
            # handles any final placement if devices differ.
            t = self._model.encode(
                texts,
                convert_to_tensor=True,
                normalize_embeddings=True,
            )
        # sentence-transformers runs encode() under torch.inference_mode().
        # The returned tensor is an "inference tensor" that autograd refuses
        # to use downstream (the old numpy round-trip detached this flag
        # accidentally). Clone strips it.
        return t.clone().float()
92
+
93
+
94
def _hash_embed(texts: list[str], dim: int) -> torch.Tensor:
    """Placeholder embedder: hash each character into a fixed-width vector.

    Only the first ``dim * 2`` characters of every text contribute. Each
    character adds a small weight to one slot chosen from its position and
    code point; rows are then L2-normalised (all-zero rows stay zero). The
    result carries NO semantic meaning — it exists purely so the pipeline
    can run offline.
    """
    char_budget = dim * 2
    vectors = torch.zeros((len(texts), dim), dtype=torch.float32)
    for row_idx, text in enumerate(texts):
        row = vectors[row_idx]  # view: in-place writes land in `vectors`
        for pos, ch in enumerate((text or "")[:char_budget]):
            code = ord(ch)
            row[(pos * 16807 + code) % dim] += (code % 17) / 17.0
    lengths = vectors.norm(dim=-1, keepdim=True).clamp(min=1e-8)
    return vectors / lengths
103
+
104
+
105
class StateEncoder(nn.Module):
    """Encode (messages, tool_history, mood, images) → (B, L, d_model).

    Sequence layout along dim 1: one learned [CLS] token, ``msg_window``
    text tokens, ``history_window`` tool tokens, one mood token and — when
    ``cfg.use_vision`` is set — ``image_window`` image tokens. Channel-type
    and position embeddings are added to every token before dropout.
    """

    def __init__(self, cfg: NNConfig):
        super().__init__()
        self.cfg = cfg

        # Sentence-transformer is frozen — we project its output instead.
        self.text_embedder = _SentenceEmbedder(cfg.text_encoder_name, cfg.text_embed_dim)
        self.text_proj = nn.Linear(cfg.text_embed_dim, cfg.d_model)

        # Learned tool-id embedding for the history channel.
        self.tool_embed = nn.Embedding(
            cfg.tool_vocab_size, cfg.tool_embed_dim,
            padding_idx=cfg.tool_pad_token,
        )
        self.tool_proj = nn.Linear(cfg.tool_embed_dim, cfg.d_model)

        # Mood vector (cfg.mood_dim floats) — project up to d_model.
        self.mood_proj = nn.Sequential(
            nn.Linear(cfg.mood_dim, cfg.d_model),
            nn.GELU(),
            nn.Linear(cfg.d_model, cfg.d_model),
        )

        # Vision channel — frozen image embedder, projected to d_model.
        # Backend is chosen at config time (CLIP for light, InternVL2
        # for rich semantic features). Caudate learns on top of these.
        if cfg.use_vision:
            from nn.vision import make_vision_encoder
            self.vision_encoder = make_vision_encoder(
                backend=cfg.vision_backend,
                model_name=cfg.vision_encoder_name,
                dtype=getattr(cfg, "vision_dtype", "bfloat16"),
            )
            self.vision_proj = nn.Sequential(
                nn.Linear(cfg.vision_embed_dim, cfg.d_model),
                nn.GELU(),
                nn.Linear(cfg.d_model, cfg.d_model),
            )
        else:
            self.vision_encoder = None
            self.vision_proj = None

        # Channel-type embedding so the transformer knows which kind of
        # token it's looking at (cls=0, text=1, tool=2, mood=3, image=4).
        n_channel_types = 5 if cfg.use_vision else 4
        self.type_embed = nn.Embedding(n_channel_types, cfg.d_model)
        self.cls_token = nn.Parameter(torch.zeros(1, 1, cfg.d_model))
        nn.init.normal_(self.cls_token, std=0.02)

        # Position embedding across the fused sequence:
        # [CLS] + text window + tool window + mood (+ image window).
        max_len = (
            1 + cfg.msg_window + cfg.history_window + 1
            + (cfg.image_window if cfg.use_vision else 0)
        )
        self.pos_embed = nn.Embedding(max_len, cfg.d_model)
        self.dropout = nn.Dropout(cfg.dropout)

    @property
    def device(self) -> torch.device:
        """Device of this module's parameters (taken from the first one)."""
        return next(self.parameters()).device

    @staticmethod
    def _fit_window(items, window: int) -> list[str]:
        """Fit ``items`` to exactly ``window`` entries.

        Keeps the most recent (last) ``window`` items and left-pads with
        empty strings when there are fewer. A non-positive window yields an
        empty list — a plain ``items[-window:]`` would return *everything*
        for ``window == 0`` because ``-0 == 0``.
        """
        if window <= 0:
            return []
        recent = list(items)[-window:]
        return [""] * (window - len(recent)) + recent

    def encode_messages(self, messages: list[list[str]]) -> torch.Tensor:
        """Embed a batch of message lists. messages[b] is a list of strings.

        Each list is fitted to ``msg_window``: only the most recent messages
        are kept, and shorter lists are padded on the LEFT with empty
        strings so the newest message always sits in the last slot.
        Returns a (B, msg_window, text_embed_dim) tensor on ``self.device``.
        """
        B = len(messages)
        W = self.cfg.msg_window
        flat = [m for batch in messages for m in self._fit_window(batch, W)]
        emb = self.text_embedder.encode(flat).to(self.device)  # (B*W, text_embed_dim)
        return emb.view(B, W, self.cfg.text_embed_dim)

    def encode_images(
        self, image_paths: list[list[str]],
    ) -> torch.Tensor:
        """Embed a batch of per-sample image-path lists.

        image_paths[b] is a list of paths to images attached at this turn.
        Each list is fitted to ``image_window``: the most recent paths are
        kept and shorter lists are left-padded with empty-string paths
        (NOTE(review): how the vision encoder treats "" is defined in
        nn.vision — confirm there). Returns a
        (B, image_window, vision_embed_dim) tensor on ``self.device``, or an
        empty 1-D tensor when the vision channel is disabled.
        """
        if not self.cfg.use_vision or self.vision_encoder is None:
            return torch.zeros(0)
        B = len(image_paths)
        W = self.cfg.image_window
        flat: list[str] = []
        for paths in image_paths:
            flat.extend(self._fit_window(paths, W))
        emb = self.vision_encoder.encode_paths(flat).to(self.device)
        return emb.view(B, W, self.cfg.vision_embed_dim)

    def forward(
        self,
        messages: list[list[str]],
        tool_ids: torch.Tensor,  # (B, history_window) long
        mood: torch.Tensor,  # (B, mood_dim) float
        image_paths: list[list[str]] | None = None,  # (B,) list of paths
    ) -> torch.Tensor:
        """Fuse all channels into one (B, L, d_model) sequence.

        ``image_paths`` is only consulted when ``cfg.use_vision`` is set;
        ``None`` then means "no images for any sample".
        """
        B = len(messages)
        W_msg = self.cfg.msg_window
        W_tool = self.cfg.history_window
        W_img = self.cfg.image_window if self.cfg.use_vision else 0

        # 1. text channel
        text_emb = self.encode_messages(messages)
        text_proj = self.text_proj(text_emb)  # (B, W_msg, d)

        # 2. tool channel
        tool_emb = self.tool_embed(tool_ids.to(self.device))  # (B, W_tool, te)
        tool_proj = self.tool_proj(tool_emb)  # (B, W_tool, d)

        # 3. mood channel — single token
        mood_proj = self.mood_proj(mood.to(self.device)).unsqueeze(1)  # (B, 1, d)

        # [CLS] goes first so heads can read a fixed position.
        cls = self.cls_token.expand(B, -1, -1)
        chunks = [cls, text_proj, tool_proj, mood_proj]

        # 4. vision channel (optional)
        if self.cfg.use_vision:
            paths_in = image_paths if image_paths is not None else [[] for _ in range(B)]
            img_emb = self.encode_images(paths_in)  # (B, W_img, ve)
            img_proj = self.vision_proj(img_emb)  # (B, W_img, d)
            chunks.append(img_proj)
        x = torch.cat(chunks, dim=1)  # (B, L, d)

        # 5. add type + positional embeddings
        L = x.size(1)
        type_ids = torch.zeros(L, dtype=torch.long, device=self.device)
        # 0=cls, 1=text, 2=tool, 3=mood, 4=image
        type_ids[1:1 + W_msg] = 1
        type_ids[1 + W_msg:1 + W_msg + W_tool] = 2
        type_ids[1 + W_msg + W_tool] = 3  # mood (1 token)
        if self.cfg.use_vision:
            start = 2 + W_msg + W_tool
            type_ids[start:start + W_img] = 4
        type_embedding = self.type_embed(type_ids).unsqueeze(0)  # (1, L, d)

        pos_ids = torch.arange(L, device=self.device)
        pos_embedding = self.pos_embed(pos_ids).unsqueeze(0)  # (1, L, d)

        x = x + type_embedding + pos_embedding
        return self.dropout(x)
nn/forge_advisor.py ADDED
@@ -0,0 +1,303 @@
1
+ """Forge feature-level advisor — Caudate's two-way loop with the
2
+ orchestrator.
3
+
4
+ ADR 0006 calls for a single predictor `predict_feature_difficulty`
5
+ that drives four orchestrator behaviours:
6
+ B. Model selection per feature (system1 vs system2)
7
+ C. Backlog ordering when priorities tie
8
+ D. Early revision when a retry looks doomed
9
+
10
+ This module exports the prediction surface and falls back to a
11
+ **heuristic baseline** when no trained Caudate head is available yet.
12
+ The baseline is computed from the rolling history in
13
+ ``data/nn/feature_outcomes.jsonl``:
14
+
15
+ - per-model success rate (global)
16
+ - per-model average n_turns on success vs failure
17
+ - keyword bias on the feature text (the agent succeeded on these
18
+ kinds of feature before / didn't)
19
+
20
+ A swap to a real trained head changes only the body of
21
+ ``_predict_via_caudate``; the public functions and orchestrator
22
+ call-sites stay identical.
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import json
28
+ import logging
29
+ import math
30
+ import re
31
+ import time
32
+ from collections import Counter, defaultdict
33
+ from dataclasses import dataclass
34
+ from pathlib import Path
35
+ from typing import Any
36
+
37
+ logger = logging.getLogger(__name__)
38
+
39
+
40
# Rolling history of real feature outcomes — input to the heuristic baseline.
_OUTCOMES_PATH = Path("data/nn/feature_outcomes.jsonl")
# Audit log that every returned prediction is appended to.
_AUDIT_PATH = Path("data/nn/forge_predictions.jsonl")

# The advisor is loaded at most once per process so Caudate's weights are
# not reloaded on every feature decision. `_advisor_cache_attempted`
# records that a load was tried (even if it failed). Tests clear both via
# reset_advisor_cache().
_advisor_cache: object | None = None
_advisor_cache_attempted: bool = False

# Knobs for the heuristic baseline. Reasonable defaults for now; to be
# tuned from real data once Caudate's head is trained.
_DEFAULT_SUCCESS_PROB = 0.55
_MAX_FEATURES_FOR_BASELINE = 2000  # bound the rolling window read
53
+
54
@dataclass
class FeaturePrediction:
    """Outcome of a ``predict_feature_difficulty`` call.

    Fields:
        success_prob: estimated probability of success, in [0, 1].
        difficulty: convenience complement, ``1 - success_prob``.
        source: which predictor answered — 'caudate' | 'baseline' | 'fallback'.
        reason: short human-readable explanation of the estimate.
    """

    success_prob: float
    difficulty: float
    source: str
    reason: str
61
+
62
+
63
+ # ─── Public surface ───────────────────────────────────────────────────
64
+
65
+
66
def predict_feature_difficulty(
    feature_text: str,
    model_id: str | None = None,
) -> FeaturePrediction:
    """Best-effort estimate of P(success | feature, model).

    The trained Caudate head is consulted first; when it has no answer the
    heuristic baseline is used instead. Whichever prediction is returned is
    also written to the audit log. The orchestrator uses the result to
    decide on system2 promotion, backlog ordering by predicted difficulty,
    and early revision.
    """
    prediction = _predict_via_caudate(feature_text, model_id)
    if prediction is None:
        prediction = _predict_via_baseline(feature_text, model_id)
    _audit(feature_text, model_id, prediction)
    return prediction
80
+
81
+
82
def predict_failure_repeats(
    feature_text: str,
    n_prior_failures: int,
    model_id: str | None = None,
) -> float:
    """Estimate P(next retry fails | ``n_prior_failures`` failures so far).

    Starts from ``predict_feature_difficulty`` and halves the remaining
    success probability once per prior failure (negative counts are treated
    as zero) — a simple Bayesian-flavoured update. The returned failure
    probability is capped at 0.99. Used by ADR-0006 Phase D to decide
    whether to skip straight to revision rather than burn more sessions.
    """
    prior = predict_feature_difficulty(feature_text, model_id)
    halvings = max(0, n_prior_failures)
    remaining_success = prior.success_prob * (0.5 ** halvings)
    p_fail_again = 1.0 - remaining_success
    return float(min(0.99, p_fail_again))
97
+
98
+
99
+ # ─── Caudate path (real head — TODO: wire when trained) ──────────────
100
+
101
+
102
def reset_advisor_cache() -> None:
    """Forget the cached advisor and the "already tried" flag.

    The next ``_get_advisor`` call will attempt a fresh load. Used by tests
    that swap weights.
    """
    global _advisor_cache, _advisor_cache_attempted
    _advisor_cache, _advisor_cache_attempted = None, False
107
+
108
+
109
def _get_advisor():
    """Return the process-wide advisor, loading it on the first call only.

    A ``None`` result means a load was already attempted and the advisor is
    not available; later calls return the cached value without retrying.
    """
    global _advisor_cache, _advisor_cache_attempted
    if not _advisor_cache_attempted:
        # Mark the attempt up front so a failure below is never retried.
        _advisor_cache_attempted = True
        try:
            from nn.runtime import load_advisor  # type: ignore
        except Exception as exc:
            logger.debug(f"nn.runtime unavailable: {exc}")
            return None
        try:
            loaded = load_advisor()
        except Exception as exc:
            logger.debug(f"load_advisor failed: {exc}")
            loaded = None
        _advisor_cache = loaded
    return _advisor_cache
127
+
128
+
129
def _coerce_probability(value: Any) -> float | None:
    """Turn a raw head output into a probability clamped to [0, 1].

    Returns None when ``value`` cannot be converted to a float or is NaN.
    NaN needs an explicit check: ``max(0.0, min(1.0, nan))`` evaluates to
    1.0, which would silently report certain success.
    """
    try:
        p = float(value)
    except (TypeError, ValueError):
        return None
    if math.isnan(p):
        return None
    return max(0.0, min(1.0, p))


def _predict_via_caudate(
    feature_text: str, model_id: str | None,
) -> FeaturePrediction | None:
    """Trained Caudate path. Returns None when:
      - the advisor / checkpoint is unavailable,
      - the advisor has no ``predict_feature_success`` method,
      - the head call raises,
      - the head does not return a dict with a usable ``success_prob``
        (missing key, non-numeric value, or NaN).

    The advisor instance is cached so we don't reload weights every
    feature decision.
    """
    advisor = _get_advisor()
    if advisor is None:
        return None
    head_fn = getattr(advisor, "predict_feature_success", None)
    if head_fn is None:
        return None
    try:
        result = head_fn(feature_text=feature_text, model_id=model_id)
    except Exception as e:
        logger.debug(f"caudate feature head call raised: {e}")
        return None
    if not isinstance(result, dict) or "success_prob" not in result:
        return None
    p = _coerce_probability(result["success_prob"])
    if p is None:
        # Unusable output — let the caller fall back to the baseline rather
        # than crash or trust a bogus probability.
        logger.debug(
            f"caudate feature head returned unusable success_prob: "
            f"{result['success_prob']!r}"
        )
        return None
    return FeaturePrediction(
        success_prob=p, difficulty=1 - p,
        source="caudate",
        reason=str(result.get("reason", "trained head")),
    )
159
+
160
+
161
+ # ─── Heuristic baseline ───────────────────────────────────────────────
162
+
163
+
164
def _predict_via_baseline(
    feature_text: str, model_id: str | None,
) -> FeaturePrediction:
    """Corpus-driven heuristic used when no trained head answers.

    Reads the recent feature-outcome rows and starts from a success rate:
    the requested model's own rate when it has at least 3 recorded
    outcomes, otherwise the rate pooled over all models. A bounded keyword
    bias computed from the feature text is added and the sum is clamped to
    [0.05, 0.95]. With no history at all, ``_DEFAULT_SUCCESS_PROB`` is
    returned with source 'fallback'.
    """
    history = _load_recent_outcomes()
    if not history:
        return FeaturePrediction(
            success_prob=_DEFAULT_SUCCESS_PROB,
            difficulty=1 - _DEFAULT_SUCCESS_PROB,
            source="fallback",
            reason="no feature-outcome history yet",
        )

    # Group the boolean outcomes by the model that produced them.
    outcomes_by_model: dict[str, list[bool]] = defaultdict(list)
    for row in history:
        outcomes_by_model[row.get("model_used", "unknown")].append(
            bool(row.get("success"))
        )

    notes: list[str] = []
    # .get() avoids inserting an empty entry for an unseen model.
    own = outcomes_by_model.get(model_id) if model_id else None
    if own is not None and len(own) >= 3:
        rate = sum(own) / len(own)
        notes.append(f"model={model_id} n={len(own)} rate={rate:.2f}")
    else:
        # Too little (or no) history for this model: pool every outcome.
        pooled = [ok for outcomes in outcomes_by_model.values() for ok in outcomes]
        rate = sum(pooled) / max(1, len(pooled))
        notes.append(f"global n={len(pooled)} rate={rate:.2f}")

    # Tokens seen more often in past successes push the estimate up;
    # tokens seen more often in failures push it down.
    bias = _keyword_bias(feature_text, history)
    if bias != 0.0:
        notes.append(f"kw_bias={bias:+.3f}")

    p = max(0.05, min(0.95, rate + bias))
    return FeaturePrediction(
        success_prob=p,
        difficulty=1 - p,
        source="baseline",
        reason=" · ".join(notes),
    )
215
+
216
+
217
def _load_recent_outcomes() -> list[dict[str, Any]]:
    """Read the most recent feature-outcome rows from ``_OUTCOMES_PATH``.

    Only the last ``_MAX_FEATURES_FOR_BASELINE`` lines of the file are
    considered. Lines that are not valid JSON, or whose JSON value is not an
    object, are skipped — callers access rows with ``.get``. Returns an
    empty list when the file is missing or cannot be read.
    """
    from collections import deque

    try:
        # UTF-8 matches what the audit writer in this module uses;
        # errors="replace" keeps stray bytes from aborting the whole read.
        with _OUTCOMES_PATH.open(encoding="utf-8", errors="replace") as f:
            # A bounded deque keeps just the tail instead of every line.
            tail = deque(f, maxlen=_MAX_FEATURES_FOR_BASELINE)
    except OSError:
        # Covers a missing file as well as permission / I/O problems.
        return []

    rows: list[dict[str, Any]] = []
    for line in tail:
        try:
            row = json.loads(line)
        except (ValueError, RecursionError):
            # Garbage, blank, or absurdly nested line.
            continue
        if isinstance(row, dict):
            rows.append(row)
    return rows
233
+
234
+
235
# A keyword is a letter followed by at least three more alphanumerics.
_WORD_RE = re.compile(r"[a-zA-Z][a-zA-Z0-9]{3,}")
# Generic words excluded from the keyword statistics.
_STOPWORDS = frozenset({
    "feature", "implement", "create", "build", "with", "from", "that",
    "this", "they", "their", "have", "will", "should", "would", "could",
    "must", "needs", "need", "want", "wants", "make", "made", "makes",
    "into", "onto", "over", "under", "between", "where", "when", "what",
    "which", "while", "during", "before", "after", "above", "below",
    "page", "pages", "user", "users", "data", "code", "test", "tests",
})


def _keyword_bias(feature_text: str, history: list[dict]) -> float:
    """Small additive bias in [-0.15, +0.15] derived from keyword history.

    Each distinctive token of ``feature_text`` that occurs in at least 3
    history rows contributes ``(successes - failures) / occurrences``. The
    contributions are averaged over all of the text's distinctive tokens
    and squashed with tanh so heuristic noise cannot dominate. Returns 0.0
    when the text has no distinctive tokens.
    """

    def distinctive(raw):
        # Lower-cased keyword set with stopwords removed.
        lowered = (raw or "").lower()
        return {w for w in _WORD_RE.findall(lowered) if w not in _STOPWORDS}

    query_tokens = distinctive(feature_text)
    if not query_tokens:
        return 0.0

    # Per-token number of successful / failed history rows containing it.
    wins: Counter = Counter()
    losses: Counter = Counter()
    for row in history:
        row_tokens = distinctive(row.get("feature_text"))
        bucket = wins if row.get("success") else losses
        bucket.update(row_tokens)

    total = 0.0
    for tok in query_tokens:
        n_win = wins.get(tok, 0)
        n_loss = losses.get(tok, 0)
        n_seen = n_win + n_loss
        if n_seen >= 3:  # ignore tokens with too little evidence
            total += (n_win - n_loss) / n_seen
    # Squash to [-0.15, +0.15] via tanh
    return 0.15 * math.tanh(total / max(1, len(query_tokens)))
276
+
277
+
278
def _audit(
    feature_text: str, model_id: str | None, pred: FeaturePrediction,
) -> None:
    """Best-effort append of one prediction record to the audit JSONL.

    The record holds a timestamp, the first 200 characters of the feature
    text, the model id and the prediction's probability / source / reason,
    so predictions can later be correlated with actual outcomes. Any
    failure is logged at debug level and otherwise ignored.
    """
    try:
        log_path = _AUDIT_PATH
        log_path.parent.mkdir(parents=True, exist_ok=True)
        with log_path.open("a", encoding="utf-8") as sink:
            entry = {
                "ts": time.time(),
                "feature_text": feature_text[:200],
                "model_id": model_id,
                "success_prob": pred.success_prob,
                "source": pred.source,
                "reason": pred.reason,
            }
            sink.write(json.dumps(entry) + "\n")
    except Exception as exc:
        logger.debug(f"forge_advisor audit write failed: {exc}")
296
+
297
+
298
+ __all__ = [
299
+ "FeaturePrediction",
300
+ "predict_feature_difficulty",
301
+ "predict_failure_repeats",
302
+ "reset_advisor_cache",
303
+ ]