caudate-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- api/__init__.py +5 -0
- api/anthropic_compat.py +1518 -0
- api/artifact_viewer.py +366 -0
- api/caudate_middleware.py +618 -0
- api/forge_bootstrapper_routes.py +377 -0
- api/forge_routes.py +630 -0
- api/forge_system_routes.py +294 -0
- api/openai_compat.py +1993 -0
- api/server.py +667 -0
- api/storyboard_page.py +677 -0
- caudate_cli-0.1.0.dist-info/METADATA +354 -0
- caudate_cli-0.1.0.dist-info/RECORD +153 -0
- caudate_cli-0.1.0.dist-info/WHEEL +5 -0
- caudate_cli-0.1.0.dist-info/entry_points.txt +2 -0
- caudate_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
- caudate_cli-0.1.0.dist-info/top_level.txt +14 -0
- cognos_mcp/__init__.py +4 -0
- cognos_mcp/bridge.py +41 -0
- cognos_mcp/client.py +70 -0
- cognos_mcp/config.py +49 -0
- cognos_mcp/server.py +66 -0
- config.py +82 -0
- core/__init__.py +0 -0
- core/agent.py +468 -0
- core/agentic_loop.py +731 -0
- core/anthropic_auth.py +91 -0
- core/background.py +113 -0
- core/banner.py +134 -0
- core/bootstrap.py +292 -0
- core/citations.py +131 -0
- core/compaction.py +109 -0
- core/constitution.py +198 -0
- core/diff_viewer.py +87 -0
- core/export.py +85 -0
- core/file_refs.py +119 -0
- core/files.py +199 -0
- core/hooks.py +209 -0
- core/image.py +599 -0
- core/input.py +91 -0
- core/loop.py +238 -0
- core/memory_md.py +147 -0
- core/notifications.py +99 -0
- core/ownership.py +181 -0
- core/paste.py +81 -0
- core/permissions.py +210 -0
- core/plan_mode.py +215 -0
- core/sandbox_prompt.py +185 -0
- core/scheduler.py +195 -0
- core/schemas.py +202 -0
- core/session.py +90 -0
- core/settings.py +132 -0
- core/skills.py +398 -0
- core/slash_commands.py +977 -0
- core/statusline.py +61 -0
- core/subagent.py +300 -0
- core/thinking.py +50 -0
- core/updater.py +122 -0
- core/usage.py +109 -0
- core/worktree.py +93 -0
- execution/__init__.py +0 -0
- execution/executor.py +329 -0
- execution/plugins.py +108 -0
- execution/tools/__init__.py +0 -0
- execution/tools/agent_tool.py +107 -0
- execution/tools/agentic_tool.py +297 -0
- execution/tools/artifact_tool.py +191 -0
- execution/tools/ask_user_question_tool.py +137 -0
- execution/tools/base.py +81 -0
- execution/tools/calculator_tool.py +137 -0
- execution/tools/cognos_card_tool.py +124 -0
- execution/tools/cron_tool.py +215 -0
- execution/tools/datetime_tool.py +215 -0
- execution/tools/describe_image_tool.py +161 -0
- execution/tools/draw_tool.py +164 -0
- execution/tools/edit_image_tool.py +262 -0
- execution/tools/edit_tool.py +245 -0
- execution/tools/file_tool.py +90 -0
- execution/tools/find_anywhere_tool.py +255 -0
- execution/tools/forge_feature_tools.py +377 -0
- execution/tools/glob_tool.py +59 -0
- execution/tools/grep_tool.py +89 -0
- execution/tools/http_request_tool.py +224 -0
- execution/tools/load_skill_tool.py +104 -0
- execution/tools/longcat_avatar_tool.py +384 -0
- execution/tools/mcp_tool.py +100 -0
- execution/tools/notebook_tool.py +279 -0
- execution/tools/openapi_tool.py +440 -0
- execution/tools/plan_mode_tool.py +95 -0
- execution/tools/push_notification_tool.py +157 -0
- execution/tools/python_tool.py +61 -0
- execution/tools/respond_tool.py +40 -0
- execution/tools/sandbox_tool.py +378 -0
- execution/tools/search_tool.py +153 -0
- execution/tools/semantic_search_tool.py +106 -0
- execution/tools/shell_tool.py +283 -0
- execution/tools/speak_tool.py +134 -0
- execution/tools/storyboard_tool.py +727 -0
- execution/tools/system_info_tool.py +212 -0
- execution/tools/task_tool.py +323 -0
- execution/tools/think_tool.py +49 -0
- execution/tools/transcribe_audio_tool.py +86 -0
- execution/tools/update_memory_tool.py +92 -0
- execution/tools/web_fetch_tool.py +82 -0
- execution/tools/worktree_tool.py +174 -0
- llm/__init__.py +0 -0
- llm/fallback.py +116 -0
- llm/models.py +320 -0
- llm/provider.py +1356 -0
- llm/router.py +373 -0
- main.py +1889 -0
- memory/__init__.py +0 -0
- memory/episodic.py +99 -0
- memory/procedural.py +145 -0
- memory/semantic.py +71 -0
- memory/working.py +64 -0
- nn/__init__.py +43 -0
- nn/auto_evolve.py +245 -0
- nn/caudate.py +136 -0
- nn/config.py +141 -0
- nn/consolidator.py +81 -0
- nn/data.py +1635 -0
- nn/encoder.py +258 -0
- nn/forge_advisor.py +303 -0
- nn/format.py +235 -0
- nn/heads.py +432 -0
- nn/observer.py +994 -0
- nn/policy.py +214 -0
- nn/runtime.py +343 -0
- nn/scorer.py +175 -0
- nn/trainer.py +515 -0
- nn/vision.py +352 -0
- personality/__init__.py +23 -0
- personality/engine.py +129 -0
- personality/identity.py +144 -0
- personality/inner_voice.py +100 -0
- personality/mood.py +205 -0
- planning/__init__.py +0 -0
- planning/dev_server.py +221 -0
- planning/forge_models.py +718 -0
- planning/orchestrator.py +1363 -0
- planning/planner.py +451 -0
- planning/task_graph.py +61 -0
- reflection/__init__.py +0 -0
- reflection/meta_learner.py +156 -0
- reflection/reflector.py +127 -0
- ui/__init__.py +5 -0
- ui/display.py +88 -0
- voice/__init__.py +0 -0
- voice/conversation.py +125 -0
- voice/listener.py +111 -0
- voice/speaker.py +59 -0
- voice/stt.py +126 -0
- voice/tts.py +214 -0
nn/encoder.py
ADDED
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
"""State encoder — turns Cognos's per-turn state into a tensor.
|
|
2
|
+
|
|
3
|
+
Three channels (plus an optional vision channel when cfg.use_vision is set) are fused into a sequence the controller can attend over:
|
|
4
|
+
|
|
5
|
+
1. Text channel — recent N messages embedded with sentence-transformers.
|
|
6
|
+
Frozen during training (kept lightweight) — we learn on top of it
|
|
7
|
+
rather than fine-tuning the embedder.
|
|
8
|
+
|
|
9
|
+
2. Tool history channel — the last K tool calls as integer ids,
|
|
10
|
+
embedded with a learned `nn.Embedding`.
|
|
11
|
+
|
|
12
|
+
3. Mood channel — 4 continuous floats projected into d_model space.
|
|
13
|
+
|
|
14
|
+
The output of the encoder is a (B, L, d_model) tensor that flows into the
|
|
15
|
+
transformer controller. L = 1 ([CLS]) + msg_window + history_window + 1 (mood token) + image_window (only when vision is enabled).
|
|
16
|
+
A single learned [CLS] token is prepended so heads can read off a fixed
|
|
17
|
+
position regardless of how the channels are weighted at attention time.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import logging
|
|
23
|
+
import os
|
|
24
|
+
from typing import Any
|
|
25
|
+
|
|
26
|
+
# Defensive — block transformers' TF import path before sentence-transformers loads.
|
|
27
|
+
os.environ.setdefault("USE_TF", "0")
|
|
28
|
+
|
|
29
|
+
import torch
|
|
30
|
+
import torch.nn as nn
|
|
31
|
+
|
|
32
|
+
from nn.config import NNConfig
|
|
33
|
+
from nn.vision import VisionEncoder
|
|
34
|
+
|
|
35
|
+
logger = logging.getLogger(__name__)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class _SentenceEmbedder:
    """Deferred-loading facade over a sentence-transformers model.

    The model is only instantiated on the first ``encode`` call. If loading
    fails for any reason (library missing, download error, ...), the wrapper
    permanently switches to the deterministic ``_hash_embed`` placeholder so
    the rest of the pipeline can still run offline.
    """

    def __init__(self, model_name: str, embed_dim: int):
        self.model_name = model_name
        self.embed_dim = embed_dim
        # Neither loaded nor failed yet; both are resolved lazily by _load().
        self._model = None
        self._fake = False

    def _load(self) -> None:
        """Instantiate the real model once, or flip to the fake embedder."""
        already_resolved = self._fake or self._model is not None
        if already_resolved:
            return
        try:
            from sentence_transformers import SentenceTransformer

            # Prefer CUDA when present: per the original note, running the
            # embedder on CPU plus a numpy round-trip dominated step time.
            target = "cuda" if torch.cuda.is_available() else "cpu"
            self._model = SentenceTransformer(self.model_name, device=target)
            self._device = target
            real_dim = self._model.get_sentence_embedding_dimension()
            if real_dim != self.embed_dim:
                # Only a warning: the mismatch is not corrected here.
                logger.warning(
                    f"Embedder dim {real_dim} != configured {self.embed_dim} — "
                    "you'll need to retrain if you change models"
                )
        except Exception as e:
            logger.warning(
                f"sentence-transformers unavailable ({e}); "
                "falling back to deterministic hash embedder"
            )
            self._fake = True

    def encode(self, texts: list[str]) -> torch.Tensor:
        """Return one float embedding row per entry of ``texts``.

        Uses the real model when it loaded, otherwise the hash placeholder.
        """
        self._load()
        use_placeholder = self._fake or self._model is None
        if use_placeholder:
            return _hash_embed(texts, self.embed_dim)
        with torch.no_grad():
            # Ask for a tensor directly so the result stays on the embedder's
            # device; the caller moves it if its own device differs.
            embedded = self._model.encode(
                texts,
                convert_to_tensor=True,
                normalize_embeddings=True,
            )
            # Per the original note, sentence-transformers encodes under
            # inference mode and autograd rejects such tensors downstream;
            # clone() yields a regular tensor.
            return embedded.clone().float()
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _hash_embed(texts: list[str], dim: int) -> torch.Tensor:
    """Build a deterministic, unit-normalised placeholder embedding per text.

    Each of the first ``2 * dim`` characters adds a small weight to a bucket
    chosen from its position and code point. The vectors carry no semantic
    meaning; they only let the pipeline run without a real embedder.

    Returns a float32 tensor of shape ``(len(texts), dim)``. Empty or ``None``
    texts produce an all-zero row.
    """
    vectors = torch.zeros((len(texts), dim), dtype=torch.float32)
    limit = dim * 2
    for row, text in enumerate(texts):
        clipped = text[:limit] if text else ""
        for pos, char in enumerate(clipped):
            code = ord(char)
            bucket = (pos * 16807 + code) % dim
            vectors[row, bucket] += (code % 17) / 17.0
    # Clamp the norm so all-zero rows divide cleanly instead of producing NaN.
    lengths = vectors.norm(dim=-1, keepdim=True).clamp(min=1e-8)
    return vectors / lengths
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class StateEncoder(nn.Module):
    """Encode (messages, tool_history, mood, images) → (B, L, d_model).

    The fused sequence is laid out as
    ``[CLS] + msg_window text tokens + history_window tool tokens + 1 mood
    token (+ image_window image tokens when cfg.use_vision)``. A channel-type
    embedding and a position embedding are added to every token before
    dropout.
    """

    def __init__(self, cfg: NNConfig):
        super().__init__()
        self.cfg = cfg

        # Sentence-transformer is frozen — we project its output instead.
        self.text_embedder = _SentenceEmbedder(cfg.text_encoder_name, cfg.text_embed_dim)
        self.text_proj = nn.Linear(cfg.text_embed_dim, cfg.d_model)

        # Learned tool-id embedding for the history channel.
        self.tool_embed = nn.Embedding(
            cfg.tool_vocab_size, cfg.tool_embed_dim,
            padding_idx=cfg.tool_pad_token,
        )
        self.tool_proj = nn.Linear(cfg.tool_embed_dim, cfg.d_model)

        # Mood vector (cfg.mood_dim floats) — project up to d_model.
        self.mood_proj = nn.Sequential(
            nn.Linear(cfg.mood_dim, cfg.d_model),
            nn.GELU(),
            nn.Linear(cfg.d_model, cfg.d_model),
        )

        # Vision channel — image embedder chosen at config time, projected
        # to d_model. Imported lazily so the vision stack is only touched
        # when the channel is enabled.
        if cfg.use_vision:
            from nn.vision import make_vision_encoder
            self.vision_encoder = make_vision_encoder(
                backend=cfg.vision_backend,
                model_name=cfg.vision_encoder_name,
                dtype=getattr(cfg, "vision_dtype", "bfloat16"),
            )
            self.vision_proj = nn.Sequential(
                nn.Linear(cfg.vision_embed_dim, cfg.d_model),
                nn.GELU(),
                nn.Linear(cfg.d_model, cfg.d_model),
            )
        else:
            self.vision_encoder = None
            self.vision_proj = None

        # Channel-type embedding so the transformer knows which kind of
        # token it's looking at (cls=0, text=1, tool=2, mood=3, image=4).
        n_channel_types = 5 if cfg.use_vision else 4
        self.type_embed = nn.Embedding(n_channel_types, cfg.d_model)
        self.cls_token = nn.Parameter(torch.zeros(1, 1, cfg.d_model))
        nn.init.normal_(self.cls_token, std=0.02)

        # Position embedding across the fused sequence:
        # CLS + text + tools + mood (+ images).
        max_len = (
            1 + cfg.msg_window + cfg.history_window + 1
            + (cfg.image_window if cfg.use_vision else 0)
        )
        self.pos_embed = nn.Embedding(max_len, cfg.d_model)
        self.dropout = nn.Dropout(cfg.dropout)

    @property
    def device(self) -> torch.device:
        """Device of the module's first parameter."""
        return next(self.parameters()).device

    @staticmethod
    def _fit_window(items, window: int) -> list[str]:
        """Keep the last ``window`` items, left-padding with "" to that length.

        A non-positive window yields an empty list. The explicit guard is
        needed because ``seq[-0:]`` is ``seq[0:]`` and would keep everything.
        """
        if window <= 0:
            return []
        recent = list(items)[-window:]
        return [""] * (window - len(recent)) + recent

    def encode_messages(self, messages: list[list[str]]) -> torch.Tensor:
        """Embed a batch of message lists. messages[b] is a list of strings.

        Each list is cut to its most recent ``msg_window`` entries and
        left-padded with empty strings, so the newest message always sits in
        the last slot. Returns ``(B, msg_window, text_embed_dim)``.
        """
        B = len(messages)
        W = max(self.cfg.msg_window, 0)
        flat = [m for batch in messages for m in self._fit_window(batch, W)]
        if not flat:
            # Empty batch or zero-width window: nothing to embed, and no
            # reason to load the embedder.
            return torch.zeros(B, W, self.cfg.text_embed_dim, device=self.device)
        emb = self.text_embedder.encode(flat).to(self.device)  # (B*W, text_embed_dim)
        return emb.view(B, W, self.cfg.text_embed_dim)

    def encode_images(
        self, image_paths: list[list[str]],
    ) -> torch.Tensor:
        """Embed a batch of per-sample image-path lists.

        image_paths[b] is a list of paths to images attached at this turn.
        Each list is cut to its most recent ``image_window`` entries and
        left-padded with empty-string paths (how the vision encoder treats ""
        is defined in nn.vision, not visible here). Returns
        ``(B, image_window, vision_embed_dim)``, or an empty 1-D tensor when
        the vision channel is disabled.
        """
        if not self.cfg.use_vision or self.vision_encoder is None:
            return torch.zeros(0)
        B = len(image_paths)
        W = max(self.cfg.image_window, 0)
        flat: list[str] = []
        for paths in image_paths:
            flat.extend(self._fit_window(paths, W))
        if not flat:
            return torch.zeros(B, W, self.cfg.vision_embed_dim, device=self.device)
        emb = self.vision_encoder.encode_paths(flat).to(self.device)
        return emb.view(B, W, self.cfg.vision_embed_dim)

    def forward(
        self,
        messages: list[list[str]],
        tool_ids: torch.Tensor,      # (B, history_window) long
        mood: torch.Tensor,          # (B, mood_dim) float
        image_paths: list[list[str]] | None = None,  # (B,) list of paths
    ) -> torch.Tensor:
        """Fuse all channels into one ``(B, L, d_model)`` sequence.

        ``image_paths`` is only consulted when ``cfg.use_vision`` is set; a
        missing value is treated as "no images" for every sample.
        """
        B = len(messages)
        W_msg = self.cfg.msg_window
        W_tool = self.cfg.history_window
        W_img = self.cfg.image_window if self.cfg.use_vision else 0

        # 1. text channel
        text_emb = self.encode_messages(messages)
        text_proj = self.text_proj(text_emb)                   # (B, W_msg, d)

        # 2. tool channel
        tool_emb = self.tool_embed(tool_ids.to(self.device))   # (B, W_tool, te)
        tool_proj = self.tool_proj(tool_emb)                   # (B, W_tool, d)

        # 3. mood channel — single token
        mood_proj = self.mood_proj(mood.to(self.device)).unsqueeze(1)  # (B, 1, d)

        # 4. vision channel (optional)
        cls = self.cls_token.expand(B, -1, -1)
        chunks = [cls, text_proj, tool_proj, mood_proj]
        if self.cfg.use_vision:
            paths_in = image_paths if image_paths is not None else [[] for _ in range(B)]
            img_emb = self.encode_images(paths_in)             # (B, W_img, ve)
            img_proj = self.vision_proj(img_emb)               # (B, W_img, d)
            chunks.append(img_proj)
        x = torch.cat(chunks, dim=1)                           # (B, L, d)

        # 5. add type + positional embeddings
        L = x.size(1)
        type_ids = torch.zeros(L, dtype=torch.long, device=self.device)
        # 0=cls, 1=text, 2=tool, 3=mood, 4=image
        type_ids[1:1 + W_msg] = 1
        type_ids[1 + W_msg:1 + W_msg + W_tool] = 2
        type_ids[1 + W_msg + W_tool] = 3                       # mood (1 token)
        if self.cfg.use_vision:
            start = 2 + W_msg + W_tool
            type_ids[start:start + W_img] = 4
        type_embedding = self.type_embed(type_ids).unsqueeze(0)  # (1, L, d)

        pos_ids = torch.arange(L, device=self.device)
        pos_embedding = self.pos_embed(pos_ids).unsqueeze(0)   # (1, L, d)

        x = x + type_embedding + pos_embedding
        return self.dropout(x)
|
nn/forge_advisor.py
ADDED
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
"""Forge feature-level advisor — Caudate's two-way loop with the
|
|
2
|
+
orchestrator.
|
|
3
|
+
|
|
4
|
+
ADR 0006 calls for a single predictor `predict_feature_difficulty`
|
|
5
|
+
that drives four orchestrator behaviours:
|
|
6
|
+
B. Model selection per feature (system1 vs system2)
|
|
7
|
+
C. Backlog ordering when priorities tie
|
|
8
|
+
D. Early revision when a retry looks doomed
|
|
9
|
+
|
|
10
|
+
This module exports the prediction surface and falls back to a
|
|
11
|
+
**heuristic baseline** when no trained Caudate head is available yet.
|
|
12
|
+
The baseline is computed from the rolling history in
|
|
13
|
+
``data/nn/feature_outcomes.jsonl``:
|
|
14
|
+
|
|
15
|
+
- per-model success rate (global)
|
|
16
|
+
- per-model average n_turns on success vs failure
|
|
17
|
+
- keyword bias on the feature text (the agent succeeded on these
|
|
18
|
+
kinds of feature before / didn't)
|
|
19
|
+
|
|
20
|
+
A swap to a real trained head changes only the body of
|
|
21
|
+
``_predict_via_caudate``; the public functions and orchestrator
|
|
22
|
+
call-sites stay identical.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import json
|
|
28
|
+
import logging
|
|
29
|
+
import math
|
|
30
|
+
import re
|
|
31
|
+
import time
|
|
32
|
+
from collections import Counter, defaultdict
|
|
33
|
+
from dataclasses import dataclass
|
|
34
|
+
from pathlib import Path
|
|
35
|
+
from typing import Any
|
|
36
|
+
|
|
37
|
+
logger = logging.getLogger(__name__)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# Rolling feature-outcome history read by the heuristic baseline.
_OUTCOMES_PATH = Path("data/nn/feature_outcomes.jsonl")
# Append-only log of every prediction handed out, written by _audit().
_AUDIT_PATH = Path("data/nn/forge_predictions.jsonl")

# Process-wide advisor cache, so Caudate's weights are not reloaded on each
# feature decision. The "attempted" flag records that a load was tried, so a
# failed load (cache stays None) is not retried; reset_advisor_cache()
# clears both.
_advisor_cache: object | None = None
_advisor_cache_attempted: bool = False

# Knobs for the heuristic baseline — placeholder defaults until real data
# from a trained Caudate head is available to tune them.
_DEFAULT_SUCCESS_PROB = 0.55
_MAX_FEATURES_FOR_BASELINE = 2000  # how many trailing history rows are read
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@dataclass
class FeaturePrediction:
    """Value returned by ``predict_feature_difficulty``.

    Attributes:
        success_prob: Estimated probability of success, in [0, 1].
        difficulty: ``1 - success_prob``, stored for convenience.
        source: Which path produced the estimate —
            'caudate' | 'baseline' | 'fallback'.
        reason: Short human-readable explanation of the estimate.
    """

    success_prob: float
    difficulty: float
    source: str
    reason: str
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# ─── Public surface ───────────────────────────────────────────────────
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def predict_feature_difficulty(
    feature_text: str,
    model_id: str | None = None,
) -> FeaturePrediction:
    """Estimate P(success | feature, model) on a best-effort basis.

    The trained Caudate head is consulted first; when it yields nothing the
    heuristic baseline answers instead. Whichever prediction is returned is
    also appended to the audit log. The orchestrator uses the result for
    system2 promotion, backlog ordering and early revision.
    """
    prediction = _predict_via_caudate(feature_text, model_id)
    if prediction is None:
        prediction = _predict_via_baseline(feature_text, model_id)
    _audit(feature_text, model_id, prediction)
    return prediction
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def predict_failure_repeats(
    feature_text: str,
    n_prior_failures: int,
    model_id: str | None = None,
) -> float:
    """Estimate P(next retry fails | ``n_prior_failures`` failures so far).

    Starts from the base success probability for the feature and halves it
    once per prior failure (negative counts are treated as zero). The
    returned failure probability is capped at 0.99. ADR-0006 Phase D uses it
    to decide whether to go straight to revision.
    """
    prior = predict_feature_difficulty(feature_text, model_id)
    halvings = max(0, n_prior_failures)
    remaining_success = prior.success_prob * (0.5 ** halvings)
    fail_prob = 1.0 - remaining_success
    return float(min(0.99, fail_prob))
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
# ─── Caudate path (real head — TODO: wire when trained) ──────────────
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def reset_advisor_cache() -> None:
    """Forget the cached advisor and the "load already attempted" flag.

    The next ``_get_advisor`` call will try to load again; intended for
    tests that swap weights.
    """
    global _advisor_cache, _advisor_cache_attempted
    _advisor_cache, _advisor_cache_attempted = None, False
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _get_advisor():
    """Return the process-wide advisor, loading it on first use.

    Only one load is ever attempted (until ``reset_advisor_cache``). If
    importing ``nn.runtime`` or calling ``load_advisor`` fails, ``None`` is
    cached and returned on every later call.
    """
    global _advisor_cache, _advisor_cache_attempted
    if _advisor_cache_attempted:
        return _advisor_cache
    # Flag the attempt up front so a failure below is not retried per call.
    _advisor_cache_attempted = True

    try:
        from nn.runtime import load_advisor  # type: ignore
    except Exception as exc:
        logger.debug(f"nn.runtime unavailable: {exc}")
        return None

    try:
        loaded = load_advisor()
    except Exception as exc:
        logger.debug(f"load_advisor failed: {exc}")
        loaded = None
    _advisor_cache = loaded
    return _advisor_cache
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _predict_via_caudate(
    feature_text: str, model_id: str | None,
) -> FeaturePrediction | None:
    """Trained Caudate path. Returns None when:
      - no advisor could be loaded,
      - the advisor has no ``predict_feature_success`` method,
      - the head call raises,
      - the head's result is not a dict carrying ``success_prob``
        (per the original note, the head itself returns None while it is
        still at its init weights),
      - ``success_prob`` is not a number, or is NaN.

    Otherwise the probability is clamped to [0, 1] and wrapped in a
    ``FeaturePrediction`` with ``source="caudate"``.

    The advisor instance is cached so we don't reload weights every
    feature decision."""
    advisor = _get_advisor()
    if advisor is None:
        return None
    head_fn = getattr(advisor, "predict_feature_success", None)
    if head_fn is None:
        return None
    try:
        result = head_fn(feature_text=feature_text, model_id=model_id)
    except Exception as e:
        logger.debug(f"caudate feature head call raised: {e}")
        return None
    if not isinstance(result, dict) or "success_prob" not in result:
        return None
    try:
        raw = float(result["success_prob"])
    except (TypeError, ValueError) as e:
        logger.debug(f"caudate feature head returned non-numeric success_prob: {e}")
        return None
    # NaN must be rejected before clamping: min(1.0, nan) evaluates to 1.0,
    # which would turn a broken output into a maximally confident prediction.
    if math.isnan(raw):
        logger.debug("caudate feature head returned NaN success_prob")
        return None
    p = max(0.0, min(1.0, raw))
    return FeaturePrediction(
        success_prob=p, difficulty=1 - p,
        source="caudate",
        reason=str(result.get("reason", "trained head")),
    )
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
# ─── Heuristic baseline ───────────────────────────────────────────────
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _predict_via_baseline(
    feature_text: str, model_id: str | None,
) -> FeaturePrediction:
    """Corpus-driven heuristic used when no trained head answers.

    Takes the success rate of ``model_id`` from the recent outcome history
    when that model has at least 3 rows, otherwise the success rate over all
    rows, then adds the keyword bias for ``feature_text`` and clamps the sum
    to [0.05, 0.95]. With no history at all it returns
    ``_DEFAULT_SUCCESS_PROB`` tagged ``source="fallback"``.
    """
    history = _load_recent_outcomes()
    if not history:
        return FeaturePrediction(
            success_prob=_DEFAULT_SUCCESS_PROB,
            difficulty=1 - _DEFAULT_SUCCESS_PROB,
            source="fallback",
            reason="no feature-outcome history yet",
        )

    # Group success flags by the model that produced each outcome.
    outcomes_by_model: dict[str, list[bool]] = defaultdict(list)
    for row in history:
        model_key = row.get("model_used", "unknown")
        outcomes_by_model[model_key].append(bool(row.get("success")))

    reason_parts: list[str] = []

    # Model-specific rate, but only with enough samples to mean something.
    # .get() is used so a missing model does not create a defaultdict entry.
    own = outcomes_by_model.get(model_id) if model_id else None
    if own is not None and len(own) >= 3:
        rate = sum(own) / len(own)
        reason_parts.append(f"model={model_id} n={len(own)} rate={rate:.2f}")
    else:
        # Otherwise pool every row regardless of model.
        pooled = [flag for flags in outcomes_by_model.values() for flag in flags]
        rate = sum(pooled) / max(1, len(pooled))
        reason_parts.append(f"global n={len(pooled)} rate={rate:.2f}")

    # Nudge up or down by how tokens of this feature fared historically.
    bias = _keyword_bias(feature_text, history)
    if bias != 0.0:
        reason_parts.append(f"kw_bias={bias:+.3f}")

    p = max(0.05, min(0.95, rate + bias))
    return FeaturePrediction(
        success_prob=p,
        difficulty=1 - p,
        source="baseline",
        reason=" · ".join(reason_parts),
    )
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def _load_recent_outcomes() -> list[dict[str, Any]]:
    """Read the most recent feature_outcomes rows, skipping garbage lines.

    Returns at most ``_MAX_FEATURES_FOR_BASELINE`` rows from the end of
    ``_OUTCOMES_PATH``. A missing or unreadable file yields ``[]``. Lines
    that are not valid JSON, or that decode to something other than a JSON
    object, are dropped — callers use ``row.get(...)`` on every row.
    """
    # Local import: the module header only imports Counter and defaultdict.
    from collections import deque

    try:
        # utf-8 matches what _audit writes; errors="replace" keeps a stray
        # bad byte from raising UnicodeDecodeError (which is not an OSError)
        # and taking the whole prediction down.
        with _OUTCOMES_PATH.open(encoding="utf-8", errors="replace") as f:
            # A bounded deque keeps only the tail while streaming, instead
            # of loading every line into memory first.
            tail = deque(f, maxlen=_MAX_FEATURES_FOR_BASELINE)
    except OSError:
        # Covers a missing file too, so no separate exists() check is needed.
        return []

    rows: list[dict[str, Any]] = []
    for line in tail:
        try:
            row = json.loads(line)
        except (ValueError, RecursionError):
            continue
        if isinstance(row, dict):
            rows.append(row)
    return rows
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
# A "word" is a letter followed by at least three more letters or digits.
_WORD_RE = re.compile(r"[a-zA-Z][a-zA-Z0-9]{3,}")
# Words excluded from the keyword statistics.
_STOPWORDS = frozenset({
    "feature", "implement", "create", "build", "with", "from", "that",
    "this", "they", "their", "have", "will", "should", "would", "could",
    "must", "needs", "need", "want", "wants", "make", "made", "makes",
    "into", "onto", "over", "under", "between", "where", "when", "what",
    "which", "while", "during", "before", "after", "above", "below",
    "page", "pages", "user", "users", "data", "code", "test", "tests",
})


def _keyword_bias(feature_text: str, history: list[dict]) -> float:
    """Return a small additive bias, bounded by ±0.15, for ``feature_text``.

    Every non-stopword token of the feature that occurs in at least 3
    history rows contributes ``(successes - failures) / occurrences``. The
    contributions are averaged over all of the feature's tokens and squashed
    with tanh, so heuristic noise cannot dominate the prediction. Returns
    0.0 when the feature has no usable tokens.
    """
    lowered = (feature_text or "").lower()
    query_tokens = {
        tok for tok in _WORD_RE.findall(lowered) if tok not in _STOPWORDS
    }
    if not query_tokens:
        return 0.0

    # Count, per token, how many successful and failed rows mention it.
    # Tokens are de-duplicated per row, so a row counts at most once.
    wins: Counter = Counter()
    losses: Counter = Counter()
    for row in history:
        row_text = (row.get("feature_text") or "").lower()
        row_tokens = set(_WORD_RE.findall(row_text)) - _STOPWORDS
        bucket = wins if row.get("success") else losses
        bucket.update(row_tokens)

    total = 0.0
    for tok in query_tokens:
        s, f = wins[tok], losses[tok]
        seen = s + f
        if seen >= 3:
            total += (s - f) / seen
    return 0.15 * math.tanh(total / max(1, len(query_tokens)))
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def _audit(
    feature_text: str, model_id: str | None, pred: FeaturePrediction,
) -> None:
    """Append one JSON line describing ``pred`` to ``_AUDIT_PATH``.

    The row carries a timestamp, the first 200 characters of the feature
    text, the model id and the prediction's probability, source and reason,
    so predictions can later be correlated with actual outcomes. Best
    effort: any failure is logged at debug level and swallowed.
    """
    try:
        _AUDIT_PATH.parent.mkdir(parents=True, exist_ok=True)
        with _AUDIT_PATH.open("a", encoding="utf-8") as sink:
            record = {
                "ts": time.time(),
                "feature_text": feature_text[:200],
                "model_id": model_id,
                "success_prob": pred.success_prob,
                "source": pred.source,
                "reason": pred.reason,
            }
            sink.write(json.dumps(record) + "\n")
    except Exception as exc:
        logger.debug(f"forge_advisor audit write failed: {exc}")
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
__all__ = [
|
|
299
|
+
"FeaturePrediction",
|
|
300
|
+
"predict_feature_difficulty",
|
|
301
|
+
"predict_failure_repeats",
|
|
302
|
+
"reset_advisor_cache",
|
|
303
|
+
]
|