caudate-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- api/__init__.py +5 -0
- api/anthropic_compat.py +1518 -0
- api/artifact_viewer.py +366 -0
- api/caudate_middleware.py +618 -0
- api/forge_bootstrapper_routes.py +377 -0
- api/forge_routes.py +630 -0
- api/forge_system_routes.py +294 -0
- api/openai_compat.py +1993 -0
- api/server.py +667 -0
- api/storyboard_page.py +677 -0
- caudate_cli-0.1.0.dist-info/METADATA +354 -0
- caudate_cli-0.1.0.dist-info/RECORD +153 -0
- caudate_cli-0.1.0.dist-info/WHEEL +5 -0
- caudate_cli-0.1.0.dist-info/entry_points.txt +2 -0
- caudate_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
- caudate_cli-0.1.0.dist-info/top_level.txt +14 -0
- cognos_mcp/__init__.py +4 -0
- cognos_mcp/bridge.py +41 -0
- cognos_mcp/client.py +70 -0
- cognos_mcp/config.py +49 -0
- cognos_mcp/server.py +66 -0
- config.py +82 -0
- core/__init__.py +0 -0
- core/agent.py +468 -0
- core/agentic_loop.py +731 -0
- core/anthropic_auth.py +91 -0
- core/background.py +113 -0
- core/banner.py +134 -0
- core/bootstrap.py +292 -0
- core/citations.py +131 -0
- core/compaction.py +109 -0
- core/constitution.py +198 -0
- core/diff_viewer.py +87 -0
- core/export.py +85 -0
- core/file_refs.py +119 -0
- core/files.py +199 -0
- core/hooks.py +209 -0
- core/image.py +599 -0
- core/input.py +91 -0
- core/loop.py +238 -0
- core/memory_md.py +147 -0
- core/notifications.py +99 -0
- core/ownership.py +181 -0
- core/paste.py +81 -0
- core/permissions.py +210 -0
- core/plan_mode.py +215 -0
- core/sandbox_prompt.py +185 -0
- core/scheduler.py +195 -0
- core/schemas.py +202 -0
- core/session.py +90 -0
- core/settings.py +132 -0
- core/skills.py +398 -0
- core/slash_commands.py +977 -0
- core/statusline.py +61 -0
- core/subagent.py +300 -0
- core/thinking.py +50 -0
- core/updater.py +122 -0
- core/usage.py +109 -0
- core/worktree.py +93 -0
- execution/__init__.py +0 -0
- execution/executor.py +329 -0
- execution/plugins.py +108 -0
- execution/tools/__init__.py +0 -0
- execution/tools/agent_tool.py +107 -0
- execution/tools/agentic_tool.py +297 -0
- execution/tools/artifact_tool.py +191 -0
- execution/tools/ask_user_question_tool.py +137 -0
- execution/tools/base.py +81 -0
- execution/tools/calculator_tool.py +137 -0
- execution/tools/cognos_card_tool.py +124 -0
- execution/tools/cron_tool.py +215 -0
- execution/tools/datetime_tool.py +215 -0
- execution/tools/describe_image_tool.py +161 -0
- execution/tools/draw_tool.py +164 -0
- execution/tools/edit_image_tool.py +262 -0
- execution/tools/edit_tool.py +245 -0
- execution/tools/file_tool.py +90 -0
- execution/tools/find_anywhere_tool.py +255 -0
- execution/tools/forge_feature_tools.py +377 -0
- execution/tools/glob_tool.py +59 -0
- execution/tools/grep_tool.py +89 -0
- execution/tools/http_request_tool.py +224 -0
- execution/tools/load_skill_tool.py +104 -0
- execution/tools/longcat_avatar_tool.py +384 -0
- execution/tools/mcp_tool.py +100 -0
- execution/tools/notebook_tool.py +279 -0
- execution/tools/openapi_tool.py +440 -0
- execution/tools/plan_mode_tool.py +95 -0
- execution/tools/push_notification_tool.py +157 -0
- execution/tools/python_tool.py +61 -0
- execution/tools/respond_tool.py +40 -0
- execution/tools/sandbox_tool.py +378 -0
- execution/tools/search_tool.py +153 -0
- execution/tools/semantic_search_tool.py +106 -0
- execution/tools/shell_tool.py +283 -0
- execution/tools/speak_tool.py +134 -0
- execution/tools/storyboard_tool.py +727 -0
- execution/tools/system_info_tool.py +212 -0
- execution/tools/task_tool.py +323 -0
- execution/tools/think_tool.py +49 -0
- execution/tools/transcribe_audio_tool.py +86 -0
- execution/tools/update_memory_tool.py +92 -0
- execution/tools/web_fetch_tool.py +82 -0
- execution/tools/worktree_tool.py +174 -0
- llm/__init__.py +0 -0
- llm/fallback.py +116 -0
- llm/models.py +320 -0
- llm/provider.py +1356 -0
- llm/router.py +373 -0
- main.py +1889 -0
- memory/__init__.py +0 -0
- memory/episodic.py +99 -0
- memory/procedural.py +145 -0
- memory/semantic.py +71 -0
- memory/working.py +64 -0
- nn/__init__.py +43 -0
- nn/auto_evolve.py +245 -0
- nn/caudate.py +136 -0
- nn/config.py +141 -0
- nn/consolidator.py +81 -0
- nn/data.py +1635 -0
- nn/encoder.py +258 -0
- nn/forge_advisor.py +303 -0
- nn/format.py +235 -0
- nn/heads.py +432 -0
- nn/observer.py +994 -0
- nn/policy.py +214 -0
- nn/runtime.py +343 -0
- nn/scorer.py +175 -0
- nn/trainer.py +515 -0
- nn/vision.py +352 -0
- personality/__init__.py +23 -0
- personality/engine.py +129 -0
- personality/identity.py +144 -0
- personality/inner_voice.py +100 -0
- personality/mood.py +205 -0
- planning/__init__.py +0 -0
- planning/dev_server.py +221 -0
- planning/forge_models.py +718 -0
- planning/orchestrator.py +1363 -0
- planning/planner.py +451 -0
- planning/task_graph.py +61 -0
- reflection/__init__.py +0 -0
- reflection/meta_learner.py +156 -0
- reflection/reflector.py +127 -0
- ui/__init__.py +5 -0
- ui/display.py +88 -0
- voice/__init__.py +0 -0
- voice/conversation.py +125 -0
- voice/listener.py +111 -0
- voice/speaker.py +59 -0
- voice/stt.py +126 -0
- voice/tts.py +214 -0
nn/caudate.py
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
"""Caudate — Cognos's neural-network brain.
|
|
2
|
+
|
|
3
|
+
A 4.6M-parameter transformer with text/vision/tool embedders, four
|
|
4
|
+
attention layers, and a registry of output heads. The trunk is shared;
|
|
5
|
+
each head specialises one routing job (which tool, which tier, should-
|
|
6
|
+
think, expected-reward, plus growth slots for memory-write, cache,
|
|
7
|
+
permission). Adding a new routing job means appending one HeadSpec to
|
|
8
|
+
the registry — see `nn/heads.py`. Caudate is *not* a router; routing
|
|
9
|
+
is the family of jobs the brain currently does.
|
|
10
|
+
|
|
11
|
+
Stack:
|
|
12
|
+
StateEncoder ─► TransformerEncoder (4 layers, pre-norm) ─► CLS pool
|
|
13
|
+
│
|
|
14
|
+
+ source_embed bias
|
|
15
|
+
│
|
|
16
|
+
HeadRegistry → {head_name: tensor}
|
|
17
|
+
|
|
18
|
+
PyTorch's stock TransformerEncoder is used so the architecture stays
|
|
19
|
+
inspectable: multi-head self-attention, feed-forward, residual,
|
|
20
|
+
LayerNorm. Xavier init on the heads so the early loss isn't dominated
|
|
21
|
+
by random output scale.
|
|
22
|
+
|
|
23
|
+
Caudate runs as an *advisor* alongside the agent — predictions are
|
|
24
|
+
logged but don't override the LLM's choices until trust is established.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
from __future__ import annotations
|
|
28
|
+
|
|
29
|
+
import torch
|
|
30
|
+
import torch.nn as nn
|
|
31
|
+
|
|
32
|
+
from nn.config import NNConfig
|
|
33
|
+
from nn.encoder import StateEncoder
|
|
34
|
+
from nn.heads import ALL_HEADS, ContrastiveToolHead, HeadRegistry, HeadSpec
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class Caudate(nn.Module):
    """End-to-end neural network. Forward: state → registered head outputs.

    Pipeline: ``StateEncoder`` → pre-norm ``nn.TransformerEncoder`` →
    LayerNorm over the first (CLS) position → additive per-source bias →
    ``HeadRegistry``. The contrastive tool head additionally runs when
    candidate tool specs are passed to :meth:`forward`.
    """

    name: str = "Caudate"

    def __init__(self, cfg: NNConfig, head_specs: tuple[HeadSpec, ...] | None = None):
        """Build the encoder, transformer trunk, heads and source embedding.

        Args:
            cfg: Hyperparameters. Reads ``d_model``, ``n_heads``, ``d_ff``,
                ``dropout``, ``n_layers`` and, when present,
                ``source_vocab_size`` (16 is used if the attribute is missing).
            head_specs: Specs handed to the ``HeadRegistry``. ``None`` selects
                ``ALL_HEADS``.
        """
        super().__init__()
        self.cfg = cfg
        self.encoder = StateEncoder(cfg)

        layer = nn.TransformerEncoderLayer(
            d_model=cfg.d_model,
            nhead=cfg.n_heads,
            dim_feedforward=cfg.d_ff,
            dropout=cfg.dropout,
            activation="gelu",
            batch_first=True,
            norm_first=True,  # pre-norm — more stable on small data
        )
        self.transformer = nn.TransformerEncoder(layer, num_layers=cfg.n_layers)
        self.norm = nn.LayerNorm(cfg.d_model)

        # Standard heads (tier/think/value + extended D-heads). The tool
        # head was removed from this registry in Phase 2 of the format
        # migration and replaced with `self.tool_head`, a contrastive
        # open-vocab predictor that takes per-sample candidate tool
        # definitions. The registry pattern still drives every OTHER head
        # (uniform loss path in the trainer).
        specs = ALL_HEADS if head_specs is None else head_specs
        self.heads = HeadRegistry(cfg.d_model, cfg.dropout, specs)

        # Phase-2 open-vocab tool predictor. Shares the state encoder's
        # text embedder so we don't double-load sentence-transformers.
        self.tool_head = ContrastiveToolHead(cfg, self.encoder.text_embedder)

        # Phase 2 of CAUDATE_EVOLUTION.md: source-conditioned predictions.
        # A learned per-teacher-model bias added to the pooled CLS token
        # before the heads. Initialized to zero so legacy checkpoints
        # loaded with strict=False (no source_embed in state_dict) keep
        # their original behaviour until the next retrain.
        n_sources = max(1, int(getattr(cfg, "source_vocab_size", 16)))
        self.source_embed = nn.Embedding(n_sources, cfg.d_model)
        nn.init.zeros_(self.source_embed.weight)

    def _resolve_source_ids(
        self,
        source_id: torch.Tensor | None,
        batch_size: int,
        device: torch.device,
    ) -> torch.Tensor:
        """Return valid ``source_embed`` indices for a batch.

        Args:
            source_id: Per-sample source ids, or ``None`` when untagged.
            batch_size: Number of samples; used only when ``source_id`` is
                ``None``.
            device: Device the returned tensor must live on.

        Returns:
            A ``torch.long`` tensor on ``device``. ``None`` becomes all
            zeros (slot 0, ``<unknown>``). Any id outside
            ``[0, num_embeddings)`` is also mapped to slot 0, so an unseen
            source gets the unknown-source bias instead of borrowing the
            bias of whichever source occupies the last slot.
        """
        if source_id is None:
            return torch.zeros(batch_size, dtype=torch.long, device=device)

        ids = source_id.to(device=device, dtype=torch.long)
        n_sources = self.source_embed.num_embeddings
        in_range = (ids >= 0) & (ids < n_sources)
        # Fall back to <unknown> rather than crash the embedding lookup.
        return torch.where(in_range, ids, torch.zeros_like(ids))

    def forward(
        self,
        messages: list[list[str]],
        tool_ids: torch.Tensor,
        mood: torch.Tensor,
        image_paths: list[list[str]] | None = None,
        source_id: torch.Tensor | None = None,
        tool_specs: list[list[str]] | None = None,
    ) -> dict[str, torch.Tensor]:
        """Run the trunk and every head.

        Args:
            messages, tool_ids, mood, image_paths: Forwarded unchanged to
                the state encoder.
            source_id: Optional per-sample id of the model that produced
                the turn. ``None`` or out-of-range ids use slot 0.
            tool_specs: Per-sample list of "{name}: {description}"
                strings. When supplied, the contrastive tool head runs and
                emits `tool_logits` + `tool_mask` in the output dict. When
                omitted or empty, those keys are absent (the
                trainer/advisor treats that as "no tool prediction
                available for this sample").

        Returns:
            The head registry's output dict, plus ``"pooled"`` (the
            source-biased CLS vector) and, when tool candidates were
            given, ``"tool_logits"`` and ``"tool_mask"``.
        """
        x = self.encoder(messages, tool_ids, mood, image_paths)  # (B, L, d)
        h = self.transformer(x)                                   # (B, L, d)
        cls = self.norm(h[:, 0])                                  # (B, d)

        # Source-conditioning: add a learned bias keyed on which model
        # produced this turn's response. Slot 0 (<unknown>) is initialized
        # to zero — i.e. the unconditioned baseline.
        source_id = self._resolve_source_ids(source_id, cls.shape[0], cls.device)
        cls = cls + self.source_embed(source_id)

        head_out = self.heads(cls)   # dict of {output_key: tensor}
        head_out["pooled"] = cls     # (B, d) — for inspection

        # Contrastive tool head — runs only when candidates are supplied.
        # Empty/missing → no tool prediction for this batch.
        if tool_specs is not None and any(tool_specs):
            tool_emb, tool_mask = self.tool_head.embed_tools(tool_specs)
            head_out["tool_logits"] = self.tool_head(cls, tool_emb, tool_mask)
            head_out["tool_mask"] = tool_mask

        return head_out

    @property
    def head_specs(self) -> tuple[HeadSpec, ...]:
        """Specs of the heads held by the registry."""
        return self.heads.specs

    def num_parameters(self) -> int:
        """Count the elements of all parameters that require gradients."""
        return sum(p.numel() for p in self.parameters() if p.requires_grad)


# Backward-compat alias — old code that imported CognosController still works.
CognosController = Caudate
|
nn/config.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
"""Neural-network hyperparameters.
|
|
2
|
+
|
|
3
|
+
Single source of truth for the controller's shape, training schedule,
|
|
4
|
+
and runtime behavior. Persisted alongside checkpoints so loaded weights
|
|
5
|
+
always match the architecture they were trained with.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
from pydantic import BaseModel, Field
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class NNConfig(BaseModel):
    """Hyperparameters + paths for the cognitive controller."""

    # ===== Architecture =====
    # Output width of the sentence-transformers MiniLM text embedder.
    text_embed_dim: int = 384
    # Upper bound on distinct tools we will embed. Phase 2 raised this
    # from 64 to 4096 to make room for public tool-use corpora (xLAM has
    # ~10K unique tool names; ToolBench is similar). Ids that still
    # overflow are clamped at collate time, so the embedding lookup never
    # crashes — treat the value as a hard ceiling.
    tool_vocab_size: int = 4096
    tool_embed_dim: int = 64
    # Source-conditioned predictions (Phase 2 of CAUDATE_EVOLUTION.md).
    # Every ConversationSample carries the name of the model that
    # produced it; that name is resolved through a small SourceVocab and
    # a learned per-source embedding biases the encoder's pooled state.
    # Slot 0 means "<unknown>", which gives legacy untagged samples (and
    # model ids never seen before) the zero baseline.
    source_vocab_size: int = 16  # max number of distinct teacher-model ids
    # Mood vector: confidence, curiosity, frustration, satisfaction.
    mood_dim: int = 4
    # Number of most recent tool calls handed to the encoder.
    history_window: int = 16
    # Number of most recent messages handed to the encoder. The Phase-2
    # format migration raised this from 4 to 12 so multi-turn
    # ToolBench-/xLAM-style conversations fit. The positional-embedding
    # shape depends on it, so pre-Phase-2 checkpoints are incompatible
    # and need a fresh retrain.
    msg_window: int = 12

    # Vision channel, backed by a frozen image embedder.
    use_vision: bool = True
    # Number of most recent images handed to the encoder.
    image_window: int = 4
    # Either "clip" (light, 512-D) or "internvl" (rich, 4096-D); keep
    # `vision_embed_dim` in step with the backend chosen here.
    vision_backend: str = "internvl"
    vision_embed_dim: int = 4096
    vision_encoder_name: str = "OpenGVLab/InternVL2-8B"
    # Load dtype, used by InternVL only. bfloat16 needs ~16GB for
    # InternVL2-8B on bf16-capable GPUs; float16 is similar; on a
    # CPU-only machine set this to "float32".
    vision_dtype: str = "bfloat16"

    # Transformer trunk dimensions.
    d_model: int = 256  # hidden size
    n_heads: int = 8
    n_layers: int = 4
    d_ff: int = 1024
    dropout: float = 0.1

    # ===== Output heads =====
    # The Phase-2 tool head is contrastive (open-vocab): at inference the
    # caller supplies the set of tools the assistant can call right now,
    # and each candidate is scored against the pooled CLS through a
    # learned projection. `n_tools_out` is that projection's output
    # dimension (= tool description embedding dim). 256 leaves enough
    # room to separate the ToolBench / xLAM function-calling vocabulary
    # (~10K unique names) without ballooning the parameter count.
    n_tools_out: int = 256
    # Cap on tools considered per turn. Datasets typically expose 1-20;
    # shorter lists are padded up to this cap with zero embeddings plus a
    # mask so the batch tensor shape stays fixed.
    max_tools_per_sample: int = 32
    # Remaining heads: value emits a single scalar, tier emits 2 logits,
    # think emits 1 logit (sigmoid).

    # ===== Training =====
    batch_size: int = 32
    learning_rate: float = 3e-4
    weight_decay: float = 1e-2
    grad_clip: float = 1.0
    max_steps: int = 5000
    eval_every: int = 200
    save_every: int = 500
    eval_split: float = 0.15
    # Limit on eval batches per intra-fit eval tick, so those ticks stay
    # cheap on big public corpora while still giving a stable loss
    # estimate. The end-of-fit final eval ignores this cap and covers the
    # whole held-out split.
    # NOTE(review): the original note sized this as 100 batches x 16 =
    # 1600 samples, but `batch_size` above defaults to 32 — confirm which
    # batch size the eval loader actually uses.
    eval_max_batches_intra: int = 100
    seed: int = 42

    # Per-head loss weights (tool / tier / think / value), balanced so
    # that no single signal dominates training.
    w_tool: float = 1.0
    w_tier: float = 0.5
    w_think: float = 0.2
    w_value: float = 0.5

    # Class weight applied to slot 0 (<no_tool>) of the contrastive tool
    # head. Cognos-toolbox is ~63% <no_tool>, which pushed R3 step-11000
    # to argmax slot 0 on 100% of real-tool turns (the target was in the
    # top 5 for 52.5% of them, yet slot 0 always won). 1.0 leaves the
    # loss unweighted (original behavior); 0.5 is a moderate downweight;
    # 0.3 is aggressive.
    tool_no_tool_class_weight: float = 1.0

    # ===== Replay buffer =====
    replay_capacity: int = 8192
    min_episodes_to_train: int = 64

    # ===== Runtime =====
    # The advisor stays silent below this confidence.
    advisor_min_confidence: float = 0.55
    # ADR-0007 constrained routing: at ADVISOR+, once tool_confidence
    # clears `advisor_min_confidence` and the prediction is a real tool
    # (not <no_tool>), the LLM's tool list is pruned to `advisor_top_k`
    # of Caudate's top picks. The predicted tool is always kept, and
    # `advisor_top_k_floor` guarantees the LLM never sees fewer options
    # than that (mitigates "confident wrong hides the right tool" per
    # ADR-0007 Consequences).
    advisor_top_k: int = 5
    advisor_top_k_floor: int = 3
    advisor_log_path: str = "data/nn/predictions.jsonl"

    # ===== Paths =====
    data_dir: str = "data/nn"
    checkpoint_path: str = "data/nn/caudate.pt"
    metadata_path: str = "data/nn/caudate.meta.json"
    text_encoder_name: str = "sentence-transformers/all-MiniLM-L6-v2"

    # Reserved ids in the tool-history channel.
    tool_pad_token: int = 0
    tool_unk_token: int = 1
    tool_bos_token: int = 2

    @property
    def total_input_dim(self) -> int:
        """Sum of the text, tool and mood embedding widths."""
        widths = (self.text_embed_dim, self.tool_embed_dim, self.mood_dim)
        return sum(widths)

    def ensure_dirs(self) -> None:
        """Create `data_dir` and the advisor log's parent directory if missing."""
        required = (Path(self.data_dir), Path(self.advisor_log_path).parent)
        for directory in required:
            directory.mkdir(parents=True, exist_ok=True)
|
nn/consolidator.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""Memory consolidator — periodic offline replay → training pairs.
|
|
2
|
+
|
|
3
|
+
Loosely modeled on hippocampal-cortical consolidation: while Cognos is
|
|
4
|
+
idle (or on demand), it walks recent episodic memories, distills them
|
|
5
|
+
into ConversationSample rows, and pushes them into the replay buffer.
|
|
6
|
+
The trainer can then run additional gradient steps without needing
|
|
7
|
+
fresh data from a live session.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import json
|
|
13
|
+
import logging
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
from nn.config import NNConfig
|
|
17
|
+
from nn.data import ConversationSample, ReplayBuffer
|
|
18
|
+
from nn.format import ChatMessage, ToolCall
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def consolidate_from_episodic(
    episodic: Any,
    replay: ReplayBuffer,
    cfg: NNConfig,
    limit: int = 128,
) -> int:
    """Pull up to `limit` episodes from episodic memory into the replay buffer.

    If episodic memory exposes a `recall_recent(limit)` method we use that;
    otherwise we degrade to `recall("", limit=...)` which most stores accept.

    Args:
        episodic: Episodic memory store, or ``None`` (nothing to do).
        replay: Buffer that receives one sample per converted episode.
        cfg: Accepted for interface compatibility; not read here.
        limit: Maximum number of episodes to consume. Enforced locally as
            well, so a store that ignores the argument cannot exceed it.
            Zero or negative values add nothing.

    Returns:
        Number of samples pushed. Recall failures are logged as warnings
        and yield 0; an episode that fails to convert or push is logged
        at debug level and skipped.
    """
    if episodic is None:
        return 0
    try:
        if hasattr(episodic, "recall_recent"):
            episodes = episodic.recall_recent(limit=limit)
        else:
            episodes = episodic.recall("", limit=limit)
    except Exception as e:
        logger.warning("consolidator: episodic recall failed: %s", e)
        return 0

    added = 0
    for index, ep in enumerate(episodes or []):
        # Honour the documented cap even if the store returned extra rows.
        if index >= limit:
            break
        try:
            replay.push(_sample_from_episode(ep))
            added += 1
        except Exception as e:
            # Best-effort: one malformed episode must not abort the batch.
            logger.debug("consolidator: skipped episode: %s", e)
    return added


def _sample_from_episode(ep: Any) -> ConversationSample:
    """Turn one episodic memory into a 2-turn training conversation.

    The action description becomes the user turn, followed by an
    assistant turn calling the tool that was actually invoked.
    """
    tool = getattr(ep, "tool_name", None) or "unknown"
    args = getattr(ep, "tool_args", {}) or {}

    status = getattr(getattr(ep, "result", None), "status", None)
    # Enum-like statuses carry their label on `.value`; anything else is
    # compared through its string form.
    success = status is not None and str(getattr(status, "value", status)) == "success"

    action_text = (getattr(ep, "action", "") or "")[:400]
    try:
        args_str = json.dumps(args) if args else ""
    except (TypeError, ValueError):
        # Arguments that are not JSON-serializable are dropped, not fatal.
        args_str = ""

    return ConversationSample(
        conversation=[
            ChatMessage(role="user", content=action_text),
            ChatMessage(
                role="assistant",
                content="",
                tool_calls=[ToolCall(name=tool, arguments=args_str)],
            ),
        ],
        tools=[],
        mood=[0.5, 0.5, 0.5, 0.5],
        target_tool=tool,
        target_value=(0.7 if success else 0.3),
    )
|