caudate-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. api/__init__.py +5 -0
  2. api/anthropic_compat.py +1518 -0
  3. api/artifact_viewer.py +366 -0
  4. api/caudate_middleware.py +618 -0
  5. api/forge_bootstrapper_routes.py +377 -0
  6. api/forge_routes.py +630 -0
  7. api/forge_system_routes.py +294 -0
  8. api/openai_compat.py +1993 -0
  9. api/server.py +667 -0
  10. api/storyboard_page.py +677 -0
  11. caudate_cli-0.1.0.dist-info/METADATA +354 -0
  12. caudate_cli-0.1.0.dist-info/RECORD +153 -0
  13. caudate_cli-0.1.0.dist-info/WHEEL +5 -0
  14. caudate_cli-0.1.0.dist-info/entry_points.txt +2 -0
  15. caudate_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
  16. caudate_cli-0.1.0.dist-info/top_level.txt +14 -0
  17. cognos_mcp/__init__.py +4 -0
  18. cognos_mcp/bridge.py +41 -0
  19. cognos_mcp/client.py +70 -0
  20. cognos_mcp/config.py +49 -0
  21. cognos_mcp/server.py +66 -0
  22. config.py +82 -0
  23. core/__init__.py +0 -0
  24. core/agent.py +468 -0
  25. core/agentic_loop.py +731 -0
  26. core/anthropic_auth.py +91 -0
  27. core/background.py +113 -0
  28. core/banner.py +134 -0
  29. core/bootstrap.py +292 -0
  30. core/citations.py +131 -0
  31. core/compaction.py +109 -0
  32. core/constitution.py +198 -0
  33. core/diff_viewer.py +87 -0
  34. core/export.py +85 -0
  35. core/file_refs.py +119 -0
  36. core/files.py +199 -0
  37. core/hooks.py +209 -0
  38. core/image.py +599 -0
  39. core/input.py +91 -0
  40. core/loop.py +238 -0
  41. core/memory_md.py +147 -0
  42. core/notifications.py +99 -0
  43. core/ownership.py +181 -0
  44. core/paste.py +81 -0
  45. core/permissions.py +210 -0
  46. core/plan_mode.py +215 -0
  47. core/sandbox_prompt.py +185 -0
  48. core/scheduler.py +195 -0
  49. core/schemas.py +202 -0
  50. core/session.py +90 -0
  51. core/settings.py +132 -0
  52. core/skills.py +398 -0
  53. core/slash_commands.py +977 -0
  54. core/statusline.py +61 -0
  55. core/subagent.py +300 -0
  56. core/thinking.py +50 -0
  57. core/updater.py +122 -0
  58. core/usage.py +109 -0
  59. core/worktree.py +93 -0
  60. execution/__init__.py +0 -0
  61. execution/executor.py +329 -0
  62. execution/plugins.py +108 -0
  63. execution/tools/__init__.py +0 -0
  64. execution/tools/agent_tool.py +107 -0
  65. execution/tools/agentic_tool.py +297 -0
  66. execution/tools/artifact_tool.py +191 -0
  67. execution/tools/ask_user_question_tool.py +137 -0
  68. execution/tools/base.py +81 -0
  69. execution/tools/calculator_tool.py +137 -0
  70. execution/tools/cognos_card_tool.py +124 -0
  71. execution/tools/cron_tool.py +215 -0
  72. execution/tools/datetime_tool.py +215 -0
  73. execution/tools/describe_image_tool.py +161 -0
  74. execution/tools/draw_tool.py +164 -0
  75. execution/tools/edit_image_tool.py +262 -0
  76. execution/tools/edit_tool.py +245 -0
  77. execution/tools/file_tool.py +90 -0
  78. execution/tools/find_anywhere_tool.py +255 -0
  79. execution/tools/forge_feature_tools.py +377 -0
  80. execution/tools/glob_tool.py +59 -0
  81. execution/tools/grep_tool.py +89 -0
  82. execution/tools/http_request_tool.py +224 -0
  83. execution/tools/load_skill_tool.py +104 -0
  84. execution/tools/longcat_avatar_tool.py +384 -0
  85. execution/tools/mcp_tool.py +100 -0
  86. execution/tools/notebook_tool.py +279 -0
  87. execution/tools/openapi_tool.py +440 -0
  88. execution/tools/plan_mode_tool.py +95 -0
  89. execution/tools/push_notification_tool.py +157 -0
  90. execution/tools/python_tool.py +61 -0
  91. execution/tools/respond_tool.py +40 -0
  92. execution/tools/sandbox_tool.py +378 -0
  93. execution/tools/search_tool.py +153 -0
  94. execution/tools/semantic_search_tool.py +106 -0
  95. execution/tools/shell_tool.py +283 -0
  96. execution/tools/speak_tool.py +134 -0
  97. execution/tools/storyboard_tool.py +727 -0
  98. execution/tools/system_info_tool.py +212 -0
  99. execution/tools/task_tool.py +323 -0
  100. execution/tools/think_tool.py +49 -0
  101. execution/tools/transcribe_audio_tool.py +86 -0
  102. execution/tools/update_memory_tool.py +92 -0
  103. execution/tools/web_fetch_tool.py +82 -0
  104. execution/tools/worktree_tool.py +174 -0
  105. llm/__init__.py +0 -0
  106. llm/fallback.py +116 -0
  107. llm/models.py +320 -0
  108. llm/provider.py +1356 -0
  109. llm/router.py +373 -0
  110. main.py +1889 -0
  111. memory/__init__.py +0 -0
  112. memory/episodic.py +99 -0
  113. memory/procedural.py +145 -0
  114. memory/semantic.py +71 -0
  115. memory/working.py +64 -0
  116. nn/__init__.py +43 -0
  117. nn/auto_evolve.py +245 -0
  118. nn/caudate.py +136 -0
  119. nn/config.py +141 -0
  120. nn/consolidator.py +81 -0
  121. nn/data.py +1635 -0
  122. nn/encoder.py +258 -0
  123. nn/forge_advisor.py +303 -0
  124. nn/format.py +235 -0
  125. nn/heads.py +432 -0
  126. nn/observer.py +994 -0
  127. nn/policy.py +214 -0
  128. nn/runtime.py +343 -0
  129. nn/scorer.py +175 -0
  130. nn/trainer.py +515 -0
  131. nn/vision.py +352 -0
  132. personality/__init__.py +23 -0
  133. personality/engine.py +129 -0
  134. personality/identity.py +144 -0
  135. personality/inner_voice.py +100 -0
  136. personality/mood.py +205 -0
  137. planning/__init__.py +0 -0
  138. planning/dev_server.py +221 -0
  139. planning/forge_models.py +718 -0
  140. planning/orchestrator.py +1363 -0
  141. planning/planner.py +451 -0
  142. planning/task_graph.py +61 -0
  143. reflection/__init__.py +0 -0
  144. reflection/meta_learner.py +156 -0
  145. reflection/reflector.py +127 -0
  146. ui/__init__.py +5 -0
  147. ui/display.py +88 -0
  148. voice/__init__.py +0 -0
  149. voice/conversation.py +125 -0
  150. voice/listener.py +111 -0
  151. voice/speaker.py +59 -0
  152. voice/stt.py +126 -0
  153. voice/tts.py +214 -0
nn/caudate.py ADDED
@@ -0,0 +1,136 @@
1
+ """Caudate — Cognos's neural-network brain.
2
+
3
+ A 4.6M-parameter transformer with text/vision/tool embedders, four
4
+ attention layers, and a registry of output heads. The trunk is shared;
5
+ each head specialises one routing job (which tool, which tier, should-
6
+ think, expected-reward, plus growth slots for memory-write, cache,
7
+ permission). Adding a new routing job means appending one HeadSpec to
8
+ the registry — see `nn/heads.py`. Caudate is *not* a router; routing
9
+ is the family of jobs the brain currently does.
10
+
11
+ Stack:
12
+ StateEncoder ─► TransformerEncoder (4 layers, pre-norm) ─► CLS pool
13
+
14
+ + source_embed bias
15
+
16
+ HeadRegistry → {head_name: tensor}
17
+
18
+ PyTorch's stock TransformerEncoder is used so the architecture stays
19
+ inspectable: multi-head self-attention, feed-forward, residual,
20
+ LayerNorm. Xavier init on the heads so the early loss isn't dominated
21
+ by random output scale.
22
+
23
+ Caudate runs as an *advisor* alongside the agent — predictions are
24
+ logged but don't override the LLM's choices until trust is established.
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ import torch
30
+ import torch.nn as nn
31
+
32
+ from nn.config import NNConfig
33
+ from nn.encoder import StateEncoder
34
+ from nn.heads import ALL_HEADS, ContrastiveToolHead, HeadRegistry, HeadSpec
35
+
36
+
37
class Caudate(nn.Module):
    """End-to-end neural network. Forward: state → registered head outputs.

    Pipeline: ``StateEncoder`` → ``nn.TransformerEncoder`` (pre-norm, GELU)
    → LayerNorm over sequence position 0 → additive per-source embedding
    → ``HeadRegistry``. A separate ``ContrastiveToolHead`` scores the
    candidate tools of each sample when they are passed to ``forward``.
    """

    name: str = "Caudate"

    def __init__(self, cfg: NNConfig, head_specs: tuple[HeadSpec, ...] | None = None):
        """Build the encoder, transformer trunk, and output heads.

        Args:
            cfg: Hyperparameters; ``d_model``, ``n_heads``, ``d_ff``,
                ``dropout`` and ``n_layers`` size the trunk.
            head_specs: Specs for the registry-driven heads. ``None``
                selects ``ALL_HEADS``.
        """
        super().__init__()
        self.cfg = cfg
        self.encoder = StateEncoder(cfg)

        layer = nn.TransformerEncoderLayer(
            d_model=cfg.d_model,
            nhead=cfg.n_heads,
            dim_feedforward=cfg.d_ff,
            dropout=cfg.dropout,
            activation="gelu",
            batch_first=True,
            norm_first=True,  # pre-norm — more stable on small data
        )
        self.transformer = nn.TransformerEncoder(layer, num_layers=cfg.n_layers)
        self.norm = nn.LayerNorm(cfg.d_model)

        # Standard heads (tier/think/value + extended D-heads). The
        # tool head was removed from this registry in Phase 2 of the
        # format migration and replaced with `self.tool_head`, a
        # contrastive open-vocab predictor that takes per-sample
        # candidate tool definitions. The registry pattern still
        # drives every OTHER head (uniform loss path in the trainer).
        specs = ALL_HEADS if head_specs is None else head_specs
        self.heads = HeadRegistry(cfg.d_model, cfg.dropout, specs)

        # Phase-2 open-vocab tool predictor. Shares the state encoder's
        # text embedder so we don't double-load sentence-transformers.
        self.tool_head = ContrastiveToolHead(cfg, self.encoder.text_embedder)

        # Phase 2 of CAUDATE_EVOLUTION.md: source-conditioned predictions.
        # A learned per-teacher-model bias added to the pooled CLS token
        # before the heads. Initialized to zero so legacy checkpoints
        # loaded with strict=False (no source_embed in state_dict) keep
        # their original behaviour until the next retrain.
        n_sources = max(1, int(getattr(cfg, "source_vocab_size", 16)))
        self.source_embed = nn.Embedding(n_sources, cfg.d_model)
        nn.init.zeros_(self.source_embed.weight)

    def forward(
        self,
        messages: list[list[str]],
        tool_ids: torch.Tensor,
        mood: torch.Tensor,
        image_paths: list[list[str]] | None = None,
        source_id: torch.Tensor | None = None,
        tool_specs: list[list[str]] | None = None,
    ) -> dict[str, torch.Tensor]:
        """Run the trunk and every head on one batch.

        Args:
            messages, tool_ids, mood, image_paths: Passed straight through
                to the state encoder.
            source_id: Optional per-sample source-vocabulary ids. ``None``
                means slot 0 (<unknown>) for every sample; ids outside
                ``[0, num_embeddings)`` are also mapped to slot 0.
            tool_specs: per-sample list of "{name}: {description}"
                strings. When supplied, the contrastive tool head runs and
                emits `tool_logits` + `tool_mask` in the output dict. When
                omitted or empty, those keys are absent (the trainer/advisor
                treats that as "no tool prediction available for this
                sample").

        Returns:
            The registry's output dict plus ``"pooled"`` (the conditioned
            CLS vector) and, when tools were supplied, ``"tool_logits"``
            and ``"tool_mask"``.
        """
        x = self.encoder(messages, tool_ids, mood, image_paths)  # (B, L, d)
        h = self.transformer(x)  # (B, L, d)
        cls = self.norm(h[:, 0])  # (B, d)

        # Source-conditioning: add a learned bias keyed on which model
        # produced this turn's response. Default to slot 0 (<unknown>)
        # which is initialized to zero — i.e. unconditioned baseline.
        if source_id is None:
            source_id = torch.zeros(cls.shape[0], dtype=torch.long, device=cls.device)
        else:
            source_id = source_id.to(device=cls.device, dtype=torch.long)
            # Route out-of-range ids to <unknown> (slot 0) rather than
            # crash. A plain clamp would send overflow ids to the LAST
            # slot, i.e. borrow some real teacher model's bias.
            n_sources = self.source_embed.num_embeddings
            in_range = (source_id >= 0) & (source_id < n_sources)
            source_id = torch.where(in_range, source_id, torch.zeros_like(source_id))
        cls = cls + self.source_embed(source_id)

        head_out = self.heads(cls)  # dict of {output_key: tensor}
        head_out["pooled"] = cls  # (B, d) — for inspection

        # Contrastive tool head — runs only when candidates are
        # supplied. Empty/missing → no tool prediction for this batch.
        if tool_specs is not None and any(tool_specs):
            tool_emb, tool_mask = self.tool_head.embed_tools(tool_specs)
            head_out["tool_logits"] = self.tool_head(cls, tool_emb, tool_mask)
            head_out["tool_mask"] = tool_mask

        return head_out

    @property
    def head_specs(self) -> tuple[HeadSpec, ...]:
        """Specs of the registry-driven heads, as held by the registry."""
        return self.heads.specs

    def num_parameters(self) -> int:
        """Return the number of trainable (``requires_grad``) parameters."""
        return sum(p.numel() for p in self.parameters() if p.requires_grad)


# Backward-compat alias — old code that imported CognosController still works.
CognosController = Caudate
nn/config.py ADDED
@@ -0,0 +1,141 @@
1
+ """Neural-network hyperparameters.
2
+
3
+ Single source of truth for the controller's shape, training schedule,
4
+ and runtime behavior. Persisted alongside checkpoints so loaded weights
5
+ always match the architecture they were saved with.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from pathlib import Path
11
+
12
+ from pydantic import BaseModel, Field
13
+
14
+
15
class NNConfig(BaseModel):
    """Hyperparameters + paths for the cognitive controller.

    Fields are grouped as: architecture, output heads, training, replay
    buffer, runtime (advisor) behaviour, filesystem paths, and the special
    token ids of the tool-history channel.
    """

    # ----- Architecture -----
    text_embed_dim: int = 384  # sentence-transformers MiniLM dim
    # Phase-2: bumped 64 → 4096 to accommodate public tool-use corpora
    # (xLAM has ~10K unique tool names; ToolBench similar). Tool ids
    # that still overflow this cap are clamped at collate time so the
    # embedding lookup never crashes — the cap is a hard ceiling.
    tool_vocab_size: int = 4096  # max distinct tools we'll embed
    tool_embed_dim: int = 64
    # Phase 2 of CAUDATE_EVOLUTION.md: source-conditioned predictions.
    # Each ConversationSample is tagged with the model that produced it;
    # we look that name up in a small SourceVocab and bias the encoder's
    # pooled state with a learned per-source embedding. Slot 0 is
    # reserved for "<unknown>" so legacy untagged samples (and any
    # model id we haven't seen before) get the zero baseline.
    source_vocab_size: int = 16  # cap on distinct teacher-model ids
    mood_dim: int = 4  # confidence, curiosity, frustration, satisfaction
    history_window: int = 16  # last N tool calls fed to encoder
    # Phase-2 format migration: bumped 4 → 12 so multi-turn ToolBench-
    # /xLAM-style conversations fit in the encoder window. Pos-embed
    # shape changes with this, so any pre-Phase-2 checkpoint is
    # incompatible — fresh retrain required.
    msg_window: int = 12  # last N messages fed to encoder

    # Vision channel — frozen image embedder.
    use_vision: bool = True
    image_window: int = 4  # last N images fed to encoder
    # Backend = "clip" (light, 512-D) or "internvl" (rich, 4096-D).
    # Match `vision_embed_dim` to whichever you pick.
    vision_backend: str = "internvl"
    vision_embed_dim: int = 4096
    vision_encoder_name: str = "OpenGVLab/InternVL2-8B"
    # InternVL only: the dtype to load with. bfloat16 needs ~16GB
    # for InternVL2-8B on bf16-capable GPUs; float16 is similar; if
    # you only have CPU set this to "float32".
    vision_dtype: str = "bfloat16"

    d_model: int = 256  # transformer hidden size
    n_heads: int = 8
    n_layers: int = 4
    d_ff: int = 1024
    dropout: float = 0.1

    # ----- Output heads -----
    # Phase-2 tool head is contrastive (open-vocab) — at inference the
    # set of "tools the assistant can call right now" is supplied, and
    # the model scores each candidate against the pooled CLS via a
    # learned projection. n_tools_out is the projection's output
    # dimension (= tool description embedding dim). 256 is roomy
    # enough to cleanly separate the function-calling tool vocab in
    # ToolBench / xLAM (~10K unique names) without ballooning params.
    n_tools_out: int = 256
    # Max tools considered per turn. Datasets typically expose 1-20;
    # we pad to this cap with zero embeddings + a mask so the batch
    # tensor shape is fixed.
    max_tools_per_sample: int = 32
    # value head outputs a single scalar; tier head outputs 2 logits;
    # think head outputs 1 logit (sigmoid).

    # ----- Training -----
    batch_size: int = 32
    learning_rate: float = 3e-4
    weight_decay: float = 1e-2
    grad_clip: float = 1.0
    max_steps: int = 5000
    eval_every: int = 200
    save_every: int = 500
    eval_split: float = 0.15
    # Cap eval batches during training so the intra-fit eval ticks
    # stay cheap on big public corpora. The original sizing note was
    # "100 batches × 16 = 1600 samples" — enough for a stable loss
    # estimate, fast enough to not dominate the training loop.
    # NOTE(review): `batch_size` above defaults to 32, not 16 — confirm
    # which batch size the eval loop actually uses. The end-of-fit
    # final eval ignores this cap and runs over the full held-out split.
    eval_max_batches_intra: int = 100
    seed: int = 42

    # Loss weights — controller has 4 heads, balance them so no one
    # signal dominates training.
    w_tool: float = 1.0
    w_tier: float = 0.5
    w_think: float = 0.2
    w_value: float = 0.5

    # Class weight on the contrastive tool head's slot-0 (<no_tool>).
    # Cognos-toolbox is ~63% <no_tool>, which biased R3 step-11000 to
    # argmax slot-0 on 100% of real-tool turns (target-in-top-5 was
    # 52.5%, but slot-0 always won). 1.0 = unweighted (original
    # behavior). 0.5 = moderate downweight; 0.3 = aggressive.
    tool_no_tool_class_weight: float = 1.0

    # ----- Replay buffer -----
    replay_capacity: int = 8192
    min_episodes_to_train: int = 64

    # ----- Runtime -----
    advisor_min_confidence: float = 0.55  # below this, advisor stays silent
    # ADR-0007 constrained routing — at ADVISOR+, when tool_confidence
    # clears `advisor_min_confidence` and the prediction is a real tool
    # (not <no_tool>), the LLM's tool list is pruned to this many of
    # Caudate's top picks. The predicted tool itself is always included,
    # and `advisor_top_k_floor` is enforced so the LLM never sees fewer
    # than that many options (mitigates "confident wrong hides the right
    # tool" per ADR-0007 Consequences).
    advisor_top_k: int = 5
    advisor_top_k_floor: int = 3
    advisor_log_path: str = "data/nn/predictions.jsonl"

    # ----- Paths -----
    data_dir: str = "data/nn"
    checkpoint_path: str = "data/nn/caudate.pt"
    metadata_path: str = "data/nn/caudate.meta.json"
    text_encoder_name: str = "sentence-transformers/all-MiniLM-L6-v2"

    # Special tokens for the tool history channel
    tool_pad_token: int = 0
    tool_unk_token: int = 1
    tool_bos_token: int = 2

    @property
    def total_input_dim(self) -> int:
        """Sum of the text, tool and mood embedding widths."""
        return self.text_embed_dim + self.tool_embed_dim + self.mood_dim

    def ensure_dirs(self) -> None:
        """Create every directory the configured paths will write into.

        Covers `data_dir` plus the parent directories of the advisor log,
        the checkpoint, and the metadata file, so paths pointed outside
        `data_dir` still have somewhere to land. Existing directories
        are left untouched.
        """
        targets = (
            Path(self.data_dir),
            Path(self.advisor_log_path).parent,
            Path(self.checkpoint_path).parent,
            Path(self.metadata_path).parent,
        )
        for directory in targets:
            directory.mkdir(parents=True, exist_ok=True)
nn/consolidator.py ADDED
@@ -0,0 +1,81 @@
1
+ """Memory consolidator — periodic offline replay → training pairs.
2
+
3
+ Loosely modeled on hippocampal-cortical consolidation: while Cognos is
4
+ idle (or on demand), it walks recent episodic memories, distills them
5
+ into ConversationSample rows, and pushes them into the replay buffer.
6
+ The trainer can then run additional gradient steps without needing
7
+ fresh data from a live session.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import json
13
+ import logging
14
+ from typing import Any
15
+
16
+ from nn.config import NNConfig
17
+ from nn.data import ConversationSample, ReplayBuffer
18
+ from nn.format import ChatMessage, ToolCall
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
def _episode_succeeded(result: Any) -> bool:
    """Return True when `result.status` (or its `.value`) stringifies to "success"."""
    status = getattr(result, "status", None)
    if not status:
        return False
    # Enum-like statuses carry the label on `.value`; plain ones are used as-is.
    return str(getattr(status, "value", status)) == "success"


def consolidate_from_episodic(
    episodic: Any,
    replay: ReplayBuffer,
    cfg: NNConfig,
    limit: int = 128,
) -> int:
    """Pull up to `limit` episodes from episodic memory into the replay buffer.

    If episodic memory exposes a `recall_recent(limit)` method we use
    that; otherwise we degrade to `recall("", limit=...)` which most
    stores accept. Each episode becomes one `ConversationSample` pushed
    via `replay.push`.

    Args:
        episodic: Episodic memory store, or ``None`` (nothing to do).
        replay: Buffer that receives the distilled samples.
        cfg: Controller config; `mood_dim` sizes the neutral mood vector
            (falls back to 4 when the attribute is missing).
        limit: Maximum number of episodes requested from the store.

    Returns:
        Number of samples added. Recall failures yield 0; episodes that
        fail to convert are skipped and logged at debug level.
    """
    if episodic is None:
        return 0
    try:
        if hasattr(episodic, "recall_recent"):
            episodes = episodic.recall_recent(limit=limit)
        else:
            episodes = episodic.recall("", limit=limit)
    except Exception as e:
        logger.warning("consolidator: episodic recall failed: %s", e)
        return 0

    # Consolidated episodes carry no recorded mood, so use a neutral
    # 0.5 on every axis, sized from the config rather than hard-coded.
    mood_dim = int(getattr(cfg, "mood_dim", 4))

    added = 0
    for ep in episodes or []:
        try:
            tool = getattr(ep, "tool_name", None) or "unknown"
            args = getattr(ep, "tool_args", {}) or {}
            success = _episode_succeeded(getattr(ep, "result", None))
            action_text = (getattr(ep, "action", "") or "")[:400]
            try:
                args_str = json.dumps(args) if args else ""
            except (TypeError, ValueError, RecursionError):
                # Unserialisable arguments: keep the episode, drop the args.
                args_str = ""
            sample = ConversationSample(
                # One episodic memory → a 2-turn conversation: the
                # action description as a user turn, followed by the
                # assistant calling the tool that was actually invoked.
                conversation=[
                    ChatMessage(role="user", content=action_text),
                    ChatMessage(
                        role="assistant", content="",
                        tool_calls=[ToolCall(name=tool, arguments=args_str)],
                    ),
                ],
                tools=[],
                mood=[0.5] * mood_dim,
                target_tool=tool,
                target_value=(0.7 if success else 0.3),
            )
            replay.push(sample)
            added += 1
        except Exception as e:
            # Best-effort: one malformed episode must not abort the batch.
            logger.debug("consolidator: skipped episode: %s", e)
    return added