switchroom 0.12.26 → 0.12.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/dist/agent-scheduler/index.js +80 -80
  2. package/dist/auth-broker/index.js +80 -80
  3. package/dist/cli/drive-write-pretool.mjs +10 -10
  4. package/dist/cli/skill-validate-pretool.mjs +72 -72
  5. package/dist/cli/switchroom.js +359 -357
  6. package/dist/host-control/main.js +99 -99
  7. package/dist/vault/approvals/kernel-server.js +82 -82
  8. package/dist/vault/broker/server.js +83 -83
  9. package/package.json +2 -1
  10. package/telegram-plugin/dist/bridge/bridge.js +112 -112
  11. package/telegram-plugin/dist/gateway/gateway.js +368 -209
  12. package/telegram-plugin/dist/server.js +160 -160
  13. package/telegram-plugin/gateway/gateway.ts +55 -40
  14. package/telegram-plugin/gateway/inbound-delivery-machine-dispatch.ts +188 -0
  15. package/telegram-plugin/stderr-timestamps.ts +106 -0
  16. package/telegram-plugin/tests/inbound-delivery-machine-dispatch.test.ts +240 -0
  17. package/telegram-plugin/tests/stderr-timestamps.test.ts +113 -0
  18. package/vendor/hindsight-memory/.claude-plugin/plugin.json +8 -0
  19. package/vendor/hindsight-memory/CHANGELOG.md +32 -0
  20. package/vendor/hindsight-memory/LICENSE +21 -0
  21. package/vendor/hindsight-memory/README.md +329 -0
  22. package/vendor/hindsight-memory/hooks/hooks.json +49 -0
  23. package/vendor/hindsight-memory/scripts/drain_pending.py +190 -0
  24. package/vendor/hindsight-memory/scripts/lib/__init__.py +0 -0
  25. package/vendor/hindsight-memory/scripts/lib/bank.py +122 -0
  26. package/vendor/hindsight-memory/scripts/lib/client.py +204 -0
  27. package/vendor/hindsight-memory/scripts/lib/config.py +180 -0
  28. package/vendor/hindsight-memory/scripts/lib/content.py +493 -0
  29. package/vendor/hindsight-memory/scripts/lib/daemon.py +334 -0
  30. package/vendor/hindsight-memory/scripts/lib/directives.py +119 -0
  31. package/vendor/hindsight-memory/scripts/lib/gateway_ipc.py +126 -0
  32. package/vendor/hindsight-memory/scripts/lib/llm.py +146 -0
  33. package/vendor/hindsight-memory/scripts/lib/pending.py +218 -0
  34. package/vendor/hindsight-memory/scripts/lib/state.py +196 -0
  35. package/vendor/hindsight-memory/scripts/recall.py +873 -0
  36. package/vendor/hindsight-memory/scripts/retain.py +286 -0
  37. package/vendor/hindsight-memory/scripts/session_end.py +122 -0
  38. package/vendor/hindsight-memory/scripts/session_start.py +76 -0
  39. package/vendor/hindsight-memory/scripts/setup_hooks.py +115 -0
  40. package/vendor/hindsight-memory/scripts/tests/__init__.py +0 -0
  41. package/vendor/hindsight-memory/scripts/tests/test_directives.py +211 -0
  42. package/vendor/hindsight-memory/scripts/tests/test_gateway_ipc.py +205 -0
  43. package/vendor/hindsight-memory/scripts/tests/test_recall_integration.py +621 -0
  44. package/vendor/hindsight-memory/settings.json +37 -0
  45. package/vendor/hindsight-memory/skills/setup.md +24 -0
  46. package/vendor/hindsight-memory/tests/conftest.py +94 -0
  47. package/vendor/hindsight-memory/tests/test_bank.py +142 -0
  48. package/vendor/hindsight-memory/tests/test_client.py +232 -0
  49. package/vendor/hindsight-memory/tests/test_config.py +128 -0
  50. package/vendor/hindsight-memory/tests/test_content.py +471 -0
  51. package/vendor/hindsight-memory/tests/test_drain_pending.py +192 -0
  52. package/vendor/hindsight-memory/tests/test_hooks.py +808 -0
  53. package/vendor/hindsight-memory/tests/test_manifest.py +14 -0
  54. package/vendor/hindsight-memory/tests/test_pending.py +152 -0
  55. package/vendor/hindsight-memory/tests/test_recall_exit_codes.py +325 -0
  56. package/vendor/hindsight-memory/tests/test_session_end_pending.py +205 -0
  57. package/vendor/hindsight-memory/tests/test_state.py +125 -0
@@ -0,0 +1,180 @@
1
+ """Configuration management for Hindsight plugin.
2
+
3
+ Loads settings from settings.json (plugin defaults) merged with environment
4
+ variable overrides. Full config schema matching Openclaw's 30+ options.
5
+ """
6
+
7
+ import json
8
+ import os
9
+ import sys
10
+
11
# Built-in configuration defaults. load_config() starts from a copy of this
# mapping and layers the plugin settings.json, the user config file and
# environment variable overrides on top of it.
DEFAULTS = {
    # Recall
    "autoRecall": True,
    # Switchroom default: "low" — vector search only, no LLM reranking.
    # Cuts the recall hook latency from ~5s (mid budget) to ~1-2s (low).
    # Operators who want richer recall can set HINDSIGHT_RECALL_BUDGET=mid
    # via per-agent env or write `recallBudget: "mid"` into the user
    # config file. Forensics on real klanker turns showed mid-budget
    # recall was ~5s of wall-clock latency dominated by the LLM filter
    # pass; for chat-pattern agents the vector hits alone are fine and
    # the 5s is the second-largest contributor to perceived dead air
    # (after the model TTFT).
    "recallBudget": "low",
    "recallMaxTokens": 1024,
    # Switchroom-local: cap on the number of memories injected into the
    # `<hindsight_memories>` block, regardless of token budget. Plugin v0.4.0
    # exposes `recallTopK` only in the Openclaw integration, not the
    # Claude Code integration, so we slice client-side in recall.py before
    # formatting. Set to 0 (or any non-positive value) to disable the cap
    # and inject everything Hindsight returns.
    "recallMaxMemories": 12,
    # Switchroom-local: minimum lexical (Jaccard) overlap between the
    # user's query terms and a memory's text terms. Memories below this
    # threshold are dropped before formatting. 0.0 disables the gate
    # (current behaviour: inject everything Hindsight returns up to the
    # count cap). Hindsight's HTTP API does not expose similarity
    # scores, so this is the switchroom-side quality filter — see #475.
    "recallMinOverlap": 0.0,
    "recallTypes": ["world", "experience"],
    "recallContextTurns": 1,
    "recallMaxQueryChars": 800,
    "recallRoles": ["user", "assistant"],
    "recallPromptPreamble": (
        "Relevant memories from past conversations (prioritize recent when "
        "conflicting). Only use memories that are directly useful to continue "
        "this conversation; ignore the rest:"
    ),
    # Retain
    "autoRetain": True,
    "retainMode": "full-session",
    "retainRoles": ["user", "assistant"],
    "retainEveryNTurns": 10,
    "retainOverlapTurns": 2,
    "retainToolCalls": True,
    "retainContext": "claude-code",
    "retainTags": [],
    "retainMetadata": {},
    # NOTE(review): recall-prefixed key listed inside the Retain group —
    # presumably extra banks queried during recall; confirm against recall.py.
    "recallAdditionalBanks": [],
    # Connection
    "hindsightApiUrl": None,
    "hindsightApiToken": None,
    "apiPort": 9077,
    "daemonIdleTimeout": 0,
    "embedVersion": "latest",
    "embedPackagePath": None,
    # Bank
    "bankId": None,
    "bankIdPrefix": "",
    "dynamicBankId": False,
    "dynamicBankGranularity": ["agent", "project"],
    "bankMission": "",
    "retainMission": None,
    "agentName": "claude-code",
    # LLM (for daemon mode)
    "llmProvider": None,
    "llmModel": None,
    "llmApiKeyEnv": None,
    # Misc
    "debug": False,
}
81
+
82
# Map env var names to config keys and their types.
# Each entry is ENV_NAME -> (config key, target type). load_config() converts
# the raw string with _cast_env() and skips the override when that conversion
# returns None, so an unparseable numeric value leaves the earlier setting in
# place.
ENV_OVERRIDES = {
    "HINDSIGHT_API_URL": ("hindsightApiUrl", str),
    "HINDSIGHT_API_TOKEN": ("hindsightApiToken", str),
    "HINDSIGHT_BANK_ID": ("bankId", str),
    "HINDSIGHT_AGENT_NAME": ("agentName", str),
    "HINDSIGHT_AUTO_RECALL": ("autoRecall", bool),
    "HINDSIGHT_AUTO_RETAIN": ("autoRetain", bool),
    "HINDSIGHT_RETAIN_MODE": ("retainMode", str),
    "HINDSIGHT_RECALL_BUDGET": ("recallBudget", str),
    "HINDSIGHT_RECALL_MAX_TOKENS": ("recallMaxTokens", int),
    # Switchroom-local: count cap. Set by start.sh from
    # agents.<name>.memory.recall.max_memories (cascading through
    # defaults.memory.recall.max_memories) when present in switchroom.yaml.
    "HINDSIGHT_RECALL_MAX_MEMORIES": ("recallMaxMemories", int),
    # Switchroom-local: lexical-overlap threshold (#475). Float in
    # [0.0, 1.0]. Set by start.sh from agents.<name>.memory.recall.min_overlap
    # (cascading through defaults). 0.0 = off (current behaviour).
    "HINDSIGHT_RECALL_MIN_OVERLAP": ("recallMinOverlap", float),
    "HINDSIGHT_RECALL_MAX_QUERY_CHARS": ("recallMaxQueryChars", int),
    "HINDSIGHT_RECALL_CONTEXT_TURNS": ("recallContextTurns", int),
    "HINDSIGHT_API_PORT": ("apiPort", int),
    "HINDSIGHT_DAEMON_IDLE_TIMEOUT": ("daemonIdleTimeout", int),
    "HINDSIGHT_EMBED_VERSION": ("embedVersion", str),
    "HINDSIGHT_EMBED_PACKAGE_PATH": ("embedPackagePath", str),
    "HINDSIGHT_DYNAMIC_BANK_ID": ("dynamicBankId", bool),
    "HINDSIGHT_BANK_MISSION": ("bankMission", str),
    "HINDSIGHT_LLM_PROVIDER": ("llmProvider", str),
    "HINDSIGHT_LLM_MODEL": ("llmModel", str),
    "HINDSIGHT_DEBUG": ("debug", bool),
}
113
+
114
+
115
def _cast_env(value: str, typ):
    """Convert a raw environment-variable string to ``typ``.

    Booleans are True only for "true", "1" or "yes" (case-insensitive) and
    False for any other string. ``int`` and ``float`` go through the builtin
    constructors. Every other target type gets the string back unchanged.

    Returns:
        The converted value, or None when the conversion raises ValueError
        or AttributeError (e.g. a non-numeric string for ``int``).
    """
    try:
        if typ is bool:
            return value.lower() in ("true", "1", "yes")
        # Numeric targets share one shape: call the type on the raw string.
        for numeric in (int, float):
            if typ is numeric:
                return numeric(value)
        return value
    except (ValueError, AttributeError):
        return None
127
+
128
+
129
def _load_settings_file(path: str, config: dict) -> None:
    """Merge a settings.json file into config in-place.

    Keys whose value is JSON ``null`` are ignored so they cannot erase a
    value set by an earlier layer.

    Args:
        path: Location of the JSON settings file.
        config: Configuration dict that is updated in place.

    A missing file is skipped silently. Any other read problem (I/O error,
    invalid JSON, undecodable bytes, or a JSON root that is not an object)
    is reported through debug_log and leaves ``config`` untouched.
    """
    try:
        # JSON is UTF-8 by specification; do not depend on the locale codec.
        with open(path, encoding="utf-8") as f:
            file_config = json.load(f)
    except FileNotFoundError:
        return
    except (ValueError, OSError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        debug_log(config, f"Failed to load {path}: {e}")
        return

    if not isinstance(file_config, dict):
        # A list/string/number root has no .items(); treat it as a bad file
        # instead of crashing the caller.
        debug_log(
            config,
            f"Failed to load {path}: expected a JSON object, got {type(file_config).__name__}",
        )
        return

    config.update({k: v for k, v in file_config.items() if v is not None})
139
+
140
+
141
def load_config() -> dict:
    """Load plugin configuration from settings.json + env overrides.

    Loading order (later entries win):
      1. Built-in defaults
      2. Plugin default settings.json (CLAUDE_PLUGIN_ROOT/settings.json)
      3. User config (~/.hindsight/claude-code.json)
      4. Environment variable overrides

    ~/.hindsight/claude-code.json is the recommended place to configure the
    plugin — same convention as ~/.openclaw/openclaw.json. It is stable across
    plugin updates and marketplace changes.

    Returns:
        A new dict that shares no mutable values with DEFAULTS, so callers
        may modify list/dict settings without affecting later calls.
    """
    import copy

    # 1. Built-in defaults. Deep copy: DEFAULTS holds lists and a dict, and a
    # shallow copy would hand the very same objects to every caller.
    config = copy.deepcopy(DEFAULTS)

    # 2. Plugin default settings.json (ships with the plugin, version-specific path)
    plugin_root = os.environ.get("CLAUDE_PLUGIN_ROOT", "")
    if not plugin_root:
        # Fall back to the directory three levels above this file.
        plugin_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
    _load_settings_file(os.path.join(plugin_root, "settings.json"), config)

    # 3. User config — stable, version-independent, matches openclaw convention
    user_config_path = os.path.join(os.path.expanduser("~"), ".hindsight", "claude-code.json")
    _load_settings_file(user_config_path, config)

    # 4. Environment variable overrides. A value that fails to cast is
    # skipped so the earlier layer's setting stays in effect.
    for env_name, (key, typ) in ENV_OVERRIDES.items():
        val = os.environ.get(env_name)
        if val is None:
            continue
        cast_val = _cast_env(val, typ)
        if cast_val is not None:
            config[key] = cast_val

    return config
175
+
176
+
177
def debug_log(config: dict, *args):
    """Print ``args`` to stderr with a "[Hindsight]" prefix when debugging.

    Nothing is written unless ``config["debug"]`` is truthy.
    """
    if not config.get("debug"):
        return
    print("[Hindsight]", *args, file=sys.stderr)
@@ -0,0 +1,493 @@
1
+ """Content processing utilities.
2
+
3
+ Faithful port of Openclaw plugin's content processing: memory tag stripping,
4
+ query composition/truncation, transcript formatting, and memory formatting.
5
+
6
+ Source: reference/openclaw-source/index.js — stripMemoryTags, composeRecallQuery,
7
+ truncateRecallQuery, sliceLastTurnsByUserBoundary, prepareRetentionTranscript,
8
+ formatMemories.
9
+ """
10
+
11
+ import re
12
+ from datetime import datetime, timezone
13
+
14
+ # ---------------------------------------------------------------------------
15
+ # Memory tag stripping (anti-feedback-loop)
16
+ # ---------------------------------------------------------------------------
17
+
18
+
19
def strip_channel_envelope(content: str) -> str:
    """Unwrap the first ``<channel ...>...</channel>`` element in ``content``.

    Claude Code delivers channel messages wrapped in XML such as:
        <channel source="plugin:telegram:telegram" chat_id="..." ...>
        actual message text
        </channel>

    This is the Claude Code counterpart of Openclaw's
    stripMetadataEnvelopes(). When a wrapper is present, only its inner text
    (whitespace-trimmed) is returned; text without a wrapper is returned
    unchanged.
    """
    envelope = re.search(r"<channel\b[^>]*>([\s\S]*?)</channel>", content)
    return content if envelope is None else envelope.group(1).strip()
36
+
37
+
38
def strip_memory_tags(content: str) -> str:
    """Delete <hindsight_memories> and <relevant_memories> blocks.

    Those blocks are injected during recall; removing them before retention
    stops recalled memories from being stored again (feedback loop).

    Port of: stripMemoryTags() in index.js
    """
    # Applied one after the other, hindsight blocks first.
    for block_pattern in (
        r"<hindsight_memories>[\s\S]*?</hindsight_memories>",
        r"<relevant_memories>[\s\S]*?</relevant_memories>",
    ):
        content = re.sub(block_pattern, "", content)
    return content
49
+
50
+
51
+ # ---------------------------------------------------------------------------
52
+ # Recall: query composition and truncation
53
+ # ---------------------------------------------------------------------------
54
+
55
+
56
def compose_recall_query(
    latest_query: str,
    messages: list,
    recall_context_turns: int,
    recall_roles: list = None,
) -> str:
    """Build the recall query, optionally prefixed with recent conversation.

    Port of: composeRecallQuery() in index.js

    With recall_context_turns <= 1, or without a non-empty message list, the
    trimmed latest query is returned on its own. Otherwise the last
    ``recall_context_turns`` turns are rendered above it:

        Prior context:

        user: ...
        assistant: ...

        <latest query>

    Args:
        latest_query: The newest user query.
        messages: Message dicts with 'role' and 'content'.
        recall_context_turns: Number of trailing turns to include.
        recall_roles: Roles allowed in the context (default: user, assistant).
    """
    latest = latest_query.strip()
    has_history = isinstance(messages, list) and bool(messages)
    if recall_context_turns <= 1 or not has_history:
        return latest

    roles = set(recall_roles or ["user", "assistant"])

    history = []
    for entry in slice_last_turns_by_user_boundary(messages, recall_context_turns):
        speaker = entry.get("role")
        if speaker not in roles:
            continue

        text = _extract_text_content(entry.get("content", ""), role=speaker)
        text = strip_memory_tags(strip_channel_envelope(text)).strip()
        # Drop empty entries, and the user message that merely repeats the
        # latest query (it is appended at the end anyway).
        if not text or (speaker == "user" and text == latest):
            continue

        history.append(f"{speaker}: {text}")

    if not history:
        return latest

    return "Prior context:" + "\n\n" + "\n".join(history) + "\n\n" + latest
111
+
112
+
113
def truncate_recall_query(query: str, latest_query: str, max_chars: int) -> str:
    """Shrink a composed recall query so it fits in ``max_chars``.

    Port of: truncateRecallQuery() in index.js

    The latest user message always survives. If the query has a
    "Prior context:" section, context lines are dropped oldest-first until
    the rest fits; if no context line fits (or the section cannot be located)
    only the latest message is returned, hard-cut to ``max_chars`` if needed.
    A non-positive ``max_chars`` disables truncation.
    """
    if max_chars <= 0:
        return query

    latest = latest_query.strip()
    if len(query) <= max_chars:
        return query

    # Fallback result: the latest message alone, hard-truncated when too long.
    fallback = latest[:max_chars]

    header = "Prior context:\n\n"
    header_at = query.find(header)
    if header_at == -1:
        return fallback

    tail_at = query.rfind("\n\n" + latest)
    if tail_at == -1:
        return fallback

    tail = query[tail_at:]  # "\n\n<latest>"
    if len(tail) >= max_chars:
        return fallback

    body = query[header_at + len(header) : tail_at]
    lines = [ln for ln in body.split("\n") if ln]

    # Characters left for context lines once header and tail are accounted for.
    budget = max_chars - len(header) - len(tail)

    # Walk from the newest line upward, keeping lines while they still fit.
    survivors = []
    used = 0
    for ln in reversed(lines):
        cost = len(ln) + (1 if survivors else 0)  # +1 for the joining newline
        if used + cost > budget:
            break
        survivors.append(ln)
        used += cost

    if not survivors:
        return fallback

    survivors.reverse()
    return header + "\n".join(survivors) + tail
163
+
164
+
165
+ # ---------------------------------------------------------------------------
166
+ # Turn slicing
167
+ # ---------------------------------------------------------------------------
168
+
169
+
170
def slice_last_turns_by_user_boundary(messages: list, turns: int) -> list:
    """Return the tail of ``messages`` covering the last ``turns`` turns.

    Port of: sliceLastTurnsByUserBoundary() in index.js

    A turn begins at a message whose role is "user". Scanning from the end,
    the slice starts at the ``turns``-th user message found. If fewer user
    messages exist, a copy of the whole list is returned. Invalid input
    (not a list, empty, or turns <= 0) yields an empty list.
    """
    if not isinstance(messages, list) or not messages or turns <= 0:
        return []

    remaining = turns
    for position in reversed(range(len(messages))):
        if messages[position].get("role") != "user":
            continue
        remaining -= 1
        if remaining <= 0:
            return messages[position:]

    # Not enough user boundaries: everything belongs to the window.
    return list(messages)
195
+
196
+
197
+ # ---------------------------------------------------------------------------
198
+ # Memory formatting (recall results → context string)
199
+ # ---------------------------------------------------------------------------
200
+
201
+
202
def format_memories(results: list) -> str:
    """Render recall results as a blank-line-separated bullet list.

    Port of: formatMemories() in index.js
    Each entry becomes: - <text> [<type>] (<mentioned_at>)
    where the type and date parts appear only when the value is non-empty.
    Returns "" for an empty result list.
    """
    if not results:
        return ""

    def render(memory: dict) -> str:
        body = memory.get("text", "")
        kind = memory.get("type", "")
        when = memory.get("mentioned_at", "")
        kind_part = f" [{kind}]" if kind else ""
        when_part = f" ({when})" if when else ""
        return f"- {body}{kind_part}{when_part}"

    return "\n\n".join(render(memory) for memory in results)
219
+
220
+
221
def format_current_time() -> str:
    """Return the current UTC time as "YYYY-MM-DD HH:MM" for recall context.

    Port of: formatCurrentTimeForRecall() in index.js
    """
    return f"{datetime.now(timezone.utc):%Y-%m-%d %H:%M}"
228
+
229
+
230
+ # ---------------------------------------------------------------------------
231
+ # Retention transcript formatting
232
+ # ---------------------------------------------------------------------------
233
+
234
+
235
def _extract_message_blocks(content, role: str = "") -> list:
    """Turn message content into structured blocks for JSON retention.

    Args:
        content: Either a plain string or a list of content-block dicts.
            Any other type produces an empty list.
        role: Role of the message; tool_use blocks are only considered for
            "assistant".

    Returns:
        A list of dicts, one per retained block:
        - {"type": "text", "text": "..."} for text (memory tags and channel
          envelope removed) and for the outgoing text of channel-message
          tool calls
        - {"type": "tool_use", "name": "...", "input": {...}} for other tool
          calls, except MCP tools whose suffix matches the operational pattern
        - {"type": "tool_result", "tool_use_id": "...", "content": "..."}
          with content capped at 2000 characters
    """

    def clean(raw):
        return strip_channel_envelope(strip_memory_tags(raw)).strip()

    if isinstance(content, str):
        text = clean(content)
        return [{"type": "text", "text": text}] if text else []

    if not isinstance(content, list):
        return []

    extracted = []
    for part in content:
        if not isinstance(part, dict):
            continue
        kind = part.get("type", "")

        if kind == "text":
            text = clean(part.get("text", ""))
            if text:
                extracted.append({"type": "text", "text": text})
            continue

        if kind == "tool_use" and role == "assistant":
            if _is_channel_message_tool(part):
                # Channel messages: keep only the outgoing text (first
                # non-blank candidate field wins).
                payload = part.get("input", {})
                for field in _MESSAGE_TEXT_FIELDS:
                    candidate = payload.get(field)
                    if isinstance(candidate, str) and candidate.strip():
                        extracted.append({"type": "text", "text": candidate.strip()})
                        break
                continue

            tool_name = part.get("name", "unknown")
            tool_args = part.get("input", {})
            # Skip Hindsight MCP tools to avoid feedback loops
            is_operational_mcp = tool_name.startswith("mcp__") and _OPERATIONAL_TOOL_PATTERN.search(
                tool_name.split("__")[-1]
            )
            if not is_operational_mcp:
                extracted.append({"type": "tool_use", "name": tool_name, "input": tool_args})
            continue

        if kind == "tool_result":
            # The result is either a plain string or a list of content blocks
            # (e.g. [{"type": "text", "text": "..."}] for Agent results).
            body = part.get("content", "")
            if isinstance(body, list):
                pieces = (
                    item.get("text", "").strip()
                    for item in body
                    if isinstance(item, dict) and item.get("type") == "text"
                )
                body = "\n".join(piece for piece in pieces if piece)
            if isinstance(body, str) and body.strip():
                snippet = body.strip()
                # Truncate very long results
                if len(snippet) > 2000:
                    snippet = snippet[:2000] + "... (truncated)"
                extracted.append(
                    {"type": "tool_result", "tool_use_id": part.get("tool_use_id", ""), "content": snippet}
                )

    return extracted
300
+
301
+
302
def prepare_retention_transcript(
    messages: list,
    retain_roles: list = None,
    retain_full_window: bool = False,
    include_tool_calls: bool = False,
) -> tuple:
    """Build the transcript that will be sent for retention.

    With include_tool_calls the transcript is JSON carrying the full message
    structure, tool calls and inputs included; otherwise it is the legacy
    text form using [role: ...]...[role:end] markers.

    Args:
        messages: List of message dicts with 'role' and 'content'.
        retain_roles: Roles to include (default: ['user', 'assistant']).
        retain_full_window: If True, retain all messages (chunked mode).
            If False, retain only the last turn (last user msg + responses).
        include_tool_calls: If True, output JSON format with full tool call data.

    Returns:
        (transcript_text, message_count) or (None, 0) if nothing to retain.
    """
    if not messages:
        return None, 0

    window = messages
    if not retain_full_window:
        # Default: keep only the last turn, i.e. everything from the most
        # recent user message onward.
        last_user_at = next(
            (i for i in range(len(messages) - 1, -1, -1) if messages[i].get("role") == "user"),
            -1,
        )
        if last_user_at == -1:
            return None, 0
        window = messages[last_user_at:]

    roles = set(retain_roles or ["user", "assistant"])

    formatter = _prepare_json_transcript if include_tool_calls else _prepare_text_transcript
    return formatter(window, roles)
345
+
346
+
347
def _prepare_json_transcript(messages: list, allowed_roles: set) -> tuple:
    """Serialize messages to a JSON transcript that keeps tool call data.

    Messages whose role is not in ``allowed_roles`` or that yield no content
    blocks are skipped. Returns (json_text, message_count), or (None, 0) when
    nothing remains or the serialized text is shorter than 10 characters.
    """
    import json

    payload = []
    for entry in messages:
        speaker = entry.get("role", "unknown")
        if speaker in allowed_roles:
            parts = _extract_message_blocks(entry.get("content", ""), role=speaker)
            if parts:
                payload.append({"role": speaker, "content": parts})

    if not payload:
        return None, 0

    serialized = json.dumps(payload, indent=None, ensure_ascii=False)
    if len(serialized.strip()) < 10:
        return None, 0

    return serialized, len(payload)
371
+
372
+
373
def _prepare_text_transcript(messages: list, allowed_roles: set) -> tuple:
    """Render messages in the legacy [role: ...]...[role:end] text format.

    Messages whose role is not in ``allowed_roles`` or whose cleaned text is
    empty are skipped. Returns (transcript, message_count), or (None, 0) when
    nothing remains or the transcript is shorter than 10 characters.
    """
    sections = []

    for entry in messages:
        speaker = entry.get("role", "unknown")
        if speaker not in allowed_roles:
            continue

        raw = _extract_text_content(entry.get("content", ""), role=speaker)
        body = strip_memory_tags(strip_channel_envelope(raw)).strip()
        if body:
            sections.append(f"[role: {speaker}]\n{body}\n[{speaker}:end]")

    if not sections:
        return None, 0

    transcript = "\n\n".join(sections)
    if len(transcript.strip()) < 10:
        return None, 0

    return transcript, len(sections)
399
+
400
+
401
+ # ---------------------------------------------------------------------------
402
+ # Helpers
403
+ # ---------------------------------------------------------------------------
404
+
405
# Fields in tool_use input that carry the outgoing message text.
# Ordered by likelihood — first match wins.
_MESSAGE_TEXT_FIELDS = ("text", "body", "message", "content")

# MCP tool name suffixes that are operational, not conversational.
# Checked against the last segment of the tool name (after the last __).
# NOTE(review): `re` is already imported at the top of this module, so `_re`
# is only a second alias for the same module.
import re as _re

# The pattern is unanchored and callers use .search(), so any of these
# fragments appearing anywhere in the suffix counts as a match (matching is
# case-insensitive).
_OPERATIONAL_TOOL_PATTERN = _re.compile(
    r"(?:recall|retain|reflect|search|extract|create_|delete_|update_|get_|list_)",
    _re.IGNORECASE,
)
417
+
418
+
419
def _is_channel_message_tool(block: dict) -> bool:
    """Decide whether a tool_use block is an outgoing channel message.

    The check is structural instead of relying on specific tool names:
      1. The tool name must start with "mcp__" (built-in tools such as Bash,
         Read, Write do not).
      2. The last "__"-separated segment of the name must NOT match the
         operational pattern (recall, search, CRUD-style prefixes).
      3. The input must be a dict holding a non-blank string in one of the
         text-like fields (text, body, message, content).

    This is meant to recognise any channel plugin (Telegram, Slack, Discord,
    Matrix, future channels) without hardcoding tool names.
    """
    tool_name = block.get("name", "")
    if not tool_name.startswith("mcp__"):
        return False

    # Only the tool suffix is inspected, never the server part of the name.
    if _OPERATIONAL_TOOL_PATTERN.search(tool_name.split("__")[-1]):
        return False

    payload = block.get("input", {})
    if not isinstance(payload, dict):
        return False

    # At least one text-carrying field must hold real content.
    for field in _MESSAGE_TEXT_FIELDS:
        candidate = payload.get(field)
        if isinstance(candidate, str) and candidate.strip():
            return True
    return False
448
+
449
+
450
def _extract_text_content(content, role: str = "") -> str:
    """Flatten message content (string or list of blocks) into plain text.

    A string is returned as-is (channel XML wrappers are removed separately
    by strip_channel_envelope). For a list of blocks, the following are
    collected and joined with newlines:
      - {type: "text"} blocks — terminal output/narration
      - for assistant messages, {type: "tool_use"} blocks recognised as
        channel messages — the agent's actual replies to the user. The
        detection is structural (MCP tool with a text-like input field),
        not name-based.

    Left out:
      - {type: "thinking"} — internal reasoning
      - {type: "tool_use"} for operational tools — Bash, Read, Write, recall, etc.
      - {type: "tool_result"} — operational results, not conversation

    Any other content type yields "".
    """
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        return ""

    collected = []
    for part in content:
        if not isinstance(part, dict):
            continue
        kind = part.get("type", "")

        if kind == "text":
            # Text blocks: terminal output / narration
            stripped = part.get("text", "").strip()
            if stripped:
                collected.append(stripped)
        elif kind == "tool_use" and role == "assistant" and _is_channel_message_tool(part):
            # Channel message: take the first non-blank text-like field.
            payload = part.get("input", {})
            for field in _MESSAGE_TEXT_FIELDS:
                candidate = payload.get(field)
                if isinstance(candidate, str) and candidate.strip():
                    collected.append(candidate.strip())
                    break

    return "\n".join(collected)