nexo-brain 3.1.9 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +21 -0
- package/package.json +1 -1
- package/src/scripts/deep-sleep/collect.py +6 -200
- package/src/server.py +41 -0
- package/src/system_catalog.py +419 -0
- package/src/tools_system_catalog.py +19 -0
- package/src/tools_transcripts.py +98 -0
- package/src/transcript_utils.py +412 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "3.1.9",
|
|
3
|
+
"version": "3.2.0",
|
|
4
4
|
"description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "NEXO Brain",
|
package/README.md
CHANGED
|
@@ -21,6 +21,7 @@
|
|
|
21
21
|
Start here:
|
|
22
22
|
- [5-minute quickstart](docs/quickstart-5-minutes.md)
|
|
23
23
|
- [Workflow quickstart](docs/workflows-quickstart.md)
|
|
24
|
+
- [Recent memory fallbacks + live system catalog](docs/recent-memory-fallbacks-and-system-catalog.md)
|
|
24
25
|
- [Supported client guides](docs/integrations/cursor.md)
|
|
25
26
|
- [Docker setup](docs/docker-setup.md)
|
|
26
27
|
- [Architecture visuals](docs/architecture-visuals.md)
|
|
@@ -79,6 +80,13 @@ Versions `3.0.0` and `3.0.1` close the next execution gap:
|
|
|
79
80
|
- `cost_per_solved_task`
|
|
80
81
|
- SDK/API/quickstart surface
|
|
81
82
|
|
|
83
|
+
Versions `3.1.7` through `3.2.0` close the recent-memory gap:
|
|
84
|
+
|
|
85
|
+
- recent operational continuity is now first-class through `hot context` and `recent events`
|
|
86
|
+
- the runtime can build a reusable pre-action bundle instead of reconstructing the last few hours from diaries and durable recall only
|
|
87
|
+
- when even that misses, NEXO now exposes raw transcript fallback tools for Claude Code and Codex session stores
|
|
88
|
+
- NEXO can now inspect itself through a live system catalog derived from canonical sources instead of relying only on stale docs or operator memory
|
|
89
|
+
|
|
82
90
|
### Client Capability Matrix
|
|
83
91
|
|
|
84
92
|
| Capability | Claude Code | Codex | Claude Desktop |
|
|
@@ -340,6 +348,15 @@ NEXO Brain provides **150+ MCP tools** across 23 categories. These features impl
|
|
|
340
348
|
| **Auto-Merge Duplicates** | Batch cosine deduplication during the 03:00 sleep cycle. Respects sibling discrimination — similar memories about different contexts are kept separate. |
|
|
341
349
|
| **Memory Dreaming** | Discovers hidden connections between recent memories during the 03:00 sleep cycle and now feeds a 60-day long-horizon Deep Sleep blend, so older patterns can reappear when they become relevant again. |
|
|
342
350
|
|
|
351
|
+
### Operational Continuity
|
|
352
|
+
|
|
353
|
+
| Feature | What It Does |
|
|
354
|
+
|---------|-------------|
|
|
355
|
+
| **Hot Context 24h** | Keeps active topics, blockers, and waiting states fresh across sessions, clients, cron ticks, and channel changes. This is the shared recent-memory substrate for operational continuity. |
|
|
356
|
+
| **Pre-Action Context Bundle** | Loads recent contexts, recent events, related reminders, and related followups before acting, so continuity is explicit instead of prompt-only. |
|
|
357
|
+
| **Transcript Fallback** | When recent-memory capture is thin or missing, NEXO can now search and read recent Claude Code / Codex transcripts directly through MCP instead of pretending the conversation is lost. |
|
|
358
|
+
| **Live System Catalog** | NEXO can now inspect its own current surface — core tools, plugin tools, skills, scripts, crons, projects, and artifacts — through a live catalog derived from canonical sources at read time. |
|
|
359
|
+
|
|
343
360
|
### Retrieval
|
|
344
361
|
|
|
345
362
|
| Feature | What It Does |
|
|
@@ -724,10 +741,14 @@ Public entry points for the mental model now stay intentionally small:
|
|
|
724
741
|
- `nexo_memory_recall`
|
|
725
742
|
- `nexo_consolidate`
|
|
726
743
|
- `nexo_run_workflow`
|
|
744
|
+
- `nexo_pre_action_context`
|
|
745
|
+
- `nexo_transcript_search`
|
|
746
|
+
- `nexo_system_catalog`
|
|
727
747
|
|
|
728
748
|
If you want the shell or Python wrappers instead of raw MCP tools:
|
|
729
749
|
- [docs/quickstart-5-minutes.md](docs/quickstart-5-minutes.md)
|
|
730
750
|
- [docs/memory-classes.md](docs/memory-classes.md)
|
|
751
|
+
- [docs/recent-memory-fallbacks-and-system-catalog.md](docs/recent-memory-fallbacks-and-system-catalog.md)
|
|
731
752
|
- [docs/sdk-python.md](docs/sdk-python.md)
|
|
732
753
|
- [docs/reference-verticals.md](docs/reference-verticals.md)
|
|
733
754
|
- [compare/README.md](compare/README.md)
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "3.1.9",
|
|
3
|
+
"version": "3.2.0",
|
|
4
4
|
"mcpName": "io.github.wazionapps/nexo",
|
|
5
5
|
"description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
|
|
6
6
|
"homepage": "https://nexo-brain.com",
|
|
@@ -19,6 +19,7 @@ import sys
|
|
|
19
19
|
from collections import Counter
|
|
20
20
|
from datetime import datetime, timedelta
|
|
21
21
|
from pathlib import Path
|
|
22
|
+
import transcript_utils as _transcripts
|
|
22
23
|
|
|
23
24
|
NEXO_HOME = Path(os.environ.get("NEXO_HOME", str(Path.home() / ".nexo")))
|
|
24
25
|
NEXO_CODE = Path(os.environ.get("NEXO_CODE", ""))
|
|
@@ -64,196 +65,22 @@ def _session_identifier(client: str, session_file: str) -> str:
|
|
|
64
65
|
|
|
65
66
|
def find_claude_session_files() -> list[Path]:
|
|
66
67
|
"""Find Claude Code session JSONL files under ~/.claude/projects."""
|
|
67
|
-
|
|
68
|
-
if not claude_dir.exists():
|
|
69
|
-
return []
|
|
70
|
-
return sorted(claude_dir.rglob("*.jsonl"))
|
|
68
|
+
return _transcripts.find_claude_session_files()
|
|
71
69
|
|
|
72
70
|
|
|
73
71
|
def find_codex_session_files() -> list[Path]:
|
|
74
72
|
"""Find Codex session JSONL files under ~/.codex/sessions and archived_sessions."""
|
|
75
|
-
|
|
76
|
-
Path.home() / ".codex" / "sessions",
|
|
77
|
-
Path.home() / ".codex" / "archived_sessions",
|
|
78
|
-
]
|
|
79
|
-
files: list[Path] = []
|
|
80
|
-
seen: set[str] = set()
|
|
81
|
-
for root in roots:
|
|
82
|
-
if not root.exists():
|
|
83
|
-
continue
|
|
84
|
-
for jsonl in sorted(root.rglob("*.jsonl")):
|
|
85
|
-
key = jsonl.name
|
|
86
|
-
if key in seen:
|
|
87
|
-
continue
|
|
88
|
-
seen.add(key)
|
|
89
|
-
files.append(jsonl)
|
|
90
|
-
return files
|
|
73
|
+
return _transcripts.find_codex_session_files()
|
|
91
74
|
|
|
92
75
|
|
|
93
76
|
def extract_claude_session(jsonl_path: Path) -> dict | None:
|
|
94
77
|
"""Extract clean transcript from a Claude Code JSONL session."""
|
|
95
|
-
|
|
96
|
-
tool_uses = []
|
|
97
|
-
user_msg_count = 0
|
|
98
|
-
|
|
99
|
-
try:
|
|
100
|
-
with open(jsonl_path, "r") as f:
|
|
101
|
-
for line_no, line in enumerate(f, 1):
|
|
102
|
-
line = line.strip()
|
|
103
|
-
if not line:
|
|
104
|
-
continue
|
|
105
|
-
try:
|
|
106
|
-
d = json.loads(line)
|
|
107
|
-
except json.JSONDecodeError:
|
|
108
|
-
continue
|
|
109
|
-
|
|
110
|
-
msg_type = d.get("type")
|
|
111
|
-
|
|
112
|
-
# User messages
|
|
113
|
-
if msg_type == "user":
|
|
114
|
-
content = d.get("message", {}).get("content", "")
|
|
115
|
-
if isinstance(content, str) and content.strip():
|
|
116
|
-
if content.startswith("<system-reminder>"):
|
|
117
|
-
continue
|
|
118
|
-
messages.append({
|
|
119
|
-
"role": "user",
|
|
120
|
-
"index": line_no,
|
|
121
|
-
"text": _redact_sensitive(content[:5000]),
|
|
122
|
-
"uuid": d.get("uuid", "")
|
|
123
|
-
})
|
|
124
|
-
user_msg_count += 1
|
|
125
|
-
|
|
126
|
-
# Assistant messages
|
|
127
|
-
elif msg_type in ("message", "assistant"):
|
|
128
|
-
msg = d.get("message", {})
|
|
129
|
-
content_blocks = msg.get("content", [])
|
|
130
|
-
text_parts = []
|
|
131
|
-
for block in content_blocks:
|
|
132
|
-
if isinstance(block, dict):
|
|
133
|
-
if block.get("type") == "text":
|
|
134
|
-
text_parts.append(block.get("text", ""))
|
|
135
|
-
elif block.get("type") == "tool_use":
|
|
136
|
-
tool_input = block.get("input", {})
|
|
137
|
-
raw_file = (
|
|
138
|
-
tool_input.get("file_path", "")
|
|
139
|
-
or str(tool_input.get("command", ""))[:100]
|
|
140
|
-
) if isinstance(tool_input, dict) else ""
|
|
141
|
-
tool_uses.append({
|
|
142
|
-
"tool": block.get("name", ""),
|
|
143
|
-
"input_keys": list(tool_input.keys()) if isinstance(tool_input, dict) else [],
|
|
144
|
-
"file": _redact_sensitive(raw_file)
|
|
145
|
-
})
|
|
146
|
-
if text_parts:
|
|
147
|
-
combined = "\n".join(text_parts)[:5000]
|
|
148
|
-
combined = _redact_sensitive(combined)
|
|
149
|
-
messages.append({
|
|
150
|
-
"role": "assistant",
|
|
151
|
-
"index": line_no,
|
|
152
|
-
"text": combined
|
|
153
|
-
})
|
|
154
|
-
|
|
155
|
-
except Exception as e:
|
|
156
|
-
print(f" [collect] Error reading {jsonl_path}: {e}", file=sys.stderr)
|
|
157
|
-
return None
|
|
158
|
-
|
|
159
|
-
if user_msg_count < MIN_USER_MESSAGES:
|
|
160
|
-
return None
|
|
161
|
-
|
|
162
|
-
return {
|
|
163
|
-
"client": "claude_code",
|
|
164
|
-
"session_file": _session_identifier("claude_code", jsonl_path.name),
|
|
165
|
-
"display_name": jsonl_path.name,
|
|
166
|
-
"session_path": str(jsonl_path),
|
|
167
|
-
"message_count": len(messages),
|
|
168
|
-
"user_message_count": user_msg_count,
|
|
169
|
-
"tool_use_count": len(tool_uses),
|
|
170
|
-
"messages": messages,
|
|
171
|
-
"tool_uses": tool_uses,
|
|
172
|
-
"source": "claude_projects",
|
|
173
|
-
}
|
|
78
|
+
return _transcripts.extract_claude_session(jsonl_path)
|
|
174
79
|
|
|
175
80
|
|
|
176
81
|
def extract_codex_session(jsonl_path: Path) -> dict | None:
|
|
177
82
|
"""Extract clean transcript from a Codex JSONL session."""
|
|
178
|
-
|
|
179
|
-
tool_uses = []
|
|
180
|
-
user_msg_count = 0
|
|
181
|
-
session_meta: dict = {}
|
|
182
|
-
|
|
183
|
-
try:
|
|
184
|
-
with open(jsonl_path, "r") as f:
|
|
185
|
-
for line_no, line in enumerate(f, 1):
|
|
186
|
-
line = line.strip()
|
|
187
|
-
if not line:
|
|
188
|
-
continue
|
|
189
|
-
try:
|
|
190
|
-
d = json.loads(line)
|
|
191
|
-
except json.JSONDecodeError:
|
|
192
|
-
continue
|
|
193
|
-
|
|
194
|
-
item_type = d.get("type")
|
|
195
|
-
payload = d.get("payload", {})
|
|
196
|
-
|
|
197
|
-
if item_type == "session_meta" and isinstance(payload, dict):
|
|
198
|
-
session_meta = payload
|
|
199
|
-
continue
|
|
200
|
-
|
|
201
|
-
if item_type == "event_msg" and isinstance(payload, dict) and payload.get("type") == "user_message":
|
|
202
|
-
content = str(payload.get("message", "") or "").strip()
|
|
203
|
-
if not content or content.startswith("<environment_context>"):
|
|
204
|
-
continue
|
|
205
|
-
messages.append({
|
|
206
|
-
"role": "user",
|
|
207
|
-
"index": line_no,
|
|
208
|
-
"text": _redact_sensitive(content[:5000]),
|
|
209
|
-
})
|
|
210
|
-
user_msg_count += 1
|
|
211
|
-
continue
|
|
212
|
-
|
|
213
|
-
if item_type == "response_item" and isinstance(payload, dict):
|
|
214
|
-
response_type = payload.get("type")
|
|
215
|
-
role = payload.get("role")
|
|
216
|
-
if response_type == "message" and role == "assistant":
|
|
217
|
-
text_parts = []
|
|
218
|
-
for block in payload.get("content", []) or []:
|
|
219
|
-
if isinstance(block, dict) and block.get("type") == "output_text":
|
|
220
|
-
text_parts.append(str(block.get("text", "")))
|
|
221
|
-
combined = "\n".join(part for part in text_parts if part).strip()
|
|
222
|
-
if combined:
|
|
223
|
-
messages.append({
|
|
224
|
-
"role": "assistant",
|
|
225
|
-
"index": line_no,
|
|
226
|
-
"text": _redact_sensitive(combined[:5000]),
|
|
227
|
-
})
|
|
228
|
-
elif response_type == "function_call":
|
|
229
|
-
tool_uses.append({
|
|
230
|
-
"tool": payload.get("name", ""),
|
|
231
|
-
"input_keys": [],
|
|
232
|
-
"file": _redact_sensitive(str(payload.get("arguments", ""))[:100]),
|
|
233
|
-
})
|
|
234
|
-
|
|
235
|
-
except Exception as e:
|
|
236
|
-
print(f" [collect] Error reading {jsonl_path}: {e}", file=sys.stderr)
|
|
237
|
-
return None
|
|
238
|
-
|
|
239
|
-
if user_msg_count < MIN_USER_MESSAGES:
|
|
240
|
-
return None
|
|
241
|
-
|
|
242
|
-
return {
|
|
243
|
-
"client": "codex",
|
|
244
|
-
"session_file": _session_identifier("codex", jsonl_path.name),
|
|
245
|
-
"display_name": jsonl_path.name,
|
|
246
|
-
"session_path": str(jsonl_path),
|
|
247
|
-
"message_count": len(messages),
|
|
248
|
-
"user_message_count": user_msg_count,
|
|
249
|
-
"tool_use_count": len(tool_uses),
|
|
250
|
-
"messages": messages,
|
|
251
|
-
"tool_uses": tool_uses,
|
|
252
|
-
"source": session_meta.get("source", "codex"),
|
|
253
|
-
"cwd": session_meta.get("cwd", ""),
|
|
254
|
-
"originator": session_meta.get("originator", ""),
|
|
255
|
-
"session_uid": session_meta.get("id", ""),
|
|
256
|
-
}
|
|
83
|
+
return _transcripts.extract_codex_session(jsonl_path)
|
|
257
84
|
|
|
258
85
|
|
|
259
86
|
def collect_transcripts_since(since_iso: str, until_iso: str = "") -> list[dict]:
|
|
@@ -262,28 +89,7 @@ def collect_transcripts_since(since_iso: str, until_iso: str = "") -> list[dict]
|
|
|
262
89
|
Uses a watermark approach: deep sleep tracks the last processed timestamp
|
|
263
90
|
so nothing is missed regardless of when sessions happen (day, night, etc.).
|
|
264
91
|
"""
|
|
265
|
-
|
|
266
|
-
until_dt = datetime.fromisoformat(until_iso) if until_iso else datetime.now()
|
|
267
|
-
|
|
268
|
-
sessions = []
|
|
269
|
-
transcript_files: list[tuple[str, Path]] = [
|
|
270
|
-
("claude_code", path) for path in find_claude_session_files()
|
|
271
|
-
] + [
|
|
272
|
-
("codex", path) for path in find_codex_session_files()
|
|
273
|
-
]
|
|
274
|
-
for client, session_file in transcript_files:
|
|
275
|
-
try:
|
|
276
|
-
mtime = datetime.fromtimestamp(session_file.stat().st_mtime)
|
|
277
|
-
except OSError:
|
|
278
|
-
continue
|
|
279
|
-
if not (since_dt < mtime <= until_dt):
|
|
280
|
-
continue
|
|
281
|
-
session = extract_codex_session(session_file) if client == "codex" else extract_claude_session(session_file)
|
|
282
|
-
if session:
|
|
283
|
-
session["modified"] = mtime.isoformat()
|
|
284
|
-
sessions.append(session)
|
|
285
|
-
sessions.sort(key=lambda s: s["modified"])
|
|
286
|
-
return sessions
|
|
92
|
+
return _transcripts.collect_transcripts_since(since_iso, until_iso)
|
|
287
93
|
|
|
288
94
|
|
|
289
95
|
# ── Database queries ──────────────────────────────────────────────────────
|
package/src/server.py
CHANGED
|
@@ -23,6 +23,15 @@ from tools_hot_context import (
|
|
|
23
23
|
handle_recent_context_resolve,
|
|
24
24
|
handle_hot_context_list,
|
|
25
25
|
)
|
|
26
|
+
from tools_transcripts import (
|
|
27
|
+
handle_transcript_recent,
|
|
28
|
+
handle_transcript_search,
|
|
29
|
+
handle_transcript_read,
|
|
30
|
+
)
|
|
31
|
+
from tools_system_catalog import (
|
|
32
|
+
handle_system_catalog,
|
|
33
|
+
handle_tool_explain,
|
|
34
|
+
)
|
|
26
35
|
from user_context import get_context as _get_ctx
|
|
27
36
|
from tools_coordination import (
|
|
28
37
|
handle_track, handle_untrack, handle_files,
|
|
@@ -209,6 +218,8 @@ mcp = FastMCP(
|
|
|
209
218
|
"- **Delegate:** prefer direct. If needed: `nexo_context_packet(area)` + guard + 'if unsure STOP'\n"
|
|
210
219
|
"- **Memory:** `nexo_recall` searches all. For fresh 24h continuity use `nexo_pre_action_context(query='...')` before acting and "
|
|
211
220
|
"`nexo_recent_context_capture(...)` / `nexo_recent_context_resolve(...)` for important ongoing threads. "
|
|
221
|
+
"If that is not enough, use `nexo_transcript_search(...)` / `nexo_transcript_read(...)` as the raw fallback to full conversations. "
|
|
222
|
+
"Use `nexo_system_catalog(...)` / `nexo_tool_explain(...)` when you need the live map of NEXO itself. "
|
|
212
223
|
"Capture: errors→`nexo_learning_add`, prefs, entities, decisions\n"
|
|
213
224
|
"- **Change log:** `nexo_task_close` should be the default closure path. If you bypass it, call `nexo_change_log(...)` after production edits. NOT for config dir\n"
|
|
214
225
|
"- **Diary:** When user signals end of session (any language, any style — 'bye', 'done', 'cierro', etc.), "
|
|
@@ -380,6 +391,36 @@ def nexo_hot_context_list(hours: int = 24, limit: int = 10, state: str = "") ->
|
|
|
380
391
|
return handle_hot_context_list(hours, limit, state)
|
|
381
392
|
|
|
382
393
|
|
|
394
|
+
@mcp.tool
|
|
395
|
+
def nexo_transcript_recent(hours: int = 24, client: str = "", limit: int = 10) -> str:
|
|
396
|
+
"""List recent Claude Code / Codex transcripts visible to NEXO."""
|
|
397
|
+
return handle_transcript_recent(hours, client, limit)
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
@mcp.tool
|
|
401
|
+
def nexo_transcript_search(query: str = "", hours: int = 24, client: str = "", limit: int = 10) -> str:
|
|
402
|
+
"""Search recent transcripts directly when recall/hot-context are not enough."""
|
|
403
|
+
return handle_transcript_search(query, hours, client, limit)
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
@mcp.tool
|
|
407
|
+
def nexo_transcript_read(session_ref: str = "", transcript_path: str = "", client: str = "", max_messages: int = 80) -> str:
|
|
408
|
+
"""Read a full transcript fallback by session id, transcript display name, session_uid, or exact path."""
|
|
409
|
+
return handle_transcript_read(session_ref, transcript_path, client, max_messages)
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
@mcp.tool
|
|
413
|
+
def nexo_system_catalog(section: str = "", query: str = "", limit: int = 20) -> str:
|
|
414
|
+
"""Read NEXO's live system catalog built from core tools, plugins, skills, scripts, crons, projects, and artifacts."""
|
|
415
|
+
return handle_system_catalog(section, query, limit)
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
@mcp.tool
|
|
419
|
+
def nexo_tool_explain(name: str) -> str:
|
|
420
|
+
"""Explain a live NEXO tool/capability from the generated system catalog."""
|
|
421
|
+
return handle_tool_explain(name)
|
|
422
|
+
|
|
423
|
+
|
|
383
424
|
@mcp.tool
|
|
384
425
|
def nexo_smart_startup() -> str:
|
|
385
426
|
"""Pre-load relevant cognitive memories based on pending followups, due reminders, and last session topics.
|
|
@@ -0,0 +1,419 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
"""Live system catalog / ontology derived from canonical NEXO sources."""
|
|
3
|
+
|
|
4
|
+
import ast
|
|
5
|
+
import importlib.util
|
|
6
|
+
import json
|
|
7
|
+
import os
|
|
8
|
+
import sys
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from db import get_db, list_skills, sync_skill_directories
|
|
12
|
+
from plugin_loader import PERSONAL_PLUGINS_DIR, PLUGINS_DIR, list_plugins
|
|
13
|
+
from script_registry import list_scripts
|
|
14
|
+
|
|
15
|
+
# Root of the NEXO data directory; the NEXO_HOME environment variable
# overrides the default of ~/.nexo.
NEXO_HOME = Path(os.environ.get("NEXO_HOME", str(Path.home() / ".nexo")))
# Directory containing this module, resolved to an absolute path.
NEXO_CODE = Path(__file__).resolve().parent
# server.py next to this module; _parse_core_tools parses it to find core tools.
SERVER_PATH = NEXO_CODE / "server.py"
# Cron manifests, tried in this order by _cron_entries (code dir, then NEXO_HOME).
MANIFEST_PATHS = [NEXO_CODE / "crons" / "manifest.json", NEXO_HOME / "crons" / "manifest.json"]
# JSON file read by _project_entries to list projects.
ATLAS_PATH = NEXO_HOME / "brain" / "project-atlas.json"

# Catalog section names, in the order they are summarized and searched.
SECTION_ORDER = (
    "core_tools",
    "plugin_tools",
    "skills",
    "scripts",
    "crons",
    "projects",
    "artifacts",
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _normalize_text(text: str | None) -> str:
|
|
33
|
+
return str(text or "").strip().lower()
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _tokenize(text: str | None) -> set[str]:
    """Split *text* into a set of lower-cased search tokens.

    A token starts with a letter or digit and may continue with letters,
    digits, ``.``, ``_``-free punctuation ``:`` and ``-`` (underscores are
    separators, so ``nexo_tool_explain`` yields ``nexo``, ``tool``,
    ``explain``). Tokens shorter than three characters are discarded.

    Trailing ``.``, ``:``, ``-`` and ``_`` are stripped from each token so
    that sentence punctuation does not prevent a match: ``"NEXO."`` at the
    end of a description tokenizes to ``nexo``, not ``nexo.``.

    Args:
        text: Arbitrary text, or ``None``.

    Returns:
        The set of distinct tokens (possibly empty).
    """
    import re

    tokens: set[str] = set()
    for raw in re.findall(r"[a-z0-9][a-z0-9._:-]{1,}", _normalize_text(text)):
        # Drop trailing punctuation before the length check so "ab." is
        # rejected as a 2-character token rather than kept as 3 characters.
        token = raw.rstrip("._:-")
        if len(token) >= 3:
            tokens.add(token)
    return tokens
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _score(query_tokens: set[str], haystack: str) -> float:
    """Score how well *haystack* covers *query_tokens*, from 0.0 to 1.0.

    The score is the number of shared tokens divided by the size of the
    smaller of the two token sets. It is 0.0 when either side has no tokens
    or when nothing overlaps.
    """
    if not query_tokens:
        return 0.0
    candidate_tokens = _tokenize(haystack)
    shared_count = len(query_tokens & candidate_tokens)
    if shared_count == 0:
        # Covers both "haystack produced no tokens" and "no common token".
        return 0.0
    smaller_side = min(len(query_tokens), len(candidate_tokens))
    return shared_count / max(1, smaller_side)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _truncate(text: str | None, limit: int = 180) -> str:
|
|
59
|
+
clean = str(text or "").strip()
|
|
60
|
+
if len(clean) <= limit:
|
|
61
|
+
return clean
|
|
62
|
+
return clean[: limit - 3] + "..."
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _tool_category(name: str) -> str:
|
|
66
|
+
if name.startswith("nexo_recent_context") or name.startswith("nexo_pre_action_context") or name.startswith("nexo_hot_context"):
|
|
67
|
+
return "recent_memory"
|
|
68
|
+
if name.startswith("nexo_transcript"):
|
|
69
|
+
return "transcripts"
|
|
70
|
+
if name.startswith("nexo_session") or name.startswith("nexo_checkpoint"):
|
|
71
|
+
return "sessions"
|
|
72
|
+
if name.startswith("nexo_followup") or name.startswith("nexo_reminder"):
|
|
73
|
+
return "reminders"
|
|
74
|
+
if name.startswith("nexo_skill"):
|
|
75
|
+
return "skills"
|
|
76
|
+
if name.startswith("nexo_plugin"):
|
|
77
|
+
return "plugins"
|
|
78
|
+
if name.startswith("nexo_goal") or name.startswith("nexo_workflow"):
|
|
79
|
+
return "workflow"
|
|
80
|
+
if name.startswith("nexo_learning"):
|
|
81
|
+
return "learnings"
|
|
82
|
+
if name.startswith("nexo_guard") or name.startswith("nexo_task") or name.startswith("nexo_cortex"):
|
|
83
|
+
return "protocol"
|
|
84
|
+
return "general"
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _parse_core_tools() -> list[dict]:
    """List the core MCP tools declared at module level in ``SERVER_PATH``.

    The server file is parsed with ``ast`` (it is never imported or
    executed). A top-level ``def`` or ``async def`` counts as a tool when it
    carries an ``mcp.tool`` decorator, in either the bare form ``@mcp.tool``
    or the call form ``@mcp.tool(...)``.

    Returns:
        One dict per tool with keys ``kind``, ``name``, ``description``
        (first line of the function docstring, or ``""``), ``category``,
        ``path``, ``line`` and ``source``. Returns ``[]`` when the server
        file is missing or cannot be read/parsed.
    """
    if not SERVER_PATH.is_file():
        return []
    try:
        # Read explicitly as UTF-8: the file contains non-ASCII text and the
        # platform default encoding is locale-dependent.
        tree = ast.parse(SERVER_PATH.read_text(encoding="utf-8"))
    except Exception:
        # Deliberate best-effort: an unreadable server file yields an empty
        # section instead of breaking the whole catalog.
        return []

    def _is_mcp_tool(decorator: ast.expr) -> bool:
        # Unwrap ``@mcp.tool(...)`` to the ``mcp.tool`` attribute it calls.
        target = decorator.func if isinstance(decorator, ast.Call) else decorator
        return (
            isinstance(target, ast.Attribute)
            and getattr(target.value, "id", "") == "mcp"
            and target.attr == "tool"
        )

    entries: list[dict] = []
    for node in tree.body:
        if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue
        if not any(_is_mcp_tool(dec) for dec in node.decorator_list):
            continue
        doc = (ast.get_docstring(node) or "").strip()
        first_line = doc.splitlines()[0].strip() if doc else ""
        entries.append(
            {
                "kind": "core_tool",
                "name": node.name,
                "description": first_line,
                "category": _tool_category(node.name),
                "path": str(SERVER_PATH),
                "line": int(getattr(node, "lineno", 0) or 0),
                "source": "core",
            }
        )
    return entries
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _plugin_module_tools(filename: str, created_by: str) -> dict[str, str]:
    """Return ``{tool name: description}`` declared by one plugin file.

    The module ``plugins.<filename minus its last three characters>`` is
    taken from ``sys.modules`` when already loaded. Otherwise the file is
    loaded from ``PLUGINS_DIR`` (when *created_by* is ``"repo"``) or
    ``PERSONAL_PLUGINS_DIR`` and executed, without being registered in
    ``sys.modules``. The module's ``TOOLS`` attribute is expected to hold
    3-item entries whose second and third items are name and description;
    entries of any other shape are skipped. Any load failure yields ``{}``.
    """
    qualified_name = f"plugins.{filename[:-3]}"
    loaded = sys.modules.get(qualified_name)

    if loaded is None:
        base_dir = PERSONAL_PLUGINS_DIR if created_by != "repo" else PLUGINS_DIR
        candidate = Path(base_dir) / filename
        if not candidate.is_file():
            return {}
        try:
            spec = importlib.util.spec_from_file_location(qualified_name, candidate)
            if spec is None or spec.loader is None:
                return {}
            loaded = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(loaded)
        except Exception:
            return {}

    described: dict[str, str] = {}
    declared_tools = getattr(loaded, "TOOLS", []) or []
    for declared in declared_tools:
        try:
            _, tool_name, tool_description = declared
        except Exception:
            # Not a 3-item entry; ignore it.
            continue
        described[str(tool_name)] = str(tool_description or "")
    return described
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _plugin_entries() -> list[dict]:
    """Build one catalog entry per tool name listed for each plugin row.

    Rows come from ``list_plugins()``. Each row's ``tool_names`` value is
    split on commas; blank names are dropped. Descriptions are looked up via
    ``_plugin_module_tools`` and default to ``""``.
    """
    catalog_rows: list[dict] = []
    for plugin in list_plugins():
        plugin_file = str(plugin.get("filename") or "")
        origin = str(plugin.get("created_by") or plugin.get("source") or "repo")
        known_descriptions = _plugin_module_tools(plugin_file, origin)
        raw_names = str(plugin.get("tool_names") or "").split(",")
        stripped_names = (part.strip() for part in raw_names)
        catalog_rows.extend(
            {
                "kind": "plugin_tool",
                "name": tool,
                "description": known_descriptions.get(tool, ""),
                "plugin": plugin_file,
                "source": origin,
                "category": _tool_category(tool),
            }
            for tool in stripped_names
            if tool
        )
    return catalog_rows
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _skill_entries() -> list[dict]:
    """Return one catalog entry per row reported by ``list_skills()``.

    ``sync_skill_directories()`` is attempted first; a failure there is
    ignored and the listing proceeds with whatever ``list_skills()`` returns.
    """
    try:
        sync_skill_directories()
    except Exception:
        pass

    def _as_entry(skill: dict) -> dict:
        # The catalog "name" is the skill id; the human-readable name is
        # kept separately as "display_name".
        return {
            "kind": "skill",
            "name": skill.get("id", ""),
            "display_name": skill.get("name", ""),
            "description": skill.get("description", "") or "",
            "source": skill.get("source_kind", "") or "",
            "level": skill.get("level", "") or "",
            "mode": skill.get("mode", "") or "",
            "execution_level": skill.get("execution_level", "") or "",
            "trust_score": skill.get("trust_score", 0),
            "tags": skill.get("tags", "[]"),
        }

    return [_as_entry(skill) for skill in list_skills()]
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def _script_entries() -> list[dict]:
    """Return one catalog entry per script from ``list_scripts(include_core=True)``.

    A row with a truthy ``core`` value is labelled ``"core"``; every other
    row is labelled ``"personal"``.
    """
    return [
        {
            "kind": "script",
            "name": script.get("name", ""),
            "description": script.get("description", "") or "",
            "runtime": script.get("runtime", "") or "",
            "path": script.get("path", "") or "",
            "source": "core" if script.get("core") else "personal",
            "classification": script.get("classification", "") or "",
            "declared_schedule": script.get("declared_schedule", {}) or {},
        }
        for script in list_scripts(include_core=True)
    ]
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def _cron_entries() -> list[dict]:
    """Return one catalog entry per cron declared in the first usable manifest.

    ``MANIFEST_PATHS`` is tried in order. A manifest is usable when the file
    exists, decodes as UTF-8 JSON and its top level is an object; unreadable
    or wrongly shaped manifests are skipped so a later path can still be
    used. Items under ``"crons"`` that are not objects are ignored.

    Returns:
        Dicts with keys ``kind``, ``name`` (the cron ``id``),
        ``description``, ``script``, ``schedule`` and ``optional``; ``[]``
        when no usable manifest is found.
    """
    manifest = None
    for path in MANIFEST_PATHS:
        if not path.is_file():
            continue
        try:
            # JSON is UTF-8; do not depend on the locale default encoding.
            loaded = json.loads(path.read_text(encoding="utf-8"))
        except Exception:
            # Best-effort: fall through to the next candidate manifest.
            continue
        if isinstance(loaded, dict):
            manifest = loaded
            break
    if manifest is None:
        return []

    crons = manifest.get("crons") or []
    if not isinstance(crons, list):
        return []
    return [
        {
            "kind": "cron",
            "name": cron.get("id", ""),
            "description": cron.get("description", "") or "",
            "script": cron.get("script", "") or "",
            "schedule": cron.get("schedule", {}) or {},
            "optional": bool(cron.get("optional", False)),
        }
        for cron in crons
        if isinstance(cron, dict)
    ]
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def _project_entries() -> list[dict]:
    """Return one catalog entry per project object found in ``ATLAS_PATH``.

    The atlas is expected to be a JSON object keyed by project name. Keys
    starting with ``_`` and values that are not objects are skipped.

    Returns:
        Dicts with keys ``kind``, ``name``, ``path``, ``domain``,
        ``aliases``, ``services`` and ``plugins``; ``[]`` when the atlas is
        missing, unreadable, or not a JSON object.
    """
    if not ATLAS_PATH.is_file():
        return []
    try:
        # JSON is UTF-8; do not depend on the locale default encoding.
        payload = json.loads(ATLAS_PATH.read_text(encoding="utf-8"))
    except Exception:
        # Best-effort: a broken atlas yields an empty section.
        return []
    if not isinstance(payload, dict):
        return []

    entries: list[dict] = []
    for key, value in payload.items():
        if str(key).startswith("_") or not isinstance(value, dict):
            continue
        entries.append(
            {
                "kind": "project",
                "name": key,
                "path": value.get("path", "") or "",
                "domain": value.get("domain", "") or "",
                "aliases": value.get("aliases", []) or [],
                "services": value.get("services", {}) or {},
                # "plugins" takes precedence; "plugin_path" is the fallback key.
                "plugins": value.get("plugins", "") or value.get("plugin_path", "") or "",
            }
        )
    return entries
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def _artifact_entries() -> list[dict]:
    """Return up to 100 entries from ``artifact_registry``, newest touch first.

    Rows are read through the connection returned by ``get_db()`` and
    accessed by column name. A failing query yields ``[]``.
    """
    conn = get_db()
    query = "SELECT canonical_name, kind, domain, state, uri, paths, ports, aliases FROM artifact_registry ORDER BY last_touched_at DESC LIMIT 100"
    try:
        fetched = conn.execute(query).fetchall()
    except Exception:
        return []

    # (catalog key, table column) pairs, in output order.
    column_map = (
        ("name", "canonical_name"),
        ("artifact_kind", "kind"),
        ("domain", "domain"),
        ("state", "state"),
        ("uri", "uri"),
        ("paths", "paths"),
        ("ports", "ports"),
        ("aliases", "aliases"),
    )
    artifacts: list[dict] = []
    for record in fetched:
        item = {"kind": "artifact"}
        for catalog_key, column in column_map:
            item[catalog_key] = record[column]
        artifacts.append(item)
    return artifacts
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def build_system_catalog() -> dict:
    """Assemble the full catalog by running every section builder once.

    Returns:
        A dict with one list per section plus a ``"summary"`` dict mapping
        each name in ``SECTION_ORDER`` to the number of entries it holds.
    """
    section_builders = (
        ("core_tools", _parse_core_tools),
        ("plugin_tools", _plugin_entries),
        ("skills", _skill_entries),
        ("scripts", _script_entries),
        ("crons", _cron_entries),
        ("projects", _project_entries),
        ("artifacts", _artifact_entries),
    )
    catalog: dict = {}
    for section_name, build_section in section_builders:
        catalog[section_name] = build_section()

    counts = {}
    for section_name in SECTION_ORDER:
        counts[section_name] = len(catalog.get(section_name) or [])
    catalog["summary"] = counts
    return catalog
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def search_system_catalog(query: str, *, section: str = "", limit: int = 20) -> list[dict]:
    """Search a freshly built catalog and return the best-matching entries.

    Each entry is scored by token overlap between *query* and the entry's
    values (plus its section name). Field names are deliberately left out of
    the searched text, so a query such as ``"description"`` or ``"path"``
    does not match every entry merely because it has that key. When *query*
    yields no tokens, every entry is returned with a neutral score of 0.5.

    Args:
        query: Free-text query.
        section: Restrict the search to this section; any value not in
            ``SECTION_ORDER`` searches all sections.
        limit: Maximum number of rows returned (at least 1; falsy means 20).

    Returns:
        Copies of the matching entries with ``_section`` and ``_score``
        added, ordered by score and then name, both descending.
    """
    catalog = build_system_catalog()
    query_tokens = _tokenize(query)
    sections = [section] if section in SECTION_ORDER else list(SECTION_ORDER)

    def _value_text(value: object) -> str:
        # Containers are serialized so nested values stay searchable;
        # default=str keeps unusual value types from raising.
        if value is None:
            return ""
        if isinstance(value, (dict, list, tuple)):
            return json.dumps(value, ensure_ascii=False, default=str)
        return str(value)

    matches: list[dict] = []
    for section_name in sections:
        for entry in catalog.get(section_name) or []:
            if query_tokens:
                haystack = " ".join(
                    [section_name, *(_value_text(value) for value in entry.values())]
                )
                score = _score(query_tokens, haystack)
                if score <= 0:
                    continue
            else:
                score = 0.5
            row = dict(entry)
            row["_section"] = section_name
            row["_score"] = round(score, 4)
            matches.append(row)

    # Coerce the name to str: a None name (e.g. a NULL database column)
    # cannot be compared with str when two scores tie.
    matches.sort(key=lambda row: (row["_score"], str(row.get("name") or "")), reverse=True)
    return matches[: max(1, int(limit or 20))]
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def explain_tool(name: str) -> dict | None:
    """Find the catalog entry that best corresponds to *name*.

    The normalized name is used as a catalog search (up to 200 rows). An
    entry whose normalized name equals it wins; otherwise the first entry
    whose normalized name contains it is returned. Returns ``None`` for an
    empty name or when nothing qualifies.
    """
    wanted = _normalize_text(name)
    if not wanted:
        return None

    candidates = search_system_catalog(wanted, limit=200)
    keyed = [(_normalize_text(row.get("name")), row) for row in candidates]

    exact_hit = next((row for key, row in keyed if key == wanted), None)
    if exact_hit is not None:
        return exact_hit
    return next((row for key, row in keyed if wanted in key), None)
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
def format_catalog(catalog: dict, *, section: str = "", query: str = "", limit: int = 20) -> str:
    """Render the system catalog as plain text.

    Three modes, checked in this order:
      * ``query`` set: ranked search results (optionally within ``section``).
      * ``section`` is a known section: that section's entries, up to ``limit``.
      * otherwise: one line per section with its count from ``catalog["summary"]``.
    """

    def _render(row: dict) -> str:
        # Title plus an optional short detail taken from the first non-empty of
        # description / path / script.
        title = row.get("display_name") or row.get("name") or "(unnamed)"
        detail = _truncate(row.get("description") or row.get("path") or row.get("script") or "", 180)
        return f"{title} — {detail}" if detail else f"{title}"

    summary = catalog.get("summary") or {}

    if query:
        hits = search_system_catalog(query, section=section, limit=limit)
        if hits:
            out = [f"SYSTEM CATALOG SEARCH — '{query}' ({len(hits)} match(es))"]
            out.extend(f"- [{row.get('_section', '')}] {_render(row)}" for row in hits)
            return "\n".join(out)
        scope = section or "all sections"
        return f"No system-catalog matches for '{query}' in {scope}."

    if section in SECTION_ORDER:
        entries = catalog.get(section) or []
        if not entries:
            return f"SYSTEM CATALOG — {section}: empty"
        shown = entries[: max(1, int(limit or 20))]
        out = [f"SYSTEM CATALOG — {section} ({len(entries)})"]
        out.extend(f"- {_render(row)}" for row in shown)
        return "\n".join(out)

    out = ["SYSTEM CATALOG SUMMARY"]
    out.extend(f"- {name}: {summary.get(name, 0)}" for name in SECTION_ORDER)
    return "\n".join(out)
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
def format_tool_explanation(entry: dict | None) -> str:
    """Render one catalog entry as a multi-line, human-readable card.

    Args:
        entry: A catalog row (typically from ``explain_tool``) or ``None``.

    Returns:
        A "not found" sentence for a missing/empty entry; otherwise a header,
        the section (falling back to ``kind``), and one ``Label: value`` line
        for every optional field that holds a truthy value.
    """
    if not entry:
        return "Tool/capability not found in the live system catalog."

    # (key, label) pairs in the exact order they are printed.
    optional_fields = (
        ("display_name", "Display name"),
        ("description", "Description"),
        ("category", "Category"),
        ("source", "Source"),
        ("plugin", "Plugin"),
        ("path", "Path"),
        ("line", "Line"),
        ("script", "Script"),
        ("runtime", "Runtime"),
        ("level", "Level"),
        ("mode", "Mode"),
        ("execution_level", "Execution level"),
        ("domain", "Domain"),
    )

    card = [
        f"CATALOG ENTRY — {entry.get('name') or entry.get('display_name')}",
        f"Section: {entry.get('_section') or entry.get('kind')}",
    ]
    for key, label in optional_fields:
        value = entry.get(key)
        if value:
            card.append(f"{label}: {value}")
    return "\n".join(card)
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""Public MCP tools for the live NEXO system catalog / ontology."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from system_catalog import build_system_catalog, explain_tool, format_catalog, format_tool_explanation
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def handle_system_catalog(section: str = "", query: str = "", limit: int = 20) -> str:
    """Build the live system catalog and return it formatted as text.

    ``section`` and ``query`` are stripped (``None`` is treated as empty) and
    ``limit`` is coerced to an integer of at least 1 (falsy means 20) before
    being handed to ``format_catalog``.
    """
    catalog = build_system_catalog()
    clean_section = (section or "").strip()
    clean_query = (query or "").strip()
    row_cap = max(1, int(limit or 20))
    return format_catalog(catalog, section=clean_section, query=clean_query, limit=row_cap)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def handle_tool_explain(name: str) -> str:
    """Look up ``name`` in the live system catalog and return its text card."""
    entry = explain_tool(name)
    return format_tool_explanation(entry)
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""Public MCP tools for transcript fallback access."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from transcript_utils import (
|
|
6
|
+
clamp_transcript_hours,
|
|
7
|
+
list_recent_transcripts,
|
|
8
|
+
load_transcript,
|
|
9
|
+
search_transcripts,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def handle_transcript_search(query: str = "", hours: int = 24, client: str = "", limit: int = 10) -> str:
    """Search recent Claude Code / Codex transcripts as a fallback when memory is insufficient.

    Args:
        query: Free-text query; ``None`` is treated as empty.
        hours: Look-back window, clamped by ``clamp_transcript_hours``.
        client: Optional client filter, stripped before use.
        limit: Passed through to ``search_transcripts``.

    Returns:
        A text report with one line per transcript, followed by its cwd,
        session uid and matched message snippets when present, or a
        "no matches" sentence.
    """
    window = clamp_transcript_hours(hours)
    wanted_client = (client or "").strip()
    rows = search_transcripts(query or "", hours=window, client=wanted_client, limit=limit)
    if not rows:
        scope = f"query='{query}'" if query else "recent transcripts"
        return f"No transcript matches for {scope} in the last {window}h."

    report = [f"TRANSCRIPTS ({len(rows)}) — last {window}h"]
    for item in rows:
        headline = (
            f"- {item.get('session_file')}: [{item.get('client')}] {item.get('display_name')} "
            f"(modified={item.get('modified')}, messages={item.get('message_count')}, user={item.get('user_message_count')})"
        )
        report.append(headline)

        cwd = item.get("cwd")
        if cwd:
            report.append(f" cwd: {cwd}")

        session_uid = item.get("session_uid")
        if session_uid:
            report.append(f" session_uid: {session_uid}")

        report.extend(
            f" [{snippet.get('role')}#{snippet.get('index')}] {snippet.get('snippet')}"
            for snippet in item.get("matched_messages") or []
        )
    return "\n".join(report)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def handle_transcript_recent(hours: int = 24, client: str = "", limit: int = 10) -> str:
    """List recent transcripts without searching full text.

    Args:
        hours: Look-back window, clamped by ``clamp_transcript_hours``.
        client: Optional client filter, stripped before use.
        limit: Passed through to ``list_recent_transcripts``.

    Returns:
        A header plus one summary line per transcript, or a "none found"
        sentence.
    """
    window = clamp_transcript_hours(hours)
    wanted_client = (client or "").strip()
    rows = list_recent_transcripts(hours=window, client=wanted_client, limit=limit)
    if not rows:
        return f"No transcripts found in the last {window}h."

    report = [f"RECENT TRANSCRIPTS ({len(rows)}) — last {window}h"]
    report.extend(
        f"- {item.get('session_file')}: [{item.get('client')}] {item.get('display_name')} "
        f"(modified={item.get('modified')}, messages={item.get('message_count')}, user={item.get('user_message_count')})"
        for item in rows
    )
    return "\n".join(report)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def handle_transcript_read(
    session_ref: str = "",
    transcript_path: str = "",
    client: str = "",
    max_messages: int = 80,
) -> str:
    """Read a transcript in fallback mode. Accepts session_file, display name, session_uid or exact path.

    Args:
        session_ref: Reference resolved by ``load_transcript``.
        transcript_path: Exact path to the transcript file.
        client: Optional client filter.
        max_messages: Number of trailing messages to show; falsy means 80 and
            the value is clamped to the range 1..200.

    Returns:
        A header block describing the transcript followed by the visible
        messages, or a "not found" sentence.
    """
    found = load_transcript(
        session_ref=(session_ref or "").strip(),
        transcript_path=(transcript_path or "").strip(),
        client=(client or "").strip(),
    )
    if not found:
        target = session_ref or transcript_path or "(empty ref)"
        return f"Transcript not found for {target}."

    cap = min(200, max(1, int(max_messages or 80)))
    all_messages = found.get("messages") or []
    truncated = len(all_messages) > cap
    # Only the tail is shown when the transcript is longer than the cap.
    visible = all_messages[-cap:] if truncated else all_messages

    out = [
        f"TRANSCRIPT {found.get('session_file')}",
        f"Client: {found.get('client')}",
        f"Display: {found.get('display_name')}",
        f"Path: {found.get('session_path')}",
        f"Modified: {found.get('modified')}",
        f"Messages: {found.get('message_count')} (user={found.get('user_message_count')}, tools={found.get('tool_use_count')})",
    ]
    if found.get("cwd"):
        out.append(f"CWD: {found.get('cwd')}")
    if found.get("session_uid"):
        out.append(f"Session UID: {found.get('session_uid')}")
    if truncated:
        out.append(f"Showing last {cap} messages.")

    for message in visible:
        role = str(message.get("role") or "?").upper()
        index = message.get("index", "?")
        body = str(message.get("text") or "").strip()
        out.extend(["", f"[{role} #{index}]", body])

    return "\n".join(out)
|
|
@@ -0,0 +1,412 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
"""Shared transcript helpers for Deep Sleep and public MCP fallback tools."""
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
import re
|
|
7
|
+
import unicodedata
|
|
8
|
+
from datetime import datetime, timedelta
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
# Sessions with fewer user messages than this are dropped by the extractors.
MIN_USER_MESSAGES = 3
# Look-back window, in hours, used when the caller supplies none.
DEFAULT_TRANSCRIPT_HOURS = 24
# Upper bound for the look-back window: 30 days expressed in hours.
MAX_TRANSCRIPT_HOURS = 24 * 30
|
|
14
|
+
|
|
15
|
+
# Credential shapes that are replaced before transcript text is stored or
# shown. The vendor-prefixed alternatives stay case-sensitive because the
# prefixes themselves are. The keyword assignments at the end are matched
# case-insensitively and tolerate a closing quote after the keyword, so
# environment-style (`API_KEY=...`, `PASSWORD=...`) and JSON-style
# (`"token": "..."`) secrets are caught as well as `password: ...`.
_SENSITIVE_PATTERNS = re.compile(
    r'(?:'
    r'sk-ant-[A-Za-z0-9_-]+'
    r'|shpat_[A-Fa-f0-9]+'
    r'|shpss_[A-Fa-f0-9]+'
    r'|sk-[A-Za-z0-9]{20,}'
    r'|ghp_[A-Za-z0-9]{36,}'
    r'|gho_[A-Za-z0-9]{36,}'
    r'|AIza[A-Za-z0-9_-]{35}'
    r'|ya29\.[A-Za-z0-9_-]+'
    r'|xox[bpsa]-[A-Za-z0-9-]+'
    r'|EAAG[A-Za-z0-9]+'
    r'''|(?i:password|secret|token|api[_-]?key)["']?\s*[:=]\s*\S+'''
    r')'
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _redact_sensitive(text: str) -> str:
    """Return ``text`` with every span matched by ``_SENSITIVE_PATTERNS`` replaced by ``[REDACTED]``."""
    redacted, _replacements = _SENSITIVE_PATTERNS.subn("[REDACTED]", text)
    return redacted
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _normalize_text(text: str | None) -> str:
    """Fold ``text`` to lowercase ASCII for accent-insensitive matching.

    The value is NFKD-decomposed, every non-ASCII code point is dropped, and
    the remainder is lowercased. Falsy input yields an empty string.
    """
    if not text:
        return ""
    decomposed = unicodedata.normalize("NFKD", str(text))
    ascii_only = "".join(ch for ch in decomposed if ch.isascii())
    return ascii_only.lower()
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _tokenize(text: str | None) -> set[str]:
    """Split ``text`` into a set of normalized search tokens.

    A token starts with a letter or digit, may continue with letters, digits
    and ``. _ : -``, and is kept only when it is at least three characters
    long.
    """
    candidates = re.findall(r"[a-z0-9][a-z0-9._:-]{1,}", _normalize_text(text))
    tokens: set[str] = set()
    for candidate in candidates:
        if len(candidate) < 3:
            continue
        tokens.add(candidate)
    return tokens
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _score_text_match(query_tokens: set[str], haystack: str) -> float:
    """Score how well ``haystack`` covers ``query_tokens``.

    Returns:
        The number of shared tokens divided by the size of the smaller token
        set, or 0.0 when either side has no tokens or nothing is shared.
    """
    if not query_tokens:
        return 0.0
    haystack_tokens = _tokenize(haystack)
    shared = query_tokens & haystack_tokens if haystack_tokens else set()
    if not shared:
        return 0.0
    denominator = max(1, min(len(query_tokens), len(haystack_tokens)))
    return len(shared) / denominator
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _truncate(text: str | None, limit: int = 240) -> str:
    """Strip ``text`` and shorten it to at most ``limit`` characters.

    Args:
        text: Value to shorten; falsy input yields an empty string.
        limit: Maximum length of the result.

    Returns:
        The stripped text when it already fits; otherwise its head followed by
        ``"..."`` so that the total length equals ``limit``. When ``limit`` is
        too small to hold the ellipsis the text is cut hard instead.
    """
    if not text:
        return ""
    clean = str(text).strip()
    if len(clean) <= limit:
        return clean
    if limit < 3:
        # `limit - 3` would be negative here and slice from the END of the
        # string, producing output longer than the limit.
        return clean[: max(0, limit)]
    return clean[: limit - 3] + "..."
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _session_identifier(client: str, session_file: str) -> str:
    """Build the ``<client>:<file name>`` identifier stored as a record's ``session_file``."""
    return "{}:{}".format(client, session_file)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _claude_root() -> Path:
    """Return ``~/.claude/projects``, the directory scanned for Claude Code session files."""
    return Path.home().joinpath(".claude", "projects")
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _codex_roots() -> list[Path]:
    """Return the Codex session directories, live sessions first, then archived ones."""
    codex_home = Path.home() / ".codex"
    return [codex_home / folder for folder in ("sessions", "archived_sessions")]
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def clamp_transcript_hours(hours: int | float | str | None) -> int:
    """Coerce a look-back window to whole hours within the supported range.

    Falsy or unparseable input falls back to ``DEFAULT_TRANSCRIPT_HOURS``; the
    result is never below 1 nor above ``MAX_TRANSCRIPT_HOURS``.
    """
    try:
        requested = int(float(hours or DEFAULT_TRANSCRIPT_HOURS))
    except Exception:
        requested = DEFAULT_TRANSCRIPT_HOURS

    capped = requested if requested < MAX_TRANSCRIPT_HOURS else MAX_TRANSCRIPT_HOURS
    return capped if capped > 1 else 1
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def find_claude_session_files() -> list[Path]:
    """Return every ``*.jsonl`` file under the Claude projects directory, sorted by path.

    An empty list is returned when the directory does not exist.
    """
    root = _claude_root()
    return sorted(root.rglob("*.jsonl")) if root.exists() else []
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def find_codex_session_files() -> list[Path]:
    """Return ``*.jsonl`` files from every existing Codex root, de-duplicated by file name.

    Roots are visited in the order given by ``_codex_roots`` and files within
    a root in sorted order; the first path seen for a file name wins.
    """
    first_by_name: dict[str, Path] = {}
    for root in _codex_roots():
        if not root.exists():
            continue
        for candidate in sorted(root.rglob("*.jsonl")):
            # setdefault keeps the earliest path and its insertion position.
            first_by_name.setdefault(candidate.name, candidate)
    return list(first_by_name.values())
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def extract_claude_session(jsonl_path: Path) -> dict | None:
    """Parse one Claude Code session JSONL file into a transcript record.

    User entries with non-empty string content become ``user`` messages
    (entries starting with ``<system-reminder>`` are skipped). For
    ``message``/``assistant`` entries the ``text`` blocks are joined into one
    ``assistant`` message and every ``tool_use`` block is recorded in
    ``tool_uses``. Message text is redacted and capped at 5000 characters.

    Args:
        jsonl_path: Path of the session file.

    Returns:
        The transcript record, or ``None`` when the file cannot be read or
        holds fewer than ``MIN_USER_MESSAGES`` user messages.
    """
    messages: list[dict] = []
    tool_uses: list[dict] = []
    user_msg_count = 0

    try:
        # Decode explicitly as UTF-8 instead of the locale default, and replace
        # undecodable bytes so one bad byte does not discard the whole session.
        with open(jsonl_path, "r", encoding="utf-8", errors="replace") as f:
            for line_no, line in enumerate(f, 1):
                line = line.strip()
                if not line:
                    continue
                try:
                    payload = json.loads(line)
                except json.JSONDecodeError:
                    continue
                # A valid JSON line that is not an object carries no message.
                if not isinstance(payload, dict):
                    continue

                msg_type = payload.get("type")
                message = payload.get("message")
                if not isinstance(message, dict):
                    message = {}

                if msg_type == "user":
                    content = message.get("content", "")
                    if not (isinstance(content, str) and content.strip()):
                        continue
                    if content.startswith("<system-reminder>"):
                        continue
                    messages.append(
                        {
                            "role": "user",
                            "index": line_no,
                            # Redact BEFORE truncating: a secret cut at the
                            # 5000-char boundary could otherwise be too short
                            # to match its pattern and leak.
                            "text": _redact_sensitive(content)[:5000],
                            "uuid": payload.get("uuid", ""),
                        }
                    )
                    user_msg_count += 1
                elif msg_type in ("message", "assistant"):
                    content_blocks = message.get("content")
                    if not isinstance(content_blocks, list):
                        content_blocks = []
                    text_parts = []
                    for block in content_blocks:
                        if not isinstance(block, dict):
                            continue
                        block_type = block.get("type")
                        if block_type == "text":
                            text_parts.append(str(block.get("text") or ""))
                        elif block_type == "tool_use":
                            tool_input = block.get("input", {})
                            is_mapping = isinstance(tool_input, dict)
                            target = ""
                            if is_mapping:
                                file_path = tool_input.get("file_path", "")
                                if file_path:
                                    target = _redact_sensitive(str(file_path))
                                else:
                                    # Commands are capped at 100 chars, again
                                    # after redaction.
                                    target = _redact_sensitive(str(tool_input.get("command", "")))[:100]
                            tool_uses.append(
                                {
                                    "tool": block.get("name", ""),
                                    "input_keys": list(tool_input.keys()) if is_mapping else [],
                                    "file": target,
                                }
                            )
                    combined = "\n".join(part for part in text_parts if part).strip()
                    if combined:
                        messages.append(
                            {
                                "role": "assistant",
                                "index": line_no,
                                "text": _redact_sensitive(combined)[:5000],
                            }
                        )
    except Exception:
        # Deliberate best-effort boundary: an unreadable or unexpectedly shaped
        # transcript is skipped rather than aborting the caller's scan.
        return None

    if user_msg_count < MIN_USER_MESSAGES:
        return None

    return {
        "client": "claude_code",
        "session_file": _session_identifier("claude_code", jsonl_path.name),
        "display_name": jsonl_path.name,
        "session_path": str(jsonl_path),
        "message_count": len(messages),
        "user_message_count": user_msg_count,
        "tool_use_count": len(tool_uses),
        "messages": messages,
        "tool_uses": tool_uses,
        "source": "claude_projects",
    }
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def extract_codex_session(jsonl_path: Path) -> dict | None:
    """Parse one Codex session JSONL file into a transcript record.

    ``session_meta`` lines supply source/cwd/originator/id. ``event_msg``
    lines of type ``user_message`` become ``user`` messages (entries starting
    with ``<environment_context>`` are skipped). ``response_item`` lines
    become ``assistant`` messages (joined ``output_text`` blocks) or
    ``tool_uses`` entries (``function_call``). Message text is redacted and
    capped at 5000 characters.

    Args:
        jsonl_path: Path of the session file.

    Returns:
        The transcript record, or ``None`` when the file cannot be read or
        holds fewer than ``MIN_USER_MESSAGES`` user messages.
    """
    messages: list[dict] = []
    tool_uses: list[dict] = []
    user_msg_count = 0
    session_meta: dict = {}

    try:
        # Decode explicitly as UTF-8 instead of the locale default, and replace
        # undecodable bytes so one bad byte does not discard the whole session.
        with open(jsonl_path, "r", encoding="utf-8", errors="replace") as f:
            for line_no, line in enumerate(f, 1):
                line = line.strip()
                if not line:
                    continue
                try:
                    payload = json.loads(line)
                except json.JSONDecodeError:
                    continue
                # A valid JSON line that is not an object carries no event.
                if not isinstance(payload, dict):
                    continue

                data = payload.get("payload", {})
                # Every handled item type needs a mapping payload.
                if not isinstance(data, dict):
                    continue
                item_type = payload.get("type")

                if item_type == "session_meta":
                    session_meta = data
                    continue

                if item_type == "event_msg":
                    if data.get("type") != "user_message":
                        continue
                    content = str(data.get("message", "") or "").strip()
                    if not content or content.startswith("<environment_context>"):
                        continue
                    messages.append(
                        {
                            "role": "user",
                            "index": line_no,
                            # Redact BEFORE truncating: a secret cut at the
                            # 5000-char boundary could otherwise be too short
                            # to match its pattern and leak.
                            "text": _redact_sensitive(content)[:5000],
                        }
                    )
                    user_msg_count += 1
                    continue

                if item_type != "response_item":
                    continue

                response_type = data.get("type")
                if response_type == "message" and data.get("role") == "assistant":
                    blocks = data.get("content")
                    if not isinstance(blocks, list):
                        blocks = []
                    text_parts = [
                        # `or ""` keeps a null text from becoming the string "None".
                        str(block.get("text") or "")
                        for block in blocks
                        if isinstance(block, dict) and block.get("type") == "output_text"
                    ]
                    combined = "\n".join(part for part in text_parts if part).strip()
                    if combined:
                        messages.append(
                            {
                                "role": "assistant",
                                "index": line_no,
                                "text": _redact_sensitive(combined)[:5000],
                            }
                        )
                elif response_type == "function_call":
                    tool_uses.append(
                        {
                            "tool": data.get("name", ""),
                            "input_keys": [],
                            # Arguments are capped at 100 chars after redaction.
                            "file": _redact_sensitive(str(data.get("arguments", "")))[:100],
                        }
                    )
    except Exception:
        # Deliberate best-effort boundary: an unreadable or unexpectedly shaped
        # transcript is skipped rather than aborting the caller's scan.
        return None

    if user_msg_count < MIN_USER_MESSAGES:
        return None

    return {
        "client": "codex",
        "session_file": _session_identifier("codex", jsonl_path.name),
        "display_name": jsonl_path.name,
        "session_path": str(jsonl_path),
        "message_count": len(messages),
        "user_message_count": user_msg_count,
        "tool_use_count": len(tool_uses),
        "messages": messages,
        "tool_uses": tool_uses,
        "source": session_meta.get("source", "codex"),
        "cwd": session_meta.get("cwd", ""),
        "originator": session_meta.get("originator", ""),
        "session_uid": session_meta.get("id", ""),
    }
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def _parse_naive_local(stamp: str) -> datetime:
    """Parse an ISO-8601 string into a naive local datetime comparable with file mtimes."""
    text = str(stamp).strip()
    # `fromisoformat` only accepts a trailing "Z" from Python 3.11 onwards.
    if text[-1:] in ("Z", "z"):
        text = text[:-1] + "+00:00"
    parsed = datetime.fromisoformat(text)
    if parsed.tzinfo is not None:
        # Convert to local time and drop the offset; comparing an aware value
        # with the naive mtime below would raise TypeError.
        parsed = parsed.astimezone().replace(tzinfo=None)
    return parsed


def collect_transcripts_since(since_iso: str, until_iso: str = "") -> list[dict]:
    """Collect parsed transcripts whose file was modified inside a time window.

    Args:
        since_iso: Exclusive lower bound as an ISO-8601 string. Naive values
            are taken as local time; aware values are converted to local time.
        until_iso: Inclusive upper bound in the same format; empty means now.

    Returns:
        Transcript records from both Claude Code and Codex, each with a
        ``modified`` ISO timestamp, sorted oldest first. Files that cannot be
        stat'ed or that the extractors reject are skipped.

    Raises:
        ValueError: If either bound is not a valid ISO-8601 string.
    """
    since_dt = _parse_naive_local(since_iso)
    until_dt = _parse_naive_local(until_iso) if until_iso else datetime.now()

    transcript_files: list[tuple[str, Path]] = [
        ("claude_code", path) for path in find_claude_session_files()
    ] + [
        ("codex", path) for path in find_codex_session_files()
    ]

    sessions = []
    for client, session_file in transcript_files:
        try:
            mtime = datetime.fromtimestamp(session_file.stat().st_mtime)
        except OSError:
            continue
        if not (since_dt < mtime <= until_dt):
            continue
        if client == "codex":
            session = extract_codex_session(session_file)
        else:
            session = extract_claude_session(session_file)
        if session:
            session["modified"] = mtime.isoformat()
            sessions.append(session)

    sessions.sort(key=lambda row: row["modified"])
    return sessions
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def list_recent_transcripts(hours: int = DEFAULT_TRANSCRIPT_HOURS, client: str = "", limit: int = 10) -> list[dict]:
    """Return the most recently modified transcripts within a look-back window.

    Args:
        hours: Look-back window, clamped by ``clamp_transcript_hours``.
        client: When non-empty, only records whose ``client`` equals it are kept.
        limit: Maximum number of records (falsy means 10, minimum 1).

    Returns:
        Transcript records ordered newest first.
    """
    window = clamp_transcript_hours(hours)
    window_start = datetime.now() - timedelta(hours=window)
    sessions = collect_transcripts_since(window_start.isoformat())
    kept = [item for item in sessions if not client or item.get("client") == client]
    newest_first = sorted(kept, key=lambda row: row.get("modified", ""), reverse=True)
    return newest_first[: max(1, int(limit or 10))]
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def search_transcripts(query: str, *, hours: int = DEFAULT_TRANSCRIPT_HOURS, client: str = "", limit: int = 10) -> list[dict]:
    """Rank recent transcripts against a free-text query.

    Up to 200 recent transcripts are scored. A transcript's score is its best
    per-message (or metadata) token match plus a recency bonus of at most
    0.35; transcripts with no match are dropped. When the query yields no
    tokens the recent list is returned unranked.

    Returns:
        Matching records, each extended with ``_score`` and up to three
        ``matched_messages`` snippets, best first.
    """
    rows = list_recent_transcripts(hours=hours, client=client, limit=200)
    query_tokens = _tokenize(query)
    if not query_tokens:
        return rows[: max(1, int(limit or 10))]

    cutoff_seconds = clamp_transcript_hours(hours) * 3600
    now = datetime.now().timestamp()
    meta_keys = ("display_name", "session_file", "source", "cwd")

    ranked: list[dict] = []
    for item in rows:
        top_score = 0.0
        hits = []
        for message in item.get("messages") or []:
            body = str(message.get("text", "") or "")
            message_score = _score_text_match(query_tokens, body)
            if message_score > 0:
                top_score = max(top_score, message_score)
                hits.append(
                    {
                        "role": message.get("role", ""),
                        "index": message.get("index", 0),
                        "snippet": _truncate(body, 220),
                        "score": round(message_score, 4),
                    }
                )

        # File name, identifier, source and cwd can match even when no message does.
        meta_text = " ".join(str(item.get(key, "") or "") for key in meta_keys)
        top_score = max(top_score, _score_text_match(query_tokens, meta_text))
        if top_score <= 0:
            continue

        try:
            modified_ts = datetime.fromisoformat(item.get("modified", "")).timestamp()
        except Exception:
            modified_ts = now
        age_fraction = (now - modified_ts) / max(1, cutoff_seconds)
        recency = max(0.0, 1.0 - age_fraction)

        item["_score"] = round(top_score + recency * 0.35, 4)
        hits.sort(key=lambda row: row["score"], reverse=True)
        item["matched_messages"] = hits[:3]
        ranked.append(item)

    best_first = sorted(
        ranked,
        key=lambda row: (row.get("_score", 0), row.get("modified", "")),
        reverse=True,
    )
    return best_first[: max(1, int(limit or 10))]
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
def load_transcript(session_ref: str = "", transcript_path: str = "", client: str = "") -> dict | None:
    """Load a single transcript identified by reference and/or exact path.

    Args:
        session_ref: Matches a record's ``session_file`` identifier, its
            display (file) name, its ``session_uid`` or the file path string.
        transcript_path: Path of the transcript file; compared after
            ``expanduser`` and ``resolve``.
        client: When non-empty, only files of that client are considered.

    Returns:
        The first matching transcript record with a ``modified`` ISO
        timestamp (empty when the file cannot be stat'ed), or ``None`` when
        nothing matches or when neither a reference nor a path was given.
    """
    ref = str(session_ref or "").strip()
    path_ref = str(transcript_path or "").strip()
    # Without any reference there is nothing to look up; scanning anyway would
    # return whichever transcript happens to sort first.
    if not ref and not path_ref:
        return None

    wanted_path = None
    if path_ref:
        # Resolve the requested path once instead of once per candidate file.
        try:
            wanted_path = Path(path_ref).expanduser().resolve()
        except (OSError, RuntimeError, ValueError):
            return None

    transcript_files: list[tuple[str, Path]] = [
        ("claude_code", path) for path in find_claude_session_files()
    ] + [
        ("codex", path) for path in find_codex_session_files()
    ]
    for detected_client, path in transcript_files:
        if client and detected_client != client:
            continue
        if wanted_path is not None:
            try:
                if path.resolve() != wanted_path:
                    continue
            except (OSError, RuntimeError):
                continue
        if ref and detected_client != "codex":
            # Claude records carry no session_uid, so every value the reference
            # could match is known from the path alone; skip the costly parse
            # for files that cannot match.
            cheap_keys = {_session_identifier(detected_client, path.name), path.name, str(path)}
            if ref not in cheap_keys:
                continue

        if detected_client == "codex":
            session = extract_codex_session(path)
        else:
            session = extract_claude_session(path)
        if not session:
            continue
        if ref and ref not in {
            str(session.get("session_file", "")),
            str(session.get("display_name", "")),
            str(session.get("session_uid", "")),
            str(path),
        }:
            continue

        try:
            session["modified"] = datetime.fromtimestamp(path.stat().st_mtime).isoformat()
        except OSError:
            session["modified"] = ""
        return session
    return None
|