nexo-brain 3.1.8 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +21 -0
- package/package.json +1 -1
- package/src/auto_update.py +27 -30
- package/src/scripts/deep-sleep/collect.py +6 -200
- package/src/server.py +41 -0
- package/src/system_catalog.py +419 -0
- package/src/tools_system_catalog.py +19 -0
- package/src/tools_transcripts.py +98 -0
- package/src/transcript_utils.py +412 -0
|
@@ -0,0 +1,412 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
"""Shared transcript helpers for Deep Sleep and public MCP fallback tools."""
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
import re
|
|
7
|
+
import unicodedata
|
|
8
|
+
from datetime import datetime, timedelta
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
# The extract_* helpers return None for a session that has fewer user
# messages than this.
MIN_USER_MESSAGES = 3
# Look-back window, in hours, used when a caller does not supply one.
DEFAULT_TRANSCRIPT_HOURS = 24
# Upper bound enforced by clamp_transcript_hours: 30 days expressed in hours.
MAX_TRANSCRIPT_HOURS = 30 * 24
|
|
14
|
+
|
|
15
|
+
# Credential-looking substrings that are scrubbed from transcript text.
# Vendor-specific token prefixes come first; the final alternative catches
# generic ``keyword = value`` / ``keyword: value`` assignments. The keyword
# match is case-insensitive so environment-style names such as ``API_KEY=``
# or ``PASSWORD:`` are covered as well as ``api_key=`` / ``Password:``.
_SENSITIVE_PATTERNS = re.compile(
    r'(?:'
    r'sk-ant-[A-Za-z0-9_-]+'
    r'|sk-proj-[A-Za-z0-9_-]{20,}'
    r'|shpat_[A-Fa-f0-9]+'
    r'|shpss_[A-Fa-f0-9]+'
    r'|sk-[A-Za-z0-9]{20,}'
    r'|github_pat_[A-Za-z0-9_]{22,}'
    r'|gh[pousr]_[A-Za-z0-9]{36,}'
    r'|AIza[A-Za-z0-9_-]{35}'
    r'|ya29\.[A-Za-z0-9_-]+'
    r'|xox[bpsa]-[A-Za-z0-9-]+'
    r'|EAAG[A-Za-z0-9]+'
    r'|(?i:password|secret|token|api[_-]?key)\s*[:=]\s*\S+'
    r')'
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _redact_sensitive(text: str) -> str:
    """Return *text* with every ``_SENSITIVE_PATTERNS`` match replaced by ``[REDACTED]``."""
    placeholder = "[REDACTED]"
    return re.sub(_SENSITIVE_PATTERNS, placeholder, text)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _normalize_text(text: str | None) -> str:
    """Fold *text* to lowercase ASCII.

    The value is stringified, NFKD-decomposed, stripped of every character
    that cannot be encoded as ASCII, and lower-cased. Falsy input yields
    an empty string.
    """
    if text:
        decomposed = unicodedata.normalize("NFKD", str(text))
        ascii_bytes = decomposed.encode("ascii", errors="ignore")
        return ascii_bytes.decode("ascii").lower()
    return ""
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _tokenize(text: str | None) -> set[str]:
    """Split *text* into a set of normalized search tokens.

    The text is first folded by ``_normalize_text``. A token starts and ends
    with ``[a-z0-9]`` and may contain ``.``, ``_``, ``:`` and ``-`` in between,
    so identifiers such as ``config.json`` or ``claude_code:abc`` stay whole.
    Tokens shorter than three characters are dropped.

    Requiring an alphanumeric last character keeps sentence punctuation out of
    tokens: ``"failed."`` and ``"path:"`` tokenize as ``failed`` and ``path``,
    so they match the same word typed without punctuation in a query.
    """
    normalized = _normalize_text(text)
    return {
        token
        for token in re.findall(r"[a-z0-9](?:[a-z0-9._:-]*[a-z0-9])?", normalized)
        if len(token) >= 3
    }
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _score_text_match(query_tokens: set[str], haystack: str) -> float:
    """Score how well *haystack* covers *query_tokens*.

    The score is the number of shared tokens divided by the size of the
    smaller of the two token sets. It is ``0.0`` when either set is empty or
    nothing overlaps.
    """
    if not query_tokens:
        return 0.0
    candidate_tokens = _tokenize(haystack)
    shared = query_tokens & candidate_tokens if candidate_tokens else set()
    if not shared:
        return 0.0
    denominator = max(1, min(len(query_tokens), len(candidate_tokens)))
    return len(shared) / denominator
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _truncate(text: str | None, limit: int = 240) -> str:
    """Return *text* stripped and shortened to at most *limit* characters.

    Text longer than *limit* is cut and suffixed with ``...`` so the result is
    exactly *limit* characters long. Falsy input yields an empty string.

    When *limit* is smaller than the three-character ellipsis, the text is
    hard-cut instead (an empty string for a non-positive limit), so the result
    never exceeds the requested length.
    """
    if not text:
        return ""
    clean = str(text).strip()
    if len(clean) <= limit:
        return clean
    if limit < 3:
        # No room for the ellipsis; ``limit - 3`` would be a negative slice
        # bound and produce a result longer than the limit.
        return clean[: max(0, limit)]
    return clean[: limit - 3] + "..."
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _session_identifier(client: str, session_file: str) -> str:
    """Build the ``<client>:<session_file>`` identifier for a transcript."""
    return "{}:{}".format(client, session_file)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _claude_root() -> Path:
    """Return ``~/.claude/projects``, the directory searched for Claude transcripts."""
    return Path.home().joinpath(".claude", "projects")
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _codex_roots() -> list[Path]:
    """Return the Codex transcript directories: live sessions first, then archived ones."""
    codex_home = Path.home() / ".codex"
    return [codex_home / folder for folder in ("sessions", "archived_sessions")]
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def clamp_transcript_hours(hours: int | float | str | None) -> int:
    """Coerce *hours* to an integer within ``1..MAX_TRANSCRIPT_HOURS``.

    Falsy or unparseable input falls back to ``DEFAULT_TRANSCRIPT_HOURS``
    before the bounds are applied. Fractional values are truncated by ``int``.
    """
    try:
        requested = int(float(hours or DEFAULT_TRANSCRIPT_HOURS))
    except Exception:
        requested = DEFAULT_TRANSCRIPT_HOURS
    if requested < 1:
        return 1
    if requested > MAX_TRANSCRIPT_HOURS:
        return MAX_TRANSCRIPT_HOURS
    return requested
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def find_claude_session_files() -> list[Path]:
    """Return every ``*.jsonl`` file under the Claude root, sorted by path.

    An empty list is returned when the root directory does not exist.
    """
    root = _claude_root()
    if root.exists():
        found = list(root.rglob("*.jsonl"))
        found.sort()
        return found
    return []
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def find_codex_session_files() -> list[Path]:
    """Return Codex ``*.jsonl`` transcripts, de-duplicated by file name.

    Each existing root from ``_codex_roots`` is walked in order and its files
    are visited in sorted order. The first path seen for a given file name
    wins; later files with the same name are ignored.
    """
    first_by_name: dict[str, Path] = {}
    for root in _codex_roots():
        if root.exists():
            for candidate in sorted(root.rglob("*.jsonl")):
                # setdefault keeps the earliest path and preserves insertion order.
                first_by_name.setdefault(candidate.name, candidate)
    return list(first_by_name.values())
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def extract_claude_session(jsonl_path: Path) -> dict | None:
    """Parse a Claude Code ``.jsonl`` transcript into a session summary.

    Every non-empty line is decoded as JSON; lines that are not valid JSON
    objects are skipped. Records of type ``"user"`` whose content is a
    non-empty string become user messages, except content starting with
    ``<system-reminder>``. Records of type ``"message"`` or ``"assistant"``
    contribute the joined text of their ``text`` blocks as one assistant
    message and one ``tool_uses`` entry per ``tool_use`` block.

    Stored text is redacted with ``_redact_sensitive`` *before* it is capped
    (5000 characters per message, 100 per tool command), so a secret that
    straddles the cut is not left behind as an unmatched fragment.

    Args:
        jsonl_path: Path of the transcript file to read.

    Returns:
        A dict describing the session, or ``None`` when the file cannot be
        processed or holds fewer than ``MIN_USER_MESSAGES`` user messages.
    """
    messages: list[dict] = []
    tool_uses: list[dict] = []
    user_msg_count = 0

    try:
        # Decode explicitly as UTF-8 (replacing undecodable bytes) so the
        # platform default encoding cannot make a whole session unreadable.
        with open(jsonl_path, "r", encoding="utf-8", errors="replace") as f:
            for line_no, line in enumerate(f, 1):
                line = line.strip()
                if not line:
                    continue
                try:
                    payload = json.loads(line)
                except json.JSONDecodeError:
                    continue
                # A valid JSON line that is not an object carries no record.
                if not isinstance(payload, dict):
                    continue

                msg_type = payload.get("type")
                msg = payload.get("message", {})
                if not isinstance(msg, dict):
                    msg = {}

                if msg_type == "user":
                    content = msg.get("content", "")
                    if not isinstance(content, str) or not content.strip():
                        continue
                    if content.startswith("<system-reminder>"):
                        continue
                    messages.append(
                        {
                            "role": "user",
                            "index": line_no,
                            "text": _redact_sensitive(content)[:5000],
                            "uuid": payload.get("uuid", ""),
                        }
                    )
                    user_msg_count += 1
                elif msg_type in ("message", "assistant"):
                    content_blocks = msg.get("content", [])
                    if isinstance(content_blocks, str):
                        # Plain-string content is treated as a single text block.
                        content_blocks = [{"type": "text", "text": content_blocks}]
                    elif not isinstance(content_blocks, list):
                        content_blocks = []

                    text_parts: list[str] = []
                    for block in content_blocks:
                        if not isinstance(block, dict):
                            continue
                        block_type = block.get("type")
                        if block_type == "text":
                            block_text = block.get("text", "")
                            if isinstance(block_text, str) and block_text:
                                text_parts.append(block_text)
                        elif block_type == "tool_use":
                            tool_input = block.get("input", {})
                            if isinstance(tool_input, dict):
                                input_keys = list(tool_input.keys())
                                file_path = tool_input.get("file_path", "")
                                if file_path:
                                    target = _redact_sensitive(str(file_path))
                                else:
                                    command = str(tool_input.get("command", ""))
                                    target = _redact_sensitive(command)[:100]
                            else:
                                input_keys = []
                                target = _redact_sensitive("")
                            tool_uses.append(
                                {
                                    "tool": block.get("name", ""),
                                    "input_keys": input_keys,
                                    "file": target,
                                }
                            )

                    combined = "\n".join(text_parts).strip()
                    if combined:
                        messages.append(
                            {
                                "role": "assistant",
                                "index": line_no,
                                "text": _redact_sensitive(combined)[:5000],
                            }
                        )
    except Exception:
        # Best-effort reader: an unreadable or unexpectedly shaped file is
        # reported as "no session" instead of propagating to the caller.
        return None

    if user_msg_count < MIN_USER_MESSAGES:
        return None

    return {
        "client": "claude_code",
        "session_file": _session_identifier("claude_code", jsonl_path.name),
        "display_name": jsonl_path.name,
        "session_path": str(jsonl_path),
        "message_count": len(messages),
        "user_message_count": user_msg_count,
        "tool_use_count": len(tool_uses),
        "messages": messages,
        "tool_uses": tool_uses,
        "source": "claude_projects",
    }
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def extract_codex_session(jsonl_path: Path) -> dict | None:
    """Parse a Codex ``.jsonl`` transcript into a session summary.

    Every non-empty line is decoded as JSON; lines that are not valid JSON
    objects, or whose ``payload`` is not an object, are skipped. Three record
    kinds are used:

    * ``session_meta`` - its payload supplies ``source``, ``cwd``,
      ``originator`` and ``id`` for the result.
    * ``event_msg`` with payload type ``user_message`` - becomes a user
      message unless empty or starting with ``<environment_context>``.
    * ``response_item`` - an assistant ``message`` contributes the joined
      ``output_text`` blocks; a ``function_call`` adds a ``tool_uses`` entry.

    Stored text is redacted with ``_redact_sensitive`` *before* it is capped
    (5000 characters per message, 100 per function-call argument string), so
    a secret that straddles the cut is not left behind as an unmatched
    fragment.

    Args:
        jsonl_path: Path of the transcript file to read.

    Returns:
        A dict describing the session, or ``None`` when the file cannot be
        processed or holds fewer than ``MIN_USER_MESSAGES`` user messages.
    """
    messages: list[dict] = []
    tool_uses: list[dict] = []
    user_msg_count = 0
    session_meta: dict = {}

    try:
        # Decode explicitly as UTF-8 (replacing undecodable bytes) so the
        # platform default encoding cannot make a whole session unreadable.
        with open(jsonl_path, "r", encoding="utf-8", errors="replace") as f:
            for line_no, line in enumerate(f, 1):
                line = line.strip()
                if not line:
                    continue
                try:
                    payload = json.loads(line)
                except json.JSONDecodeError:
                    continue
                # A valid JSON line that is not an object carries no record.
                if not isinstance(payload, dict):
                    continue

                item_type = payload.get("type")
                data = payload.get("payload", {})
                # Every record kind handled below needs an object payload.
                if not isinstance(data, dict):
                    continue

                if item_type == "session_meta":
                    session_meta = data
                    continue

                if item_type == "event_msg":
                    if data.get("type") != "user_message":
                        continue
                    content = str(data.get("message", "") or "").strip()
                    if not content or content.startswith("<environment_context>"):
                        continue
                    messages.append(
                        {
                            "role": "user",
                            "index": line_no,
                            "text": _redact_sensitive(content)[:5000],
                        }
                    )
                    user_msg_count += 1
                    continue

                if item_type != "response_item":
                    continue

                response_type = data.get("type")
                if response_type == "message" and data.get("role") == "assistant":
                    blocks = data.get("content", [])
                    if not isinstance(blocks, list):
                        blocks = []
                    text_parts = [
                        str(block.get("text", "") or "")
                        for block in blocks
                        if isinstance(block, dict) and block.get("type") == "output_text"
                    ]
                    combined = "\n".join(part for part in text_parts if part).strip()
                    if combined:
                        messages.append(
                            {
                                "role": "assistant",
                                "index": line_no,
                                "text": _redact_sensitive(combined)[:5000],
                            }
                        )
                elif response_type == "function_call":
                    arguments = str(data.get("arguments", ""))
                    tool_uses.append(
                        {
                            "tool": data.get("name", ""),
                            "input_keys": [],
                            "file": _redact_sensitive(arguments)[:100],
                        }
                    )
    except Exception:
        # Best-effort reader: an unreadable or unexpectedly shaped file is
        # reported as "no session" instead of propagating to the caller.
        return None

    if user_msg_count < MIN_USER_MESSAGES:
        return None

    return {
        "client": "codex",
        "session_file": _session_identifier("codex", jsonl_path.name),
        "display_name": jsonl_path.name,
        "session_path": str(jsonl_path),
        "message_count": len(messages),
        "user_message_count": user_msg_count,
        "tool_use_count": len(tool_uses),
        "messages": messages,
        "tool_uses": tool_uses,
        "source": session_meta.get("source", "codex"),
        "cwd": session_meta.get("cwd", ""),
        "originator": session_meta.get("originator", ""),
        "session_uid": session_meta.get("id", ""),
    }
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def collect_transcripts_since(since_iso: str, until_iso: str = "") -> list[dict]:
    """Collect parsed sessions whose file was modified inside a time window.

    A transcript is included when ``since < mtime <= until`` and its extractor
    returns a session. Each returned session gains a ``"modified"`` key holding
    the file's modification time in ISO format, and the list is sorted by that
    key in ascending order.

    Args:
        since_iso: Exclusive lower bound as an ISO-8601 string.
        until_iso: Inclusive upper bound as an ISO-8601 string; the current
            local time is used when empty.

    Returns:
        The matching sessions, oldest modification first.

    Raises:
        ValueError: If either bound is not parseable by
            ``datetime.fromisoformat``.
    """

    def _to_local_naive(moment: datetime) -> datetime:
        # File mtimes below are naive local datetimes. Comparing them with an
        # offset-aware value raises TypeError, so aware bounds are converted
        # to local wall-clock time first.
        if moment.utcoffset() is None:
            return moment
        return moment.astimezone().replace(tzinfo=None)

    since_dt = _to_local_naive(datetime.fromisoformat(since_iso))
    until_dt = _to_local_naive(datetime.fromisoformat(until_iso)) if until_iso else datetime.now()

    transcript_files: list[tuple[str, Path]] = [
        ("claude_code", path) for path in find_claude_session_files()
    ] + [
        ("codex", path) for path in find_codex_session_files()
    ]

    sessions: list[dict] = []
    for client, session_file in transcript_files:
        try:
            mtime = datetime.fromtimestamp(session_file.stat().st_mtime)
        except OSError:
            # The file vanished or is unreadable; skip it.
            continue
        if not (since_dt < mtime <= until_dt):
            continue
        if client == "codex":
            session = extract_codex_session(session_file)
        else:
            session = extract_claude_session(session_file)
        if session:
            session["modified"] = mtime.isoformat()
            sessions.append(session)

    sessions.sort(key=lambda row: row["modified"])
    return sessions
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def list_recent_transcripts(hours: int = DEFAULT_TRANSCRIPT_HOURS, client: str = "", limit: int = 10) -> list[dict]:
    """Return the most recently modified sessions from the last *hours* hours.

    *hours* is clamped by ``clamp_transcript_hours``. When *client* is given,
    only sessions whose ``"client"`` equals it are kept. The result is ordered
    newest first and holds at most *limit* entries (at least one slot; a falsy
    limit means 10).
    """
    lookback = timedelta(hours=clamp_transcript_hours(hours))
    window_start = datetime.now() - lookback
    recent = [
        row
        for row in collect_transcripts_since(window_start.isoformat())
        if not client or row.get("client") == client
    ]
    recent.sort(key=lambda row: row.get("modified", ""), reverse=True)
    return recent[: max(1, int(limit or 10))]
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def search_transcripts(query: str, *, hours: int = DEFAULT_TRANSCRIPT_HOURS, client: str = "", limit: int = 10) -> list[dict]:
    """Rank recent sessions by token overlap with *query*.

    Up to 200 recent sessions are fetched via ``list_recent_transcripts``.
    When the query yields no tokens, the first *limit* of those are returned
    unranked. Otherwise every message, and the session metadata
    (``display_name``, ``session_file``, ``source``, ``cwd``), is scored with
    ``_score_text_match``. Sessions with no positive score are dropped.

    Each kept session dict is annotated in place with ``"_score"`` (best match
    score plus a recency bonus of up to 0.35 within the window) and
    ``"matched_messages"`` (the three highest-scoring message snippets).
    Results are ordered by score, then modification time, descending.
    """
    candidates = list_recent_transcripts(hours=hours, client=client, limit=200)
    wanted = _tokenize(query)
    if not wanted:
        return candidates[: max(1, int(limit or 10))]

    window_seconds = clamp_transcript_hours(hours) * 3600
    now_ts = datetime.now().timestamp()
    meta_fields = ("display_name", "session_file", "source", "cwd")
    ranked: list[dict] = []

    for session in candidates:
        hits = []
        top_score = 0.0
        for message in session.get("messages") or []:
            body = str(message.get("text", "") or "")
            message_score = _score_text_match(wanted, body)
            if message_score > 0:
                top_score = max(top_score, message_score)
                hits.append(
                    {
                        "role": message.get("role", ""),
                        "index": message.get("index", 0),
                        "snippet": _truncate(body, 220),
                        "score": round(message_score, 4),
                    }
                )

        meta_text = " ".join(str(session.get(field, "") or "") for field in meta_fields)
        top_score = max(top_score, _score_text_match(wanted, meta_text))
        if top_score <= 0:
            continue

        try:
            modified_ts = datetime.fromisoformat(session.get("modified", "")).timestamp()
        except Exception:
            # Without a usable timestamp the session is treated as modified "now".
            modified_ts = now_ts
        age_fraction = (now_ts - modified_ts) / max(1, window_seconds)
        recency = max(0.0, 1.0 - age_fraction)

        session["_score"] = round(top_score + recency * 0.35, 4)
        hits_by_score = sorted(hits, key=lambda row: row["score"], reverse=True)
        session["matched_messages"] = hits_by_score[:3]
        ranked.append(session)

    ranked.sort(key=lambda row: (row.get("_score", 0), row.get("modified", "")), reverse=True)
    return ranked[: max(1, int(limit or 10))]
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
def load_transcript(session_ref: str = "", transcript_path: str = "", client: str = "") -> dict | None:
    """Load the first transcript matching the given reference, path and client.

    Args:
        session_ref: Identifier compared against the session's
            ``session_file``, ``display_name``, ``session_uid`` and file path.
        transcript_path: File path; only the transcript that resolves to the
            same location is considered.
        client: When set, restricts the search to ``"claude_code"`` or
            ``"codex"`` transcripts.

    Returns:
        The parsed session with a ``"modified"`` ISO timestamp (empty when the
        file cannot be stat'ed), or ``None`` when nothing matches.

    NOTE(review): when neither *session_ref* nor *transcript_path* is given,
    the first transcript that parses successfully is returned - confirm that
    callers rely on this.
    """
    ref = str(session_ref or "").strip()
    path_ref = str(transcript_path or "").strip()

    transcript_files: list[tuple[str, Path]] = [
        ("claude_code", path) for path in find_claude_session_files()
    ] + [
        ("codex", path) for path in find_codex_session_files()
    ]

    # Resolve the requested path once instead of once per candidate file. If
    # it cannot be resolved, no candidate can ever equal it.
    wanted_path: Path | None = None
    if path_ref:
        try:
            wanted_path = Path(path_ref).expanduser().resolve()
        except Exception:
            return None

    for detected_client, path in transcript_files:
        if client and detected_client != client:
            continue
        if wanted_path is not None:
            try:
                if wanted_path != path.resolve():
                    continue
            except Exception:
                continue

        is_codex = detected_client == "codex"
        if ref and not is_codex:
            # Claude sessions carry no session_uid, so every identifier they
            # can match is derivable from the path; skip the expensive parse
            # for files that cannot match.
            path_identifiers = {
                _session_identifier(detected_client, path.name),
                path.name,
                str(path),
            }
            if ref not in path_identifiers:
                continue

        session = extract_codex_session(path) if is_codex else extract_claude_session(path)
        if not session:
            continue
        if ref and ref not in {
            str(session.get("session_file", "")),
            str(session.get("display_name", "")),
            str(session.get("session_uid", "")),
            str(path),
        }:
            continue

        try:
            session["modified"] = datetime.fromtimestamp(path.stat().st_mtime).isoformat()
        except OSError:
            session["modified"] = ""
        return session

    return None
|