memory-map-mcp 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memory_map_mcp/__init__.py +1 -0
- memory_map_mcp/history_hook.py +262 -0
- memory_map_mcp/history_store.py +805 -0
- memory_map_mcp/redact.py +59 -0
- memory_map_mcp/server.py +1409 -0
- memory_map_mcp-0.1.0.dist-info/METADATA +573 -0
- memory_map_mcp-0.1.0.dist-info/RECORD +10 -0
- memory_map_mcp-0.1.0.dist-info/WHEEL +4 -0
- memory_map_mcp-0.1.0.dist-info/entry_points.txt +3 -0
- memory_map_mcp-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Claude Code hook: conversation history persistence.
|
|
4
|
+
|
|
5
|
+
Fires on UserPromptSubmit (every message), PreCompact, and Stop.
|
|
6
|
+
Extracts complete Q&A pairs since the last watermark, saves each pair as its
|
|
7
|
+
own MongoDB document. If a pair exceeds MAX_CHUNK_CHARS it is split into
|
|
8
|
+
overlapping chunks linked by group_id + part/total_parts.
|
|
9
|
+
|
|
10
|
+
Zero LLM calls — tags extracted by local keyword matching.
|
|
11
|
+
|
|
12
|
+
Usage (configured in .claude/settings.json or settings.local.json):
|
|
13
|
+
echo '{"session_id":"...","transcript_path":"...","cwd":"..."}' | python history_hook.py
|
|
14
|
+
echo '{"session_id":"...","transcript_path":"...","cwd":"..."}' | python history_hook.py --force
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import sys
|
|
18
|
+
import json
|
|
19
|
+
import os
|
|
20
|
+
import pathlib
|
|
21
|
+
import tempfile
|
|
22
|
+
import textwrap
|
|
23
|
+
import uuid
|
|
24
|
+
from datetime import datetime
|
|
25
|
+
|
|
26
|
+
from memory_map_mcp import history_store
|
|
27
|
+
from memory_map_mcp.redact import redact_secrets
|
|
28
|
+
|
|
29
|
+
def _env_int(name: str, default: int) -> int:
    """Return environment variable *name* parsed as an integer.

    Falls back to *default* when the variable is unset or does not parse as an
    integer, so a malformed setting cannot crash the hook at import time.
    A warning is written to stderr (stdout is reserved for the hook's JSON).
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError:
        print(
            f"[history_hook] ignoring invalid {name}={raw!r}; using {default}",
            file=sys.stderr,
        )
        return default


# Character cap applied to the text extracted from each transcript entry.
MAX_TURN_CHARS = _env_int("MCP_MAX_TURN_CHARS", 3000)
# Maximum characters per stored chunk of a Q&A pair.
MAX_CHUNK_CHARS = _env_int("MCP_MAX_CHUNK_CHARS", 4000)
# Characters shared between consecutive chunks of a split pair.
OVERLAP_CHARS = _env_int("MCP_OVERLAP_CHARS", 100)
# Watermark files older than this many days are removed on each run.
TEMP_FILE_TTL_DAYS = 7
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# --- Temp file cleanup ---
|
|
36
|
+
|
|
37
|
+
def _cleanup_stale_temp_files():
    """Delete watermark files in the OS temp dir older than TEMP_FILE_TTL_DAYS.

    Best-effort: a file that cannot be stat'ed or removed is simply skipped.
    """
    ttl_seconds = TEMP_FILE_TTL_DAYS * 86400
    oldest_allowed = datetime.now().timestamp() - ttl_seconds
    temp_root = pathlib.Path(tempfile.gettempdir())

    for candidate in temp_root.glob("claude_hist_wm_*.txt"):
        try:
            modified_at = candidate.stat().st_mtime
            if modified_at >= oldest_allowed:
                continue
            candidate.unlink()
        except OSError:
            # Could not inspect or remove this file; leave it for a later run.
            pass
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# --- Watermark (stored in OS temp dir) ---
|
|
49
|
+
|
|
50
|
+
def _watermark_path(session_id: str) -> pathlib.Path:
    """Return the watermark file path for a session.

    The file lives in the OS temp directory and is keyed by the first eight
    characters of ``session_id``.
    """
    short_id = session_id[:8]
    filename = f"claude_hist_wm_{short_id}.txt"
    return pathlib.Path(tempfile.gettempdir(), filename)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def read_watermark(session_id: str) -> int:
    """Return the stored watermark for a session, or 0 when none is usable.

    0 is returned when the watermark file does not exist, cannot be read, or
    does not contain an integer.
    """
    wm_file = _watermark_path(session_id)
    if not wm_file.exists():
        return 0
    try:
        stored = wm_file.read_text()
        return int(stored.strip())
    except (ValueError, OSError):
        return 0
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def write_watermark(session_id: str, line_num: int):
    """Store ``line_num`` as the session's watermark using write-then-rename.

    The value is first written to a scratch file in the same directory as the
    watermark file and then moved over it with ``os.replace``, so a concurrent
    reader never observes a partially written value. On any failure the
    scratch file is removed (best-effort) and the exception is re-raised.
    """
    target = _watermark_path(session_id)
    handle, scratch = tempfile.mkstemp(dir=target.parent, suffix=".tmp")
    try:
        with os.fdopen(handle, "w") as out:
            out.write(str(line_num))
        os.replace(scratch, target)
    except Exception:
        # Do not leave the scratch file behind; the original error propagates.
        try:
            os.unlink(scratch)
        except OSError:
            pass
        raise
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
# --- Transcript parsing ---
|
|
82
|
+
|
|
83
|
+
def _as_str(value) -> str:
    """Return *value* if it is a string, otherwise an empty string."""
    return value if isinstance(value, str) else ""


def _tool_summary(block: dict):
    """Return a short text summary of a tool_use block, or None if not captured.

    Only Edit, Write, Bash and PowerShell calls are summarised so that code
    changes and shell commands appear in the saved history.
    """
    tool_name = block.get("name", "")
    inp = block.get("input")
    if not isinstance(inp, dict):
        # Missing / null / malformed input: treat as an empty argument set.
        inp = {}

    if tool_name == "Edit":
        fp = inp.get("file_path", "")
        new_s = textwrap.dedent(_as_str(inp.get("new_string"))).strip()
        return f"[Edit: {fp}]\n{new_s[:400]}"
    if tool_name == "Write":
        fp = inp.get("file_path", "")
        body = textwrap.dedent(_as_str(inp.get("content"))).strip()
        return f"[Write: {fp}]\n{body[:400]}"
    if tool_name in ("Bash", "PowerShell"):
        cmd = _as_str(inp.get("command"))
        return f"[{tool_name}: {cmd[:200]}]"
    return None


def _entry_text(entry, max_turn_chars: int):
    """Return (role, text) for one parsed transcript entry, or None to skip it.

    An entry is skipped when it is not a JSON object, has no object-valued
    "message", is not a user/assistant entry, is a local-command string, or
    yields no text.
    """
    if not isinstance(entry, dict):
        return None
    msg = entry.get("message")
    if not isinstance(msg, dict):
        return None

    role = msg.get("role", "")
    roles = ("user", "assistant")
    if entry.get("type", "") not in roles or role not in roles:
        return None

    content = msg.get("content", "")
    parts = []
    if isinstance(content, str):
        if content.startswith(("<local-command", "<command-name>")):
            return None
        stripped = content.strip()
        if stripped:
            parts.append(stripped)
    elif isinstance(content, list):
        for block in content:
            if not isinstance(block, dict):
                continue
            btype = block.get("type")
            if btype == "text":
                stripped = _as_str(block.get("text")).strip()
                if stripped:
                    parts.append(stripped)
            elif btype == "tool_use" and role == "assistant":
                summary = _tool_summary(block)
                if summary is not None:
                    parts.append(summary)

    text = "\n".join(parts)[:max_turn_chars]
    if not text:
        return None
    return role, text


def extract_qa_pairs(transcript_path: str, watermark: int, max_turn_chars=None) -> tuple:
    """Read the transcript from *watermark* and return ``(pairs, new_watermark)``.

    Args:
        transcript_path: Path to a JSON-lines transcript file.
        watermark: Zero-based index of the first line to consider; earlier
            lines are skipped.
        max_turn_chars: Character cap applied to the text of each transcript
            entry. Defaults to the module-level ``MAX_TURN_CHARS``.

    Returns:
        pairs: list of ``{"user": str, "assistant": str}`` — only complete pairs.
        new_watermark: line index AFTER the last line of the last complete
            pair's assistant turn; equals *watermark* when no pair was found.
        Any trailing unpaired user message is left for the next call.

    Lines that are not valid JSON, or whose structure is not the expected
    object shape, are skipped rather than raising. If the file cannot be
    opened or read, ``([], watermark)`` is returned. Undecodable bytes are
    replaced instead of aborting the read.
    """
    limit = MAX_TURN_CHARS if max_turn_chars is None else max_turn_chars
    raw = []  # list of (role, content, line_end)

    try:
        # errors="replace": one bad byte must not abort the whole hook.
        with open(transcript_path, "r", encoding="utf-8", errors="replace") as f:
            for i, line in enumerate(f):
                if i < watermark:
                    continue
                try:
                    entry = json.loads(line)
                except json.JSONDecodeError:
                    continue
                parsed = _entry_text(entry, limit)
                if parsed is not None:
                    raw.append((parsed[0], parsed[1], i + 1))
    except OSError:
        return [], watermark

    # Collapse consecutive same-role entries into turns.
    # A complex multi-tool assistant response produces many transcript entries;
    # we join them all so the saved pair contains the full assistant output,
    # not just the preamble before the first tool call.
    turns = []  # list of [role, combined_text, last_line_end]
    for role, content, line_end in raw:
        if turns and turns[-1][0] == role:
            turns[-1][1] += "\n" + content
            turns[-1][2] = line_end
        else:
            turns.append([role, content, line_end])

    # Pair each user turn with the assistant turn that directly follows it.
    pairs = []
    new_watermark = watermark
    idx = 0
    while idx < len(turns) - 1:
        role1, content1, _ = turns[idx]
        role2, content2, line_end2 = turns[idx + 1]
        if role1 == "user" and role2 == "assistant":
            pairs.append({"user": content1, "assistant": content2})
            new_watermark = line_end2
            idx += 2
        else:
            idx += 1

    return pairs, new_watermark
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
# --- Splitting ---
|
|
180
|
+
|
|
181
|
+
def split_into_chunks(text: str, max_chars=None, overlap=None) -> list:
    """Split *text* into overlapping chunks.

    Args:
        text: The string to split.
        max_chars: Maximum characters per chunk. Defaults to the module-level
            ``MAX_CHUNK_CHARS``. A non-positive value disables splitting.
        overlap: Characters shared between consecutive chunks. Defaults to
            the module-level ``OVERLAP_CHARS``. Clamped to the range
            ``0 .. max_chars - 1`` so the window always advances.

    Returns:
        A list of chunks. Text that fits in one chunk (including the empty
        string) is returned as a single-element list. Splitting stops as soon
        as a chunk reaches the end of the text, so no trailing chunk made only
        of already-covered overlap is produced.
    """
    size = MAX_CHUNK_CHARS if max_chars is None else max_chars
    lap = OVERLAP_CHARS if overlap is None else overlap

    if size <= 0 or len(text) <= size:
        return [text]

    # Guarantee forward progress: an overlap >= size would otherwise make the
    # step zero or negative and the loop would never terminate.
    lap = max(0, min(lap, size - 1))
    step = size - lap

    chunks = []
    start = 0
    while True:
        chunks.append(text[start:start + size])
        if start + size >= len(text):
            break
        start += step
    return chunks
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
# --- Main ---
|
|
194
|
+
|
|
195
|
+
def main():
    """Hook entry point: persist new Q&A pairs from the transcript.

    Reads a JSON object from stdin with ``session_id``, ``transcript_path``
    and ``cwd``. Extracts the complete user/assistant pairs recorded since
    the session's watermark, redacts secrets, splits each pair into chunks
    and stores every chunk through ``history_store.save_chunk`` (with
    ``embed=False``). The watermark is advanced only after all chunks were
    saved. Exactly one JSON document is printed to stdout: ``{}`` when there
    is nothing to do or the input is unusable, otherwise an object with a
    ``systemMessage`` summary.

    NOTE: ``--force`` may be passed on the command line (see the module
    usage text), but nothing in this function reads it.
    """
    _cleanup_stale_temp_files()

    try:
        stdin_data = json.loads(sys.stdin.read())
    except (json.JSONDecodeError, EOFError):
        print("{}")
        return

    # Valid JSON that is not an object (e.g. a list or number) has no fields.
    if not isinstance(stdin_data, dict):
        print("{}")
        return

    session_id = stdin_data.get("session_id", "unknown")
    if not isinstance(session_id, str):
        # e.g. null: the id is sliced below, so it must be a string.
        session_id = "unknown"
    transcript_path = stdin_data.get("transcript_path", "")
    cwd = stdin_data.get("cwd", "")

    if not cwd:
        print("{}")
        return

    # A non-string path is rejected up front: os.path.exists would treat an
    # integer as a file descriptor and raise on other types.
    if (
        not isinstance(transcript_path, str)
        or not transcript_path
        or not os.path.exists(transcript_path)
    ):
        print("{}")
        return

    watermark = read_watermark(session_id)
    pairs, new_watermark = extract_qa_pairs(transcript_path, watermark)

    if not pairs:
        print("{}")
        return

    total_tokens = 0
    all_tags = set()

    for pair in pairs:
        dialogue = redact_secrets(f"user: {pair['user']}\nassistant: {pair['assistant']}")
        tags = history_store.extract_tags(dialogue)
        all_tags.update(tags)
        chunks = split_into_chunks(dialogue)
        n = len(chunks)
        # Only multi-part pairs get a group id and part numbering.
        gid = uuid.uuid4().hex[:8] if n > 1 else None

        for idx, chunk in enumerate(chunks, 1):
            history_store.save_chunk(
                cwd,
                session_id[:8],
                chunk,
                tags,
                group_id=gid,
                part=(idx if n > 1 else None),
                total_parts=(n if n > 1 else None),
                embed=False,  # hooks must return quickly; embeddings backfilled separately
            )
            total_tokens += history_store.compute_stats(chunk)["tokens"]

    # Advance the watermark only after every chunk was saved.
    write_watermark(session_id, new_watermark)

    tag_str = ",".join(sorted(all_tags)) if all_tags else "untagged"
    n_pairs = len(pairs)
    output = {
        "systemMessage": (
            f"[history] {n_pairs} pair(s) saved — tags:[{tag_str}] tokens:{total_tokens}"
        )
    }
    print(json.dumps(output))
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
if __name__ == "__main__":
|
|
262
|
+
main()
|