memory-map-mcp 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"
@@ -0,0 +1,262 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Claude Code hook: conversation history persistence.
4
+
5
+ Fires on UserPromptSubmit (every message), PreCompact, and Stop.
6
+ Extracts complete Q&A pairs since the last watermark, saves each pair as its
7
+ own MongoDB document. If a pair exceeds MAX_CHUNK_CHARS it is split into
8
+ overlapping chunks linked by group_id + part/total_parts.
9
+
10
+ Zero LLM calls — tags extracted by local keyword matching.
11
+
12
+ Usage (configured in .claude/settings.json or settings.local.json):
13
+ echo '{"session_id":"...","transcript_path":"...","cwd":"..."}' | python history_hook.py
14
+ echo '{"session_id":"...","transcript_path":"...","cwd":"..."}' | python history_hook.py --force
15
+ """
16
+
17
+ import sys
18
+ import json
19
+ import os
20
+ import pathlib
21
+ import tempfile
22
+ import textwrap
23
+ import uuid
24
+ from datetime import datetime
25
+
26
+ from memory_map_mcp import history_store
27
+ from memory_map_mcp.redact import redact_secrets
28
+
29
+ MAX_TURN_CHARS = int(os.environ.get("MCP_MAX_TURN_CHARS", "3000"))
30
+ MAX_CHUNK_CHARS = int(os.environ.get("MCP_MAX_CHUNK_CHARS", "4000"))
31
+ OVERLAP_CHARS = int(os.environ.get("MCP_OVERLAP_CHARS", "100"))
32
+ TEMP_FILE_TTL_DAYS = 7
33
+
34
+
35
+ # --- Temp file cleanup ---
36
+
37
def _cleanup_stale_temp_files():
    """Delete watermark files in the OS temp dir older than the TTL.

    Best effort: a file that cannot be inspected or removed is left alone.
    """
    max_age_seconds = TEMP_FILE_TTL_DAYS * 86400
    temp_root = pathlib.Path(tempfile.gettempdir())
    oldest_allowed = datetime.now().timestamp() - max_age_seconds
    for candidate in temp_root.glob("claude_hist_wm_*.txt"):
        try:
            expired = candidate.stat().st_mtime < oldest_allowed
            if expired:
                candidate.unlink()
        except OSError:
            # The file may have vanished or be unreadable; skip it.
            pass
46
+
47
+
48
+ # --- Watermark (stored in OS temp dir) ---
49
+
50
+ def _watermark_path(session_id: str) -> pathlib.Path:
51
+ return pathlib.Path(tempfile.gettempdir()) / f"claude_hist_wm_{session_id[:8]}.txt"
52
+
53
+
54
def read_watermark(session_id: str) -> int:
    """Return the stored watermark for *session_id*, or 0 when unavailable.

    0 is returned when the watermark file does not exist, cannot be read, or
    does not contain an integer.
    """
    wm_file = _watermark_path(session_id)
    if not wm_file.exists():
        return 0
    try:
        stored = wm_file.read_text()
        return int(stored.strip())
    except (ValueError, OSError):
        return 0
62
+
63
+
64
def write_watermark(session_id: str, line_num: int):
    """Persist *line_num* as the watermark for *session_id*.

    The value is written to a scratch file in the same directory and then
    moved over the real file with ``os.replace``, so a concurrent reader never
    observes a half-written value. On failure the scratch file is removed
    (best effort) and the original exception propagates.
    """
    target = _watermark_path(session_id)
    handle, scratch = tempfile.mkstemp(dir=target.parent, suffix=".tmp")
    try:
        with os.fdopen(handle, "w") as out:
            out.write(str(line_num))
        os.replace(scratch, target)
    except Exception:
        try:
            os.unlink(scratch)
        except OSError:
            # Nothing more to do if the scratch file is already gone.
            pass
        raise
79
+
80
+
81
+ # --- Transcript parsing ---
82
+
83
def _as_text(value) -> str:
    """Coerce a JSON value to text: None becomes "", non-strings go through str()."""
    if value is None:
        return ""
    return value if isinstance(value, str) else str(value)


def _render_tool_use(block: dict):
    """Return a short summary of an Edit/Write/Bash/PowerShell call, else None."""
    tool_name = block.get("name", "")
    inp = block.get("input")
    if not isinstance(inp, dict):
        # A null or malformed "input" must not abort the whole transcript scan.
        inp = {}
    if tool_name == "Edit":
        fp = inp.get("file_path", "")
        new_s = textwrap.dedent(_as_text(inp.get("new_string"))).strip()
        return f"[Edit: {fp}]\n{new_s[:400]}"
    if tool_name == "Write":
        fp = inp.get("file_path", "")
        body = textwrap.dedent(_as_text(inp.get("content"))).strip()
        return f"[Write: {fp}]\n{body[:400]}"
    if tool_name in ("Bash", "PowerShell"):
        cmd = _as_text(inp.get("command"))
        return f"[{tool_name}: {cmd[:200]}]"
    return None


def _entry_text(entry, max_turn_chars: int):
    """Return (role, text) for one transcript entry, or None when it is skipped."""
    if not isinstance(entry, dict):
        return None
    msg = entry.get("message")
    if not isinstance(msg, dict):
        return None

    role = msg.get("role", "")
    roles = ("user", "assistant")
    if entry.get("type", "") not in roles or role not in roles:
        return None

    content = msg.get("content", "")
    parts = []
    if isinstance(content, str):
        # Local slash-command echoes are not part of the conversation.
        if content.startswith(("<local-command", "<command-name>")):
            return None
        stripped = content.strip()
        if stripped:
            parts.append(stripped)
    elif isinstance(content, list):
        for block in content:
            if not isinstance(block, dict):
                continue
            btype = block.get("type")
            if btype == "text":
                stripped = _as_text(block.get("text")).strip()
                if stripped:
                    parts.append(stripped)
            elif btype == "tool_use" and role == "assistant":
                # Capture file-modifying tools so code changes appear in history.
                summary = _render_tool_use(block)
                if summary is not None:
                    parts.append(summary)

    text = "\n".join(parts)[:max_turn_chars]
    return (role, text) if text else None


def extract_qa_pairs(transcript_path: str, watermark: int, max_turn_chars=None) -> tuple:
    """Read the transcript from *watermark* and return ``(pairs, new_watermark)``.

    Args:
        transcript_path: Path of the JSON-lines transcript file.
        watermark: Zero-based index of the first line to read.
        max_turn_chars: Character cap for the text of a single transcript
            entry; defaults to the module-level ``MAX_TURN_CHARS``.

    Returns:
        pairs: list of ``{"user": str, "assistant": str}`` — complete pairs only.
        new_watermark: line index AFTER the last complete pair's assistant
            line (the input watermark when no pair was found). Any trailing
            unpaired user message is left for the next call.

    Lines that are not valid JSON, are not JSON objects, or have a missing or
    null ``message`` are skipped. If the file cannot be opened or read the
    result is ``([], watermark)``.
    """
    limit = MAX_TURN_CHARS if max_turn_chars is None else max_turn_chars
    raw = []  # list of (role, text, line_end)

    try:
        # errors="replace" keeps one undecodable byte from raising
        # UnicodeDecodeError and aborting the hook.
        with open(transcript_path, "r", encoding="utf-8", errors="replace") as f:
            for i, line in enumerate(f):
                if i < watermark:
                    continue
                try:
                    entry = json.loads(line)
                except json.JSONDecodeError:
                    continue
                parsed = _entry_text(entry, limit)
                if parsed is not None:
                    raw.append((parsed[0], parsed[1], i + 1))
    except OSError:
        return [], watermark

    # Collapse consecutive same-role entries into turns.
    # A complex multi-tool assistant response produces many transcript entries;
    # joining them keeps the full assistant output in the saved pair, not just
    # the preamble before the first tool call.
    turns = []  # list of [role, combined_text, last_line_end]
    for role, content, line_end in raw:
        if turns and turns[-1][0] == role:
            turns[-1][1] += "\n" + content
            turns[-1][2] = line_end
        else:
            turns.append([role, content, line_end])

    # Pair each user turn with the assistant turn that directly follows it.
    pairs = []
    new_watermark = watermark
    idx = 0
    while idx < len(turns) - 1:
        role1, content1, _ = turns[idx]
        role2, content2, line_end2 = turns[idx + 1]
        if role1 == "user" and role2 == "assistant":
            pairs.append({"user": content1, "assistant": content2})
            new_watermark = line_end2
            idx += 2
        else:
            idx += 1

    return pairs, new_watermark
177
+
178
+
179
+ # --- Splitting ---
180
+
181
def split_into_chunks(text: str, max_chars=None, overlap=None) -> list:
    """Split *text* into chunks of at most *max_chars* with *overlap* shared chars.

    Args:
        text: The text to split.
        max_chars: Maximum chunk length; defaults to ``MAX_CHUNK_CHARS``.
        overlap: Characters repeated at the start of each following chunk;
            defaults to ``OVERLAP_CHARS``.

    Returns:
        A list of chunks. Text that already fits, or a non-positive
        *max_chars*, yields ``[text]`` unchanged.

    A negative overlap is treated as 0. An overlap that is not smaller than
    the chunk size is ignored (chunks do not overlap), because the scan could
    otherwise never advance. Splitting stops as soon as a chunk reaches the
    end of the text, so no trailing chunk consists only of repeated text.
    """
    size = MAX_CHUNK_CHARS if max_chars is None else max_chars
    lap = OVERLAP_CHARS if overlap is None else overlap

    if size <= 0 or len(text) <= size:
        return [text]

    lap = max(lap, 0)
    step = size - lap if lap < size else size

    chunks = []
    start = 0
    while True:
        chunks.append(text[start:start + size])
        if start + size >= len(text):
            break
        start += step
    return chunks
191
+
192
+
193
+ # --- Main ---
194
+
195
def main():
    """Hook entry point: save new Q&A pairs from the transcript named on stdin.

    Reads one JSON object from stdin (``session_id``, ``transcript_path``,
    ``cwd``), extracts the complete pairs recorded after the session's
    watermark, saves each pair (split into linked chunks when it is too long)
    through ``history_store.save_chunk``, advances the watermark, and prints a
    one-line JSON ``systemMessage`` summary. Prints ``{}`` and returns when
    the input is unusable or there is nothing new to save.
    """
    _cleanup_stale_temp_files()

    # NOTE(review): the usage text mentions a --force flag, but nothing in
    # this function reads it; the previously unused local has been dropped.

    try:
        payload = json.loads(sys.stdin.read())
    except (ValueError, EOFError):
        # ValueError covers json.JSONDecodeError and undecodable stdin bytes.
        print("{}")
        return

    if not isinstance(payload, dict):
        # Valid JSON that is not an object carries none of the fields we need.
        print("{}")
        return

    raw_session = payload.get("session_id", "unknown")
    # A null or non-string id must not break the slicing below.
    session_id = "unknown" if raw_session is None else str(raw_session)
    transcript_path = payload.get("transcript_path", "")
    cwd = payload.get("cwd", "")

    if not cwd:
        print("{}")
        return

    if (
        not isinstance(transcript_path, str)
        or not transcript_path
        or not os.path.exists(transcript_path)
    ):
        print("{}")
        return

    watermark = read_watermark(session_id)
    pairs, new_watermark = extract_qa_pairs(transcript_path, watermark)

    if not pairs:
        print("{}")
        return

    total_tokens = 0
    all_tags = set()
    session_key = session_id[:8]

    for pair in pairs:
        dialogue = redact_secrets(f"user: {pair['user']}\nassistant: {pair['assistant']}")
        tags = history_store.extract_tags(dialogue)
        all_tags.update(tags)
        chunks = split_into_chunks(dialogue)
        n = len(chunks)
        multi = n > 1
        # Chunks of one split pair share a group id; unsplit pairs have none.
        gid = uuid.uuid4().hex[:8] if multi else None

        for idx, chunk in enumerate(chunks, 1):
            history_store.save_chunk(
                cwd,
                session_key,
                chunk,
                tags,
                group_id=gid,
                part=(idx if multi else None),
                total_parts=(n if multi else None),
                embed=False,  # hooks must return quickly; embeddings backfilled separately
            )
            total_tokens += history_store.compute_stats(chunk)["tokens"]

    # Advance the watermark only after every pair has been saved.
    write_watermark(session_id, new_watermark)

    tag_str = ",".join(sorted(all_tags)) if all_tags else "untagged"
    n_pairs = len(pairs)
    output = {
        "systemMessage": (
            f"[history] {n_pairs} pair(s) saved — tags:[{tag_str}] tokens:{total_tokens}"
        )
    }
    print(json.dumps(output))


if __name__ == "__main__":
    main()