claude-memory-agent 2.0.1 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +206 -206
- package/agent_card.py +186 -0
- package/bin/cli.js +327 -185
- package/bin/lib/banner.js +39 -0
- package/bin/lib/environment.js +166 -0
- package/bin/lib/installer.js +291 -0
- package/bin/lib/models.js +95 -0
- package/bin/lib/steps/advanced.js +101 -0
- package/bin/lib/steps/confirm.js +87 -0
- package/bin/lib/steps/model.js +57 -0
- package/bin/lib/steps/provider.js +65 -0
- package/bin/lib/steps/scope.js +59 -0
- package/bin/lib/steps/server.js +74 -0
- package/bin/lib/ui.js +75 -0
- package/bin/onboarding.js +164 -0
- package/bin/postinstall.js +35 -270
- package/config.py +103 -4
- package/dashboard.html +4902 -2689
- package/hooks/extract_memories.py +439 -0
- package/hooks/grounding-hook.py +422 -348
- package/hooks/pre_compact_hook.py +76 -0
- package/hooks/session_end.py +293 -192
- package/hooks/session_end_hook.py +149 -0
- package/hooks/session_start.py +227 -227
- package/hooks/stop_hook.py +372 -0
- package/install.py +972 -902
- package/main.py +5240 -2859
- package/mcp_server.py +451 -0
- package/package.json +58 -47
- package/requirements.txt +12 -8
- package/services/__init__.py +50 -50
- package/services/adaptive_ranker.py +272 -0
- package/services/agent_catalog.json +153 -0
- package/services/agent_registry.py +245 -730
- package/services/claude_md_sync.py +320 -4
- package/services/consolidation.py +417 -0
- package/services/curator.py +1606 -0
- package/services/database.py +4118 -2485
- package/services/embedding_pipeline.py +262 -0
- package/services/embeddings.py +493 -85
- package/services/memory_decay.py +408 -0
- package/services/native_memory_paths.py +86 -0
- package/services/native_memory_sync.py +496 -0
- package/services/response_manager.py +183 -0
- package/services/terminal_ui.py +199 -0
- package/services/tier_manager.py +235 -0
- package/services/websocket.py +26 -6
- package/skills/__init__.py +21 -1
- package/skills/confidence_tracker.py +441 -0
- package/skills/context.py +675 -0
- package/skills/curator.py +348 -0
- package/skills/search.py +444 -213
- package/skills/session_review.py +605 -0
- package/skills/store.py +484 -179
- package/terminal_dashboard.py +474 -0
- package/update_system.py +829 -817
- package/hooks/__pycache__/auto-detect-response.cpython-312.pyc +0 -0
- package/hooks/__pycache__/auto_capture.cpython-312.pyc +0 -0
- package/hooks/__pycache__/session_end.cpython-312.pyc +0 -0
- package/hooks/__pycache__/session_start.cpython-312.pyc +0 -0
- package/services/__pycache__/__init__.cpython-312.pyc +0 -0
- package/services/__pycache__/agent_registry.cpython-312.pyc +0 -0
- package/services/__pycache__/auth.cpython-312.pyc +0 -0
- package/services/__pycache__/auto_inject.cpython-312.pyc +0 -0
- package/services/__pycache__/claude_md_sync.cpython-312.pyc +0 -0
- package/services/__pycache__/cleanup.cpython-312.pyc +0 -0
- package/services/__pycache__/compaction_flush.cpython-312.pyc +0 -0
- package/services/__pycache__/confidence.cpython-312.pyc +0 -0
- package/services/__pycache__/daily_log.cpython-312.pyc +0 -0
- package/services/__pycache__/database.cpython-312.pyc +0 -0
- package/services/__pycache__/embeddings.cpython-312.pyc +0 -0
- package/services/__pycache__/insights.cpython-312.pyc +0 -0
- package/services/__pycache__/llm_analyzer.cpython-312.pyc +0 -0
- package/services/__pycache__/memory_md_sync.cpython-312.pyc +0 -0
- package/services/__pycache__/retry_queue.cpython-312.pyc +0 -0
- package/services/__pycache__/timeline.cpython-312.pyc +0 -0
- package/services/__pycache__/vector_index.cpython-312.pyc +0 -0
- package/services/__pycache__/websocket.cpython-312.pyc +0 -0
- package/skills/__pycache__/__init__.cpython-312.pyc +0 -0
- package/skills/__pycache__/admin.cpython-312.pyc +0 -0
- package/skills/__pycache__/checkpoint.cpython-312.pyc +0 -0
- package/skills/__pycache__/claude_md.cpython-312.pyc +0 -0
- package/skills/__pycache__/cleanup.cpython-312.pyc +0 -0
- package/skills/__pycache__/grounding.cpython-312.pyc +0 -0
- package/skills/__pycache__/insights.cpython-312.pyc +0 -0
- package/skills/__pycache__/natural_language.cpython-312.pyc +0 -0
- package/skills/__pycache__/retrieve.cpython-312.pyc +0 -0
- package/skills/__pycache__/search.cpython-312.pyc +0 -0
- package/skills/__pycache__/state.cpython-312.pyc +0 -0
- package/skills/__pycache__/store.cpython-312.pyc +0 -0
- package/skills/__pycache__/summarize.cpython-312.pyc +0 -0
- package/skills/__pycache__/timeline.cpython-312.pyc +0 -0
- package/skills/__pycache__/verification.cpython-312.pyc +0 -0
- package/test_automation.py +0 -221
- package/test_complete.py +0 -338
- package/test_full.py +0 -322
- package/verify_db.py +0 -134
|
@@ -0,0 +1,439 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Extract memories from conversation transcripts.
|
|
4
|
+
|
|
5
|
+
This script reads a Claude Code conversation transcript, extracts key
|
|
6
|
+
decisions, errors, patterns, and facts using keyword/pattern matching,
|
|
7
|
+
and stores them via the memory agent's HTTP API.
|
|
8
|
+
|
|
9
|
+
It tracks what has already been extracted using a cursor file so that
|
|
10
|
+
repeated calls (e.g., multiple PreCompact events) do not duplicate
|
|
11
|
+
extracted memories.
|
|
12
|
+
|
|
13
|
+
Design constraints:
|
|
14
|
+
- Must complete in under 5 seconds
|
|
15
|
+
- Uses simple keyword matching, NOT an LLM call
|
|
16
|
+
- Fails silently (exit 0) to never block compaction or session end
|
|
17
|
+
- Idempotent: cursor tracking prevents duplicate extraction
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import os
|
|
21
|
+
import sys
|
|
22
|
+
import json
|
|
23
|
+
import re
|
|
24
|
+
import time
|
|
25
|
+
import hashlib
|
|
26
|
+
from datetime import datetime
|
|
27
|
+
from pathlib import Path
|
|
28
|
+
from typing import Dict, Any, List, Optional, Tuple
|
|
29
|
+
|
|
30
|
+
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Base URL of the memory agent HTTP API; overridable via environment.
MEMORY_AGENT_URL = os.getenv("MEMORY_AGENT_URL", "http://localhost:8102")
# Optional API key; when non-empty it is sent as the X-Memory-Key header.
API_KEY = os.getenv("MEMORY_API_KEY", "")
# Per-user cursor file shared by all sessions, keyed by session id.
CURSOR_DIR = Path.home() / ".claude"
CURSOR_FILE = CURSOR_DIR / "memory-agent-cursor.json"
MAX_EXTRACTION_TIME_SECONDS = 4.0  # Leave 1s headroom under the 5s budget
MAX_MEMORIES_PER_RUN = 10  # Cap to stay fast
MAX_CONTENT_LENGTH = 500  # Truncate long content for storage
|
|
41
|
+
|
|
42
|
+
# ---------------------------------------------------------------------------
# Extraction patterns
# ---------------------------------------------------------------------------

# Each regex anchors at a line start and captures the tail of a sentence.
# The capture group itself is not stored: extract_from_text() widens every
# match to line boundaries via extract_context_around() before storing.
DECISION_PATTERNS = [
    # Explicit decision language
    re.compile(r"(?:^|\n)\s*(?:I |We |Let's |Going to )?(?:decided|decide) (?:to |that )(.*?)(?:\.|$)", re.IGNORECASE | re.MULTILINE),
    re.compile(r"(?:^|\n)\s*(?:Let's use|Going with|Chose|Choosing|Will use|Using|Went with) (.*?)(?:\.|$)", re.IGNORECASE | re.MULTILINE),
    re.compile(r"(?:^|\n)\s*(?:The approach|The plan|The strategy|The solution) (?:is|will be) (.*?)(?:\.|$)", re.IGNORECASE | re.MULTILINE),
    re.compile(r"(?:^|\n)\s*(?:I'll implement|We'll implement|Implementing) (.*?)(?:\.|$)", re.IGNORECASE | re.MULTILINE),
]

ERROR_PATTERNS = [
    # Error/bug language
    # NOTE(review): the mixed-case alternatives (Error|ERROR, Bug|BUG, ...)
    # are redundant under re.IGNORECASE; harmless but could be collapsed.
    re.compile(r"(?:^|\n)\s*(?:Error|ERROR|Bug|BUG|ISSUE|Issue|PROBLEM|Problem|CRITICAL|FATAL)[:\s]+(.*?)(?:\n|$)", re.IGNORECASE | re.MULTILINE),
    re.compile(r"(?:^|\n)\s*(?:Fixed|Fixing|Fix for|Resolved|Resolution)[:\s]+(.*?)(?:\n|$)", re.IGNORECASE | re.MULTILINE),
    # DOTALL so a traceback body (up to a blank line or end of text) is kept.
    re.compile(r"(?:Traceback|Exception|raise \w+Error)(.*?)(?:\n\n|\Z)", re.DOTALL),
    re.compile(r"(?:^|\n)\s*(?:Root cause|The bug was|The issue was|The problem was)[:\s]+(.*?)(?:\.|$)", re.IGNORECASE | re.MULTILINE),
]

PATTERN_PATTERNS = [
    # Architecture/pattern language
    re.compile(r"(?:^|\n)\s*(?:The pattern|A pattern|Pattern)[:\s]+(.*?)(?:\.|$)", re.IGNORECASE | re.MULTILINE),
    re.compile(r"(?:^|\n)\s*(?:The approach|Best practice|Convention|Architecture)[:\s]+(.*?)(?:\.|$)", re.IGNORECASE | re.MULTILINE),
    re.compile(r"(?:^|\n)\s*(?:Always|Never|Should always|Should never|Must always|Must never) (.*?)(?:\.|$)", re.IGNORECASE | re.MULTILINE),
]

# Broader keyword triggers (used for line-level scanning).
# Matching in extract_from_text() is plain substring containment against the
# lowercased line, so short keywords like "fix" also hit "prefix"/"suffix" —
# presumably breadth is preferred over precision here; TODO confirm.
DECISION_KEYWORDS = {"decided", "let's use", "going with", "chose", "choosing", "will use", "the plan is", "approach is", "strategy is", "i'll implement", "we'll implement"}
ERROR_KEYWORDS = {"error", "bug", "fix", "issue", "traceback", "exception", "failed", "failure", "broken", "crash", "root cause"}
PATTERN_KEYWORDS = {"pattern", "approach", "architecture", "convention", "best practice", "always", "never", "rule"}
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
# ---------------------------------------------------------------------------
|
|
76
|
+
# Cursor management - tracks what we already extracted
|
|
77
|
+
# ---------------------------------------------------------------------------
|
|
78
|
+
|
|
79
|
+
def load_cursor(session_id: str) -> Dict[str, Any]:
    """Return the saved extraction cursor for *session_id*.

    Falls back to a zeroed cursor when the shared cursor file is missing,
    unreadable, or holds no entry for this session.
    """
    fresh_cursor = {"byte_offset": 0, "extracted_hashes": []}
    try:
        if not CURSOR_FILE.exists():
            return fresh_cursor
        sessions = json.loads(CURSOR_FILE.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, OSError):
        # Corrupt or unreadable cursor file: start over rather than fail.
        return fresh_cursor
    return sessions.get(session_id, fresh_cursor)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def save_cursor(session_id: str, cursor: Dict[str, Any]):
    """Persist the extraction cursor for *session_id*.

    Merges this session's cursor into the shared cursor file and prunes the
    file to the 20 most recently run sessions. All I/O failures are swallowed
    so a broken cursor file can never block compaction or session end.
    """
    try:
        CURSOR_DIR.mkdir(parents=True, exist_ok=True)
        data = {}
        if CURSOR_FILE.exists():
            try:
                data = json.loads(CURSOR_FILE.read_text(encoding="utf-8"))
            except (json.JSONDecodeError, OSError):
                data = {}

        data[session_id] = cursor

        # Prune old sessions (keep the 20 most recently run).
        # BUGFIX: previously this sorted the session *ids* lexicographically,
        # which is unrelated to recency. Sort by the stored "last_run"
        # ISO-8601 timestamp instead (lexicographic == chronological);
        # entries without one sort first and are pruned first.
        if len(data) > 20:
            def last_run(key: str) -> str:
                entry = data.get(key)
                return entry.get("last_run", "") if isinstance(entry, dict) else ""

            for old_key in sorted(data.keys(), key=last_run)[:-20]:
                del data[old_key]

        CURSOR_FILE.write_text(json.dumps(data, indent=2), encoding="utf-8")
    except OSError:
        pass  # Fail silently
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def cleanup_cursor(session_id: str):
    """Remove cursor data for a completed session."""
    try:
        if not CURSOR_FILE.exists():
            return
        sessions = json.loads(CURSOR_FILE.read_text(encoding="utf-8"))
        if session_id not in sessions:
            return
        del sessions[session_id]
        CURSOR_FILE.write_text(json.dumps(sessions, indent=2), encoding="utf-8")
    except (json.JSONDecodeError, OSError):
        # Best-effort cleanup; never raise out of a hook.
        pass
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def content_hash(text: str) -> str:
    """Return a short, normalization-insensitive hash for dedup purposes."""
    normalized = text.strip().lower()
    digest = hashlib.md5(normalized.encode("utf-8")).hexdigest()
    # 12 hex chars is plenty for dedup within a single session's cursor.
    return digest[:12]
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
# ---------------------------------------------------------------------------
|
|
132
|
+
# Transcript reading
|
|
133
|
+
# ---------------------------------------------------------------------------
|
|
134
|
+
|
|
135
|
+
def read_transcript(transcript_path: str, byte_offset: int = 0) -> Tuple[str, int]:
    """
    Read new transcript content starting at ``byte_offset``.

    Args:
        transcript_path: Path to the transcript file.
        byte_offset: Byte position (from a previous run's cursor) to resume at.

    Returns:
        ``(new_text, new_byte_offset)``; on any failure or when nothing new
        exists, returns ``("", byte_offset)`` unchanged.
    """
    path = Path(transcript_path)
    if not path.exists():
        return "", byte_offset

    try:
        # st_size is a byte count, so the offset bookkeeping must be
        # byte-based too. Read in binary and decode explicitly: text-mode
        # seek()/tell() take opaque cookies, not byte offsets, so the
        # original text-mode version mixed two incompatible coordinate
        # systems.
        if path.stat().st_size <= byte_offset:
            return "", byte_offset

        with open(path, "rb") as f:
            f.seek(byte_offset)
            raw = f.read()

        # errors="replace" tolerates an offset that lands mid-codepoint.
        return raw.decode("utf-8", errors="replace"), byte_offset + len(raw)
    except OSError:
        return "", byte_offset
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
# ---------------------------------------------------------------------------
|
|
160
|
+
# Extraction logic
|
|
161
|
+
# ---------------------------------------------------------------------------
|
|
162
|
+
|
|
163
|
+
def extract_context_around(text: str, match_start: int, match_end: int, context_chars: int = 200) -> str:
    """Return the match plus surrounding context, widened to line boundaries."""
    window_start = max(0, match_start - context_chars)
    window_end = min(len(text), match_end + context_chars)

    # Snap the window outward to the nearest newline on each side so the
    # snippet never begins or ends mid-line.
    newline_before = text.rfind('\n', 0, window_start + 1)
    window_start = 0 if newline_before == -1 else newline_before

    newline_after = text.find('\n', window_end)
    window_end = len(text) if newline_after == -1 else newline_after

    return text[window_start:window_end].strip()
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def extract_from_text(text: str, existing_hashes: set) -> List[Dict[str, Any]]:
    """
    Extract memories from transcript text using keyword/pattern matching.

    Returns a list of extracted memory dicts, capped at
    MAX_MEMORIES_PER_RUN and deduplicated against *existing_hashes*.
    """
    found: List[Dict[str, Any]] = []
    known = set(existing_hashes)

    def record(content: str, memory_type: str, importance: int, tags: List[str]) -> None:
        """Append one extraction unless the cap is hit or it was seen before."""
        if len(found) >= MAX_MEMORIES_PER_RUN:
            return
        digest = content_hash(content)
        if digest in known:
            return
        known.add(digest)
        if len(content) > MAX_CONTENT_LENGTH:
            content = content[:MAX_CONTENT_LENGTH] + "..."
        found.append({
            "content": content,
            "type": memory_type,
            "importance": importance,
            "tags": tags + ["auto-extracted", "hook"],
            "hash": digest,
        })

    # --- Regex-based pass (groups processed in this order, so earlier
    # groups win when the per-run cap fills up) ---
    regex_groups = (
        (DECISION_PATTERNS, "decision", 6, ["decision"]),
        (ERROR_PATTERNS, "error", 7, ["error"]),
        (PATTERN_PATTERNS, "code", 6, ["pattern"]),
    )
    for patterns, mem_type, importance, tags in regex_groups:
        for pattern in patterns:
            for m in pattern.finditer(text):
                snippet = extract_context_around(text, m.start(), m.end())
                if len(snippet) > 30:  # skip very short matches
                    record(snippet, mem_type, importance, tags)

    # --- Line-level keyword pass (fallback for phrasing the regexes miss);
    # only runs while we are under the cap ---
    keyword_groups = (
        (DECISION_KEYWORDS, "decision", 5, ["decision", "keyword-match"]),
        (ERROR_KEYWORDS, "error", 6, ["error", "keyword-match"]),
        (PATTERN_KEYWORDS, "code", 5, ["pattern", "keyword-match"]),
    )
    if len(found) < MAX_MEMORIES_PER_RUN:
        lines = text.split('\n')
        for idx in range(len(lines)):
            if len(found) >= MAX_MEMORIES_PER_RUN:
                break
            lowered = lines[idx].lower().strip()
            if len(lowered) < 20:  # skip very short or empty lines
                continue
            for keywords, mem_type, importance, tags in keyword_groups:
                if any(kw in lowered for kw in keywords):
                    # Grab this line plus the next two for context.
                    block = '\n'.join(lines[idx:idx + 3]).strip()
                    if len(block) > 30:
                        record(block, mem_type, importance, tags)
                    break  # mirrors the original if/elif: first matching group only

    return found
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
# ---------------------------------------------------------------------------
|
|
265
|
+
# API calls to memory agent
|
|
266
|
+
# ---------------------------------------------------------------------------
|
|
267
|
+
|
|
268
|
+
def store_memory_sync(extraction: Dict[str, Any], project_path: Optional[str] = None) -> bool:
    """
    Store one extracted memory via the memory agent's JSON-RPC endpoint.

    Uses urllib so hook scripts need no third-party HTTP library. Returns
    True on an HTTP 200 response, False on any network or HTTP failure.
    """
    import urllib.error
    import urllib.request

    skill_params = {
        "content": extraction["content"],
        "type": extraction["type"],
        "importance": extraction["importance"],
        "tags": extraction["tags"],
        "project_path": project_path,
        "agent_type": "hook-extractor",
        "outcome_status": "pending",
        "confidence": 0.4,  # Lower confidence for auto-extracted
    }
    payload = {
        "jsonrpc": "2.0",
        "method": "tasks/send",
        "params": {
            "message": {"parts": [{"type": "text", "text": ""}]},
            "metadata": {
                "skill_id": "store_memory",
                "params": skill_params,
            },
        },
        "id": f"extract-{extraction['hash']}-{int(time.time())}",
    }

    request_headers = {"Content-Type": "application/json"}
    if API_KEY:
        request_headers["X-Memory-Key"] = API_KEY

    try:
        body = json.dumps(payload).encode("utf-8")
        request = urllib.request.Request(
            f"{MEMORY_AGENT_URL}/a2a",
            data=body,
            headers=request_headers,
            method="POST",
        )
        # Short timeout: this runs inside a hook with a hard time budget.
        with urllib.request.urlopen(request, timeout=2) as response:
            return response.status == 200
    except (urllib.error.URLError, urllib.error.HTTPError, OSError, TimeoutError):
        return False
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
# ---------------------------------------------------------------------------
|
|
317
|
+
# Main entry point
|
|
318
|
+
# ---------------------------------------------------------------------------
|
|
319
|
+
|
|
320
|
+
def run_extraction(session_id: str, transcript_path: str, project_path: Optional[str] = None, is_session_end: bool = False) -> Dict[str, Any]:
    """
    Run one incremental extraction pass over a session transcript.

    Args:
        session_id: The session identifier
        transcript_path: Path to the conversation transcript file
        project_path: Optional project path for memory context
        is_session_end: If True, clean up cursor after extraction

    Returns:
        Summary dict with extraction results
    """
    started_at = time.time()
    summary = {
        "extracted": 0,
        "stored": 0,
        "skipped_duplicate": 0,  # reserved; not currently incremented
        "errors": 0,
        "elapsed_seconds": 0,
    }

    # Resume from wherever the previous run left off.
    state = load_cursor(session_id)
    resume_offset = state.get("byte_offset", 0)
    known_hashes = set(state.get("extracted_hashes", []))

    fresh_text, next_offset = read_transcript(transcript_path, resume_offset)
    if not fresh_text:
        summary["elapsed_seconds"] = time.time() - started_at
        if is_session_end:
            cleanup_cursor(session_id)
        return summary

    candidates = extract_from_text(fresh_text, known_hashes)
    summary["extracted"] = len(candidates)

    # Store each extraction via the API while staying inside the time budget.
    persisted_hashes = []
    for candidate in candidates:
        if time.time() - started_at >= MAX_EXTRACTION_TIME_SECONDS:
            break  # budget exhausted; leftovers are simply dropped
        if store_memory_sync(candidate, project_path):
            summary["stored"] += 1
            persisted_hashes.append(candidate["hash"])
        else:
            summary["errors"] += 1

    # Merge newly stored hashes into the cursor, bounding its growth.
    merged_hashes = list(known_hashes | set(persisted_hashes))
    if len(merged_hashes) > 200:
        merged_hashes = merged_hashes[-200:]

    new_state = {
        "byte_offset": next_offset,
        "extracted_hashes": merged_hashes,
        "last_run": datetime.now().isoformat(),
    }

    # Always persist the cursor; at session end, also drop this session's
    # entry from the shared cursor file.
    save_cursor(session_id, new_state)
    if is_session_end:
        cleanup_cursor(session_id)

    summary["elapsed_seconds"] = round(time.time() - started_at, 2)
    return summary
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
def main():
    """Entry point: parse hook JSON from stdin, run extraction, exit 0."""
    try:
        hook_data = {}
        # Only read stdin when it is piped; an interactive invocation gets {}.
        if not sys.stdin.isatty():
            stdin_text = sys.stdin.read()
            if stdin_text.strip():
                hook_data = json.loads(stdin_text)

        session_id = hook_data.get("session_id", f"unknown-{int(time.time())}")
        transcript_path = hook_data.get("transcript_path", "")
        project_path = hook_data.get("cwd") or hook_data.get("project_path", "")
        hook_event = hook_data.get("hook_event_name", "")

        if not transcript_path:
            # No transcript path provided - nothing to extract.
            sys.exit(0)

        results = run_extraction(
            session_id=session_id,
            transcript_path=transcript_path,
            project_path=project_path,
            is_session_end=(hook_event == "SessionEnd"),
        )

        # Summary goes to stderr: stdout is reserved for hook output.
        summary_line = (
            f"[MemoryExtractor] session={session_id} event={hook_event} "
            f"extracted={results['extracted']} stored={results['stored']} "
            f"errors={results['errors']} elapsed={results['elapsed_seconds']}s"
        )
        print(summary_line, file=sys.stderr)

    except Exception as e:
        # Fail silently - never block the user's workflow
        print(f"[MemoryExtractor] Error: {e}", file=sys.stderr)

    sys.exit(0)
|
|
436
|
+
|
|
437
|
+
|
|
438
|
+
# Script entry point: main() reads the hook's JSON payload from stdin and
# always exits 0 so it can never block the calling workflow.
if __name__ == "__main__":
    main()
|