mrstack 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mrstack/__init__.py +4 -0
- mrstack/_data/config/com.mrstack.claude-telegram.plist +25 -0
- mrstack/_data/config/mcp-config.example.json +23 -0
- mrstack/_data/config/start-daemon.sh +53 -0
- mrstack/_data/config/start.sh +29 -0
- mrstack/_data/schedulers/manage-jobs.sh +87 -0
- mrstack/_data/schedulers/morning-briefing.sh +29 -0
- mrstack/_data/schedulers/register-jobs.py +182 -0
- mrstack/_data/schedulers/run-threads-briefing.sh +36 -0
- mrstack/_data/schedulers/weekly-review.sh +26 -0
- mrstack/_data/templates/DESIGN-GUIDE.md +160 -0
- mrstack/_data/templates/alert.md +56 -0
- mrstack/_data/templates/evening-summary.md +73 -0
- mrstack/_data/templates/jarvis-alert.md +64 -0
- mrstack/_data/templates/morning-briefing.md +53 -0
- mrstack/_data/templates/weekly-review.md +79 -0
- mrstack/_overlay/api/dashboard.py +223 -0
- mrstack/_overlay/api/templates/dashboard.html +328 -0
- mrstack/_overlay/bot/handlers/callback.py +1432 -0
- mrstack/_overlay/bot/handlers/command.py +1541 -0
- mrstack/_overlay/bot/utils/keyboards.py +125 -0
- mrstack/_overlay/bot/utils/ui_components.py +166 -0
- mrstack/_overlay/claude/session.py +341 -0
- mrstack/_overlay/jarvis/__init__.py +77 -0
- mrstack/_overlay/jarvis/coach.py +122 -0
- mrstack/_overlay/jarvis/context_engine.py +463 -0
- mrstack/_overlay/jarvis/pattern_learner.py +255 -0
- mrstack/_overlay/jarvis/persona.py +84 -0
- mrstack/_overlay/jarvis/platform.py +182 -0
- mrstack/_overlay/knowledge/__init__.py +6 -0
- mrstack/_overlay/knowledge/manager.py +464 -0
- mrstack/_overlay/knowledge/memory_index.py +180 -0
- mrstack/cli.py +330 -0
- mrstack/constants.py +77 -0
- mrstack/daemon.py +325 -0
- mrstack/patcher.py +169 -0
- mrstack/wizard.py +271 -0
- mrstack-1.1.0.dist-info/METADATA +640 -0
- mrstack-1.1.0.dist-info/RECORD +42 -0
- mrstack-1.1.0.dist-info/WHEEL +4 -0
- mrstack-1.1.0.dist-info/entry_points.txt +2 -0
- mrstack-1.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,464 @@
|
|
|
1
|
+
"""Knowledge base manager — ingest, classify, and search user-provided knowledge.
|
|
2
|
+
|
|
3
|
+
Triggered only when the user includes "학습" in their message.
|
|
4
|
+
Stores summaries in ~/claude-telegram/knowledge/items/{id}.md (id = first 8 characters of a UUID4)
|
|
5
|
+
with a catalog in ~/claude-telegram/knowledge/index.json.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import re
|
|
10
|
+
import uuid
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
from datetime import datetime
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Optional
|
|
15
|
+
|
|
16
|
+
import structlog
|
|
17
|
+
|
|
18
|
+
logger = structlog.get_logger()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class KnowledgeItem:
    """One catalog record describing an ingested piece of knowledge."""

    id: str  # short identifier; also the stem of the item's markdown file
    title: str
    category: str
    tags: list[str]
    summary: str
    source_type: str  # "url", "youtube", "text" or "image"
    source_ref: str  # original URL, filename or image path
    created_at: str  # timestamp string produced by datetime.isoformat()

    def to_dict(self) -> dict:
        """Return the record as a plain dict keyed by field name.

        Values are passed through as-is (the ``tags`` list is shared with
        the instance, not copied).
        """
        names = (
            "id",
            "title",
            "category",
            "tags",
            "summary",
            "source_type",
            "source_ref",
            "created_at",
        )
        return {name: getattr(self, name) for name in names}
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class KnowledgeManager:
    """Manage ingestion, on-disk storage and keyword search of knowledge items.

    Every ingested source (URL, YouTube video, text, image) is summarized and
    classified with a single Claude call, written to ``items/<id>.md`` below
    ``knowledge_dir`` and recorded in the ``index.json`` catalog next to it.
    """

    CATEGORIES = ["tech", "business", "research", "reference"]

    # Matches watch, short-link and Shorts URLs.
    _YT_PATTERN = re.compile(
        r'(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/shorts/)[\w-]+'
    )

    # Subtitle languages, in order of preference.
    _SUBTITLE_LANGS = ("ko", "en", "ja")

    # Content longer than this is cut before being sent to Claude.
    _MAX_CONTENT_CHARS = 30000

    # Constructor fields of KnowledgeItem, used to rebuild items from the index.
    _ITEM_FIELDS = (
        "id",
        "title",
        "category",
        "tags",
        "summary",
        "source_type",
        "source_ref",
        "created_at",
    )

    # Reply schema shared by the URL, YouTube and text prompts.
    _JSON_REPLY_FORMAT = (
        '```json\n'
        '{\n'
        ' "title": "제목",\n'
        ' "category": "tech|business|research|reference 중 하나",\n'
        ' "tags": ["태그1", "태그2", "태그3"],\n'
        ' "summary": "핵심 내용 3-5문장 요약",\n'
        ' "key_points": ["핵심 포인트 1", "핵심 포인트 2", "핵심 포인트 3"]\n'
        '}\n'
        '```'
    )

    def __init__(
        self,
        knowledge_dir: str = "~/claude-telegram/knowledge/",
        claude_integration=None,
    ):
        """Create the storage directories if needed and load the catalog.

        Args:
            knowledge_dir: Root directory of the knowledge base (``~`` is
                expanded).
            claude_integration: Object exposing an async ``run_command``;
                when falsy, ingestion stores unclassified placeholder items.
        """
        self.knowledge_dir = Path(knowledge_dir).expanduser()
        self.items_dir = self.knowledge_dir / "items"
        self.index_path = self.knowledge_dir / "index.json"
        self.claude = claude_integration
        self._index: list[dict] = []

        # Ensure directories exist
        self.items_dir.mkdir(parents=True, exist_ok=True)
        self._load_index()

    def _load_index(self) -> None:
        """Load the catalog from disk, falling back to an empty catalog.

        A missing file leaves the in-memory catalog untouched. An unreadable
        file or a payload that is not a JSON array resets it to ``[]`` and
        logs a warning; array elements that are not objects are dropped.
        """
        if not self.index_path.exists():
            return
        try:
            with open(self.index_path, encoding="utf-8") as f:
                data = json.load(f)
        except Exception as e:  # best-effort: a broken catalog must not block startup
            logger.warning("Failed to load knowledge index", error=str(e))
            self._index = []
            return

        if not isinstance(data, list):
            logger.warning(
                "Knowledge index is not a list, ignoring",
                found=type(data).__name__,
            )
            self._index = []
            return
        self._index = [entry for entry in data if isinstance(entry, dict)]

    def _save_index(self) -> None:
        """Persist the catalog to disk; failures are logged, not raised."""
        # Write to a sibling file first so a failed dump cannot truncate the
        # existing catalog.
        tmp_path = self.index_path.with_name(self.index_path.name + ".tmp")
        try:
            with open(tmp_path, "w", encoding="utf-8") as f:
                json.dump(self._index, f, ensure_ascii=False, indent=2)
            tmp_path.replace(self.index_path)
        except Exception as e:
            logger.error("Failed to save knowledge index", error=str(e))

    async def ingest_url(self, url: str, working_directory=None) -> "KnowledgeItem":
        """Summarize, classify and store the content behind ``url``.

        YouTube URLs are routed to the subtitle-based path; everything else
        is fetched by Claude itself (the prompt asks it to use WebFetch).
        """
        if self._YT_PATTERN.search(url):
            return await self._ingest_youtube(url, working_directory)

        prompt = (
            f"다음 URL의 내용을 분석해주세요: {url}\n\n"
            "1. WebFetch 도구로 URL 내용을 가져와주세요\n"
            "2. 아래 JSON 형식으로만 응답해주세요 (다른 텍스트 없이):\n"
            + self._JSON_REPLY_FORMAT
        )

        parsed = await self._call_claude_for_classification(prompt, working_directory)
        return self._store_item(parsed, source_type="url", source_ref=url)

    @staticmethod
    async def _run_yt_dlp(args: list, timeout: float) -> Optional[bytes]:
        """Run ``yt-dlp`` with ``args`` and return its stdout.

        Returns ``None`` when the program cannot be started or does not
        finish within ``timeout`` seconds; a timed-out process is killed so
        it does not keep running in the background.
        """
        import asyncio

        try:
            proc = await asyncio.create_subprocess_exec(
                "yt-dlp",
                *args,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
        except OSError as e:  # e.g. yt-dlp is not installed
            logger.warning("Failed to start yt-dlp", error=str(e))
            return None

        try:
            stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=timeout)
        except asyncio.TimeoutError:
            try:
                proc.kill()
            except ProcessLookupError:
                pass  # already exited between the timeout and the kill
            await proc.wait()
            logger.warning("yt-dlp timed out", timeout=timeout)
            return None
        return stdout

    def _first_vtt_transcript(self, directory: str) -> str:
        """Return the first non-empty transcript among ``*.vtt`` files in ``directory``."""
        for vtt_file in sorted(Path(directory).glob("*.vtt")):
            transcript = self._parse_vtt(str(vtt_file))
            if transcript:
                return transcript
        return ""

    async def _ingest_youtube(self, url: str, working_directory=None) -> "KnowledgeItem":
        """Extract YouTube subtitles via yt-dlp, then classify and store.

        Auto-generated subtitles are tried per language (ko, en, ja), then
        manually uploaded ones. If no transcript can be obtained (including
        when yt-dlp is missing or times out) a reference-only item holding
        the title and URL is stored instead.
        """
        import tempfile

        logger.info("Ingesting YouTube video", url=url)

        with tempfile.TemporaryDirectory() as tmpdir:
            sub_path = str(Path(tmpdir) / "sub")

            # "--" ends option parsing so a URL starting with "-" can never
            # be interpreted as a yt-dlp option.
            title_out = await self._run_yt_dlp(
                ["--print", "title", "--no-download", "--", url],
                timeout=30,
            )
            video_title = (
                (title_out or b"").decode("utf-8", errors="replace").strip()
                or "YouTube Video"
            )

            transcript = ""
            for lang in self._SUBTITLE_LANGS:
                await self._run_yt_dlp(
                    [
                        "--write-auto-sub",
                        "--sub-lang", lang,
                        "--sub-format", "vtt",
                        "--skip-download",
                        "-o", sub_path,
                        "--", url,
                    ],
                    timeout=60,
                )
                transcript = self._first_vtt_transcript(tmpdir)
                if transcript:
                    break

            # Fallback: try manually uploaded subtitles
            if not transcript:
                await self._run_yt_dlp(
                    [
                        "--write-sub",
                        "--sub-lang", ",".join(self._SUBTITLE_LANGS),
                        "--sub-format", "vtt",
                        "--skip-download",
                        "-o", sub_path,
                        "--", url,
                    ],
                    timeout=60,
                )
                transcript = self._first_vtt_transcript(tmpdir)

        if not transcript:
            logger.warning("No subtitles found for YouTube video", url=url)
            # Fallback: just store title + URL as reference
            parsed = {
                "title": video_title,
                "category": "reference",
                "tags": ["youtube", "video"],
                "summary": f"자막을 추출할 수 없는 YouTube 영상: {video_title}",
                "key_points": [url],
            }
            return self._store_item(parsed, source_type="youtube", source_ref=url)

        if len(transcript) > self._MAX_CONTENT_CHARS:
            transcript = transcript[:self._MAX_CONTENT_CHARS] + "\n... (truncated)"

        prompt = (
            f"다음은 YouTube 영상 '{video_title}'의 자막입니다:\n\n"
            f"{transcript}\n\n"
            "아래 JSON 형식으로만 응답해주세요 (다른 텍스트 없이):\n"
            + self._JSON_REPLY_FORMAT
        )

        parsed = await self._call_claude_for_classification(prompt, working_directory)
        # Keep the real video title when classification produced none.
        if parsed.get("title") in (None, "", "Untitled"):
            parsed["title"] = video_title

        return self._store_item(parsed, source_type="youtube", source_ref=url)

    @staticmethod
    def _parse_vtt(vtt_path: str) -> str:
        """Parse a VTT subtitle file into clean, de-duplicated text.

        Header lines, cue timings and NOTE lines are skipped, markup tags are
        removed, and each distinct text line is kept only the first time it
        appears. Returns ``""`` if the file cannot be read.
        """
        try:
            # utf-8-sig drops an optional byte-order mark so the "WEBVTT"
            # header is still recognised and skipped.
            with open(vtt_path, encoding="utf-8-sig") as f:
                lines = f.readlines()
        except (OSError, ValueError):
            return ""

        skipped_prefixes = ("WEBVTT", "Kind:", "Language:", "NOTE")
        seen = set()
        text_lines = []
        for raw in lines:
            line = raw.strip()
            if not line or "-->" in line or line.startswith(skipped_prefixes):
                continue
            clean = re.sub(r'<[^>]+>', '', line).strip()
            if clean and clean not in seen:
                seen.add(clean)
                text_lines.append(clean)

        return "\n".join(text_lines)

    async def ingest_text(
        self, text: str, filename: Optional[str] = None, working_directory=None
    ) -> "KnowledgeItem":
        """Summarize, classify and store text or file content.

        ``filename`` becomes the item's source reference; without it the
        reference is ``"direct_text"``.
        """
        if len(text) > self._MAX_CONTENT_CHARS:
            text = text[:self._MAX_CONTENT_CHARS] + "\n... (truncated)"

        source = filename or "direct_text"
        prompt = (
            f"다음 텍스트 내용을 분석해주세요:\n\n{text}\n\n"
            "아래 JSON 형식으로만 응답해주세요 (다른 텍스트 없이):\n"
            + self._JSON_REPLY_FORMAT
        )

        parsed = await self._call_claude_for_classification(prompt, working_directory)
        return self._store_item(parsed, source_type="text", source_ref=source)

    async def ingest_image(
        self, image_path: str, caption: str = "", working_directory=None
    ) -> "KnowledgeItem":
        """Have Claude read the image at ``image_path`` and store the result.

        A non-empty ``caption`` is passed along in the prompt as a user note.
        """
        prompt = (
            "이미지를 분석해서 학습 자료로 정리해주세요.\n"
            f"이미지 파일 경로: {image_path}\n"
            "Read 도구로 이 이미지 파일을 읽어서 분석해주세요.\n"
        )
        if caption:
            prompt += f"사용자 메모: {caption}\n"

        prompt += (
            "\n아래 JSON 형식으로만 응답해주세요 (다른 텍스트 없이):\n"
            '```json\n'
            '{\n'
            ' "title": "제목",\n'
            ' "category": "tech|business|research|reference 중 하나",\n'
            ' "tags": ["태그1", "태그2", "태그3"],\n'
            ' "summary": "이미지 내용 3-5문장 요약",\n'
            ' "key_points": ["핵심 포인트 1", "핵심 포인트 2"]\n'
            '}\n'
            '```'
        )

        parsed = await self._call_claude_for_classification(prompt, working_directory)
        return self._store_item(parsed, source_type="image", source_ref=image_path)

    @staticmethod
    def _unclassified(summary: str) -> dict:
        """Build the placeholder result used when classification is unavailable."""
        return {
            "title": "Untitled",
            "category": "reference",
            "tags": [],
            "summary": summary,
            "key_points": [],
        }

    async def _call_claude_for_classification(
        self, prompt: str, working_directory=None
    ) -> dict:
        """Call Claude once to get classification + summary.

        Never raises: without a Claude integration, or when the call or the
        parsing fails, an "Untitled"/"reference" placeholder dict is returned
        whose summary describes the failure.
        """
        if not self.claude:
            logger.warning("No Claude integration for knowledge classification")
            return self._unclassified("분류 실패 — Claude 연동 없음")

        try:
            response = await self.claude.run_command(
                prompt=prompt,
                working_directory=working_directory or Path.home(),
                user_id=0,
                force_new=True,
                model="sonnet",
            )
            return self._parse_json_response(response.content)
        except Exception as e:
            logger.error("Knowledge classification failed", error=str(e))
            return self._unclassified(f"분류 실패: {str(e)[:100]}")

    def _parse_json_response(self, text: str) -> dict:
        """Extract the JSON object from Claude's response.

        Tries, in order: the first fenced code block, the whole text, and
        the span from the first ``{`` to the last ``}``. Only a JSON object
        is accepted; otherwise a fallback dict built from the leading text
        is returned, so the result is always a dict.
        """
        candidates = []
        fenced = re.search(r'```(?:json)?\s*\n?(.*?)\n?```', text, re.DOTALL)
        if fenced:
            candidates.append(fenced.group(1))
        candidates.append(text)
        start, end = text.find("{"), text.rfind("}")
        if 0 <= start < end:
            # Bare object surrounded by prose.
            candidates.append(text[start:end + 1])

        for candidate in candidates:
            try:
                data = json.loads(candidate)
            except json.JSONDecodeError:
                continue
            # Arrays, numbers and strings are valid JSON but unusable here.
            if isinstance(data, dict):
                return data

        # Fallback: extract what we can
        return {
            "title": text[:50].strip(),
            "category": "reference",
            "tags": [],
            "summary": text[:300].strip(),
            "key_points": [],
        }

    @staticmethod
    def _as_str_list(value) -> list:
        """Coerce a model- or index-provided value into a list of strings.

        ``None`` gives ``[]``, a string is split on commas, list-like values
        have their non-``None`` elements stringified, anything else becomes a
        one-element list.
        """
        if value is None:
            return []
        if isinstance(value, str):
            return [part.strip() for part in value.split(",") if part.strip()]
        if isinstance(value, (list, tuple, set)):
            return [str(element) for element in value if element is not None]
        return [str(value)]

    def _store_item(
        self, parsed: dict, source_type: str, source_ref: str
    ) -> "KnowledgeItem":
        """Save a knowledge item to disk and update the index.

        ``parsed`` comes from the model, so its fields are normalized first:
        unknown categories become ``"reference"``, an empty title becomes
        ``"Untitled"``, and tags / key points are coerced to lists of strings.
        A failure to write the item file is logged; the item is still added
        to the index.
        """
        item_id = uuid.uuid4().hex[:8]
        # Eight hex characters can collide; never overwrite an existing item.
        while (self.items_dir / f"{item_id}.md").exists():
            item_id = uuid.uuid4().hex[:8]
        now = datetime.now().isoformat()

        category = parsed.get("category", "reference")
        if isinstance(category, str):
            category = category.strip().lower()
        if category not in self.CATEGORIES:
            category = "reference"

        title = parsed.get("title")
        title = str(title).strip() if title else ""

        item = KnowledgeItem(
            id=item_id,
            title=title or "Untitled",
            category=category,
            tags=self._as_str_list(parsed.get("tags")),
            summary=str(parsed.get("summary") or ""),
            source_type=source_type,
            source_ref=source_ref,
            created_at=now,
        )

        # Write detailed item file
        md_content = (
            f"# {item.title}\n\n"
            f"- **Category:** {item.category}\n"
            f"- **Tags:** {', '.join(item.tags)}\n"
            f"- **Source:** {source_type} — {source_ref}\n"
            f"- **Created:** {now}\n\n"
            f"## Summary\n\n{item.summary}\n\n"
        )
        key_points = self._as_str_list(parsed.get("key_points"))
        if key_points:
            md_content += "## Key Points\n\n"
            md_content += "".join(f"- {point}\n" for point in key_points)

        item_path = self.items_dir / f"{item_id}.md"
        try:
            item_path.write_text(md_content, encoding="utf-8")
        except Exception as e:
            logger.error("Failed to write knowledge item", error=str(e))

        # Update index
        self._index.append(item.to_dict())
        self._save_index()

        logger.info(
            "Knowledge item stored",
            id=item_id,
            title=item.title,
            category=item.category,
        )

        return item

    def search(self, query: str, limit: int = 5) -> "list[KnowledgeItem]":
        """Search the catalog by keyword/tag matching (local, no LLM call).

        Query words are runs of two or more Latin or Hangul letters. An
        entry scores 3 for a title hit, 2 per matching tag and 1 each for a
        summary or category hit; the ``limit`` best-scoring entries are
        returned, highest first. Malformed index entries (missing keys,
        ``None`` values) are tolerated rather than raising.
        """
        query_words = set(re.findall(r'[a-z가-힣]{2,}', query.lower()))
        if not query_words:
            return []

        def mentions(value) -> bool:
            haystack = str(value or "").lower()
            return any(word in haystack for word in query_words)

        scored = []
        for entry in self._index:
            if not isinstance(entry, dict):
                continue
            score = 0
            if mentions(entry.get("title")):
                score += 3
            score += 2 * sum(
                1 for tag in self._as_str_list(entry.get("tags")) if mentions(tag)
            )
            if mentions(entry.get("summary")):
                score += 1
            if mentions(entry.get("category")):
                score += 1

            if score > 0:
                scored.append((score, entry))

        # Stable sort: equally scored entries keep their catalog order.
        scored.sort(key=lambda pair: pair[0], reverse=True)

        results = []
        for _, entry in scored[:limit]:
            # Build from the known fields only, so extra or missing keys in
            # an index entry cannot break construction.
            values = {name: str(entry.get(name) or "") for name in self._ITEM_FIELDS}
            values["tags"] = self._as_str_list(entry.get("tags"))
            results.append(KnowledgeItem(**values))
        return results

    def get_stats(self) -> dict:
        """Return the total item count and the number of items per category."""
        categories: dict[str, int] = {}
        for entry in self._index:
            cat = entry.get("category", "unknown")
            categories[cat] = categories.get(cat, 0) + 1

        return {
            "total_items": len(self._index),
            "categories": categories,
        }
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
"""Memory index for efficient context retrieval.
|
|
2
|
+
|
|
3
|
+
Scans ~/claude-telegram/memory/ and builds a keyword index so that
|
|
4
|
+
only relevant memory snippets are injected into Claude's system prompt,
|
|
5
|
+
instead of dumping all memory files every call.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import os
|
|
10
|
+
import re
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Optional
|
|
13
|
+
|
|
14
|
+
import structlog
|
|
15
|
+
|
|
16
|
+
logger = structlog.get_logger()
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class MemoryIndex:
    """Index memory files and retrieve relevant context by keyword match.

    Markdown files below ``memory_dir`` are summarized into ``index.json``;
    lookups score those entries, plus the knowledge base catalog, by keyword
    overlap with the query. No LLM call is involved.
    """

    SCAN_DIRS = ["people", "projects", "preferences", "knowledge", "decisions"]

    # Words ignored when extracting keywords (built once, not per call).
    _STOP_WORDS = frozenset({
        "the", "and", "for", "that", "this", "with", "from", "have",
        "are", "was", "were", "been", "will", "not", "but", "all",
        "can", "has", "had", "its", "you", "your", "they", "what",
        "에서", "으로", "하는", "있는", "하고", "그리고",
        "또는", "대한", "위한", "통해", "에서의", "것을",
    })

    def __init__(
        self,
        memory_dir: str = "~/claude-telegram/memory/",
        knowledge_dir: str = "~/claude-telegram/knowledge/",
    ):
        """Set up paths; nothing is read from disk here.

        Args:
            memory_dir: Root of the memory files (``~`` is expanded).
            knowledge_dir: Root of the knowledge base whose ``index.json``
                is searched alongside the memory index.
        """
        self.memory_dir = Path(memory_dir).expanduser()
        self.index_path = self.memory_dir / "index.json"
        self.knowledge_dir = Path(knowledge_dir).expanduser()
        self.knowledge_index_path = self.knowledge_dir / "index.json"
        self._index: list[dict] = []
        self._knowledge_index: list[dict] = []

    @staticmethod
    def _read_entries(path: Path) -> list[dict]:
        """Read a JSON array of objects from ``path``.

        Array elements that are not objects are dropped.

        Raises:
            OSError: If the file cannot be read.
            ValueError: If the content is not valid JSON or not an array.
        """
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
        if not isinstance(data, list):
            raise ValueError(f"expected a JSON array, got {type(data).__name__}")
        return [entry for entry in data if isinstance(entry, dict)]

    def rebuild_index(self) -> list[dict]:
        """Scan all memory files, rewrite ``index.json`` and return its entries.

        Each ``*.md`` file directly inside one of ``SCAN_DIRS`` contributes
        its relative path, category (the sub-directory name), a summary made
        of its first five non-empty lines (at most 300 characters) and its
        extracted keywords. Files that cannot be indexed are skipped; a
        failure to write the index is logged, not raised.
        """
        entries = []

        for subdir in self.SCAN_DIRS:
            dir_path = self.memory_dir / subdir
            if not dir_path.is_dir():
                continue

            # Sorted so the index order is stable across runs.
            for md_file in sorted(dir_path.glob("*.md")):
                try:
                    text = md_file.read_text(encoding="utf-8", errors="ignore")
                    lines = [ln.strip() for ln in text.splitlines() if ln.strip()]
                    entries.append({
                        "path": str(md_file.relative_to(self.memory_dir)),
                        "category": subdir,
                        "summary": " ".join(lines[:5])[:300],
                        "keywords": self._extract_keywords(text),
                    })
                except Exception as e:
                    logger.debug("Failed to index memory file", path=str(md_file), error=str(e))

        self._index = entries

        try:
            self.index_path.parent.mkdir(parents=True, exist_ok=True)
            with open(self.index_path, "w", encoding="utf-8") as f:
                json.dump(entries, f, ensure_ascii=False, indent=2)
        except Exception as e:
            logger.warning("Failed to write memory index", error=str(e))

        # Also load knowledge index if it exists
        self._load_knowledge_index()

        logger.info("Memory index rebuilt", entry_count=len(entries))
        return entries

    def _load_knowledge_index(self) -> None:
        """Load the knowledge base catalog for combined search.

        A missing file leaves the current catalog untouched; an unreadable
        or malformed one resets it to empty.
        """
        if not self.knowledge_index_path.exists():
            return
        try:
            self._knowledge_index = self._read_entries(self.knowledge_index_path)
        except Exception as e:
            logger.debug("Failed to load knowledge index", error=str(e))
            self._knowledge_index = []

    def _extract_keywords(self, text: str) -> list[str]:
        """Extract up to 20 keywords from ``text`` (local, no LLM call).

        Keywords are lower-cased runs of two or more Latin or Hangul letters
        that are not stop words, ordered by descending frequency; ties keep
        their order of first appearance.
        """
        # Blank out markdown punctuation before tokenizing.
        cleaned = re.sub(r'[#*\[\]()>`_~|]', ' ', text.lower())
        freq: dict[str, int] = {}
        for word in re.findall(r'[a-z가-힣]{2,}', cleaned):
            if word not in self._STOP_WORDS:
                freq[word] = freq.get(word, 0) + 1

        ranked = sorted(freq.items(), key=lambda pair: pair[1], reverse=True)
        return [word for word, _ in ranked[:20]]

    def get_relevant_context(self, query: str, max_tokens: int = 500) -> str:
        """Return memory and knowledge snippets relevant to ``query``.

        Uses keyword matching against the indexes (no LLM call). Whichever
        index is still empty is first loaded from disk if its file exists.
        The up-to-three best matches from each index are listed under
        ``[Memory Context]`` and ``[Knowledge]`` headings. Returns ``""``
        when nothing matches. The result is cut to roughly ``max_tokens``
        tokens (estimated at four characters each) plus a trailing ``...``.
        """
        if not self._index and self.index_path.exists():
            try:
                self._index = self._read_entries(self.index_path)
            except Exception:
                self._index = []
        if not self._knowledge_index:
            self._load_knowledge_index()
        # Knowledge entries alone are enough to produce context.
        if not self._index and not self._knowledge_index:
            return ""

        query_keywords = set(self._extract_keywords(query))
        if not query_keywords:
            return ""

        # Score each memory entry by keyword overlap.
        scored = []
        for entry in self._index:
            overlap = query_keywords & set(entry.get("keywords") or [])
            if overlap:
                scored.append((len(overlap), entry))

        # Score knowledge items on keywords from title, summary and tags.
        knowledge_matches = []
        for item in self._knowledge_index:
            tags = item.get("tags") or []
            if isinstance(tags, (list, tuple)):
                tag_text = " ".join(str(tag) for tag in tags)
            else:
                tag_text = str(tags)
            item_text = f"{item.get('title') or ''} {item.get('summary') or ''} {tag_text}"
            overlap = query_keywords & set(self._extract_keywords(item_text))
            if overlap:
                knowledge_matches.append((len(overlap), item))

        if not scored and not knowledge_matches:
            return ""

        parts = []

        if scored:
            scored.sort(key=lambda pair: pair[0], reverse=True)
            parts.append("[Memory Context]")
            for _, entry in scored[:3]:
                parts.append(f"- [{entry.get('category', '?')}] {entry.get('summary', '')}")

        if knowledge_matches:
            knowledge_matches.sort(key=lambda pair: pair[0], reverse=True)
            parts.append("\n[Knowledge]")
            for _, item in knowledge_matches[:3]:
                summary = str(item.get('summary') or '')[:150]
                parts.append(
                    f"- [{item.get('category', '?')}] {item.get('title', '?')}: {summary}"
                )

        # Approximate token budget: 4 characters per token.
        char_budget = max_tokens * 4
        result = "\n".join(parts)
        if len(result) > char_budget:
            result = result[:char_budget] + "..."

        return result

    def load_index(self) -> None:
        """Load the existing memory index and knowledge catalog from disk.

        A memory index that cannot be read or is malformed is replaced by an
        empty one and a warning is logged.
        """
        if self.index_path.exists():
            try:
                self._index = self._read_entries(self.index_path)
                logger.info("Memory index loaded", entry_count=len(self._index))
            except Exception as e:
                logger.warning("Failed to load memory index", error=str(e))
                self._index = []
        self._load_knowledge_index()
|