mdmemory 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdmemory/__init__.py ADDED
@@ -0,0 +1,54 @@
1
"""MdMemory - Markdown-first, LLM-driven memory framework.

A Markdown-first, LLM-driven memory framework that organizes agent knowledge
into a hierarchical Knowledge Tree. It prioritizes context efficiency by using
a hierarchical folder structure and a "Hybrid Indexing" strategy, ensuring the
agent only loads what is necessary.

Core Features:
- Human-Readable: Data stored as standard .md files on the filesystem
- LLM-Organized: Uses LLM to automatically determine folder structure
- Context-Aware: Hybrid indexing strategy keeps the root index compact
- Efficient Navigation: Central index.md and .registry.json for direct access
- Provider Agnostic: Works with any LLM provider via callbacks

Example Usage:
    >>> from mdmemory import MdMemory
    >>> memory = MdMemory()  # Uses defaults: LiteLLMCallback + "./MdMemory"
    >>> topic = memory.store("user1", "My knowledge content")
    >>> content = memory.get("user1", topic)
    >>> index = memory.retrieve("user1")
"""

__title__ = "mdmemory"
# NOTE: keep in sync with the version in the package metadata — the published
# wheel is 0.1.2; this attribute previously lagged behind at 0.1.0.
__version__ = "0.1.2"
__author__ = "Contributors"
__license__ = "MIT"
__copyright__ = "Copyright 2026 Contributors"
__description__ = (
    "Markdown-first, LLM-driven memory framework organized "
    "into a hierarchical Knowledge Tree"
)

from .core import (
    MdMemory,
    LLMCallback,
    LiteLLMCallback,
    OpenAICallback,
    AnthropicCallback,
)
from .models import FrontMatter, LLMResponse

__all__ = [
    "MdMemory",
    "LLMCallback",
    "LiteLLMCallback",
    "OpenAICallback",
    "AnthropicCallback",
    "FrontMatter",
    "LLMResponse",
    "__version__",
    "__title__",
    "__author__",
    "__license__",
]
mdmemory/core.py ADDED
@@ -0,0 +1,487 @@
1
+ """Core MdMemory implementation."""
2
+
3
+ import asyncio
4
+ import json
5
+ import re
6
+ import shutil
7
+ from datetime import datetime
8
+ from pathlib import Path
9
+ from typing import Callable, Dict, List, Optional
10
+
11
+ from .models import FrontMatter, LLMResponse
12
+ from .registry import PathRegistry
13
+ from .utils import (
14
+ ensure_dir_exists,
15
+ line_count,
16
+ parse_topic_title,
17
+ read_markdown_file,
18
+ save_json_safe,
19
+ write_markdown_file,
20
+ )
21
+
22
# Type alias for the LLM callback contract: takes a chat-message list of the
# form [{"role": "...", "content": "..."}] and returns the raw reply text.
LLMCallback = Callable[[List[Dict[str, str]]], str]
24
+
25
+
26
class LiteLLMCallback:
    """Built-in callback for LiteLLM integration.

    The client is initialized eagerly in ``__init__``, so a missing
    ``litellm`` package is reported at construction time, not on first call.
    """

    def __init__(self, model: str = "gpt-3.5-turbo"):
        """Initialize LiteLLM callback.

        Args:
            model: LLM model name (default: gpt-3.5-turbo)

        Raises:
            ImportError: If the ``litellm`` package is not installed.
        """
        self.model = model
        self.completion = None
        self._initialize()

    def _initialize(self):
        """Import LiteLLM and bind its ``completion`` function."""
        try:
            from litellm import completion

            self.completion = completion
        except ImportError as e:
            # Chain the original error (PEP 3134) so the underlying import
            # failure stays visible in the traceback.
            raise ImportError(
                "LiteLLM not installed. Install with: pip install litellm"
            ) from e

    def __call__(self, messages: List[Dict[str, str]]) -> str:
        """Call LiteLLM API.

        Args:
            messages: List of message dicts with 'role' and 'content'

        Returns:
            LLM response as string
        """
        # Defensive re-init in case `completion` was cleared externally.
        if self.completion is None:
            self._initialize()

        response = self.completion(model=self.model, messages=messages)
        return response.choices[0].message.content
62
+
63
+
64
class OpenAICallback:
    """Built-in callback for OpenAI integration."""

    def __init__(self, model: str = "gpt-3.5-turbo", api_key: Optional[str] = None):
        """Initialize OpenAI callback.

        Args:
            model: OpenAI model name (default: gpt-3.5-turbo)
            api_key: OpenAI API key (if not set, uses OPENAI_API_KEY env var)
        """
        self.model = model
        self.api_key = api_key
        self.client = None
        self._initialize()

    def _initialize(self):
        """Construct the OpenAI client; raise ImportError if openai is absent."""
        try:
            from openai import OpenAI

            if self.api_key:
                self.client = OpenAI(api_key=self.api_key)
            else:
                self.client = OpenAI()
        except ImportError:
            raise ImportError("OpenAI not installed. Install with: pip install openai")

    def __call__(self, messages: List[Dict[str, str]]) -> str:
        """Send *messages* to the chat-completions API.

        Args:
            messages: List of message dicts with 'role' and 'content'

        Returns:
            LLM response as string
        """
        if self.client is None:
            self._initialize()

        result = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
        )
        return result.choices[0].message.content
102
+
103
+
104
class AnthropicCallback:
    """Built-in callback for Anthropic Claude integration."""

    def __init__(
        self,
        model: str = "claude-3-sonnet-20240229",
        api_key: Optional[str] = None,
        max_tokens: int = 4096,
    ):
        """Initialize Anthropic callback.

        Args:
            model: Claude model name (default: claude-3-sonnet-20240229)
            api_key: Anthropic API key (if not set, uses ANTHROPIC_API_KEY env var)
            max_tokens: Response token cap passed to the Messages API. The
                default (4096) preserves the previously hard-coded value.

        Raises:
            ImportError: If the ``anthropic`` package is not installed.
        """
        self.model = model
        self.api_key = api_key
        self.max_tokens = max_tokens
        self.client = None
        self._initialize()

    def _initialize(self):
        """Construct the Anthropic client."""
        try:
            from anthropic import Anthropic

            self.client = Anthropic(api_key=self.api_key) if self.api_key else Anthropic()
        except ImportError as e:
            # Chain (PEP 3134) so the underlying import failure stays visible.
            raise ImportError(
                "Anthropic not installed. Install with: pip install anthropic"
            ) from e

    def __call__(self, messages: List[Dict[str, str]]) -> str:
        """Call Anthropic Messages API.

        Args:
            messages: List of message dicts with 'role' and 'content'.
                NOTE(review): the Messages API rejects a "system" role inside
                `messages` (system text goes in a separate parameter) —
                confirm upstream callers never include one.

        Returns:
            LLM response as string (text of the first content block)
        """
        if self.client is None:
            self._initialize()

        response = self.client.messages.create(
            model=self.model, messages=messages, max_tokens=self.max_tokens
        )
        return response.content[0].text
142
+
143
+
144
class MdMemory:
    """Markdown-first, LLM-driven memory framework.

    Knowledge is stored as plain ``.md`` files inside a folder tree rooted at
    ``storage_path``. An LLM callback decides where each new item belongs and
    produces its frontmatter; ``.registry.json`` maps topic IDs to file paths
    for direct lookup, and ``index.md`` files give a human-readable overview.
    """

    SYSTEM_PROMPT = """You are the MdMemory Librarian. Your goal is to maintain a clean, hierarchical Markdown Knowledge Tree. When storing data, choose a logical path. When optimizing, group related files into sub-directories to keep the root index under 50 lines. Always return JSON containing: `action`, `recommended_path`, `frontmatter`, and `optimize_suggested`."""

    def __init__(
        self,
        llm_callback: Optional[LLMCallback] = None,
        storage_path: str = "./MdMemory",
        optimize_threshold: int = 20,
    ):
        """Initialize MdMemory.

        Args:
            llm_callback: Callback function for LLM calls.
                Signature: (messages: List[Dict[str, str]]) -> str
                Messages format: [{"role": "user", "content": "prompt"}]
                Should return LLM response as a string (preferably JSON)
                If not provided, uses LiteLLMCallback with gpt-3.5-turbo
            storage_path: Root path for storage (default: ./MdMemory)
            optimize_threshold: Line count threshold for triggering optimize
        """
        # Default to LiteLLM; note this raises ImportError here when the
        # litellm package is absent.
        if llm_callback is None:
            llm_callback = LiteLLMCallback()

        self.llm_callback = llm_callback
        self.storage_path = Path(storage_path)
        self.optimize_threshold = optimize_threshold

        # Initialize storage structure
        ensure_dir_exists(self.storage_path)

        # Load or create registry (.registry.json: topic ID -> relative path)
        self.registry_path = self.storage_path / ".registry.json"
        if not self.registry_path.exists():
            save_json_safe(self.registry_path, {})
        self.registry = PathRegistry(self.registry_path)

        # Initialize root index
        self.root_index_path = self.storage_path / "index.md"
        if not self.root_index_path.exists():
            self._init_root_index()

    def _init_root_index(self) -> None:
        """Create the root index.md with a welcome header."""
        metadata = {
            "title": "MdMemory Root Index",
            "created_at": datetime.now().isoformat(),
        }
        content = "# Knowledge Tree\n\nWelcome to MdMemory. This is the root index.\n\n"
        write_markdown_file(self.root_index_path, metadata, content)

    def _get_llm_decision(self, action: str, context: Dict[str, str]) -> Optional[LLMResponse]:
        """Call LLM for organizational decisions.

        Args:
            action: The action type ("store", "optimize", etc.)
            context: Context dict for the LLM

        Returns:
            LLMResponse, or None when the call fails or the reply is not
            parseable JSON matching the LLMResponse schema.
        """
        try:
            # Prepare the prompt
            prompt = f"""
{self.SYSTEM_PROMPT}

Action: {action}
Context: {json.dumps(context, indent=2)}

Please respond with a JSON object containing:
- action: The action being performed
- recommended_path: The logical folder path (e.g., "coding/python")
- frontmatter: Object with topic, summary, tags
- optimize_suggested: Boolean indicating if optimization is needed
"""

            # Call LLM callback with messages
            messages = [{"role": "user", "content": prompt}]
            response_text = self.llm_callback(messages)

            # Extract the first {...} span — models often wrap JSON in prose.
            try:
                json_match = re.search(r"\{.*\}", response_text, re.DOTALL)
                if json_match:
                    response_json = json.loads(json_match.group())
                    return LLMResponse(**response_json)
            except (json.JSONDecodeError, ValueError):
                # Unparseable reply: fall through and report failure below.
                pass

            return None
        except Exception as e:
            # Best-effort: an LLM outage must not crash store()/optimize().
            print(f"LLM call error: {e}")
            return None

    def store(self, usr_id: str, query: str, topic: Optional[str] = None) -> Optional[str]:
        """Store a memory item.

        Args:
            usr_id: User ID
            query: The content to store
            topic: Topic identifier (optional - LLM will generate if not provided)

        Returns:
            The topic ID that was used/generated, or None if failed
        """
        # Prepare context for LLM
        context = {"query": query, "user_id": usr_id}
        if topic:
            context["topic"] = topic
        else:
            context["topic"] = (
                "NOT_PROVIDED - Please generate a concise topic ID from the query content"
            )

        # Call LLM to determine folder path and frontmatter
        llm_response = self._get_llm_decision("store", context)

        if not llm_response:
            # Fallback when the LLM is unavailable or returned garbage.
            recommended_path = "uncategorized"
            if topic:
                used_topic = topic
            else:
                # Derive a filesystem-safe topic from the start of the query.
                used_topic = query[:30].replace(" ", "_").replace("\n", "_").lower()
                if not used_topic:
                    # Guard: an empty query would otherwise yield a file
                    # literally named ".md"; use a timestamp ID instead.
                    used_topic = f"untitled_{datetime.now().strftime('%Y%m%d%H%M%S')}"

            frontmatter = FrontMatter(
                topic=used_topic,
                summary=query[:100],
                tags=[],
                created_at=datetime.now().isoformat(),
            )
        else:
            # Use LLM-determined topic and path
            used_topic = llm_response.frontmatter.topic
            recommended_path = llm_response.recommended_path
            frontmatter = llm_response.frontmatter

        # Set timestamps (overwrites any LLM-supplied values)
        frontmatter.created_at = datetime.now().isoformat()
        frontmatter.updated_at = datetime.now().isoformat()

        # Create file path
        folder_path = self.storage_path / recommended_path
        ensure_dir_exists(folder_path)
        file_path = folder_path / f"{used_topic}.md"

        # Write the file: frontmatter as YAML header, the raw query as body
        metadata = frontmatter.model_dump()
        success = write_markdown_file(file_path, metadata, query)

        if success:
            # Registry stores paths relative to the storage root
            relative_path = str(file_path.relative_to(self.storage_path))
            self.registry.put(used_topic, relative_path)

            # Update parent index
            self._update_index_for_path(recommended_path, used_topic, frontmatter.summary)

            # Check if optimization is needed
            if llm_response and llm_response.optimize_suggested:
                self.optimize(usr_id)

            return used_topic

        return None

    def _update_index_for_path(self, folder_path: str, topic: str, summary: str) -> None:
        """Update the appropriate index file for a folder.

        Args:
            folder_path: Relative folder path (e.g., "coding/python") —
                always "/"-separated (LLM-produced), independent of OS.
            topic: Topic name
            summary: One-line summary
        """
        parts = folder_path.split("/")

        # Single-segment paths are indexed in the root; deeper paths get a
        # sub-index inside their own folder.
        if len(parts) == 1:
            self._append_to_index(self.root_index_path, topic, summary)
        else:
            sub_index_path = self.storage_path / folder_path / "index.md"
            ensure_dir_exists(sub_index_path.parent)

            if not sub_index_path.exists():
                metadata = {"title": f"Index: {folder_path.replace('/', ' > ')}"}
                content = f"# {folder_path.replace('/', ' > ')}\n\n"
                write_markdown_file(sub_index_path, metadata, content)

            self._append_to_index(sub_index_path, topic, summary)

    def _append_to_index(self, index_path: Path, topic: str, summary: str) -> None:
        """Append an entry to an index file (idempotent).

        Args:
            index_path: Path to the index file
            topic: Topic name
            summary: One-line summary
        """
        metadata, content = read_markdown_file(index_path)
        topic_title = parse_topic_title(topic)
        new_entry = f"- **{topic_title}**: {summary}\n"

        # Skip duplicates so repeated stores don't bloat the index.
        if new_entry not in content:
            content += new_entry
            write_markdown_file(index_path, metadata, content)

    def retrieve(self, usr_id: str) -> str:
        """Retrieve the root index (knowledge tree overview).

        Args:
            usr_id: User ID

        Returns:
            Content of root index.md (without frontmatter)
        """
        metadata, content = read_markdown_file(self.root_index_path)
        return content

    def get(self, usr_id: str, topic: str) -> Optional[str]:
        """Get full content of a specific topic.

        Args:
            usr_id: User ID
            topic: Topic identifier

        Returns:
            Full content (frontmatter + markdown) or None if not found
        """
        relative_path = self.registry.get(topic)
        if not relative_path:
            return None

        file_path = self.storage_path / relative_path
        if not file_path.exists():
            return None

        metadata, content = read_markdown_file(file_path)

        # Re-serialize so the caller gets frontmatter + body as one string.
        import frontmatter as fm

        post = fm.Post(content, **metadata)
        return fm.dumps(post)

    def delete(self, usr_id: str, topic: str) -> bool:
        """Delete a topic from memory.

        Args:
            usr_id: User ID
            topic: Topic identifier

        Returns:
            True if successful (also when the file was already gone but the
            registry entry existed); False for unknown topics or I/O errors.
        """
        relative_path = self.registry.get(topic)
        if not relative_path:
            return False

        file_path = self.storage_path / relative_path
        try:
            if file_path.exists():
                file_path.unlink()
            self.registry.delete(topic)

            # Update index files
            self._prune_from_indexes(topic, file_path)

            return True
        except Exception as e:
            print(f"Error deleting {topic}: {e}")
            return False

    def _prune_from_indexes(self, topic: str, file_path: Path) -> None:
        """Remove topic from all relevant index files.

        Args:
            topic: Topic identifier
            file_path: File path (to find parent folders)
        """
        topic_title = parse_topic_title(topic)

        # Update parent folder index if exists
        parent_index = file_path.parent / "index.md"
        if parent_index.exists():
            metadata, content = read_markdown_file(parent_index)
            # Drop the entire entry line ("- **Title**: summary"), not just
            # its prefix — otherwise the orphaned summary text would remain.
            kept = [
                line
                for line in content.splitlines()
                if not line.startswith(f"- **{topic_title}**:")
            ]
            new_content = "\n".join(kept)
            if new_content and not new_content.endswith("\n"):
                new_content += "\n"
            # Always persist, even when the index is now empty; skipping the
            # write would leave the stale entry on disk.
            write_markdown_file(parent_index, metadata, new_content)

    def optimize(self, usr_id: str) -> None:
        """Optimize the knowledge tree structure.

        Acts only when the root index exceeds ``optimize_threshold`` lines;
        asks the LLM for a reorganization plan and applies it.

        Args:
            usr_id: User ID
        """
        # Analyze root index
        metadata, content = read_markdown_file(self.root_index_path)
        lines = content.strip().split("\n")

        if len(lines) > self.optimize_threshold:
            # Map top-level folders (skipping dot-dirs) to their .md files
            folders = {}
            for item in self.storage_path.iterdir():
                if item.is_dir() and not item.name.startswith("."):
                    md_files = list(item.glob("*.md"))
                    if md_files:
                        folders[item.name] = md_files

            # Call LLM to suggest optimization
            context = {
                "current_structure": list(folders.keys()),
                "total_root_lines": len(lines),
                "threshold": self.optimize_threshold,
            }

            llm_response = self._get_llm_decision("optimize", context)

            if llm_response:
                # LLM response should contain recommendations for reorganization
                self._apply_optimization(llm_response)

    def _apply_optimization(self, llm_response: LLMResponse) -> None:
        """Apply optimization recommendations from LLM.

        Args:
            llm_response: LLM response with optimization suggestions

        NOTE(review): intentionally a no-op placeholder; a real implementation
        would move files, update the registry, and rewrite indexes here.
        """
        pass

    def list_topics(self) -> Dict[str, str]:
        """List all topics in the registry.

        Returns:
            Dictionary of topic ID -> path mappings
        """
        return self.registry.list_all()
mdmemory/models.py ADDED
@@ -0,0 +1,26 @@
1
+ """Data models for MdMemory."""
2
+
3
+ from typing import Any, Dict, List, Optional
4
+ from pydantic import BaseModel, Field
5
+
6
+
7
class FrontMatter(BaseModel):
    """Frontmatter metadata written into each knowledge Markdown file."""

    # Topic identifier; also used as the stored file's name stem ("<topic>.md").
    topic: str
    # One-line summary shown in index.md listings.
    summary: str
    # Free-form tag labels.
    tags: List[str] = Field(default_factory=list)
    # ISO-8601 timestamps; MdMemory.store overwrites both at save time.
    created_at: Optional[str] = None
    updated_at: Optional[str] = None
    # Arbitrary extra metadata supplied by callers or the LLM.
    custom: Dict[str, Any] = Field(default_factory=dict)
16
+
17
+
18
class LLMResponse(BaseModel):
    """Expected response from LLM for organizational decisions.

    Parsed from the JSON object the librarian prompt asks the model to return.
    """

    action: str  # "store", "optimize", etc.
    recommended_path: str  # logical folder path, e.g. "coding/python"
    frontmatter: FrontMatter  # metadata to write into the stored file
    optimize_suggested: bool = False  # LLM hint that the tree should be reorganized
    reason: Optional[str] = None  # optional rationale from the LLM
    generated_topic: Optional[str] = None  # Topic generated by LLM if not provided by user
mdmemory/registry.py ADDED
@@ -0,0 +1,85 @@
1
+ """Registry management for MdMemory."""
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Optional
6
+ from .utils import load_json_safe, save_json_safe
7
+
8
+
9
class PathRegistry:
    """Manages the global path map (.registry.json).

    Holds an in-memory cache of topic-ID -> relative-path mappings and
    persists it to disk after every mutation.
    """

    def __init__(self, registry_path: Path):
        """Load the registry cache from *registry_path* (empty if absent).

        Args:
            registry_path: Path to .registry.json file
        """
        self.registry_path = registry_path
        self._cache = load_json_safe(registry_path)

    def get(self, topic_id: str) -> Optional[str]:
        """Return the relative file path for *topic_id*, or None if unknown.

        Args:
            topic_id: The topic identifier
        """
        return self._cache.get(topic_id)

    def put(self, topic_id: str, path: str) -> bool:
        """Map *topic_id* to *path* and persist immediately.

        Args:
            topic_id: The topic identifier
            path: Relative path to the file

        Returns:
            True if the write to disk succeeded
        """
        self._cache[topic_id] = path
        return save_json_safe(self.registry_path, self._cache)

    def delete(self, topic_id: str) -> bool:
        """Remove *topic_id* from the registry and persist.

        Args:
            topic_id: The topic identifier

        Returns:
            True on success (including when the topic was never registered)
        """
        if topic_id not in self._cache:
            return True
        self._cache.pop(topic_id)
        return save_json_safe(self.registry_path, self._cache)

    def update_path(self, old_path: str, new_path: str) -> bool:
        """Rewrite every mapping pointing at *old_path* to *new_path*.

        Used when optimize() relocates files.

        Args:
            old_path: The old relative path
            new_path: The new relative path

        Returns:
            True on success (including when nothing matched)
        """
        affected = [tid for tid, p in self._cache.items() if p == old_path]
        for tid in affected:
            self._cache[tid] = new_path
        if affected:
            return save_json_safe(self.registry_path, self._cache)
        return True

    def list_all(self) -> dict:
        """Return a shallow copy of all topic -> path mappings."""
        return self._cache.copy()

    def reload(self) -> bool:
        """Discard the cache and re-read the registry from disk."""
        self._cache = load_json_safe(self.registry_path)
        return True
mdmemory/utils.py ADDED
@@ -0,0 +1,84 @@
1
+ """Utility functions for MdMemory."""
2
+
3
+ import json
4
+ import os
5
+ from pathlib import Path
6
+ from typing import Any, Dict
7
+ import frontmatter as fm
8
+
9
+
10
def load_json_safe(filepath: Path) -> Dict[str, Any]:
    """Load a JSON file, returning an empty dict on any read/parse failure."""
    if not filepath.exists():
        return {}
    try:
        raw = filepath.read_text(encoding="utf-8")
        return json.loads(raw)
    except (json.JSONDecodeError, IOError) as e:
        print(f"Error loading {filepath}: {e}")
        return {}
20
+
21
+
22
def save_json_safe(filepath: Path, data: Dict[str, Any]) -> bool:
    """Serialize *data* to *filepath* as pretty-printed UTF-8 JSON.

    Parent directories are created as needed. Returns True on success,
    False (after printing the error) when the write fails.
    """
    try:
        filepath.parent.mkdir(parents=True, exist_ok=True)
        rendered = json.dumps(data, indent=2, ensure_ascii=False)
        with open(filepath, "w", encoding="utf-8") as handle:
            handle.write(rendered)
    except IOError as e:
        print(f"Error saving {filepath}: {e}")
        return False
    return True
32
+
33
+
34
def read_markdown_file(filepath: Path) -> tuple[dict, str]:
    """Read a Markdown file with YAML frontmatter.

    Returns:
        (frontmatter_dict, body_content); ({}, "") when the file is missing
        or unreadable.
    """
    if not filepath.exists():
        return {}, ""
    try:
        with open(filepath, "r", encoding="utf-8") as handle:
            document = fm.load(handle)
    except Exception as e:
        print(f"Error reading {filepath}: {e}")
        return {}, ""
    return document.metadata, document.content
48
+
49
+
50
def write_markdown_file(filepath: Path, metadata: dict, content: str) -> bool:
    """Write *content* with YAML frontmatter *metadata* to *filepath*.

    Parent directories are created as needed. Returns True on success,
    False (after printing the error) on any failure.
    """
    try:
        filepath.parent.mkdir(parents=True, exist_ok=True)
        document = fm.Post(content, **metadata)
        rendered = fm.dumps(document)
        with open(filepath, "w", encoding="utf-8") as handle:
            handle.write(rendered)
    except Exception as e:
        print(f"Error writing {filepath}: {e}")
        return False
    return True
61
+
62
+
63
def ensure_dir_exists(dirpath: Path) -> bool:
    """Create *dirpath* (including parents) if missing; True on success."""
    try:
        dirpath.mkdir(parents=True, exist_ok=True)
    except Exception as e:
        print(f"Error creating directory {dirpath}: {e}")
        return False
    return True
71
+
72
+
73
def parse_topic_title(filename: str) -> str:
    """Convert a filename (e.g. 'python_tips.md') to a title ('Python Tips').

    Only a trailing '.md' extension is stripped; a '.md' occurring elsewhere
    in the name is preserved. Underscores become spaces and the result is
    title-cased.
    """
    # str.replace(".md", "") would also delete ".md" appearing mid-name;
    # removesuffix (Python 3.9+, matching this package's floor) strips
    # only the extension.
    return filename.removesuffix(".md").replace("_", " ").title()
76
+
77
+
78
def line_count(filepath: Path) -> int:
    """Count lines in a file; 0 when it cannot be read."""
    try:
        with open(filepath, "r", encoding="utf-8") as handle:
            total = 0
            for _ in handle:
                total += 1
        return total
    except Exception:
        return 0
@@ -0,0 +1,270 @@
1
+ Metadata-Version: 2.4
2
+ Name: mdmemory
3
+ Version: 0.1.2
4
+ Summary: Markdown-first, LLM-driven memory framework organized into a hierarchical Knowledge Tree
5
+ Project-URL: Homepage, https://github.com/pvkarthikk/MdMemory
6
+ Project-URL: Repository, https://github.com/pvkarthikk/MdMemory.git
7
+ Project-URL: Documentation, https://github.com/pvkarthikk/MdMemory/blob/main/README.md
8
+ Project-URL: Bug Tracker, https://github.com/pvkarthikk/MdMemory/issues
9
+ Project-URL: Changelog, https://github.com/pvkarthikk/MdMemory/blob/main/CHANGELOG.md
10
+ Author-email: Contributors <contributor@example.com>
11
+ License: MIT
12
+ License-File: LICENSE
13
+ Keywords: ai,hierarchical,knowledge-management,knowledge-tree,llm,markdown,memory
14
+ Classifier: Development Status :: 3 - Alpha
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Natural Language :: English
18
+ Classifier: Operating System :: OS Independent
19
+ Classifier: Programming Language :: Python :: 3
20
+ Classifier: Programming Language :: Python :: 3.9
21
+ Classifier: Programming Language :: Python :: 3.10
22
+ Classifier: Programming Language :: Python :: 3.11
23
+ Classifier: Programming Language :: Python :: 3.12
24
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
25
+ Classifier: Topic :: Text Processing :: Markup :: Markdown
26
+ Requires-Python: >=3.9
27
+ Requires-Dist: aiofiles>=23.0.0
28
+ Requires-Dist: litellm>=1.0.0
29
+ Requires-Dist: pydantic>=2.0.0
30
+ Requires-Dist: python-frontmatter>=1.0.0
31
+ Provides-Extra: dev
32
+ Requires-Dist: black>=23.0.0; extra == 'dev'
33
+ Requires-Dist: mypy>=1.0.0; extra == 'dev'
34
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
35
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
36
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
37
+ Requires-Dist: ruff>=0.1.0; extra == 'dev'
38
+ Description-Content-Type: text/markdown
39
+
40
+ # MdMemory
41
+
42
+ A Markdown-first, LLM-driven memory framework that organizes agent knowledge into a hierarchical **Knowledge Tree**.
43
+
44
+ ## Features
45
+
46
+ - **Human-Readable**: Data stored as standard `.md` files on the filesystem
47
+ - **LLM-Organized**: Uses LLM to automatically determine folder structure and organization
48
+ - **Context-Aware**: Hybrid indexing strategy keeps the root index compact
49
+ - **Efficient Navigation**: Central `index.md` and `.registry.json` Path Map for direct access
50
+
51
+ ## Installation
52
+
53
+ ```bash
54
+ pip install mdmemory
55
+ ```
56
+
57
+ Or with development dependencies:
58
+
59
+ ```bash
60
+ pip install -e ".[dev]"
61
+ ```
62
+
63
+ ## Quick Start
64
+
65
+ ```python
66
+ from mdmemory import MdMemory
67
+
68
+ # Define your LLM callback function
69
+ # It receives messages and should return LLM response as a string
70
+ def llm_callback(messages: list) -> str:
71
+ """
72
+ LLM callback function that handles LLM provider communication.
73
+
74
+ You can use any LLM provider: OpenAI, Claude, Gemini, Ollama, etc.
75
+ """
76
+ # Example with LiteLLM (supports all major providers)
77
+ from litellm import completion
78
+ response = completion(model="gpt-3.5-turbo", messages=messages)
79
+ return response.choices[0].message.content
80
+
81
+ # Or use OpenAI directly
82
+ # from openai import OpenAI
83
+ # client = OpenAI()
84
+ # response = client.chat.completions.create(model="gpt-3.5-turbo", messages=messages)
85
+ # return response.choices[0].message.content
86
+
87
+ # Initialize MdMemory with the callback
88
+ memory = MdMemory(
89
+ llm_callback,
90
+ storage_path="./knowledge_base",
91
+ optimize_threshold=20
92
+ )
93
+
94
+ # Store a memory WITH explicit topic
95
+ topic = memory.store(
96
+ usr_id="user123",
97
+ query="Decorators are functions that modify other functions...",
98
+ topic="python_decorators" # Optional - provide explicit topic
99
+ )
100
+
101
+ # Store a memory WITHOUT topic - LLM generates one automatically
102
+ generated_topic = memory.store(
103
+ usr_id="user123",
104
+ query="List comprehensions are concise ways to create lists in Python..."
105
+ # topic parameter omitted - LLM will infer topic from content
106
+ )
107
+ print(f"Generated topic: {generated_topic}")
108
+
109
+ # Retrieve the knowledge tree
110
+ index = memory.retrieve("user123")
111
+ print(index)
112
+
113
+ # Get a specific topic
114
+ content = memory.get("user123", "python_decorators")
115
+ print(content)
116
+
117
+ # Delete a topic
118
+ memory.delete("user123", "python_decorators")
119
+
120
+ # List all topics
121
+ topics = memory.list_topics()
122
+ print(topics)
123
+
124
+ # Optimize structure
125
+ memory.optimize("user123")
126
+ ```
127
+
128
+ ## Directory Structure
129
+
130
+ ```
131
+ storage_root/
132
+ ├── .registry.json # Global Path Map (Topic ID -> Physical Path)
133
+ ├── index.md # Root Knowledge Tree
134
+ └── categories/           # Auto-created folders
135
+ ├── coding/
136
+ │ ├── index.md # Sub-index
137
+ │ └── python.md # Knowledge file
138
+ └── finance/
139
+ └── taxes.md
140
+ ```
141
+
142
+ ## Architecture
143
+
144
+ ### Core Components
145
+
146
+ - **MdMemory**: Main class providing the public API
147
+ - **PathRegistry**: Manages `.registry.json` for topic ID -> file path mapping
148
+ - **FrontMatter**: Metadata attached to each knowledge file
149
+ - **LLMResponse**: Structured response from LLM decisions
150
+
151
+ ### Key Concepts
152
+
153
+ #### Hybrid Indexing
154
+
155
+ - **Root `index.md`**: High-level overview of all knowledge
156
+ - **Sub-folder `index.md`**: Generated when folder exceeds `optimize_threshold`
157
+ - **Compression**: Parent index replaced with link to folder index when compressed
158
+
159
+ #### LLM Integration
160
+
161
+ The library queries the LLM for:
162
+
163
+ 1. **Path Recommendation**: Where to store new knowledge
164
+ 2. **Frontmatter Generation**: Metadata (summary, tags) for files
165
+ 3. **Optimization Suggestions**: When to reorganize structure
166
+
167
+ ### System Prompt
168
+
169
+ ```
170
+ You are the MdMemory Librarian. Your goal is to maintain a clean, hierarchical
171
+ Markdown Knowledge Tree. When storing data, choose a logical path. When optimizing,
172
+ group related files into sub-directories to keep the root index under 50 lines.
173
+ ```
174
+
175
+ ## API Reference
176
+
177
+ ### `__init__(llm_callback, storage_path, optimize_threshold=20)`
178
+
179
+ Initialize MdMemory with an LLM callback function.
180
+
181
+ **Parameters:**
182
+ - `llm_callback`: Callback function that receives messages and returns LLM response
183
+ - Signature: `(messages: List[Dict[str, str]]) -> str`
184
+ - Messages format: `[{"role": "user", "content": "prompt"}]`
185
+ - Should return the LLM response as a string (preferably JSON)
186
+ - `storage_path`: Root directory path for storing markdown files
187
+ - `optimize_threshold` (optional): Line count threshold for triggering auto-optimization (default: 20)
188
+
189
+ **Example:**
190
+ ```python
191
+ # Define a callback for your LLM provider
192
+ def llm_callback(messages):
193
+ # Use any LLM provider here
194
+ from litellm import completion
195
+ response = completion(model="gpt-3.5-turbo", messages=messages)
196
+ return response.choices[0].message.content
197
+
198
+ memory = MdMemory(llm_callback, "./knowledge_base")
199
+
200
+ # Or use built-in callbacks
201
+ from mdmemory import LiteLLMCallback, OpenAICallback, AnthropicCallback
202
+
203
+ memory = MdMemory(LiteLLMCallback("gpt-3.5-turbo"), "./knowledge_base")
204
+ memory = MdMemory(OpenAICallback("gpt-4"), "./knowledge_base")
205
+ memory = MdMemory(AnthropicCallback("claude-3-sonnet"), "./knowledge_base")
206
+ ```
207
+
208
+ ### `store(usr_id, query, topic=None) -> Optional[str]`
209
+
210
+ Store a new memory item.
211
+
212
+ **Parameters:**
213
+ - `usr_id`: User identifier
214
+ - `query`: Content to store (Markdown text)
215
+ - `topic` (optional): Topic identifier. If not provided, LLM will generate one from the query content
216
+
217
+ **Returns:** The topic ID that was used or generated, or None if storage failed
218
+
219
+ **Example:**
220
+ ```python
221
+ # With explicit topic
222
+ topic = memory.store("user1", "Content here", topic="my_topic")
223
+
224
+ # With LLM-generated topic
225
+ topic = memory.store("user1", "Content here") # LLM generates topic from content
226
+ ```
227
+
228
+ ### `retrieve(usr_id) -> str`
229
+
230
+ Get the root index (knowledge tree overview).
231
+
232
+ ### `get(usr_id, topic) -> Optional[str]`
233
+
234
+ Get full content of a specific topic.
235
+
236
+ ### `delete(usr_id, topic) -> bool`
237
+
238
+ Remove a topic from memory.
239
+
240
+ ### `optimize(usr_id) -> None`
241
+
242
+ Reorganize knowledge tree structure.
243
+
244
+ ### `list_topics() -> Dict[str, str]`
245
+
246
+ List all topics in the registry.
247
+
248
+ ## Development
249
+
250
+ ### Running Tests
251
+
252
+ ```bash
253
+ pytest tests/
254
+ ```
255
+
256
+ ### Code Quality
257
+
258
+ ```bash
259
+ black src/
260
+ ruff check src/
261
+ mypy src/
262
+ ```
263
+
264
+ ## License
265
+
266
+ MIT
267
+
268
+ ## Specification
269
+
270
+ See [spec.md](spec.md) for the full implementation specification.
@@ -0,0 +1,9 @@
1
+ mdmemory/__init__.py,sha256=c8V2oKaxKaavDvLexBbGlEzHkwdtYOmKi3G8eZIupI0,1663
2
+ mdmemory/core.py,sha256=-N-yk8mVpAkoCoQSNUf9hAHLcNjSk1fkhUCB7gcdxfs,16453
3
+ mdmemory/models.py,sha256=3YTEZIOixf4mDmJu37tLSCG2DdtzegfpLjmXGv9MQT4,775
4
+ mdmemory/registry.py,sha256=hY5vT4Qe_aTlTGdGEIHU1Y4ExFZhXuw1cTeV8XeOLpg,2365
5
+ mdmemory/utils.py,sha256=E_gpaOCmXkuBPgRic3UuQn3ylGt3hST9wVnpkQyaAas,2480
6
+ mdmemory-0.1.2.dist-info/METADATA,sha256=Pp4FUm_dDPLwGRDFT2fnoqYfoXR1aRLF5-Win2dWlqo,8128
7
+ mdmemory-0.1.2.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
8
+ mdmemory-0.1.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
9
+ mdmemory-0.1.2.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.