basic-memory 0.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of basic-memory might be problematic. Click here for more details.
- basic_memory/__init__.py +3 -0
- basic_memory/api/__init__.py +4 -0
- basic_memory/api/app.py +42 -0
- basic_memory/api/routers/__init__.py +8 -0
- basic_memory/api/routers/knowledge_router.py +168 -0
- basic_memory/api/routers/memory_router.py +123 -0
- basic_memory/api/routers/resource_router.py +34 -0
- basic_memory/api/routers/search_router.py +34 -0
- basic_memory/cli/__init__.py +1 -0
- basic_memory/cli/app.py +4 -0
- basic_memory/cli/commands/__init__.py +9 -0
- basic_memory/cli/commands/init.py +38 -0
- basic_memory/cli/commands/status.py +152 -0
- basic_memory/cli/commands/sync.py +254 -0
- basic_memory/cli/main.py +48 -0
- basic_memory/config.py +53 -0
- basic_memory/db.py +135 -0
- basic_memory/deps.py +182 -0
- basic_memory/file_utils.py +248 -0
- basic_memory/markdown/__init__.py +19 -0
- basic_memory/markdown/entity_parser.py +137 -0
- basic_memory/markdown/markdown_processor.py +153 -0
- basic_memory/markdown/plugins.py +236 -0
- basic_memory/markdown/schemas.py +73 -0
- basic_memory/markdown/utils.py +144 -0
- basic_memory/mcp/__init__.py +1 -0
- basic_memory/mcp/async_client.py +10 -0
- basic_memory/mcp/main.py +21 -0
- basic_memory/mcp/server.py +39 -0
- basic_memory/mcp/tools/__init__.py +34 -0
- basic_memory/mcp/tools/ai_edit.py +84 -0
- basic_memory/mcp/tools/knowledge.py +56 -0
- basic_memory/mcp/tools/memory.py +142 -0
- basic_memory/mcp/tools/notes.py +122 -0
- basic_memory/mcp/tools/search.py +28 -0
- basic_memory/mcp/tools/utils.py +154 -0
- basic_memory/models/__init__.py +12 -0
- basic_memory/models/base.py +9 -0
- basic_memory/models/knowledge.py +204 -0
- basic_memory/models/search.py +34 -0
- basic_memory/repository/__init__.py +7 -0
- basic_memory/repository/entity_repository.py +156 -0
- basic_memory/repository/observation_repository.py +40 -0
- basic_memory/repository/relation_repository.py +78 -0
- basic_memory/repository/repository.py +303 -0
- basic_memory/repository/search_repository.py +259 -0
- basic_memory/schemas/__init__.py +73 -0
- basic_memory/schemas/base.py +216 -0
- basic_memory/schemas/delete.py +38 -0
- basic_memory/schemas/discovery.py +25 -0
- basic_memory/schemas/memory.py +111 -0
- basic_memory/schemas/request.py +77 -0
- basic_memory/schemas/response.py +220 -0
- basic_memory/schemas/search.py +117 -0
- basic_memory/services/__init__.py +11 -0
- basic_memory/services/context_service.py +274 -0
- basic_memory/services/entity_service.py +281 -0
- basic_memory/services/exceptions.py +15 -0
- basic_memory/services/file_service.py +213 -0
- basic_memory/services/link_resolver.py +126 -0
- basic_memory/services/search_service.py +218 -0
- basic_memory/services/service.py +36 -0
- basic_memory/sync/__init__.py +5 -0
- basic_memory/sync/file_change_scanner.py +162 -0
- basic_memory/sync/sync_service.py +140 -0
- basic_memory/sync/utils.py +66 -0
- basic_memory/sync/watch_service.py +197 -0
- basic_memory/utils.py +78 -0
- basic_memory-0.0.0.dist-info/METADATA +71 -0
- basic_memory-0.0.0.dist-info/RECORD +73 -0
- basic_memory-0.0.0.dist-info/WHEEL +4 -0
- basic_memory-0.0.0.dist-info/entry_points.txt +2 -0
- basic_memory-0.0.0.dist-info/licenses/LICENSE +661 -0
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
"""Utilities for file operations."""
|
|
2
|
+
import hashlib
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Dict, Any, Tuple
|
|
5
|
+
|
|
6
|
+
import yaml
|
|
7
|
+
from loguru import logger
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class FileError(Exception):
    """Base exception for file operations."""
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class FileWriteError(FileError):
    """Raised when file operations fail."""
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ParseError(FileError):
    """Raised when parsing file content fails."""
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
async def compute_checksum(content: str) -> str:
    """
    Compute SHA-256 checksum of content.

    Args:
        content: Text content to hash (encoded as UTF-8 before hashing)

    Returns:
        SHA-256 hex digest

    Raises:
        FileError: If checksum computation fails
    """
    try:
        return hashlib.sha256(content.encode()).hexdigest()
    except Exception as e:
        logger.error(f"Failed to compute checksum: {e}")
        # Chain the original exception so the root cause is preserved.
        raise FileError(f"Failed to compute checksum: {e}") from e
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
async def ensure_directory(path: Path) -> None:
    """
    Ensure directory exists, creating if necessary.

    Idempotent: an already-existing directory is not an error
    (mkdir is called with exist_ok=True).

    Args:
        path: Directory path to ensure

    Raises:
        FileWriteError: If directory creation fails
    """
    try:
        path.mkdir(parents=True, exist_ok=True)
    except Exception as e:
        logger.error(f"Failed to create directory: {path}: {e}")
        # Chain the original exception so the root cause is preserved.
        raise FileWriteError(f"Failed to create directory {path}: {e}") from e
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
async def write_file_atomic(path: Path, content: str) -> None:
    """
    Write file with atomic operation using temporary file.

    Content is written to a sibling temp file first, then moved into place
    with Path.replace so readers never observe a partially written file.

    Args:
        path: Target file path
        content: Content to write

    Raises:
        FileWriteError: If write operation fails
    """
    # Append ".tmp" instead of replacing the suffix: with_suffix(".tmp")
    # would map both "a.md" and "a.txt" to the same "a.tmp", letting
    # concurrent writes to sibling files clobber each other's temp file.
    temp_path = path.with_name(path.name + ".tmp")
    try:
        temp_path.write_text(content)

        # TODO check for path.exists()
        temp_path.replace(path)
        logger.debug(f"wrote file: {path}")
    except Exception as e:
        # Best-effort cleanup of the temp file before reporting failure.
        temp_path.unlink(missing_ok=True)
        logger.error(f"Failed to write file: {path}: {e}")
        raise FileWriteError(f"Failed to write file {path}: {e}") from e
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def has_frontmatter(content: str) -> bool:
    """
    Check if content contains YAML frontmatter.

    Args:
        content: Content to check

    Returns:
        True if content has frontmatter delimiter (---), False otherwise
    """
    stripped = content.strip()
    # Frontmatter requires an opening marker at the very start and a
    # closing marker somewhere after it.
    if not stripped.startswith("---"):
        return False
    return "---" in stripped[3:]
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def parse_frontmatter(content: str) -> Dict[str, Any]:
    """
    Parse YAML frontmatter from content.

    Args:
        content: Content with YAML frontmatter

    Returns:
        Dictionary of frontmatter values (empty dict for an empty block)

    Raises:
        ParseError: If frontmatter is invalid or parsing fails
    """
    try:
        if not has_frontmatter(content):
            raise ParseError("Content has no frontmatter")

        # Split on first two occurrences of ---
        parts = content.split("---", 2)
        if len(parts) < 3:
            raise ParseError("Invalid frontmatter format")

        # Parse the YAML between the two markers.
        try:
            frontmatter = yaml.safe_load(parts[1])
        except yaml.YAMLError as e:
            # Chain so the original YAML error location is preserved.
            raise ParseError(f"Invalid YAML in frontmatter: {e}") from e

        # yaml.safe_load returns None for an empty frontmatter block.
        if frontmatter is None:
            return {}
        if not isinstance(frontmatter, dict):
            raise ParseError("Frontmatter must be a YAML dictionary")
        return frontmatter

    except ParseError:
        # Already our domain error; re-raise untouched instead of the
        # original fragile isinstance() re-wrap dance.
        raise
    except Exception as e:
        logger.error(f"Failed to parse frontmatter: {e}")
        raise ParseError(f"Failed to parse frontmatter: {e}") from e
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def remove_frontmatter(content: str) -> str:
    """
    Remove YAML frontmatter from content.

    Args:
        content: Content with frontmatter

    Returns:
        Content with frontmatter removed

    Raises:
        ParseError: If frontmatter format is invalid
    """
    try:
        if not has_frontmatter(content):
            return content.strip()

        # Everything after the second "---" marker is the body.
        pieces = content.split("---", 2)
        if len(pieces) < 3:
            raise ParseError("Invalid frontmatter format")

        return pieces[2].strip()

    except ParseError:
        raise
    except Exception as e:
        logger.error(f"Failed to remove frontmatter: {e}")
        raise ParseError(f"Failed to remove frontmatter: {e}")
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def remove_frontmatter_lenient(content: str) -> str:
    """
    Remove frontmatter markers and anything between them without validation.

    This is a more permissive version of remove_frontmatter that doesn't
    try to validate the YAML content. It simply removes everything between
    the first two '---' markers if they exist.

    Args:
        content: Content that may contain frontmatter

    Returns:
        Content with any frontmatter markers and content removed
    """
    text = content.strip()
    if not text.startswith("---"):
        return text

    # Look for the closing marker after the opening one.
    remainder = text[3:].strip()
    if "---" not in remainder:
        return text

    # Keep only what follows the closing marker.
    _, _, body = remainder.partition("---")
    return body.strip()
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
async def add_frontmatter(content: str, frontmatter: Dict[str, Any]) -> str:
    """
    Add YAML frontmatter to content.

    Args:
        content: Main content text
        frontmatter: Key-value pairs for frontmatter

    Returns:
        Content with YAML frontmatter prepended

    Raises:
        ParseError: If YAML serialization fails
    """
    try:
        # Keep the caller's key order rather than YAML's alphabetical default.
        serialized = yaml.dump(frontmatter, sort_keys=False)
    except yaml.YAMLError as e:
        logger.error(f"Failed to add frontmatter: {e}")
        raise ParseError(f"Failed to add frontmatter: {e}")
    body = content.strip()
    return f"---\n{serialized}---\n\n{body}"
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
async def parse_content_with_frontmatter(content: str) -> Tuple[Dict[str, Any], str]:
    """
    Parse both frontmatter and content.

    Args:
        content: Text content with optional frontmatter

    Returns:
        Tuple of (frontmatter dict, content without frontmatter)

    Raises:
        ParseError: If parsing fails
    """
    try:
        # No frontmatter: empty metadata, body is the stripped content.
        if not has_frontmatter(content):
            return {}, content.strip()

        return parse_frontmatter(content), remove_frontmatter(content)

    except ParseError:
        raise
    except Exception as e:
        logger.error(f"Failed to parse content with frontmatter: {e}")
        raise ParseError(f"Failed to parse content with frontmatter: {e}")
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""Base package for markdown parsing."""
|
|
2
|
+
|
|
3
|
+
from basic_memory.file_utils import ParseError
|
|
4
|
+
from basic_memory.markdown.entity_parser import EntityParser
|
|
5
|
+
from basic_memory.markdown.schemas import (
|
|
6
|
+
EntityMarkdown,
|
|
7
|
+
EntityFrontmatter,
|
|
8
|
+
Observation,
|
|
9
|
+
Relation,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
# Public API of the markdown package: the parsing entry point, its result
# schemas, and the shared ParseError exception re-exported from file_utils.
__all__ = [
    "EntityMarkdown",
    "EntityFrontmatter",
    "EntityParser",
    "Observation",
    "Relation",
    "ParseError",
]
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
"""Parser for markdown files into Entity objects.
|
|
2
|
+
|
|
3
|
+
Uses markdown-it with plugins to parse structured data from markdown content.
|
|
4
|
+
"""
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from typing import Any, Optional
|
|
9
|
+
import dateparser
|
|
10
|
+
|
|
11
|
+
from markdown_it import MarkdownIt
|
|
12
|
+
import frontmatter
|
|
13
|
+
|
|
14
|
+
from basic_memory.markdown.plugins import observation_plugin, relation_plugin
|
|
15
|
+
from basic_memory.markdown.schemas import (
|
|
16
|
+
EntityMarkdown,
|
|
17
|
+
EntityFrontmatter,
|
|
18
|
+
Observation,
|
|
19
|
+
Relation,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
# Module-level markdown-it instance with the observation and relation
# plugins installed; reused by parse() for every document.
md = MarkdownIt().use(observation_plugin).use(relation_plugin)
|
|
23
|
+
|
|
24
|
+
@dataclass
class EntityContent:
    """Parsed markdown body: the raw text plus the structured
    observations and relations extracted from it."""

    # Raw markdown body text as read from the file.
    content: str
    # Observation items parsed out of the body (empty if none found).
    observations: list[Observation] = field(default_factory=list)
    # Relation links parsed out of the body (empty if none found).
    relations: list[Relation] = field(default_factory=list)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def parse(content: str) -> EntityContent:
    """Parse markdown content into EntityMarkdown."""

    # Walk the markdown-it token stream; the observation/relation plugins
    # attach their parsed results to token.meta.
    observations: list[Observation] = []
    relations: list[Relation] = []

    if content:
        for token in md.parse(content):
            meta = token.meta
            if not meta:
                continue
            if "observation" in meta:
                observations.append(Observation.model_validate(meta["observation"]))
            if "relations" in meta:
                relations.extend(Relation.model_validate(item) for item in meta["relations"])

    return EntityContent(
        content=content,
        observations=observations,
        relations=relations,
    )
|
|
55
|
+
|
|
56
|
+
def parse_tags(tags: Any) -> list[str]:
    """Parse tags into list of strings."""
    # Comma-separated string form: "a, b" -> ["a", "b"].
    if isinstance(tags, str):
        pieces = (piece.strip() for piece in tags.split(","))
        return [piece for piece in pieces if piece]
    # Sequence form: coerce each element to a stripped string.
    if isinstance(tags, (list, tuple)):
        stripped = (str(item).strip() for item in tags)
        return [item for item in stripped if item]
    # Anything else (None, numbers, dicts) yields no tags.
    return []
|
|
63
|
+
|
|
64
|
+
class EntityParser:
    """Parser for markdown files into Entity objects."""

    def __init__(self, base_path: Path):
        """Initialize parser with base path for relative permalink generation."""
        # Resolve once so relative_path() compares absolute paths.
        self.base_path = base_path.resolve()

    def relative_path(self, file_path: Path) -> str:
        """Get file path relative to base_path.

        Example:
            base_path: /project/root
            file_path: /project/root/design/models/data.md
            returns: "design/models/data"

        Raises:
            ValueError: if file_path is not under base_path
                (Path.relative_to raises in that case).
        """
        # Get relative path and remove .md extension
        rel_path = file_path.resolve().relative_to(self.base_path)
        if rel_path.suffix.lower() == ".md":
            return str(rel_path.with_suffix(""))
        return str(rel_path)

    def parse_date(self, value: Any) -> Optional[datetime]:
        """Parse date strings using dateparser for maximum flexibility.

        Supports human friendly formats like:
        - 2024-01-15
        - Jan 15, 2024
        - 2024-01-15 10:00 AM
        - yesterday
        - 2 days ago

        Returns None for non-string/non-datetime input or unparseable strings.
        """
        if isinstance(value, datetime):
            return value
        if isinstance(value, str):
            try:
                parsed = dateparser.parse(value)
                if parsed:
                    return parsed
            except Exception:
                # Treat any dateparser failure as "not a date".
                pass
        return None

    async def parse_file(self, file_path: Path) -> EntityMarkdown:
        """Parse markdown file into EntityMarkdown."""

        # NOTE(review): if file_path is already absolute, `/` returns it
        # unchanged, so absolute paths bypass base_path — confirm intended.
        absolute_path = self.base_path / file_path
        # Parse frontmatter and content using python-frontmatter
        post = frontmatter.load(str(absolute_path))

        # Extract file stat info
        file_stats = absolute_path.stat()

        # Fill in defaults; this mutates post.metadata in place, and the
        # same dict object is passed to EntityFrontmatter below.
        metadata = post.metadata
        metadata["title"] = post.metadata.get("title", file_path.name)
        metadata["type"] = metadata.get("type", "note")
        metadata["tags"] = parse_tags(post.metadata.get("tags", []))

        # frontmatter
        entity_frontmatter = EntityFrontmatter(
            metadata=post.metadata,
        )

        # Extract observations/relations from the markdown body.
        entity_content = parse(post.content)

        return EntityMarkdown(
            frontmatter=entity_frontmatter,
            content=post.content,
            observations=entity_content.observations,
            relations=entity_content.relations,
            # NOTE(review): st_ctime is inode-change time on Unix, not
            # creation time — "created" may be later than expected there.
            created=datetime.fromtimestamp(file_stats.st_ctime),
            modified=datetime.fromtimestamp(file_stats.st_mtime),
        )
|
|
137
|
+
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
"""Process markdown files with structured sections.
|
|
2
|
+
|
|
3
|
+
This module follows a Read -> Modify -> Write pattern for all file operations:
|
|
4
|
+
1. Read entire file and parse into EntityMarkdown schema
|
|
5
|
+
2. Modify the schema (add relation, update content, etc)
|
|
6
|
+
3. Write entire file atomically using temp file + swap
|
|
7
|
+
|
|
8
|
+
No in-place updates are performed. Each write reconstructs the entire file from the schema.
|
|
9
|
+
The file format has two distinct types of content:
|
|
10
|
+
1. User content - Free form text that is preserved exactly as written
|
|
11
|
+
2. Structured sections - Observations and Relations that are always formatted
|
|
12
|
+
in a standard way and can be overwritten since they're tracked in our schema
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import Optional
|
|
17
|
+
from collections import OrderedDict
|
|
18
|
+
|
|
19
|
+
import frontmatter
|
|
20
|
+
from frontmatter import Post
|
|
21
|
+
from loguru import logger
|
|
22
|
+
|
|
23
|
+
from basic_memory import file_utils
|
|
24
|
+
from basic_memory.markdown.entity_parser import EntityParser
|
|
25
|
+
from basic_memory.markdown.schemas import EntityMarkdown, Observation, Relation
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class DirtyFileError(Exception):
    """Raised when attempting to write to a file that has been modified."""
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class MarkdownProcessor:
    """Process markdown files while preserving content and structure.

    This class handles the file I/O aspects of our markdown processing. It:
    1. Uses EntityParser for reading/parsing files into our schema
    2. Handles writing files with proper frontmatter
    3. Formats structured sections (observations/relations) consistently
    4. Preserves user content exactly as written
    5. Performs atomic writes using temp files

    It does NOT:
    1. Modify the schema directly (that's done by services)
    2. Handle in-place updates (everything is read->modify->write)
    3. Track schema changes (that's done by the database)
    """

    def __init__(self, entity_parser: EntityParser):
        """Initialize processor with base path and parser."""
        self.entity_parser = entity_parser

    async def read_file(self, path: Path) -> EntityMarkdown:
        """Read and parse file into EntityMarkdown schema.

        This is step 1 of our read->modify->write pattern.
        We use EntityParser to handle all the markdown parsing.
        """
        return await self.entity_parser.parse_file(path)

    async def write_file(
        self,
        path: Path,
        markdown: EntityMarkdown,
        expected_checksum: Optional[str] = None,
    ) -> str:
        """Write EntityMarkdown schema back to file.

        This is step 3 of our read->modify->write pattern.
        The entire file is rewritten atomically on each update.

        File Structure:
            ---
            frontmatter fields
            ---
            user content area (preserved exactly)

            ## Observations (if any)
            formatted observations

            ## Relations (if any)
            formatted relations

        Args:
            path: Where to write the file
            markdown: Complete schema to write
            expected_checksum: If provided, verify file hasn't changed

        Returns:
            Checksum of written file

        Raises:
            DirtyFileError: If file has been modified (when expected_checksum provided)
        """
        # Dirty check if needed
        if expected_checksum is not None:
            current_content = path.read_text()
            current_checksum = await file_utils.compute_checksum(current_content)
            if current_checksum != expected_checksum:
                raise DirtyFileError(f"File {path} has been modified")

        # Convert frontmatter to dict. OrderedDict keeps title/type/permalink
        # first so the emitted YAML has a stable, predictable key order.
        frontmatter_dict = OrderedDict()
        frontmatter_dict["title"] = markdown.frontmatter.title
        frontmatter_dict["type"] = markdown.frontmatter.type
        frontmatter_dict["permalink"] = markdown.frontmatter.permalink

        # NOTE(review): metadata keys written afterwards overwrite the three
        # explicit fields above on collision — confirm that is intended.
        metadata = markdown.frontmatter.metadata or {}
        for k,v in metadata.items():
            frontmatter_dict[k] = v

        # Start with user content (or minimal title for new files)
        content = markdown.content or f"# {markdown.frontmatter.title}\n"

        # Add structured sections with proper spacing
        content = content.rstrip()  # Remove trailing whitespace

        # add a blank line if we have semantic content
        if markdown.observations or markdown.relations:
            content += "\n"

        if markdown.observations:
            content += self.format_observations(markdown.observations)
        if markdown.relations:
            content += self.format_relations(markdown.relations)

        # Create Post object for frontmatter; dumps serializes frontmatter
        # + content in the insertion order built above (sort_keys=False).
        post = Post(content, **frontmatter_dict)
        final_content = frontmatter.dumps(post, sort_keys=False)

        logger.debug(f"writing file {path} with content:\n{final_content}")

        # Write atomically and return checksum of updated file
        path.parent.mkdir(parents=True, exist_ok=True)
        await file_utils.write_file_atomic(path, final_content)
        return await file_utils.compute_checksum(final_content)

    def format_observations(self, observations: list[Observation]) -> str:
        """Format observations section in standard way.

        Format: - [category] content #tag1 #tag2 (context)
        """
        # Relies on Observation.__str__ producing the canonical line format.
        lines = [f"{obs}" for obs in observations]
        return "\n".join(lines) + "\n"

    def format_relations(self, relations: list[Relation]) -> str:
        """Format relations section in standard way.

        Format: - relation_type [[target]] (context)
        """
        # Relies on Relation.__str__ producing the canonical line format.
        lines = [f"{rel}" for rel in relations]
        return "\n".join(lines) + "\n"
|