basic-memory 0.17.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- basic_memory/__init__.py +7 -0
- basic_memory/alembic/alembic.ini +119 -0
- basic_memory/alembic/env.py +185 -0
- basic_memory/alembic/migrations.py +24 -0
- basic_memory/alembic/script.py.mako +26 -0
- basic_memory/alembic/versions/314f1ea54dc4_add_postgres_full_text_search_support_.py +131 -0
- basic_memory/alembic/versions/3dae7c7b1564_initial_schema.py +93 -0
- basic_memory/alembic/versions/502b60eaa905_remove_required_from_entity_permalink.py +51 -0
- basic_memory/alembic/versions/5fe1ab1ccebe_add_projects_table.py +120 -0
- basic_memory/alembic/versions/647e7a75e2cd_project_constraint_fix.py +112 -0
- basic_memory/alembic/versions/9d9c1cb7d8f5_add_mtime_and_size_columns_to_entity_.py +49 -0
- basic_memory/alembic/versions/a1b2c3d4e5f6_fix_project_foreign_keys.py +49 -0
- basic_memory/alembic/versions/a2b3c4d5e6f7_add_search_index_entity_cascade.py +56 -0
- basic_memory/alembic/versions/b3c3938bacdb_relation_to_name_unique_index.py +44 -0
- basic_memory/alembic/versions/cc7172b46608_update_search_index_schema.py +113 -0
- basic_memory/alembic/versions/e7e1f4367280_add_scan_watermark_tracking_to_project.py +37 -0
- basic_memory/alembic/versions/f8a9b2c3d4e5_add_pg_trgm_for_fuzzy_link_resolution.py +239 -0
- basic_memory/api/__init__.py +5 -0
- basic_memory/api/app.py +131 -0
- basic_memory/api/routers/__init__.py +11 -0
- basic_memory/api/routers/directory_router.py +84 -0
- basic_memory/api/routers/importer_router.py +152 -0
- basic_memory/api/routers/knowledge_router.py +318 -0
- basic_memory/api/routers/management_router.py +80 -0
- basic_memory/api/routers/memory_router.py +90 -0
- basic_memory/api/routers/project_router.py +448 -0
- basic_memory/api/routers/prompt_router.py +260 -0
- basic_memory/api/routers/resource_router.py +249 -0
- basic_memory/api/routers/search_router.py +36 -0
- basic_memory/api/routers/utils.py +169 -0
- basic_memory/api/template_loader.py +292 -0
- basic_memory/api/v2/__init__.py +35 -0
- basic_memory/api/v2/routers/__init__.py +21 -0
- basic_memory/api/v2/routers/directory_router.py +93 -0
- basic_memory/api/v2/routers/importer_router.py +182 -0
- basic_memory/api/v2/routers/knowledge_router.py +413 -0
- basic_memory/api/v2/routers/memory_router.py +130 -0
- basic_memory/api/v2/routers/project_router.py +342 -0
- basic_memory/api/v2/routers/prompt_router.py +270 -0
- basic_memory/api/v2/routers/resource_router.py +286 -0
- basic_memory/api/v2/routers/search_router.py +73 -0
- basic_memory/cli/__init__.py +1 -0
- basic_memory/cli/app.py +84 -0
- basic_memory/cli/auth.py +277 -0
- basic_memory/cli/commands/__init__.py +18 -0
- basic_memory/cli/commands/cloud/__init__.py +6 -0
- basic_memory/cli/commands/cloud/api_client.py +112 -0
- basic_memory/cli/commands/cloud/bisync_commands.py +110 -0
- basic_memory/cli/commands/cloud/cloud_utils.py +101 -0
- basic_memory/cli/commands/cloud/core_commands.py +195 -0
- basic_memory/cli/commands/cloud/rclone_commands.py +371 -0
- basic_memory/cli/commands/cloud/rclone_config.py +110 -0
- basic_memory/cli/commands/cloud/rclone_installer.py +263 -0
- basic_memory/cli/commands/cloud/upload.py +233 -0
- basic_memory/cli/commands/cloud/upload_command.py +124 -0
- basic_memory/cli/commands/command_utils.py +77 -0
- basic_memory/cli/commands/db.py +44 -0
- basic_memory/cli/commands/format.py +198 -0
- basic_memory/cli/commands/import_chatgpt.py +84 -0
- basic_memory/cli/commands/import_claude_conversations.py +87 -0
- basic_memory/cli/commands/import_claude_projects.py +86 -0
- basic_memory/cli/commands/import_memory_json.py +87 -0
- basic_memory/cli/commands/mcp.py +76 -0
- basic_memory/cli/commands/project.py +889 -0
- basic_memory/cli/commands/status.py +174 -0
- basic_memory/cli/commands/telemetry.py +81 -0
- basic_memory/cli/commands/tool.py +341 -0
- basic_memory/cli/main.py +28 -0
- basic_memory/config.py +616 -0
- basic_memory/db.py +394 -0
- basic_memory/deps.py +705 -0
- basic_memory/file_utils.py +478 -0
- basic_memory/ignore_utils.py +297 -0
- basic_memory/importers/__init__.py +27 -0
- basic_memory/importers/base.py +79 -0
- basic_memory/importers/chatgpt_importer.py +232 -0
- basic_memory/importers/claude_conversations_importer.py +180 -0
- basic_memory/importers/claude_projects_importer.py +148 -0
- basic_memory/importers/memory_json_importer.py +108 -0
- basic_memory/importers/utils.py +61 -0
- basic_memory/markdown/__init__.py +21 -0
- basic_memory/markdown/entity_parser.py +279 -0
- basic_memory/markdown/markdown_processor.py +160 -0
- basic_memory/markdown/plugins.py +242 -0
- basic_memory/markdown/schemas.py +70 -0
- basic_memory/markdown/utils.py +117 -0
- basic_memory/mcp/__init__.py +1 -0
- basic_memory/mcp/async_client.py +139 -0
- basic_memory/mcp/project_context.py +141 -0
- basic_memory/mcp/prompts/__init__.py +19 -0
- basic_memory/mcp/prompts/ai_assistant_guide.py +70 -0
- basic_memory/mcp/prompts/continue_conversation.py +62 -0
- basic_memory/mcp/prompts/recent_activity.py +188 -0
- basic_memory/mcp/prompts/search.py +57 -0
- basic_memory/mcp/prompts/utils.py +162 -0
- basic_memory/mcp/resources/ai_assistant_guide.md +283 -0
- basic_memory/mcp/resources/project_info.py +71 -0
- basic_memory/mcp/server.py +81 -0
- basic_memory/mcp/tools/__init__.py +48 -0
- basic_memory/mcp/tools/build_context.py +120 -0
- basic_memory/mcp/tools/canvas.py +152 -0
- basic_memory/mcp/tools/chatgpt_tools.py +190 -0
- basic_memory/mcp/tools/delete_note.py +242 -0
- basic_memory/mcp/tools/edit_note.py +324 -0
- basic_memory/mcp/tools/list_directory.py +168 -0
- basic_memory/mcp/tools/move_note.py +551 -0
- basic_memory/mcp/tools/project_management.py +201 -0
- basic_memory/mcp/tools/read_content.py +281 -0
- basic_memory/mcp/tools/read_note.py +267 -0
- basic_memory/mcp/tools/recent_activity.py +534 -0
- basic_memory/mcp/tools/search.py +385 -0
- basic_memory/mcp/tools/utils.py +540 -0
- basic_memory/mcp/tools/view_note.py +78 -0
- basic_memory/mcp/tools/write_note.py +230 -0
- basic_memory/models/__init__.py +15 -0
- basic_memory/models/base.py +10 -0
- basic_memory/models/knowledge.py +226 -0
- basic_memory/models/project.py +87 -0
- basic_memory/models/search.py +85 -0
- basic_memory/repository/__init__.py +11 -0
- basic_memory/repository/entity_repository.py +503 -0
- basic_memory/repository/observation_repository.py +73 -0
- basic_memory/repository/postgres_search_repository.py +379 -0
- basic_memory/repository/project_info_repository.py +10 -0
- basic_memory/repository/project_repository.py +128 -0
- basic_memory/repository/relation_repository.py +146 -0
- basic_memory/repository/repository.py +385 -0
- basic_memory/repository/search_index_row.py +95 -0
- basic_memory/repository/search_repository.py +94 -0
- basic_memory/repository/search_repository_base.py +241 -0
- basic_memory/repository/sqlite_search_repository.py +439 -0
- basic_memory/schemas/__init__.py +86 -0
- basic_memory/schemas/base.py +297 -0
- basic_memory/schemas/cloud.py +50 -0
- basic_memory/schemas/delete.py +37 -0
- basic_memory/schemas/directory.py +30 -0
- basic_memory/schemas/importer.py +35 -0
- basic_memory/schemas/memory.py +285 -0
- basic_memory/schemas/project_info.py +212 -0
- basic_memory/schemas/prompt.py +90 -0
- basic_memory/schemas/request.py +112 -0
- basic_memory/schemas/response.py +229 -0
- basic_memory/schemas/search.py +117 -0
- basic_memory/schemas/sync_report.py +72 -0
- basic_memory/schemas/v2/__init__.py +27 -0
- basic_memory/schemas/v2/entity.py +129 -0
- basic_memory/schemas/v2/resource.py +46 -0
- basic_memory/services/__init__.py +8 -0
- basic_memory/services/context_service.py +601 -0
- basic_memory/services/directory_service.py +308 -0
- basic_memory/services/entity_service.py +864 -0
- basic_memory/services/exceptions.py +37 -0
- basic_memory/services/file_service.py +541 -0
- basic_memory/services/initialization.py +216 -0
- basic_memory/services/link_resolver.py +121 -0
- basic_memory/services/project_service.py +880 -0
- basic_memory/services/search_service.py +404 -0
- basic_memory/services/service.py +15 -0
- basic_memory/sync/__init__.py +6 -0
- basic_memory/sync/background_sync.py +26 -0
- basic_memory/sync/sync_service.py +1259 -0
- basic_memory/sync/watch_service.py +510 -0
- basic_memory/telemetry.py +249 -0
- basic_memory/templates/prompts/continue_conversation.hbs +110 -0
- basic_memory/templates/prompts/search.hbs +101 -0
- basic_memory/utils.py +468 -0
- basic_memory-0.17.1.dist-info/METADATA +617 -0
- basic_memory-0.17.1.dist-info/RECORD +171 -0
- basic_memory-0.17.1.dist-info/WHEEL +4 -0
- basic_memory-0.17.1.dist-info/entry_points.txt +3 -0
- basic_memory-0.17.1.dist-info/licenses/LICENSE +661 -0
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
"""Parser for markdown files into Entity objects.
|
|
2
|
+
|
|
3
|
+
Uses markdown-it with plugins to parse structured data from markdown content.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from datetime import date, datetime
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any, Optional
|
|
10
|
+
|
|
11
|
+
import dateparser
|
|
12
|
+
import frontmatter
|
|
13
|
+
import yaml
|
|
14
|
+
from loguru import logger
|
|
15
|
+
from markdown_it import MarkdownIt
|
|
16
|
+
|
|
17
|
+
from basic_memory.markdown.plugins import observation_plugin, relation_plugin
|
|
18
|
+
from basic_memory.markdown.schemas import (
|
|
19
|
+
EntityFrontmatter,
|
|
20
|
+
EntityMarkdown,
|
|
21
|
+
Observation,
|
|
22
|
+
Relation,
|
|
23
|
+
)
|
|
24
|
+
from basic_memory.utils import parse_tags
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
md = MarkdownIt().use(observation_plugin).use(relation_plugin)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def normalize_frontmatter_value(value: Any) -> Any:
    """Coerce a frontmatter value into a type that is safe for string handling.

    PyYAML eagerly converts scalar strings into native Python objects
    (dates, numbers, booleans), which later breaks code that calls string
    methods such as .strip() on them (see GitHub issue #236). This helper
    flattens those surprises:

    - date / datetime  -> ISO-8601 string
    - bool             -> "True" / "False"
    - int / float      -> decimal string
    - list / dict      -> same container shape, leaves normalized recursively
    - str / None       -> returned untouched

    Args:
        value: Any value read from YAML frontmatter.

    Returns:
        The same value expressed with string-safe leaf types.

    Example:
        >>> normalize_frontmatter_value(True)
        'True'
        >>> normalize_frontmatter_value([datetime.date(2025, 10, 24), "tag", 123])
        ['2025-10-24', 'tag', '123']
    """
    # datetime is a subclass of date, but both normalize identically,
    # so a single combined check is safe here.
    if isinstance(value, (datetime, date)):
        return value.isoformat()

    # bool is a subclass of int, so this check must precede the numeric one.
    if isinstance(value, bool):
        return str(value)

    if isinstance(value, (int, float)):
        return str(value)

    # Containers keep their shape; only their contents are normalized.
    if isinstance(value, list):
        return [normalize_frontmatter_value(entry) for entry in value]
    if isinstance(value, dict):
        return {k: normalize_frontmatter_value(v) for k, v in value.items()}

    # Strings, None, and anything else pass through unchanged.
    return value
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def normalize_frontmatter_metadata(metadata: dict) -> dict:
    """Return a copy of *metadata* with every value normalized.

    Applies normalize_frontmatter_value() to each entry so that YAML-coerced
    dates, numbers, and booleans become strings (GitHub issue #236).

    Args:
        metadata: Raw frontmatter metadata as parsed from YAML.

    Returns:
        A new dict with the same keys and normalized values.
    """
    normalized = {}
    for key, value in metadata.items():
        normalized[key] = normalize_frontmatter_value(value)
    return normalized
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
@dataclass
class EntityContent:
    """Parsed result of a markdown body: the raw text plus extracted semantics."""

    # Original markdown content, unchanged.
    content: str
    # Observations parsed from "- [category] text #tag (context)" list items.
    observations: list[Observation] = field(default_factory=list)
    # Relations parsed from "- type [[target]]" items and inline [[links]].
    relations: list[Relation] = field(default_factory=list)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def parse(content: str) -> EntityContent:
    """Extract observations and relations from markdown content.

    Runs the module-level markdown-it parser (configured with the
    observation and relation plugins) over *content* and validates every
    piece of metadata the plugins attached to the token stream.
    """
    found_observations: list[Observation] = []
    found_relations: list[Relation] = []

    # Empty/None content tokenizes to nothing; skip the parser entirely.
    tokens = md.parse(content) if content else []
    for token in tokens:
        meta = token.meta
        if not meta:
            continue
        # Plugins stash a single observation dict per token...
        if "observation" in meta:
            found_observations.append(Observation.model_validate(meta["observation"]))
        # ...and possibly several relation dicts.
        if "relations" in meta:
            found_relations.extend(
                Relation.model_validate(raw) for raw in meta["relations"]
            )

    return EntityContent(
        content=content,
        observations=found_observations,
        relations=found_relations,
    )
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
# def parse_tags(tags: Any) -> list[str]:
|
|
140
|
+
# """Parse tags into list of strings."""
|
|
141
|
+
# if isinstance(tags, (list, tuple)):
|
|
142
|
+
# return [str(t).strip() for t in tags if str(t).strip()]
|
|
143
|
+
# return [t.strip() for t in tags.split(",") if t.strip()]
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
class EntityParser:
    """Parse markdown files (or raw markdown content) into Entity objects."""

    def __init__(self, base_path: Path):
        """Store the resolved project root used to absolutize relative paths."""
        self.base_path = base_path.resolve()

    def parse_date(self, value: Any) -> Optional[datetime]:
        """Best-effort conversion of *value* to a datetime.

        Accepts datetime objects as-is and runs strings through dateparser,
        which understands human-friendly inputs such as:
        - 2024-01-15
        - Jan 15, 2024
        - 2024-01-15 10:00 AM
        - yesterday
        - 2 days ago
        """
        if isinstance(value, datetime):
            return value
        if isinstance(value, str):
            result = dateparser.parse(value)
            if result:
                return result
        # Unparseable strings and non-date types yield None.
        return None

    async def parse_file(self, path: Path | str) -> EntityMarkdown:
        """Read a markdown file from disk and parse it into EntityMarkdown."""
        # Absolute inputs are used verbatim; relative ones are anchored
        # at the project base path.
        candidate = Path(path)
        absolute_path = candidate if candidate.is_absolute() else self.get_file_path(path)

        raw = absolute_path.read_text(encoding="utf-8")
        return await self.parse_file_content(absolute_path, raw)

    def get_file_path(self, path):
        """Resolve a project-relative path against the parser's base path."""
        return self.base_path / path

    async def parse_file_content(self, absolute_path, file_content):
        """Parse already-read file content, taking timestamps from file stats.

        Thin wrapper over parse_markdown_content(); kept for backwards
        compatibility with callers that still work with on-disk files.
        """
        stats = absolute_path.stat()
        return await self.parse_markdown_content(
            file_path=absolute_path,
            content=file_content,
            mtime=stats.st_mtime,
            ctime=stats.st_ctime,
        )

    async def parse_markdown_content(
        self,
        file_path: Path,
        content: str,
        mtime: Optional[float] = None,
        ctime: Optional[float] = None,
    ) -> EntityMarkdown:
        """Parse markdown content without requiring the file to exist on disk.

        Useful for content sourced from S3 or other remote stores.

        Args:
            file_path: Path used for metadata only; need not exist on disk.
            content: The markdown text to parse.
            mtime: Optional modification time (Unix timestamp).
            ctime: Optional creation time (Unix timestamp).

        Returns:
            EntityMarkdown with frontmatter, content, observations, relations.
        """
        # Strip any BOM left behind by Windows tooling or certain sources
        # (see issue #452).
        from basic_memory.file_utils import strip_bom

        content = strip_bom(content)

        # Malformed YAML frontmatter downgrades the file to plain markdown
        # rather than failing the whole parse.
        try:
            post = frontmatter.loads(content)
        except yaml.YAMLError as e:
            logger.warning(
                f"Failed to parse YAML frontmatter in {file_path}: {e}. "
                f"Treating file as plain markdown without frontmatter."
            )
            post = frontmatter.Post(content, metadata={})

        # Coerce YAML-native values (dates, numbers, booleans) to strings.
        metadata = normalize_frontmatter_metadata(post.metadata)

        # Guarantee required fields: title falls back to the file stem
        # (also covering the literal string "None"), type falls back to "note".
        title = metadata.get("title")
        if not title or title == "None":
            title = file_path.stem
        metadata["title"] = title

        if metadata.get("type") is None:
            metadata["type"] = "note"

        tags = parse_tags(metadata.get("tags", []))  # pyright: ignore
        if tags:
            metadata["tags"] = tags

        # Extract observations and relations from the body.
        entity_frontmatter = EntityFrontmatter(metadata=metadata)
        entity_content = parse(post.content)

        # Fall back to "now" for any timestamp the caller did not supply.
        fallback = datetime.now().astimezone()
        created = datetime.fromtimestamp(ctime).astimezone() if ctime else fallback
        modified = datetime.fromtimestamp(mtime).astimezone() if mtime else fallback

        return EntityMarkdown(
            frontmatter=entity_frontmatter,
            content=post.content,
            observations=entity_content.observations,
            relations=entity_content.relations,
            created=created,
            modified=modified,
        )
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from typing import TYPE_CHECKING, Optional
|
|
3
|
+
from collections import OrderedDict
|
|
4
|
+
|
|
5
|
+
from frontmatter import Post
|
|
6
|
+
from loguru import logger
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
from basic_memory import file_utils
|
|
10
|
+
from basic_memory.file_utils import dump_frontmatter
|
|
11
|
+
from basic_memory.markdown.entity_parser import EntityParser
|
|
12
|
+
from basic_memory.markdown.schemas import EntityMarkdown, Observation, Relation
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from basic_memory.config import BasicMemoryConfig
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class DirtyFileError(Exception):
    """Signals that a file changed on disk since its checksum was recorded."""
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class MarkdownProcessor:
    """Process markdown files while preserving content and structure.

    used only for import

    Handles the file I/O side of markdown processing:
    1. Reads/parses files into our schema via EntityParser
    2. Writes files back with proper frontmatter
    3. Renders structured sections (observations/relations) consistently
    4. Leaves user content exactly as written
    5. Writes atomically via temp files

    It does NOT modify the schema directly (services do that), update files
    in place (everything is read->modify->write), or track schema changes
    (the database does that).
    """

    def __init__(
        self,
        entity_parser: EntityParser,
        app_config: Optional["BasicMemoryConfig"] = None,
    ):
        """Initialize processor with parser and optional config."""
        self.entity_parser = entity_parser
        self.app_config = app_config

    async def read_file(self, path: Path) -> EntityMarkdown:
        """Read and parse a file into the EntityMarkdown schema.

        Step 1 of the read->modify->write pattern; delegates all markdown
        parsing to EntityParser.
        """
        return await self.entity_parser.parse_file(path)

    async def write_file(
        self,
        path: Path,
        markdown: EntityMarkdown,
        expected_checksum: Optional[str] = None,
    ) -> str:
        """Write an EntityMarkdown schema back to disk atomically.

        Step 3 of the read->modify->write pattern: the whole file is
        rewritten on every update.

        File layout:
            ---
            frontmatter fields
            ---
            user content area (preserved exactly)

            ## Observations (if any)
            ## Relations (if any)

        Args:
            path: Destination file path.
            markdown: Complete schema to serialize.
            expected_checksum: When given, abort if the on-disk file differs.

        Returns:
            Checksum of the written file (post-formatting when configured).

        Raises:
            DirtyFileError: The file changed since expected_checksum was taken.
        """
        # Optional dirty check: refuse to clobber concurrent edits.
        if expected_checksum is not None:
            on_disk = path.read_text(encoding="utf-8")
            if await file_utils.compute_checksum(on_disk) != expected_checksum:
                raise DirtyFileError(f"File {path} has been modified")

        # Frontmatter: fixed keys first, then any extra metadata.
        fm = OrderedDict(
            title=markdown.frontmatter.title,
            type=markdown.frontmatter.type,
            permalink=markdown.frontmatter.permalink,
        )
        fm.update(markdown.frontmatter.metadata or {})

        # Body: user content (or a minimal title heading for new files),
        # trimmed, then a blank line plus the structured sections.
        body = (markdown.content or f"# {markdown.frontmatter.title}\n").rstrip()
        if markdown.observations or markdown.relations:
            body += "\n"
        if markdown.observations:
            body += self.format_observations(markdown.observations)
        if markdown.relations:
            body += self.format_relations(markdown.relations)

        final_content = dump_frontmatter(Post(body, **fm))
        logger.debug(f"writing file {path} with content:\n{final_content}")

        # Atomic write, creating parent directories as needed.
        path.parent.mkdir(parents=True, exist_ok=True)
        await file_utils.write_file_atomic(path, final_content)

        # If a formatter is configured, checksum the formatted result instead
        # (MarkdownProcessor always deals in markdown files).
        checksum_source = final_content
        if self.app_config:
            formatted = await file_utils.format_file(
                path, self.app_config, is_markdown=True
            )
            if formatted is not None:
                checksum_source = formatted

        return await file_utils.compute_checksum(checksum_source)

    def format_observations(self, observations: list[Observation]) -> str:
        """Render observations one per line: - [category] content #tags (context)."""
        return "\n".join(str(obs) for obs in observations) + "\n"

    def format_relations(self, relations: list[Relation]) -> str:
        """Render relations one per line: - relation_type [[target]] (context)."""
        return "\n".join(str(rel) for rel in relations) + "\n"
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
"""Markdown-it plugins for Basic Memory markdown parsing."""
|
|
2
|
+
|
|
3
|
+
from typing import List, Any, Dict
|
|
4
|
+
from markdown_it import MarkdownIt
|
|
5
|
+
from markdown_it.token import Token
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Observation handling functions
|
|
9
|
+
def is_observation(token: Token) -> bool:
    """Return True when an inline token matches the observation syntax."""
    import re

    if token.type != "inline":  # pragma: no cover
        return False
    # Test tokens carry their text in token.tag; real ones in token.content.
    text = (token.tag or token.content).strip()
    if not text:  # pragma: no cover
        return False

    # Markdown task-list items are not observations.
    if text.startswith(("[ ]", "[x]", "[-]")):
        return False

    # Markdown links [text](url) and wiki links [[text]] are not observations.
    if re.match(r"^\[.*?\]\(.*?\)$", text) or re.match(r"^\[\[.*?\]\]$", text):
        return False

    # Either the proper "[category] content" form ...
    categorized = re.match(r"^\[([^\[\]()]+)\]\s+(.+)", text)
    # ... or at least one standalone #hashtag qualifies (word-initial "#"
    # only, so "#" inside attributes like color="#4285F4" does not match).
    tagged = any(word.startswith("#") for word in text.split())
    return bool(categorized) or tagged
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def parse_observation(token: Token) -> Dict[str, Any]:
    """Split an observation token into category, content, tags, and context."""
    import re

    # Test tokens carry their text in token.tag; real ones in token.content.
    text = (token.tag or token.content).strip()

    # "[category] rest" — category is bracketed text without nesting/parens.
    category = None
    categorized = re.match(r"^\[([^\[\]()]+)\]\s+(.+)", text)
    if categorized:
        category = categorized.group(1).strip()
        text = categorized.group(2).strip()
    else:
        # "[] rest" — empty brackets mean no category.
        uncategorized = re.match(r"^\[\]\s+(.+)", text)
        if uncategorized:
            text = uncategorized.group(1).strip()

    # A trailing "(...)" is the optional context.
    context = None
    if text.endswith(")"):
        open_paren = text.rfind("(")
        if open_paren != -1:
            context = text[open_paren + 1 : -1].strip()
            text = text[:open_paren].strip()

    # Collect #tags while leaving them in the content text.
    tags = []
    for word in text.split():
        if not word.startswith("#"):
            continue
        if "#" in word[1:]:
            # Compound "#a#b" tags split into several tags.
            tags.extend(t for t in word.split("#") if t)
        else:
            tags.append(word[1:])

    return {
        "category": category,
        "content": text,
        "tags": tags if tags else None,
        "context": context,
    }
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
# Relation handling functions
|
|
86
|
+
def is_explicit_relation(token: Token) -> bool:
    """Return True when an inline token contains a [[wiki link]]."""
    if token.type != "inline":  # pragma: no cover
        return False

    # Test tokens carry their text in token.tag; real ones in token.content.
    text = (token.tag or token.content).strip()
    return "[[" in text and "]]" in text
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def parse_relation(token: Token) -> Dict[str, Any] | None:
    """Extract relation type, [[target]], and optional (context) from a token.

    Returns None when no non-empty [[target]] is present.
    """
    # Test tokens carry their text in token.tag; real ones in token.content.
    text = (token.tag or token.content).strip()

    target = None
    rel_type = "relates_to"  # default when no text precedes the link
    context = None

    open_idx = text.find("[[")
    close_idx = text.find("]]")

    if open_idx != -1 and close_idx != -1:
        # Any text before the link names the relation type.
        prefix = text[:open_idx].strip()
        if prefix:
            rel_type = prefix

        # The bracketed span is the target.
        target = text[open_idx + 2 : close_idx].strip()

        # A parenthesized suffix after the link is the optional context.
        suffix = text[close_idx + 2 :].strip()
        if suffix.startswith("(") and suffix.endswith(")"):
            context = suffix[1:-1].strip() or None

    if not target:  # pragma: no cover
        return None

    return {"type": rel_type, "target": target, "context": context}
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def parse_inline_relations(content: str) -> List[Dict[str, Any]]:
    """Find wiki-style [[links]] embedded in regular text.

    Handles nesting: for "[[a [[b]] c]]" the outermost target "a [[b]] c"
    is extracted as a single relation. Unbalanced links are ignored.
    """
    found: List[Dict[str, Any]] = []
    cursor = 0

    while True:
        # Locate the next outer-most opener.
        opener = content.find("[[", cursor)
        if opener == -1:  # pragma: no cover
            break

        # Walk forward balancing [[ against ]] to find the matching closer.
        depth = 1
        scan = opener + 2
        closer = -1
        while scan < len(content):
            pair = content[scan : scan + 2]
            if pair == "[[":
                depth += 1
                scan += 2
            elif pair == "]]":
                depth -= 1
                if depth == 0:
                    closer = scan
                    break
                scan += 2
            else:
                scan += 1

        if closer == -1:
            # No matching ]] found; stop scanning.
            break

        link_target = content[opener + 2 : closer].strip()
        if link_target:
            found.append({"type": "links_to", "target": link_target, "context": None})

        cursor = closer + 2

    return found
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def observation_plugin(md: MarkdownIt) -> None:
    """Plugin for parsing observation format:
    - [category] Content text #tag1 #tag2 (context)
    - Content text #tag1 (context) # No category is also valid
    """

    def observation_rule(state: Any) -> None:
        """Attach parsed observations to matching inline tokens."""
        for token in state.tokens:
            # Every token gets a meta dict so downstream code can rely on it.
            token.meta = token.meta or {}

            if token.type == "inline" and is_observation(token):
                parsed = parse_observation(token)
                # Only keep observations that actually carry content.
                if parsed["content"]:
                    token.meta["observation"] = parsed

    # Run after inline parsing so token content is fully populated.
    md.core.ruler.after("inline", "observations", observation_rule)
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def relation_plugin(md: MarkdownIt) -> None:
    """Plugin for parsing relation formats:

    Explicit relations:
    - relation_type [[target]] (context)

    Implicit relations (links in content):
    Some text with [[target]] reference
    """

    def relation_rule(state: Any) -> None:
        """Attach parsed relations to matching inline tokens."""
        inside_list_item = False

        for token in state.tokens:
            # Track list nesting: only list items can hold explicit relations.
            if token.type == "list_item_open":
                inside_list_item = True
            elif token.type == "list_item_close":
                inside_list_item = False

            # Every token gets a meta dict so downstream code can rely on it.
            token.meta = token.meta or {}

            # Only inline tokens carry relation text.
            if token.type != "inline":
                continue

            if inside_list_item and is_explicit_relation(token):
                explicit = parse_relation(token)
                if explicit:
                    token.meta["relations"] = [explicit]
            else:
                # Any other text may still carry inline [[wiki links]].
                text = token.tag or token.content
                if "[[" in text:
                    inline = parse_inline_relations(text)
                    if inline:
                        token.meta["relations"] = token.meta.get("relations", []) + inline

    # Run after inline parsing so token content is fully populated.
    md.core.ruler.after("inline", "relations", relation_rule)
|