basic-memory 0.7.0__py3-none-any.whl → 0.17.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of basic-memory might be problematic. Click here for more details.
- basic_memory/__init__.py +5 -1
- basic_memory/alembic/alembic.ini +119 -0
- basic_memory/alembic/env.py +130 -20
- basic_memory/alembic/migrations.py +4 -9
- basic_memory/alembic/versions/314f1ea54dc4_add_postgres_full_text_search_support_.py +131 -0
- basic_memory/alembic/versions/502b60eaa905_remove_required_from_entity_permalink.py +51 -0
- basic_memory/alembic/versions/5fe1ab1ccebe_add_projects_table.py +120 -0
- basic_memory/alembic/versions/647e7a75e2cd_project_constraint_fix.py +112 -0
- basic_memory/alembic/versions/6830751f5fb6_merge_multiple_heads.py +24 -0
- basic_memory/alembic/versions/9d9c1cb7d8f5_add_mtime_and_size_columns_to_entity_.py +49 -0
- basic_memory/alembic/versions/a1b2c3d4e5f6_fix_project_foreign_keys.py +49 -0
- basic_memory/alembic/versions/a2b3c4d5e6f7_add_search_index_entity_cascade.py +56 -0
- basic_memory/alembic/versions/b3c3938bacdb_relation_to_name_unique_index.py +44 -0
- basic_memory/alembic/versions/cc7172b46608_update_search_index_schema.py +113 -0
- basic_memory/alembic/versions/e7e1f4367280_add_scan_watermark_tracking_to_project.py +37 -0
- basic_memory/alembic/versions/f8a9b2c3d4e5_add_pg_trgm_for_fuzzy_link_resolution.py +239 -0
- basic_memory/alembic/versions/g9a0b3c4d5e6_add_external_id_to_project_and_entity.py +173 -0
- basic_memory/api/app.py +87 -20
- basic_memory/api/container.py +133 -0
- basic_memory/api/routers/__init__.py +4 -1
- basic_memory/api/routers/directory_router.py +84 -0
- basic_memory/api/routers/importer_router.py +152 -0
- basic_memory/api/routers/knowledge_router.py +180 -23
- basic_memory/api/routers/management_router.py +80 -0
- basic_memory/api/routers/memory_router.py +9 -64
- basic_memory/api/routers/project_router.py +460 -0
- basic_memory/api/routers/prompt_router.py +260 -0
- basic_memory/api/routers/resource_router.py +136 -11
- basic_memory/api/routers/search_router.py +5 -5
- basic_memory/api/routers/utils.py +169 -0
- basic_memory/api/template_loader.py +292 -0
- basic_memory/api/v2/__init__.py +35 -0
- basic_memory/api/v2/routers/__init__.py +21 -0
- basic_memory/api/v2/routers/directory_router.py +93 -0
- basic_memory/api/v2/routers/importer_router.py +181 -0
- basic_memory/api/v2/routers/knowledge_router.py +427 -0
- basic_memory/api/v2/routers/memory_router.py +130 -0
- basic_memory/api/v2/routers/project_router.py +359 -0
- basic_memory/api/v2/routers/prompt_router.py +269 -0
- basic_memory/api/v2/routers/resource_router.py +286 -0
- basic_memory/api/v2/routers/search_router.py +73 -0
- basic_memory/cli/app.py +80 -10
- basic_memory/cli/auth.py +300 -0
- basic_memory/cli/commands/__init__.py +15 -2
- basic_memory/cli/commands/cloud/__init__.py +6 -0
- basic_memory/cli/commands/cloud/api_client.py +127 -0
- basic_memory/cli/commands/cloud/bisync_commands.py +110 -0
- basic_memory/cli/commands/cloud/cloud_utils.py +108 -0
- basic_memory/cli/commands/cloud/core_commands.py +195 -0
- basic_memory/cli/commands/cloud/rclone_commands.py +397 -0
- basic_memory/cli/commands/cloud/rclone_config.py +110 -0
- basic_memory/cli/commands/cloud/rclone_installer.py +263 -0
- basic_memory/cli/commands/cloud/upload.py +240 -0
- basic_memory/cli/commands/cloud/upload_command.py +124 -0
- basic_memory/cli/commands/command_utils.py +99 -0
- basic_memory/cli/commands/db.py +87 -12
- basic_memory/cli/commands/format.py +198 -0
- basic_memory/cli/commands/import_chatgpt.py +47 -223
- basic_memory/cli/commands/import_claude_conversations.py +48 -171
- basic_memory/cli/commands/import_claude_projects.py +53 -160
- basic_memory/cli/commands/import_memory_json.py +55 -111
- basic_memory/cli/commands/mcp.py +67 -11
- basic_memory/cli/commands/project.py +889 -0
- basic_memory/cli/commands/status.py +52 -34
- basic_memory/cli/commands/telemetry.py +81 -0
- basic_memory/cli/commands/tool.py +341 -0
- basic_memory/cli/container.py +84 -0
- basic_memory/cli/main.py +14 -6
- basic_memory/config.py +580 -26
- basic_memory/db.py +285 -28
- basic_memory/deps/__init__.py +293 -0
- basic_memory/deps/config.py +26 -0
- basic_memory/deps/db.py +56 -0
- basic_memory/deps/importers.py +200 -0
- basic_memory/deps/projects.py +238 -0
- basic_memory/deps/repositories.py +179 -0
- basic_memory/deps/services.py +480 -0
- basic_memory/deps.py +16 -185
- basic_memory/file_utils.py +318 -54
- basic_memory/ignore_utils.py +297 -0
- basic_memory/importers/__init__.py +27 -0
- basic_memory/importers/base.py +100 -0
- basic_memory/importers/chatgpt_importer.py +245 -0
- basic_memory/importers/claude_conversations_importer.py +192 -0
- basic_memory/importers/claude_projects_importer.py +184 -0
- basic_memory/importers/memory_json_importer.py +128 -0
- basic_memory/importers/utils.py +61 -0
- basic_memory/markdown/entity_parser.py +182 -23
- basic_memory/markdown/markdown_processor.py +70 -7
- basic_memory/markdown/plugins.py +43 -23
- basic_memory/markdown/schemas.py +1 -1
- basic_memory/markdown/utils.py +38 -14
- basic_memory/mcp/async_client.py +135 -4
- basic_memory/mcp/clients/__init__.py +28 -0
- basic_memory/mcp/clients/directory.py +70 -0
- basic_memory/mcp/clients/knowledge.py +176 -0
- basic_memory/mcp/clients/memory.py +120 -0
- basic_memory/mcp/clients/project.py +89 -0
- basic_memory/mcp/clients/resource.py +71 -0
- basic_memory/mcp/clients/search.py +65 -0
- basic_memory/mcp/container.py +110 -0
- basic_memory/mcp/project_context.py +155 -0
- basic_memory/mcp/prompts/__init__.py +19 -0
- basic_memory/mcp/prompts/ai_assistant_guide.py +70 -0
- basic_memory/mcp/prompts/continue_conversation.py +62 -0
- basic_memory/mcp/prompts/recent_activity.py +188 -0
- basic_memory/mcp/prompts/search.py +57 -0
- basic_memory/mcp/prompts/utils.py +162 -0
- basic_memory/mcp/resources/ai_assistant_guide.md +283 -0
- basic_memory/mcp/resources/project_info.py +71 -0
- basic_memory/mcp/server.py +61 -9
- basic_memory/mcp/tools/__init__.py +33 -21
- basic_memory/mcp/tools/build_context.py +120 -0
- basic_memory/mcp/tools/canvas.py +152 -0
- basic_memory/mcp/tools/chatgpt_tools.py +190 -0
- basic_memory/mcp/tools/delete_note.py +249 -0
- basic_memory/mcp/tools/edit_note.py +325 -0
- basic_memory/mcp/tools/list_directory.py +157 -0
- basic_memory/mcp/tools/move_note.py +549 -0
- basic_memory/mcp/tools/project_management.py +204 -0
- basic_memory/mcp/tools/read_content.py +281 -0
- basic_memory/mcp/tools/read_note.py +265 -0
- basic_memory/mcp/tools/recent_activity.py +528 -0
- basic_memory/mcp/tools/search.py +377 -24
- basic_memory/mcp/tools/utils.py +402 -16
- basic_memory/mcp/tools/view_note.py +78 -0
- basic_memory/mcp/tools/write_note.py +230 -0
- basic_memory/models/__init__.py +3 -2
- basic_memory/models/knowledge.py +82 -17
- basic_memory/models/project.py +93 -0
- basic_memory/models/search.py +68 -8
- basic_memory/project_resolver.py +222 -0
- basic_memory/repository/__init__.py +2 -0
- basic_memory/repository/entity_repository.py +437 -8
- basic_memory/repository/observation_repository.py +36 -3
- basic_memory/repository/postgres_search_repository.py +451 -0
- basic_memory/repository/project_info_repository.py +10 -0
- basic_memory/repository/project_repository.py +140 -0
- basic_memory/repository/relation_repository.py +79 -4
- basic_memory/repository/repository.py +148 -29
- basic_memory/repository/search_index_row.py +95 -0
- basic_memory/repository/search_repository.py +79 -268
- basic_memory/repository/search_repository_base.py +241 -0
- basic_memory/repository/sqlite_search_repository.py +437 -0
- basic_memory/runtime.py +61 -0
- basic_memory/schemas/__init__.py +22 -9
- basic_memory/schemas/base.py +131 -12
- basic_memory/schemas/cloud.py +50 -0
- basic_memory/schemas/directory.py +31 -0
- basic_memory/schemas/importer.py +35 -0
- basic_memory/schemas/memory.py +194 -25
- basic_memory/schemas/project_info.py +213 -0
- basic_memory/schemas/prompt.py +90 -0
- basic_memory/schemas/request.py +56 -2
- basic_memory/schemas/response.py +85 -28
- basic_memory/schemas/search.py +36 -35
- basic_memory/schemas/sync_report.py +72 -0
- basic_memory/schemas/v2/__init__.py +27 -0
- basic_memory/schemas/v2/entity.py +133 -0
- basic_memory/schemas/v2/resource.py +47 -0
- basic_memory/services/__init__.py +2 -1
- basic_memory/services/context_service.py +451 -138
- basic_memory/services/directory_service.py +310 -0
- basic_memory/services/entity_service.py +636 -71
- basic_memory/services/exceptions.py +21 -0
- basic_memory/services/file_service.py +402 -33
- basic_memory/services/initialization.py +216 -0
- basic_memory/services/link_resolver.py +50 -56
- basic_memory/services/project_service.py +888 -0
- basic_memory/services/search_service.py +232 -37
- basic_memory/sync/__init__.py +4 -2
- basic_memory/sync/background_sync.py +26 -0
- basic_memory/sync/coordinator.py +160 -0
- basic_memory/sync/sync_service.py +1200 -109
- basic_memory/sync/watch_service.py +432 -135
- basic_memory/telemetry.py +249 -0
- basic_memory/templates/prompts/continue_conversation.hbs +110 -0
- basic_memory/templates/prompts/search.hbs +101 -0
- basic_memory/utils.py +407 -54
- basic_memory-0.17.4.dist-info/METADATA +617 -0
- basic_memory-0.17.4.dist-info/RECORD +193 -0
- {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/WHEEL +1 -1
- {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/entry_points.txt +1 -0
- basic_memory/alembic/README +0 -1
- basic_memory/cli/commands/sync.py +0 -206
- basic_memory/cli/commands/tools.py +0 -157
- basic_memory/mcp/tools/knowledge.py +0 -68
- basic_memory/mcp/tools/memory.py +0 -170
- basic_memory/mcp/tools/notes.py +0 -202
- basic_memory/schemas/discovery.py +0 -28
- basic_memory/sync/file_change_scanner.py +0 -158
- basic_memory/sync/utils.py +0 -31
- basic_memory-0.7.0.dist-info/METADATA +0 -378
- basic_memory-0.7.0.dist-info/RECORD +0 -82
- {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/licenses/LICENSE +0 -0
basic_memory/markdown/entity_parser.py
CHANGED
|
@@ -4,25 +4,105 @@ Uses markdown-it with plugins to parse structured data from markdown content.
|
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
6
|
from dataclasses import dataclass, field
|
|
7
|
+
from datetime import date, datetime
|
|
7
8
|
from pathlib import Path
|
|
8
|
-
from datetime import datetime
|
|
9
9
|
from typing import Any, Optional
|
|
10
|
-
import dateparser
|
|
11
10
|
|
|
12
|
-
|
|
11
|
+
import dateparser
|
|
13
12
|
import frontmatter
|
|
13
|
+
import yaml
|
|
14
|
+
from loguru import logger
|
|
15
|
+
from markdown_it import MarkdownIt
|
|
14
16
|
|
|
15
17
|
from basic_memory.markdown.plugins import observation_plugin, relation_plugin
|
|
16
18
|
from basic_memory.markdown.schemas import (
|
|
17
|
-
EntityMarkdown,
|
|
18
19
|
EntityFrontmatter,
|
|
20
|
+
EntityMarkdown,
|
|
19
21
|
Observation,
|
|
20
22
|
Relation,
|
|
21
23
|
)
|
|
24
|
+
from basic_memory.utils import parse_tags
|
|
25
|
+
|
|
22
26
|
|
|
23
27
|
md = MarkdownIt().use(observation_plugin).use(relation_plugin)
|
|
24
28
|
|
|
25
29
|
|
|
30
|
+
def normalize_frontmatter_value(value: Any) -> Any:
|
|
31
|
+
"""Normalize frontmatter values to safe types for processing.
|
|
32
|
+
|
|
33
|
+
PyYAML automatically converts various string-like values into native Python types:
|
|
34
|
+
- Date strings ("2025-10-24") → datetime.date objects
|
|
35
|
+
- Numbers ("1.0") → int or float
|
|
36
|
+
- Booleans ("true") → bool
|
|
37
|
+
- Lists → list objects
|
|
38
|
+
|
|
39
|
+
This can cause AttributeError when code expects strings and calls string methods
|
|
40
|
+
like .strip() on these values (see GitHub issue #236).
|
|
41
|
+
|
|
42
|
+
This function normalizes all frontmatter values to safe types:
|
|
43
|
+
- Dates/datetimes → ISO format strings
|
|
44
|
+
- Numbers (int/float) → strings
|
|
45
|
+
- Booleans → strings ("True"/"False")
|
|
46
|
+
- Lists → preserved as lists, but items are recursively normalized
|
|
47
|
+
- Dicts → preserved as dicts, but values are recursively normalized
|
|
48
|
+
- Strings → kept as-is
|
|
49
|
+
- None → kept as None
|
|
50
|
+
|
|
51
|
+
Args:
|
|
52
|
+
value: The frontmatter value to normalize
|
|
53
|
+
|
|
54
|
+
Returns:
|
|
55
|
+
The normalized value safe for string operations
|
|
56
|
+
|
|
57
|
+
Example:
|
|
58
|
+
>>> normalize_frontmatter_value(datetime.date(2025, 10, 24))
|
|
59
|
+
'2025-10-24'
|
|
60
|
+
>>> normalize_frontmatter_value([datetime.date(2025, 10, 24), "tag", 123])
|
|
61
|
+
['2025-10-24', 'tag', '123']
|
|
62
|
+
>>> normalize_frontmatter_value(True)
|
|
63
|
+
'True'
|
|
64
|
+
"""
|
|
65
|
+
# Convert date/datetime objects to ISO format strings
|
|
66
|
+
if isinstance(value, datetime):
|
|
67
|
+
return value.isoformat()
|
|
68
|
+
if isinstance(value, date):
|
|
69
|
+
return value.isoformat()
|
|
70
|
+
|
|
71
|
+
# Convert boolean to string (must come before int check since bool is subclass of int)
|
|
72
|
+
if isinstance(value, bool):
|
|
73
|
+
return str(value)
|
|
74
|
+
|
|
75
|
+
# Convert numbers to strings
|
|
76
|
+
if isinstance(value, (int, float)):
|
|
77
|
+
return str(value)
|
|
78
|
+
|
|
79
|
+
# Recursively process lists (preserve as list, normalize items)
|
|
80
|
+
if isinstance(value, list):
|
|
81
|
+
return [normalize_frontmatter_value(item) for item in value]
|
|
82
|
+
|
|
83
|
+
# Recursively process dicts (preserve as dict, normalize values)
|
|
84
|
+
if isinstance(value, dict):
|
|
85
|
+
return {key: normalize_frontmatter_value(val) for key, val in value.items()}
|
|
86
|
+
|
|
87
|
+
# Keep strings and None as-is
|
|
88
|
+
return value
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def normalize_frontmatter_metadata(metadata: dict) -> dict:
|
|
92
|
+
"""Normalize all values in frontmatter metadata dict.
|
|
93
|
+
|
|
94
|
+
Converts date/datetime objects to ISO format strings to prevent
|
|
95
|
+
AttributeError when code expects strings (GitHub issue #236).
|
|
96
|
+
|
|
97
|
+
Args:
|
|
98
|
+
metadata: The frontmatter metadata dictionary
|
|
99
|
+
|
|
100
|
+
Returns:
|
|
101
|
+
A new dictionary with all values normalized
|
|
102
|
+
"""
|
|
103
|
+
return {key: normalize_frontmatter_value(value) for key, value in metadata.items()}
|
|
104
|
+
|
|
105
|
+
|
|
26
106
|
@dataclass
|
|
27
107
|
class EntityContent:
|
|
28
108
|
content: str
|
|
@@ -56,11 +136,11 @@ def parse(content: str) -> EntityContent:
|
|
|
56
136
|
)
|
|
57
137
|
|
|
58
138
|
|
|
59
|
-
def parse_tags(tags: Any) -> list[str]:
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
139
|
+
# def parse_tags(tags: Any) -> list[str]:
|
|
140
|
+
# """Parse tags into list of strings."""
|
|
141
|
+
# if isinstance(tags, (list, tuple)):
|
|
142
|
+
# return [str(t).strip() for t in tags if str(t).strip()]
|
|
143
|
+
# return [t.strip() for t in tags.split(",") if t.strip()]
|
|
64
144
|
|
|
65
145
|
|
|
66
146
|
class EntityParser:
|
|
@@ -88,33 +168,112 @@ class EntityParser:
|
|
|
88
168
|
return parsed
|
|
89
169
|
return None
|
|
90
170
|
|
|
91
|
-
async def parse_file(self,
|
|
171
|
+
async def parse_file(self, path: Path | str) -> EntityMarkdown:
|
|
92
172
|
"""Parse markdown file into EntityMarkdown."""
|
|
93
173
|
|
|
94
|
-
|
|
174
|
+
# Check if the path is already absolute
|
|
175
|
+
if (
|
|
176
|
+
isinstance(path, Path)
|
|
177
|
+
and path.is_absolute()
|
|
178
|
+
or (isinstance(path, str) and Path(path).is_absolute())
|
|
179
|
+
):
|
|
180
|
+
absolute_path = Path(path)
|
|
181
|
+
else:
|
|
182
|
+
absolute_path = self.get_file_path(path)
|
|
183
|
+
|
|
95
184
|
# Parse frontmatter and content using python-frontmatter
|
|
96
|
-
|
|
185
|
+
file_content = absolute_path.read_text(encoding="utf-8")
|
|
186
|
+
return await self.parse_file_content(absolute_path, file_content)
|
|
97
187
|
|
|
98
|
-
|
|
99
|
-
|
|
188
|
+
def get_file_path(self, path):
|
|
189
|
+
"""Get absolute path for a file using the base path for the project."""
|
|
190
|
+
return self.base_path / path
|
|
100
191
|
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
metadata["type"] = post.metadata.get("type", "note")
|
|
104
|
-
metadata["tags"] = parse_tags(post.metadata.get("tags", []))
|
|
192
|
+
async def parse_file_content(self, absolute_path, file_content):
|
|
193
|
+
"""Parse markdown content from file stats.
|
|
105
194
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
195
|
+
Delegates to parse_markdown_content() for actual parsing logic.
|
|
196
|
+
Exists for backwards compatibility with code that passes file paths.
|
|
197
|
+
"""
|
|
198
|
+
# Extract file stat info for timestamps
|
|
199
|
+
file_stats = absolute_path.stat()
|
|
200
|
+
|
|
201
|
+
# Delegate to parse_markdown_content with timestamps from file stats
|
|
202
|
+
return await self.parse_markdown_content(
|
|
203
|
+
file_path=absolute_path,
|
|
204
|
+
content=file_content,
|
|
205
|
+
mtime=file_stats.st_mtime,
|
|
206
|
+
ctime=file_stats.st_ctime,
|
|
109
207
|
)
|
|
110
208
|
|
|
209
|
+
async def parse_markdown_content(
|
|
210
|
+
self,
|
|
211
|
+
file_path: Path,
|
|
212
|
+
content: str,
|
|
213
|
+
mtime: Optional[float] = None,
|
|
214
|
+
ctime: Optional[float] = None,
|
|
215
|
+
) -> EntityMarkdown:
|
|
216
|
+
"""Parse markdown content without requiring file to exist on disk.
|
|
217
|
+
|
|
218
|
+
Useful for parsing content from S3 or other remote sources where the file
|
|
219
|
+
is not available locally.
|
|
220
|
+
|
|
221
|
+
Args:
|
|
222
|
+
file_path: Path for metadata (doesn't need to exist on disk)
|
|
223
|
+
content: Markdown content as string
|
|
224
|
+
mtime: Optional modification time (Unix timestamp)
|
|
225
|
+
ctime: Optional creation time (Unix timestamp)
|
|
226
|
+
|
|
227
|
+
Returns:
|
|
228
|
+
EntityMarkdown with parsed content
|
|
229
|
+
"""
|
|
230
|
+
# Strip BOM before parsing (can be present in files from Windows or certain sources)
|
|
231
|
+
# See issue #452
|
|
232
|
+
from basic_memory.file_utils import strip_bom
|
|
233
|
+
|
|
234
|
+
content = strip_bom(content)
|
|
235
|
+
|
|
236
|
+
# Parse frontmatter with proper error handling for malformed YAML
|
|
237
|
+
try:
|
|
238
|
+
post = frontmatter.loads(content)
|
|
239
|
+
except yaml.YAMLError as e:
|
|
240
|
+
logger.warning(
|
|
241
|
+
f"Failed to parse YAML frontmatter in {file_path}: {e}. "
|
|
242
|
+
f"Treating file as plain markdown without frontmatter."
|
|
243
|
+
)
|
|
244
|
+
post = frontmatter.Post(content, metadata={})
|
|
245
|
+
|
|
246
|
+
# Normalize frontmatter values
|
|
247
|
+
metadata = normalize_frontmatter_metadata(post.metadata)
|
|
248
|
+
|
|
249
|
+
# Ensure required fields have defaults
|
|
250
|
+
title = metadata.get("title")
|
|
251
|
+
if not title or title == "None":
|
|
252
|
+
metadata["title"] = file_path.stem
|
|
253
|
+
else:
|
|
254
|
+
metadata["title"] = title
|
|
255
|
+
|
|
256
|
+
entity_type = metadata.get("type")
|
|
257
|
+
metadata["type"] = entity_type if entity_type is not None else "note"
|
|
258
|
+
|
|
259
|
+
tags = parse_tags(metadata.get("tags", [])) # pyright: ignore
|
|
260
|
+
if tags:
|
|
261
|
+
metadata["tags"] = tags
|
|
262
|
+
|
|
263
|
+
# Parse content for observations and relations
|
|
264
|
+
entity_frontmatter = EntityFrontmatter(metadata=metadata)
|
|
111
265
|
entity_content = parse(post.content)
|
|
112
266
|
|
|
267
|
+
# Use provided timestamps or current time as fallback
|
|
268
|
+
now = datetime.now().astimezone()
|
|
269
|
+
created = datetime.fromtimestamp(ctime).astimezone() if ctime else now
|
|
270
|
+
modified = datetime.fromtimestamp(mtime).astimezone() if mtime else now
|
|
271
|
+
|
|
113
272
|
return EntityMarkdown(
|
|
114
273
|
frontmatter=entity_frontmatter,
|
|
115
274
|
content=post.content,
|
|
116
275
|
observations=entity_content.observations,
|
|
117
276
|
relations=entity_content.relations,
|
|
118
|
-
created=
|
|
119
|
-
modified=
|
|
277
|
+
created=created,
|
|
278
|
+
modified=modified,
|
|
120
279
|
)
|
basic_memory/markdown/markdown_processor.py
CHANGED
|
@@ -1,15 +1,19 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
|
-
from typing import Optional
|
|
2
|
+
from typing import TYPE_CHECKING, Optional
|
|
3
3
|
from collections import OrderedDict
|
|
4
4
|
|
|
5
|
-
import frontmatter
|
|
6
5
|
from frontmatter import Post
|
|
7
6
|
from loguru import logger
|
|
8
7
|
|
|
8
|
+
|
|
9
9
|
from basic_memory import file_utils
|
|
10
|
+
from basic_memory.file_utils import dump_frontmatter
|
|
10
11
|
from basic_memory.markdown.entity_parser import EntityParser
|
|
11
12
|
from basic_memory.markdown.schemas import EntityMarkdown, Observation, Relation
|
|
12
13
|
|
|
14
|
+
if TYPE_CHECKING: # pragma: no cover
|
|
15
|
+
from basic_memory.config import BasicMemoryConfig
|
|
16
|
+
|
|
13
17
|
|
|
14
18
|
class DirtyFileError(Exception):
|
|
15
19
|
"""Raised when attempting to write to a file that has been modified."""
|
|
@@ -35,9 +39,14 @@ class MarkdownProcessor:
|
|
|
35
39
|
3. Track schema changes (that's done by the database)
|
|
36
40
|
"""
|
|
37
41
|
|
|
38
|
-
def __init__(
|
|
39
|
-
|
|
42
|
+
def __init__(
|
|
43
|
+
self,
|
|
44
|
+
entity_parser: EntityParser,
|
|
45
|
+
app_config: Optional["BasicMemoryConfig"] = None,
|
|
46
|
+
):
|
|
47
|
+
"""Initialize processor with parser and optional config."""
|
|
40
48
|
self.entity_parser = entity_parser
|
|
49
|
+
self.app_config = app_config
|
|
41
50
|
|
|
42
51
|
async def read_file(self, path: Path) -> EntityMarkdown:
|
|
43
52
|
"""Read and parse file into EntityMarkdown schema.
|
|
@@ -83,7 +92,7 @@ class MarkdownProcessor:
|
|
|
83
92
|
"""
|
|
84
93
|
# Dirty check if needed
|
|
85
94
|
if expected_checksum is not None:
|
|
86
|
-
current_content = path.read_text()
|
|
95
|
+
current_content = path.read_text(encoding="utf-8")
|
|
87
96
|
current_checksum = await file_utils.compute_checksum(current_content)
|
|
88
97
|
if current_checksum != expected_checksum:
|
|
89
98
|
raise DirtyFileError(f"File {path} has been modified")
|
|
@@ -115,14 +124,68 @@ class MarkdownProcessor:
|
|
|
115
124
|
|
|
116
125
|
# Create Post object for frontmatter
|
|
117
126
|
post = Post(content, **frontmatter_dict)
|
|
118
|
-
final_content =
|
|
127
|
+
final_content = dump_frontmatter(post)
|
|
119
128
|
|
|
120
129
|
logger.debug(f"writing file {path} with content:\n{final_content}")
|
|
121
130
|
|
|
122
131
|
# Write atomically and return checksum of updated file
|
|
123
132
|
path.parent.mkdir(parents=True, exist_ok=True)
|
|
124
133
|
await file_utils.write_file_atomic(path, final_content)
|
|
125
|
-
|
|
134
|
+
|
|
135
|
+
# Format file if configured (MarkdownProcessor always handles markdown files)
|
|
136
|
+
content_for_checksum = final_content
|
|
137
|
+
if self.app_config:
|
|
138
|
+
formatted_content = await file_utils.format_file( # pragma: no cover
|
|
139
|
+
path, self.app_config, is_markdown=True
|
|
140
|
+
)
|
|
141
|
+
if formatted_content is not None: # pragma: no cover
|
|
142
|
+
content_for_checksum = formatted_content # pragma: no cover
|
|
143
|
+
|
|
144
|
+
return await file_utils.compute_checksum(content_for_checksum)
|
|
145
|
+
|
|
146
|
+
def to_markdown_string(self, markdown: EntityMarkdown) -> str:
|
|
147
|
+
"""Convert EntityMarkdown to markdown string with frontmatter.
|
|
148
|
+
|
|
149
|
+
This method handles serialization only - it does not write to files.
|
|
150
|
+
Use FileService.write_file() to persist the output.
|
|
151
|
+
|
|
152
|
+
This enables cloud environments to override file operations via
|
|
153
|
+
dependency injection while reusing the serialization logic.
|
|
154
|
+
|
|
155
|
+
Args:
|
|
156
|
+
markdown: EntityMarkdown schema to serialize
|
|
157
|
+
|
|
158
|
+
Returns:
|
|
159
|
+
Complete markdown string with frontmatter, content, and structured sections
|
|
160
|
+
"""
|
|
161
|
+
# Convert frontmatter to dict
|
|
162
|
+
frontmatter_dict = OrderedDict()
|
|
163
|
+
frontmatter_dict["title"] = markdown.frontmatter.title
|
|
164
|
+
frontmatter_dict["type"] = markdown.frontmatter.type
|
|
165
|
+
frontmatter_dict["permalink"] = markdown.frontmatter.permalink
|
|
166
|
+
|
|
167
|
+
metadata = markdown.frontmatter.metadata or {}
|
|
168
|
+
for k, v in metadata.items():
|
|
169
|
+
frontmatter_dict[k] = v
|
|
170
|
+
|
|
171
|
+
# Start with user content (or minimal title for new files)
|
|
172
|
+
content = markdown.content or f"# {markdown.frontmatter.title}\n"
|
|
173
|
+
|
|
174
|
+
# Add structured sections with proper spacing
|
|
175
|
+
content = content.rstrip() # Remove trailing whitespace
|
|
176
|
+
|
|
177
|
+
# Add a blank line if we have semantic content
|
|
178
|
+
if markdown.observations or markdown.relations:
|
|
179
|
+
content += "\n"
|
|
180
|
+
|
|
181
|
+
if markdown.observations:
|
|
182
|
+
content += self.format_observations(markdown.observations)
|
|
183
|
+
if markdown.relations:
|
|
184
|
+
content += self.format_relations(markdown.relations)
|
|
185
|
+
|
|
186
|
+
# Create Post object for frontmatter
|
|
187
|
+
post = Post(content, **frontmatter_dict)
|
|
188
|
+
return dump_frontmatter(post)
|
|
126
189
|
|
|
127
190
|
def format_observations(self, observations: list[Observation]) -> str:
|
|
128
191
|
"""Format observations section in standard way.
|
basic_memory/markdown/plugins.py
CHANGED
|
@@ -8,34 +8,52 @@ from markdown_it.token import Token
|
|
|
8
8
|
# Observation handling functions
|
|
9
9
|
def is_observation(token: Token) -> bool:
|
|
10
10
|
"""Check if token looks like our observation format."""
|
|
11
|
+
import re
|
|
12
|
+
|
|
11
13
|
if token.type != "inline": # pragma: no cover
|
|
12
14
|
return False
|
|
13
|
-
|
|
14
|
-
content = token.content.strip()
|
|
15
|
+
# Use token.tag which contains the actual content for test tokens, fallback to content
|
|
16
|
+
content = (token.tag or token.content).strip()
|
|
15
17
|
if not content: # pragma: no cover
|
|
16
18
|
return False
|
|
17
|
-
|
|
18
19
|
# if it's a markdown_task, return false
|
|
19
20
|
if content.startswith("[ ]") or content.startswith("[x]") or content.startswith("[-]"):
|
|
20
21
|
return False
|
|
21
22
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
23
|
+
# Exclude markdown links: [text](url)
|
|
24
|
+
if re.match(r"^\[.*?\]\(.*?\)$", content):
|
|
25
|
+
return False
|
|
26
|
+
|
|
27
|
+
# Exclude wiki links: [[text]]
|
|
28
|
+
if re.match(r"^\[\[.*?\]\]$", content):
|
|
29
|
+
return False
|
|
30
|
+
|
|
31
|
+
# Check for proper observation format: [category] content
|
|
32
|
+
match = re.match(r"^\[([^\[\]()]+)\]\s+(.+)", content)
|
|
33
|
+
# Check for standalone hashtags (words starting with #)
|
|
34
|
+
# This excludes # in HTML attributes like color="#4285F4"
|
|
35
|
+
has_tags = any(part.startswith("#") for part in content.split())
|
|
36
|
+
return bool(match) or has_tags
|
|
25
37
|
|
|
26
38
|
|
|
27
39
|
def parse_observation(token: Token) -> Dict[str, Any]:
|
|
28
40
|
"""Extract observation parts from token."""
|
|
29
|
-
|
|
30
|
-
|
|
41
|
+
import re
|
|
42
|
+
|
|
43
|
+
# Use token.tag which contains the actual content for test tokens, fallback to content
|
|
44
|
+
content = (token.tag or token.content).strip()
|
|
31
45
|
|
|
32
|
-
# Parse [category]
|
|
46
|
+
# Parse [category] with regex
|
|
47
|
+
match = re.match(r"^\[([^\[\]()]+)\]\s+(.+)", content)
|
|
33
48
|
category = None
|
|
34
|
-
if
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
49
|
+
if match:
|
|
50
|
+
category = match.group(1).strip()
|
|
51
|
+
content = match.group(2).strip()
|
|
52
|
+
else:
|
|
53
|
+
# Handle empty brackets [] followed by content
|
|
54
|
+
empty_match = re.match(r"^\[\]\s+(.+)", content)
|
|
55
|
+
if empty_match:
|
|
56
|
+
content = empty_match.group(1).strip()
|
|
39
57
|
|
|
40
58
|
# Parse (context)
|
|
41
59
|
context = None
|
|
@@ -50,9 +68,7 @@ def parse_observation(token: Token) -> Dict[str, Any]:
|
|
|
50
68
|
parts = content.split()
|
|
51
69
|
for part in parts:
|
|
52
70
|
if part.startswith("#"):
|
|
53
|
-
# Handle multiple #tags stuck together
|
|
54
71
|
if "#" in part[1:]:
|
|
55
|
-
# Split on # but keep non-empty tags
|
|
56
72
|
subtags = [t for t in part.split("#") if t]
|
|
57
73
|
tags.extend(subtags)
|
|
58
74
|
else:
|
|
@@ -72,14 +88,16 @@ def is_explicit_relation(token: Token) -> bool:
|
|
|
72
88
|
if token.type != "inline": # pragma: no cover
|
|
73
89
|
return False
|
|
74
90
|
|
|
75
|
-
|
|
91
|
+
# Use token.tag which contains the actual content for test tokens, fallback to content
|
|
92
|
+
content = (token.tag or token.content).strip()
|
|
76
93
|
return "[[" in content and "]]" in content
|
|
77
94
|
|
|
78
95
|
|
|
79
96
|
def parse_relation(token: Token) -> Dict[str, Any] | None:
|
|
80
97
|
"""Extract relation parts from token."""
|
|
81
98
|
# Remove bullet point if present
|
|
82
|
-
|
|
99
|
+
# Use token.tag which contains the actual content for test tokens, fallback to content
|
|
100
|
+
content = (token.tag or token.content).strip()
|
|
83
101
|
|
|
84
102
|
# Extract [[target]]
|
|
85
103
|
target = None
|
|
@@ -144,7 +162,7 @@ def parse_inline_relations(content: str) -> List[Dict[str, Any]]:
|
|
|
144
162
|
|
|
145
163
|
target = content[start + 2 : end].strip()
|
|
146
164
|
if target:
|
|
147
|
-
relations.append({"type": "
|
|
165
|
+
relations.append({"type": "links_to", "target": target, "context": None})
|
|
148
166
|
|
|
149
167
|
start = end + 2
|
|
150
168
|
|
|
@@ -213,10 +231,12 @@ def relation_plugin(md: MarkdownIt) -> None:
|
|
|
213
231
|
token.meta["relations"] = [rel]
|
|
214
232
|
|
|
215
233
|
# Always check for inline links in any text
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
if
|
|
219
|
-
|
|
234
|
+
else:
|
|
235
|
+
content = token.tag or token.content
|
|
236
|
+
if "[[" in content:
|
|
237
|
+
rels = parse_inline_relations(content)
|
|
238
|
+
if rels:
|
|
239
|
+
token.meta["relations"] = token.meta.get("relations", []) + rels
|
|
220
240
|
|
|
221
241
|
# Add the rule after inline processing
|
|
222
242
|
md.core.ruler.after("inline", "relations", relation_rule)
|
basic_memory/markdown/schemas.py
CHANGED
|
@@ -42,7 +42,7 @@ class EntityFrontmatter(BaseModel):
|
|
|
42
42
|
|
|
43
43
|
@property
|
|
44
44
|
def tags(self) -> List[str]:
|
|
45
|
-
return self.metadata.get("tags") if self.metadata else
|
|
45
|
+
return self.metadata.get("tags") if self.metadata else None # pyright: ignore
|
|
46
46
|
|
|
47
47
|
@property
|
|
48
48
|
def title(self) -> str:
|
basic_memory/markdown/utils.py
CHANGED
|
@@ -1,17 +1,22 @@
|
|
|
1
1
|
"""Utilities for converting between markdown and entity models."""
|
|
2
2
|
|
|
3
3
|
from pathlib import Path
|
|
4
|
-
from typing import
|
|
4
|
+
from typing import Any, Optional
|
|
5
|
+
|
|
5
6
|
|
|
6
7
|
from frontmatter import Post
|
|
7
8
|
|
|
9
|
+
from basic_memory.file_utils import has_frontmatter, remove_frontmatter, parse_frontmatter
|
|
8
10
|
from basic_memory.markdown import EntityMarkdown
|
|
9
|
-
from basic_memory.models import Entity
|
|
10
|
-
from basic_memory.
|
|
11
|
+
from basic_memory.models import Entity
|
|
12
|
+
from basic_memory.models import Observation as ObservationModel
|
|
11
13
|
|
|
12
14
|
|
|
13
15
|
def entity_model_from_markdown(
|
|
14
|
-
file_path: Path,
|
|
16
|
+
file_path: Path,
|
|
17
|
+
markdown: EntityMarkdown,
|
|
18
|
+
entity: Optional[Entity] = None,
|
|
19
|
+
project_id: Optional[int] = None,
|
|
15
20
|
) -> Entity:
|
|
16
21
|
"""
|
|
17
22
|
Convert markdown entity to model. Does not include relations.
|
|
@@ -20,6 +25,7 @@ def entity_model_from_markdown(
|
|
|
20
25
|
file_path: Path to the markdown file
|
|
21
26
|
markdown: Parsed markdown entity
|
|
22
27
|
entity: Optional existing entity to update
|
|
28
|
+
project_id: Project ID for new observations (uses entity.project_id if not provided)
|
|
23
29
|
|
|
24
30
|
Returns:
|
|
25
31
|
Entity model populated from markdown
|
|
@@ -31,17 +37,16 @@ def entity_model_from_markdown(
|
|
|
31
37
|
if not markdown.created or not markdown.modified: # pragma: no cover
|
|
32
38
|
raise ValueError("Both created and modified dates are required in markdown")
|
|
33
39
|
|
|
34
|
-
# Generate permalink if not provided
|
|
35
|
-
permalink = markdown.frontmatter.permalink or generate_permalink(file_path)
|
|
36
|
-
|
|
37
40
|
# Create or update entity
|
|
38
41
|
model = entity or Entity()
|
|
39
42
|
|
|
40
43
|
# Update basic fields
|
|
41
44
|
model.title = markdown.frontmatter.title
|
|
42
45
|
model.entity_type = markdown.frontmatter.type
|
|
43
|
-
|
|
44
|
-
|
|
46
|
+
# Only update permalink if it exists in frontmatter, otherwise preserve existing
|
|
47
|
+
if markdown.frontmatter.permalink is not None:
|
|
48
|
+
model.permalink = markdown.frontmatter.permalink
|
|
49
|
+
model.file_path = file_path.as_posix()
|
|
45
50
|
model.content_type = "text/markdown"
|
|
46
51
|
model.created_at = markdown.created
|
|
47
52
|
model.updated_at = markdown.modified
|
|
@@ -50,9 +55,13 @@ def entity_model_from_markdown(
|
|
|
50
55
|
metadata = markdown.frontmatter.metadata or {}
|
|
51
56
|
model.entity_metadata = {k: str(v) for k, v in metadata.items() if v is not None}
|
|
52
57
|
|
|
58
|
+
# Get project_id from entity if not provided
|
|
59
|
+
obs_project_id = project_id or (model.project_id if hasattr(model, "project_id") else None)
|
|
60
|
+
|
|
53
61
|
# Convert observations
|
|
54
62
|
model.observations = [
|
|
55
63
|
ObservationModel(
|
|
64
|
+
project_id=obs_project_id,
|
|
56
65
|
content=obs.content,
|
|
57
66
|
category=obs.category,
|
|
58
67
|
context=obs.context,
|
|
@@ -76,18 +85,33 @@ async def schema_to_markdown(schema: Any) -> Post:
|
|
|
76
85
|
"""
|
|
77
86
|
# Extract content and metadata
|
|
78
87
|
content = schema.content or ""
|
|
79
|
-
|
|
88
|
+
entity_metadata = dict(schema.entity_metadata or {})
|
|
89
|
+
|
|
90
|
+
# if the content contains frontmatter, remove it and merge
|
|
91
|
+
if has_frontmatter(content):
|
|
92
|
+
content_frontmatter = parse_frontmatter(content)
|
|
93
|
+
content = remove_frontmatter(content)
|
|
94
|
+
|
|
95
|
+
# Merge content frontmatter with entity metadata
|
|
96
|
+
# (entity_metadata takes precedence for conflicts)
|
|
97
|
+
content_frontmatter.update(entity_metadata)
|
|
98
|
+
entity_metadata = content_frontmatter
|
|
80
99
|
|
|
81
100
|
# Remove special fields for ordered frontmatter
|
|
82
101
|
for field in ["type", "title", "permalink"]:
|
|
83
|
-
|
|
102
|
+
entity_metadata.pop(field, None)
|
|
84
103
|
|
|
85
|
-
# Create Post with ordered
|
|
104
|
+
# Create Post with fields ordered by insert order
|
|
86
105
|
post = Post(
|
|
87
106
|
content,
|
|
88
107
|
title=schema.title,
|
|
89
108
|
type=schema.entity_type,
|
|
90
|
-
permalink=schema.permalink,
|
|
91
|
-
**frontmatter_metadata,
|
|
92
109
|
)
|
|
110
|
+
# set the permalink if passed in
|
|
111
|
+
if schema.permalink:
|
|
112
|
+
post.metadata["permalink"] = schema.permalink
|
|
113
|
+
|
|
114
|
+
if entity_metadata:
|
|
115
|
+
post.metadata.update(entity_metadata)
|
|
116
|
+
|
|
93
117
|
return post
|