basic-memory 0.17.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171)
  1. basic_memory/__init__.py +7 -0
  2. basic_memory/alembic/alembic.ini +119 -0
  3. basic_memory/alembic/env.py +185 -0
  4. basic_memory/alembic/migrations.py +24 -0
  5. basic_memory/alembic/script.py.mako +26 -0
  6. basic_memory/alembic/versions/314f1ea54dc4_add_postgres_full_text_search_support_.py +131 -0
  7. basic_memory/alembic/versions/3dae7c7b1564_initial_schema.py +93 -0
  8. basic_memory/alembic/versions/502b60eaa905_remove_required_from_entity_permalink.py +51 -0
  9. basic_memory/alembic/versions/5fe1ab1ccebe_add_projects_table.py +120 -0
  10. basic_memory/alembic/versions/647e7a75e2cd_project_constraint_fix.py +112 -0
  11. basic_memory/alembic/versions/9d9c1cb7d8f5_add_mtime_and_size_columns_to_entity_.py +49 -0
  12. basic_memory/alembic/versions/a1b2c3d4e5f6_fix_project_foreign_keys.py +49 -0
  13. basic_memory/alembic/versions/a2b3c4d5e6f7_add_search_index_entity_cascade.py +56 -0
  14. basic_memory/alembic/versions/b3c3938bacdb_relation_to_name_unique_index.py +44 -0
  15. basic_memory/alembic/versions/cc7172b46608_update_search_index_schema.py +113 -0
  16. basic_memory/alembic/versions/e7e1f4367280_add_scan_watermark_tracking_to_project.py +37 -0
  17. basic_memory/alembic/versions/f8a9b2c3d4e5_add_pg_trgm_for_fuzzy_link_resolution.py +239 -0
  18. basic_memory/api/__init__.py +5 -0
  19. basic_memory/api/app.py +131 -0
  20. basic_memory/api/routers/__init__.py +11 -0
  21. basic_memory/api/routers/directory_router.py +84 -0
  22. basic_memory/api/routers/importer_router.py +152 -0
  23. basic_memory/api/routers/knowledge_router.py +318 -0
  24. basic_memory/api/routers/management_router.py +80 -0
  25. basic_memory/api/routers/memory_router.py +90 -0
  26. basic_memory/api/routers/project_router.py +448 -0
  27. basic_memory/api/routers/prompt_router.py +260 -0
  28. basic_memory/api/routers/resource_router.py +249 -0
  29. basic_memory/api/routers/search_router.py +36 -0
  30. basic_memory/api/routers/utils.py +169 -0
  31. basic_memory/api/template_loader.py +292 -0
  32. basic_memory/api/v2/__init__.py +35 -0
  33. basic_memory/api/v2/routers/__init__.py +21 -0
  34. basic_memory/api/v2/routers/directory_router.py +93 -0
  35. basic_memory/api/v2/routers/importer_router.py +182 -0
  36. basic_memory/api/v2/routers/knowledge_router.py +413 -0
  37. basic_memory/api/v2/routers/memory_router.py +130 -0
  38. basic_memory/api/v2/routers/project_router.py +342 -0
  39. basic_memory/api/v2/routers/prompt_router.py +270 -0
  40. basic_memory/api/v2/routers/resource_router.py +286 -0
  41. basic_memory/api/v2/routers/search_router.py +73 -0
  42. basic_memory/cli/__init__.py +1 -0
  43. basic_memory/cli/app.py +84 -0
  44. basic_memory/cli/auth.py +277 -0
  45. basic_memory/cli/commands/__init__.py +18 -0
  46. basic_memory/cli/commands/cloud/__init__.py +6 -0
  47. basic_memory/cli/commands/cloud/api_client.py +112 -0
  48. basic_memory/cli/commands/cloud/bisync_commands.py +110 -0
  49. basic_memory/cli/commands/cloud/cloud_utils.py +101 -0
  50. basic_memory/cli/commands/cloud/core_commands.py +195 -0
  51. basic_memory/cli/commands/cloud/rclone_commands.py +371 -0
  52. basic_memory/cli/commands/cloud/rclone_config.py +110 -0
  53. basic_memory/cli/commands/cloud/rclone_installer.py +263 -0
  54. basic_memory/cli/commands/cloud/upload.py +233 -0
  55. basic_memory/cli/commands/cloud/upload_command.py +124 -0
  56. basic_memory/cli/commands/command_utils.py +77 -0
  57. basic_memory/cli/commands/db.py +44 -0
  58. basic_memory/cli/commands/format.py +198 -0
  59. basic_memory/cli/commands/import_chatgpt.py +84 -0
  60. basic_memory/cli/commands/import_claude_conversations.py +87 -0
  61. basic_memory/cli/commands/import_claude_projects.py +86 -0
  62. basic_memory/cli/commands/import_memory_json.py +87 -0
  63. basic_memory/cli/commands/mcp.py +76 -0
  64. basic_memory/cli/commands/project.py +889 -0
  65. basic_memory/cli/commands/status.py +174 -0
  66. basic_memory/cli/commands/telemetry.py +81 -0
  67. basic_memory/cli/commands/tool.py +341 -0
  68. basic_memory/cli/main.py +28 -0
  69. basic_memory/config.py +616 -0
  70. basic_memory/db.py +394 -0
  71. basic_memory/deps.py +705 -0
  72. basic_memory/file_utils.py +478 -0
  73. basic_memory/ignore_utils.py +297 -0
  74. basic_memory/importers/__init__.py +27 -0
  75. basic_memory/importers/base.py +79 -0
  76. basic_memory/importers/chatgpt_importer.py +232 -0
  77. basic_memory/importers/claude_conversations_importer.py +180 -0
  78. basic_memory/importers/claude_projects_importer.py +148 -0
  79. basic_memory/importers/memory_json_importer.py +108 -0
  80. basic_memory/importers/utils.py +61 -0
  81. basic_memory/markdown/__init__.py +21 -0
  82. basic_memory/markdown/entity_parser.py +279 -0
  83. basic_memory/markdown/markdown_processor.py +160 -0
  84. basic_memory/markdown/plugins.py +242 -0
  85. basic_memory/markdown/schemas.py +70 -0
  86. basic_memory/markdown/utils.py +117 -0
  87. basic_memory/mcp/__init__.py +1 -0
  88. basic_memory/mcp/async_client.py +139 -0
  89. basic_memory/mcp/project_context.py +141 -0
  90. basic_memory/mcp/prompts/__init__.py +19 -0
  91. basic_memory/mcp/prompts/ai_assistant_guide.py +70 -0
  92. basic_memory/mcp/prompts/continue_conversation.py +62 -0
  93. basic_memory/mcp/prompts/recent_activity.py +188 -0
  94. basic_memory/mcp/prompts/search.py +57 -0
  95. basic_memory/mcp/prompts/utils.py +162 -0
  96. basic_memory/mcp/resources/ai_assistant_guide.md +283 -0
  97. basic_memory/mcp/resources/project_info.py +71 -0
  98. basic_memory/mcp/server.py +81 -0
  99. basic_memory/mcp/tools/__init__.py +48 -0
  100. basic_memory/mcp/tools/build_context.py +120 -0
  101. basic_memory/mcp/tools/canvas.py +152 -0
  102. basic_memory/mcp/tools/chatgpt_tools.py +190 -0
  103. basic_memory/mcp/tools/delete_note.py +242 -0
  104. basic_memory/mcp/tools/edit_note.py +324 -0
  105. basic_memory/mcp/tools/list_directory.py +168 -0
  106. basic_memory/mcp/tools/move_note.py +551 -0
  107. basic_memory/mcp/tools/project_management.py +201 -0
  108. basic_memory/mcp/tools/read_content.py +281 -0
  109. basic_memory/mcp/tools/read_note.py +267 -0
  110. basic_memory/mcp/tools/recent_activity.py +534 -0
  111. basic_memory/mcp/tools/search.py +385 -0
  112. basic_memory/mcp/tools/utils.py +540 -0
  113. basic_memory/mcp/tools/view_note.py +78 -0
  114. basic_memory/mcp/tools/write_note.py +230 -0
  115. basic_memory/models/__init__.py +15 -0
  116. basic_memory/models/base.py +10 -0
  117. basic_memory/models/knowledge.py +226 -0
  118. basic_memory/models/project.py +87 -0
  119. basic_memory/models/search.py +85 -0
  120. basic_memory/repository/__init__.py +11 -0
  121. basic_memory/repository/entity_repository.py +503 -0
  122. basic_memory/repository/observation_repository.py +73 -0
  123. basic_memory/repository/postgres_search_repository.py +379 -0
  124. basic_memory/repository/project_info_repository.py +10 -0
  125. basic_memory/repository/project_repository.py +128 -0
  126. basic_memory/repository/relation_repository.py +146 -0
  127. basic_memory/repository/repository.py +385 -0
  128. basic_memory/repository/search_index_row.py +95 -0
  129. basic_memory/repository/search_repository.py +94 -0
  130. basic_memory/repository/search_repository_base.py +241 -0
  131. basic_memory/repository/sqlite_search_repository.py +439 -0
  132. basic_memory/schemas/__init__.py +86 -0
  133. basic_memory/schemas/base.py +297 -0
  134. basic_memory/schemas/cloud.py +50 -0
  135. basic_memory/schemas/delete.py +37 -0
  136. basic_memory/schemas/directory.py +30 -0
  137. basic_memory/schemas/importer.py +35 -0
  138. basic_memory/schemas/memory.py +285 -0
  139. basic_memory/schemas/project_info.py +212 -0
  140. basic_memory/schemas/prompt.py +90 -0
  141. basic_memory/schemas/request.py +112 -0
  142. basic_memory/schemas/response.py +229 -0
  143. basic_memory/schemas/search.py +117 -0
  144. basic_memory/schemas/sync_report.py +72 -0
  145. basic_memory/schemas/v2/__init__.py +27 -0
  146. basic_memory/schemas/v2/entity.py +129 -0
  147. basic_memory/schemas/v2/resource.py +46 -0
  148. basic_memory/services/__init__.py +8 -0
  149. basic_memory/services/context_service.py +601 -0
  150. basic_memory/services/directory_service.py +308 -0
  151. basic_memory/services/entity_service.py +864 -0
  152. basic_memory/services/exceptions.py +37 -0
  153. basic_memory/services/file_service.py +541 -0
  154. basic_memory/services/initialization.py +216 -0
  155. basic_memory/services/link_resolver.py +121 -0
  156. basic_memory/services/project_service.py +880 -0
  157. basic_memory/services/search_service.py +404 -0
  158. basic_memory/services/service.py +15 -0
  159. basic_memory/sync/__init__.py +6 -0
  160. basic_memory/sync/background_sync.py +26 -0
  161. basic_memory/sync/sync_service.py +1259 -0
  162. basic_memory/sync/watch_service.py +510 -0
  163. basic_memory/telemetry.py +249 -0
  164. basic_memory/templates/prompts/continue_conversation.hbs +110 -0
  165. basic_memory/templates/prompts/search.hbs +101 -0
  166. basic_memory/utils.py +468 -0
  167. basic_memory-0.17.1.dist-info/METADATA +617 -0
  168. basic_memory-0.17.1.dist-info/RECORD +171 -0
  169. basic_memory-0.17.1.dist-info/WHEEL +4 -0
  170. basic_memory-0.17.1.dist-info/entry_points.txt +3 -0
  171. basic_memory-0.17.1.dist-info/licenses/LICENSE +661 -0
@@ -0,0 +1,279 @@
1
+ """Parser for markdown files into Entity objects.
2
+
3
+ Uses markdown-it with plugins to parse structured data from markdown content.
4
+ """
5
+
6
+ from dataclasses import dataclass, field
7
+ from datetime import date, datetime
8
+ from pathlib import Path
9
+ from typing import Any, Optional
10
+
11
+ import dateparser
12
+ import frontmatter
13
+ import yaml
14
+ from loguru import logger
15
+ from markdown_it import MarkdownIt
16
+
17
+ from basic_memory.markdown.plugins import observation_plugin, relation_plugin
18
+ from basic_memory.markdown.schemas import (
19
+ EntityFrontmatter,
20
+ EntityMarkdown,
21
+ Observation,
22
+ Relation,
23
+ )
24
+ from basic_memory.utils import parse_tags
25
+
26
+
27
+ md = MarkdownIt().use(observation_plugin).use(relation_plugin)
28
+
29
+
30
def normalize_frontmatter_value(value: Any) -> Any:
    """Coerce a single frontmatter value into a type safe for string handling.

    PyYAML eagerly converts scalars into native Python types (dates, ints,
    floats, bools), which breaks code that calls string methods such as
    .strip() on frontmatter values (see GitHub issue #236). This function
    maps every value back to a predictable shape:

    - date / datetime  -> ISO-format string
    - bool             -> "True" / "False"
    - int / float      -> decimal string
    - list             -> list with each item normalized recursively
    - dict             -> dict with each value normalized recursively
    - str / None       -> unchanged

    Args:
        value: The frontmatter value to normalize.

    Returns:
        The normalized value, safe for string operations.

    Example:
        >>> normalize_frontmatter_value(datetime.date(2025, 10, 24))
        '2025-10-24'
        >>> normalize_frontmatter_value([datetime.date(2025, 10, 24), "tag", 123])
        ['2025-10-24', 'tag', '123']
        >>> normalize_frontmatter_value(True)
        'True'
    """
    # datetime is a subclass of date, so one check covers both; each type's
    # isoformat() yields the expected string form.
    if isinstance(value, (datetime, date)):
        return value.isoformat()

    # bool is a subclass of int, so booleans land in the same branch as
    # numbers: str(True) -> "True", str(1.5) -> "1.5".
    if isinstance(value, (bool, int, float)):
        return str(value)

    # Containers keep their shape; only their contents are normalized.
    if isinstance(value, list):
        return [normalize_frontmatter_value(item) for item in value]
    if isinstance(value, dict):
        return {k: normalize_frontmatter_value(v) for k, v in value.items()}

    # Strings, None, and anything else pass through untouched.
    return value
89
+
90
+
91
def normalize_frontmatter_metadata(metadata: dict) -> dict:
    """Return a new dict with every frontmatter value normalized.

    Converts date/datetime objects to ISO format strings (and numbers/bools
    to strings) to prevent AttributeError when downstream code expects
    strings (GitHub issue #236).

    Args:
        metadata: The frontmatter metadata dictionary.

    Returns:
        A new dictionary with all values normalized; the input is not mutated.
    """
    normalized: dict = {}
    for key, raw_value in metadata.items():
        normalized[key] = normalize_frontmatter_value(raw_value)
    return normalized
104
+
105
+
106
@dataclass
class EntityContent:
    """Parsed semantic content of a markdown body."""

    # Raw markdown body the observations/relations were extracted from.
    content: str
    # Observations ("[category] text #tags (context)") found in the content.
    observations: list[Observation] = field(default_factory=list)
    # Wiki-link relations ("rel_type [[target]] (context)") found in the content.
    relations: list[Relation] = field(default_factory=list)
111
+
112
+
113
def parse(content: str) -> EntityContent:
    """Parse markdown content into EntityContent.

    Runs the content through the module-level markdown-it instance (with
    observation and relation plugins) and collects the structured data the
    plugins attached to token.meta.
    """

    # Parse content for observations and relations using markdown-it
    observations = []
    relations = []

    if content:
        for token in md.parse(content):
            # check for observations and relations
            if token.meta:
                # observation_plugin stores one parsed observation dict
                # under the "observation" key.
                if "observation" in token.meta:
                    obs = token.meta["observation"]
                    observation = Observation.model_validate(obs)
                    observations.append(observation)
                # relation_plugin stores a list of relation dicts under
                # the "relations" key.
                if "relations" in token.meta:
                    rels = token.meta["relations"]
                    relations.extend([Relation.model_validate(r) for r in rels])

    return EntityContent(
        content=content,
        observations=observations,
        relations=relations,
    )
137
+
138
+
139
+ # def parse_tags(tags: Any) -> list[str]:
140
+ # """Parse tags into list of strings."""
141
+ # if isinstance(tags, (list, tuple)):
142
+ # return [str(t).strip() for t in tags if str(t).strip()]
143
+ # return [t.strip() for t in tags.split(",") if t.strip()]
144
+
145
+
146
class EntityParser:
    """Parser for markdown files into Entity objects.

    Reads markdown (optionally with YAML frontmatter) and produces
    EntityMarkdown instances, resolving relative paths against the
    project base path.
    """

    def __init__(self, base_path: Path):
        """Initialize parser with base path for relative permalink generation."""
        self.base_path = base_path.resolve()

    def parse_date(self, value: Any) -> Optional[datetime]:
        """Parse date strings using dateparser for maximum flexibility.

        Supports human friendly formats like:
        - 2024-01-15
        - Jan 15, 2024
        - 2024-01-15 10:00 AM
        - yesterday
        - 2 days ago

        Returns:
            The value itself when already a datetime, the parsed datetime
            for parseable strings, otherwise None.
        """
        if isinstance(value, datetime):
            return value
        if isinstance(value, str):
            parsed = dateparser.parse(value)
            if parsed:
                return parsed
        return None

    async def parse_file(self, path: Path | str) -> EntityMarkdown:
        """Parse markdown file into EntityMarkdown.

        Args:
            path: Absolute path, or a path relative to the project base path.
        """
        # Path(path) is a no-op for Path inputs, so a single check covers
        # both str and Path arguments (replaces a precedence-sensitive
        # "A and B or C" chain).
        if Path(path).is_absolute():
            absolute_path = Path(path)
        else:
            absolute_path = self.get_file_path(path)

        # Parse frontmatter and content using python-frontmatter
        file_content = absolute_path.read_text(encoding="utf-8")
        return await self.parse_file_content(absolute_path, file_content)

    def get_file_path(self, path: Path | str) -> Path:
        """Get absolute path for a file using the base path for the project."""
        return self.base_path / path

    async def parse_file_content(self, absolute_path: Path, file_content: str) -> EntityMarkdown:
        """Parse markdown content with timestamps taken from file stats.

        Delegates to parse_markdown_content() for actual parsing logic.
        Exists for backwards compatibility with code that passes file paths.
        """
        # Extract file stat info for timestamps
        file_stats = absolute_path.stat()

        # Delegate to parse_markdown_content with timestamps from file stats
        return await self.parse_markdown_content(
            file_path=absolute_path,
            content=file_content,
            mtime=file_stats.st_mtime,
            ctime=file_stats.st_ctime,
        )

    async def parse_markdown_content(
        self,
        file_path: Path,
        content: str,
        mtime: Optional[float] = None,
        ctime: Optional[float] = None,
    ) -> EntityMarkdown:
        """Parse markdown content without requiring file to exist on disk.

        Useful for parsing content from S3 or other remote sources where the
        file is not available locally.

        Args:
            file_path: Path for metadata (doesn't need to exist on disk)
            content: Markdown content as string
            mtime: Optional modification time (Unix timestamp)
            ctime: Optional creation time (Unix timestamp)

        Returns:
            EntityMarkdown with parsed content
        """
        # Strip BOM before parsing (can be present in files from Windows or
        # certain sources). See issue #452.
        from basic_memory.file_utils import strip_bom

        content = strip_bom(content)

        # Parse frontmatter with proper error handling for malformed YAML
        try:
            post = frontmatter.loads(content)
        except yaml.YAMLError as e:
            logger.warning(
                f"Failed to parse YAML frontmatter in {file_path}: {e}. "
                f"Treating file as plain markdown without frontmatter."
            )
            post = frontmatter.Post(content, metadata={})

        # Normalize frontmatter values (dates/numbers/bools -> strings) so
        # later string operations are safe (issue #236).
        metadata = normalize_frontmatter_metadata(post.metadata)

        # Ensure required fields have defaults. The literal string "None"
        # appears when a null title was stringified during normalization.
        title = metadata.get("title")
        metadata["title"] = file_path.stem if not title or title == "None" else title

        entity_type = metadata.get("type")
        metadata["type"] = entity_type if entity_type is not None else "note"

        tags = parse_tags(metadata.get("tags", []))  # pyright: ignore
        if tags:
            metadata["tags"] = tags

        # Parse content for observations and relations
        entity_frontmatter = EntityFrontmatter(metadata=metadata)
        entity_content = parse(post.content)

        # Use provided timestamps or current time as fallback. Compare
        # against None explicitly: 0.0 is the Unix epoch — a valid
        # timestamp — and must not silently fall back to "now".
        now = datetime.now().astimezone()
        created = datetime.fromtimestamp(ctime).astimezone() if ctime is not None else now
        modified = datetime.fromtimestamp(mtime).astimezone() if mtime is not None else now

        return EntityMarkdown(
            frontmatter=entity_frontmatter,
            content=post.content,
            observations=entity_content.observations,
            relations=entity_content.relations,
            created=created,
            modified=modified,
        )
+ )
@@ -0,0 +1,160 @@
1
+ from pathlib import Path
2
+ from typing import TYPE_CHECKING, Optional
3
+ from collections import OrderedDict
4
+
5
+ from frontmatter import Post
6
+ from loguru import logger
7
+
8
+
9
+ from basic_memory import file_utils
10
+ from basic_memory.file_utils import dump_frontmatter
11
+ from basic_memory.markdown.entity_parser import EntityParser
12
+ from basic_memory.markdown.schemas import EntityMarkdown, Observation, Relation
13
+
14
+ if TYPE_CHECKING:
15
+ from basic_memory.config import BasicMemoryConfig
16
+
17
+
18
class DirtyFileError(Exception):
    """Raised when attempting to write to a file that has been modified."""
22
+
23
+
24
class MarkdownProcessor:
    """Process markdown files while preserving content and structure.

    used only for import

    This class handles the file I/O aspects of our markdown processing. It:
    1. Uses EntityParser for reading/parsing files into our schema
    2. Handles writing files with proper frontmatter
    3. Formats structured sections (observations/relations) consistently
    4. Preserves user content exactly as written
    5. Performs atomic writes using temp files

    It does NOT:
    1. Modify the schema directly (that's done by services)
    2. Handle in-place updates (everything is read->modify->write)
    3. Track schema changes (that's done by the database)
    """

    def __init__(
        self,
        entity_parser: EntityParser,
        app_config: Optional["BasicMemoryConfig"] = None,
    ) -> None:
        """Initialize processor with parser and optional config.

        Args:
            entity_parser: Parser used by read_file() to load markdown.
            app_config: Optional config; when set, written files are passed
                through file_utils.format_file() after the atomic write.
        """
        self.entity_parser = entity_parser
        self.app_config = app_config

    async def read_file(self, path: Path) -> EntityMarkdown:
        """Read and parse file into EntityMarkdown schema.

        This is step 1 of our read->modify->write pattern.
        We use EntityParser to handle all the markdown parsing.
        """
        return await self.entity_parser.parse_file(path)

    async def write_file(
        self,
        path: Path,
        markdown: EntityMarkdown,
        expected_checksum: Optional[str] = None,
    ) -> str:
        """Write EntityMarkdown schema back to file.

        This is step 3 of our read->modify->write pattern.
        The entire file is rewritten atomically on each update.

        File Structure:
        ---
        frontmatter fields
        ---
        user content area (preserved exactly)

        ## Observations (if any)
        formatted observations

        ## Relations (if any)
        formatted relations

        Args:
            path: Where to write the file
            markdown: Complete schema to write
            expected_checksum: If provided, verify file hasn't changed

        Returns:
            Checksum of written file

        Raises:
            DirtyFileError: If file has been modified (when expected_checksum provided)
        """
        # Dirty check if needed: compare the current on-disk checksum with
        # what the caller last saw before overwriting their edits.
        if expected_checksum is not None:
            current_content = path.read_text(encoding="utf-8")
            current_checksum = await file_utils.compute_checksum(current_content)
            if current_checksum != expected_checksum:
                raise DirtyFileError(f"File {path} has been modified")

        # Convert frontmatter to dict. OrderedDict insertion order puts the
        # canonical keys (title/type/permalink) first in the emitted YAML.
        frontmatter_dict = OrderedDict()
        frontmatter_dict["title"] = markdown.frontmatter.title
        frontmatter_dict["type"] = markdown.frontmatter.type
        frontmatter_dict["permalink"] = markdown.frontmatter.permalink

        # NOTE(review): if metadata itself contains "title"/"type"/"permalink",
        # this loop overwrites the canonical values above — confirm intended.
        metadata = markdown.frontmatter.metadata or {}
        for k, v in metadata.items():
            frontmatter_dict[k] = v

        # Start with user content (or minimal title for new files)
        content = markdown.content or f"# {markdown.frontmatter.title}\n"

        # Add structured sections with proper spacing
        content = content.rstrip()  # Remove trailing whitespace

        # add a blank line if we have semantic content
        if markdown.observations or markdown.relations:
            content += "\n"

        if markdown.observations:
            content += self.format_observations(markdown.observations)
        if markdown.relations:
            content += self.format_relations(markdown.relations)

        # Create Post object for frontmatter
        post = Post(content, **frontmatter_dict)
        final_content = dump_frontmatter(post)

        logger.debug(f"writing file {path} with content:\n{final_content}")

        # Write atomically and return checksum of updated file
        path.parent.mkdir(parents=True, exist_ok=True)
        await file_utils.write_file_atomic(path, final_content)

        # Format file if configured (MarkdownProcessor always handles markdown files)
        # The checksum is computed from the formatted content when formatting
        # ran, so it matches what ended up on disk rather than the pre-format
        # text — assumes format_file returns the final on-disk content.
        content_for_checksum = final_content
        if self.app_config:
            formatted_content = await file_utils.format_file(
                path, self.app_config, is_markdown=True
            )
            if formatted_content is not None:
                content_for_checksum = formatted_content

        return await file_utils.compute_checksum(content_for_checksum)

    def format_observations(self, observations: list[Observation]) -> str:
        """Format observations section in standard way.

        Format: - [category] content #tag1 #tag2 (context)

        Relies on Observation.__str__ to produce each line.
        """
        lines = [f"{obs}" for obs in observations]
        return "\n".join(lines) + "\n"

    def format_relations(self, relations: list[Relation]) -> str:
        """Format relations section in standard way.

        Format: - relation_type [[target]] (context)

        Relies on Relation.__str__ to produce each line.
        """
        lines = [f"{rel}" for rel in relations]
        return "\n".join(lines) + "\n"
@@ -0,0 +1,242 @@
1
+ """Markdown-it plugins for Basic Memory markdown parsing."""
2
+
3
+ from typing import List, Any, Dict
4
+ from markdown_it import MarkdownIt
5
+ from markdown_it.token import Token
6
+
7
+
8
+ # Observation handling functions
9
def is_observation(token: Token) -> bool:
    """Check whether a token matches our observation format.

    An observation is either "[category] content ..." or any line containing
    standalone #hashtag words; task-list items and pure links are excluded.
    """
    import re

    if token.type != "inline":  # pragma: no cover
        return False

    # Use token.tag which contains the actual content for test tokens, fallback to content
    content = (token.tag or token.content).strip()
    if not content:  # pragma: no cover
        return False

    # Markdown task-list items are not observations.
    if content.startswith(("[ ]", "[x]", "[-]")):
        return False

    # Exclude markdown links "[text](url)" and wiki links "[[text]]".
    if re.match(r"^\[.*?\]\(.*?\)$", content) or re.match(r"^\[\[.*?\]\]$", content):
        return False

    # Proper observation format: "[category] content".
    has_category = bool(re.match(r"^\[([^\[\]()]+)\]\s+(.+)", content))
    # Standalone hashtags (words starting with #). Splitting on whitespace
    # excludes "#" inside HTML attributes like color="#4285F4".
    has_tags = any(word.startswith("#") for word in content.split())
    return has_category or has_tags
37
+
38
+
39
def parse_observation(token: Token) -> Dict[str, Any]:
    """Extract the parts of an observation token.

    Returns a dict with keys "category", "content", "tags", and "context";
    missing parts are None.
    """
    import re

    # Use token.tag which contains the actual content for test tokens, fallback to content
    text = (token.tag or token.content).strip()

    # Leading "[category]" prefix, if present.
    category = None
    categorized = re.match(r"^\[([^\[\]()]+)\]\s+(.+)", text)
    if categorized:
        category = categorized.group(1).strip()
        text = categorized.group(2).strip()
    else:
        # An empty "[]" prefix is allowed and simply means "no category".
        uncategorized = re.match(r"^\[\]\s+(.+)", text)
        if uncategorized:
            text = uncategorized.group(1).strip()

    # Trailing "(context)" suffix, if present.
    context = None
    if text.endswith(")"):
        open_paren = text.rfind("(")
        if open_paren != -1:
            context = text[open_paren + 1 : -1].strip()
            text = text[:open_paren].strip()

    # Collect #tags while keeping them in place in the content.
    tags: List[str] = []
    for word in text.split():
        if not word.startswith("#"):
            continue
        if "#" in word[1:]:
            # Compound form "#a#b" splits into ["a", "b"].
            tags.extend(t for t in word.split("#") if t)
        else:
            tags.append(word[1:])

    return {
        "category": category,
        "content": text,
        "tags": tags or None,
        "context": context,
    }
83
+
84
+
85
+ # Relation handling functions
86
def is_explicit_relation(token: Token) -> bool:
    """Check whether a token contains an explicit wiki-link relation."""
    if token.type != "inline":  # pragma: no cover
        return False

    # Use token.tag which contains the actual content for test tokens, fallback to content
    text = (token.tag or token.content).strip()
    # Any "[[...]]" pair marks the line as a candidate relation.
    return "[[" in text and "]]" in text
94
+
95
+
96
def parse_relation(token: Token) -> Dict[str, Any] | None:
    """Extract relation parts from a token.

    Format: "relation_type [[target]] (context)". Returns None when no
    non-empty [[target]] is found.
    """
    # Use token.tag which contains the actual content for test tokens, fallback to content
    text = (token.tag or token.content).strip()

    target = None
    rel_type = "relates_to"  # default when no text precedes the link
    context = None

    open_idx = text.find("[[")
    close_idx = text.find("]]")

    if open_idx != -1 and close_idx != -1:
        # Any text before the wiki link names the relation type.
        prefix = text[:open_idx].strip()
        if prefix:
            rel_type = prefix

        # The link body is the relation target.
        target = text[open_idx + 2 : close_idx].strip()

        # A parenthesized suffix after the link supplies optional context.
        suffix = text[close_idx + 2 :].strip()
        if suffix.startswith("(") and suffix.endswith(")"):
            context = suffix[1:-1].strip() or None

    if not target:  # pragma: no cover
        return None

    return {"type": rel_type, "target": target, "context": context}
128
+
129
+
130
def parse_inline_relations(content: str) -> List[Dict[str, Any]]:
    """Find wiki-style [[links]] in regular content.

    Each outermost "[[target]]" becomes a relation dict of type "links_to";
    nested "[[...]]" pairs remain part of the outer target text.
    """
    found: List[Dict[str, Any]] = []
    cursor = 0

    while True:
        # Locate the next outermost "[[".
        open_idx = content.find("[[", cursor)
        if open_idx == -1:  # pragma: no cover
            break

        # Scan forward for the matching "]]", tracking nesting depth so
        # inner "[[...]]" pairs don't terminate the outer link early.
        depth = 1
        scan = open_idx + 2
        close_idx = -1

        while scan < len(content):
            pair = content[scan : scan + 2]
            if pair == "[[":
                depth += 1
                scan += 2
            elif pair == "]]":
                depth -= 1
                if depth == 0:
                    close_idx = scan
                    break
                scan += 2
            else:
                scan += 1

        if close_idx == -1:
            # Unbalanced "[[" with no matching "]]": stop scanning.
            break

        target = content[open_idx + 2 : close_idx].strip()
        if target:
            found.append({"type": "links_to", "target": target, "context": None})

        cursor = close_idx + 2

    return found
170
+
171
+
172
def observation_plugin(md: MarkdownIt) -> None:
    """Plugin for parsing observation format:
    - [category] Content text #tag1 #tag2 (context)
    - Content text #tag1 (context) # No category is also valid

    Registers a core rule that annotates inline tokens with a parsed
    observation dict under token.meta["observation"].
    """

    def observation_rule(state: Any) -> None:
        """Process observations in token stream."""
        # Iterate tokens directly: the original indexed loop never used
        # the index, only tokens[idx].
        for token in state.tokens:
            # Ensure every token carries a meta dict so downstream code can
            # read token.meta without None checks.
            token.meta = token.meta or {}

            # Parse observations in inline tokens
            if token.type == "inline" and is_observation(token):
                obs = parse_observation(token)
                if obs["content"]:  # Only store if we have content
                    token.meta["observation"] = obs

    # Add the rule after inline processing
    md.core.ruler.after("inline", "observations", observation_rule)
196
+
197
+
198
def relation_plugin(md: MarkdownIt) -> None:
    """Plugin for parsing relation formats:

    Explicit relations:
    - relation_type [[target]] (context)

    Implicit relations (links in content):
    Some text with [[target]] reference

    Registers a core rule that annotates inline tokens with a list of
    relation dicts under token.meta["relations"].
    """

    def relation_rule(state: Any) -> None:
        """Process relations in token stream."""
        in_list_item = False

        # Iterate tokens directly: the original indexed loop never used
        # the index, only tokens[idx].
        for token in state.tokens:
            # Track list nesting so explicit relations are only recognized
            # inside list items.
            if token.type == "list_item_open":
                in_list_item = True
            elif token.type == "list_item_close":
                in_list_item = False

            # Ensure every token carries a meta dict.
            token.meta = token.meta or {}

            # Only process inline tokens
            if token.type == "inline":
                # Explicit "- rel_type [[target]]" form inside list items
                if in_list_item and is_explicit_relation(token):
                    rel = parse_relation(token)
                    if rel:
                        token.meta["relations"] = [rel]

                # Otherwise harvest bare [[wiki links]] anywhere in the text
                else:
                    content = token.tag or token.content
                    if "[[" in content:
                        rels = parse_inline_relations(content)
                        if rels:
                            token.meta["relations"] = token.meta.get("relations", []) + rels

    # Add the rule after inline processing
    md.core.ruler.after("inline", "relations", relation_rule)