basic_memory-0.7.0-py3-none-any.whl → basic_memory-0.17.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (195)
  1. basic_memory/__init__.py +5 -1
  2. basic_memory/alembic/alembic.ini +119 -0
  3. basic_memory/alembic/env.py +130 -20
  4. basic_memory/alembic/migrations.py +4 -9
  5. basic_memory/alembic/versions/314f1ea54dc4_add_postgres_full_text_search_support_.py +131 -0
  6. basic_memory/alembic/versions/502b60eaa905_remove_required_from_entity_permalink.py +51 -0
  7. basic_memory/alembic/versions/5fe1ab1ccebe_add_projects_table.py +120 -0
  8. basic_memory/alembic/versions/647e7a75e2cd_project_constraint_fix.py +112 -0
  9. basic_memory/alembic/versions/6830751f5fb6_merge_multiple_heads.py +24 -0
  10. basic_memory/alembic/versions/9d9c1cb7d8f5_add_mtime_and_size_columns_to_entity_.py +49 -0
  11. basic_memory/alembic/versions/a1b2c3d4e5f6_fix_project_foreign_keys.py +49 -0
  12. basic_memory/alembic/versions/a2b3c4d5e6f7_add_search_index_entity_cascade.py +56 -0
  13. basic_memory/alembic/versions/b3c3938bacdb_relation_to_name_unique_index.py +44 -0
  14. basic_memory/alembic/versions/cc7172b46608_update_search_index_schema.py +113 -0
  15. basic_memory/alembic/versions/e7e1f4367280_add_scan_watermark_tracking_to_project.py +37 -0
  16. basic_memory/alembic/versions/f8a9b2c3d4e5_add_pg_trgm_for_fuzzy_link_resolution.py +239 -0
  17. basic_memory/alembic/versions/g9a0b3c4d5e6_add_external_id_to_project_and_entity.py +173 -0
  18. basic_memory/api/app.py +87 -20
  19. basic_memory/api/container.py +133 -0
  20. basic_memory/api/routers/__init__.py +4 -1
  21. basic_memory/api/routers/directory_router.py +84 -0
  22. basic_memory/api/routers/importer_router.py +152 -0
  23. basic_memory/api/routers/knowledge_router.py +180 -23
  24. basic_memory/api/routers/management_router.py +80 -0
  25. basic_memory/api/routers/memory_router.py +9 -64
  26. basic_memory/api/routers/project_router.py +460 -0
  27. basic_memory/api/routers/prompt_router.py +260 -0
  28. basic_memory/api/routers/resource_router.py +136 -11
  29. basic_memory/api/routers/search_router.py +5 -5
  30. basic_memory/api/routers/utils.py +169 -0
  31. basic_memory/api/template_loader.py +292 -0
  32. basic_memory/api/v2/__init__.py +35 -0
  33. basic_memory/api/v2/routers/__init__.py +21 -0
  34. basic_memory/api/v2/routers/directory_router.py +93 -0
  35. basic_memory/api/v2/routers/importer_router.py +181 -0
  36. basic_memory/api/v2/routers/knowledge_router.py +427 -0
  37. basic_memory/api/v2/routers/memory_router.py +130 -0
  38. basic_memory/api/v2/routers/project_router.py +359 -0
  39. basic_memory/api/v2/routers/prompt_router.py +269 -0
  40. basic_memory/api/v2/routers/resource_router.py +286 -0
  41. basic_memory/api/v2/routers/search_router.py +73 -0
  42. basic_memory/cli/app.py +80 -10
  43. basic_memory/cli/auth.py +300 -0
  44. basic_memory/cli/commands/__init__.py +15 -2
  45. basic_memory/cli/commands/cloud/__init__.py +6 -0
  46. basic_memory/cli/commands/cloud/api_client.py +127 -0
  47. basic_memory/cli/commands/cloud/bisync_commands.py +110 -0
  48. basic_memory/cli/commands/cloud/cloud_utils.py +108 -0
  49. basic_memory/cli/commands/cloud/core_commands.py +195 -0
  50. basic_memory/cli/commands/cloud/rclone_commands.py +397 -0
  51. basic_memory/cli/commands/cloud/rclone_config.py +110 -0
  52. basic_memory/cli/commands/cloud/rclone_installer.py +263 -0
  53. basic_memory/cli/commands/cloud/upload.py +240 -0
  54. basic_memory/cli/commands/cloud/upload_command.py +124 -0
  55. basic_memory/cli/commands/command_utils.py +99 -0
  56. basic_memory/cli/commands/db.py +87 -12
  57. basic_memory/cli/commands/format.py +198 -0
  58. basic_memory/cli/commands/import_chatgpt.py +47 -223
  59. basic_memory/cli/commands/import_claude_conversations.py +48 -171
  60. basic_memory/cli/commands/import_claude_projects.py +53 -160
  61. basic_memory/cli/commands/import_memory_json.py +55 -111
  62. basic_memory/cli/commands/mcp.py +67 -11
  63. basic_memory/cli/commands/project.py +889 -0
  64. basic_memory/cli/commands/status.py +52 -34
  65. basic_memory/cli/commands/telemetry.py +81 -0
  66. basic_memory/cli/commands/tool.py +341 -0
  67. basic_memory/cli/container.py +84 -0
  68. basic_memory/cli/main.py +14 -6
  69. basic_memory/config.py +580 -26
  70. basic_memory/db.py +285 -28
  71. basic_memory/deps/__init__.py +293 -0
  72. basic_memory/deps/config.py +26 -0
  73. basic_memory/deps/db.py +56 -0
  74. basic_memory/deps/importers.py +200 -0
  75. basic_memory/deps/projects.py +238 -0
  76. basic_memory/deps/repositories.py +179 -0
  77. basic_memory/deps/services.py +480 -0
  78. basic_memory/deps.py +16 -185
  79. basic_memory/file_utils.py +318 -54
  80. basic_memory/ignore_utils.py +297 -0
  81. basic_memory/importers/__init__.py +27 -0
  82. basic_memory/importers/base.py +100 -0
  83. basic_memory/importers/chatgpt_importer.py +245 -0
  84. basic_memory/importers/claude_conversations_importer.py +192 -0
  85. basic_memory/importers/claude_projects_importer.py +184 -0
  86. basic_memory/importers/memory_json_importer.py +128 -0
  87. basic_memory/importers/utils.py +61 -0
  88. basic_memory/markdown/entity_parser.py +182 -23
  89. basic_memory/markdown/markdown_processor.py +70 -7
  90. basic_memory/markdown/plugins.py +43 -23
  91. basic_memory/markdown/schemas.py +1 -1
  92. basic_memory/markdown/utils.py +38 -14
  93. basic_memory/mcp/async_client.py +135 -4
  94. basic_memory/mcp/clients/__init__.py +28 -0
  95. basic_memory/mcp/clients/directory.py +70 -0
  96. basic_memory/mcp/clients/knowledge.py +176 -0
  97. basic_memory/mcp/clients/memory.py +120 -0
  98. basic_memory/mcp/clients/project.py +89 -0
  99. basic_memory/mcp/clients/resource.py +71 -0
  100. basic_memory/mcp/clients/search.py +65 -0
  101. basic_memory/mcp/container.py +110 -0
  102. basic_memory/mcp/project_context.py +155 -0
  103. basic_memory/mcp/prompts/__init__.py +19 -0
  104. basic_memory/mcp/prompts/ai_assistant_guide.py +70 -0
  105. basic_memory/mcp/prompts/continue_conversation.py +62 -0
  106. basic_memory/mcp/prompts/recent_activity.py +188 -0
  107. basic_memory/mcp/prompts/search.py +57 -0
  108. basic_memory/mcp/prompts/utils.py +162 -0
  109. basic_memory/mcp/resources/ai_assistant_guide.md +283 -0
  110. basic_memory/mcp/resources/project_info.py +71 -0
  111. basic_memory/mcp/server.py +61 -9
  112. basic_memory/mcp/tools/__init__.py +33 -21
  113. basic_memory/mcp/tools/build_context.py +120 -0
  114. basic_memory/mcp/tools/canvas.py +152 -0
  115. basic_memory/mcp/tools/chatgpt_tools.py +190 -0
  116. basic_memory/mcp/tools/delete_note.py +249 -0
  117. basic_memory/mcp/tools/edit_note.py +325 -0
  118. basic_memory/mcp/tools/list_directory.py +157 -0
  119. basic_memory/mcp/tools/move_note.py +549 -0
  120. basic_memory/mcp/tools/project_management.py +204 -0
  121. basic_memory/mcp/tools/read_content.py +281 -0
  122. basic_memory/mcp/tools/read_note.py +265 -0
  123. basic_memory/mcp/tools/recent_activity.py +528 -0
  124. basic_memory/mcp/tools/search.py +377 -24
  125. basic_memory/mcp/tools/utils.py +402 -16
  126. basic_memory/mcp/tools/view_note.py +78 -0
  127. basic_memory/mcp/tools/write_note.py +230 -0
  128. basic_memory/models/__init__.py +3 -2
  129. basic_memory/models/knowledge.py +82 -17
  130. basic_memory/models/project.py +93 -0
  131. basic_memory/models/search.py +68 -8
  132. basic_memory/project_resolver.py +222 -0
  133. basic_memory/repository/__init__.py +2 -0
  134. basic_memory/repository/entity_repository.py +437 -8
  135. basic_memory/repository/observation_repository.py +36 -3
  136. basic_memory/repository/postgres_search_repository.py +451 -0
  137. basic_memory/repository/project_info_repository.py +10 -0
  138. basic_memory/repository/project_repository.py +140 -0
  139. basic_memory/repository/relation_repository.py +79 -4
  140. basic_memory/repository/repository.py +148 -29
  141. basic_memory/repository/search_index_row.py +95 -0
  142. basic_memory/repository/search_repository.py +79 -268
  143. basic_memory/repository/search_repository_base.py +241 -0
  144. basic_memory/repository/sqlite_search_repository.py +437 -0
  145. basic_memory/runtime.py +61 -0
  146. basic_memory/schemas/__init__.py +22 -9
  147. basic_memory/schemas/base.py +131 -12
  148. basic_memory/schemas/cloud.py +50 -0
  149. basic_memory/schemas/directory.py +31 -0
  150. basic_memory/schemas/importer.py +35 -0
  151. basic_memory/schemas/memory.py +194 -25
  152. basic_memory/schemas/project_info.py +213 -0
  153. basic_memory/schemas/prompt.py +90 -0
  154. basic_memory/schemas/request.py +56 -2
  155. basic_memory/schemas/response.py +85 -28
  156. basic_memory/schemas/search.py +36 -35
  157. basic_memory/schemas/sync_report.py +72 -0
  158. basic_memory/schemas/v2/__init__.py +27 -0
  159. basic_memory/schemas/v2/entity.py +133 -0
  160. basic_memory/schemas/v2/resource.py +47 -0
  161. basic_memory/services/__init__.py +2 -1
  162. basic_memory/services/context_service.py +451 -138
  163. basic_memory/services/directory_service.py +310 -0
  164. basic_memory/services/entity_service.py +636 -71
  165. basic_memory/services/exceptions.py +21 -0
  166. basic_memory/services/file_service.py +402 -33
  167. basic_memory/services/initialization.py +216 -0
  168. basic_memory/services/link_resolver.py +50 -56
  169. basic_memory/services/project_service.py +888 -0
  170. basic_memory/services/search_service.py +232 -37
  171. basic_memory/sync/__init__.py +4 -2
  172. basic_memory/sync/background_sync.py +26 -0
  173. basic_memory/sync/coordinator.py +160 -0
  174. basic_memory/sync/sync_service.py +1200 -109
  175. basic_memory/sync/watch_service.py +432 -135
  176. basic_memory/telemetry.py +249 -0
  177. basic_memory/templates/prompts/continue_conversation.hbs +110 -0
  178. basic_memory/templates/prompts/search.hbs +101 -0
  179. basic_memory/utils.py +407 -54
  180. basic_memory-0.17.4.dist-info/METADATA +617 -0
  181. basic_memory-0.17.4.dist-info/RECORD +193 -0
  182. {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/WHEEL +1 -1
  183. {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/entry_points.txt +1 -0
  184. basic_memory/alembic/README +0 -1
  185. basic_memory/cli/commands/sync.py +0 -206
  186. basic_memory/cli/commands/tools.py +0 -157
  187. basic_memory/mcp/tools/knowledge.py +0 -68
  188. basic_memory/mcp/tools/memory.py +0 -170
  189. basic_memory/mcp/tools/notes.py +0 -202
  190. basic_memory/schemas/discovery.py +0 -28
  191. basic_memory/sync/file_change_scanner.py +0 -158
  192. basic_memory/sync/utils.py +0 -31
  193. basic_memory-0.7.0.dist-info/METADATA +0 -378
  194. basic_memory-0.7.0.dist-info/RECORD +0 -82
  195. {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/licenses/LICENSE +0 -0
basic_memory/markdown/entity_parser.py

@@ -4,25 +4,105 @@ Uses markdown-it with plugins to parse structured data from markdown content.
 """
 
 from dataclasses import dataclass, field
+from datetime import date, datetime
 from pathlib import Path
-from datetime import datetime
 from typing import Any, Optional
-import dateparser
 
-from markdown_it import MarkdownIt
+import dateparser
 import frontmatter
+import yaml
+from loguru import logger
+from markdown_it import MarkdownIt
 
 from basic_memory.markdown.plugins import observation_plugin, relation_plugin
 from basic_memory.markdown.schemas import (
-    EntityMarkdown,
     EntityFrontmatter,
+    EntityMarkdown,
     Observation,
     Relation,
 )
+from basic_memory.utils import parse_tags
+

 md = MarkdownIt().use(observation_plugin).use(relation_plugin)
 
 
+def normalize_frontmatter_value(value: Any) -> Any:
+    """Normalize frontmatter values to safe types for processing.
+
+    PyYAML automatically converts various string-like values into native Python types:
+    - Date strings ("2025-10-24") → datetime.date objects
+    - Numbers ("1.0") → int or float
+    - Booleans ("true") → bool
+    - Lists → list objects
+
+    This can cause AttributeError when code expects strings and calls string methods
+    like .strip() on these values (see GitHub issue #236).
+
+    This function normalizes all frontmatter values to safe types:
+    - Dates/datetimes → ISO format strings
+    - Numbers (int/float) → strings
+    - Booleans → strings ("True"/"False")
+    - Lists → preserved as lists, but items are recursively normalized
+    - Dicts → preserved as dicts, but values are recursively normalized
+    - Strings → kept as-is
+    - None → kept as None
+
+    Args:
+        value: The frontmatter value to normalize
+
+    Returns:
+        The normalized value safe for string operations
+
+    Example:
+        >>> normalize_frontmatter_value(datetime.date(2025, 10, 24))
+        '2025-10-24'
+        >>> normalize_frontmatter_value([datetime.date(2025, 10, 24), "tag", 123])
+        ['2025-10-24', 'tag', '123']
+        >>> normalize_frontmatter_value(True)
+        'True'
+    """
+    # Convert date/datetime objects to ISO format strings
+    if isinstance(value, datetime):
+        return value.isoformat()
+    if isinstance(value, date):
+        return value.isoformat()
+
+    # Convert boolean to string (must come before int check since bool is subclass of int)
+    if isinstance(value, bool):
+        return str(value)
+
+    # Convert numbers to strings
+    if isinstance(value, (int, float)):
+        return str(value)
+
+    # Recursively process lists (preserve as list, normalize items)
+    if isinstance(value, list):
+        return [normalize_frontmatter_value(item) for item in value]
+
+    # Recursively process dicts (preserve as dict, normalize values)
+    if isinstance(value, dict):
+        return {key: normalize_frontmatter_value(val) for key, val in value.items()}
+
+    # Keep strings and None as-is
+    return value
+
+
+def normalize_frontmatter_metadata(metadata: dict) -> dict:
+    """Normalize all values in frontmatter metadata dict.
+
+    Converts date/datetime objects to ISO format strings to prevent
+    AttributeError when code expects strings (GitHub issue #236).
+
+    Args:
+        metadata: The frontmatter metadata dictionary
+
+    Returns:
+        A new dictionary with all values normalized
+    """
+    return {key: normalize_frontmatter_value(value) for key, value in metadata.items()}
+
+
 @dataclass
 class EntityContent:
     content: str
@@ -56,11 +136,11 @@ def parse(content: str) -> EntityContent:
     )
 
 
-def parse_tags(tags: Any) -> list[str]:
-    """Parse tags into list of strings."""
-    if isinstance(tags, (list, tuple)):
-        return [str(t).strip() for t in tags if str(t).strip()]
-    return [t.strip() for t in tags.split(",") if t.strip()]
+# def parse_tags(tags: Any) -> list[str]:
+#     """Parse tags into list of strings."""
+#     if isinstance(tags, (list, tuple)):
+#         return [str(t).strip() for t in tags if str(t).strip()]
+#     return [t.strip() for t in tags.split(",") if t.strip()]
 
 
 class EntityParser:
@@ -88,33 +168,112 @@ class EntityParser:
                 return parsed
         return None
 
-    async def parse_file(self, file_path: Path) -> EntityMarkdown:
+    async def parse_file(self, path: Path | str) -> EntityMarkdown:
         """Parse markdown file into EntityMarkdown."""
 
-        absolute_path = self.base_path / file_path
+        # Check if the path is already absolute
+        if (
+            isinstance(path, Path)
+            and path.is_absolute()
+            or (isinstance(path, str) and Path(path).is_absolute())
+        ):
+            absolute_path = Path(path)
+        else:
+            absolute_path = self.get_file_path(path)
+
         # Parse frontmatter and content using python-frontmatter
-        post = frontmatter.load(str(absolute_path))
+        file_content = absolute_path.read_text(encoding="utf-8")
+        return await self.parse_file_content(absolute_path, file_content)
 
-        # Extract file stat info
-        file_stats = absolute_path.stat()
+    def get_file_path(self, path):
+        """Get absolute path for a file using the base path for the project."""
+        return self.base_path / path
 
-        metadata = post.metadata
-        metadata["title"] = post.metadata.get("title", file_path.name)
-        metadata["type"] = post.metadata.get("type", "note")
-        metadata["tags"] = parse_tags(post.metadata.get("tags", []))
+    async def parse_file_content(self, absolute_path, file_content):
+        """Parse markdown content from file stats.
 
-        # frontmatter
-        entity_frontmatter = EntityFrontmatter(
-            metadata=post.metadata,
+        Delegates to parse_markdown_content() for actual parsing logic.
+        Exists for backwards compatibility with code that passes file paths.
+        """
+        # Extract file stat info for timestamps
+        file_stats = absolute_path.stat()
+
+        # Delegate to parse_markdown_content with timestamps from file stats
+        return await self.parse_markdown_content(
+            file_path=absolute_path,
+            content=file_content,
+            mtime=file_stats.st_mtime,
+            ctime=file_stats.st_ctime,
         )
 
+    async def parse_markdown_content(
+        self,
+        file_path: Path,
+        content: str,
+        mtime: Optional[float] = None,
+        ctime: Optional[float] = None,
+    ) -> EntityMarkdown:
+        """Parse markdown content without requiring file to exist on disk.
+
+        Useful for parsing content from S3 or other remote sources where the file
+        is not available locally.
+
+        Args:
+            file_path: Path for metadata (doesn't need to exist on disk)
+            content: Markdown content as string
+            mtime: Optional modification time (Unix timestamp)
+            ctime: Optional creation time (Unix timestamp)
+
+        Returns:
+            EntityMarkdown with parsed content
+        """
+        # Strip BOM before parsing (can be present in files from Windows or certain sources)
+        # See issue #452
+        from basic_memory.file_utils import strip_bom
+
+        content = strip_bom(content)
+
+        # Parse frontmatter with proper error handling for malformed YAML
+        try:
+            post = frontmatter.loads(content)
+        except yaml.YAMLError as e:
+            logger.warning(
+                f"Failed to parse YAML frontmatter in {file_path}: {e}. "
+                f"Treating file as plain markdown without frontmatter."
+            )
+            post = frontmatter.Post(content, metadata={})
+
+        # Normalize frontmatter values
+        metadata = normalize_frontmatter_metadata(post.metadata)
+
+        # Ensure required fields have defaults
+        title = metadata.get("title")
+        if not title or title == "None":
+            metadata["title"] = file_path.stem
+        else:
+            metadata["title"] = title
+
+        entity_type = metadata.get("type")
+        metadata["type"] = entity_type if entity_type is not None else "note"
+
+        tags = parse_tags(metadata.get("tags", []))  # pyright: ignore
+        if tags:
+            metadata["tags"] = tags
+
+        # Parse content for observations and relations
+        entity_frontmatter = EntityFrontmatter(metadata=metadata)
         entity_content = parse(post.content)
 
+        # Use provided timestamps or current time as fallback
+        now = datetime.now().astimezone()
+        created = datetime.fromtimestamp(ctime).astimezone() if ctime else now
+        modified = datetime.fromtimestamp(mtime).astimezone() if mtime else now
+
         return EntityMarkdown(
             frontmatter=entity_frontmatter,
             content=post.content,
             observations=entity_content.observations,
             relations=entity_content.relations,
-            created=datetime.fromtimestamp(file_stats.st_ctime),
-            modified=datetime.fromtimestamp(file_stats.st_mtime),
+            created=created,
+            modified=modified,
         )
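
To see how the new normalization behaves, the sketch below feeds it a metadata dict the way PyYAML would hand it back, with dates, floats, and booleans already coerced to native types. It assumes the 0.17.4 release is installed and that normalize_frontmatter_metadata is importable from basic_memory.markdown.entity_parser, where the hunk above defines it; the expected output follows the docstring shown above.

from datetime import date

from basic_memory.markdown.entity_parser import normalize_frontmatter_metadata

# Frontmatter as PyYAML hands it back: dates, numbers and booleans arrive
# as native Python types rather than strings.
raw = {
    "title": "Release notes",
    "created": date(2025, 10, 24),
    "version": 1.0,
    "draft": True,
    "tags": [date(2025, 10, 24), "memory", 123],
}

normalized = normalize_frontmatter_metadata(raw)
# Per the docstring above: dates become ISO strings, numbers and booleans
# become strings, and list items are normalized in place:
# {'title': 'Release notes', 'created': '2025-10-24', 'version': '1.0',
#  'draft': 'True', 'tags': ['2025-10-24', 'memory', '123']}
print(normalized)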
basic_memory/markdown/markdown_processor.py

@@ -1,15 +1,19 @@
 from pathlib import Path
-from typing import Optional
+from typing import TYPE_CHECKING, Optional
 from collections import OrderedDict
 
-import frontmatter
 from frontmatter import Post
 from loguru import logger
 
+
 from basic_memory import file_utils
+from basic_memory.file_utils import dump_frontmatter
 from basic_memory.markdown.entity_parser import EntityParser
 from basic_memory.markdown.schemas import EntityMarkdown, Observation, Relation
 
+if TYPE_CHECKING:  # pragma: no cover
+    from basic_memory.config import BasicMemoryConfig
+
 
 class DirtyFileError(Exception):
     """Raised when attempting to write to a file that has been modified."""
@@ -35,9 +39,14 @@ class MarkdownProcessor:
     3. Track schema changes (that's done by the database)
     """
 
-    def __init__(self, entity_parser: EntityParser):
-        """Initialize processor with base path and parser."""
+    def __init__(
+        self,
+        entity_parser: EntityParser,
+        app_config: Optional["BasicMemoryConfig"] = None,
+    ):
+        """Initialize processor with parser and optional config."""
         self.entity_parser = entity_parser
+        self.app_config = app_config
 
     async def read_file(self, path: Path) -> EntityMarkdown:
         """Read and parse file into EntityMarkdown schema.
@@ -83,7 +92,7 @@
         """
         # Dirty check if needed
         if expected_checksum is not None:
-            current_content = path.read_text()
+            current_content = path.read_text(encoding="utf-8")
             current_checksum = await file_utils.compute_checksum(current_content)
             if current_checksum != expected_checksum:
                 raise DirtyFileError(f"File {path} has been modified")
@@ -115,14 +124,68 @@
 
         # Create Post object for frontmatter
         post = Post(content, **frontmatter_dict)
-        final_content = frontmatter.dumps(post, sort_keys=False)
+        final_content = dump_frontmatter(post)
 
         logger.debug(f"writing file {path} with content:\n{final_content}")
 
         # Write atomically and return checksum of updated file
         path.parent.mkdir(parents=True, exist_ok=True)
         await file_utils.write_file_atomic(path, final_content)
-        return await file_utils.compute_checksum(final_content)
+
+        # Format file if configured (MarkdownProcessor always handles markdown files)
+        content_for_checksum = final_content
+        if self.app_config:
+            formatted_content = await file_utils.format_file(  # pragma: no cover
+                path, self.app_config, is_markdown=True
+            )
+            if formatted_content is not None:  # pragma: no cover
+                content_for_checksum = formatted_content  # pragma: no cover
+
+        return await file_utils.compute_checksum(content_for_checksum)
+
+    def to_markdown_string(self, markdown: EntityMarkdown) -> str:
+        """Convert EntityMarkdown to markdown string with frontmatter.
+
+        This method handles serialization only - it does not write to files.
+        Use FileService.write_file() to persist the output.
+
+        This enables cloud environments to override file operations via
+        dependency injection while reusing the serialization logic.
+
+        Args:
+            markdown: EntityMarkdown schema to serialize
+
+        Returns:
+            Complete markdown string with frontmatter, content, and structured sections
+        """
+        # Convert frontmatter to dict
+        frontmatter_dict = OrderedDict()
+        frontmatter_dict["title"] = markdown.frontmatter.title
+        frontmatter_dict["type"] = markdown.frontmatter.type
+        frontmatter_dict["permalink"] = markdown.frontmatter.permalink
+
+        metadata = markdown.frontmatter.metadata or {}
+        for k, v in metadata.items():
+            frontmatter_dict[k] = v
+
+        # Start with user content (or minimal title for new files)
+        content = markdown.content or f"# {markdown.frontmatter.title}\n"
+
+        # Add structured sections with proper spacing
+        content = content.rstrip()  # Remove trailing whitespace
+
+        # Add a blank line if we have semantic content
+        if markdown.observations or markdown.relations:
+            content += "\n"
+
+        if markdown.observations:
+            content += self.format_observations(markdown.observations)
+        if markdown.relations:
+            content += self.format_relations(markdown.relations)
+
+        # Create Post object for frontmatter
+        post = Post(content, **frontmatter_dict)
+        return dump_frontmatter(post)
 
     def format_observations(self, observations: list[Observation]) -> str:
         """Format observations section in standard way.
basic_memory/markdown/plugins.py

@@ -8,34 +8,52 @@ from markdown_it.token import Token
 # Observation handling functions
 def is_observation(token: Token) -> bool:
     """Check if token looks like our observation format."""
+    import re
+
     if token.type != "inline":  # pragma: no cover
         return False
-
-    content = token.content.strip()
+    # Use token.tag which contains the actual content for test tokens, fallback to content
+    content = (token.tag or token.content).strip()
     if not content:  # pragma: no cover
         return False
-
     # if it's a markdown_task, return false
     if content.startswith("[ ]") or content.startswith("[x]") or content.startswith("[-]"):
         return False
 
-    has_category = content.startswith("[") and "]" in content
-    has_tags = "#" in content
-    return has_category or has_tags
+    # Exclude markdown links: [text](url)
+    if re.match(r"^\[.*?\]\(.*?\)$", content):
+        return False
+
+    # Exclude wiki links: [[text]]
+    if re.match(r"^\[\[.*?\]\]$", content):
+        return False
+
+    # Check for proper observation format: [category] content
+    match = re.match(r"^\[([^\[\]()]+)\]\s+(.+)", content)
+    # Check for standalone hashtags (words starting with #)
+    # This excludes # in HTML attributes like color="#4285F4"
+    has_tags = any(part.startswith("#") for part in content.split())
+    return bool(match) or has_tags
 
 
 def parse_observation(token: Token) -> Dict[str, Any]:
     """Extract observation parts from token."""
-    # Strip bullet point if present
-    content = token.content.strip()
+    import re
+
+    # Use token.tag which contains the actual content for test tokens, fallback to content
+    content = (token.tag or token.content).strip()
 
-    # Parse [category]
+    # Parse [category] with regex
+    match = re.match(r"^\[([^\[\]()]+)\]\s+(.+)", content)
     category = None
-    if content.startswith("["):
-        end = content.find("]")
-        if end != -1:
-            category = content[1:end].strip() or None  # Convert empty to None
-            content = content[end + 1 :].strip()
+    if match:
+        category = match.group(1).strip()
+        content = match.group(2).strip()
+    else:
+        # Handle empty brackets [] followed by content
+        empty_match = re.match(r"^\[\]\s+(.+)", content)
+        if empty_match:
+            content = empty_match.group(1).strip()
 
     # Parse (context)
     context = None
@@ -50,9 +68,7 @@ def parse_observation(token: Token) -> Dict[str, Any]:
     parts = content.split()
     for part in parts:
         if part.startswith("#"):
-            # Handle multiple #tags stuck together
             if "#" in part[1:]:
-                # Split on # but keep non-empty tags
                 subtags = [t for t in part.split("#") if t]
                 tags.extend(subtags)
             else:
@@ -72,14 +88,16 @@ def is_explicit_relation(token: Token) -> bool:
     if token.type != "inline":  # pragma: no cover
         return False
 
-    content = token.content.strip()
+    # Use token.tag which contains the actual content for test tokens, fallback to content
+    content = (token.tag or token.content).strip()
     return "[[" in content and "]]" in content
 
 
 def parse_relation(token: Token) -> Dict[str, Any] | None:
     """Extract relation parts from token."""
     # Remove bullet point if present
-    content = token.content.strip()
+    # Use token.tag which contains the actual content for test tokens, fallback to content
+    content = (token.tag or token.content).strip()
 
     # Extract [[target]]
     target = None
@@ -144,7 +162,7 @@ def parse_inline_relations(content: str) -> List[Dict[str, Any]]:
 
         target = content[start + 2 : end].strip()
         if target:
-            relations.append({"type": "links to", "target": target, "context": None})
+            relations.append({"type": "links_to", "target": target, "context": None})
 
         start = end + 2
 
@@ -213,10 +231,12 @@ def relation_plugin(md: MarkdownIt) -> None:
                    token.meta["relations"] = [rel]
 
            # Always check for inline links in any text
-            elif "[[" in token.content:
-                rels = parse_inline_relations(token.content)
-                if rels:
-                    token.meta["relations"] = token.meta.get("relations", []) + rels
+            else:
+                content = token.tag or token.content
+                if "[[" in content:
+                    rels = parse_inline_relations(content)
+                    if rels:
+                        token.meta["relations"] = token.meta.get("relations", []) + rels
 
     # Add the rule after inline processing
     md.core.ruler.after("inline", "relations", relation_rule)
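
The stricter observation detection above can be summarized as: skip task items, markdown links, and wiki links, then accept either "[category] content" or standalone #tags. The sketch below mirrors those checks outside the plugin so they can be exercised directly; the function name is illustrative and not part of the package's API.

import re


def looks_like_observation(text: str) -> bool:
    # Mirrors the checks added in plugins.py: skip task items, markdown links
    # and wiki links, then accept "[category] content" or standalone #tags.
    content = text.strip()
    if not content:
        return False
    if content.startswith(("[ ]", "[x]", "[-]")):
        return False
    if re.match(r"^\[.*?\]\(.*?\)$", content):   # markdown link [text](url)
        return False
    if re.match(r"^\[\[.*?\]\]$", content):      # wiki link [[text]]
        return False
    has_category = bool(re.match(r"^\[([^\[\]()]+)\]\s+(.+)", content))
    has_tags = any(part.startswith("#") for part in content.split())
    return has_category or has_tags


assert looks_like_observation("[idea] track sync status with checksums #design")
assert not looks_like_observation("[ ] finish the sync service")
assert not looks_like_observation("[docs](https://example.com)")
assert not looks_like_observation("[[Some Note]]")
assert not looks_like_observation('<font color="#4285F4">highlight</font>')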
basic_memory/markdown/schemas.py

@@ -42,7 +42,7 @@ class EntityFrontmatter(BaseModel):
 
     @property
     def tags(self) -> List[str]:
-        return self.metadata.get("tags") if self.metadata else []  # pyright: ignore
+        return self.metadata.get("tags") if self.metadata else None  # pyright: ignore
 
     @property
     def title(self) -> str:
basic_memory/markdown/utils.py

@@ -1,17 +1,22 @@
 """Utilities for converting between markdown and entity models."""
 
 from pathlib import Path
-from typing import Optional, Any
+from typing import Any, Optional
+
 
 from frontmatter import Post
 
+from basic_memory.file_utils import has_frontmatter, remove_frontmatter, parse_frontmatter
 from basic_memory.markdown import EntityMarkdown
-from basic_memory.models import Entity, Observation as ObservationModel
-from basic_memory.utils import generate_permalink
+from basic_memory.models import Entity
+from basic_memory.models import Observation as ObservationModel
 
 
 def entity_model_from_markdown(
-    file_path: Path, markdown: EntityMarkdown, entity: Optional[Entity] = None
+    file_path: Path,
+    markdown: EntityMarkdown,
+    entity: Optional[Entity] = None,
+    project_id: Optional[int] = None,
 ) -> Entity:
     """
     Convert markdown entity to model. Does not include relations.
@@ -20,6 +25,7 @@ def entity_model_from_markdown(
         file_path: Path to the markdown file
         markdown: Parsed markdown entity
        entity: Optional existing entity to update
+        project_id: Project ID for new observations (uses entity.project_id if not provided)
 
     Returns:
         Entity model populated from markdown
@@ -31,17 +37,16 @@
     if not markdown.created or not markdown.modified:  # pragma: no cover
         raise ValueError("Both created and modified dates are required in markdown")
 
-    # Generate permalink if not provided
-    permalink = markdown.frontmatter.permalink or generate_permalink(file_path)
-
     # Create or update entity
     model = entity or Entity()
 
     # Update basic fields
     model.title = markdown.frontmatter.title
     model.entity_type = markdown.frontmatter.type
-    model.permalink = permalink
-    model.file_path = str(file_path)
+    # Only update permalink if it exists in frontmatter, otherwise preserve existing
+    if markdown.frontmatter.permalink is not None:
+        model.permalink = markdown.frontmatter.permalink
+    model.file_path = file_path.as_posix()
     model.content_type = "text/markdown"
     model.created_at = markdown.created
     model.updated_at = markdown.modified
@@ -50,9 +55,13 @@
     metadata = markdown.frontmatter.metadata or {}
     model.entity_metadata = {k: str(v) for k, v in metadata.items() if v is not None}
 
+    # Get project_id from entity if not provided
+    obs_project_id = project_id or (model.project_id if hasattr(model, "project_id") else None)
+
     # Convert observations
     model.observations = [
         ObservationModel(
+            project_id=obs_project_id,
             content=obs.content,
             category=obs.category,
             context=obs.context,
@@ -76,18 +85,33 @@ async def schema_to_markdown(schema: Any) -> Post:
     """
     # Extract content and metadata
     content = schema.content or ""
-    frontmatter_metadata = dict(schema.entity_metadata or {})
+    entity_metadata = dict(schema.entity_metadata or {})
+
+    # if the content contains frontmatter, remove it and merge
+    if has_frontmatter(content):
+        content_frontmatter = parse_frontmatter(content)
+        content = remove_frontmatter(content)
+
+        # Merge content frontmatter with entity metadata
+        # (entity_metadata takes precedence for conflicts)
+        content_frontmatter.update(entity_metadata)
+        entity_metadata = content_frontmatter
 
     # Remove special fields for ordered frontmatter
     for field in ["type", "title", "permalink"]:
-        frontmatter_metadata.pop(field, None)
+        entity_metadata.pop(field, None)
 
-    # Create Post with ordered fields
+    # Create Post with fields ordered by insert order
     post = Post(
         content,
         title=schema.title,
         type=schema.entity_type,
-        permalink=schema.permalink,
-        **frontmatter_metadata,
     )
+    # set the permalink if passed in
+    if schema.permalink:
+        post.metadata["permalink"] = schema.permalink
+
+    if entity_metadata:
+        post.metadata.update(entity_metadata)
+
     return post
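
The merge added to schema_to_markdown resolves conflicts in favor of the entity's stored metadata. A tiny illustration of that precedence rule, using plain dicts rather than the package's helpers (the keys here are made up; only the update order matters):

# Frontmatter parsed out of the note body vs. metadata stored on the entity.
content_frontmatter = {"tags": ["from-body"], "status": "draft"}
entity_metadata = {"status": "published", "author": "basic-memory"}

# Same call order as in the hunk above: entity_metadata takes precedence.
content_frontmatter.update(entity_metadata)
merged = content_frontmatter

assert merged == {
    "tags": ["from-body"],
    "status": "published",
    "author": "basic-memory",
}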