basic-memory 0.7.0__py3-none-any.whl → 0.17.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of basic-memory might be problematic.

Files changed (195)
  1. basic_memory/__init__.py +5 -1
  2. basic_memory/alembic/alembic.ini +119 -0
  3. basic_memory/alembic/env.py +130 -20
  4. basic_memory/alembic/migrations.py +4 -9
  5. basic_memory/alembic/versions/314f1ea54dc4_add_postgres_full_text_search_support_.py +131 -0
  6. basic_memory/alembic/versions/502b60eaa905_remove_required_from_entity_permalink.py +51 -0
  7. basic_memory/alembic/versions/5fe1ab1ccebe_add_projects_table.py +120 -0
  8. basic_memory/alembic/versions/647e7a75e2cd_project_constraint_fix.py +112 -0
  9. basic_memory/alembic/versions/6830751f5fb6_merge_multiple_heads.py +24 -0
  10. basic_memory/alembic/versions/9d9c1cb7d8f5_add_mtime_and_size_columns_to_entity_.py +49 -0
  11. basic_memory/alembic/versions/a1b2c3d4e5f6_fix_project_foreign_keys.py +49 -0
  12. basic_memory/alembic/versions/a2b3c4d5e6f7_add_search_index_entity_cascade.py +56 -0
  13. basic_memory/alembic/versions/b3c3938bacdb_relation_to_name_unique_index.py +44 -0
  14. basic_memory/alembic/versions/cc7172b46608_update_search_index_schema.py +113 -0
  15. basic_memory/alembic/versions/e7e1f4367280_add_scan_watermark_tracking_to_project.py +37 -0
  16. basic_memory/alembic/versions/f8a9b2c3d4e5_add_pg_trgm_for_fuzzy_link_resolution.py +239 -0
  17. basic_memory/alembic/versions/g9a0b3c4d5e6_add_external_id_to_project_and_entity.py +173 -0
  18. basic_memory/api/app.py +87 -20
  19. basic_memory/api/container.py +133 -0
  20. basic_memory/api/routers/__init__.py +4 -1
  21. basic_memory/api/routers/directory_router.py +84 -0
  22. basic_memory/api/routers/importer_router.py +152 -0
  23. basic_memory/api/routers/knowledge_router.py +180 -23
  24. basic_memory/api/routers/management_router.py +80 -0
  25. basic_memory/api/routers/memory_router.py +9 -64
  26. basic_memory/api/routers/project_router.py +460 -0
  27. basic_memory/api/routers/prompt_router.py +260 -0
  28. basic_memory/api/routers/resource_router.py +136 -11
  29. basic_memory/api/routers/search_router.py +5 -5
  30. basic_memory/api/routers/utils.py +169 -0
  31. basic_memory/api/template_loader.py +292 -0
  32. basic_memory/api/v2/__init__.py +35 -0
  33. basic_memory/api/v2/routers/__init__.py +21 -0
  34. basic_memory/api/v2/routers/directory_router.py +93 -0
  35. basic_memory/api/v2/routers/importer_router.py +181 -0
  36. basic_memory/api/v2/routers/knowledge_router.py +427 -0
  37. basic_memory/api/v2/routers/memory_router.py +130 -0
  38. basic_memory/api/v2/routers/project_router.py +359 -0
  39. basic_memory/api/v2/routers/prompt_router.py +269 -0
  40. basic_memory/api/v2/routers/resource_router.py +286 -0
  41. basic_memory/api/v2/routers/search_router.py +73 -0
  42. basic_memory/cli/app.py +80 -10
  43. basic_memory/cli/auth.py +300 -0
  44. basic_memory/cli/commands/__init__.py +15 -2
  45. basic_memory/cli/commands/cloud/__init__.py +6 -0
  46. basic_memory/cli/commands/cloud/api_client.py +127 -0
  47. basic_memory/cli/commands/cloud/bisync_commands.py +110 -0
  48. basic_memory/cli/commands/cloud/cloud_utils.py +108 -0
  49. basic_memory/cli/commands/cloud/core_commands.py +195 -0
  50. basic_memory/cli/commands/cloud/rclone_commands.py +397 -0
  51. basic_memory/cli/commands/cloud/rclone_config.py +110 -0
  52. basic_memory/cli/commands/cloud/rclone_installer.py +263 -0
  53. basic_memory/cli/commands/cloud/upload.py +240 -0
  54. basic_memory/cli/commands/cloud/upload_command.py +124 -0
  55. basic_memory/cli/commands/command_utils.py +99 -0
  56. basic_memory/cli/commands/db.py +87 -12
  57. basic_memory/cli/commands/format.py +198 -0
  58. basic_memory/cli/commands/import_chatgpt.py +47 -223
  59. basic_memory/cli/commands/import_claude_conversations.py +48 -171
  60. basic_memory/cli/commands/import_claude_projects.py +53 -160
  61. basic_memory/cli/commands/import_memory_json.py +55 -111
  62. basic_memory/cli/commands/mcp.py +67 -11
  63. basic_memory/cli/commands/project.py +889 -0
  64. basic_memory/cli/commands/status.py +52 -34
  65. basic_memory/cli/commands/telemetry.py +81 -0
  66. basic_memory/cli/commands/tool.py +341 -0
  67. basic_memory/cli/container.py +84 -0
  68. basic_memory/cli/main.py +14 -6
  69. basic_memory/config.py +580 -26
  70. basic_memory/db.py +285 -28
  71. basic_memory/deps/__init__.py +293 -0
  72. basic_memory/deps/config.py +26 -0
  73. basic_memory/deps/db.py +56 -0
  74. basic_memory/deps/importers.py +200 -0
  75. basic_memory/deps/projects.py +238 -0
  76. basic_memory/deps/repositories.py +179 -0
  77. basic_memory/deps/services.py +480 -0
  78. basic_memory/deps.py +16 -185
  79. basic_memory/file_utils.py +318 -54
  80. basic_memory/ignore_utils.py +297 -0
  81. basic_memory/importers/__init__.py +27 -0
  82. basic_memory/importers/base.py +100 -0
  83. basic_memory/importers/chatgpt_importer.py +245 -0
  84. basic_memory/importers/claude_conversations_importer.py +192 -0
  85. basic_memory/importers/claude_projects_importer.py +184 -0
  86. basic_memory/importers/memory_json_importer.py +128 -0
  87. basic_memory/importers/utils.py +61 -0
  88. basic_memory/markdown/entity_parser.py +182 -23
  89. basic_memory/markdown/markdown_processor.py +70 -7
  90. basic_memory/markdown/plugins.py +43 -23
  91. basic_memory/markdown/schemas.py +1 -1
  92. basic_memory/markdown/utils.py +38 -14
  93. basic_memory/mcp/async_client.py +135 -4
  94. basic_memory/mcp/clients/__init__.py +28 -0
  95. basic_memory/mcp/clients/directory.py +70 -0
  96. basic_memory/mcp/clients/knowledge.py +176 -0
  97. basic_memory/mcp/clients/memory.py +120 -0
  98. basic_memory/mcp/clients/project.py +89 -0
  99. basic_memory/mcp/clients/resource.py +71 -0
  100. basic_memory/mcp/clients/search.py +65 -0
  101. basic_memory/mcp/container.py +110 -0
  102. basic_memory/mcp/project_context.py +155 -0
  103. basic_memory/mcp/prompts/__init__.py +19 -0
  104. basic_memory/mcp/prompts/ai_assistant_guide.py +70 -0
  105. basic_memory/mcp/prompts/continue_conversation.py +62 -0
  106. basic_memory/mcp/prompts/recent_activity.py +188 -0
  107. basic_memory/mcp/prompts/search.py +57 -0
  108. basic_memory/mcp/prompts/utils.py +162 -0
  109. basic_memory/mcp/resources/ai_assistant_guide.md +283 -0
  110. basic_memory/mcp/resources/project_info.py +71 -0
  111. basic_memory/mcp/server.py +61 -9
  112. basic_memory/mcp/tools/__init__.py +33 -21
  113. basic_memory/mcp/tools/build_context.py +120 -0
  114. basic_memory/mcp/tools/canvas.py +152 -0
  115. basic_memory/mcp/tools/chatgpt_tools.py +190 -0
  116. basic_memory/mcp/tools/delete_note.py +249 -0
  117. basic_memory/mcp/tools/edit_note.py +325 -0
  118. basic_memory/mcp/tools/list_directory.py +157 -0
  119. basic_memory/mcp/tools/move_note.py +549 -0
  120. basic_memory/mcp/tools/project_management.py +204 -0
  121. basic_memory/mcp/tools/read_content.py +281 -0
  122. basic_memory/mcp/tools/read_note.py +265 -0
  123. basic_memory/mcp/tools/recent_activity.py +528 -0
  124. basic_memory/mcp/tools/search.py +377 -24
  125. basic_memory/mcp/tools/utils.py +402 -16
  126. basic_memory/mcp/tools/view_note.py +78 -0
  127. basic_memory/mcp/tools/write_note.py +230 -0
  128. basic_memory/models/__init__.py +3 -2
  129. basic_memory/models/knowledge.py +82 -17
  130. basic_memory/models/project.py +93 -0
  131. basic_memory/models/search.py +68 -8
  132. basic_memory/project_resolver.py +222 -0
  133. basic_memory/repository/__init__.py +2 -0
  134. basic_memory/repository/entity_repository.py +437 -8
  135. basic_memory/repository/observation_repository.py +36 -3
  136. basic_memory/repository/postgres_search_repository.py +451 -0
  137. basic_memory/repository/project_info_repository.py +10 -0
  138. basic_memory/repository/project_repository.py +140 -0
  139. basic_memory/repository/relation_repository.py +79 -4
  140. basic_memory/repository/repository.py +148 -29
  141. basic_memory/repository/search_index_row.py +95 -0
  142. basic_memory/repository/search_repository.py +79 -268
  143. basic_memory/repository/search_repository_base.py +241 -0
  144. basic_memory/repository/sqlite_search_repository.py +437 -0
  145. basic_memory/runtime.py +61 -0
  146. basic_memory/schemas/__init__.py +22 -9
  147. basic_memory/schemas/base.py +131 -12
  148. basic_memory/schemas/cloud.py +50 -0
  149. basic_memory/schemas/directory.py +31 -0
  150. basic_memory/schemas/importer.py +35 -0
  151. basic_memory/schemas/memory.py +194 -25
  152. basic_memory/schemas/project_info.py +213 -0
  153. basic_memory/schemas/prompt.py +90 -0
  154. basic_memory/schemas/request.py +56 -2
  155. basic_memory/schemas/response.py +85 -28
  156. basic_memory/schemas/search.py +36 -35
  157. basic_memory/schemas/sync_report.py +72 -0
  158. basic_memory/schemas/v2/__init__.py +27 -0
  159. basic_memory/schemas/v2/entity.py +133 -0
  160. basic_memory/schemas/v2/resource.py +47 -0
  161. basic_memory/services/__init__.py +2 -1
  162. basic_memory/services/context_service.py +451 -138
  163. basic_memory/services/directory_service.py +310 -0
  164. basic_memory/services/entity_service.py +636 -71
  165. basic_memory/services/exceptions.py +21 -0
  166. basic_memory/services/file_service.py +402 -33
  167. basic_memory/services/initialization.py +216 -0
  168. basic_memory/services/link_resolver.py +50 -56
  169. basic_memory/services/project_service.py +888 -0
  170. basic_memory/services/search_service.py +232 -37
  171. basic_memory/sync/__init__.py +4 -2
  172. basic_memory/sync/background_sync.py +26 -0
  173. basic_memory/sync/coordinator.py +160 -0
  174. basic_memory/sync/sync_service.py +1200 -109
  175. basic_memory/sync/watch_service.py +432 -135
  176. basic_memory/telemetry.py +249 -0
  177. basic_memory/templates/prompts/continue_conversation.hbs +110 -0
  178. basic_memory/templates/prompts/search.hbs +101 -0
  179. basic_memory/utils.py +407 -54
  180. basic_memory-0.17.4.dist-info/METADATA +617 -0
  181. basic_memory-0.17.4.dist-info/RECORD +193 -0
  182. {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/WHEEL +1 -1
  183. {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/entry_points.txt +1 -0
  184. basic_memory/alembic/README +0 -1
  185. basic_memory/cli/commands/sync.py +0 -206
  186. basic_memory/cli/commands/tools.py +0 -157
  187. basic_memory/mcp/tools/knowledge.py +0 -68
  188. basic_memory/mcp/tools/memory.py +0 -170
  189. basic_memory/mcp/tools/notes.py +0 -202
  190. basic_memory/schemas/discovery.py +0 -28
  191. basic_memory/sync/file_change_scanner.py +0 -158
  192. basic_memory/sync/utils.py +0 -31
  193. basic_memory-0.7.0.dist-info/METADATA +0 -378
  194. basic_memory-0.7.0.dist-info/RECORD +0 -82
  195. {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/licenses/LICENSE +0 -0
basic_memory/services/exceptions.py
@@ -14,3 +14,24 @@ class EntityCreationError(Exception):
     """Raised when an entity cannot be created"""

     pass
+
+
+class DirectoryOperationError(Exception):
+    """Raised when directory operations fail"""
+
+    pass
+
+
+class SyncFatalError(Exception):
+    """Raised when sync encounters a fatal error that prevents continuation.
+
+    Fatal errors include:
+    - Project deleted during sync (FOREIGN KEY constraint)
+    - Database corruption
+    - Critical system failures
+
+    When this exception is raised, the entire sync operation should be terminated
+    immediately rather than attempting to continue with remaining files.
+    """
+
+    pass
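
The new SyncFatalError separates unrecoverable failures from ordinary per-file errors. A minimal sketch of how a caller might honor that contract, assuming a simplified sync loop with hypothetical files/process_file arguments (not the package's actual sync code):

    from basic_memory.services.exceptions import SyncFatalError


    async def sync_all(files, process_file):
        """Sketch: record per-file failures, but abort the whole run on fatal errors."""
        report = {"synced": [], "failed": []}
        for path in files:
            try:
                await process_file(path)
                report["synced"].append(path)
            except SyncFatalError:
                # Fatal (project deleted mid-sync, DB corruption): stop immediately.
                raise
            except Exception as exc:
                # Non-fatal: note the failure and continue with remaining files.
                report["failed"].append((path, str(exc)))
        return report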
basic_memory/services/file_service.py
@@ -1,25 +1,39 @@
 """Service for file operations with checksum tracking."""

+import asyncio
+import hashlib
+import mimetypes
+from datetime import datetime
 from pathlib import Path
-from typing import Tuple, Union
+from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union

-from loguru import logger
+import aiofiles
+
+import yaml

 from basic_memory import file_utils
+
+if TYPE_CHECKING:  # pragma: no cover
+    from basic_memory.config import BasicMemoryConfig
+from basic_memory.file_utils import FileError, FileMetadata, ParseError
 from basic_memory.markdown.markdown_processor import MarkdownProcessor
 from basic_memory.models import Entity as EntityModel
 from basic_memory.schemas import Entity as EntitySchema
 from basic_memory.services.exceptions import FileOperationError
+from basic_memory.utils import FilePath
+from loguru import logger


 class FileService:
-    """Service for handling file operations.
+    """Service for handling file operations with concurrency control.

     All paths are handled as Path objects internally. Strings are converted to
     Path objects when passed in. Relative paths are assumed to be relative to
     base_path.

     Features:
+    - True async I/O with aiofiles (non-blocking)
+    - Built-in concurrency limits (semaphore)
     - Consistent file writing with checksums
     - Frontmatter management
     - Atomic operations
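
The FilePath import from basic_memory.utils is what lets every method below accept either a Path or a plain string (the "Path or string" wording in the docstrings). Its definition is not part of this diff; a type alias along these lines would match how it is used here, offered as an assumption rather than the package's verbatim code:

    from pathlib import Path
    from typing import Union

    # Assumed shape of basic_memory.utils.FilePath, inferred from its usage in FileService.
    FilePath = Union[Path, str]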
@@ -30,9 +44,15 @@ class FileService:
         self,
         base_path: Path,
         markdown_processor: MarkdownProcessor,
+        max_concurrent_files: int = 10,
+        app_config: Optional["BasicMemoryConfig"] = None,
     ):
         self.base_path = base_path.resolve()  # Get absolute path
         self.markdown_processor = markdown_processor
+        self.app_config = app_config
+        # Semaphore to limit concurrent file operations
+        # Prevents OOM on large projects by processing files in batches
+        self._file_semaphore = asyncio.Semaphore(max_concurrent_files)

     def get_entity_path(self, entity: Union[EntityModel, EntitySchema]) -> Path:
         """Generate absolute filesystem path for entity.
@@ -57,7 +77,7 @@ class FileService:
         Returns:
             Raw content string without metadata sections
         """
-        logger.debug(f"Reading entity with permalink: {entity.permalink}")
+        logger.debug(f"Reading entity content, entity_id={entity.id}, permalink={entity.permalink}")

         file_path = self.get_entity_path(entity)
         markdown = await self.markdown_processor.read_file(file_path)
@@ -75,13 +95,13 @@ class FileService:
         path = self.get_entity_path(entity)
         await self.delete_file(path)

-    async def exists(self, path: Union[Path, str]) -> bool:
+    async def exists(self, path: FilePath) -> bool:
         """Check if file exists at the provided path.

         If path is relative, it is assumed to be relative to base_path.

         Args:
-            path: Path to check (Path object or string)
+            path: Path to check (Path or string)

         Returns:
             True if file exists, False otherwise
@@ -90,58 +110,183 @@ class FileService:
             FileOperationError: If check fails
         """
         try:
-            path = Path(path)
-            if path.is_absolute():
-                return path.exists()
+            # Convert string to Path if needed
+            path_obj = self.base_path / path if isinstance(path, str) else path
+            logger.debug(f"Checking file existence: path={path_obj}")
+            if path_obj.is_absolute():
+                return path_obj.exists()
             else:
-                return (self.base_path / path).exists()
+                return (self.base_path / path_obj).exists()
         except Exception as e:
-            logger.error(f"Failed to check file existence {path}: {e}")
+            logger.error("Failed to check file existence", path=str(path), error=str(e))
             raise FileOperationError(f"Failed to check file existence: {e}")

-    async def write_file(self, path: Union[Path, str], content: str) -> str:
+    async def ensure_directory(self, path: FilePath) -> None:
+        """Ensure directory exists, creating if necessary.
+
+        Uses semaphore to control concurrency for directory creation operations.
+
+        Args:
+            path: Directory path to ensure (Path or string)
+
+        Raises:
+            FileOperationError: If directory creation fails
+        """
+        try:
+            # Convert string to Path if needed
+            path_obj = self.base_path / path if isinstance(path, str) else path
+            full_path = path_obj if path_obj.is_absolute() else self.base_path / path_obj
+
+            # Use semaphore for concurrency control
+            async with self._file_semaphore:
+                # Run blocking mkdir in thread pool
+                loop = asyncio.get_event_loop()
+                await loop.run_in_executor(
+                    None, lambda: full_path.mkdir(parents=True, exist_ok=True)
+                )
+        except Exception as e:  # pragma: no cover
+            logger.error("Failed to create directory", path=str(path), error=str(e))
+            raise FileOperationError(f"Failed to create directory {path}: {e}")
+
+    async def write_file(self, path: FilePath, content: str) -> str:
         """Write content to file and return checksum.

         Handles both absolute and relative paths. Relative paths are resolved
         against base_path.

+        If format_on_save is enabled in config, runs the configured formatter
+        after writing and returns the checksum of the formatted content.
+
         Args:
-            path: Where to write (Path object or string)
+            path: Where to write (Path or string)
             content: Content to write

         Returns:
-            Checksum of written content
+            Checksum of written content (or formatted content if formatting enabled)

         Raises:
             FileOperationError: If write fails
         """
-        path = Path(path)
-        full_path = path if path.is_absolute() else self.base_path / path
+        # Convert string to Path if needed
+        path_obj = self.base_path / path if isinstance(path, str) else path
+        full_path = path_obj if path_obj.is_absolute() else self.base_path / path_obj

         try:
             # Ensure parent directory exists
-            await file_utils.ensure_directory(full_path.parent)
+            await self.ensure_directory(full_path.parent)

             # Write content atomically
+            logger.info(
+                "Writing file: "
+                f"path={path_obj}, "
+                f"content_length={len(content)}, "
+                f"is_markdown={full_path.suffix.lower() == '.md'}"
+            )
+
             await file_utils.write_file_atomic(full_path, content)

-            # Compute and return checksum
-            checksum = await file_utils.compute_checksum(content)
-            logger.debug(f"wrote file: {full_path}, checksum: {checksum}")
+            # Format file if configured
+            final_content = content
+            if self.app_config:
+                formatted_content = await file_utils.format_file(
+                    full_path, self.app_config, is_markdown=self.is_markdown(path)
+                )
+                if formatted_content is not None:
+                    final_content = formatted_content  # pragma: no cover
+
+            # Compute and return checksum of final content
+            checksum = await file_utils.compute_checksum(final_content)
+            logger.debug(f"File write completed path={full_path}, {checksum=}")
             return checksum

         except Exception as e:
-            logger.error(f"Failed to write file {full_path}: {e}")
+            logger.exception("File write error", path=str(full_path), error=str(e))
             raise FileOperationError(f"Failed to write file: {e}")

-    async def read_file(self, path: Union[Path, str]) -> Tuple[str, str]:
-        """Read file and compute checksum.
+    async def read_file_content(self, path: FilePath) -> str:
+        """Read file content using true async I/O with aiofiles.

         Handles both absolute and relative paths. Relative paths are resolved
         against base_path.

         Args:
-            path: Path to read (Path object or string)
+            path: Path to read (Path or string)
+
+        Returns:
+            File content as string
+
+        Raises:
+            FileOperationError: If read fails
+        """
+        # Convert string to Path if needed
+        path_obj = self.base_path / path if isinstance(path, str) else path
+        full_path = path_obj if path_obj.is_absolute() else self.base_path / path_obj
+
+        try:
+            logger.debug("Reading file content", operation="read_file_content", path=str(full_path))
+            async with aiofiles.open(full_path, mode="r", encoding="utf-8") as f:
+                content = await f.read()
+
+            logger.debug(
+                "File read completed",
+                path=str(full_path),
+                content_length=len(content),
+            )
+            return content
+
+        except FileNotFoundError:
+            # Preserve FileNotFoundError so callers (e.g. sync) can treat it as deletion.
+            logger.warning("File not found", operation="read_file_content", path=str(full_path))
+            raise
+        except Exception as e:
+            logger.exception("File read error", path=str(full_path), error=str(e))
+            raise FileOperationError(f"Failed to read file: {e}")
+
+    async def read_file_bytes(self, path: FilePath) -> bytes:
+        """Read file content as bytes using true async I/O with aiofiles.
+
+        This method reads files in binary mode, suitable for non-text files
+        like images, PDFs, etc. For cloud compatibility with S3FileService.
+
+        Args:
+            path: Path to read (Path or string)
+
+        Returns:
+            File content as bytes
+
+        Raises:
+            FileOperationError: If read fails
+        """
+        # Convert string to Path if needed
+        path_obj = self.base_path / path if isinstance(path, str) else path
+        full_path = path_obj if path_obj.is_absolute() else self.base_path / path_obj
+
+        try:
+            logger.debug("Reading file bytes", operation="read_file_bytes", path=str(full_path))
+            async with aiofiles.open(full_path, mode="rb") as f:
+                content = await f.read()
+
+            logger.debug(
+                "File read completed",
+                path=str(full_path),
+                content_length=len(content),
+            )
+            return content
+
+        except Exception as e:
+            logger.exception("File read error", path=str(full_path), error=str(e))
+            raise FileOperationError(f"Failed to read file: {e}")
+
+    async def read_file(self, path: FilePath) -> Tuple[str, str]:
+        """Read file and compute checksum using true async I/O.
+
+        Uses aiofiles for non-blocking file reads.
+
+        Handles both absolute and relative paths. Relative paths are resolved
+        against base_path.
+
+        Args:
+            path: Path to read (Path or string)

         Returns:
             Tuple of (content, checksum)
@@ -149,28 +294,252 @@ class FileService:
         Raises:
             FileOperationError: If read fails
         """
-        path = Path(path)
-        full_path = path if path.is_absolute() else self.base_path / path
+        # Convert string to Path if needed
+        path_obj = self.base_path / path if isinstance(path, str) else path
+        full_path = path_obj if path_obj.is_absolute() else self.base_path / path_obj

         try:
-            content = path.read_text()
+            logger.debug("Reading file", operation="read_file", path=str(full_path))
+
+            # Use aiofiles for non-blocking read
+            async with aiofiles.open(full_path, mode="r", encoding="utf-8") as f:
+                content = await f.read()
+
             checksum = await file_utils.compute_checksum(content)
-            logger.debug(f"read file: {full_path}, checksum: {checksum}")
+
+            logger.debug(
+                "File read completed",
+                path=str(full_path),
+                checksum=checksum,
+                content_length=len(content),
+            )
             return content, checksum

         except Exception as e:
-            logger.error(f"Failed to read file {full_path}: {e}")
+            logger.exception("File read error", path=str(full_path), error=str(e))
             raise FileOperationError(f"Failed to read file: {e}")

-    async def delete_file(self, path: Union[Path, str]) -> None:
+    async def delete_file(self, path: FilePath) -> None:
         """Delete file if it exists.

         Handles both absolute and relative paths. Relative paths are resolved
         against base_path.

         Args:
-            path: Path to delete (Path object or string)
+            path: Path to delete (Path or string)
         """
-        path = Path(path)
-        full_path = path if path.is_absolute() else self.base_path / path
+        # Convert string to Path if needed
+        path_obj = self.base_path / path if isinstance(path, str) else path
+        full_path = path_obj if path_obj.is_absolute() else self.base_path / path_obj
         full_path.unlink(missing_ok=True)
+
+    async def move_file(self, source: FilePath, destination: FilePath) -> None:
+        """Move/rename a file from source to destination.
+
+        This method abstracts the underlying storage (filesystem vs cloud).
+        Default implementation uses atomic filesystem rename, but cloud-backed
+        implementations (e.g., S3) can override to copy+delete.
+
+        Args:
+            source: Source path (relative to base_path or absolute)
+            destination: Destination path (relative to base_path or absolute)
+
+        Raises:
+            FileOperationError: If the move fails
+        """
+        # Convert strings to Paths and resolve relative paths against base_path
+        src_obj = self.base_path / source if isinstance(source, str) else source
+        dst_obj = self.base_path / destination if isinstance(destination, str) else destination
+        src_full = src_obj if src_obj.is_absolute() else self.base_path / src_obj
+        dst_full = dst_obj if dst_obj.is_absolute() else self.base_path / dst_obj
+
+        try:
+            # Ensure destination directory exists
+            await self.ensure_directory(dst_full.parent)
+
+            # Use semaphore for concurrency control and run blocking rename in executor
+            async with self._file_semaphore:
+                loop = asyncio.get_event_loop()
+                await loop.run_in_executor(None, lambda: src_full.rename(dst_full))
+        except Exception as e:  # pragma: no cover
+            logger.exception(
+                "File move error",
+                source=str(src_full),
+                destination=str(dst_full),
+                error=str(e),
+            )
+            raise FileOperationError(f"Failed to move file {source} -> {destination}: {e}")
+
+    async def update_frontmatter(self, path: FilePath, updates: Dict[str, Any]) -> str:
+        """Update frontmatter fields in a file while preserving all content.
+
+        Only modifies the frontmatter section, leaving all content untouched.
+        Creates frontmatter section if none exists.
+        Returns checksum of updated file.
+
+        Uses aiofiles for true async I/O (non-blocking).
+
+        Args:
+            path: Path to markdown file (Path or string)
+            updates: Dict of frontmatter fields to update
+
+        Returns:
+            Checksum of updated file
+
+        Raises:
+            FileOperationError: If file operations fail
+            ParseError: If frontmatter parsing fails
+        """
+        # Convert string to Path if needed
+        path_obj = self.base_path / path if isinstance(path, str) else path
+        full_path = path_obj if path_obj.is_absolute() else self.base_path / path_obj
+
+        try:
+            # Read current content using aiofiles
+            async with aiofiles.open(full_path, mode="r", encoding="utf-8") as f:
+                content = await f.read()
+
+            # Parse current frontmatter with proper error handling for malformed YAML
+            current_fm = {}
+            if file_utils.has_frontmatter(content):
+                try:
+                    current_fm = file_utils.parse_frontmatter(content)
+                    content = file_utils.remove_frontmatter(content)
+                except (ParseError, yaml.YAMLError) as e:  # pragma: no cover
+                    # Log warning and treat as plain markdown without frontmatter
+                    logger.warning(  # pragma: no cover
+                        f"Failed to parse YAML frontmatter in {full_path}: {e}. "
+                        "Treating file as plain markdown without frontmatter."
+                    )
+                    # Keep full content, treat as having no frontmatter
+                    current_fm = {}  # pragma: no cover
+
+            # Update frontmatter
+            new_fm = {**current_fm, **updates}
+
+            # Write new file with updated frontmatter
+            yaml_fm = yaml.dump(new_fm, sort_keys=False, allow_unicode=True)
+            final_content = f"---\n{yaml_fm}---\n\n{content.strip()}"
+
+            logger.debug(
+                "Updating frontmatter", path=str(full_path), update_keys=list(updates.keys())
+            )
+
+            await file_utils.write_file_atomic(full_path, final_content)
+
+            # Format file if configured
+            content_for_checksum = final_content
+            if self.app_config:
+                formatted_content = await file_utils.format_file(
+                    full_path, self.app_config, is_markdown=self.is_markdown(path)
+                )
+                if formatted_content is not None:
+                    content_for_checksum = formatted_content  # pragma: no cover
+
+            return await file_utils.compute_checksum(content_for_checksum)
+
+        except Exception as e:  # pragma: no cover
+            # Only log real errors (not YAML parsing, which is handled above)
+            if not isinstance(e, (ParseError, yaml.YAMLError)):
+                logger.error(
+                    "Failed to update frontmatter",
+                    path=str(full_path),
+                    error=str(e),
+                )
+            raise FileOperationError(f"Failed to update frontmatter: {e}")
+
+    async def compute_checksum(self, path: FilePath) -> str:
+        """Compute checksum for a file using true async I/O.
+
+        Uses aiofiles for non-blocking I/O with 64KB chunked reading.
+        Semaphore limits concurrent file operations to prevent OOM.
+        Memory usage is constant regardless of file size.
+
+        Args:
+            path: Path to the file (Path or string)
+
+        Returns:
+            SHA256 checksum hex string
+
+        Raises:
+            FileError: If checksum computation fails
+        """
+        # Convert string to Path if needed
+        path_obj = self.base_path / path if isinstance(path, str) else path
+        full_path = path_obj if path_obj.is_absolute() else self.base_path / path_obj
+
+        # Semaphore controls concurrency - max N files processed at once
+        async with self._file_semaphore:
+            try:
+                hasher = hashlib.sha256()
+                chunk_size = 65536  # 64KB chunks
+
+                # async I/O with aiofiles
+                async with aiofiles.open(full_path, mode="rb") as f:
+                    while chunk := await f.read(chunk_size):
+                        hasher.update(chunk)
+
+                return hasher.hexdigest()
+
+            except Exception as e:  # pragma: no cover
+                logger.error("Failed to compute checksum", path=str(full_path), error=str(e))
+                raise FileError(f"Failed to compute checksum for {path}: {e}")
+
+    async def get_file_metadata(self, path: FilePath) -> FileMetadata:
+        """Return file metadata for a given path.
+
+        This method is async to support cloud implementations (S3FileService)
+        where file metadata requires async operations (head_object).
+
+        Args:
+            path: Path to the file (Path or string)
+
+        Returns:
+            FileMetadata with size, created_at, and modified_at
+        """
+        # Convert string to Path if needed
+        path_obj = self.base_path / path if isinstance(path, str) else path
+        full_path = path_obj if path_obj.is_absolute() else self.base_path / path_obj
+
+        # Run blocking stat() in thread pool to maintain async compatibility
+        loop = asyncio.get_event_loop()
+        stat_result = await loop.run_in_executor(None, full_path.stat)
+
+        return FileMetadata(
+            size=stat_result.st_size,
+            created_at=datetime.fromtimestamp(stat_result.st_ctime).astimezone(),
+            modified_at=datetime.fromtimestamp(stat_result.st_mtime).astimezone(),
+        )
+
+    def content_type(self, path: FilePath) -> str:
+        """Return content_type for a given path.
+
+        Args:
+            path: Path to the file (Path or string)
+
+        Returns:
+            MIME type of the file
+        """
+        # Convert string to Path if needed
+        path_obj = self.base_path / path if isinstance(path, str) else path
+        full_path = path_obj if path_obj.is_absolute() else self.base_path / path_obj
+        # get file timestamps
+        mime_type, _ = mimetypes.guess_type(full_path.name)
+
+        # .canvas files are json
+        if full_path.suffix == ".canvas":
+            mime_type = "application/json"
+
+        content_type = mime_type or "text/plain"
+        return content_type
+
+    def is_markdown(self, path: FilePath) -> bool:
+        """Check if a file is a markdown file.
+
+        Args:
+            path: Path to the file (Path or string)
+
+        Returns:
+            True if the file is a markdown file, False otherwise
+        """
+        return self.content_type(path) == "text/markdown"
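
The compute_checksum method above illustrates the chunked-hashing pattern the service now relies on: read fixed-size chunks through aiofiles so memory stays constant even for large attachments. A standalone sketch of the same pattern, runnable outside the service (assumes aiofiles is installed; the example path is illustrative):

    import asyncio
    import hashlib

    import aiofiles


    async def sha256_of(path: str, chunk_size: int = 65536) -> str:
        """Hash a file in 64KB chunks without loading it fully into memory."""
        hasher = hashlib.sha256()
        async with aiofiles.open(path, mode="rb") as f:
            while chunk := await f.read(chunk_size):
                hasher.update(chunk)
        return hasher.hexdigest()


    if __name__ == "__main__":
        print(asyncio.run(sha256_of("notes/example.md")))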