basic-memory 0.7.0__py3-none-any.whl → 0.17.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- basic_memory/__init__.py +5 -1
- basic_memory/alembic/alembic.ini +119 -0
- basic_memory/alembic/env.py +130 -20
- basic_memory/alembic/migrations.py +4 -9
- basic_memory/alembic/versions/314f1ea54dc4_add_postgres_full_text_search_support_.py +131 -0
- basic_memory/alembic/versions/502b60eaa905_remove_required_from_entity_permalink.py +51 -0
- basic_memory/alembic/versions/5fe1ab1ccebe_add_projects_table.py +120 -0
- basic_memory/alembic/versions/647e7a75e2cd_project_constraint_fix.py +112 -0
- basic_memory/alembic/versions/6830751f5fb6_merge_multiple_heads.py +24 -0
- basic_memory/alembic/versions/9d9c1cb7d8f5_add_mtime_and_size_columns_to_entity_.py +49 -0
- basic_memory/alembic/versions/a1b2c3d4e5f6_fix_project_foreign_keys.py +49 -0
- basic_memory/alembic/versions/a2b3c4d5e6f7_add_search_index_entity_cascade.py +56 -0
- basic_memory/alembic/versions/b3c3938bacdb_relation_to_name_unique_index.py +44 -0
- basic_memory/alembic/versions/cc7172b46608_update_search_index_schema.py +113 -0
- basic_memory/alembic/versions/e7e1f4367280_add_scan_watermark_tracking_to_project.py +37 -0
- basic_memory/alembic/versions/f8a9b2c3d4e5_add_pg_trgm_for_fuzzy_link_resolution.py +239 -0
- basic_memory/alembic/versions/g9a0b3c4d5e6_add_external_id_to_project_and_entity.py +173 -0
- basic_memory/api/app.py +87 -20
- basic_memory/api/container.py +133 -0
- basic_memory/api/routers/__init__.py +4 -1
- basic_memory/api/routers/directory_router.py +84 -0
- basic_memory/api/routers/importer_router.py +152 -0
- basic_memory/api/routers/knowledge_router.py +180 -23
- basic_memory/api/routers/management_router.py +80 -0
- basic_memory/api/routers/memory_router.py +9 -64
- basic_memory/api/routers/project_router.py +460 -0
- basic_memory/api/routers/prompt_router.py +260 -0
- basic_memory/api/routers/resource_router.py +136 -11
- basic_memory/api/routers/search_router.py +5 -5
- basic_memory/api/routers/utils.py +169 -0
- basic_memory/api/template_loader.py +292 -0
- basic_memory/api/v2/__init__.py +35 -0
- basic_memory/api/v2/routers/__init__.py +21 -0
- basic_memory/api/v2/routers/directory_router.py +93 -0
- basic_memory/api/v2/routers/importer_router.py +181 -0
- basic_memory/api/v2/routers/knowledge_router.py +427 -0
- basic_memory/api/v2/routers/memory_router.py +130 -0
- basic_memory/api/v2/routers/project_router.py +359 -0
- basic_memory/api/v2/routers/prompt_router.py +269 -0
- basic_memory/api/v2/routers/resource_router.py +286 -0
- basic_memory/api/v2/routers/search_router.py +73 -0
- basic_memory/cli/app.py +80 -10
- basic_memory/cli/auth.py +300 -0
- basic_memory/cli/commands/__init__.py +15 -2
- basic_memory/cli/commands/cloud/__init__.py +6 -0
- basic_memory/cli/commands/cloud/api_client.py +127 -0
- basic_memory/cli/commands/cloud/bisync_commands.py +110 -0
- basic_memory/cli/commands/cloud/cloud_utils.py +108 -0
- basic_memory/cli/commands/cloud/core_commands.py +195 -0
- basic_memory/cli/commands/cloud/rclone_commands.py +397 -0
- basic_memory/cli/commands/cloud/rclone_config.py +110 -0
- basic_memory/cli/commands/cloud/rclone_installer.py +263 -0
- basic_memory/cli/commands/cloud/upload.py +240 -0
- basic_memory/cli/commands/cloud/upload_command.py +124 -0
- basic_memory/cli/commands/command_utils.py +99 -0
- basic_memory/cli/commands/db.py +87 -12
- basic_memory/cli/commands/format.py +198 -0
- basic_memory/cli/commands/import_chatgpt.py +47 -223
- basic_memory/cli/commands/import_claude_conversations.py +48 -171
- basic_memory/cli/commands/import_claude_projects.py +53 -160
- basic_memory/cli/commands/import_memory_json.py +55 -111
- basic_memory/cli/commands/mcp.py +67 -11
- basic_memory/cli/commands/project.py +889 -0
- basic_memory/cli/commands/status.py +52 -34
- basic_memory/cli/commands/telemetry.py +81 -0
- basic_memory/cli/commands/tool.py +341 -0
- basic_memory/cli/container.py +84 -0
- basic_memory/cli/main.py +14 -6
- basic_memory/config.py +580 -26
- basic_memory/db.py +285 -28
- basic_memory/deps/__init__.py +293 -0
- basic_memory/deps/config.py +26 -0
- basic_memory/deps/db.py +56 -0
- basic_memory/deps/importers.py +200 -0
- basic_memory/deps/projects.py +238 -0
- basic_memory/deps/repositories.py +179 -0
- basic_memory/deps/services.py +480 -0
- basic_memory/deps.py +16 -185
- basic_memory/file_utils.py +318 -54
- basic_memory/ignore_utils.py +297 -0
- basic_memory/importers/__init__.py +27 -0
- basic_memory/importers/base.py +100 -0
- basic_memory/importers/chatgpt_importer.py +245 -0
- basic_memory/importers/claude_conversations_importer.py +192 -0
- basic_memory/importers/claude_projects_importer.py +184 -0
- basic_memory/importers/memory_json_importer.py +128 -0
- basic_memory/importers/utils.py +61 -0
- basic_memory/markdown/entity_parser.py +182 -23
- basic_memory/markdown/markdown_processor.py +70 -7
- basic_memory/markdown/plugins.py +43 -23
- basic_memory/markdown/schemas.py +1 -1
- basic_memory/markdown/utils.py +38 -14
- basic_memory/mcp/async_client.py +135 -4
- basic_memory/mcp/clients/__init__.py +28 -0
- basic_memory/mcp/clients/directory.py +70 -0
- basic_memory/mcp/clients/knowledge.py +176 -0
- basic_memory/mcp/clients/memory.py +120 -0
- basic_memory/mcp/clients/project.py +89 -0
- basic_memory/mcp/clients/resource.py +71 -0
- basic_memory/mcp/clients/search.py +65 -0
- basic_memory/mcp/container.py +110 -0
- basic_memory/mcp/project_context.py +155 -0
- basic_memory/mcp/prompts/__init__.py +19 -0
- basic_memory/mcp/prompts/ai_assistant_guide.py +70 -0
- basic_memory/mcp/prompts/continue_conversation.py +62 -0
- basic_memory/mcp/prompts/recent_activity.py +188 -0
- basic_memory/mcp/prompts/search.py +57 -0
- basic_memory/mcp/prompts/utils.py +162 -0
- basic_memory/mcp/resources/ai_assistant_guide.md +283 -0
- basic_memory/mcp/resources/project_info.py +71 -0
- basic_memory/mcp/server.py +61 -9
- basic_memory/mcp/tools/__init__.py +33 -21
- basic_memory/mcp/tools/build_context.py +120 -0
- basic_memory/mcp/tools/canvas.py +152 -0
- basic_memory/mcp/tools/chatgpt_tools.py +190 -0
- basic_memory/mcp/tools/delete_note.py +249 -0
- basic_memory/mcp/tools/edit_note.py +325 -0
- basic_memory/mcp/tools/list_directory.py +157 -0
- basic_memory/mcp/tools/move_note.py +549 -0
- basic_memory/mcp/tools/project_management.py +204 -0
- basic_memory/mcp/tools/read_content.py +281 -0
- basic_memory/mcp/tools/read_note.py +265 -0
- basic_memory/mcp/tools/recent_activity.py +528 -0
- basic_memory/mcp/tools/search.py +377 -24
- basic_memory/mcp/tools/utils.py +402 -16
- basic_memory/mcp/tools/view_note.py +78 -0
- basic_memory/mcp/tools/write_note.py +230 -0
- basic_memory/models/__init__.py +3 -2
- basic_memory/models/knowledge.py +82 -17
- basic_memory/models/project.py +93 -0
- basic_memory/models/search.py +68 -8
- basic_memory/project_resolver.py +222 -0
- basic_memory/repository/__init__.py +2 -0
- basic_memory/repository/entity_repository.py +437 -8
- basic_memory/repository/observation_repository.py +36 -3
- basic_memory/repository/postgres_search_repository.py +451 -0
- basic_memory/repository/project_info_repository.py +10 -0
- basic_memory/repository/project_repository.py +140 -0
- basic_memory/repository/relation_repository.py +79 -4
- basic_memory/repository/repository.py +148 -29
- basic_memory/repository/search_index_row.py +95 -0
- basic_memory/repository/search_repository.py +79 -268
- basic_memory/repository/search_repository_base.py +241 -0
- basic_memory/repository/sqlite_search_repository.py +437 -0
- basic_memory/runtime.py +61 -0
- basic_memory/schemas/__init__.py +22 -9
- basic_memory/schemas/base.py +131 -12
- basic_memory/schemas/cloud.py +50 -0
- basic_memory/schemas/directory.py +31 -0
- basic_memory/schemas/importer.py +35 -0
- basic_memory/schemas/memory.py +194 -25
- basic_memory/schemas/project_info.py +213 -0
- basic_memory/schemas/prompt.py +90 -0
- basic_memory/schemas/request.py +56 -2
- basic_memory/schemas/response.py +85 -28
- basic_memory/schemas/search.py +36 -35
- basic_memory/schemas/sync_report.py +72 -0
- basic_memory/schemas/v2/__init__.py +27 -0
- basic_memory/schemas/v2/entity.py +133 -0
- basic_memory/schemas/v2/resource.py +47 -0
- basic_memory/services/__init__.py +2 -1
- basic_memory/services/context_service.py +451 -138
- basic_memory/services/directory_service.py +310 -0
- basic_memory/services/entity_service.py +636 -71
- basic_memory/services/exceptions.py +21 -0
- basic_memory/services/file_service.py +402 -33
- basic_memory/services/initialization.py +216 -0
- basic_memory/services/link_resolver.py +50 -56
- basic_memory/services/project_service.py +888 -0
- basic_memory/services/search_service.py +232 -37
- basic_memory/sync/__init__.py +4 -2
- basic_memory/sync/background_sync.py +26 -0
- basic_memory/sync/coordinator.py +160 -0
- basic_memory/sync/sync_service.py +1200 -109
- basic_memory/sync/watch_service.py +432 -135
- basic_memory/telemetry.py +249 -0
- basic_memory/templates/prompts/continue_conversation.hbs +110 -0
- basic_memory/templates/prompts/search.hbs +101 -0
- basic_memory/utils.py +407 -54
- basic_memory-0.17.4.dist-info/METADATA +617 -0
- basic_memory-0.17.4.dist-info/RECORD +193 -0
- {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/WHEEL +1 -1
- {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/entry_points.txt +1 -0
- basic_memory/alembic/README +0 -1
- basic_memory/cli/commands/sync.py +0 -206
- basic_memory/cli/commands/tools.py +0 -157
- basic_memory/mcp/tools/knowledge.py +0 -68
- basic_memory/mcp/tools/memory.py +0 -170
- basic_memory/mcp/tools/notes.py +0 -202
- basic_memory/schemas/discovery.py +0 -28
- basic_memory/sync/file_change_scanner.py +0 -158
- basic_memory/sync/utils.py +0 -31
- basic_memory-0.7.0.dist-info/METADATA +0 -378
- basic_memory-0.7.0.dist-info/RECORD +0 -82
- {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/licenses/LICENSE +0 -0
basic_memory/services/exceptions.py

@@ -14,3 +14,24 @@ class EntityCreationError(Exception):
     """Raised when an entity cannot be created"""
 
     pass
+
+
+class DirectoryOperationError(Exception):
+    """Raised when directory operations fail"""
+
+    pass
+
+
+class SyncFatalError(Exception):
+    """Raised when sync encounters a fatal error that prevents continuation.
+
+    Fatal errors include:
+    - Project deleted during sync (FOREIGN KEY constraint)
+    - Database corruption
+    - Critical system failures
+
+    When this exception is raised, the entire sync operation should be terminated
+    immediately rather than attempting to continue with remaining files.
+    """
+
+    pass
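The new SyncFatalError is documented as an abort signal: ordinary per-file problems can be skipped, but a fatal error must stop the whole run. A minimal sketch of a caller honoring that contract (the sync_one helper and the loop below are illustrative, not code from this package):

```python
import asyncio


class SyncFatalError(Exception):
    """Fatal error: the entire sync run must stop (mirrors the class added above)."""


async def sync_one(path: str) -> None:
    """Stand-in for the real per-file sync step (hypothetical)."""
    if "deleted-project" in path:
        raise SyncFatalError("project deleted during sync")


async def sync_files(paths: list[str]) -> dict:
    """Record and skip ordinary failures; let SyncFatalError terminate immediately."""
    report: dict = {"synced": [], "failed": []}
    for path in paths:
        try:
            await sync_one(path)
            report["synced"].append(path)
        except SyncFatalError:
            raise  # abort the whole operation rather than continuing with remaining files
        except Exception as exc:
            report["failed"].append((path, str(exc)))
    return report


if __name__ == "__main__":
    print(asyncio.run(sync_files(["notes/a.md", "notes/b.md"])))
```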
basic_memory/services/file_service.py

@@ -1,25 +1,39 @@
 """Service for file operations with checksum tracking."""
 
+import asyncio
+import hashlib
+import mimetypes
+from datetime import datetime
 from pathlib import Path
-from typing import Tuple, Union
+from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
 
-
+import aiofiles
+
+import yaml
 
 from basic_memory import file_utils
+
+if TYPE_CHECKING:  # pragma: no cover
+    from basic_memory.config import BasicMemoryConfig
+from basic_memory.file_utils import FileError, FileMetadata, ParseError
 from basic_memory.markdown.markdown_processor import MarkdownProcessor
 from basic_memory.models import Entity as EntityModel
 from basic_memory.schemas import Entity as EntitySchema
 from basic_memory.services.exceptions import FileOperationError
+from basic_memory.utils import FilePath
+from loguru import logger
 
 
 class FileService:
-    """Service for handling file operations.
+    """Service for handling file operations with concurrency control.
 
     All paths are handled as Path objects internally. Strings are converted to
     Path objects when passed in. Relative paths are assumed to be relative to
     base_path.
 
     Features:
+    - True async I/O with aiofiles (non-blocking)
+    - Built-in concurrency limits (semaphore)
     - Consistent file writing with checksums
     - Frontmatter management
     - Atomic operations
@@ -30,9 +44,15 @@ class FileService:
         self,
         base_path: Path,
         markdown_processor: MarkdownProcessor,
+        max_concurrent_files: int = 10,
+        app_config: Optional["BasicMemoryConfig"] = None,
     ):
         self.base_path = base_path.resolve()  # Get absolute path
         self.markdown_processor = markdown_processor
+        self.app_config = app_config
+        # Semaphore to limit concurrent file operations
+        # Prevents OOM on large projects by processing files in batches
+        self._file_semaphore = asyncio.Semaphore(max_concurrent_files)
 
     def get_entity_path(self, entity: Union[EntityModel, EntitySchema]) -> Path:
         """Generate absolute filesystem path for entity.
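The constructor now holds an asyncio.Semaphore that the methods added further down (ensure_directory, move_file, compute_checksum) acquire before handing blocking filesystem calls to a thread pool. A standalone sketch of that bounded-concurrency pattern, assuming the default cap of 10 shown above (ensure_dir and the /tmp paths are illustrative, not from the package):

```python
import asyncio
from pathlib import Path

_file_semaphore = asyncio.Semaphore(10)  # cap concurrent file operations, as in FileService


async def ensure_dir(path: Path) -> None:
    """Create a directory without blocking the event loop."""
    async with _file_semaphore:  # at most 10 operations in flight at once
        loop = asyncio.get_event_loop()
        await loop.run_in_executor(None, lambda: path.mkdir(parents=True, exist_ok=True))


async def main() -> None:
    # Even with hundreds of directories queued, only 10 mkdir calls run concurrently.
    dirs = [Path(f"/tmp/bm-demo/{i}") for i in range(100)]
    await asyncio.gather(*(ensure_dir(d) for d in dirs))


if __name__ == "__main__":
    asyncio.run(main())
```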
@@ -57,7 +77,7 @@ class FileService:
         Returns:
             Raw content string without metadata sections
         """
-        logger.debug(f"Reading entity
+        logger.debug(f"Reading entity content, entity_id={entity.id}, permalink={entity.permalink}")
 
         file_path = self.get_entity_path(entity)
         markdown = await self.markdown_processor.read_file(file_path)
@@ -75,13 +95,13 @@ class FileService:
         path = self.get_entity_path(entity)
         await self.delete_file(path)
 
-    async def exists(self, path:
+    async def exists(self, path: FilePath) -> bool:
         """Check if file exists at the provided path.
 
         If path is relative, it is assumed to be relative to base_path.
 
         Args:
-            path: Path to check (Path
+            path: Path to check (Path or string)
 
         Returns:
             True if file exists, False otherwise
@@ -90,58 +110,183 @@ class FileService:
             FileOperationError: If check fails
         """
         try:
-
-            if path
-
+            # Convert string to Path if needed
+            path_obj = self.base_path / path if isinstance(path, str) else path
+            logger.debug(f"Checking file existence: path={path_obj}")
+            if path_obj.is_absolute():
+                return path_obj.exists()
             else:
-                return (self.base_path /
+                return (self.base_path / path_obj).exists()
         except Exception as e:
-            logger.error(
+            logger.error("Failed to check file existence", path=str(path), error=str(e))
             raise FileOperationError(f"Failed to check file existence: {e}")
 
-    async def
+    async def ensure_directory(self, path: FilePath) -> None:
+        """Ensure directory exists, creating if necessary.
+
+        Uses semaphore to control concurrency for directory creation operations.
+
+        Args:
+            path: Directory path to ensure (Path or string)
+
+        Raises:
+            FileOperationError: If directory creation fails
+        """
+        try:
+            # Convert string to Path if needed
+            path_obj = self.base_path / path if isinstance(path, str) else path
+            full_path = path_obj if path_obj.is_absolute() else self.base_path / path_obj
+
+            # Use semaphore for concurrency control
+            async with self._file_semaphore:
+                # Run blocking mkdir in thread pool
+                loop = asyncio.get_event_loop()
+                await loop.run_in_executor(
+                    None, lambda: full_path.mkdir(parents=True, exist_ok=True)
+                )
+        except Exception as e:  # pragma: no cover
+            logger.error("Failed to create directory", path=str(path), error=str(e))
+            raise FileOperationError(f"Failed to create directory {path}: {e}")
+
+    async def write_file(self, path: FilePath, content: str) -> str:
         """Write content to file and return checksum.
 
         Handles both absolute and relative paths. Relative paths are resolved
         against base_path.
 
+        If format_on_save is enabled in config, runs the configured formatter
+        after writing and returns the checksum of the formatted content.
+
         Args:
-            path: Where to write (Path
+            path: Where to write (Path or string)
             content: Content to write
 
         Returns:
-            Checksum of written content
+            Checksum of written content (or formatted content if formatting enabled)
 
         Raises:
             FileOperationError: If write fails
         """
-
-
+        # Convert string to Path if needed
+        path_obj = self.base_path / path if isinstance(path, str) else path
+        full_path = path_obj if path_obj.is_absolute() else self.base_path / path_obj
 
         try:
             # Ensure parent directory exists
-            await
+            await self.ensure_directory(full_path.parent)
 
             # Write content atomically
+            logger.info(
+                "Writing file: "
+                f"path={path_obj}, "
+                f"content_length={len(content)}, "
+                f"is_markdown={full_path.suffix.lower() == '.md'}"
+            )
+
             await file_utils.write_file_atomic(full_path, content)
 
-            #
-
-
+            # Format file if configured
+            final_content = content
+            if self.app_config:
+                formatted_content = await file_utils.format_file(
+                    full_path, self.app_config, is_markdown=self.is_markdown(path)
+                )
+                if formatted_content is not None:
+                    final_content = formatted_content  # pragma: no cover
+
+            # Compute and return checksum of final content
+            checksum = await file_utils.compute_checksum(final_content)
+            logger.debug(f"File write completed path={full_path}, {checksum=}")
             return checksum
 
         except Exception as e:
-            logger.
+            logger.exception("File write error", path=str(full_path), error=str(e))
             raise FileOperationError(f"Failed to write file: {e}")
 
-    async def
-        """Read file
+    async def read_file_content(self, path: FilePath) -> str:
+        """Read file content using true async I/O with aiofiles.
 
         Handles both absolute and relative paths. Relative paths are resolved
         against base_path.
 
         Args:
-            path: Path to read (Path
+            path: Path to read (Path or string)
+
+        Returns:
+            File content as string
+
+        Raises:
+            FileOperationError: If read fails
+        """
+        # Convert string to Path if needed
+        path_obj = self.base_path / path if isinstance(path, str) else path
+        full_path = path_obj if path_obj.is_absolute() else self.base_path / path_obj
+
+        try:
+            logger.debug("Reading file content", operation="read_file_content", path=str(full_path))
+            async with aiofiles.open(full_path, mode="r", encoding="utf-8") as f:
+                content = await f.read()
+
+            logger.debug(
+                "File read completed",
+                path=str(full_path),
+                content_length=len(content),
+            )
+            return content
+
+        except FileNotFoundError:
+            # Preserve FileNotFoundError so callers (e.g. sync) can treat it as deletion.
+            logger.warning("File not found", operation="read_file_content", path=str(full_path))
+            raise
+        except Exception as e:
+            logger.exception("File read error", path=str(full_path), error=str(e))
+            raise FileOperationError(f"Failed to read file: {e}")
+
+    async def read_file_bytes(self, path: FilePath) -> bytes:
+        """Read file content as bytes using true async I/O with aiofiles.
+
+        This method reads files in binary mode, suitable for non-text files
+        like images, PDFs, etc. For cloud compatibility with S3FileService.
+
+        Args:
+            path: Path to read (Path or string)
+
+        Returns:
+            File content as bytes
+
+        Raises:
+            FileOperationError: If read fails
+        """
+        # Convert string to Path if needed
+        path_obj = self.base_path / path if isinstance(path, str) else path
+        full_path = path_obj if path_obj.is_absolute() else self.base_path / path_obj
+
+        try:
+            logger.debug("Reading file bytes", operation="read_file_bytes", path=str(full_path))
+            async with aiofiles.open(full_path, mode="rb") as f:
+                content = await f.read()
+
+            logger.debug(
+                "File read completed",
+                path=str(full_path),
+                content_length=len(content),
+            )
+            return content
+
+        except Exception as e:
+            logger.exception("File read error", path=str(full_path), error=str(e))
+            raise FileOperationError(f"Failed to read file: {e}")
+
+    async def read_file(self, path: FilePath) -> Tuple[str, str]:
+        """Read file and compute checksum using true async I/O.
+
+        Uses aiofiles for non-blocking file reads.
+
+        Handles both absolute and relative paths. Relative paths are resolved
+        against base_path.
+
+        Args:
+            path: Path to read (Path or string)
 
         Returns:
             Tuple of (content, checksum)
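Each of the new methods repeats the same normalization: strings are joined onto base_path, and any still-relative Path is then resolved against base_path as well. A small illustration of what that pair of lines does (the resolve_path name is ours; the package inlines this logic in every method):

```python
from pathlib import Path

BASE_PATH = Path("/notes")


def resolve_path(path: "Path | str", base_path: Path = BASE_PATH) -> Path:
    """Mirror the 'Convert string to Path if needed' pattern from FileService."""
    path_obj = base_path / path if isinstance(path, str) else path
    return path_obj if path_obj.is_absolute() else base_path / path_obj


# Strings and relative Paths resolve under base_path; absolute Paths pass through.
assert resolve_path("daily/2024-01-01.md") == Path("/notes/daily/2024-01-01.md")
assert resolve_path(Path("daily/2024-01-01.md")) == Path("/notes/daily/2024-01-01.md")
assert resolve_path(Path("/elsewhere/note.md")) == Path("/elsewhere/note.md")
```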
@@ -149,28 +294,252 @@ class FileService:
         Raises:
             FileOperationError: If read fails
         """
-
-
+        # Convert string to Path if needed
+        path_obj = self.base_path / path if isinstance(path, str) else path
+        full_path = path_obj if path_obj.is_absolute() else self.base_path / path_obj
 
         try:
-
+            logger.debug("Reading file", operation="read_file", path=str(full_path))
+
+            # Use aiofiles for non-blocking read
+            async with aiofiles.open(full_path, mode="r", encoding="utf-8") as f:
+                content = await f.read()
+
             checksum = await file_utils.compute_checksum(content)
-
+
+            logger.debug(
+                "File read completed",
+                path=str(full_path),
+                checksum=checksum,
+                content_length=len(content),
+            )
             return content, checksum
 
         except Exception as e:
-            logger.
+            logger.exception("File read error", path=str(full_path), error=str(e))
             raise FileOperationError(f"Failed to read file: {e}")
 
-    async def delete_file(self, path:
+    async def delete_file(self, path: FilePath) -> None:
         """Delete file if it exists.
 
         Handles both absolute and relative paths. Relative paths are resolved
         against base_path.
 
         Args:
-            path: Path to delete (Path
+            path: Path to delete (Path or string)
         """
-
-
+        # Convert string to Path if needed
+        path_obj = self.base_path / path if isinstance(path, str) else path
+        full_path = path_obj if path_obj.is_absolute() else self.base_path / path_obj
         full_path.unlink(missing_ok=True)
+
+    async def move_file(self, source: FilePath, destination: FilePath) -> None:
+        """Move/rename a file from source to destination.
+
+        This method abstracts the underlying storage (filesystem vs cloud).
+        Default implementation uses atomic filesystem rename, but cloud-backed
+        implementations (e.g., S3) can override to copy+delete.
+
+        Args:
+            source: Source path (relative to base_path or absolute)
+            destination: Destination path (relative to base_path or absolute)
+
+        Raises:
+            FileOperationError: If the move fails
+        """
+        # Convert strings to Paths and resolve relative paths against base_path
+        src_obj = self.base_path / source if isinstance(source, str) else source
+        dst_obj = self.base_path / destination if isinstance(destination, str) else destination
+        src_full = src_obj if src_obj.is_absolute() else self.base_path / src_obj
+        dst_full = dst_obj if dst_obj.is_absolute() else self.base_path / dst_obj
+
+        try:
+            # Ensure destination directory exists
+            await self.ensure_directory(dst_full.parent)
+
+            # Use semaphore for concurrency control and run blocking rename in executor
+            async with self._file_semaphore:
+                loop = asyncio.get_event_loop()
+                await loop.run_in_executor(None, lambda: src_full.rename(dst_full))
+        except Exception as e:  # pragma: no cover
+            logger.exception(
+                "File move error",
+                source=str(src_full),
+                destination=str(dst_full),
+                error=str(e),
+            )
+            raise FileOperationError(f"Failed to move file {source} -> {destination}: {e}")
+
+    async def update_frontmatter(self, path: FilePath, updates: Dict[str, Any]) -> str:
+        """Update frontmatter fields in a file while preserving all content.
+
+        Only modifies the frontmatter section, leaving all content untouched.
+        Creates frontmatter section if none exists.
+        Returns checksum of updated file.
+
+        Uses aiofiles for true async I/O (non-blocking).
+
+        Args:
+            path: Path to markdown file (Path or string)
+            updates: Dict of frontmatter fields to update
+
+        Returns:
+            Checksum of updated file
+
+        Raises:
+            FileOperationError: If file operations fail
+            ParseError: If frontmatter parsing fails
+        """
+        # Convert string to Path if needed
+        path_obj = self.base_path / path if isinstance(path, str) else path
+        full_path = path_obj if path_obj.is_absolute() else self.base_path / path_obj
+
+        try:
+            # Read current content using aiofiles
+            async with aiofiles.open(full_path, mode="r", encoding="utf-8") as f:
+                content = await f.read()
+
+            # Parse current frontmatter with proper error handling for malformed YAML
+            current_fm = {}
+            if file_utils.has_frontmatter(content):
+                try:
+                    current_fm = file_utils.parse_frontmatter(content)
+                    content = file_utils.remove_frontmatter(content)
+                except (ParseError, yaml.YAMLError) as e:  # pragma: no cover
+                    # Log warning and treat as plain markdown without frontmatter
+                    logger.warning(  # pragma: no cover
+                        f"Failed to parse YAML frontmatter in {full_path}: {e}. "
+                        "Treating file as plain markdown without frontmatter."
+                    )
+                    # Keep full content, treat as having no frontmatter
+                    current_fm = {}  # pragma: no cover
+
+            # Update frontmatter
+            new_fm = {**current_fm, **updates}
+
+            # Write new file with updated frontmatter
+            yaml_fm = yaml.dump(new_fm, sort_keys=False, allow_unicode=True)
+            final_content = f"---\n{yaml_fm}---\n\n{content.strip()}"
+
+            logger.debug(
+                "Updating frontmatter", path=str(full_path), update_keys=list(updates.keys())
+            )
+
+            await file_utils.write_file_atomic(full_path, final_content)
+
+            # Format file if configured
+            content_for_checksum = final_content
+            if self.app_config:
+                formatted_content = await file_utils.format_file(
+                    full_path, self.app_config, is_markdown=self.is_markdown(path)
+                )
+                if formatted_content is not None:
+                    content_for_checksum = formatted_content  # pragma: no cover
+
+            return await file_utils.compute_checksum(content_for_checksum)
+
+        except Exception as e:  # pragma: no cover
+            # Only log real errors (not YAML parsing, which is handled above)
+            if not isinstance(e, (ParseError, yaml.YAMLError)):
+                logger.error(
+                    "Failed to update frontmatter",
+                    path=str(full_path),
+                    error=str(e),
+                )
+            raise FileOperationError(f"Failed to update frontmatter: {e}")
+
+    async def compute_checksum(self, path: FilePath) -> str:
+        """Compute checksum for a file using true async I/O.
+
+        Uses aiofiles for non-blocking I/O with 64KB chunked reading.
+        Semaphore limits concurrent file operations to prevent OOM.
+        Memory usage is constant regardless of file size.
+
+        Args:
+            path: Path to the file (Path or string)
+
+        Returns:
+            SHA256 checksum hex string
+
+        Raises:
+            FileError: If checksum computation fails
+        """
+        # Convert string to Path if needed
+        path_obj = self.base_path / path if isinstance(path, str) else path
+        full_path = path_obj if path_obj.is_absolute() else self.base_path / path_obj
+
+        # Semaphore controls concurrency - max N files processed at once
+        async with self._file_semaphore:
+            try:
+                hasher = hashlib.sha256()
+                chunk_size = 65536  # 64KB chunks
+
+                # async I/O with aiofiles
+                async with aiofiles.open(full_path, mode="rb") as f:
+                    while chunk := await f.read(chunk_size):
+                        hasher.update(chunk)
+
+                return hasher.hexdigest()
+
+            except Exception as e:  # pragma: no cover
+                logger.error("Failed to compute checksum", path=str(full_path), error=str(e))
+                raise FileError(f"Failed to compute checksum for {path}: {e}")
+
+    async def get_file_metadata(self, path: FilePath) -> FileMetadata:
+        """Return file metadata for a given path.
+
+        This method is async to support cloud implementations (S3FileService)
+        where file metadata requires async operations (head_object).
+
+        Args:
+            path: Path to the file (Path or string)
+
+        Returns:
+            FileMetadata with size, created_at, and modified_at
+        """
+        # Convert string to Path if needed
+        path_obj = self.base_path / path if isinstance(path, str) else path
+        full_path = path_obj if path_obj.is_absolute() else self.base_path / path_obj
+
+        # Run blocking stat() in thread pool to maintain async compatibility
+        loop = asyncio.get_event_loop()
+        stat_result = await loop.run_in_executor(None, full_path.stat)
+
+        return FileMetadata(
+            size=stat_result.st_size,
+            created_at=datetime.fromtimestamp(stat_result.st_ctime).astimezone(),
+            modified_at=datetime.fromtimestamp(stat_result.st_mtime).astimezone(),
+        )
+
+    def content_type(self, path: FilePath) -> str:
+        """Return content_type for a given path.
+
+        Args:
+            path: Path to the file (Path or string)
+
+        Returns:
+            MIME type of the file
+        """
+        # Convert string to Path if needed
+        path_obj = self.base_path / path if isinstance(path, str) else path
+        full_path = path_obj if path_obj.is_absolute() else self.base_path / path_obj
+        # get file timestamps
+        mime_type, _ = mimetypes.guess_type(full_path.name)
+
+        # .canvas files are json
+        if full_path.suffix == ".canvas":
+            mime_type = "application/json"
+
+        content_type = mime_type or "text/plain"
+        return content_type
+
+    def is_markdown(self, path: FilePath) -> bool:
+        """Check if a file is a markdown file.
+
+        Args:
+            path: Path to the file (Path or string)
+
+        Returns:
+            True if the file is a markdown file, False otherwise
+        """
+        return self.content_type(path) == "text/markdown"