basic-memory 0.7.0__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of basic-memory might be problematic.
- basic_memory/__init__.py +1 -1
- basic_memory/alembic/alembic.ini +119 -0
- basic_memory/alembic/env.py +23 -1
- basic_memory/alembic/migrations.py +4 -9
- basic_memory/alembic/versions/502b60eaa905_remove_required_from_entity_permalink.py +51 -0
- basic_memory/alembic/versions/b3c3938bacdb_relation_to_name_unique_index.py +44 -0
- basic_memory/alembic/versions/cc7172b46608_update_search_index_schema.py +106 -0
- basic_memory/api/app.py +9 -10
- basic_memory/api/routers/__init__.py +2 -1
- basic_memory/api/routers/knowledge_router.py +31 -5
- basic_memory/api/routers/memory_router.py +18 -17
- basic_memory/api/routers/project_info_router.py +275 -0
- basic_memory/api/routers/resource_router.py +105 -4
- basic_memory/api/routers/search_router.py +22 -4
- basic_memory/cli/app.py +54 -5
- basic_memory/cli/commands/__init__.py +15 -2
- basic_memory/cli/commands/db.py +9 -13
- basic_memory/cli/commands/import_chatgpt.py +26 -30
- basic_memory/cli/commands/import_claude_conversations.py +27 -29
- basic_memory/cli/commands/import_claude_projects.py +29 -31
- basic_memory/cli/commands/import_memory_json.py +26 -28
- basic_memory/cli/commands/mcp.py +7 -1
- basic_memory/cli/commands/project.py +119 -0
- basic_memory/cli/commands/project_info.py +167 -0
- basic_memory/cli/commands/status.py +14 -28
- basic_memory/cli/commands/sync.py +63 -22
- basic_memory/cli/commands/tool.py +253 -0
- basic_memory/cli/main.py +39 -1
- basic_memory/config.py +166 -4
- basic_memory/db.py +19 -4
- basic_memory/deps.py +10 -3
- basic_memory/file_utils.py +37 -19
- basic_memory/markdown/entity_parser.py +3 -3
- basic_memory/markdown/utils.py +5 -0
- basic_memory/mcp/async_client.py +1 -1
- basic_memory/mcp/main.py +24 -0
- basic_memory/mcp/prompts/__init__.py +19 -0
- basic_memory/mcp/prompts/ai_assistant_guide.py +26 -0
- basic_memory/mcp/prompts/continue_conversation.py +111 -0
- basic_memory/mcp/prompts/recent_activity.py +88 -0
- basic_memory/mcp/prompts/search.py +182 -0
- basic_memory/mcp/prompts/utils.py +155 -0
- basic_memory/mcp/server.py +2 -6
- basic_memory/mcp/tools/__init__.py +12 -21
- basic_memory/mcp/tools/build_context.py +85 -0
- basic_memory/mcp/tools/canvas.py +97 -0
- basic_memory/mcp/tools/delete_note.py +28 -0
- basic_memory/mcp/tools/project_info.py +51 -0
- basic_memory/mcp/tools/read_content.py +229 -0
- basic_memory/mcp/tools/read_note.py +190 -0
- basic_memory/mcp/tools/recent_activity.py +100 -0
- basic_memory/mcp/tools/search.py +56 -17
- basic_memory/mcp/tools/utils.py +245 -16
- basic_memory/mcp/tools/write_note.py +124 -0
- basic_memory/models/knowledge.py +27 -11
- basic_memory/models/search.py +2 -1
- basic_memory/repository/entity_repository.py +3 -2
- basic_memory/repository/project_info_repository.py +9 -0
- basic_memory/repository/repository.py +24 -7
- basic_memory/repository/search_repository.py +47 -14
- basic_memory/schemas/__init__.py +10 -9
- basic_memory/schemas/base.py +4 -1
- basic_memory/schemas/memory.py +14 -4
- basic_memory/schemas/project_info.py +96 -0
- basic_memory/schemas/search.py +29 -33
- basic_memory/services/context_service.py +3 -3
- basic_memory/services/entity_service.py +26 -13
- basic_memory/services/file_service.py +145 -26
- basic_memory/services/link_resolver.py +9 -46
- basic_memory/services/search_service.py +95 -22
- basic_memory/sync/__init__.py +3 -2
- basic_memory/sync/sync_service.py +523 -117
- basic_memory/sync/watch_service.py +258 -132
- basic_memory/utils.py +51 -36
- basic_memory-0.9.0.dist-info/METADATA +736 -0
- basic_memory-0.9.0.dist-info/RECORD +99 -0
- basic_memory/alembic/README +0 -1
- basic_memory/cli/commands/tools.py +0 -157
- basic_memory/mcp/tools/knowledge.py +0 -68
- basic_memory/mcp/tools/memory.py +0 -170
- basic_memory/mcp/tools/notes.py +0 -202
- basic_memory/schemas/discovery.py +0 -28
- basic_memory/sync/file_change_scanner.py +0 -158
- basic_memory/sync/utils.py +0 -31
- basic_memory-0.7.0.dist-info/METADATA +0 -378
- basic_memory-0.7.0.dist-info/RECORD +0 -82
- {basic_memory-0.7.0.dist-info → basic_memory-0.9.0.dist-info}/WHEEL +0 -0
- {basic_memory-0.7.0.dist-info → basic_memory-0.9.0.dist-info}/entry_points.txt +0 -0
- {basic_memory-0.7.0.dist-info → basic_memory-0.9.0.dist-info}/licenses/LICENSE +0 -0
basic_memory/sync/sync_service.py

@@ -1,51 +1,444 @@
 """Service for syncing files between filesystem and database."""
 
+import os
+
+from dataclasses import dataclass
+from dataclasses import field
+from datetime import datetime
 from pathlib import Path
-from typing import Dict
+from typing import Dict, Optional, Set, Tuple
 
-import logfire
 from loguru import logger
 from sqlalchemy.exc import IntegrityError
 
-from basic_memory import
-from basic_memory.
+from basic_memory.markdown import EntityParser
+from basic_memory.models import Entity
 from basic_memory.repository import EntityRepository, RelationRepository
-from basic_memory.services import EntityService
+from basic_memory.services import EntityService, FileService
 from basic_memory.services.search_service import SearchService
-
-from
+import time
+from rich.progress import Progress, TextColumn, BarColumn, TaskProgressColumn
 
 
-
-
+@dataclass
+class SyncReport:
+    """Report of file changes found compared to database state.
 
-
-
-
+    Attributes:
+        total: Total number of files in directory being synced
+        new: Files that exist on disk but not in database
+        modified: Files that exist in both but have different checksums
+        deleted: Files that exist in database but not on disk
+        moves: Files that have been moved from one location to another
+        checksums: Current checksums for files on disk
     """
 
+    # We keep paths as strings in sets/dicts for easier serialization
+    new: Set[str] = field(default_factory=set)
+    modified: Set[str] = field(default_factory=set)
+    deleted: Set[str] = field(default_factory=set)
+    moves: Dict[str, str] = field(default_factory=dict)  # old_path -> new_path
+    checksums: Dict[str, str] = field(default_factory=dict)  # path -> checksum
+
+    @property
+    def total(self) -> int:
+        """Total number of changes."""
+        return len(self.new) + len(self.modified) + len(self.deleted) + len(self.moves)
+
+
+@dataclass
+class ScanResult:
+    """Result of scanning a directory."""
+
+    # file_path -> checksum
+    files: Dict[str, str] = field(default_factory=dict)
+
+    # checksum -> file_path
+    checksums: Dict[str, str] = field(default_factory=dict)
+
+    # file_path -> error message
+    errors: Dict[str, str] = field(default_factory=dict)
+
+
+class SyncService:
+    """Syncs documents and knowledge files with database."""
+
     def __init__(
         self,
-        scanner: FileChangeScanner,
         entity_service: EntityService,
         entity_parser: EntityParser,
         entity_repository: EntityRepository,
         relation_repository: RelationRepository,
         search_service: SearchService,
+        file_service: FileService,
     ):
-        self.scanner = scanner
         self.entity_service = entity_service
         self.entity_parser = entity_parser
         self.entity_repository = entity_repository
         self.relation_repository = relation_repository
         self.search_service = search_service
+        self.file_service = file_service
+
+    async def sync(self, directory: Path, show_progress: bool = True) -> SyncReport:
+        """Sync all files with database."""
+
+        start_time = time.time()
+        console = None
+        progress = None  # Will be initialized if show_progress is True
+
+        logger.info("Sync operation started", directory=str(directory))
+
+        # initial paths from db to sync
+        # path -> checksum
+        if show_progress:
+            from rich.console import Console
+
+            console = Console()
+            console.print(f"Scanning directory: {directory}")
+
+        report = await self.scan(directory)
+
+        # Initialize progress tracking if requested
+        if show_progress and report.total > 0:
+            progress = Progress(
+                TextColumn("[bold blue]{task.description}"),
+                BarColumn(),
+                TaskProgressColumn(),
+                console=console,
+                expand=True,
+            )
+
+        # order of sync matters to resolve relations effectively
+        logger.info(
+            "Sync changes detected",
+            new_files=len(report.new),
+            modified_files=len(report.modified),
+            deleted_files=len(report.deleted),
+            moved_files=len(report.moves),
+        )
+
+        if show_progress and report.total > 0:
+            with progress:  # pyright: ignore
+                # Track each category separately
+                move_task = None
+                if report.moves:  # pragma: no cover
+                    move_task = progress.add_task("[blue]Moving files...", total=len(report.moves))  # pyright: ignore
+
+                delete_task = None
+                if report.deleted:  # pragma: no cover
+                    delete_task = progress.add_task(  # pyright: ignore
+                        "[red]Deleting files...", total=len(report.deleted)
+                    )
+
+                new_task = None
+                if report.new:
+                    new_task = progress.add_task(  # pyright: ignore
+                        "[green]Adding new files...", total=len(report.new)
+                    )
+
+                modify_task = None
+                if report.modified:  # pragma: no cover
+                    modify_task = progress.add_task(  # pyright: ignore
+                        "[yellow]Updating modified files...", total=len(report.modified)
+                    )
+
+                # sync moves first
+                for i, (old_path, new_path) in enumerate(report.moves.items()):
+                    # in the case where a file has been deleted and replaced by another file
+                    # it will show up in the move and modified lists, so handle it in modified
+                    if new_path in report.modified:  # pragma: no cover
+                        report.modified.remove(new_path)
+                        logger.debug(
+                            "File marked as moved and modified",
+                            old_path=old_path,
+                            new_path=new_path,
+                            action="processing as modified",
+                        )
+                    else:  # pragma: no cover
+                        await self.handle_move(old_path, new_path)
+
+                    if move_task is not None:  # pragma: no cover
+                        progress.update(move_task, advance=1)  # pyright: ignore
+
+                # deleted next
+                for i, path in enumerate(report.deleted):  # pragma: no cover
+                    await self.handle_delete(path)
+                    if delete_task is not None:  # pragma: no cover
+                        progress.update(delete_task, advance=1)  # pyright: ignore
+
+                # then new and modified
+                for i, path in enumerate(report.new):
+                    await self.sync_file(path, new=True)
+                    if new_task is not None:
+                        progress.update(new_task, advance=1)  # pyright: ignore
+
+                for i, path in enumerate(report.modified):  # pragma: no cover
+                    await self.sync_file(path, new=False)
+                    if modify_task is not None:  # pragma: no cover
+                        progress.update(modify_task, advance=1)  # pyright: ignore
+
+                # Final step - resolving relations
+                if report.total > 0:
+                    relation_task = progress.add_task("[cyan]Resolving relations...", total=1)  # pyright: ignore
+                    await self.resolve_relations()
+                    progress.update(relation_task, advance=1)  # pyright: ignore
+        else:
+            # No progress display - proceed with normal sync
+            # sync moves first
+            for old_path, new_path in report.moves.items():
+                # in the case where a file has been deleted and replaced by another file
+                # it will show up in the move and modified lists, so handle it in modified
+                if new_path in report.modified:
+                    report.modified.remove(new_path)
+                    logger.debug(
+                        "File marked as moved and modified",
+                        old_path=old_path,
+                        new_path=new_path,
+                        action="processing as modified",
+                    )
+                else:
+                    await self.handle_move(old_path, new_path)
+
+            # deleted next
+            for path in report.deleted:
+                await self.handle_delete(path)
+
+            # then new and modified
+            for path in report.new:
+                await self.sync_file(path, new=True)
+
+            for path in report.modified:
+                await self.sync_file(path, new=False)
+
+            await self.resolve_relations()
+
+        duration_ms = int((time.time() - start_time) * 1000)
+        logger.info(
+            "Sync operation completed",
+            directory=str(directory),
+            total_changes=report.total,
+            duration_ms=duration_ms,
+        )
+
+        return report
+
+    async def scan(self, directory):
+        """Scan directory for changes compared to database state."""
+
+        db_paths = await self.get_db_file_state()
+
+        # Track potentially moved files by checksum
+        scan_result = await self.scan_directory(directory)
+        report = SyncReport()
+
+        # First find potential new files and record checksums
+        # if a path is not present in the db, it could be new or could be the destination of a move
+        for file_path, checksum in scan_result.files.items():
+            if file_path not in db_paths:
+                report.new.add(file_path)
+                report.checksums[file_path] = checksum
+
+        # Now detect moves and deletions
+        for db_path, db_checksum in db_paths.items():
+            local_checksum_for_db_path = scan_result.files.get(db_path)
+
+            # file not modified
+            if db_checksum == local_checksum_for_db_path:
+                pass
+
+            # if checksums don't match for the same path, its modified
+            if local_checksum_for_db_path and db_checksum != local_checksum_for_db_path:
+                report.modified.add(db_path)
+                report.checksums[db_path] = local_checksum_for_db_path
+
+            # check if it's moved or deleted
+            if not local_checksum_for_db_path:
+                # if we find the checksum in another file, it's a move
+                if db_checksum in scan_result.checksums:
+                    new_path = scan_result.checksums[db_checksum]
+                    report.moves[db_path] = new_path
+
+                    # Remove from new files if present
+                    if new_path in report.new:
+                        report.new.remove(new_path)
+
+                # deleted
+                else:
+                    report.deleted.add(db_path)
+        return report
+
+    async def get_db_file_state(self) -> Dict[str, str]:
+        """Get file_path and checksums from database.
+        Args:
+            db_records: database records
+        Returns:
+            Dict mapping file paths to FileState
+        :param db_records: the data from the db
+        """
+        db_records = await self.entity_repository.find_all()
+        return {r.file_path: r.checksum or "" for r in db_records}
+
+    async def sync_file(
+        self, path: str, new: bool = True
+    ) -> Tuple[Optional[Entity], Optional[str]]:
+        """Sync a single file.
+
+        Args:
+            path: Path to file to sync
+            new: Whether this is a new file
+
+        Returns:
+            Tuple of (entity, checksum) or (None, None) if sync fails
+        """
+        try:
+            logger.debug(
+                "Syncing file",
+                path=path,
+                is_new=new,
+                is_markdown=self.file_service.is_markdown(path),
+            )
+
+            if self.file_service.is_markdown(path):
+                entity, checksum = await self.sync_markdown_file(path, new)
+            else:
+                entity, checksum = await self.sync_regular_file(path, new)
 
-
+            if entity is not None:
+                await self.search_service.index_entity(entity)
+
+                logger.debug(
+                    "File sync completed", path=path, entity_id=entity.id, checksum=checksum
+                )
+            return entity, checksum
+
+        except Exception as e:  # pragma: no cover
+            logger.exception("Failed to sync file", path=path, error=str(e))
+            return None, None
+
+    async def sync_markdown_file(self, path: str, new: bool = True) -> Tuple[Optional[Entity], str]:
+        """Sync a markdown file with full processing.
+
+        Args:
+            path: Path to markdown file
+            new: Whether this is a new file
+
+        Returns:
+            Tuple of (entity, checksum)
+        """
+        # Parse markdown first to get any existing permalink
+        logger.debug("Parsing markdown file", path=path)
+        entity_markdown = await self.entity_parser.parse_file(path)
+
+        # Resolve permalink - this handles all the cases including conflicts
+        permalink = await self.entity_service.resolve_permalink(path, markdown=entity_markdown)
+
+        # If permalink changed, update the file
+        if permalink != entity_markdown.frontmatter.permalink:
+            logger.info(
+                "Updating permalink",
+                path=path,
+                old_permalink=entity_markdown.frontmatter.permalink,
+                new_permalink=permalink,
+            )
+
+            entity_markdown.frontmatter.metadata["permalink"] = permalink
+            checksum = await self.file_service.update_frontmatter(path, {"permalink": permalink})
+        else:
+            checksum = await self.file_service.compute_checksum(path)
+
+        # if the file is new, create an entity
+        if new:
+            # Create entity with final permalink
+            logger.debug("Creating new entity from markdown", path=path, permalink=permalink)
+
+            await self.entity_service.create_entity_from_markdown(Path(path), entity_markdown)
+
+        # otherwise we need to update the entity and observations
+        else:
+            logger.debug("Updating entity from markdown", path=path, permalink=permalink)
+
+            await self.entity_service.update_entity_and_observations(Path(path), entity_markdown)
+
+        # Update relations and search index
+        entity = await self.entity_service.update_entity_relations(path, entity_markdown)
+
+        # set checksum
+        await self.entity_repository.update(entity.id, {"checksum": checksum})
+
+        logger.debug(
+            "Markdown sync completed",
+            path=path,
+            entity_id=entity.id,
+            observation_count=len(entity.observations),
+            relation_count=len(entity.relations),
+        )
+
+        return entity, checksum
+
+    async def sync_regular_file(self, path: str, new: bool = True) -> Tuple[Optional[Entity], str]:
+        """Sync a non-markdown file with basic tracking.
+
+        Args:
+            path: Path to file
+            new: Whether this is a new file
+
+        Returns:
+            Tuple of (entity, checksum)
+        """
+        checksum = await self.file_service.compute_checksum(path)
+        if new:
+            # Generate permalink from path
+            await self.entity_service.resolve_permalink(path)
+
+            # get file timestamps
+            file_stats = self.file_service.file_stats(path)
+            created = datetime.fromtimestamp(file_stats.st_ctime)
+            modified = datetime.fromtimestamp(file_stats.st_mtime)
+
+            # get mime type
+            content_type = self.file_service.content_type(path)
+
+            file_path = Path(path)
+            entity = await self.entity_repository.add(
+                Entity(
+                    entity_type="file",
+                    file_path=path,
+                    checksum=checksum,
+                    title=file_path.name,
+                    created_at=created,
+                    updated_at=modified,
+                    content_type=content_type,
+                )
+            )
+            return entity, checksum
+        else:
+            entity = await self.entity_repository.get_by_file_path(path)
+            if entity is None:  # pragma: no cover
+                logger.error("Entity not found for existing file", path=path)
+                raise ValueError(f"Entity not found for existing file: {path}")
+
+            updated = await self.entity_repository.update(
+                entity.id, {"file_path": path, "checksum": checksum}
+            )
+
+            if updated is None:  # pragma: no cover
+                logger.error("Failed to update entity", entity_id=entity.id, path=path)
+                raise ValueError(f"Failed to update entity with ID {entity.id}")
+
+            return updated, checksum
+
+    async def handle_delete(self, file_path: str):
         """Handle complete entity deletion including search index cleanup."""
+
         # First get entity to get permalink before deletion
         entity = await self.entity_repository.get_by_file_path(file_path)
         if entity:
-            logger.
+            logger.info(
+                "Deleting entity",
+                file_path=file_path,
+                entity_id=entity.id,
+                permalink=entity.permalink,
+            )
 
             # Delete from db (this cascades to observations/relations)
             await self.entity_service.delete_entity_by_file_path(file_path)
@@ -56,119 +449,132 @@ class SyncService:
                 + [o.permalink for o in entity.observations]
                 + [r.permalink for r in entity.relations]
             )
-
+
+            logger.debug(
+                "Cleaning up search index",
+                entity_id=entity.id,
+                file_path=file_path,
+                index_entries=len(permalinks),
+            )
+
             for permalink in permalinks:
-
-
-
-
-
-        with logfire.span("sync", directory=directory):  # pyright: ignore [reportGeneralTypeIssues]
-            changes = await self.scanner.find_knowledge_changes(directory)
-            logger.info(f"Found {changes.total_changes} knowledge changes")
-
-            # Handle moves first
-            for old_path, new_path in changes.moves.items():
-                logger.debug(f"Moving entity: {old_path} -> {new_path}")
-                entity = await self.entity_repository.get_by_file_path(old_path)
-                if entity:
-                    # Update file_path but keep the same permalink for link stability
-                    updated = await self.entity_repository.update(
-                        entity.id, {"file_path": new_path, "checksum": changes.checksums[new_path]}
-                    )
-                    # update search index
-                    if updated:
-                        await self.search_service.index_entity(updated)
-
-            # Handle deletions next
-            # remove rows from db for files no longer present
-            for path in changes.deleted:
-                await self.handle_entity_deletion(path)
-
-            # Parse files that need updating
-            parsed_entities: Dict[str, EntityMarkdown] = {}
-
-            for path in [*changes.new, *changes.modified]:
-                entity_markdown = await self.entity_parser.parse_file(directory / path)
-                parsed_entities[path] = entity_markdown
-
-            # First pass: Create/update entities
-            # entities will have a null checksum to indicate they are not complete
-            for path, entity_markdown in parsed_entities.items():
-                # Get unique permalink and update markdown if needed
-                permalink = await self.entity_service.resolve_permalink(
-                    Path(path), markdown=entity_markdown
-                )
+                if permalink:
+                    await self.search_service.delete_by_permalink(permalink)
+                else:
+                    await self.search_service.delete_by_entity_id(entity.id)
 
-
-
-                logger.info(f"Adding permalink '{permalink}' to file: {path}")
+    async def handle_move(self, old_path, new_path):
+        logger.info("Moving entity", old_path=old_path, new_path=new_path)
 
-
-
+        entity = await self.entity_repository.get_by_file_path(old_path)
+        if entity:
+            # Update file_path but keep the same permalink for link stability
+            updated = await self.entity_repository.update(entity.id, {"file_path": new_path})
 
-
-
-
-
+            if updated is None:  # pragma: no cover
+                logger.error(
+                    "Failed to update entity path",
+                    entity_id=entity.id,
+                    old_path=old_path,
+                    new_path=new_path,
+                )
+                raise ValueError(f"Failed to update entity path for ID {entity.id}")
 
-
-
+            logger.debug(
+                "Entity path updated",
+                entity_id=entity.id,
+                permalink=entity.permalink,
+                old_path=old_path,
+                new_path=new_path,
+            )
 
-
-
-                # Create entity with final permalink
-                logger.debug(f"Creating new entity_markdown: {path}")
-                await self.entity_service.create_entity_from_markdown(
-                    Path(path), entity_markdown
-                )
-                # otherwise we need to update the entity and observations
-                else:
-                    logger.debug(f"Updating entity_markdown: {path}")
-                    await self.entity_service.update_entity_and_observations(
-                        Path(path), entity_markdown
-                    )
+            # update search index
+            await self.search_service.index_entity(updated)
 
-
-
-                logger.debug(f"Updating relations for: {path}")
+    async def resolve_relations(self):
+        """Try to resolve any unresolved relations"""
 
-
-                checksum = changes.checksums[path]
-                entity = await self.entity_service.update_entity_relations(
-                    Path(path), entity_markdown
-                )
+        unresolved_relations = await self.relation_repository.find_unresolved_relations()
 
-
-
+        logger.info("Resolving forward references", count=len(unresolved_relations))
+
+        for relation in unresolved_relations:
+            logger.debug(
+                "Attempting to resolve relation",
+                relation_id=relation.id,
+                from_id=relation.from_id,
+                to_name=relation.to_name,
+            )
 
-
-                await self.entity_repository.update(entity.id, {"checksum": checksum})
+            resolved_entity = await self.entity_service.link_resolver.resolve_link(relation.to_name)
 
-        #
-
-
-
-                relation.
+            # ignore reference to self
+            if resolved_entity and resolved_entity.id != relation.from_id:
+                logger.debug(
+                    "Resolved forward reference",
+                    relation_id=relation.id,
+                    from_id=relation.from_id,
+                    to_name=relation.to_name,
+                    resolved_id=resolved_entity.id,
+                    resolved_title=resolved_entity.title,
                 )
-
-
+                try:
+                    await self.relation_repository.update(
+                        relation.id,
+                        {
+                            "to_id": resolved_entity.id,
+                            "to_name": resolved_entity.title,
+                        },
+                    )
+                except IntegrityError:  # pragma: no cover
                     logger.debug(
-
+                        "Ignoring duplicate relation",
+                        relation_id=relation.id,
+                        from_id=relation.from_id,
+                        to_name=relation.to_name,
                     )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                # update search index
+                await self.search_service.index_entity(resolved_entity)
+
+    async def scan_directory(self, directory: Path) -> ScanResult:
+        """
+        Scan directory for markdown files and their checksums.
+
+        Args:
+            directory: Directory to scan
+
+        Returns:
+            ScanResult containing found files and any errors
+        """
+        start_time = time.time()
+
+        logger.debug("Scanning directory", directory=str(directory))
+        result = ScanResult()
+
+        for root, dirnames, filenames in os.walk(str(directory)):
+            # Skip dot directories in-place
+            dirnames[:] = [d for d in dirnames if not d.startswith(".")]
+
+            for filename in filenames:
+                # Skip dot files
+                if filename.startswith("."):
+                    continue
+
+                path = Path(root) / filename
+                rel_path = str(path.relative_to(directory))
+                checksum = await self.file_service.compute_checksum(rel_path)
+                result.files[rel_path] = checksum
+                result.checksums[checksum] = rel_path
+
+                logger.debug("Found file", path=rel_path, checksum=checksum)
+
+        duration_ms = int((time.time() - start_time) * 1000)
+        logger.debug(
+            "Directory scan completed",
+            directory=str(directory),
+            files_found=len(result.files),
+            duration_ms=duration_ms,
+        )
+
+        return result
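For orientation, here is a minimal sketch (not part of the package; it re-declares the SyncReport dataclass exactly as it appears in the diff above, with hypothetical file paths) showing how the new total property aggregates the four change categories:

# Sketch only: SyncReport copied from the diff above; the paths are made up.
from dataclasses import dataclass, field
from typing import Dict, Set


@dataclass
class SyncReport:
    new: Set[str] = field(default_factory=set)
    modified: Set[str] = field(default_factory=set)
    deleted: Set[str] = field(default_factory=set)
    moves: Dict[str, str] = field(default_factory=dict)  # old_path -> new_path
    checksums: Dict[str, str] = field(default_factory=dict)  # path -> checksum

    @property
    def total(self) -> int:
        """Total number of changes."""
        return len(self.new) + len(self.modified) + len(self.deleted) + len(self.moves)


# One new file plus one detected move -> total == 2
report = SyncReport(new={"notes/a.md"}, moves={"old/b.md": "new/b.md"})
assert report.total == 2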