basic-memory 0.14.4__py3-none-any.whl → 0.15.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of basic-memory might be problematic.
- basic_memory/__init__.py +1 -1
- basic_memory/alembic/versions/a1b2c3d4e5f6_fix_project_foreign_keys.py +5 -9
- basic_memory/api/app.py +10 -4
- basic_memory/api/routers/directory_router.py +23 -2
- basic_memory/api/routers/knowledge_router.py +25 -8
- basic_memory/api/routers/project_router.py +100 -4
- basic_memory/cli/app.py +9 -28
- basic_memory/cli/auth.py +277 -0
- basic_memory/cli/commands/cloud/__init__.py +5 -0
- basic_memory/cli/commands/cloud/api_client.py +112 -0
- basic_memory/cli/commands/cloud/bisync_commands.py +818 -0
- basic_memory/cli/commands/cloud/core_commands.py +288 -0
- basic_memory/cli/commands/cloud/mount_commands.py +295 -0
- basic_memory/cli/commands/cloud/rclone_config.py +288 -0
- basic_memory/cli/commands/cloud/rclone_installer.py +198 -0
- basic_memory/cli/commands/command_utils.py +43 -0
- basic_memory/cli/commands/import_memory_json.py +0 -4
- basic_memory/cli/commands/mcp.py +77 -60
- basic_memory/cli/commands/project.py +154 -152
- basic_memory/cli/commands/status.py +25 -22
- basic_memory/cli/commands/sync.py +45 -228
- basic_memory/cli/commands/tool.py +87 -16
- basic_memory/cli/main.py +1 -0
- basic_memory/config.py +131 -21
- basic_memory/db.py +104 -3
- basic_memory/deps.py +27 -8
- basic_memory/file_utils.py +37 -13
- basic_memory/ignore_utils.py +295 -0
- basic_memory/markdown/plugins.py +9 -7
- basic_memory/mcp/async_client.py +124 -14
- basic_memory/mcp/project_context.py +141 -0
- basic_memory/mcp/prompts/ai_assistant_guide.py +49 -4
- basic_memory/mcp/prompts/continue_conversation.py +17 -16
- basic_memory/mcp/prompts/recent_activity.py +116 -32
- basic_memory/mcp/prompts/search.py +13 -12
- basic_memory/mcp/prompts/utils.py +11 -4
- basic_memory/mcp/resources/ai_assistant_guide.md +211 -341
- basic_memory/mcp/resources/project_info.py +27 -11
- basic_memory/mcp/server.py +0 -37
- basic_memory/mcp/tools/__init__.py +5 -6
- basic_memory/mcp/tools/build_context.py +67 -56
- basic_memory/mcp/tools/canvas.py +38 -26
- basic_memory/mcp/tools/chatgpt_tools.py +187 -0
- basic_memory/mcp/tools/delete_note.py +81 -47
- basic_memory/mcp/tools/edit_note.py +155 -138
- basic_memory/mcp/tools/list_directory.py +112 -99
- basic_memory/mcp/tools/move_note.py +181 -101
- basic_memory/mcp/tools/project_management.py +113 -277
- basic_memory/mcp/tools/read_content.py +91 -74
- basic_memory/mcp/tools/read_note.py +152 -115
- basic_memory/mcp/tools/recent_activity.py +471 -68
- basic_memory/mcp/tools/search.py +105 -92
- basic_memory/mcp/tools/sync_status.py +136 -130
- basic_memory/mcp/tools/utils.py +4 -0
- basic_memory/mcp/tools/view_note.py +44 -33
- basic_memory/mcp/tools/write_note.py +151 -90
- basic_memory/models/knowledge.py +12 -6
- basic_memory/models/project.py +6 -2
- basic_memory/repository/entity_repository.py +89 -82
- basic_memory/repository/relation_repository.py +13 -0
- basic_memory/repository/repository.py +18 -5
- basic_memory/repository/search_repository.py +46 -2
- basic_memory/schemas/__init__.py +6 -0
- basic_memory/schemas/base.py +39 -11
- basic_memory/schemas/cloud.py +46 -0
- basic_memory/schemas/memory.py +90 -21
- basic_memory/schemas/project_info.py +9 -10
- basic_memory/schemas/sync_report.py +48 -0
- basic_memory/services/context_service.py +25 -11
- basic_memory/services/directory_service.py +124 -3
- basic_memory/services/entity_service.py +100 -48
- basic_memory/services/initialization.py +30 -11
- basic_memory/services/project_service.py +101 -24
- basic_memory/services/search_service.py +16 -8
- basic_memory/sync/sync_service.py +173 -34
- basic_memory/sync/watch_service.py +101 -40
- basic_memory/utils.py +14 -4
- {basic_memory-0.14.4.dist-info → basic_memory-0.15.1.dist-info}/METADATA +57 -9
- basic_memory-0.15.1.dist-info/RECORD +146 -0
- basic_memory/mcp/project_session.py +0 -120
- basic_memory-0.14.4.dist-info/RECORD +0 -133
- {basic_memory-0.14.4.dist-info → basic_memory-0.15.1.dist-info}/WHEEL +0 -0
- {basic_memory-0.14.4.dist-info → basic_memory-0.15.1.dist-info}/entry_points.txt +0 -0
- {basic_memory-0.14.4.dist-info → basic_memory-0.15.1.dist-info}/licenses/LICENSE +0 -0
basic_memory/services/search_service.py CHANGED

```diff
@@ -113,8 +113,10 @@ class SearchService:
         # Add word boundaries
         variants.update(w.strip() for w in text.lower().split() if w.strip())
 
-        #
-
+        # Trigrams disabled: They create massive search index bloat, increasing DB size significantly
+        # and slowing down indexing performance. FTS5 search works well without them.
+        # See: https://github.com/basicmachines-co/basic-memory/issues/351
+        # variants.update(text[i : i + 3].lower() for i in range(len(text) - 2))
 
         return variants
 
@@ -219,6 +221,9 @@ class SearchService:
         The project_id is automatically added by the repository when indexing.
         """
 
+        # Collect all search index rows to batch insert at the end
+        rows_to_index = []
+
         content_stems = []
         content_snippet = ""
         title_variants = self._generate_variants(entity.title)
@@ -241,8 +246,8 @@ class SearchService:
 
         entity_content_stems = "\n".join(p for p in content_stems if p and p.strip())
 
-        #
-
+        # Add entity row
+        rows_to_index.append(
             SearchIndexRow(
                 id=entity.id,
                 type=SearchItemType.ENTITY.value,
@@ -261,13 +266,13 @@ class SearchService:
             )
         )
 
-        #
+        # Add observation rows
         for obs in entity.observations:
             # Index with parent entity's file path since that's where it's defined
             obs_content_stems = "\n".join(
                 p for p in self._generate_variants(obs.content) if p and p.strip()
             )
-
+            rows_to_index.append(
                 SearchIndexRow(
                     id=obs.id,
                     type=SearchItemType.OBSERVATION.value,
@@ -287,7 +292,7 @@ class SearchService:
                 )
             )
 
-        #
+        # Add relation rows (only outgoing relations defined in this file)
         for rel in entity.outgoing_relations:
             # Create descriptive title showing the relationship
             relation_title = (
@@ -299,7 +304,7 @@ class SearchService:
             rel_content_stems = "\n".join(
                 p for p in self._generate_variants(relation_title) if p and p.strip()
             )
-
+            rows_to_index.append(
                 SearchIndexRow(
                     id=rel.id,
                     title=relation_title,
@@ -317,6 +322,9 @@ class SearchService:
             )
         )
 
+        # Batch insert all rows at once
+        await self.repository.bulk_index_items(rows_to_index)
+
     async def delete_by_permalink(self, permalink: str):
         """Delete an item from the search index."""
         await self.repository.delete_by_permalink(permalink)
```
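The change above switches `index_entity` from one write per entity, observation, and relation to a single batched write through `bulk_index_items`. A minimal sketch of the same pattern against a plain SQLite FTS5 table (the table name and columns here are illustrative, not the package's real search-index schema):

```python
import sqlite3

conn = sqlite3.connect(":memory:")
# Illustrative FTS5 table; requires SQLite built with FTS5 (the default in CPython builds)
conn.execute("CREATE VIRTUAL TABLE search_index USING fts5(title, content_stems)")

# Accumulate rows while walking the entity, then write once
rows_to_index = [
    ("My Note", "my note stems"),
    ("My Note → relates_to → Other Note", "relates to other note"),
]

# One executemany round-trip instead of len(rows_to_index) separate execute() calls
conn.executemany(
    "INSERT INTO search_index (title, content_stems) VALUES (?, ?)", rows_to_index
)
conn.commit()

print(conn.execute(
    "SELECT title FROM search_index WHERE search_index MATCH 'stems'"
).fetchall())
```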
basic_memory/sync/sync_service.py CHANGED

```diff
@@ -1,21 +1,28 @@
 """Service for syncing files between filesystem and database."""
 
+import asyncio
 import os
 import time
+from concurrent.futures import ThreadPoolExecutor
 from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
 from typing import Dict, Optional, Set, Tuple
 
 from loguru import logger
+from sqlalchemy import select
 from sqlalchemy.exc import IntegrityError
 
-from basic_memory
+from basic_memory import db
+from basic_memory.config import BasicMemoryConfig, ConfigManager
 from basic_memory.file_utils import has_frontmatter
-from basic_memory.
-from basic_memory.
-from basic_memory.
+from basic_memory.ignore_utils import load_bmignore_patterns, should_ignore_path
+from basic_memory.markdown import EntityParser, MarkdownProcessor
+from basic_memory.models import Entity, Project
+from basic_memory.repository import EntityRepository, RelationRepository, ObservationRepository
+from basic_memory.repository.search_repository import SearchRepository
 from basic_memory.services import EntityService, FileService
+from basic_memory.services.link_resolver import LinkResolver
 from basic_memory.services.search_service import SearchService
 from basic_memory.services.sync_status_service import sync_status_tracker, SyncStatus
 
```
```diff
@@ -80,6 +87,43 @@ class SyncService:
         self.relation_repository = relation_repository
         self.search_service = search_service
         self.file_service = file_service
+        self._thread_pool = ThreadPoolExecutor(max_workers=app_config.sync_thread_pool_size)
+        # Load ignore patterns once at initialization for performance
+        self._ignore_patterns = load_bmignore_patterns()
+
+    async def _read_file_async(self, file_path: Path) -> str:
+        """Read file content in thread pool to avoid blocking the event loop."""
+        loop = asyncio.get_event_loop()
+        return await loop.run_in_executor(self._thread_pool, file_path.read_text, "utf-8")
+
+    async def _compute_checksum_async(self, path: str) -> str:
+        """Compute file checksum in thread pool to avoid blocking the event loop."""
+
+        def _sync_compute_checksum(path_str: str) -> str:
+            # Synchronous version for thread pool execution
+            path_obj = self.file_service.base_path / path_str
+
+            if self.file_service.is_markdown(path_str):
+                content = path_obj.read_text(encoding="utf-8")
+            else:
+                content = path_obj.read_bytes()
+
+            # Use the synchronous version of compute_checksum
+            import hashlib
+
+            if isinstance(content, str):
+                content_bytes = content.encode("utf-8")
+            else:
+                content_bytes = content
+            return hashlib.sha256(content_bytes).hexdigest()
+
+        loop = asyncio.get_event_loop()
+        return await loop.run_in_executor(self._thread_pool, _sync_compute_checksum, path)
+
+    def __del__(self):
+        """Cleanup thread pool when service is destroyed."""
+        if hasattr(self, "_thread_pool"):
+            self._thread_pool.shutdown(wait=False)
 
     async def sync(self, directory: Path, project_name: Optional[str] = None) -> SyncReport:
         """Sync all files with database."""
```
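`Path.read_text` and SHA-256 hashing are blocking calls, so the new `_read_file_async`/`_compute_checksum_async` helpers push them onto a `ThreadPoolExecutor` via `run_in_executor`, keeping the event loop responsive during large syncs. A self-contained sketch of that offloading pattern (pool size and function names here are illustrative):

```python
import asyncio
import hashlib
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path

_pool = ThreadPoolExecutor(max_workers=4)  # stand-in for sync_thread_pool_size

def _checksum(path: Path) -> str:
    # Blocking read + hash, executed on a worker thread
    return hashlib.sha256(path.read_bytes()).hexdigest()

async def checksum_async(path: Path) -> str:
    loop = asyncio.get_running_loop()
    # The event loop keeps servicing other coroutines while the worker
    # thread does the disk I/O and hashing
    return await loop.run_in_executor(_pool, _checksum, path)

async def main() -> None:
    print(await checksum_async(Path(__file__)))

asyncio.run(main())
```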
```diff
@@ -231,15 +275,25 @@
 
     async def get_db_file_state(self) -> Dict[str, str]:
         """Get file_path and checksums from database.
-
-
+
+        Optimized to query only the columns we need (file_path, checksum) without
+        loading full entities or their relationships. This is 10-100x faster for
+        large projects compared to loading all entities with observations/relations.
+
         Returns:
-            Dict mapping file paths to
-        :param db_records: the data from the db
+            Dict mapping file paths to checksums
         """
-
-
-
+        # Query only the columns we need - no entity objects or relationships
+        query = select(Entity.file_path, Entity.checksum).where(
+            Entity.project_id == self.entity_repository.project_id
+        )
+
+        async with db.scoped_session(self.entity_repository.session_maker) as session:
+            result = await session.execute(query)
+            rows = result.all()
+
+        logger.info(f"Found {len(rows)} db file records")
+        return {row.file_path: row.checksum or "" for row in rows}
 
     async def sync_file(
         self, path: str, new: bool = True
```
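The rewritten `get_db_file_state` issues a two-column `select()` instead of hydrating full ORM entities, which is where the quoted 10-100x win comes from. The shape of that optimization in SQLAlchemy Core, with an illustrative stand-in for the entity table:

```python
from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine, insert, select

metadata = MetaData()
# Stand-in for the real entity table, which also carries observation/relation
# relationships that this query deliberately never touches
entity = Table(
    "entity",
    metadata,
    Column("id", Integer, primary_key=True),
    Column("file_path", String),
    Column("checksum", String),
    Column("project_id", Integer),
)

engine = create_engine("sqlite://")
metadata.create_all(engine)

with engine.begin() as conn:
    conn.execute(
        insert(entity),
        [
            {"file_path": "notes/a.md", "checksum": "abc123", "project_id": 1},
            {"file_path": "notes/b.md", "checksum": None, "project_id": 1},
        ],
    )
    # Plain Row objects come back -- no entity construction, no eager loads
    rows = conn.execute(
        select(entity.c.file_path, entity.c.checksum).where(entity.c.project_id == 1)
    ).all()

print({row.file_path: row.checksum or "" for row in rows})
# {'notes/a.md': 'abc123', 'notes/b.md': ''}
```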
```diff
@@ -289,16 +343,18 @@
         logger.debug(f"Parsing markdown file, path: {path}, new: {new}")
 
         file_path = self.entity_parser.base_path / path
-        file_content =
+        file_content = await self._read_file_async(file_path)
         file_contains_frontmatter = has_frontmatter(file_content)
 
         # entity markdown will always contain front matter, so it can be used up create/update the entity
         entity_markdown = await self.entity_parser.parse_file(path)
 
-        # if the file contains frontmatter, resolve a permalink
-        if file_contains_frontmatter:
-            # Resolve permalink -
-            permalink = await self.entity_service.resolve_permalink(
+        # if the file contains frontmatter, resolve a permalink (unless disabled)
+        if file_contains_frontmatter and not self.app_config.disable_permalinks:
+            # Resolve permalink - skip conflict checks during bulk sync for performance
+            permalink = await self.entity_service.resolve_permalink(
+                path, markdown=entity_markdown, skip_conflict_check=True
+            )
 
             # If permalink changed, update the file
             if permalink != entity_markdown.frontmatter.permalink:
@@ -326,7 +382,7 @@
         # After updating relations, we need to compute the checksum again
         # This is necessary for files with wikilinks to ensure consistent checksums
         # after relation processing is complete
-        final_checksum = await self.
+        final_checksum = await self._compute_checksum_async(path)
 
         # set checksum
         await self.entity_repository.update(entity.id, {"checksum": final_checksum})
@@ -350,10 +406,10 @@
         Returns:
             Tuple of (entity, checksum)
         """
-        checksum = await self.
+        checksum = await self._compute_checksum_async(path)
         if new:
-            # Generate permalink from path
-            await self.entity_service.resolve_permalink(path)
+            # Generate permalink from path - skip conflict checks during bulk sync
+            await self.entity_service.resolve_permalink(path, skip_conflict_check=True)
 
         # get file timestamps
         file_stats = self.file_service.file_stats(path)
@@ -487,11 +543,15 @@
         updates = {"file_path": new_path}
 
         # If configured, also update permalink to match new path
-        if
-
+        if (
+            self.app_config.update_permalinks_on_move
+            and not self.app_config.disable_permalinks
+            and self.file_service.is_markdown(new_path)
         ):
-            # generate new permalink value
-            new_permalink = await self.entity_service.resolve_permalink(
+            # generate new permalink value - skip conflict checks during bulk sync
+            new_permalink = await self.entity_service.resolve_permalink(
+                new_path, skip_conflict_check=True
+            )
 
             # write to file and get new checksum
             new_checksum = await self.file_service.update_frontmatter(
@@ -548,12 +608,27 @@
         # update search index
         await self.search_service.index_entity(updated)
 
-    async def resolve_relations(self):
-        """Try to resolve
+    async def resolve_relations(self, entity_id: int | None = None):
+        """Try to resolve unresolved relations.
 
-
+        Args:
+            entity_id: If provided, only resolve relations for this specific entity.
+                Otherwise, resolve all unresolved relations in the database.
+        """
 
-
+        if entity_id:
+            # Only get unresolved relations for the specific entity
+            unresolved_relations = (
+                await self.relation_repository.find_unresolved_relations_for_entity(entity_id)
+            )
+            logger.info(
+                f"Resolving forward references for entity {entity_id}",
+                count=len(unresolved_relations),
+            )
+        else:
+            # Get all unresolved relations (original behavior)
+            unresolved_relations = await self.relation_repository.find_unresolved_relations()
+            logger.info("Resolving all forward references", count=len(unresolved_relations))
 
         for relation in unresolved_relations:
             logger.trace(
```
```diff
@@ -608,19 +683,35 @@
 
         logger.debug(f"Scanning directory {directory}")
         result = ScanResult()
+        ignored_count = 0
 
         for root, dirnames, filenames in os.walk(str(directory)):
-            #
-
+            # Convert root to Path for easier manipulation
+            root_path = Path(root)
+
+            # Filter out ignored directories in-place
+            dirnames_to_remove = []
+            for dirname in dirnames:
+                dir_path = root_path / dirname
+                if should_ignore_path(dir_path, directory, self._ignore_patterns):
+                    dirnames_to_remove.append(dirname)
+                    ignored_count += 1
+
+            # Remove ignored directories from dirnames to prevent os.walk from descending
+            for dirname in dirnames_to_remove:
+                dirnames.remove(dirname)
 
             for filename in filenames:
-
-
+                path = root_path / filename
+
+                # Check if file should be ignored
+                if should_ignore_path(path, directory, self._ignore_patterns):
+                    ignored_count += 1
+                    logger.trace(f"Ignoring file per .bmignore: {path.relative_to(directory)}")
                     continue
 
-                path = Path(root) / filename
                 rel_path = path.relative_to(directory).as_posix()
-                checksum = await self.
+                checksum = await self._compute_checksum_async(rel_path)
                 result.files[rel_path] = checksum
                 result.checksums[checksum] = rel_path
 
```
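The scan hunk relies on the documented `os.walk` contract: mutating `dirnames` in place stops the walk from ever descending into pruned directories, so an ignored tree costs one comparison instead of a full traversal. The diff removes entries one by one; slice assignment is the equivalent idiom:

```python
import os
from pathlib import Path

IGNORED_DIRS = {".git", "node_modules", ".venv"}  # stand-in for .bmignore patterns

def scan(directory: Path) -> list[str]:
    found = []
    for root, dirnames, filenames in os.walk(directory):
        # In-place mutation prunes the walk itself: os.walk never enters
        # directories removed from dirnames
        dirnames[:] = [d for d in dirnames if d not in IGNORED_DIRS]
        for filename in filenames:
            found.append((Path(root) / filename).relative_to(directory).as_posix())
    return found

print(scan(Path(".")))
```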
```diff
@@ -631,7 +722,55 @@
             f"{directory} scan completed "
             f"directory={str(directory)} "
             f"files_found={len(result.files)} "
+            f"files_ignored={ignored_count} "
             f"duration_ms={duration_ms}"
         )
 
         return result
+
+
+async def get_sync_service(project: Project) -> SyncService:  # pragma: no cover
+    """Get sync service instance with all dependencies."""
+
+    app_config = ConfigManager().config
+    _, session_maker = await db.get_or_create_db(
+        db_path=app_config.database_path, db_type=db.DatabaseType.FILESYSTEM
+    )
+
+    project_path = Path(project.path)
+    entity_parser = EntityParser(project_path)
+    markdown_processor = MarkdownProcessor(entity_parser)
+    file_service = FileService(project_path, markdown_processor)
+
+    # Initialize repositories
+    entity_repository = EntityRepository(session_maker, project_id=project.id)
+    observation_repository = ObservationRepository(session_maker, project_id=project.id)
+    relation_repository = RelationRepository(session_maker, project_id=project.id)
+    search_repository = SearchRepository(session_maker, project_id=project.id)
+
+    # Initialize services
+    search_service = SearchService(search_repository, entity_repository, file_service)
+    link_resolver = LinkResolver(entity_repository, search_service)
+
+    # Initialize services
+    entity_service = EntityService(
+        entity_parser,
+        entity_repository,
+        observation_repository,
+        relation_repository,
+        file_service,
+        link_resolver,
+    )
+
+    # Create sync service
+    sync_service = SyncService(
+        app_config=app_config,
+        entity_service=entity_service,
+        entity_parser=entity_parser,
+        entity_repository=entity_repository,
+        relation_repository=relation_repository,
+        search_service=search_service,
+        file_service=file_service,
+    )
+
+    return sync_service
```
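With the factory relocated to `basic_memory.sync.sync_service`, any caller holding a `Project` row can build a fully wired service without importing from the CLI layer. Per the signatures above, usage reduces to roughly this sketch:

```python
from pathlib import Path

from basic_memory.models import Project
from basic_memory.sync.sync_service import get_sync_service

async def sync_project(project: Project):
    # Builds the fully wired service from a Project row, then runs one sync pass
    sync_service = await get_sync_service(project)
    return await sync_service.sync(Path(project.path))  # -> SyncReport
```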
basic_memory/sync/watch_service.py CHANGED

```diff
@@ -5,9 +5,10 @@ import os
 from collections import defaultdict
 from datetime import datetime
 from pathlib import Path
-from typing import List, Optional, Set
+from typing import List, Optional, Set, Sequence
 
 from basic_memory.config import BasicMemoryConfig, WATCH_STATUS_JSON
+from basic_memory.ignore_utils import load_gitignore_patterns, should_ignore_path
 from basic_memory.models import Project
 from basic_memory.repository import ProjectRepository
 from loguru import logger
@@ -15,6 +16,7 @@ from pydantic import BaseModel
 from rich.console import Console
 from watchfiles import awatch
 from watchfiles.main import FileChange, Change
+import time
 
 
 class WatchEvent(BaseModel):
@@ -81,54 +83,110 @@ class WatchService:
         self.state = WatchServiceState()
         self.status_path = Path.home() / ".basic-memory" / WATCH_STATUS_JSON
         self.status_path.parent.mkdir(parents=True, exist_ok=True)
+        self._ignore_patterns_cache: dict[Path, Set[str]] = {}
 
         # quiet mode for mcp so it doesn't mess up stdout
         self.console = Console(quiet=quiet)
 
+    async def _schedule_restart(self, stop_event: asyncio.Event):
+        """Schedule a restart of the watch service after the configured interval."""
+        await asyncio.sleep(self.app_config.watch_project_reload_interval)
+        stop_event.set()
+
+    def _get_ignore_patterns(self, project_path: Path) -> Set[str]:
+        """Get or load ignore patterns for a project path."""
+        if project_path not in self._ignore_patterns_cache:
+            self._ignore_patterns_cache[project_path] = load_gitignore_patterns(project_path)
+        return self._ignore_patterns_cache[project_path]
+
+    async def _watch_projects_cycle(self, projects: Sequence[Project], stop_event: asyncio.Event):
+        """Run one cycle of watching the given projects until stop_event is set."""
+        project_paths = [project.path for project in projects]
+
+        async for changes in awatch(
+            *project_paths,
+            debounce=self.app_config.sync_delay,
+            watch_filter=self.filter_changes,
+            recursive=True,
+            stop_event=stop_event,
+        ):
+            # group changes by project and filter using ignore patterns
+            project_changes = defaultdict(list)
+            for change, path in changes:
+                for project in projects:
+                    if self.is_project_path(project, path):
+                        # Check if the file should be ignored based on gitignore patterns
+                        project_path = Path(project.path)
+                        file_path = Path(path)
+                        ignore_patterns = self._get_ignore_patterns(project_path)
+
+                        if should_ignore_path(file_path, project_path, ignore_patterns):
+                            logger.trace(
+                                f"Ignoring watched file change: {file_path.relative_to(project_path)}"
+                            )
+                            continue
+
+                        project_changes[project].append((change, path))
+                        break
+
+            # create coroutines to handle changes
+            change_handlers = [
+                self.handle_changes(project, changes)  # pyright: ignore
+                for project, changes in project_changes.items()
+            ]
+
+            # process changes
+            await asyncio.gather(*change_handlers)
+
     async def run(self):  # pragma: no cover
         """Watch for file changes and sync them"""
 
-
-
+        self.state.running = True
+        self.state.start_time = datetime.now()
+        await self.write_status()
 
         logger.info(
             "Watch service started",
-            f"directories={project_paths}",
             f"debounce_ms={self.app_config.sync_delay}",
             f"pid={os.getpid()}",
         )
 
-        self.state.running = True
-        self.state.start_time = datetime.now()
-        await self.write_status()
-
         try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            while self.state.running:
+                # Clear ignore patterns cache to pick up any .gitignore changes
+                self._ignore_patterns_cache.clear()
+
+                # Reload projects to catch any new/removed projects
+                projects = await self.project_repository.get_active_projects()
+
+                project_paths = [project.path for project in projects]
+                logger.debug(f"Starting watch cycle for directories: {project_paths}")
+
+                # Create stop event for this watch cycle
+                stop_event = asyncio.Event()
+
+                # Schedule restart after configured interval to reload projects
+                timer_task = asyncio.create_task(self._schedule_restart(stop_event))
+
+                try:
+                    await self._watch_projects_cycle(projects, stop_event)
+                except Exception as e:
+                    logger.exception("Watch service error during cycle", error=str(e))
+                    self.state.record_error(str(e))
+                    await self.write_status()
+                    # Continue to next cycle instead of exiting
+                    await asyncio.sleep(5)  # Brief pause before retry
+                finally:
+                    # Cancel timer task if it's still running
+                    if not timer_task.done():
+                        timer_task.cancel()
+                        try:
+                            await timer_task
+                        except asyncio.CancelledError:
+                            pass
 
         except Exception as e:
             logger.exception("Watch service error", error=str(e))
-
             self.state.record_error(str(e))
             await self.write_status()
             raise
@@ -175,11 +233,8 @@ class WatchService:
 
     async def handle_changes(self, project: Project, changes: Set[FileChange]) -> None:
         """Process a batch of file changes"""
-
-        from
-
-        # Lazily initialize sync service for project changes
-        from basic_memory.cli.commands.sync import get_sync_service
+        # avoid circular imports
+        from basic_memory.sync.sync_service import get_sync_service
 
         sync_service = await get_sync_service(project)
         file_service = sync_service.file_service
@@ -288,9 +343,13 @@ class WatchService:
                 full_path = directory / path
                 if full_path.exists() and full_path.is_file():
                     # File still exists despite DELETE event - treat as modification
-                    logger.debug(
+                    logger.debug(
+                        "File exists despite DELETE event, treating as modification", path=path
+                    )
                     entity, checksum = await sync_service.sync_file(path, new=False)
-                    self.state.add_event(
+                    self.state.add_event(
+                        path=path, action="modified", status="success", checksum=checksum
+                    )
                     self.console.print(f"[yellow]✎[/yellow] {path} (atomic write)")
                     logger.info(f"atomic write detected: {path}")
                     processed.add(path)
@@ -302,10 +361,12 @@ class WatchService:
             entity = await sync_service.entity_repository.get_by_file_path(path)
             if entity is None:
                 # No entity means this was likely a directory - skip it
-                logger.debug(
+                logger.debug(
+                    f"Skipping deleted path with no entity (likely directory), path={path}"
+                )
                 processed.add(path)
                 continue
-
+
             # File truly deleted
             logger.debug("Processing deleted file", path=path)
             await sync_service.handle_delete(path)
```
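The restructured `run` loop replaces one long-lived `awatch` call with repeated cycles: a timer task trips an `asyncio.Event` after `watch_project_reload_interval`, `awatch` exits via its `stop_event` parameter, and the outer loop re-reads the project list before watching again. A runnable miniature of that timer/stop-event shape (the inner sleep loop stands in for `awatch`):

```python
import asyncio

async def _trip_after(delay: float, event: asyncio.Event) -> None:
    await asyncio.sleep(delay)
    event.set()

async def watch_cycle(stop_event: asyncio.Event) -> None:
    # Stand-in for `async for changes in awatch(..., stop_event=stop_event)`
    while not stop_event.is_set():
        await asyncio.sleep(0.05)  # pretend to process a batch of file changes

async def run(reload_interval: float = 0.2, cycles: int = 3) -> None:
    for i in range(cycles):
        stop_event = asyncio.Event()  # fresh event per cycle
        timer = asyncio.create_task(_trip_after(reload_interval, stop_event))
        try:
            await watch_cycle(stop_event)
        finally:
            if not timer.done():
                timer.cancel()
        print(f"cycle {i} ended; reloading project list")

asyncio.run(run())
```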
basic_memory/utils.py CHANGED
```diff
@@ -223,7 +223,8 @@ def parse_tags(tags: Union[List[str], str, None]) -> List[str]:
     if isinstance(tags, str):
         # Check if it's a JSON array string (common issue from AI assistants)
         import json
-
+
+        if tags.strip().startswith("[") and tags.strip().endswith("]"):
             try:
                 # Try to parse as JSON array
                 parsed_json = json.loads(tags)
@@ -233,7 +234,7 @@ def parse_tags(tags: Union[List[str], str, None]) -> List[str]:
             except json.JSONDecodeError:
                 # Not valid JSON, fall through to comma-separated parsing
                 pass
-
+
     # Split by comma, strip whitespace, then strip leading '#' characters
     return [tag.strip().lstrip("#") for tag in tags.split(",") if tag and tag.strip()]
 
@@ -330,8 +331,8 @@ def detect_potential_file_conflicts(file_path: str, existing_paths: List[str]) -
     return conflicts
 
 
-def
-    """Ensure path
+def valid_project_path_value(path: str):
+    """Ensure project path is valid."""
     # Allow empty strings as they resolve to the project root
     if not path:
         return True
@@ -352,6 +353,15 @@ def validate_project_path(path: str, project_path: Path) -> bool:
     if path.strip() and any(ord(c) < 32 and c not in [" ", "\t"] for c in path):
         return False
 
+    return True
+
+
+def validate_project_path(path: str, project_path: Path) -> bool:
+    """Ensure path is valid and stays within project boundaries."""
+
+    if not valid_project_path_value(path):
+        return False
+
     try:
         resolved = (project_path / path).resolve()
         return resolved.is_relative_to(project_path.resolve())
```