basic-memory 0.14.4__py3-none-any.whl → 0.15.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (84)
  1. basic_memory/__init__.py +1 -1
  2. basic_memory/alembic/versions/a1b2c3d4e5f6_fix_project_foreign_keys.py +5 -9
  3. basic_memory/api/app.py +10 -4
  4. basic_memory/api/routers/directory_router.py +23 -2
  5. basic_memory/api/routers/knowledge_router.py +25 -8
  6. basic_memory/api/routers/project_router.py +100 -4
  7. basic_memory/cli/app.py +9 -28
  8. basic_memory/cli/auth.py +277 -0
  9. basic_memory/cli/commands/cloud/__init__.py +5 -0
  10. basic_memory/cli/commands/cloud/api_client.py +112 -0
  11. basic_memory/cli/commands/cloud/bisync_commands.py +818 -0
  12. basic_memory/cli/commands/cloud/core_commands.py +288 -0
  13. basic_memory/cli/commands/cloud/mount_commands.py +295 -0
  14. basic_memory/cli/commands/cloud/rclone_config.py +288 -0
  15. basic_memory/cli/commands/cloud/rclone_installer.py +198 -0
  16. basic_memory/cli/commands/command_utils.py +43 -0
  17. basic_memory/cli/commands/import_memory_json.py +0 -4
  18. basic_memory/cli/commands/mcp.py +77 -60
  19. basic_memory/cli/commands/project.py +154 -152
  20. basic_memory/cli/commands/status.py +25 -22
  21. basic_memory/cli/commands/sync.py +45 -228
  22. basic_memory/cli/commands/tool.py +87 -16
  23. basic_memory/cli/main.py +1 -0
  24. basic_memory/config.py +131 -21
  25. basic_memory/db.py +104 -3
  26. basic_memory/deps.py +27 -8
  27. basic_memory/file_utils.py +37 -13
  28. basic_memory/ignore_utils.py +295 -0
  29. basic_memory/markdown/plugins.py +9 -7
  30. basic_memory/mcp/async_client.py +124 -14
  31. basic_memory/mcp/project_context.py +141 -0
  32. basic_memory/mcp/prompts/ai_assistant_guide.py +49 -4
  33. basic_memory/mcp/prompts/continue_conversation.py +17 -16
  34. basic_memory/mcp/prompts/recent_activity.py +116 -32
  35. basic_memory/mcp/prompts/search.py +13 -12
  36. basic_memory/mcp/prompts/utils.py +11 -4
  37. basic_memory/mcp/resources/ai_assistant_guide.md +211 -341
  38. basic_memory/mcp/resources/project_info.py +27 -11
  39. basic_memory/mcp/server.py +0 -37
  40. basic_memory/mcp/tools/__init__.py +5 -6
  41. basic_memory/mcp/tools/build_context.py +67 -56
  42. basic_memory/mcp/tools/canvas.py +38 -26
  43. basic_memory/mcp/tools/chatgpt_tools.py +187 -0
  44. basic_memory/mcp/tools/delete_note.py +81 -47
  45. basic_memory/mcp/tools/edit_note.py +155 -138
  46. basic_memory/mcp/tools/list_directory.py +112 -99
  47. basic_memory/mcp/tools/move_note.py +181 -101
  48. basic_memory/mcp/tools/project_management.py +113 -277
  49. basic_memory/mcp/tools/read_content.py +91 -74
  50. basic_memory/mcp/tools/read_note.py +152 -115
  51. basic_memory/mcp/tools/recent_activity.py +471 -68
  52. basic_memory/mcp/tools/search.py +105 -92
  53. basic_memory/mcp/tools/sync_status.py +136 -130
  54. basic_memory/mcp/tools/utils.py +4 -0
  55. basic_memory/mcp/tools/view_note.py +44 -33
  56. basic_memory/mcp/tools/write_note.py +151 -90
  57. basic_memory/models/knowledge.py +12 -6
  58. basic_memory/models/project.py +6 -2
  59. basic_memory/repository/entity_repository.py +89 -82
  60. basic_memory/repository/relation_repository.py +13 -0
  61. basic_memory/repository/repository.py +18 -5
  62. basic_memory/repository/search_repository.py +46 -2
  63. basic_memory/schemas/__init__.py +6 -0
  64. basic_memory/schemas/base.py +39 -11
  65. basic_memory/schemas/cloud.py +46 -0
  66. basic_memory/schemas/memory.py +90 -21
  67. basic_memory/schemas/project_info.py +9 -10
  68. basic_memory/schemas/sync_report.py +48 -0
  69. basic_memory/services/context_service.py +25 -11
  70. basic_memory/services/directory_service.py +124 -3
  71. basic_memory/services/entity_service.py +100 -48
  72. basic_memory/services/initialization.py +30 -11
  73. basic_memory/services/project_service.py +101 -24
  74. basic_memory/services/search_service.py +16 -8
  75. basic_memory/sync/sync_service.py +173 -34
  76. basic_memory/sync/watch_service.py +101 -40
  77. basic_memory/utils.py +14 -4
  78. {basic_memory-0.14.4.dist-info → basic_memory-0.15.1.dist-info}/METADATA +57 -9
  79. basic_memory-0.15.1.dist-info/RECORD +146 -0
  80. basic_memory/mcp/project_session.py +0 -120
  81. basic_memory-0.14.4.dist-info/RECORD +0 -133
  82. {basic_memory-0.14.4.dist-info → basic_memory-0.15.1.dist-info}/WHEEL +0 -0
  83. {basic_memory-0.14.4.dist-info → basic_memory-0.15.1.dist-info}/entry_points.txt +0 -0
  84. {basic_memory-0.14.4.dist-info → basic_memory-0.15.1.dist-info}/licenses/LICENSE +0 -0
basic_memory/services/search_service.py CHANGED

@@ -113,8 +113,10 @@ class SearchService:
         # Add word boundaries
         variants.update(w.strip() for w in text.lower().split() if w.strip())

-        # Add trigrams for fuzzy matching
-        variants.update(text[i : i + 3].lower() for i in range(len(text) - 2))
+        # Trigrams disabled: They create massive search index bloat, increasing DB size significantly
+        # and slowing down indexing performance. FTS5 search works well without them.
+        # See: https://github.com/basicmachines-co/basic-memory/issues/351
+        # variants.update(text[i : i + 3].lower() for i in range(len(text) - 2))

         return variants

@@ -219,6 +221,9 @@ class SearchService:
         The project_id is automatically added by the repository when indexing.
         """

+        # Collect all search index rows to batch insert at the end
+        rows_to_index = []
+
         content_stems = []
         content_snippet = ""
         title_variants = self._generate_variants(entity.title)
@@ -241,8 +246,8 @@ class SearchService:

         entity_content_stems = "\n".join(p for p in content_stems if p and p.strip())

-        # Index entity
-        await self.repository.index_item(
+        # Add entity row
+        rows_to_index.append(
             SearchIndexRow(
                 id=entity.id,
                 type=SearchItemType.ENTITY.value,
@@ -261,13 +266,13 @@
             )
         )

-        # Index each observation with permalink
+        # Add observation rows
         for obs in entity.observations:
             # Index with parent entity's file path since that's where it's defined
             obs_content_stems = "\n".join(
                 p for p in self._generate_variants(obs.content) if p and p.strip()
             )
-            await self.repository.index_item(
+            rows_to_index.append(
                 SearchIndexRow(
                     id=obs.id,
                     type=SearchItemType.OBSERVATION.value,
@@ -287,7 +292,7 @@
                 )
             )

-        # Only index outgoing relations (ones defined in this file)
+        # Add relation rows (only outgoing relations defined in this file)
         for rel in entity.outgoing_relations:
             # Create descriptive title showing the relationship
             relation_title = (
@@ -299,7 +304,7 @@
             rel_content_stems = "\n".join(
                 p for p in self._generate_variants(relation_title) if p and p.strip()
             )
-            await self.repository.index_item(
+            rows_to_index.append(
                 SearchIndexRow(
                     id=rel.id,
                     title=relation_title,
@@ -317,6 +322,9 @@
                 )
             )

+        # Batch insert all rows at once
+        await self.repository.bulk_index_items(rows_to_index)
+
     async def delete_by_permalink(self, permalink: str):
         """Delete an item from the search index."""
         await self.repository.delete_by_permalink(permalink)
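
The change above collects SearchIndexRow objects and hands them to a single repository call instead of awaiting one insert per row. The repository side (bulk_index_items, added in search_repository.py per the file list) is not shown in this diff; a minimal sketch of what a batched insert could look like, with the table object, the row-to-dict helper, and commit-on-exit all assumed:

    from sqlalchemy import insert

    from basic_memory import db

    async def bulk_index_items(self, rows: list) -> None:
        # Sketch only: one executemany-style INSERT instead of N awaited round trips
        if not rows:
            return
        values = [row.to_insert_values() for row in rows]  # hypothetical row-to-dict helper
        async with db.scoped_session(self.session_maker) as session:  # commit-on-exit assumed
            await session.execute(insert(search_index_table), values)  # table object assumed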
basic_memory/sync/sync_service.py CHANGED

@@ -1,21 +1,28 @@
 """Service for syncing files between filesystem and database."""

+import asyncio
 import os
 import time
+from concurrent.futures import ThreadPoolExecutor
 from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
 from typing import Dict, Optional, Set, Tuple

 from loguru import logger
+from sqlalchemy import select
 from sqlalchemy.exc import IntegrityError

-from basic_memory.config import BasicMemoryConfig
+from basic_memory import db
+from basic_memory.config import BasicMemoryConfig, ConfigManager
 from basic_memory.file_utils import has_frontmatter
-from basic_memory.markdown import EntityParser
-from basic_memory.models import Entity
-from basic_memory.repository import EntityRepository, RelationRepository
+from basic_memory.ignore_utils import load_bmignore_patterns, should_ignore_path
+from basic_memory.markdown import EntityParser, MarkdownProcessor
+from basic_memory.models import Entity, Project
+from basic_memory.repository import EntityRepository, RelationRepository, ObservationRepository
+from basic_memory.repository.search_repository import SearchRepository
 from basic_memory.services import EntityService, FileService
+from basic_memory.services.link_resolver import LinkResolver
 from basic_memory.services.search_service import SearchService
 from basic_memory.services.sync_status_service import sync_status_tracker, SyncStatus

@@ -80,6 +87,43 @@ class SyncService:
         self.relation_repository = relation_repository
         self.search_service = search_service
         self.file_service = file_service
+        self._thread_pool = ThreadPoolExecutor(max_workers=app_config.sync_thread_pool_size)
+        # Load ignore patterns once at initialization for performance
+        self._ignore_patterns = load_bmignore_patterns()
+
+    async def _read_file_async(self, file_path: Path) -> str:
+        """Read file content in thread pool to avoid blocking the event loop."""
+        loop = asyncio.get_event_loop()
+        return await loop.run_in_executor(self._thread_pool, file_path.read_text, "utf-8")
+
+    async def _compute_checksum_async(self, path: str) -> str:
+        """Compute file checksum in thread pool to avoid blocking the event loop."""
+
+        def _sync_compute_checksum(path_str: str) -> str:
+            # Synchronous version for thread pool execution
+            path_obj = self.file_service.base_path / path_str
+
+            if self.file_service.is_markdown(path_str):
+                content = path_obj.read_text(encoding="utf-8")
+            else:
+                content = path_obj.read_bytes()
+
+            # Use the synchronous version of compute_checksum
+            import hashlib
+
+            if isinstance(content, str):
+                content_bytes = content.encode("utf-8")
+            else:
+                content_bytes = content
+            return hashlib.sha256(content_bytes).hexdigest()
+
+        loop = asyncio.get_event_loop()
+        return await loop.run_in_executor(self._thread_pool, _sync_compute_checksum, path)
+
+    def __del__(self):
+        """Cleanup thread pool when service is destroyed."""
+        if hasattr(self, "_thread_pool"):
+            self._thread_pool.shutdown(wait=False)

     async def sync(self, directory: Path, project_name: Optional[str] = None) -> SyncReport:
         """Sync all files with database."""
@@ -231,15 +275,25 @@

     async def get_db_file_state(self) -> Dict[str, str]:
         """Get file_path and checksums from database.
-        Args:
-            db_records: database records
+
+        Optimized to query only the columns we need (file_path, checksum) without
+        loading full entities or their relationships. This is 10-100x faster for
+        large projects compared to loading all entities with observations/relations.
+
         Returns:
-            Dict mapping file paths to FileState
-        :param db_records: the data from the db
+            Dict mapping file paths to checksums
         """
-        db_records = await self.entity_repository.find_all()
-        logger.info(f"Found {len(db_records)} db records")
-        return {r.file_path: r.checksum or "" for r in db_records}
+        # Query only the columns we need - no entity objects or relationships
+        query = select(Entity.file_path, Entity.checksum).where(
+            Entity.project_id == self.entity_repository.project_id
+        )
+
+        async with db.scoped_session(self.entity_repository.session_maker) as session:
+            result = await session.execute(query)
+            rows = result.all()
+
+        logger.info(f"Found {len(rows)} db file records")
+        return {row.file_path: row.checksum or "" for row in rows}

     async def sync_file(
         self, path: str, new: bool = True
@@ -289,16 +343,18 @@
         logger.debug(f"Parsing markdown file, path: {path}, new: {new}")

         file_path = self.entity_parser.base_path / path
-        file_content = file_path.read_text(encoding="utf-8")
+        file_content = await self._read_file_async(file_path)
         file_contains_frontmatter = has_frontmatter(file_content)

         # entity markdown will always contain front matter, so it can be used up create/update the entity
         entity_markdown = await self.entity_parser.parse_file(path)

-        # if the file contains frontmatter, resolve a permalink
-        if file_contains_frontmatter:
-            # Resolve permalink - this handles all the cases including conflicts
-            permalink = await self.entity_service.resolve_permalink(path, markdown=entity_markdown)
+        # if the file contains frontmatter, resolve a permalink (unless disabled)
+        if file_contains_frontmatter and not self.app_config.disable_permalinks:
+            # Resolve permalink - skip conflict checks during bulk sync for performance
+            permalink = await self.entity_service.resolve_permalink(
+                path, markdown=entity_markdown, skip_conflict_check=True
+            )

             # If permalink changed, update the file
             if permalink != entity_markdown.frontmatter.permalink:
@@ -326,7 +382,7 @@
         # After updating relations, we need to compute the checksum again
         # This is necessary for files with wikilinks to ensure consistent checksums
         # after relation processing is complete
-        final_checksum = await self.file_service.compute_checksum(path)
+        final_checksum = await self._compute_checksum_async(path)

         # set checksum
         await self.entity_repository.update(entity.id, {"checksum": final_checksum})
@@ -350,10 +406,10 @@
         Returns:
             Tuple of (entity, checksum)
         """
-        checksum = await self.file_service.compute_checksum(path)
+        checksum = await self._compute_checksum_async(path)
         if new:
-            # Generate permalink from path
-            await self.entity_service.resolve_permalink(path)
+            # Generate permalink from path - skip conflict checks during bulk sync
+            await self.entity_service.resolve_permalink(path, skip_conflict_check=True)

         # get file timestamps
         file_stats = self.file_service.file_stats(path)
@@ -487,11 +543,15 @@
         updates = {"file_path": new_path}

         # If configured, also update permalink to match new path
-        if self.app_config.update_permalinks_on_move and self.file_service.is_markdown(
-            new_path
+        if (
+            self.app_config.update_permalinks_on_move
+            and not self.app_config.disable_permalinks
+            and self.file_service.is_markdown(new_path)
         ):
-            # generate new permalink value
-            new_permalink = await self.entity_service.resolve_permalink(new_path)
+            # generate new permalink value - skip conflict checks during bulk sync
+            new_permalink = await self.entity_service.resolve_permalink(
+                new_path, skip_conflict_check=True
+            )

             # write to file and get new checksum
             new_checksum = await self.file_service.update_frontmatter(
@@ -548,12 +608,27 @@
         # update search index
         await self.search_service.index_entity(updated)

-    async def resolve_relations(self):
-        """Try to resolve any unresolved relations"""
+    async def resolve_relations(self, entity_id: int | None = None):
+        """Try to resolve unresolved relations.

-        unresolved_relations = await self.relation_repository.find_unresolved_relations()
+        Args:
+            entity_id: If provided, only resolve relations for this specific entity.
+                Otherwise, resolve all unresolved relations in the database.
+        """

-        logger.info("Resolving forward references", count=len(unresolved_relations))
+        if entity_id:
+            # Only get unresolved relations for the specific entity
+            unresolved_relations = (
+                await self.relation_repository.find_unresolved_relations_for_entity(entity_id)
+            )
+            logger.info(
+                f"Resolving forward references for entity {entity_id}",
+                count=len(unresolved_relations),
+            )
+        else:
+            # Get all unresolved relations (original behavior)
+            unresolved_relations = await self.relation_repository.find_unresolved_relations()
+            logger.info("Resolving all forward references", count=len(unresolved_relations))

         for relation in unresolved_relations:
             logger.trace(
@@ -608,19 +683,35 @@

         logger.debug(f"Scanning directory {directory}")
         result = ScanResult()
+        ignored_count = 0

         for root, dirnames, filenames in os.walk(str(directory)):
-            # Skip dot directories in-place
-            dirnames[:] = [d for d in dirnames if not d.startswith(".")]
+            # Convert root to Path for easier manipulation
+            root_path = Path(root)
+
+            # Filter out ignored directories in-place
+            dirnames_to_remove = []
+            for dirname in dirnames:
+                dir_path = root_path / dirname
+                if should_ignore_path(dir_path, directory, self._ignore_patterns):
+                    dirnames_to_remove.append(dirname)
+                    ignored_count += 1
+
+            # Remove ignored directories from dirnames to prevent os.walk from descending
+            for dirname in dirnames_to_remove:
+                dirnames.remove(dirname)

             for filename in filenames:
-                # Skip dot files
-                if filename.startswith("."):
+                path = root_path / filename
+
+                # Check if file should be ignored
+                if should_ignore_path(path, directory, self._ignore_patterns):
+                    ignored_count += 1
+                    logger.trace(f"Ignoring file per .bmignore: {path.relative_to(directory)}")
                     continue

-                path = Path(root) / filename
                 rel_path = path.relative_to(directory).as_posix()
-                checksum = await self.file_service.compute_checksum(rel_path)
+                checksum = await self._compute_checksum_async(rel_path)
                 result.files[rel_path] = checksum
                 result.checksums[checksum] = rel_path

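should_ignore_path comes from the new basic_memory.ignore_utils module, whose implementation is not part of this diff. A plausible minimal sketch of gitignore-style matching with fnmatch, for illustration only:

    from fnmatch import fnmatch
    from pathlib import Path

    def should_ignore_path(path: Path, root: Path, patterns: set) -> bool:
        # Illustrative: match the relative path or any path component against glob patterns
        rel = path.relative_to(root).as_posix()
        return any(
            fnmatch(rel, pattern) or any(fnmatch(part, pattern) for part in rel.split("/"))
            for pattern in patterns
        )

    # e.g. should_ignore_path(Path("/p/.git/config"), Path("/p"), {".git", "*.tmp"}) -> True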
@@ -631,7 +722,55 @@
             f"{directory} scan completed "
             f"directory={str(directory)} "
             f"files_found={len(result.files)} "
+            f"files_ignored={ignored_count} "
             f"duration_ms={duration_ms}"
         )

         return result
+
+
+async def get_sync_service(project: Project) -> SyncService:  # pragma: no cover
+    """Get sync service instance with all dependencies."""
+
+    app_config = ConfigManager().config
+    _, session_maker = await db.get_or_create_db(
+        db_path=app_config.database_path, db_type=db.DatabaseType.FILESYSTEM
+    )
+
+    project_path = Path(project.path)
+    entity_parser = EntityParser(project_path)
+    markdown_processor = MarkdownProcessor(entity_parser)
+    file_service = FileService(project_path, markdown_processor)
+
+    # Initialize repositories
+    entity_repository = EntityRepository(session_maker, project_id=project.id)
+    observation_repository = ObservationRepository(session_maker, project_id=project.id)
+    relation_repository = RelationRepository(session_maker, project_id=project.id)
+    search_repository = SearchRepository(session_maker, project_id=project.id)
+
+    # Initialize services
+    search_service = SearchService(search_repository, entity_repository, file_service)
+    link_resolver = LinkResolver(entity_repository, search_service)
+
+    # Initialize services
+    entity_service = EntityService(
+        entity_parser,
+        entity_repository,
+        observation_repository,
+        relation_repository,
+        file_service,
+        link_resolver,
+    )
+
+    # Create sync service
+    sync_service = SyncService(
+        app_config=app_config,
+        entity_service=entity_service,
+        entity_parser=entity_parser,
+        entity_repository=entity_repository,
+        relation_repository=relation_repository,
+        search_service=search_service,
+        file_service=file_service,
+    )
+
+    return sync_service
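
A short usage sketch for this factory, mirroring how the watch service calls it below (a Project row already loaded via ProjectRepository is assumed):

    from pathlib import Path

    from basic_memory.sync.sync_service import get_sync_service

    async def sync_one_project(project) -> None:
        # Wires config, database session, repositories, and services for the project
        sync_service = await get_sync_service(project)
        report = await sync_service.sync(Path(project.path), project_name=project.name)
        print(report)  # a SyncReport; its fields are not shown in this diff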
basic_memory/sync/watch_service.py CHANGED

@@ -5,9 +5,10 @@ import os
 from collections import defaultdict
 from datetime import datetime
 from pathlib import Path
-from typing import List, Optional, Set
+from typing import List, Optional, Set, Sequence

 from basic_memory.config import BasicMemoryConfig, WATCH_STATUS_JSON
+from basic_memory.ignore_utils import load_gitignore_patterns, should_ignore_path
 from basic_memory.models import Project
 from basic_memory.repository import ProjectRepository
 from loguru import logger
@@ -15,6 +16,7 @@ from pydantic import BaseModel
 from rich.console import Console
 from watchfiles import awatch
 from watchfiles.main import FileChange, Change
+import time


 class WatchEvent(BaseModel):
@@ -81,54 +83,110 @@ class WatchService:
         self.state = WatchServiceState()
         self.status_path = Path.home() / ".basic-memory" / WATCH_STATUS_JSON
         self.status_path.parent.mkdir(parents=True, exist_ok=True)
+        self._ignore_patterns_cache: dict[Path, Set[str]] = {}

         # quiet mode for mcp so it doesn't mess up stdout
         self.console = Console(quiet=quiet)

+    async def _schedule_restart(self, stop_event: asyncio.Event):
+        """Schedule a restart of the watch service after the configured interval."""
+        await asyncio.sleep(self.app_config.watch_project_reload_interval)
+        stop_event.set()
+
+    def _get_ignore_patterns(self, project_path: Path) -> Set[str]:
+        """Get or load ignore patterns for a project path."""
+        if project_path not in self._ignore_patterns_cache:
+            self._ignore_patterns_cache[project_path] = load_gitignore_patterns(project_path)
+        return self._ignore_patterns_cache[project_path]
+
+    async def _watch_projects_cycle(self, projects: Sequence[Project], stop_event: asyncio.Event):
+        """Run one cycle of watching the given projects until stop_event is set."""
+        project_paths = [project.path for project in projects]
+
+        async for changes in awatch(
+            *project_paths,
+            debounce=self.app_config.sync_delay,
+            watch_filter=self.filter_changes,
+            recursive=True,
+            stop_event=stop_event,
+        ):
+            # group changes by project and filter using ignore patterns
+            project_changes = defaultdict(list)
+            for change, path in changes:
+                for project in projects:
+                    if self.is_project_path(project, path):
+                        # Check if the file should be ignored based on gitignore patterns
+                        project_path = Path(project.path)
+                        file_path = Path(path)
+                        ignore_patterns = self._get_ignore_patterns(project_path)
+
+                        if should_ignore_path(file_path, project_path, ignore_patterns):
+                            logger.trace(
+                                f"Ignoring watched file change: {file_path.relative_to(project_path)}"
+                            )
+                            continue
+
+                        project_changes[project].append((change, path))
+                        break
+
+            # create coroutines to handle changes
+            change_handlers = [
+                self.handle_changes(project, changes)  # pyright: ignore
+                for project, changes in project_changes.items()
+            ]
+
+            # process changes
+            await asyncio.gather(*change_handlers)
+
     async def run(self):  # pragma: no cover
         """Watch for file changes and sync them"""

-        projects = await self.project_repository.get_active_projects()
-        project_paths = [project.path for project in projects]
+        self.state.running = True
+        self.state.start_time = datetime.now()
+        await self.write_status()

         logger.info(
             "Watch service started",
-            f"directories={project_paths}",
             f"debounce_ms={self.app_config.sync_delay}",
             f"pid={os.getpid()}",
         )

-        self.state.running = True
-        self.state.start_time = datetime.now()
-        await self.write_status()
-
         try:
-            async for changes in awatch(
-                *project_paths,
-                debounce=self.app_config.sync_delay,
-                watch_filter=self.filter_changes,
-                recursive=True,
-            ):
-                # group changes by project
-                project_changes = defaultdict(list)
-                for change, path in changes:
-                    for project in projects:
-                        if self.is_project_path(project, path):
-                            project_changes[project].append((change, path))
-                            break
-
-                # create coroutines to handle changes
-                change_handlers = [
-                    self.handle_changes(project, changes)  # pyright: ignore
-                    for project, changes in project_changes.items()
-                ]
-
-                # process changes
-                await asyncio.gather(*change_handlers)
+            while self.state.running:
+                # Clear ignore patterns cache to pick up any .gitignore changes
+                self._ignore_patterns_cache.clear()
+
+                # Reload projects to catch any new/removed projects
+                projects = await self.project_repository.get_active_projects()
+
+                project_paths = [project.path for project in projects]
+                logger.debug(f"Starting watch cycle for directories: {project_paths}")
+
+                # Create stop event for this watch cycle
+                stop_event = asyncio.Event()
+
+                # Schedule restart after configured interval to reload projects
+                timer_task = asyncio.create_task(self._schedule_restart(stop_event))
+
+                try:
+                    await self._watch_projects_cycle(projects, stop_event)
+                except Exception as e:
+                    logger.exception("Watch service error during cycle", error=str(e))
+                    self.state.record_error(str(e))
+                    await self.write_status()
+                    # Continue to next cycle instead of exiting
+                    await asyncio.sleep(5)  # Brief pause before retry
+                finally:
+                    # Cancel timer task if it's still running
+                    if not timer_task.done():
+                        timer_task.cancel()
+                        try:
+                            await timer_task
+                        except asyncio.CancelledError:
+                            pass

         except Exception as e:
             logger.exception("Watch service error", error=str(e))
-
             self.state.record_error(str(e))
             await self.write_status()
             raise
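
The rewritten run() loop relies on watchfiles.awatch exiting its async iteration once stop_event is set, which lets the service reload projects and ignore patterns between cycles. A stripped-down sketch of the same restart pattern (directory and interval are illustrative):

    import asyncio

    from watchfiles import awatch

    RELOAD_INTERVAL = 60  # seconds; the service reads watch_project_reload_interval from config

    async def stop_after(delay: float, stop_event: asyncio.Event) -> None:
        await asyncio.sleep(delay)
        stop_event.set()  # ends the current awatch cycle cleanly

    async def watch_forever(directory: str) -> None:
        while True:
            stop_event = asyncio.Event()
            timer = asyncio.create_task(stop_after(RELOAD_INTERVAL, stop_event))
            try:
                async for changes in awatch(directory, stop_event=stop_event):
                    print(changes)
            finally:
                timer.cancel()
            # the loop re-enters here, re-reading any configuration that changed

    # asyncio.run(watch_forever("/path/to/project"))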
@@ -175,11 +233,8 @@

     async def handle_changes(self, project: Project, changes: Set[FileChange]) -> None:
         """Process a batch of file changes"""
-        import time
-        from typing import List, Set
-
-        # Lazily initialize sync service for project changes
-        from basic_memory.cli.commands.sync import get_sync_service
+        # avoid circular imports
+        from basic_memory.sync.sync_service import get_sync_service

         sync_service = await get_sync_service(project)
         file_service = sync_service.file_service
@@ -288,9 +343,13 @@
             full_path = directory / path
             if full_path.exists() and full_path.is_file():
                 # File still exists despite DELETE event - treat as modification
-                logger.debug("File exists despite DELETE event, treating as modification", path=path)
+                logger.debug(
+                    "File exists despite DELETE event, treating as modification", path=path
+                )
                 entity, checksum = await sync_service.sync_file(path, new=False)
-                self.state.add_event(path=path, action="modified", status="success", checksum=checksum)
+                self.state.add_event(
+                    path=path, action="modified", status="success", checksum=checksum
+                )
                 self.console.print(f"[yellow]✎[/yellow] {path} (atomic write)")
                 logger.info(f"atomic write detected: {path}")
                 processed.add(path)
@@ -302,10 +361,12 @@
             entity = await sync_service.entity_repository.get_by_file_path(path)
             if entity is None:
                 # No entity means this was likely a directory - skip it
-                logger.debug(f"Skipping deleted path with no entity (likely directory), path={path}")
+                logger.debug(
+                    f"Skipping deleted path with no entity (likely directory), path={path}"
+                )
                 processed.add(path)
                 continue
-
+
             # File truly deleted
             logger.debug("Processing deleted file", path=path)
             await sync_service.handle_delete(path)
basic_memory/utils.py CHANGED
@@ -223,7 +223,8 @@ def parse_tags(tags: Union[List[str], str, None]) -> List[str]:
     if isinstance(tags, str):
         # Check if it's a JSON array string (common issue from AI assistants)
        import json
-        if tags.strip().startswith('[') and tags.strip().endswith(']'):
+
+        if tags.strip().startswith("[") and tags.strip().endswith("]"):
             try:
                 # Try to parse as JSON array
                 parsed_json = json.loads(tags)
@@ -233,7 +234,7 @@
             except json.JSONDecodeError:
                 # Not valid JSON, fall through to comma-separated parsing
                 pass
-
+
         # Split by comma, strip whitespace, then strip leading '#' characters
         return [tag.strip().lstrip("#") for tag in tags.split(",") if tag and tag.strip()]

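From the surrounding context, parse_tags accepts a list, a comma-separated string, or a JSON-array string. A quick illustration of the expected behavior (return values inferred from the visible branches; not every branch is shown in the hunk):

    from basic_memory.utils import parse_tags

    parse_tags("python, #mcp")        # -> ["python", "mcp"] (leading "#" stripped)
    parse_tags('["python", "mcp"]')   # JSON array string -> likely ["python", "mcp"]
    parse_tags(["python", "mcp"])     # list input is assumed to pass through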
@@ -330,8 +331,8 @@ def detect_potential_file_conflicts(file_path: str, existing_paths: List[str]) -
     return conflicts


-def validate_project_path(path: str, project_path: Path) -> bool:
-    """Ensure path stays within project boundaries."""
+def valid_project_path_value(path: str):
+    """Ensure project path is valid."""
     # Allow empty strings as they resolve to the project root
     if not path:
         return True
@@ -352,6 +353,15 @@ def validate_project_path(path: str, project_path: Path) -> bool:
     if path.strip() and any(ord(c) < 32 and c not in [" ", "\t"] for c in path):
         return False

+    return True
+
+
+def validate_project_path(path: str, project_path: Path) -> bool:
+    """Ensure path is valid and stays within project boundaries."""
+
+    if not valid_project_path_value(path):
+        return False
+
     try:
         resolved = (project_path / path).resolve()
         return resolved.is_relative_to(project_path.resolve())
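
The refactor splits value validation (empty strings, control characters) from the containment check, so each can be reused on its own. A self-contained sketch of the containment check alone (paths are illustrative):

    from pathlib import Path

    def stays_inside(path: str, project_path: Path) -> bool:
        # Resolve ".." segments and symlinks, then verify the result is under the project root
        resolved = (project_path / path).resolve()
        return resolved.is_relative_to(project_path.resolve())  # Path.is_relative_to: Python 3.9+

    print(stays_inside("notes/todo.md", Path("/projects/demo")))     # True
    print(stays_inside("../../etc/passwd", Path("/projects/demo")))  # False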