basic-memory 0.16.1__py3-none-any.whl → 0.17.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of basic-memory might be problematic.

Files changed (143)
  1. basic_memory/__init__.py +1 -1
  2. basic_memory/alembic/env.py +112 -26
  3. basic_memory/alembic/versions/314f1ea54dc4_add_postgres_full_text_search_support_.py +131 -0
  4. basic_memory/alembic/versions/5fe1ab1ccebe_add_projects_table.py +15 -3
  5. basic_memory/alembic/versions/647e7a75e2cd_project_constraint_fix.py +44 -36
  6. basic_memory/alembic/versions/6830751f5fb6_merge_multiple_heads.py +24 -0
  7. basic_memory/alembic/versions/a2b3c4d5e6f7_add_search_index_entity_cascade.py +56 -0
  8. basic_memory/alembic/versions/cc7172b46608_update_search_index_schema.py +13 -0
  9. basic_memory/alembic/versions/f8a9b2c3d4e5_add_pg_trgm_for_fuzzy_link_resolution.py +239 -0
  10. basic_memory/alembic/versions/g9a0b3c4d5e6_add_external_id_to_project_and_entity.py +173 -0
  11. basic_memory/api/app.py +45 -24
  12. basic_memory/api/container.py +133 -0
  13. basic_memory/api/routers/knowledge_router.py +17 -5
  14. basic_memory/api/routers/project_router.py +68 -14
  15. basic_memory/api/routers/resource_router.py +37 -27
  16. basic_memory/api/routers/utils.py +53 -14
  17. basic_memory/api/v2/__init__.py +35 -0
  18. basic_memory/api/v2/routers/__init__.py +21 -0
  19. basic_memory/api/v2/routers/directory_router.py +93 -0
  20. basic_memory/api/v2/routers/importer_router.py +181 -0
  21. basic_memory/api/v2/routers/knowledge_router.py +427 -0
  22. basic_memory/api/v2/routers/memory_router.py +130 -0
  23. basic_memory/api/v2/routers/project_router.py +359 -0
  24. basic_memory/api/v2/routers/prompt_router.py +269 -0
  25. basic_memory/api/v2/routers/resource_router.py +286 -0
  26. basic_memory/api/v2/routers/search_router.py +73 -0
  27. basic_memory/cli/app.py +43 -7
  28. basic_memory/cli/auth.py +27 -4
  29. basic_memory/cli/commands/__init__.py +3 -1
  30. basic_memory/cli/commands/cloud/api_client.py +20 -5
  31. basic_memory/cli/commands/cloud/cloud_utils.py +13 -6
  32. basic_memory/cli/commands/cloud/rclone_commands.py +110 -14
  33. basic_memory/cli/commands/cloud/rclone_installer.py +18 -4
  34. basic_memory/cli/commands/cloud/upload.py +10 -3
  35. basic_memory/cli/commands/command_utils.py +52 -4
  36. basic_memory/cli/commands/db.py +78 -19
  37. basic_memory/cli/commands/format.py +198 -0
  38. basic_memory/cli/commands/import_chatgpt.py +12 -8
  39. basic_memory/cli/commands/import_claude_conversations.py +12 -8
  40. basic_memory/cli/commands/import_claude_projects.py +12 -8
  41. basic_memory/cli/commands/import_memory_json.py +12 -8
  42. basic_memory/cli/commands/mcp.py +8 -26
  43. basic_memory/cli/commands/project.py +22 -9
  44. basic_memory/cli/commands/status.py +3 -2
  45. basic_memory/cli/commands/telemetry.py +81 -0
  46. basic_memory/cli/container.py +84 -0
  47. basic_memory/cli/main.py +7 -0
  48. basic_memory/config.py +177 -77
  49. basic_memory/db.py +183 -77
  50. basic_memory/deps/__init__.py +293 -0
  51. basic_memory/deps/config.py +26 -0
  52. basic_memory/deps/db.py +56 -0
  53. basic_memory/deps/importers.py +200 -0
  54. basic_memory/deps/projects.py +238 -0
  55. basic_memory/deps/repositories.py +179 -0
  56. basic_memory/deps/services.py +480 -0
  57. basic_memory/deps.py +14 -409
  58. basic_memory/file_utils.py +212 -3
  59. basic_memory/ignore_utils.py +5 -5
  60. basic_memory/importers/base.py +40 -19
  61. basic_memory/importers/chatgpt_importer.py +17 -4
  62. basic_memory/importers/claude_conversations_importer.py +27 -12
  63. basic_memory/importers/claude_projects_importer.py +50 -14
  64. basic_memory/importers/memory_json_importer.py +36 -16
  65. basic_memory/importers/utils.py +5 -2
  66. basic_memory/markdown/entity_parser.py +62 -23
  67. basic_memory/markdown/markdown_processor.py +67 -4
  68. basic_memory/markdown/plugins.py +4 -2
  69. basic_memory/markdown/utils.py +10 -1
  70. basic_memory/mcp/async_client.py +1 -0
  71. basic_memory/mcp/clients/__init__.py +28 -0
  72. basic_memory/mcp/clients/directory.py +70 -0
  73. basic_memory/mcp/clients/knowledge.py +176 -0
  74. basic_memory/mcp/clients/memory.py +120 -0
  75. basic_memory/mcp/clients/project.py +89 -0
  76. basic_memory/mcp/clients/resource.py +71 -0
  77. basic_memory/mcp/clients/search.py +65 -0
  78. basic_memory/mcp/container.py +110 -0
  79. basic_memory/mcp/project_context.py +47 -33
  80. basic_memory/mcp/prompts/ai_assistant_guide.py +2 -2
  81. basic_memory/mcp/prompts/recent_activity.py +2 -2
  82. basic_memory/mcp/prompts/utils.py +3 -3
  83. basic_memory/mcp/server.py +58 -0
  84. basic_memory/mcp/tools/build_context.py +14 -14
  85. basic_memory/mcp/tools/canvas.py +34 -12
  86. basic_memory/mcp/tools/chatgpt_tools.py +4 -1
  87. basic_memory/mcp/tools/delete_note.py +31 -7
  88. basic_memory/mcp/tools/edit_note.py +14 -9
  89. basic_memory/mcp/tools/list_directory.py +7 -17
  90. basic_memory/mcp/tools/move_note.py +35 -31
  91. basic_memory/mcp/tools/project_management.py +29 -25
  92. basic_memory/mcp/tools/read_content.py +13 -3
  93. basic_memory/mcp/tools/read_note.py +24 -14
  94. basic_memory/mcp/tools/recent_activity.py +32 -38
  95. basic_memory/mcp/tools/search.py +17 -10
  96. basic_memory/mcp/tools/utils.py +28 -0
  97. basic_memory/mcp/tools/view_note.py +2 -1
  98. basic_memory/mcp/tools/write_note.py +37 -14
  99. basic_memory/models/knowledge.py +15 -2
  100. basic_memory/models/project.py +7 -1
  101. basic_memory/models/search.py +58 -2
  102. basic_memory/project_resolver.py +222 -0
  103. basic_memory/repository/entity_repository.py +210 -3
  104. basic_memory/repository/observation_repository.py +1 -0
  105. basic_memory/repository/postgres_search_repository.py +451 -0
  106. basic_memory/repository/project_repository.py +38 -1
  107. basic_memory/repository/relation_repository.py +58 -2
  108. basic_memory/repository/repository.py +1 -0
  109. basic_memory/repository/search_index_row.py +95 -0
  110. basic_memory/repository/search_repository.py +77 -615
  111. basic_memory/repository/search_repository_base.py +241 -0
  112. basic_memory/repository/sqlite_search_repository.py +437 -0
  113. basic_memory/runtime.py +61 -0
  114. basic_memory/schemas/base.py +36 -6
  115. basic_memory/schemas/directory.py +2 -1
  116. basic_memory/schemas/memory.py +9 -2
  117. basic_memory/schemas/project_info.py +2 -0
  118. basic_memory/schemas/response.py +84 -27
  119. basic_memory/schemas/search.py +5 -0
  120. basic_memory/schemas/sync_report.py +1 -1
  121. basic_memory/schemas/v2/__init__.py +27 -0
  122. basic_memory/schemas/v2/entity.py +133 -0
  123. basic_memory/schemas/v2/resource.py +47 -0
  124. basic_memory/services/context_service.py +219 -43
  125. basic_memory/services/directory_service.py +26 -11
  126. basic_memory/services/entity_service.py +68 -33
  127. basic_memory/services/file_service.py +131 -16
  128. basic_memory/services/initialization.py +51 -26
  129. basic_memory/services/link_resolver.py +1 -0
  130. basic_memory/services/project_service.py +68 -43
  131. basic_memory/services/search_service.py +75 -16
  132. basic_memory/sync/__init__.py +2 -1
  133. basic_memory/sync/coordinator.py +160 -0
  134. basic_memory/sync/sync_service.py +135 -115
  135. basic_memory/sync/watch_service.py +32 -12
  136. basic_memory/telemetry.py +249 -0
  137. basic_memory/utils.py +96 -75
  138. {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/METADATA +129 -5
  139. basic_memory-0.17.4.dist-info/RECORD +193 -0
  140. {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/WHEEL +1 -1
  141. basic_memory-0.16.1.dist-info/RECORD +0 -148
  142. {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/entry_points.txt +0 -0
  143. {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/licenses/LICENSE +0 -0
basic_memory/sync/sync_service.py

@@ -2,6 +2,7 @@

  import asyncio
  import os
+ import sys
  import time
  from collections import OrderedDict
  from dataclasses import dataclass, field
@@ -10,7 +11,7 @@ from pathlib import Path
  from typing import AsyncIterator, Dict, List, Optional, Set, Tuple

  import aiofiles.os
- import logfire
+
  from loguru import logger
  from sqlalchemy.exc import IntegrityError

@@ -26,7 +27,7 @@ from basic_memory.repository import (
  ObservationRepository,
  ProjectRepository,
  )
- from basic_memory.repository.search_repository import SearchRepository
+ from basic_memory.repository.search_repository import create_search_repository
  from basic_memory.services import EntityService, FileService
  from basic_memory.services.exceptions import SyncFatalError
  from basic_memory.services.link_resolver import LinkResolver
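Note on this change: the concrete `SearchRepository` import gives way to a `create_search_repository` factory, consistent with the new `sqlite_search_repository.py` and `postgres_search_repository.py` modules added in this release. A minimal, self-contained sketch of a factory in that spirit (class names, constructor arguments, and the `backend` selector below are illustrative, not the package's actual API):

```python
# Illustrative only: a backend-dispatching factory in the spirit of create_search_repository.
# The real implementations live in basic_memory/repository/sqlite_search_repository.py and
# postgres_search_repository.py; everything named here is an assumption.
class SqliteSearchRepository:
    def __init__(self, session_maker, project_id: int):
        self.session_maker, self.project_id = session_maker, project_id


class PostgresSearchRepository:
    def __init__(self, session_maker, project_id: int):
        self.session_maker, self.project_id = session_maker, project_id


def create_search_repository(session_maker, project_id: int, backend: str = "sqlite"):
    # Pick SQLite FTS5 or Postgres full-text search depending on the configured backend.
    if backend == "postgres":
        return PostgresSearchRepository(session_maker, project_id=project_id)
    return SqliteSearchRepository(session_maker, project_id=project_id)
```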
@@ -215,17 +216,12 @@ class SyncService:
  f"path={path}, error={error}"
  )

- # Record metric for file failure
- logfire.metric_counter("sync.circuit_breaker.failures").add(1)
-
  # Log when threshold is reached
  if failure_info.count >= MAX_CONSECUTIVE_FAILURES:
  logger.error(
  f"File {path} has failed {MAX_CONSECUTIVE_FAILURES} times and will be skipped. "
  f"First failure: {failure_info.first_failure}, Last error: {error}"
  )
- # Record metric for file being blocked by circuit breaker
- logfire.metric_counter("sync.circuit_breaker.blocked_files").add(1)
  else:
  # Create new failure record
  self._file_failures[path] = FileFailureInfo(
@@ -255,7 +251,6 @@ class SyncService:
  logger.info(f"Clearing failure history for {path} after successful sync")
  del self._file_failures[path]

- @logfire.instrument()
  async def sync(
  self, directory: Path, project_name: Optional[str] = None, force_full: bool = False
  ) -> SyncReport:
@@ -282,63 +277,58 @@
  )

  # sync moves first
- with logfire.span("process_moves", move_count=len(report.moves)):
- for old_path, new_path in report.moves.items():
- # in the case where a file has been deleted and replaced by another file
- # it will show up in the move and modified lists, so handle it in modified
- if new_path in report.modified:
- report.modified.remove(new_path)
- logger.debug(
- f"File marked as moved and modified: old_path={old_path}, new_path={new_path}"
- )
- else:
- await self.handle_move(old_path, new_path)
+ for old_path, new_path in report.moves.items():
+ # in the case where a file has been deleted and replaced by another file
+ # it will show up in the move and modified lists, so handle it in modified
+ if new_path in report.modified:
+ report.modified.remove(new_path)
+ logger.debug(
+ f"File marked as moved and modified: old_path={old_path}, new_path={new_path}"
+ )
+ else:
+ await self.handle_move(old_path, new_path)

  # deleted next
- with logfire.span("process_deletes", delete_count=len(report.deleted)):
- for path in report.deleted:
- await self.handle_delete(path)
+ for path in report.deleted:
+ await self.handle_delete(path)

  # then new and modified
- with logfire.span("process_new_files", new_count=len(report.new)):
- for path in report.new:
- entity, _ = await self.sync_file(path, new=True)
-
- # Track if file was skipped
- if entity is None and await self._should_skip_file(path):
- failure_info = self._file_failures[path]
- report.skipped_files.append(
- SkippedFile(
- path=path,
- reason=failure_info.last_error,
- failure_count=failure_info.count,
- first_failed=failure_info.first_failure,
- )
+ for path in report.new:
+ entity, _ = await self.sync_file(path, new=True)
+
+ # Track if file was skipped
+ if entity is None and await self._should_skip_file(path):
+ failure_info = self._file_failures[path]
+ report.skipped_files.append(
+ SkippedFile(
+ path=path,
+ reason=failure_info.last_error,
+ failure_count=failure_info.count,
+ first_failed=failure_info.first_failure,
  )
+ )

- with logfire.span("process_modified_files", modified_count=len(report.modified)):
- for path in report.modified:
- entity, _ = await self.sync_file(path, new=False)
-
- # Track if file was skipped
- if entity is None and await self._should_skip_file(path):
- failure_info = self._file_failures[path]
- report.skipped_files.append(
- SkippedFile(
- path=path,
- reason=failure_info.last_error,
- failure_count=failure_info.count,
- first_failed=failure_info.first_failure,
- )
+ for path in report.modified:
+ entity, _ = await self.sync_file(path, new=False)
+
+ # Track if file was skipped
+ if entity is None and await self._should_skip_file(path):
+ failure_info = self._file_failures[path]
+ report.skipped_files.append(
+ SkippedFile(
+ path=path,
+ reason=failure_info.last_error,
+ failure_count=failure_info.count,
+ first_failed=failure_info.first_failure,
  )
+ )

  # Only resolve relations if there were actual changes
  # If no files changed, no new unresolved relations could have been created
- with logfire.span("resolve_relations"):
- if report.total > 0:
- await self.resolve_relations()
- else:
- logger.info("Skipping relation resolution - no file changes detected")
+ if report.total > 0:
+ await self.resolve_relations()
+ else:
+ logger.info("Skipping relation resolution - no file changes detected")

  # Update scan watermark after successful sync
  # Use the timestamp from sync start (not end) to ensure we catch files
@@ -361,15 +351,6 @@ class SyncService:

  duration_ms = int((time.time() - start_time) * 1000)

- # Record metrics for sync operation
- logfire.metric_histogram("sync.duration", unit="ms").record(duration_ms)
- logfire.metric_counter("sync.files.new").add(len(report.new))
- logfire.metric_counter("sync.files.modified").add(len(report.modified))
- logfire.metric_counter("sync.files.deleted").add(len(report.deleted))
- logfire.metric_counter("sync.files.moved").add(len(report.moves))
- if report.skipped_files:
- logfire.metric_counter("sync.files.skipped").add(len(report.skipped_files))
-
  # Log summary with skipped files if any
  if report.skipped_files:
  logger.warning(
@@ -390,7 +371,6 @@

  return report

- @logfire.instrument()
  async def scan(self, directory, force_full: bool = False):
  """Smart scan using watermark and file count for large project optimization.

@@ -472,12 +452,6 @@
  logger.warning("No scan watermark available, falling back to full scan")
  file_paths_to_scan = await self._scan_directory_full(directory)

- # Record scan type metric
- logfire.metric_counter(f"sync.scan.{scan_type}").add(1)
- logfire.metric_histogram("sync.scan.files_scanned", unit="files").record(
- len(file_paths_to_scan)
- )
-
  # Step 3: Process each file with mtime-based comparison
  scanned_paths: Set[str] = set()
  changed_checksums: Dict[str, str] = {}
@@ -589,7 +563,6 @@
  report.checksums = changed_checksums

  scan_duration_ms = int((time.time() - scan_start_time) * 1000)
- logfire.metric_histogram("sync.scan.duration", unit="ms").record(scan_duration_ms)

  logger.info(
  f"Completed {scan_type} scan for directory {directory} in {scan_duration_ms}ms, "
@@ -599,7 +572,6 @@
  )
  return report

- @logfire.instrument()
  async def sync_file(
  self, path: str, new: bool = True
  ) -> Tuple[Optional[Entity], Optional[str]]:
@@ -638,10 +610,20 @@
  )
  return entity, checksum

+ except FileNotFoundError:
+ # File exists in database but not on filesystem
+ # This indicates a database/filesystem inconsistency - treat as deletion
+ logger.warning(
+ f"File not found during sync, treating as deletion: path={path}. "
+ "This may indicate a race condition or manual file deletion."
+ )
+ await self.handle_delete(path)
+ return None, None
+
  except Exception as e:
  # Check if this is a fatal error (or caused by one)
  # Fatal errors like project deletion should terminate sync immediately
- if isinstance(e, SyncFatalError) or isinstance(e.__cause__, SyncFatalError):
+ if isinstance(e, SyncFatalError) or isinstance(e.__cause__, SyncFatalError): # pragma: no cover
  logger.error(f"Fatal sync error encountered, terminating sync: path={path}")
  raise

@@ -654,7 +636,6 @@

  return None, None

- @logfire.instrument()
  async def sync_markdown_file(self, path: str, new: bool = True) -> Tuple[Optional[Entity], str]:
  """Sync a markdown file with full processing.

@@ -672,12 +653,19 @@
  file_contains_frontmatter = has_frontmatter(file_content)

  # Get file timestamps for tracking modification times
- file_stats = self.file_service.file_stats(path)
- created = datetime.fromtimestamp(file_stats.st_ctime).astimezone()
- modified = datetime.fromtimestamp(file_stats.st_mtime).astimezone()
-
- # entity markdown will always contain front matter, so it can be used up create/update the entity
- entity_markdown = await self.entity_parser.parse_file(path)
+ file_metadata = await self.file_service.get_file_metadata(path)
+ created = file_metadata.created_at
+ modified = file_metadata.modified_at
+
+ # Parse markdown content with file metadata (avoids redundant file read/stat)
+ # This enables cloud implementations (S3FileService) to provide metadata from head_object
+ abs_path = self.file_service.base_path / path
+ entity_markdown = await self.entity_parser.parse_markdown_content(
+ file_path=abs_path,
+ content=file_content,
+ mtime=file_metadata.modified_at.timestamp(),
+ ctime=file_metadata.created_at.timestamp(),
+ )

  # if the file contains frontmatter, resolve a permalink (unless disabled)
  if file_contains_frontmatter and not self.app_config.disable_permalinks:
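The `file_stats`/`st_ctime` pattern is replaced by an awaited `get_file_metadata` call whose result exposes `created_at`, `modified_at`, and `size` (attribute names taken from this diff). A rough sketch of such a metadata container for the local-filesystem case, assuming a `FileMetadata` dataclass (the actual definition in `file_service.py` may differ):

```python
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path


@dataclass
class FileMetadata:
    # Attribute names mirror what the diff accesses; the real class may differ.
    created_at: datetime
    modified_at: datetime
    size: int


async def get_file_metadata(base_path: Path, path: str) -> FileMetadata:
    # Local-filesystem variant; a cloud FileService could build the same object
    # from an S3 head_object response instead of calling stat().
    stats = (base_path / path).stat()
    return FileMetadata(
        created_at=datetime.fromtimestamp(stats.st_ctime).astimezone(),
        modified_at=datetime.fromtimestamp(stats.st_mtime).astimezone(),
        size=stats.st_size,
    )
```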
@@ -723,8 +711,8 @@
  "checksum": final_checksum,
  "created_at": created,
  "updated_at": modified,
- "mtime": file_stats.st_mtime,
- "size": file_stats.st_size,
+ "mtime": file_metadata.modified_at.timestamp(),
+ "size": file_metadata.size,
  },
  )

@@ -737,7 +725,6 @@
  # Return the final checksum to ensure everything is consistent
  return entity, final_checksum

- @logfire.instrument()
  async def sync_regular_file(self, path: str, new: bool = True) -> Tuple[Optional[Entity], str]:
  """Sync a non-markdown file with basic tracking.

@@ -754,9 +741,9 @@
  await self.entity_service.resolve_permalink(path, skip_conflict_check=True)

  # get file timestamps
- file_stats = self.file_service.file_stats(path)
- created = datetime.fromtimestamp(file_stats.st_ctime).astimezone()
- modified = datetime.fromtimestamp(file_stats.st_mtime).astimezone()
+ file_metadata = await self.file_service.get_file_metadata(path)
+ created = file_metadata.created_at
+ modified = file_metadata.modified_at

  # get mime type
  content_type = self.file_service.content_type(path)
@@ -772,14 +759,20 @@
  created_at=created,
  updated_at=modified,
  content_type=content_type,
- mtime=file_stats.st_mtime,
- size=file_stats.st_size,
+ mtime=file_metadata.modified_at.timestamp(),
+ size=file_metadata.size,
  )
  )
  return entity, checksum
  except IntegrityError as e:
  # Handle race condition where entity was created by another process
- if "UNIQUE constraint failed: entity.file_path" in str(e):
+ msg = str(e)
+ if (
+ "UNIQUE constraint failed: entity.file_path" in msg
+ or "uix_entity_file_path_project" in msg
+ or "duplicate key value violates unique constraint" in msg
+ and "file_path" in msg
+ ):
  logger.info(
  f"Entity already exists for file_path={path}, updating instead of creating"
  )
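Since `and` binds tighter than `or` in Python, the broadened check groups the Postgres duplicate-key message together with the `file_path` test. Restated with explicit parentheses (the helper function is purely illustrative; the released code inlines the condition):

```python
def is_duplicate_file_path_error(msg: str) -> bool:
    # Same tests as above, with the implicit operator precedence made explicit.
    return (
        "UNIQUE constraint failed: entity.file_path" in msg  # SQLite
        or "uix_entity_file_path_project" in msg             # named unique constraint
        or (
            "duplicate key value violates unique constraint" in msg  # Postgres
            and "file_path" in msg
        )
    )
```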
@@ -789,15 +782,15 @@
  logger.error(f"Entity not found after constraint violation, path={path}")
  raise ValueError(f"Entity not found after constraint violation: {path}")

- # Re-get file stats since we're in update path
- file_stats_for_update = self.file_service.file_stats(path)
+ # Re-get file metadata since we're in update path
+ file_metadata_for_update = await self.file_service.get_file_metadata(path)
  updated = await self.entity_repository.update(
  entity.id,
  {
  "file_path": path,
  "checksum": checksum,
- "mtime": file_stats_for_update.st_mtime,
- "size": file_stats_for_update.st_size,
+ "mtime": file_metadata_for_update.modified_at.timestamp(),
+ "size": file_metadata_for_update.size,
  },
  )

@@ -808,11 +801,11 @@
  return updated, checksum
  else:
  # Re-raise if it's a different integrity error
- raise
+ raise # pragma: no cover
  else:
  # Get file timestamps for updating modification time
- file_stats = self.file_service.file_stats(path)
- modified = datetime.fromtimestamp(file_stats.st_mtime).astimezone()
+ file_metadata = await self.file_service.get_file_metadata(path)
+ modified = file_metadata.modified_at

  entity = await self.entity_repository.get_by_file_path(path)
  if entity is None: # pragma: no cover
@@ -827,8 +820,8 @@
  "file_path": path,
  "checksum": checksum,
  "updated_at": modified,
- "mtime": file_stats.st_mtime,
- "size": file_stats.st_size,
+ "mtime": file_metadata.modified_at.timestamp(),
+ "size": file_metadata.size,
  },
  )

@@ -838,7 +831,6 @@

  return updated, checksum

- @logfire.instrument()
  async def handle_delete(self, file_path: str):
  """Handle complete entity deletion including search index cleanup."""

@@ -870,7 +862,6 @@
  else:
  await self.search_service.delete_by_entity_id(entity.id)

- @logfire.instrument()
  async def handle_move(self, old_path, new_path):
  logger.debug("Moving entity", old_path=old_path, new_path=new_path)

@@ -975,7 +966,6 @@
  # update search index
  await self.search_service.index_entity(updated)

- @logfire.instrument()
  async def resolve_relations(self, entity_id: int | None = None):
  """Try to resolve unresolved relations.

@@ -1026,16 +1016,27 @@
  "to_name": resolved_entity.title,
  },
  )
- except IntegrityError: # pragma: no cover
+ # update search index only on successful resolution
+ await self.search_service.index_entity(resolved_entity)
+ except IntegrityError:
+ # IntegrityError means a relation with this (from_id, to_id, relation_type)
+ # already exists. The UPDATE was rolled back, so our unresolved relation
+ # (to_id=NULL) still exists in the database. We delete it because:
+ # 1. It's redundant - a resolved relation already captures this relationship
+ # 2. If we don't delete it, future syncs will try to resolve it again
+ # and get the same IntegrityError
  logger.debug(
- "Ignoring duplicate relation "
+ "Deleting duplicate unresolved relation "
  f"relation_id={relation.id} "
  f"from_id={relation.from_id} "
- f"to_name={relation.to_name}"
+ f"to_name={relation.to_name} "
+ f"resolved_to_id={resolved_entity.id}"
  )
-
- # update search index
- await self.search_service.index_entity(resolved_entity)
+ try:
+ await self.relation_repository.delete(relation.id)
+ except Exception as e:
+ # Log but don't fail - the relation may have been deleted already
+ logger.debug(f"Could not delete duplicate relation {relation.id}: {e}")

  async def _quick_count_files(self, directory: Path) -> int:
  """Fast file count using find command.
@@ -1043,12 +1044,22 @@
  Uses subprocess to leverage OS-level file counting which is much faster
  than Python iteration, especially on network filesystems like TigrisFS.

+ On Windows, subprocess is not supported with SelectorEventLoop (which we use
+ to avoid aiosqlite cleanup issues), so we fall back to Python-based counting.
+
  Args:
  directory: Directory to count files in

  Returns:
  Number of files in directory (recursive)
  """
+ # Windows with SelectorEventLoop doesn't support subprocess
+ if sys.platform == "win32":
+ count = 0
+ async for _ in self.scan_directory(directory):
+ count += 1
+ return count
+
  process = await asyncio.create_subprocess_shell(
  f'find "{directory}" -type f | wc -l',
  stdout=asyncio.subprocess.PIPE,
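Background on the `win32` guard above: asyncio's subprocess APIs need the proactor event loop on Windows, and `asyncio.create_subprocess_shell` raises `NotImplementedError` under a selector loop. A minimal sketch of the loop-policy choice the docstring refers to (where basic-memory actually sets this policy is not shown in this diff):

```python
import asyncio
import sys

# Opting into the selector event loop on Windows (for example, to avoid
# aiosqlite teardown problems) disables asyncio subprocess support, so
# helpers like _quick_count_files need a pure-Python fallback path.
if sys.platform == "win32":
    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
```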
@@ -1063,8 +1074,6 @@
  f"error: {error_msg}. Falling back to manual count. "
  f"This will slow down watermark detection!"
  )
- # Track optimization failures for visibility
- logfire.metric_counter("sync.scan.file_count_failure").add(1)
  # Fallback: count using scan_directory
  count = 0
  async for _ in self.scan_directory(directory):
@@ -1081,6 +1090,9 @@
  This is dramatically faster than scanning all files and comparing mtimes,
  especially on network filesystems like TigrisFS where stat operations are expensive.

+ On Windows, subprocess is not supported with SelectorEventLoop (which we use
+ to avoid aiosqlite cleanup issues), so we implement mtime filtering in Python.
+
  Args:
  directory: Directory to scan
  since_timestamp: Unix timestamp to find files newer than
@@ -1088,6 +1100,16 @@
  Returns:
  List of relative file paths modified since the timestamp (respects .bmignore)
  """
+ # Windows with SelectorEventLoop doesn't support subprocess
+ # Implement mtime filtering in Python to preserve watermark optimization
+ if sys.platform == "win32":
+ file_paths = []
+ async for file_path_str, stat_info in self.scan_directory(directory):
+ if stat_info.st_mtime > since_timestamp:
+ rel_path = Path(file_path_str).relative_to(directory).as_posix()
+ file_paths.append(rel_path)
+ return file_paths
+
  # Convert timestamp to find-compatible format
  since_date = datetime.fromtimestamp(since_timestamp).strftime("%Y-%m-%d %H:%M:%S")

@@ -1105,8 +1127,6 @@
  f"error: {error_msg}. Falling back to full scan. "
  f"This will cause slow syncs on large projects!"
  )
- # Track optimization failures for visibility
- logfire.metric_counter("sync.scan.optimization_failure").add(1)
  # Fallback to full scan
  return await self._scan_directory_full(directory)

@@ -1206,14 +1226,14 @@ async def get_sync_service(project: Project) -> SyncService: # pragma: no cover

  project_path = Path(project.path)
  entity_parser = EntityParser(project_path)
- markdown_processor = MarkdownProcessor(entity_parser)
- file_service = FileService(project_path, markdown_processor)
+ markdown_processor = MarkdownProcessor(entity_parser, app_config=app_config)
+ file_service = FileService(project_path, markdown_processor, app_config=app_config)

  # Initialize repositories
  entity_repository = EntityRepository(session_maker, project_id=project.id)
  observation_repository = ObservationRepository(session_maker, project_id=project.id)
  relation_repository = RelationRepository(session_maker, project_id=project.id)
- search_repository = SearchRepository(session_maker, project_id=project.id)
+ search_repository = create_search_repository(session_maker, project_id=project.id)
  project_repository = ProjectRepository(session_maker)

  # Initialize services
basic_memory/sync/watch_service.py

@@ -5,14 +5,17 @@ import os
  from collections import defaultdict
  from datetime import datetime
  from pathlib import Path
- from typing import List, Optional, Set, Sequence
+ from typing import List, Optional, Set, Sequence, Callable, Awaitable, TYPE_CHECKING
+
+ if TYPE_CHECKING:
+ from basic_memory.sync.sync_service import SyncService

  from basic_memory.config import BasicMemoryConfig, WATCH_STATUS_JSON
  from basic_memory.ignore_utils import load_gitignore_patterns, should_ignore_path
  from basic_memory.models import Project
  from basic_memory.repository import ProjectRepository
  from loguru import logger
- from pydantic import BaseModel
+ from pydantic import BaseModel, Field
  from rich.console import Console
  from watchfiles import awatch
  from watchfiles.main import FileChange, Change
@@ -31,8 +34,8 @@ class WatchEvent(BaseModel):
  class WatchServiceState(BaseModel):
  # Service status
  running: bool = False
- start_time: datetime = datetime.now() # Use directly with Pydantic model
- pid: int = os.getpid() # Use directly with Pydantic model
+ start_time: datetime = Field(default_factory=datetime.now)
+ pid: int = Field(default_factory=os.getpid)

  # Stats
  error_count: int = 0
@@ -43,7 +46,7 @@ class WatchServiceState(BaseModel):
  synced_files: int = 0

  # Recent activity
- recent_events: List[WatchEvent] = [] # Use directly with Pydantic model
+ recent_events: List[WatchEvent] = Field(default_factory=list)

  def add_event(
  self,
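The `Field(default_factory=...)` change matters because the old defaults ran once at import time: every `WatchServiceState` reported the same `start_time` and the importing process's `pid`, and `default_factory=list` is also the idiomatic form for the mutable `recent_events` default. A small standalone illustration:

```python
from datetime import datetime
from pydantic import BaseModel, Field


class Stale(BaseModel):
    # datetime.now() runs once, when the module defining the class is imported;
    # every instance created afterwards reports that same frozen timestamp.
    started: datetime = datetime.now()


class Fresh(BaseModel):
    # default_factory defers the call until each instance is constructed.
    started: datetime = Field(default_factory=datetime.now)
```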
@@ -71,12 +74,17 @@ class WatchServiceState(BaseModel):
  self.last_error = datetime.now()


+ # Type alias for sync service factory function
+ SyncServiceFactory = Callable[[Project], Awaitable["SyncService"]]
+
+
  class WatchService:
  def __init__(
  self,
  app_config: BasicMemoryConfig,
  project_repository: ProjectRepository,
  quiet: bool = False,
+ sync_service_factory: Optional[SyncServiceFactory] = None,
  ):
  self.app_config = app_config
  self.project_repository = project_repository
@@ -84,10 +92,20 @@ class WatchService:
  self.status_path = Path.home() / ".basic-memory" / WATCH_STATUS_JSON
  self.status_path.parent.mkdir(parents=True, exist_ok=True)
  self._ignore_patterns_cache: dict[Path, Set[str]] = {}
+ self._sync_service_factory = sync_service_factory

  # quiet mode for mcp so it doesn't mess up stdout
  self.console = Console(quiet=quiet)

+ async def _get_sync_service(self, project: Project) -> "SyncService":
+ """Get sync service for a project, using factory if provided."""
+ if self._sync_service_factory:
+ return await self._sync_service_factory(project)
+ # Fall back to default factory
+ from basic_memory.sync.sync_service import get_sync_service
+
+ return await get_sync_service(project)
+
  async def _schedule_restart(self, stop_event: asyncio.Event):
  """Schedule a restart of the watch service after the configured interval."""
  await asyncio.sleep(self.app_config.watch_project_reload_interval)
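The optional `sync_service_factory` makes the per-project sync service injectable (for tests, or for the new container wiring) while keeping `get_sync_service` as the fallback. A wiring sketch based only on the constructor shown above; `app_config`, `project_repository`, and `make_test_sync_service` are placeholders, not names from this package:

```python
# Placeholder wiring: app_config and project_repository are built elsewhere.
async def make_test_sync_service(project):
    ...  # return a SyncService wired against a test database, for example


watch_service = WatchService(
    app_config=app_config,
    project_repository=project_repository,
    quiet=True,
    sync_service_factory=make_test_sync_service,  # omit to fall back to get_sync_service
)
```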
@@ -233,9 +251,6 @@ class WatchService:

  async def handle_changes(self, project: Project, changes: Set[FileChange]) -> None:
  """Process a batch of file changes"""
- # avoid circular imports
- from basic_memory.sync.sync_service import get_sync_service
-
  # Check if project still exists in configuration before processing
  # This prevents deleted projects from being recreated by background sync
  from basic_memory.config import ConfigManager
@@ -250,7 +265,7 @@
  )
  return

- sync_service = await get_sync_service(project)
+ sync_service = await self._get_sync_service(project)
  file_service = sync_service.file_service

  start_time = time.time()
@@ -284,12 +299,17 @@
  )

  # because of our atomic writes on updates, an add may be an existing file
- for added_path in adds: # pragma: no cover TODO add test
+ # Avoid mutating `adds` while iterating (can skip items).
+ reclassified_as_modified: List[str] = []
+ for added_path in list(adds): # pragma: no cover TODO add test
  entity = await sync_service.entity_repository.get_by_file_path(added_path)
  if entity is not None:
  logger.debug(f"Existing file will be processed as modified, path={added_path}")
- adds.remove(added_path)
- modifies.append(added_path)
+ reclassified_as_modified.append(added_path)
+
+ if reclassified_as_modified:
+ adds = [p for p in adds if p not in reclassified_as_modified]
+ modifies.extend(reclassified_as_modified)

  # Track processed files to avoid duplicates
  processed: Set[str] = set()
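For context on why the loop was rewritten: removing the current element from a list while iterating it shifts the remaining items left, so the iterator skips the element that slides into the freed slot. A self-contained reproduction:

```python
# Mutating a list while iterating it skips elements: "b" is never visited,
# so only "a" and "c" get removed.
items = ["a", "b", "c"]
for item in items:
    items.remove(item)
print(items)  # ['b']
```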