basic-memory 0.16.1-py3-none-any.whl → 0.17.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- basic_memory/__init__.py +1 -1
- basic_memory/alembic/env.py +112 -26
- basic_memory/alembic/versions/314f1ea54dc4_add_postgres_full_text_search_support_.py +131 -0
- basic_memory/alembic/versions/5fe1ab1ccebe_add_projects_table.py +15 -3
- basic_memory/alembic/versions/647e7a75e2cd_project_constraint_fix.py +44 -36
- basic_memory/alembic/versions/6830751f5fb6_merge_multiple_heads.py +24 -0
- basic_memory/alembic/versions/a2b3c4d5e6f7_add_search_index_entity_cascade.py +56 -0
- basic_memory/alembic/versions/cc7172b46608_update_search_index_schema.py +13 -0
- basic_memory/alembic/versions/f8a9b2c3d4e5_add_pg_trgm_for_fuzzy_link_resolution.py +239 -0
- basic_memory/alembic/versions/g9a0b3c4d5e6_add_external_id_to_project_and_entity.py +173 -0
- basic_memory/api/app.py +45 -24
- basic_memory/api/container.py +133 -0
- basic_memory/api/routers/knowledge_router.py +17 -5
- basic_memory/api/routers/project_router.py +68 -14
- basic_memory/api/routers/resource_router.py +37 -27
- basic_memory/api/routers/utils.py +53 -14
- basic_memory/api/v2/__init__.py +35 -0
- basic_memory/api/v2/routers/__init__.py +21 -0
- basic_memory/api/v2/routers/directory_router.py +93 -0
- basic_memory/api/v2/routers/importer_router.py +181 -0
- basic_memory/api/v2/routers/knowledge_router.py +427 -0
- basic_memory/api/v2/routers/memory_router.py +130 -0
- basic_memory/api/v2/routers/project_router.py +359 -0
- basic_memory/api/v2/routers/prompt_router.py +269 -0
- basic_memory/api/v2/routers/resource_router.py +286 -0
- basic_memory/api/v2/routers/search_router.py +73 -0
- basic_memory/cli/app.py +43 -7
- basic_memory/cli/auth.py +27 -4
- basic_memory/cli/commands/__init__.py +3 -1
- basic_memory/cli/commands/cloud/api_client.py +20 -5
- basic_memory/cli/commands/cloud/cloud_utils.py +13 -6
- basic_memory/cli/commands/cloud/rclone_commands.py +110 -14
- basic_memory/cli/commands/cloud/rclone_installer.py +18 -4
- basic_memory/cli/commands/cloud/upload.py +10 -3
- basic_memory/cli/commands/command_utils.py +52 -4
- basic_memory/cli/commands/db.py +78 -19
- basic_memory/cli/commands/format.py +198 -0
- basic_memory/cli/commands/import_chatgpt.py +12 -8
- basic_memory/cli/commands/import_claude_conversations.py +12 -8
- basic_memory/cli/commands/import_claude_projects.py +12 -8
- basic_memory/cli/commands/import_memory_json.py +12 -8
- basic_memory/cli/commands/mcp.py +8 -26
- basic_memory/cli/commands/project.py +22 -9
- basic_memory/cli/commands/status.py +3 -2
- basic_memory/cli/commands/telemetry.py +81 -0
- basic_memory/cli/container.py +84 -0
- basic_memory/cli/main.py +7 -0
- basic_memory/config.py +177 -77
- basic_memory/db.py +183 -77
- basic_memory/deps/__init__.py +293 -0
- basic_memory/deps/config.py +26 -0
- basic_memory/deps/db.py +56 -0
- basic_memory/deps/importers.py +200 -0
- basic_memory/deps/projects.py +238 -0
- basic_memory/deps/repositories.py +179 -0
- basic_memory/deps/services.py +480 -0
- basic_memory/deps.py +14 -409
- basic_memory/file_utils.py +212 -3
- basic_memory/ignore_utils.py +5 -5
- basic_memory/importers/base.py +40 -19
- basic_memory/importers/chatgpt_importer.py +17 -4
- basic_memory/importers/claude_conversations_importer.py +27 -12
- basic_memory/importers/claude_projects_importer.py +50 -14
- basic_memory/importers/memory_json_importer.py +36 -16
- basic_memory/importers/utils.py +5 -2
- basic_memory/markdown/entity_parser.py +62 -23
- basic_memory/markdown/markdown_processor.py +67 -4
- basic_memory/markdown/plugins.py +4 -2
- basic_memory/markdown/utils.py +10 -1
- basic_memory/mcp/async_client.py +1 -0
- basic_memory/mcp/clients/__init__.py +28 -0
- basic_memory/mcp/clients/directory.py +70 -0
- basic_memory/mcp/clients/knowledge.py +176 -0
- basic_memory/mcp/clients/memory.py +120 -0
- basic_memory/mcp/clients/project.py +89 -0
- basic_memory/mcp/clients/resource.py +71 -0
- basic_memory/mcp/clients/search.py +65 -0
- basic_memory/mcp/container.py +110 -0
- basic_memory/mcp/project_context.py +47 -33
- basic_memory/mcp/prompts/ai_assistant_guide.py +2 -2
- basic_memory/mcp/prompts/recent_activity.py +2 -2
- basic_memory/mcp/prompts/utils.py +3 -3
- basic_memory/mcp/server.py +58 -0
- basic_memory/mcp/tools/build_context.py +14 -14
- basic_memory/mcp/tools/canvas.py +34 -12
- basic_memory/mcp/tools/chatgpt_tools.py +4 -1
- basic_memory/mcp/tools/delete_note.py +31 -7
- basic_memory/mcp/tools/edit_note.py +14 -9
- basic_memory/mcp/tools/list_directory.py +7 -17
- basic_memory/mcp/tools/move_note.py +35 -31
- basic_memory/mcp/tools/project_management.py +29 -25
- basic_memory/mcp/tools/read_content.py +13 -3
- basic_memory/mcp/tools/read_note.py +24 -14
- basic_memory/mcp/tools/recent_activity.py +32 -38
- basic_memory/mcp/tools/search.py +17 -10
- basic_memory/mcp/tools/utils.py +28 -0
- basic_memory/mcp/tools/view_note.py +2 -1
- basic_memory/mcp/tools/write_note.py +37 -14
- basic_memory/models/knowledge.py +15 -2
- basic_memory/models/project.py +7 -1
- basic_memory/models/search.py +58 -2
- basic_memory/project_resolver.py +222 -0
- basic_memory/repository/entity_repository.py +210 -3
- basic_memory/repository/observation_repository.py +1 -0
- basic_memory/repository/postgres_search_repository.py +451 -0
- basic_memory/repository/project_repository.py +38 -1
- basic_memory/repository/relation_repository.py +58 -2
- basic_memory/repository/repository.py +1 -0
- basic_memory/repository/search_index_row.py +95 -0
- basic_memory/repository/search_repository.py +77 -615
- basic_memory/repository/search_repository_base.py +241 -0
- basic_memory/repository/sqlite_search_repository.py +437 -0
- basic_memory/runtime.py +61 -0
- basic_memory/schemas/base.py +36 -6
- basic_memory/schemas/directory.py +2 -1
- basic_memory/schemas/memory.py +9 -2
- basic_memory/schemas/project_info.py +2 -0
- basic_memory/schemas/response.py +84 -27
- basic_memory/schemas/search.py +5 -0
- basic_memory/schemas/sync_report.py +1 -1
- basic_memory/schemas/v2/__init__.py +27 -0
- basic_memory/schemas/v2/entity.py +133 -0
- basic_memory/schemas/v2/resource.py +47 -0
- basic_memory/services/context_service.py +219 -43
- basic_memory/services/directory_service.py +26 -11
- basic_memory/services/entity_service.py +68 -33
- basic_memory/services/file_service.py +131 -16
- basic_memory/services/initialization.py +51 -26
- basic_memory/services/link_resolver.py +1 -0
- basic_memory/services/project_service.py +68 -43
- basic_memory/services/search_service.py +75 -16
- basic_memory/sync/__init__.py +2 -1
- basic_memory/sync/coordinator.py +160 -0
- basic_memory/sync/sync_service.py +135 -115
- basic_memory/sync/watch_service.py +32 -12
- basic_memory/telemetry.py +249 -0
- basic_memory/utils.py +96 -75
- {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/METADATA +129 -5
- basic_memory-0.17.4.dist-info/RECORD +193 -0
- {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/WHEEL +1 -1
- basic_memory-0.16.1.dist-info/RECORD +0 -148
- {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/entry_points.txt +0 -0
- {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/licenses/LICENSE +0 -0
basic_memory/sync/sync_service.py

@@ -2,6 +2,7 @@
 
 import asyncio
 import os
+import sys
 import time
 from collections import OrderedDict
 from dataclasses import dataclass, field
@@ -10,7 +11,7 @@ from pathlib import Path
 from typing import AsyncIterator, Dict, List, Optional, Set, Tuple
 
 import aiofiles.os
-
+
 from loguru import logger
 from sqlalchemy.exc import IntegrityError
 
@@ -26,7 +27,7 @@ from basic_memory.repository import (
     ObservationRepository,
     ProjectRepository,
 )
-from basic_memory.repository.search_repository import
+from basic_memory.repository.search_repository import create_search_repository
 from basic_memory.services import EntityService, FileService
 from basic_memory.services.exceptions import SyncFatalError
 from basic_memory.services.link_resolver import LinkResolver
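The replaced import above (a concrete `SearchRepository` swapped for a `create_search_repository` factory, also called later in `get_sync_service`) lines up with the new `search_repository_base.py`, `sqlite_search_repository.py`, and `postgres_search_repository.py` modules in the file list: the search layer now appears to pick a backend-specific repository at runtime. A minimal sketch of such a factory follows; the class names, the dialect check, and the stand-in constructors are assumptions, not the package's actual code.

```python
# Illustrative sketch only - not basic-memory's implementation.
class SQLiteSearchRepository:
    """Stand-in for a SQLite (FTS-based) search repository."""
    def __init__(self, session_maker, project_id: int):
        self.session_maker, self.project_id = session_maker, project_id


class PostgresSearchRepository:
    """Stand-in for a Postgres (full-text / pg_trgm) search repository."""
    def __init__(self, session_maker, project_id: int):
        self.session_maker, self.project_id = session_maker, project_id


def create_search_repository(session_maker, project_id: int):
    """Pick a search backend for one project (assumed dispatch logic)."""
    # Assumes the session factory was built with bind=<AsyncEngine>, so the
    # dialect name ('sqlite' or 'postgresql') can be read off the bound engine.
    dialect = session_maker.kw["bind"].dialect.name
    if dialect == "postgresql":
        return PostgresSearchRepository(session_maker, project_id=project_id)
    return SQLiteSearchRepository(session_maker, project_id=project_id)
```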
@@ -215,17 +216,12 @@ class SyncService:
                 f"path={path}, error={error}"
             )
 
-            # Record metric for file failure
-            logfire.metric_counter("sync.circuit_breaker.failures").add(1)
-
             # Log when threshold is reached
             if failure_info.count >= MAX_CONSECUTIVE_FAILURES:
                 logger.error(
                     f"File {path} has failed {MAX_CONSECUTIVE_FAILURES} times and will be skipped. "
                     f"First failure: {failure_info.first_failure}, Last error: {error}"
                 )
-                # Record metric for file being blocked by circuit breaker
-                logfire.metric_counter("sync.circuit_breaker.blocked_files").add(1)
         else:
             # Create new failure record
             self._file_failures[path] = FileFailureInfo(
@@ -255,7 +251,6 @@ class SyncService:
             logger.info(f"Clearing failure history for {path} after successful sync")
             del self._file_failures[path]
 
-    @logfire.instrument()
     async def sync(
         self, directory: Path, project_name: Optional[str] = None, force_full: bool = False
     ) -> SyncReport:
@@ -282,63 +277,58 @@ class SyncService:
         )
 
         # sync moves first
-
-
-
-
-
-
-
-
-
-
-            await self.handle_move(old_path, new_path)
+        for old_path, new_path in report.moves.items():
+            # in the case where a file has been deleted and replaced by another file
+            # it will show up in the move and modified lists, so handle it in modified
+            if new_path in report.modified:
+                report.modified.remove(new_path)
+                logger.debug(
+                    f"File marked as moved and modified: old_path={old_path}, new_path={new_path}"
+                )
+            else:
+                await self.handle_move(old_path, new_path)
 
         # deleted next
-
-
-            await self.handle_delete(path)
+        for path in report.deleted:
+            await self.handle_delete(path)
 
         # then new and modified
-
-
-
-
-
-
-
-
-
-
-
-
-                            first_failed=failure_info.first_failure,
-                        )
+        for path in report.new:
+            entity, _ = await self.sync_file(path, new=True)
+
+            # Track if file was skipped
+            if entity is None and await self._should_skip_file(path):
+                failure_info = self._file_failures[path]
+                report.skipped_files.append(
+                    SkippedFile(
+                        path=path,
+                        reason=failure_info.last_error,
+                        failure_count=failure_info.count,
+                        first_failed=failure_info.first_failure,
                     )
+                )
 
-
-
-
-
-
-
-
-
-
-
-
-                            first_failed=failure_info.first_failure,
-                        )
+        for path in report.modified:
+            entity, _ = await self.sync_file(path, new=False)
+
+            # Track if file was skipped
+            if entity is None and await self._should_skip_file(path):
+                failure_info = self._file_failures[path]
+                report.skipped_files.append(
+                    SkippedFile(
+                        path=path,
+                        reason=failure_info.last_error,
+                        failure_count=failure_info.count,
+                        first_failed=failure_info.first_failure,
                     )
+                )
 
         # Only resolve relations if there were actual changes
         # If no files changed, no new unresolved relations could have been created
-
-
-
-
-            logger.info("Skipping relation resolution - no file changes detected")
+        if report.total > 0:
+            await self.resolve_relations()
+        else:
+            logger.info("Skipping relation resolution - no file changes detected")
 
         # Update scan watermark after successful sync
         # Use the timestamp from sync start (not end) to ensure we catch files
@@ -361,15 +351,6 @@ class SyncService:
 
         duration_ms = int((time.time() - start_time) * 1000)
 
-        # Record metrics for sync operation
-        logfire.metric_histogram("sync.duration", unit="ms").record(duration_ms)
-        logfire.metric_counter("sync.files.new").add(len(report.new))
-        logfire.metric_counter("sync.files.modified").add(len(report.modified))
-        logfire.metric_counter("sync.files.deleted").add(len(report.deleted))
-        logfire.metric_counter("sync.files.moved").add(len(report.moves))
-        if report.skipped_files:
-            logfire.metric_counter("sync.files.skipped").add(len(report.skipped_files))
-
         # Log summary with skipped files if any
         if report.skipped_files:
             logger.warning(
@@ -390,7 +371,6 @@ class SyncService:
 
         return report
 
-    @logfire.instrument()
     async def scan(self, directory, force_full: bool = False):
         """Smart scan using watermark and file count for large project optimization.
 
@@ -472,12 +452,6 @@ class SyncService:
             logger.warning("No scan watermark available, falling back to full scan")
             file_paths_to_scan = await self._scan_directory_full(directory)
 
-        # Record scan type metric
-        logfire.metric_counter(f"sync.scan.{scan_type}").add(1)
-        logfire.metric_histogram("sync.scan.files_scanned", unit="files").record(
-            len(file_paths_to_scan)
-        )
-
         # Step 3: Process each file with mtime-based comparison
         scanned_paths: Set[str] = set()
         changed_checksums: Dict[str, str] = {}
@@ -589,7 +563,6 @@ class SyncService:
         report.checksums = changed_checksums
 
         scan_duration_ms = int((time.time() - scan_start_time) * 1000)
-        logfire.metric_histogram("sync.scan.duration", unit="ms").record(scan_duration_ms)
 
         logger.info(
             f"Completed {scan_type} scan for directory {directory} in {scan_duration_ms}ms, "
@@ -599,7 +572,6 @@ class SyncService:
         )
         return report
 
-    @logfire.instrument()
     async def sync_file(
         self, path: str, new: bool = True
     ) -> Tuple[Optional[Entity], Optional[str]]:
@@ -638,10 +610,20 @@ class SyncService:
                 )
             return entity, checksum
 
+        except FileNotFoundError:
+            # File exists in database but not on filesystem
+            # This indicates a database/filesystem inconsistency - treat as deletion
+            logger.warning(
+                f"File not found during sync, treating as deletion: path={path}. "
+                "This may indicate a race condition or manual file deletion."
+            )
+            await self.handle_delete(path)
+            return None, None
+
         except Exception as e:
             # Check if this is a fatal error (or caused by one)
             # Fatal errors like project deletion should terminate sync immediately
-            if isinstance(e, SyncFatalError) or isinstance(e.__cause__, SyncFatalError):
+            if isinstance(e, SyncFatalError) or isinstance(e.__cause__, SyncFatalError):  # pragma: no cover
                 logger.error(f"Fatal sync error encountered, terminating sync: path={path}")
                 raise
 
@@ -654,7 +636,6 @@ class SyncService:
 
         return None, None
 
-    @logfire.instrument()
     async def sync_markdown_file(self, path: str, new: bool = True) -> Tuple[Optional[Entity], str]:
         """Sync a markdown file with full processing.
 
@@ -672,12 +653,19 @@ class SyncService:
         file_contains_frontmatter = has_frontmatter(file_content)
 
         # Get file timestamps for tracking modification times
-
-        created =
-        modified =
-
-        #
-
+        file_metadata = await self.file_service.get_file_metadata(path)
+        created = file_metadata.created_at
+        modified = file_metadata.modified_at
+
+        # Parse markdown content with file metadata (avoids redundant file read/stat)
+        # This enables cloud implementations (S3FileService) to provide metadata from head_object
+        abs_path = self.file_service.base_path / path
+        entity_markdown = await self.entity_parser.parse_markdown_content(
+            file_path=abs_path,
+            content=file_content,
+            mtime=file_metadata.modified_at.timestamp(),
+            ctime=file_metadata.created_at.timestamp(),
+        )
 
         # if the file contains frontmatter, resolve a permalink (unless disabled)
         if file_contains_frontmatter and not self.app_config.disable_permalinks:
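Both file-sync paths now go through `self.file_service.get_file_metadata(path)` for timestamps and size instead of stat-ing the file inline, which is what lets a cloud-backed FileService supply the same fields from object metadata. A rough local-filesystem sketch of such a helper is below; the `FileMetadata` shape is inferred from the fields the sync code reads (`created_at`, `modified_at`, `size`) and is not copied from the package.

```python
# Hypothetical local-filesystem sketch of a get_file_metadata helper.
# Field names mirror what the sync code accesses; the real FileService may differ.
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path

import aiofiles.os


@dataclass
class FileMetadata:
    created_at: datetime
    modified_at: datetime
    size: int


async def get_file_metadata(base_path: Path, relative_path: str) -> FileMetadata:
    stat = await aiofiles.os.stat(base_path / relative_path)
    return FileMetadata(
        # st_ctime is only an approximation of creation time on Unix.
        created_at=datetime.fromtimestamp(stat.st_ctime),
        modified_at=datetime.fromtimestamp(stat.st_mtime),
        size=stat.st_size,
    )
```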
@@ -723,8 +711,8 @@ class SyncService:
                     "checksum": final_checksum,
                     "created_at": created,
                     "updated_at": modified,
-                    "mtime":
-                    "size":
+                    "mtime": file_metadata.modified_at.timestamp(),
+                    "size": file_metadata.size,
                 },
             )
 
@@ -737,7 +725,6 @@ class SyncService:
         # Return the final checksum to ensure everything is consistent
         return entity, final_checksum
 
-    @logfire.instrument()
     async def sync_regular_file(self, path: str, new: bool = True) -> Tuple[Optional[Entity], str]:
         """Sync a non-markdown file with basic tracking.
 
@@ -754,9 +741,9 @@ class SyncService:
             await self.entity_service.resolve_permalink(path, skip_conflict_check=True)
 
             # get file timestamps
-
-            created =
-            modified =
+            file_metadata = await self.file_service.get_file_metadata(path)
+            created = file_metadata.created_at
+            modified = file_metadata.modified_at
 
             # get mime type
             content_type = self.file_service.content_type(path)
@@ -772,14 +759,20 @@ class SyncService:
                     created_at=created,
                     updated_at=modified,
                     content_type=content_type,
-                    mtime=
-                    size=
+                    mtime=file_metadata.modified_at.timestamp(),
+                    size=file_metadata.size,
                 )
             )
             return entity, checksum
         except IntegrityError as e:
             # Handle race condition where entity was created by another process
-
+            msg = str(e)
+            if (
+                "UNIQUE constraint failed: entity.file_path" in msg
+                or "uix_entity_file_path_project" in msg
+                or "duplicate key value violates unique constraint" in msg
+                and "file_path" in msg
+            ):
                 logger.info(
                     f"Entity already exists for file_path={path}, updating instead of creating"
                 )
@@ -789,15 +782,15 @@ class SyncService:
                     logger.error(f"Entity not found after constraint violation, path={path}")
                     raise ValueError(f"Entity not found after constraint violation: {path}")
 
-                # Re-get file
-
+                # Re-get file metadata since we're in update path
+                file_metadata_for_update = await self.file_service.get_file_metadata(path)
                 updated = await self.entity_repository.update(
                     entity.id,
                     {
                         "file_path": path,
                         "checksum": checksum,
-                        "mtime":
-                        "size":
+                        "mtime": file_metadata_for_update.modified_at.timestamp(),
+                        "size": file_metadata_for_update.size,
                     },
                 )
 
@@ -808,11 +801,11 @@ class SyncService:
                 return updated, checksum
             else:
                 # Re-raise if it's a different integrity error
-                raise
+                raise  # pragma: no cover
         else:
             # Get file timestamps for updating modification time
-
-            modified =
+            file_metadata = await self.file_service.get_file_metadata(path)
+            modified = file_metadata.modified_at
 
             entity = await self.entity_repository.get_by_file_path(path)
             if entity is None:  # pragma: no cover
@@ -827,8 +820,8 @@ class SyncService:
                     "file_path": path,
                     "checksum": checksum,
                     "updated_at": modified,
-                    "mtime":
-                    "size":
+                    "mtime": file_metadata.modified_at.timestamp(),
+                    "size": file_metadata.size,
                 },
             )
 
@@ -838,7 +831,6 @@ class SyncService:
 
         return updated, checksum
 
-    @logfire.instrument()
     async def handle_delete(self, file_path: str):
         """Handle complete entity deletion including search index cleanup."""
 
@@ -870,7 +862,6 @@ class SyncService:
         else:
             await self.search_service.delete_by_entity_id(entity.id)
 
-    @logfire.instrument()
     async def handle_move(self, old_path, new_path):
         logger.debug("Moving entity", old_path=old_path, new_path=new_path)
 
@@ -975,7 +966,6 @@ class SyncService:
         # update search index
         await self.search_service.index_entity(updated)
 
-    @logfire.instrument()
     async def resolve_relations(self, entity_id: int | None = None):
         """Try to resolve unresolved relations.
 
@@ -1026,16 +1016,27 @@ class SyncService:
                         "to_name": resolved_entity.title,
                     },
                 )
-
+                # update search index only on successful resolution
+                await self.search_service.index_entity(resolved_entity)
+            except IntegrityError:
+                # IntegrityError means a relation with this (from_id, to_id, relation_type)
+                # already exists. The UPDATE was rolled back, so our unresolved relation
+                # (to_id=NULL) still exists in the database. We delete it because:
+                # 1. It's redundant - a resolved relation already captures this relationship
+                # 2. If we don't delete it, future syncs will try to resolve it again
+                #    and get the same IntegrityError
                 logger.debug(
-                    "
+                    "Deleting duplicate unresolved relation "
                     f"relation_id={relation.id} "
                     f"from_id={relation.from_id} "
-                    f"to_name={relation.to_name}"
+                    f"to_name={relation.to_name} "
+                    f"resolved_to_id={resolved_entity.id}"
                 )
-
-
-
+                try:
+                    await self.relation_repository.delete(relation.id)
+                except Exception as e:
+                    # Log but don't fail - the relation may have been deleted already
+                    logger.debug(f"Could not delete duplicate relation {relation.id}: {e}")
 
     async def _quick_count_files(self, directory: Path) -> int:
         """Fast file count using find command.
@@ -1043,12 +1044,22 @@ class SyncService:
         Uses subprocess to leverage OS-level file counting which is much faster
         than Python iteration, especially on network filesystems like TigrisFS.
 
+        On Windows, subprocess is not supported with SelectorEventLoop (which we use
+        to avoid aiosqlite cleanup issues), so we fall back to Python-based counting.
+
         Args:
             directory: Directory to count files in
 
         Returns:
             Number of files in directory (recursive)
         """
+        # Windows with SelectorEventLoop doesn't support subprocess
+        if sys.platform == "win32":
+            count = 0
+            async for _ in self.scan_directory(directory):
+                count += 1
+            return count
+
         process = await asyncio.create_subprocess_shell(
             f'find "{directory}" -type f | wc -l',
             stdout=asyncio.subprocess.PIPE,
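The new `sys.platform == "win32"` branch exists because asyncio's subprocess APIs need the Proactor event loop on Windows; under a `SelectorEventLoop` (kept, per the docstring, to avoid aiosqlite cleanup issues), `asyncio.create_subprocess_shell` raises `NotImplementedError`. Below is a standalone sketch of the same guard-and-fallback shape; the pure-Python fallback uses `Path.rglob` for brevity and is not the package's `scan_directory`.

```python
# Guard-and-fallback sketch: shell out to `find | wc -l` where subprocess is
# available, count in Python on Windows/SelectorEventLoop. Illustrative only.
import asyncio
import sys
from pathlib import Path


async def count_files(directory: Path) -> int:
    if sys.platform == "win32":
        # SelectorEventLoop cannot spawn subprocesses; walk the tree instead.
        return sum(1 for p in directory.rglob("*") if p.is_file())

    process = await asyncio.create_subprocess_shell(
        f'find "{directory}" -type f | wc -l',
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout, _ = await process.communicate()
    return int(stdout.strip() or 0)


if __name__ == "__main__":
    print(asyncio.run(count_files(Path("."))))
```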
@@ -1063,8 +1074,6 @@ class SyncService:
                 f"error: {error_msg}. Falling back to manual count. "
                 f"This will slow down watermark detection!"
             )
-            # Track optimization failures for visibility
-            logfire.metric_counter("sync.scan.file_count_failure").add(1)
             # Fallback: count using scan_directory
             count = 0
             async for _ in self.scan_directory(directory):
@@ -1081,6 +1090,9 @@ class SyncService:
         This is dramatically faster than scanning all files and comparing mtimes,
         especially on network filesystems like TigrisFS where stat operations are expensive.
 
+        On Windows, subprocess is not supported with SelectorEventLoop (which we use
+        to avoid aiosqlite cleanup issues), so we implement mtime filtering in Python.
+
         Args:
             directory: Directory to scan
             since_timestamp: Unix timestamp to find files newer than
@@ -1088,6 +1100,16 @@ class SyncService:
         Returns:
             List of relative file paths modified since the timestamp (respects .bmignore)
         """
+        # Windows with SelectorEventLoop doesn't support subprocess
+        # Implement mtime filtering in Python to preserve watermark optimization
+        if sys.platform == "win32":
+            file_paths = []
+            async for file_path_str, stat_info in self.scan_directory(directory):
+                if stat_info.st_mtime > since_timestamp:
+                    rel_path = Path(file_path_str).relative_to(directory).as_posix()
+                    file_paths.append(rel_path)
+            return file_paths
+
         # Convert timestamp to find-compatible format
         since_date = datetime.fromtimestamp(since_timestamp).strftime("%Y-%m-%d %H:%M:%S")
 
@@ -1105,8 +1127,6 @@ class SyncService:
                 f"error: {error_msg}. Falling back to full scan. "
                 f"This will cause slow syncs on large projects!"
             )
-            # Track optimization failures for visibility
-            logfire.metric_counter("sync.scan.optimization_failure").add(1)
             # Fallback to full scan
             return await self._scan_directory_full(directory)
 
@@ -1206,14 +1226,14 @@ async def get_sync_service(project: Project) -> SyncService:  # pragma: no cover
 
     project_path = Path(project.path)
     entity_parser = EntityParser(project_path)
-    markdown_processor = MarkdownProcessor(entity_parser)
-    file_service = FileService(project_path, markdown_processor)
+    markdown_processor = MarkdownProcessor(entity_parser, app_config=app_config)
+    file_service = FileService(project_path, markdown_processor, app_config=app_config)
 
     # Initialize repositories
     entity_repository = EntityRepository(session_maker, project_id=project.id)
    observation_repository = ObservationRepository(session_maker, project_id=project.id)
     relation_repository = RelationRepository(session_maker, project_id=project.id)
-    search_repository =
+    search_repository = create_search_repository(session_maker, project_id=project.id)
     project_repository = ProjectRepository(session_maker)
 
     # Initialize services
basic_memory/sync/watch_service.py

@@ -5,14 +5,17 @@ import os
 from collections import defaultdict
 from datetime import datetime
 from pathlib import Path
-from typing import List, Optional, Set, Sequence
+from typing import List, Optional, Set, Sequence, Callable, Awaitable, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from basic_memory.sync.sync_service import SyncService
 
 from basic_memory.config import BasicMemoryConfig, WATCH_STATUS_JSON
 from basic_memory.ignore_utils import load_gitignore_patterns, should_ignore_path
 from basic_memory.models import Project
 from basic_memory.repository import ProjectRepository
 from loguru import logger
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 from rich.console import Console
 from watchfiles import awatch
 from watchfiles.main import FileChange, Change
@@ -31,8 +34,8 @@ class WatchEvent(BaseModel):
 class WatchServiceState(BaseModel):
     # Service status
     running: bool = False
-    start_time: datetime = datetime.now
-    pid: int = os.getpid
+    start_time: datetime = Field(default_factory=datetime.now)
+    pid: int = Field(default_factory=os.getpid)
 
     # Stats
     error_count: int = 0
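The two replaced defaults assigned the callables themselves (`datetime.now`, `os.getpid`) rather than their results; `Field(default_factory=...)` makes pydantic call the factory when each `WatchServiceState` is created, so every instance gets a fresh timestamp and the current PID, and the same pattern gives `recent_events` its own list per instance in the next hunk. A minimal illustration, not project code:

```python
# Field(default_factory=...) is evaluated once per instance at creation time.
from datetime import datetime
from typing import List

from pydantic import BaseModel, Field


class State(BaseModel):
    start_time: datetime = Field(default_factory=datetime.now)  # fresh per instance
    events: List[str] = Field(default_factory=list)             # no shared list


a, b = State(), State()
assert a.events is not b.events
print(a.start_time, b.start_time)
```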
@@ -43,7 +46,7 @@ class WatchServiceState(BaseModel):
     synced_files: int = 0
 
     # Recent activity
-    recent_events: List[WatchEvent] =
+    recent_events: List[WatchEvent] = Field(default_factory=list)
 
     def add_event(
         self,
@@ -71,12 +74,17 @@ class WatchServiceState(BaseModel):
         self.last_error = datetime.now()
 
 
+# Type alias for sync service factory function
+SyncServiceFactory = Callable[[Project], Awaitable["SyncService"]]
+
+
 class WatchService:
     def __init__(
         self,
         app_config: BasicMemoryConfig,
         project_repository: ProjectRepository,
         quiet: bool = False,
+        sync_service_factory: Optional[SyncServiceFactory] = None,
     ):
         self.app_config = app_config
         self.project_repository = project_repository
@@ -84,10 +92,20 @@ class WatchService:
         self.status_path = Path.home() / ".basic-memory" / WATCH_STATUS_JSON
         self.status_path.parent.mkdir(parents=True, exist_ok=True)
         self._ignore_patterns_cache: dict[Path, Set[str]] = {}
+        self._sync_service_factory = sync_service_factory
 
         # quiet mode for mcp so it doesn't mess up stdout
         self.console = Console(quiet=quiet)
 
+    async def _get_sync_service(self, project: Project) -> "SyncService":
+        """Get sync service for a project, using factory if provided."""
+        if self._sync_service_factory:
+            return await self._sync_service_factory(project)
+        # Fall back to default factory
+        from basic_memory.sync.sync_service import get_sync_service
+
+        return await get_sync_service(project)
+
     async def _schedule_restart(self, stop_event: asyncio.Event):
         """Schedule a restart of the watch service after the configured interval."""
         await asyncio.sleep(self.app_config.watch_project_reload_interval)
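The new `sync_service_factory` parameter lets a caller inject its own coroutine for building a `SyncService` per project, while `_get_sync_service` falls back to the module-level `get_sync_service`. Below is a minimal, self-contained sketch of a callable that satisfies the `SyncServiceFactory` alias; the `Project` and `SyncService` classes here are stand-ins for the real models, used only to keep the example runnable.

```python
# Stand-alone illustration of the SyncServiceFactory shape:
# Callable[[Project], Awaitable["SyncService"]]. Classes below are stand-ins.
import asyncio
from dataclasses import dataclass
from typing import Awaitable, Callable


@dataclass
class Project:  # stand-in for basic_memory.models.Project
    id: int
    name: str
    path: str


class SyncService:  # stand-in for basic_memory.sync.sync_service.SyncService
    def __init__(self, project: Project) -> None:
        self.project = project


SyncServiceFactory = Callable[[Project], Awaitable[SyncService]]


async def my_factory(project: Project) -> SyncService:
    # In basic-memory the default equivalent is get_sync_service(project);
    # a test or API container could return a differently wired instance here.
    return SyncService(project)


async def main() -> None:
    factory: SyncServiceFactory = my_factory
    service = await factory(Project(id=1, name="notes", path="/tmp/notes"))
    print(type(service).__name__, service.project.name)


asyncio.run(main())
```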
@@ -233,9 +251,6 @@ class WatchService:
 
     async def handle_changes(self, project: Project, changes: Set[FileChange]) -> None:
         """Process a batch of file changes"""
-        # avoid circular imports
-        from basic_memory.sync.sync_service import get_sync_service
-
         # Check if project still exists in configuration before processing
         # This prevents deleted projects from being recreated by background sync
         from basic_memory.config import ConfigManager
@@ -250,7 +265,7 @@ class WatchService:
             )
             return
 
-        sync_service = await
+        sync_service = await self._get_sync_service(project)
         file_service = sync_service.file_service
 
         start_time = time.time()
@@ -284,12 +299,17 @@ class WatchService:
         )
 
         # because of our atomic writes on updates, an add may be an existing file
-
+        # Avoid mutating `adds` while iterating (can skip items).
+        reclassified_as_modified: List[str] = []
+        for added_path in list(adds):  # pragma: no cover TODO add test
             entity = await sync_service.entity_repository.get_by_file_path(added_path)
             if entity is not None:
                 logger.debug(f"Existing file will be processed as modified, path={added_path}")
-
-
+                reclassified_as_modified.append(added_path)
+
+        if reclassified_as_modified:
+            adds = [p for p in adds if p not in reclassified_as_modified]
+            modifies.extend(reclassified_as_modified)
 
         # Track processed files to avoid duplicates
         processed: Set[str] = set()