basic-memory 0.16.1-py3-none-any.whl → 0.17.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- basic_memory/__init__.py +1 -1
- basic_memory/alembic/env.py +112 -26
- basic_memory/alembic/versions/314f1ea54dc4_add_postgres_full_text_search_support_.py +131 -0
- basic_memory/alembic/versions/5fe1ab1ccebe_add_projects_table.py +15 -3
- basic_memory/alembic/versions/647e7a75e2cd_project_constraint_fix.py +44 -36
- basic_memory/alembic/versions/6830751f5fb6_merge_multiple_heads.py +24 -0
- basic_memory/alembic/versions/a2b3c4d5e6f7_add_search_index_entity_cascade.py +56 -0
- basic_memory/alembic/versions/cc7172b46608_update_search_index_schema.py +13 -0
- basic_memory/alembic/versions/f8a9b2c3d4e5_add_pg_trgm_for_fuzzy_link_resolution.py +239 -0
- basic_memory/alembic/versions/g9a0b3c4d5e6_add_external_id_to_project_and_entity.py +173 -0
- basic_memory/api/app.py +45 -24
- basic_memory/api/container.py +133 -0
- basic_memory/api/routers/knowledge_router.py +17 -5
- basic_memory/api/routers/project_router.py +68 -14
- basic_memory/api/routers/resource_router.py +37 -27
- basic_memory/api/routers/utils.py +53 -14
- basic_memory/api/v2/__init__.py +35 -0
- basic_memory/api/v2/routers/__init__.py +21 -0
- basic_memory/api/v2/routers/directory_router.py +93 -0
- basic_memory/api/v2/routers/importer_router.py +181 -0
- basic_memory/api/v2/routers/knowledge_router.py +427 -0
- basic_memory/api/v2/routers/memory_router.py +130 -0
- basic_memory/api/v2/routers/project_router.py +359 -0
- basic_memory/api/v2/routers/prompt_router.py +269 -0
- basic_memory/api/v2/routers/resource_router.py +286 -0
- basic_memory/api/v2/routers/search_router.py +73 -0
- basic_memory/cli/app.py +43 -7
- basic_memory/cli/auth.py +27 -4
- basic_memory/cli/commands/__init__.py +3 -1
- basic_memory/cli/commands/cloud/api_client.py +20 -5
- basic_memory/cli/commands/cloud/cloud_utils.py +13 -6
- basic_memory/cli/commands/cloud/rclone_commands.py +110 -14
- basic_memory/cli/commands/cloud/rclone_installer.py +18 -4
- basic_memory/cli/commands/cloud/upload.py +10 -3
- basic_memory/cli/commands/command_utils.py +52 -4
- basic_memory/cli/commands/db.py +78 -19
- basic_memory/cli/commands/format.py +198 -0
- basic_memory/cli/commands/import_chatgpt.py +12 -8
- basic_memory/cli/commands/import_claude_conversations.py +12 -8
- basic_memory/cli/commands/import_claude_projects.py +12 -8
- basic_memory/cli/commands/import_memory_json.py +12 -8
- basic_memory/cli/commands/mcp.py +8 -26
- basic_memory/cli/commands/project.py +22 -9
- basic_memory/cli/commands/status.py +3 -2
- basic_memory/cli/commands/telemetry.py +81 -0
- basic_memory/cli/container.py +84 -0
- basic_memory/cli/main.py +7 -0
- basic_memory/config.py +177 -77
- basic_memory/db.py +183 -77
- basic_memory/deps/__init__.py +293 -0
- basic_memory/deps/config.py +26 -0
- basic_memory/deps/db.py +56 -0
- basic_memory/deps/importers.py +200 -0
- basic_memory/deps/projects.py +238 -0
- basic_memory/deps/repositories.py +179 -0
- basic_memory/deps/services.py +480 -0
- basic_memory/deps.py +14 -409
- basic_memory/file_utils.py +212 -3
- basic_memory/ignore_utils.py +5 -5
- basic_memory/importers/base.py +40 -19
- basic_memory/importers/chatgpt_importer.py +17 -4
- basic_memory/importers/claude_conversations_importer.py +27 -12
- basic_memory/importers/claude_projects_importer.py +50 -14
- basic_memory/importers/memory_json_importer.py +36 -16
- basic_memory/importers/utils.py +5 -2
- basic_memory/markdown/entity_parser.py +62 -23
- basic_memory/markdown/markdown_processor.py +67 -4
- basic_memory/markdown/plugins.py +4 -2
- basic_memory/markdown/utils.py +10 -1
- basic_memory/mcp/async_client.py +1 -0
- basic_memory/mcp/clients/__init__.py +28 -0
- basic_memory/mcp/clients/directory.py +70 -0
- basic_memory/mcp/clients/knowledge.py +176 -0
- basic_memory/mcp/clients/memory.py +120 -0
- basic_memory/mcp/clients/project.py +89 -0
- basic_memory/mcp/clients/resource.py +71 -0
- basic_memory/mcp/clients/search.py +65 -0
- basic_memory/mcp/container.py +110 -0
- basic_memory/mcp/project_context.py +47 -33
- basic_memory/mcp/prompts/ai_assistant_guide.py +2 -2
- basic_memory/mcp/prompts/recent_activity.py +2 -2
- basic_memory/mcp/prompts/utils.py +3 -3
- basic_memory/mcp/server.py +58 -0
- basic_memory/mcp/tools/build_context.py +14 -14
- basic_memory/mcp/tools/canvas.py +34 -12
- basic_memory/mcp/tools/chatgpt_tools.py +4 -1
- basic_memory/mcp/tools/delete_note.py +31 -7
- basic_memory/mcp/tools/edit_note.py +14 -9
- basic_memory/mcp/tools/list_directory.py +7 -17
- basic_memory/mcp/tools/move_note.py +35 -31
- basic_memory/mcp/tools/project_management.py +29 -25
- basic_memory/mcp/tools/read_content.py +13 -3
- basic_memory/mcp/tools/read_note.py +24 -14
- basic_memory/mcp/tools/recent_activity.py +32 -38
- basic_memory/mcp/tools/search.py +17 -10
- basic_memory/mcp/tools/utils.py +28 -0
- basic_memory/mcp/tools/view_note.py +2 -1
- basic_memory/mcp/tools/write_note.py +37 -14
- basic_memory/models/knowledge.py +15 -2
- basic_memory/models/project.py +7 -1
- basic_memory/models/search.py +58 -2
- basic_memory/project_resolver.py +222 -0
- basic_memory/repository/entity_repository.py +210 -3
- basic_memory/repository/observation_repository.py +1 -0
- basic_memory/repository/postgres_search_repository.py +451 -0
- basic_memory/repository/project_repository.py +38 -1
- basic_memory/repository/relation_repository.py +58 -2
- basic_memory/repository/repository.py +1 -0
- basic_memory/repository/search_index_row.py +95 -0
- basic_memory/repository/search_repository.py +77 -615
- basic_memory/repository/search_repository_base.py +241 -0
- basic_memory/repository/sqlite_search_repository.py +437 -0
- basic_memory/runtime.py +61 -0
- basic_memory/schemas/base.py +36 -6
- basic_memory/schemas/directory.py +2 -1
- basic_memory/schemas/memory.py +9 -2
- basic_memory/schemas/project_info.py +2 -0
- basic_memory/schemas/response.py +84 -27
- basic_memory/schemas/search.py +5 -0
- basic_memory/schemas/sync_report.py +1 -1
- basic_memory/schemas/v2/__init__.py +27 -0
- basic_memory/schemas/v2/entity.py +133 -0
- basic_memory/schemas/v2/resource.py +47 -0
- basic_memory/services/context_service.py +219 -43
- basic_memory/services/directory_service.py +26 -11
- basic_memory/services/entity_service.py +68 -33
- basic_memory/services/file_service.py +131 -16
- basic_memory/services/initialization.py +51 -26
- basic_memory/services/link_resolver.py +1 -0
- basic_memory/services/project_service.py +68 -43
- basic_memory/services/search_service.py +75 -16
- basic_memory/sync/__init__.py +2 -1
- basic_memory/sync/coordinator.py +160 -0
- basic_memory/sync/sync_service.py +135 -115
- basic_memory/sync/watch_service.py +32 -12
- basic_memory/telemetry.py +249 -0
- basic_memory/utils.py +96 -75
- {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/METADATA +129 -5
- basic_memory-0.17.4.dist-info/RECORD +193 -0
- {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/WHEEL +1 -1
- basic_memory-0.16.1.dist-info/RECORD +0 -148
- {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/entry_points.txt +0 -0
- {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/licenses/LICENSE +0 -0
basic_memory/sync/sync_service.py

@@ -2,6 +2,7 @@
 
 import asyncio
 import os
+import sys
 import time
 from collections import OrderedDict
 from dataclasses import dataclass, field
@@ -10,7 +11,7 @@ from pathlib import Path
 from typing import AsyncIterator, Dict, List, Optional, Set, Tuple
 
 import aiofiles.os
-
+
 from loguru import logger
 from sqlalchemy.exc import IntegrityError
 
@@ -26,7 +27,7 @@ from basic_memory.repository import (
     ObservationRepository,
     ProjectRepository,
 )
-from basic_memory.repository.search_repository import
+from basic_memory.repository.search_repository import create_search_repository
 from basic_memory.services import EntityService, FileService
 from basic_memory.services.exceptions import SyncFatalError
 from basic_memory.services.link_resolver import LinkResolver
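The replaced import above (a concrete `SearchRepository` swapped for a `create_search_repository` factory, also called later in `get_sync_service`) lines up with the new `search_repository_base.py`, `sqlite_search_repository.py`, and `postgres_search_repository.py` modules in the file list: the search layer now appears to pick a backend-specific repository at runtime. A minimal sketch of such a factory follows; the class names, the dialect check, and the stand-in constructors are assumptions, not the package's actual code.

```python
# Illustrative sketch only - not basic-memory's implementation.
class SQLiteSearchRepository:
    """Stand-in for a SQLite (FTS-based) search repository."""
    def __init__(self, session_maker, project_id: int):
        self.session_maker, self.project_id = session_maker, project_id


class PostgresSearchRepository:
    """Stand-in for a Postgres (full-text / pg_trgm) search repository."""
    def __init__(self, session_maker, project_id: int):
        self.session_maker, self.project_id = session_maker, project_id


def create_search_repository(session_maker, project_id: int):
    """Pick a search backend for one project (assumed dispatch logic)."""
    # Assumes the session factory was built with bind=<AsyncEngine>, so the
    # dialect name ('sqlite' or 'postgresql') can be read off the bound engine.
    dialect = session_maker.kw["bind"].dialect.name
    if dialect == "postgresql":
        return PostgresSearchRepository(session_maker, project_id=project_id)
    return SQLiteSearchRepository(session_maker, project_id=project_id)
```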
@@ -215,17 +216,12 @@ class SyncService:
                 f"path={path}, error={error}"
             )
 
-            # Record metric for file failure
-            logfire.metric_counter("sync.circuit_breaker.failures").add(1)
-
             # Log when threshold is reached
             if failure_info.count >= MAX_CONSECUTIVE_FAILURES:
                 logger.error(
                     f"File {path} has failed {MAX_CONSECUTIVE_FAILURES} times and will be skipped. "
                     f"First failure: {failure_info.first_failure}, Last error: {error}"
                 )
-                # Record metric for file being blocked by circuit breaker
-                logfire.metric_counter("sync.circuit_breaker.blocked_files").add(1)
         else:
             # Create new failure record
             self._file_failures[path] = FileFailureInfo(
@@ -255,7 +251,6 @@ class SyncService:
             logger.info(f"Clearing failure history for {path} after successful sync")
             del self._file_failures[path]
 
-    @logfire.instrument()
     async def sync(
         self, directory: Path, project_name: Optional[str] = None, force_full: bool = False
     ) -> SyncReport:
@@ -282,63 +277,58 @@ class SyncService:
         )
 
         # sync moves first
-
-
-
-
-
-
-
-
-
-
-            await self.handle_move(old_path, new_path)
+        for old_path, new_path in report.moves.items():
+            # in the case where a file has been deleted and replaced by another file
+            # it will show up in the move and modified lists, so handle it in modified
+            if new_path in report.modified:
+                report.modified.remove(new_path)
+                logger.debug(
+                    f"File marked as moved and modified: old_path={old_path}, new_path={new_path}"
+                )
+            else:
+                await self.handle_move(old_path, new_path)
 
         # deleted next
-
-
-            await self.handle_delete(path)
+        for path in report.deleted:
+            await self.handle_delete(path)
 
         # then new and modified
-
-
-
-
-
-
-
-
-
-
-
-
-                            first_failed=failure_info.first_failure,
-                        )
+        for path in report.new:
+            entity, _ = await self.sync_file(path, new=True)
+
+            # Track if file was skipped
+            if entity is None and await self._should_skip_file(path):
+                failure_info = self._file_failures[path]
+                report.skipped_files.append(
+                    SkippedFile(
+                        path=path,
+                        reason=failure_info.last_error,
+                        failure_count=failure_info.count,
+                        first_failed=failure_info.first_failure,
                     )
+                )
 
-
-
-
-
-
-
-
-
-
-
-
-                            first_failed=failure_info.first_failure,
-                        )
+        for path in report.modified:
+            entity, _ = await self.sync_file(path, new=False)
+
+            # Track if file was skipped
+            if entity is None and await self._should_skip_file(path):
+                failure_info = self._file_failures[path]
+                report.skipped_files.append(
+                    SkippedFile(
+                        path=path,
+                        reason=failure_info.last_error,
+                        failure_count=failure_info.count,
+                        first_failed=failure_info.first_failure,
                     )
+                )
 
         # Only resolve relations if there were actual changes
         # If no files changed, no new unresolved relations could have been created
-
-
-
-
-            logger.info("Skipping relation resolution - no file changes detected")
+        if report.total > 0:
+            await self.resolve_relations()
+        else:
+            logger.info("Skipping relation resolution - no file changes detected")
 
         # Update scan watermark after successful sync
         # Use the timestamp from sync start (not end) to ensure we catch files
@@ -361,15 +351,6 @@ class SyncService:
 
         duration_ms = int((time.time() - start_time) * 1000)
 
-        # Record metrics for sync operation
-        logfire.metric_histogram("sync.duration", unit="ms").record(duration_ms)
-        logfire.metric_counter("sync.files.new").add(len(report.new))
-        logfire.metric_counter("sync.files.modified").add(len(report.modified))
-        logfire.metric_counter("sync.files.deleted").add(len(report.deleted))
-        logfire.metric_counter("sync.files.moved").add(len(report.moves))
-        if report.skipped_files:
-            logfire.metric_counter("sync.files.skipped").add(len(report.skipped_files))
-
         # Log summary with skipped files if any
         if report.skipped_files:
             logger.warning(
@@ -390,7 +371,6 @@ class SyncService:
 
         return report
 
-    @logfire.instrument()
     async def scan(self, directory, force_full: bool = False):
         """Smart scan using watermark and file count for large project optimization.
 
@@ -472,12 +452,6 @@ class SyncService:
             logger.warning("No scan watermark available, falling back to full scan")
             file_paths_to_scan = await self._scan_directory_full(directory)
 
-        # Record scan type metric
-        logfire.metric_counter(f"sync.scan.{scan_type}").add(1)
-        logfire.metric_histogram("sync.scan.files_scanned", unit="files").record(
-            len(file_paths_to_scan)
-        )
-
         # Step 3: Process each file with mtime-based comparison
         scanned_paths: Set[str] = set()
         changed_checksums: Dict[str, str] = {}
@@ -589,7 +563,6 @@ class SyncService:
         report.checksums = changed_checksums
 
         scan_duration_ms = int((time.time() - scan_start_time) * 1000)
-        logfire.metric_histogram("sync.scan.duration", unit="ms").record(scan_duration_ms)
 
         logger.info(
             f"Completed {scan_type} scan for directory {directory} in {scan_duration_ms}ms, "
@@ -599,7 +572,6 @@ class SyncService:
         )
         return report
 
-    @logfire.instrument()
     async def sync_file(
         self, path: str, new: bool = True
     ) -> Tuple[Optional[Entity], Optional[str]]:
@@ -638,10 +610,20 @@ class SyncService:
                 )
             return entity, checksum
 
+        except FileNotFoundError:
+            # File exists in database but not on filesystem
+            # This indicates a database/filesystem inconsistency - treat as deletion
+            logger.warning(
+                f"File not found during sync, treating as deletion: path={path}. "
+                "This may indicate a race condition or manual file deletion."
+            )
+            await self.handle_delete(path)
+            return None, None
+
         except Exception as e:
             # Check if this is a fatal error (or caused by one)
             # Fatal errors like project deletion should terminate sync immediately
-            if isinstance(e, SyncFatalError) or isinstance(e.__cause__, SyncFatalError):
+            if isinstance(e, SyncFatalError) or isinstance(e.__cause__, SyncFatalError):  # pragma: no cover
                 logger.error(f"Fatal sync error encountered, terminating sync: path={path}")
                 raise
 
@@ -654,7 +636,6 @@ class SyncService:
 
         return None, None
 
-    @logfire.instrument()
     async def sync_markdown_file(self, path: str, new: bool = True) -> Tuple[Optional[Entity], str]:
         """Sync a markdown file with full processing.
 
@@ -672,12 +653,19 @@ class SyncService:
         file_contains_frontmatter = has_frontmatter(file_content)
 
         # Get file timestamps for tracking modification times
-
-        created =
-        modified =
-
-        #
-
+        file_metadata = await self.file_service.get_file_metadata(path)
+        created = file_metadata.created_at
+        modified = file_metadata.modified_at
+
+        # Parse markdown content with file metadata (avoids redundant file read/stat)
+        # This enables cloud implementations (S3FileService) to provide metadata from head_object
+        abs_path = self.file_service.base_path / path
+        entity_markdown = await self.entity_parser.parse_markdown_content(
+            file_path=abs_path,
+            content=file_content,
+            mtime=file_metadata.modified_at.timestamp(),
+            ctime=file_metadata.created_at.timestamp(),
+        )
 
         # if the file contains frontmatter, resolve a permalink (unless disabled)
         if file_contains_frontmatter and not self.app_config.disable_permalinks:
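Both file-sync paths now go through `self.file_service.get_file_metadata(path)` for timestamps and size instead of stat-ing the file inline, which is what lets a cloud-backed FileService supply the same fields from object metadata. A rough local-filesystem sketch of such a helper is below; the `FileMetadata` shape is inferred from the fields the sync code reads (`created_at`, `modified_at`, `size`) and is not copied from the package.

```python
# Hypothetical local-filesystem sketch of a get_file_metadata helper.
# Field names mirror what the sync code accesses; the real FileService may differ.
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path

import aiofiles.os


@dataclass
class FileMetadata:
    created_at: datetime
    modified_at: datetime
    size: int


async def get_file_metadata(base_path: Path, relative_path: str) -> FileMetadata:
    stat = await aiofiles.os.stat(base_path / relative_path)
    return FileMetadata(
        # st_ctime is only an approximation of creation time on Unix.
        created_at=datetime.fromtimestamp(stat.st_ctime),
        modified_at=datetime.fromtimestamp(stat.st_mtime),
        size=stat.st_size,
    )
```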
@@ -723,8 +711,8 @@ class SyncService:
                     "checksum": final_checksum,
                     "created_at": created,
                     "updated_at": modified,
-                    "mtime":
-                    "size":
+                    "mtime": file_metadata.modified_at.timestamp(),
+                    "size": file_metadata.size,
                 },
             )
 
@@ -737,7 +725,6 @@ class SyncService:
         # Return the final checksum to ensure everything is consistent
         return entity, final_checksum
 
-    @logfire.instrument()
     async def sync_regular_file(self, path: str, new: bool = True) -> Tuple[Optional[Entity], str]:
         """Sync a non-markdown file with basic tracking.
 
@@ -754,9 +741,9 @@ class SyncService:
             await self.entity_service.resolve_permalink(path, skip_conflict_check=True)
 
             # get file timestamps
-
-            created =
-            modified =
+            file_metadata = await self.file_service.get_file_metadata(path)
+            created = file_metadata.created_at
+            modified = file_metadata.modified_at
 
             # get mime type
             content_type = self.file_service.content_type(path)
@@ -772,14 +759,20 @@ class SyncService:
                     created_at=created,
                     updated_at=modified,
                     content_type=content_type,
-                    mtime=
-                    size=
+                    mtime=file_metadata.modified_at.timestamp(),
+                    size=file_metadata.size,
                 )
             )
             return entity, checksum
         except IntegrityError as e:
             # Handle race condition where entity was created by another process
-
+            msg = str(e)
+            if (
+                "UNIQUE constraint failed: entity.file_path" in msg
+                or "uix_entity_file_path_project" in msg
+                or "duplicate key value violates unique constraint" in msg
+                and "file_path" in msg
+            ):
                 logger.info(
                     f"Entity already exists for file_path={path}, updating instead of creating"
                 )
@@ -789,15 +782,15 @@ class SyncService:
                     logger.error(f"Entity not found after constraint violation, path={path}")
                     raise ValueError(f"Entity not found after constraint violation: {path}")
 
-                # Re-get file
-
+                # Re-get file metadata since we're in update path
+                file_metadata_for_update = await self.file_service.get_file_metadata(path)
                 updated = await self.entity_repository.update(
                     entity.id,
                     {
                         "file_path": path,
                         "checksum": checksum,
-                        "mtime":
-                        "size":
+                        "mtime": file_metadata_for_update.modified_at.timestamp(),
+                        "size": file_metadata_for_update.size,
                     },
                 )
 
@@ -808,11 +801,11 @@ class SyncService:
                 return updated, checksum
             else:
                 # Re-raise if it's a different integrity error
-                raise
+                raise  # pragma: no cover
         else:
             # Get file timestamps for updating modification time
-
-            modified =
+            file_metadata = await self.file_service.get_file_metadata(path)
+            modified = file_metadata.modified_at
 
             entity = await self.entity_repository.get_by_file_path(path)
             if entity is None:  # pragma: no cover
@@ -827,8 +820,8 @@ class SyncService:
                     "file_path": path,
                     "checksum": checksum,
                     "updated_at": modified,
-                    "mtime":
-                    "size":
+                    "mtime": file_metadata.modified_at.timestamp(),
+                    "size": file_metadata.size,
                 },
             )
 
@@ -838,7 +831,6 @@ class SyncService:
 
         return updated, checksum
 
-    @logfire.instrument()
     async def handle_delete(self, file_path: str):
         """Handle complete entity deletion including search index cleanup."""
 
@@ -870,7 +862,6 @@ class SyncService:
         else:
             await self.search_service.delete_by_entity_id(entity.id)
 
-    @logfire.instrument()
     async def handle_move(self, old_path, new_path):
         logger.debug("Moving entity", old_path=old_path, new_path=new_path)
 
@@ -975,7 +966,6 @@ class SyncService:
         # update search index
         await self.search_service.index_entity(updated)
 
-    @logfire.instrument()
     async def resolve_relations(self, entity_id: int | None = None):
         """Try to resolve unresolved relations.
 
@@ -1026,16 +1016,27 @@ class SyncService:
                         "to_name": resolved_entity.title,
                     },
                 )
-
+                # update search index only on successful resolution
+                await self.search_service.index_entity(resolved_entity)
+            except IntegrityError:
+                # IntegrityError means a relation with this (from_id, to_id, relation_type)
+                # already exists. The UPDATE was rolled back, so our unresolved relation
+                # (to_id=NULL) still exists in the database. We delete it because:
+                # 1. It's redundant - a resolved relation already captures this relationship
+                # 2. If we don't delete it, future syncs will try to resolve it again
+                #    and get the same IntegrityError
                 logger.debug(
-                    "
+                    "Deleting duplicate unresolved relation "
                     f"relation_id={relation.id} "
                     f"from_id={relation.from_id} "
-                    f"to_name={relation.to_name}"
+                    f"to_name={relation.to_name} "
+                    f"resolved_to_id={resolved_entity.id}"
                 )
-
-
-
+                try:
+                    await self.relation_repository.delete(relation.id)
+                except Exception as e:
+                    # Log but don't fail - the relation may have been deleted already
+                    logger.debug(f"Could not delete duplicate relation {relation.id}: {e}")
 
     async def _quick_count_files(self, directory: Path) -> int:
         """Fast file count using find command.
@@ -1043,12 +1044,22 @@ class SyncService:
         Uses subprocess to leverage OS-level file counting which is much faster
         than Python iteration, especially on network filesystems like TigrisFS.
 
+        On Windows, subprocess is not supported with SelectorEventLoop (which we use
+        to avoid aiosqlite cleanup issues), so we fall back to Python-based counting.
+
         Args:
             directory: Directory to count files in
 
         Returns:
             Number of files in directory (recursive)
         """
+        # Windows with SelectorEventLoop doesn't support subprocess
+        if sys.platform == "win32":
+            count = 0
+            async for _ in self.scan_directory(directory):
+                count += 1
+            return count
+
         process = await asyncio.create_subprocess_shell(
             f'find "{directory}" -type f | wc -l',
             stdout=asyncio.subprocess.PIPE,
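The new `sys.platform == "win32"` branch exists because asyncio's subprocess APIs need the Proactor event loop on Windows; under a `SelectorEventLoop` (kept, per the docstring, to avoid aiosqlite cleanup issues), `asyncio.create_subprocess_shell` raises `NotImplementedError`. Below is a standalone sketch of the same guard-and-fallback shape; the pure-Python fallback uses `Path.rglob` for brevity and is not the package's `scan_directory`.

```python
# Guard-and-fallback sketch: shell out to `find | wc -l` where subprocess is
# available, count in Python on Windows/SelectorEventLoop. Illustrative only.
import asyncio
import sys
from pathlib import Path


async def count_files(directory: Path) -> int:
    if sys.platform == "win32":
        # SelectorEventLoop cannot spawn subprocesses; walk the tree instead.
        return sum(1 for p in directory.rglob("*") if p.is_file())

    process = await asyncio.create_subprocess_shell(
        f'find "{directory}" -type f | wc -l',
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout, _ = await process.communicate()
    return int(stdout.strip() or 0)


if __name__ == "__main__":
    print(asyncio.run(count_files(Path("."))))
```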
@@ -1063,8 +1074,6 @@ class SyncService:
                 f"error: {error_msg}. Falling back to manual count. "
                 f"This will slow down watermark detection!"
             )
-            # Track optimization failures for visibility
-            logfire.metric_counter("sync.scan.file_count_failure").add(1)
             # Fallback: count using scan_directory
             count = 0
             async for _ in self.scan_directory(directory):
@@ -1081,6 +1090,9 @@ class SyncService:
         This is dramatically faster than scanning all files and comparing mtimes,
         especially on network filesystems like TigrisFS where stat operations are expensive.
 
+        On Windows, subprocess is not supported with SelectorEventLoop (which we use
+        to avoid aiosqlite cleanup issues), so we implement mtime filtering in Python.
+
         Args:
             directory: Directory to scan
             since_timestamp: Unix timestamp to find files newer than
@@ -1088,6 +1100,16 @@ class SyncService:
         Returns:
             List of relative file paths modified since the timestamp (respects .bmignore)
         """
+        # Windows with SelectorEventLoop doesn't support subprocess
+        # Implement mtime filtering in Python to preserve watermark optimization
+        if sys.platform == "win32":
+            file_paths = []
+            async for file_path_str, stat_info in self.scan_directory(directory):
+                if stat_info.st_mtime > since_timestamp:
+                    rel_path = Path(file_path_str).relative_to(directory).as_posix()
+                    file_paths.append(rel_path)
+            return file_paths
+
         # Convert timestamp to find-compatible format
         since_date = datetime.fromtimestamp(since_timestamp).strftime("%Y-%m-%d %H:%M:%S")
 
@@ -1105,8 +1127,6 @@ class SyncService:
                 f"error: {error_msg}. Falling back to full scan. "
                 f"This will cause slow syncs on large projects!"
             )
-            # Track optimization failures for visibility
-            logfire.metric_counter("sync.scan.optimization_failure").add(1)
             # Fallback to full scan
             return await self._scan_directory_full(directory)
 
@@ -1206,14 +1226,14 @@ async def get_sync_service(project: Project) -> SyncService:  # pragma: no cover
 
     project_path = Path(project.path)
     entity_parser = EntityParser(project_path)
-    markdown_processor = MarkdownProcessor(entity_parser)
-    file_service = FileService(project_path, markdown_processor)
+    markdown_processor = MarkdownProcessor(entity_parser, app_config=app_config)
+    file_service = FileService(project_path, markdown_processor, app_config=app_config)
 
     # Initialize repositories
     entity_repository = EntityRepository(session_maker, project_id=project.id)
    observation_repository = ObservationRepository(session_maker, project_id=project.id)
     relation_repository = RelationRepository(session_maker, project_id=project.id)
-    search_repository =
+    search_repository = create_search_repository(session_maker, project_id=project.id)
     project_repository = ProjectRepository(session_maker)
 
     # Initialize services
basic_memory/sync/watch_service.py

@@ -5,14 +5,17 @@ import os
 from collections import defaultdict
 from datetime import datetime
 from pathlib import Path
-from typing import List, Optional, Set, Sequence
+from typing import List, Optional, Set, Sequence, Callable, Awaitable, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from basic_memory.sync.sync_service import SyncService
 
 from basic_memory.config import BasicMemoryConfig, WATCH_STATUS_JSON
 from basic_memory.ignore_utils import load_gitignore_patterns, should_ignore_path
 from basic_memory.models import Project
 from basic_memory.repository import ProjectRepository
 from loguru import logger
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 from rich.console import Console
 from watchfiles import awatch
 from watchfiles.main import FileChange, Change
@@ -31,8 +34,8 @@ class WatchEvent(BaseModel):
 class WatchServiceState(BaseModel):
     # Service status
     running: bool = False
-    start_time: datetime = datetime.now
-    pid: int = os.getpid
+    start_time: datetime = Field(default_factory=datetime.now)
+    pid: int = Field(default_factory=os.getpid)
 
     # Stats
     error_count: int = 0
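The two replaced defaults assigned the callables themselves (`datetime.now`, `os.getpid`) rather than their results; `Field(default_factory=...)` makes pydantic call the factory when each `WatchServiceState` is created, so every instance gets a fresh timestamp and the current PID, and the same pattern gives `recent_events` its own list per instance in the next hunk. A minimal illustration, not project code:

```python
# Field(default_factory=...) is evaluated once per instance at creation time.
from datetime import datetime
from typing import List

from pydantic import BaseModel, Field


class State(BaseModel):
    start_time: datetime = Field(default_factory=datetime.now)  # fresh per instance
    events: List[str] = Field(default_factory=list)             # no shared list


a, b = State(), State()
assert a.events is not b.events
print(a.start_time, b.start_time)
```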
@@ -43,7 +46,7 @@ class WatchServiceState(BaseModel):
     synced_files: int = 0
 
     # Recent activity
-    recent_events: List[WatchEvent] =
+    recent_events: List[WatchEvent] = Field(default_factory=list)
 
     def add_event(
         self,
@@ -71,12 +74,17 @@ class WatchServiceState(BaseModel):
         self.last_error = datetime.now()
 
 
+# Type alias for sync service factory function
+SyncServiceFactory = Callable[[Project], Awaitable["SyncService"]]
+
+
 class WatchService:
     def __init__(
         self,
         app_config: BasicMemoryConfig,
         project_repository: ProjectRepository,
         quiet: bool = False,
+        sync_service_factory: Optional[SyncServiceFactory] = None,
     ):
         self.app_config = app_config
         self.project_repository = project_repository
@@ -84,10 +92,20 @@ class WatchService:
         self.status_path = Path.home() / ".basic-memory" / WATCH_STATUS_JSON
         self.status_path.parent.mkdir(parents=True, exist_ok=True)
         self._ignore_patterns_cache: dict[Path, Set[str]] = {}
+        self._sync_service_factory = sync_service_factory
 
         # quiet mode for mcp so it doesn't mess up stdout
         self.console = Console(quiet=quiet)
 
+    async def _get_sync_service(self, project: Project) -> "SyncService":
+        """Get sync service for a project, using factory if provided."""
+        if self._sync_service_factory:
+            return await self._sync_service_factory(project)
+        # Fall back to default factory
+        from basic_memory.sync.sync_service import get_sync_service
+
+        return await get_sync_service(project)
+
     async def _schedule_restart(self, stop_event: asyncio.Event):
         """Schedule a restart of the watch service after the configured interval."""
         await asyncio.sleep(self.app_config.watch_project_reload_interval)
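The new `sync_service_factory` parameter lets a caller inject its own coroutine for building a `SyncService` per project, while `_get_sync_service` falls back to the module-level `get_sync_service`. Below is a minimal, self-contained sketch of a callable that satisfies the `SyncServiceFactory` alias; the `Project` and `SyncService` classes here are stand-ins for the real models, used only to keep the example runnable.

```python
# Stand-alone illustration of the SyncServiceFactory shape:
# Callable[[Project], Awaitable["SyncService"]]. Classes below are stand-ins.
import asyncio
from dataclasses import dataclass
from typing import Awaitable, Callable


@dataclass
class Project:  # stand-in for basic_memory.models.Project
    id: int
    name: str
    path: str


class SyncService:  # stand-in for basic_memory.sync.sync_service.SyncService
    def __init__(self, project: Project) -> None:
        self.project = project


SyncServiceFactory = Callable[[Project], Awaitable[SyncService]]


async def my_factory(project: Project) -> SyncService:
    # In basic-memory the default equivalent is get_sync_service(project);
    # a test or API container could return a differently wired instance here.
    return SyncService(project)


async def main() -> None:
    factory: SyncServiceFactory = my_factory
    service = await factory(Project(id=1, name="notes", path="/tmp/notes"))
    print(type(service).__name__, service.project.name)


asyncio.run(main())
```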
@@ -233,9 +251,6 @@ class WatchService:
 
     async def handle_changes(self, project: Project, changes: Set[FileChange]) -> None:
         """Process a batch of file changes"""
-        # avoid circular imports
-        from basic_memory.sync.sync_service import get_sync_service
-
         # Check if project still exists in configuration before processing
         # This prevents deleted projects from being recreated by background sync
         from basic_memory.config import ConfigManager
@@ -250,7 +265,7 @@ class WatchService:
             )
             return
 
-        sync_service = await
+        sync_service = await self._get_sync_service(project)
         file_service = sync_service.file_service
 
         start_time = time.time()
@@ -284,12 +299,17 @@ class WatchService:
         )
 
         # because of our atomic writes on updates, an add may be an existing file
-
+        # Avoid mutating `adds` while iterating (can skip items).
+        reclassified_as_modified: List[str] = []
+        for added_path in list(adds):  # pragma: no cover TODO add test
             entity = await sync_service.entity_repository.get_by_file_path(added_path)
             if entity is not None:
                 logger.debug(f"Existing file will be processed as modified, path={added_path}")
-
-
+                reclassified_as_modified.append(added_path)
+
+        if reclassified_as_modified:
+            adds = [p for p in adds if p not in reclassified_as_modified]
+            modifies.extend(reclassified_as_modified)
 
         # Track processed files to avoid duplicates
         processed: Set[str] = set()