basic-memory 0.16.1__py3-none-any.whl → 0.17.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of basic-memory might be problematic. Click here for more details.

Files changed (143) hide show
  1. basic_memory/__init__.py +1 -1
  2. basic_memory/alembic/env.py +112 -26
  3. basic_memory/alembic/versions/314f1ea54dc4_add_postgres_full_text_search_support_.py +131 -0
  4. basic_memory/alembic/versions/5fe1ab1ccebe_add_projects_table.py +15 -3
  5. basic_memory/alembic/versions/647e7a75e2cd_project_constraint_fix.py +44 -36
  6. basic_memory/alembic/versions/6830751f5fb6_merge_multiple_heads.py +24 -0
  7. basic_memory/alembic/versions/a2b3c4d5e6f7_add_search_index_entity_cascade.py +56 -0
  8. basic_memory/alembic/versions/cc7172b46608_update_search_index_schema.py +13 -0
  9. basic_memory/alembic/versions/f8a9b2c3d4e5_add_pg_trgm_for_fuzzy_link_resolution.py +239 -0
  10. basic_memory/alembic/versions/g9a0b3c4d5e6_add_external_id_to_project_and_entity.py +173 -0
  11. basic_memory/api/app.py +45 -24
  12. basic_memory/api/container.py +133 -0
  13. basic_memory/api/routers/knowledge_router.py +17 -5
  14. basic_memory/api/routers/project_router.py +68 -14
  15. basic_memory/api/routers/resource_router.py +37 -27
  16. basic_memory/api/routers/utils.py +53 -14
  17. basic_memory/api/v2/__init__.py +35 -0
  18. basic_memory/api/v2/routers/__init__.py +21 -0
  19. basic_memory/api/v2/routers/directory_router.py +93 -0
  20. basic_memory/api/v2/routers/importer_router.py +181 -0
  21. basic_memory/api/v2/routers/knowledge_router.py +427 -0
  22. basic_memory/api/v2/routers/memory_router.py +130 -0
  23. basic_memory/api/v2/routers/project_router.py +359 -0
  24. basic_memory/api/v2/routers/prompt_router.py +269 -0
  25. basic_memory/api/v2/routers/resource_router.py +286 -0
  26. basic_memory/api/v2/routers/search_router.py +73 -0
  27. basic_memory/cli/app.py +43 -7
  28. basic_memory/cli/auth.py +27 -4
  29. basic_memory/cli/commands/__init__.py +3 -1
  30. basic_memory/cli/commands/cloud/api_client.py +20 -5
  31. basic_memory/cli/commands/cloud/cloud_utils.py +13 -6
  32. basic_memory/cli/commands/cloud/rclone_commands.py +110 -14
  33. basic_memory/cli/commands/cloud/rclone_installer.py +18 -4
  34. basic_memory/cli/commands/cloud/upload.py +10 -3
  35. basic_memory/cli/commands/command_utils.py +52 -4
  36. basic_memory/cli/commands/db.py +78 -19
  37. basic_memory/cli/commands/format.py +198 -0
  38. basic_memory/cli/commands/import_chatgpt.py +12 -8
  39. basic_memory/cli/commands/import_claude_conversations.py +12 -8
  40. basic_memory/cli/commands/import_claude_projects.py +12 -8
  41. basic_memory/cli/commands/import_memory_json.py +12 -8
  42. basic_memory/cli/commands/mcp.py +8 -26
  43. basic_memory/cli/commands/project.py +22 -9
  44. basic_memory/cli/commands/status.py +3 -2
  45. basic_memory/cli/commands/telemetry.py +81 -0
  46. basic_memory/cli/container.py +84 -0
  47. basic_memory/cli/main.py +7 -0
  48. basic_memory/config.py +177 -77
  49. basic_memory/db.py +183 -77
  50. basic_memory/deps/__init__.py +293 -0
  51. basic_memory/deps/config.py +26 -0
  52. basic_memory/deps/db.py +56 -0
  53. basic_memory/deps/importers.py +200 -0
  54. basic_memory/deps/projects.py +238 -0
  55. basic_memory/deps/repositories.py +179 -0
  56. basic_memory/deps/services.py +480 -0
  57. basic_memory/deps.py +14 -409
  58. basic_memory/file_utils.py +212 -3
  59. basic_memory/ignore_utils.py +5 -5
  60. basic_memory/importers/base.py +40 -19
  61. basic_memory/importers/chatgpt_importer.py +17 -4
  62. basic_memory/importers/claude_conversations_importer.py +27 -12
  63. basic_memory/importers/claude_projects_importer.py +50 -14
  64. basic_memory/importers/memory_json_importer.py +36 -16
  65. basic_memory/importers/utils.py +5 -2
  66. basic_memory/markdown/entity_parser.py +62 -23
  67. basic_memory/markdown/markdown_processor.py +67 -4
  68. basic_memory/markdown/plugins.py +4 -2
  69. basic_memory/markdown/utils.py +10 -1
  70. basic_memory/mcp/async_client.py +1 -0
  71. basic_memory/mcp/clients/__init__.py +28 -0
  72. basic_memory/mcp/clients/directory.py +70 -0
  73. basic_memory/mcp/clients/knowledge.py +176 -0
  74. basic_memory/mcp/clients/memory.py +120 -0
  75. basic_memory/mcp/clients/project.py +89 -0
  76. basic_memory/mcp/clients/resource.py +71 -0
  77. basic_memory/mcp/clients/search.py +65 -0
  78. basic_memory/mcp/container.py +110 -0
  79. basic_memory/mcp/project_context.py +47 -33
  80. basic_memory/mcp/prompts/ai_assistant_guide.py +2 -2
  81. basic_memory/mcp/prompts/recent_activity.py +2 -2
  82. basic_memory/mcp/prompts/utils.py +3 -3
  83. basic_memory/mcp/server.py +58 -0
  84. basic_memory/mcp/tools/build_context.py +14 -14
  85. basic_memory/mcp/tools/canvas.py +34 -12
  86. basic_memory/mcp/tools/chatgpt_tools.py +4 -1
  87. basic_memory/mcp/tools/delete_note.py +31 -7
  88. basic_memory/mcp/tools/edit_note.py +14 -9
  89. basic_memory/mcp/tools/list_directory.py +7 -17
  90. basic_memory/mcp/tools/move_note.py +35 -31
  91. basic_memory/mcp/tools/project_management.py +29 -25
  92. basic_memory/mcp/tools/read_content.py +13 -3
  93. basic_memory/mcp/tools/read_note.py +24 -14
  94. basic_memory/mcp/tools/recent_activity.py +32 -38
  95. basic_memory/mcp/tools/search.py +17 -10
  96. basic_memory/mcp/tools/utils.py +28 -0
  97. basic_memory/mcp/tools/view_note.py +2 -1
  98. basic_memory/mcp/tools/write_note.py +37 -14
  99. basic_memory/models/knowledge.py +15 -2
  100. basic_memory/models/project.py +7 -1
  101. basic_memory/models/search.py +58 -2
  102. basic_memory/project_resolver.py +222 -0
  103. basic_memory/repository/entity_repository.py +210 -3
  104. basic_memory/repository/observation_repository.py +1 -0
  105. basic_memory/repository/postgres_search_repository.py +451 -0
  106. basic_memory/repository/project_repository.py +38 -1
  107. basic_memory/repository/relation_repository.py +58 -2
  108. basic_memory/repository/repository.py +1 -0
  109. basic_memory/repository/search_index_row.py +95 -0
  110. basic_memory/repository/search_repository.py +77 -615
  111. basic_memory/repository/search_repository_base.py +241 -0
  112. basic_memory/repository/sqlite_search_repository.py +437 -0
  113. basic_memory/runtime.py +61 -0
  114. basic_memory/schemas/base.py +36 -6
  115. basic_memory/schemas/directory.py +2 -1
  116. basic_memory/schemas/memory.py +9 -2
  117. basic_memory/schemas/project_info.py +2 -0
  118. basic_memory/schemas/response.py +84 -27
  119. basic_memory/schemas/search.py +5 -0
  120. basic_memory/schemas/sync_report.py +1 -1
  121. basic_memory/schemas/v2/__init__.py +27 -0
  122. basic_memory/schemas/v2/entity.py +133 -0
  123. basic_memory/schemas/v2/resource.py +47 -0
  124. basic_memory/services/context_service.py +219 -43
  125. basic_memory/services/directory_service.py +26 -11
  126. basic_memory/services/entity_service.py +68 -33
  127. basic_memory/services/file_service.py +131 -16
  128. basic_memory/services/initialization.py +51 -26
  129. basic_memory/services/link_resolver.py +1 -0
  130. basic_memory/services/project_service.py +68 -43
  131. basic_memory/services/search_service.py +75 -16
  132. basic_memory/sync/__init__.py +2 -1
  133. basic_memory/sync/coordinator.py +160 -0
  134. basic_memory/sync/sync_service.py +135 -115
  135. basic_memory/sync/watch_service.py +32 -12
  136. basic_memory/telemetry.py +249 -0
  137. basic_memory/utils.py +96 -75
  138. {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/METADATA +129 -5
  139. basic_memory-0.17.4.dist-info/RECORD +193 -0
  140. {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/WHEEL +1 -1
  141. basic_memory-0.16.1.dist-info/RECORD +0 -148
  142. {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/entry_points.txt +0 -0
  143. {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,222 @@
1
+ """Unified project resolution across MCP, API, and CLI.
2
+
3
+ This module provides a single canonical implementation of project resolution
4
+ logic, eliminating duplicated decision trees across the codebase.
5
+
6
+ The resolution follows a three-tier hierarchy:
7
+ 1. Constrained mode: BASIC_MEMORY_MCP_PROJECT env var (highest priority)
8
+ 2. Explicit parameter: Project passed directly to operation
9
+ 3. Default project: Used when default_project_mode=true (lowest priority)
10
+
11
+ In cloud mode this hierarchy is bypassed: an explicit project is required unless discovery mode is explicitly allowed.
12
+ """
13
+
14
+ import os
15
+ from dataclasses import dataclass
16
+ from enum import Enum, auto
17
+ from typing import Optional
18
+
19
+ from loguru import logger
20
+
21
+
22
+ class ResolutionMode(Enum):
23
+ """How the project was resolved."""
24
+
25
+ CLOUD_EXPLICIT = auto() # Explicit project in cloud mode
26
+ CLOUD_DISCOVERY = auto() # Discovery mode allowed in cloud (no project)
27
+ ENV_CONSTRAINT = auto() # BASIC_MEMORY_MCP_PROJECT env var
28
+ EXPLICIT = auto() # Explicit project parameter
29
+ DEFAULT = auto() # default_project with default_project_mode=true
30
+ NONE = auto() # No resolution possible
31
+
32
+
33
+ @dataclass(frozen=True)
34
+ class ResolvedProject:
35
+ """Result of project resolution.
36
+
37
+ Attributes:
38
+ project: The resolved project name, or None if not resolved
39
+ mode: How the project was resolved
40
+ reason: Human-readable explanation of resolution
41
+ """
42
+
43
+ project: Optional[str]
44
+ mode: ResolutionMode
45
+ reason: str
46
+
47
+ @property
48
+ def is_resolved(self) -> bool:
49
+ """Whether a project was successfully resolved."""
50
+ return self.project is not None
51
+
52
+ @property
53
+ def is_discovery_mode(self) -> bool:
54
+ """Whether we're in discovery mode (no specific project)."""
55
+ return self.mode == ResolutionMode.CLOUD_DISCOVERY or (
56
+ self.mode == ResolutionMode.NONE and self.project is None
57
+ )
58
+
59
+
60
+ @dataclass
61
+ class ProjectResolver:
62
+ """Unified project resolution logic.
63
+
64
+ Resolves the effective project given requested project, environment
65
+ constraints, and configuration settings.
66
+
67
+ This is the single canonical implementation of project resolution,
68
+ used by MCP tools, API routes, and CLI commands.
69
+
70
+ Args:
71
+ cloud_mode: Whether running in cloud mode (project required)
72
+ default_project_mode: Whether to use default project when not specified
73
+ default_project: The default project name
74
+ constrained_project: Optional env-constrained project override
75
+ (typically from BASIC_MEMORY_MCP_PROJECT)
76
+ """
77
+
78
+ cloud_mode: bool = False
79
+ default_project_mode: bool = False
80
+ default_project: Optional[str] = None
81
+ constrained_project: Optional[str] = None
82
+
83
+ @classmethod
84
+ def from_env(
85
+ cls,
86
+ cloud_mode: bool = False,
87
+ default_project_mode: bool = False,
88
+ default_project: Optional[str] = None,
89
+ ) -> "ProjectResolver":
90
+ """Create resolver with constrained_project from environment.
91
+
92
+ Args:
93
+ cloud_mode: Whether running in cloud mode
94
+ default_project_mode: Whether to use default project when not specified
95
+ default_project: The default project name
96
+
97
+ Returns:
98
+ ProjectResolver configured with current environment
99
+ """
100
+ constrained = os.environ.get("BASIC_MEMORY_MCP_PROJECT")
101
+ return cls(
102
+ cloud_mode=cloud_mode,
103
+ default_project_mode=default_project_mode,
104
+ default_project=default_project,
105
+ constrained_project=constrained,
106
+ )
107
+
108
+ def resolve(
109
+ self,
110
+ project: Optional[str] = None,
111
+ allow_discovery: bool = False,
112
+ ) -> ResolvedProject:
113
+ """Resolve project: cloud-mode check first, then the three-tier local hierarchy.
114
+
115
+ Resolution order:
116
+ 1. Cloud mode check (project required unless discovery allowed)
117
+ 2. Constrained project from env var (highest priority in local mode)
118
+ 3. Explicit project parameter
119
+ 4. Default project if default_project_mode=true
120
+
121
+ Args:
122
+ project: Optional explicit project parameter
123
+ allow_discovery: If True, allows returning None in cloud mode
124
+ for discovery operations (e.g., recent_activity across projects)
125
+
126
+ Returns:
127
+ ResolvedProject with project name, resolution mode, and reason
128
+
129
+ Raises:
130
+ ValueError: If in cloud mode and no project specified (unless discovery allowed)
131
+ """
132
+ # --- Cloud Mode Handling ---
133
+ # In cloud mode, project is required unless discovery is explicitly allowed
134
+ if self.cloud_mode:
135
+ if project:
136
+ logger.debug(f"Cloud mode: using explicit project '{project}'")
137
+ return ResolvedProject(
138
+ project=project,
139
+ mode=ResolutionMode.CLOUD_EXPLICIT,
140
+ reason=f"Explicit project in cloud mode: {project}",
141
+ )
142
+ elif allow_discovery:
143
+ logger.debug("Cloud mode: discovery mode allowed, no project required")
144
+ return ResolvedProject(
145
+ project=None,
146
+ mode=ResolutionMode.CLOUD_DISCOVERY,
147
+ reason="Discovery mode enabled in cloud",
148
+ )
149
+ else:
150
+ raise ValueError("No project specified. Project is required for cloud mode.")
151
+
152
+ # --- Local Mode: Three-Tier Hierarchy ---
153
+
154
+ # Priority 1: Env constraint (BASIC_MEMORY_MCP_PROJECT) overrides explicit and default projects
155
+ if self.constrained_project:
156
+ logger.debug(f"Using CLI constrained project: {self.constrained_project}")
157
+ return ResolvedProject(
158
+ project=self.constrained_project,
159
+ mode=ResolutionMode.ENV_CONSTRAINT,
160
+ reason=f"Environment constraint: BASIC_MEMORY_MCP_PROJECT={self.constrained_project}",
161
+ )
162
+
163
+ # Priority 2: Explicit project parameter
164
+ if project:
165
+ logger.debug(f"Using explicit project parameter: {project}")
166
+ return ResolvedProject(
167
+ project=project,
168
+ mode=ResolutionMode.EXPLICIT,
169
+ reason=f"Explicit parameter: {project}",
170
+ )
171
+
172
+ # Priority 3: Default project mode
173
+ if self.default_project_mode and self.default_project:
174
+ logger.debug(f"Using default project from config: {self.default_project}")
175
+ return ResolvedProject(
176
+ project=self.default_project,
177
+ mode=ResolutionMode.DEFAULT,
178
+ reason=f"Default project mode: {self.default_project}",
179
+ )
180
+
181
+ # No resolution possible
182
+ logger.debug("No project resolution possible")
183
+ return ResolvedProject(
184
+ project=None,
185
+ mode=ResolutionMode.NONE,
186
+ reason="No project specified and default_project_mode is disabled",
187
+ )
188
+
189
+ def require_project(
190
+ self,
191
+ project: Optional[str] = None,
192
+ error_message: Optional[str] = None,
193
+ ) -> ResolvedProject:
194
+ """Resolve project, raising an error if not resolved.
195
+
196
+ Convenience method for operations that require a project.
197
+
198
+ Args:
199
+ project: Optional explicit project parameter
200
+ error_message: Custom error message if project not resolved in local mode (in cloud mode, resolve() raises its own ValueError first)
201
+
202
+ Returns:
203
+ ResolvedProject (always with a non-None project)
204
+
205
+ Raises:
206
+ ValueError: If project could not be resolved
207
+ """
208
+ result = self.resolve(project, allow_discovery=False)
209
+ if not result.is_resolved:
210
+ msg = error_message or (
211
+ "No project specified. Either set 'default_project_mode=true' in config, "
212
+ "or provide a 'project' argument."
213
+ )
214
+ raise ValueError(msg)
215
+ return result
216
+
217
+
218
+ __all__ = [
219
+ "ProjectResolver",
220
+ "ResolvedProject",
221
+ "ResolutionMode",
222
+ ]
@@ -1,7 +1,8 @@
1
1
  """Repository for managing entities in the knowledge graph."""
2
2
 
3
3
  from pathlib import Path
4
- from typing import List, Optional, Sequence, Union
4
+ from typing import List, Optional, Sequence, Union, Any
5
+
5
6
 
6
7
  from loguru import logger
7
8
  from sqlalchemy import select
@@ -9,6 +10,7 @@ from sqlalchemy.exc import IntegrityError
9
10
  from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
10
11
  from sqlalchemy.orm import selectinload
11
12
  from sqlalchemy.orm.interfaces import LoaderOption
13
+ from sqlalchemy.engine import Row
12
14
 
13
15
  from basic_memory import db
14
16
  from basic_memory.models.knowledge import Entity, Observation, Relation
@@ -31,6 +33,34 @@ class EntityRepository(Repository[Entity]):
31
33
  """
32
34
  super().__init__(session_maker, Entity, project_id=project_id)
33
35
 
36
+ async def get_by_id(self, entity_id: int) -> Optional[Entity]: # pragma: no cover
37
+ """Get entity by numeric ID.
38
+
39
+ Args:
40
+ entity_id: Numeric entity ID
41
+
42
+ Returns:
43
+ Entity if found, None otherwise
44
+ """
45
+ async with db.scoped_session(self.session_maker) as session:
46
+ return await self.select_by_id(session, entity_id)
47
+
48
+ async def get_by_external_id(self, external_id: str) -> Optional[Entity]:
49
+ """Get entity by external UUID.
50
+
51
+ Args:
52
+ external_id: External UUID identifier
53
+
54
+ Returns:
55
+ Entity if found, None otherwise
56
+ """
57
+ query = (
58
+ self.select()
59
+ .where(Entity.external_id == external_id)
60
+ .options(*self.get_load_options())
61
+ )
62
+ return await self.find_one(query)
63
+
34
64
  async def get_by_permalink(self, permalink: str) -> Optional[Entity]:
35
65
  """Get entity by permalink.
36
66
 
@@ -63,6 +93,129 @@ class EntityRepository(Repository[Entity]):
63
93
  )
64
94
  return await self.find_one(query)
65
95
 
96
+ # -------------------------------------------------------------------------
97
+ # Lightweight methods for permalink resolution (no eager loading)
98
+ # -------------------------------------------------------------------------
99
+
100
+ async def permalink_exists(self, permalink: str) -> bool:
101
+ """Check if a permalink exists without loading the full entity.
102
+
103
+ This is much faster than get_by_permalink() as it skips eager loading
104
+ of observations and relations. Use for existence checks in bulk operations.
105
+
106
+ Args:
107
+ permalink: Permalink to check
108
+
109
+ Returns:
110
+ True if permalink exists, False otherwise
111
+ """
112
+ query = select(Entity.id).where(Entity.permalink == permalink).limit(1)
113
+ query = self._add_project_filter(query)
114
+ result = await self.execute_query(query, use_query_options=False)
115
+ return result.scalar_one_or_none() is not None
116
+
117
+ async def get_file_path_for_permalink(self, permalink: str) -> Optional[str]:
118
+ """Get the file_path for a permalink without loading the full entity.
119
+
120
+ Use when you only need the file_path, not the full entity with relations.
121
+
122
+ Args:
123
+ permalink: Permalink to look up
124
+
125
+ Returns:
126
+ file_path string if found, None otherwise
127
+ """
128
+ query = select(Entity.file_path).where(Entity.permalink == permalink)
129
+ query = self._add_project_filter(query)
130
+ result = await self.execute_query(query, use_query_options=False)
131
+ return result.scalar_one_or_none()
132
+
133
+ async def get_permalink_for_file_path(self, file_path: Union[Path, str]) -> Optional[str]:
134
+ """Get the permalink for a file_path without loading the full entity.
135
+
136
+ Use when you only need the permalink, not the full entity with relations.
137
+
138
+ Args:
139
+ file_path: File path to look up
140
+
141
+ Returns:
142
+ permalink string if found, None otherwise
143
+ """
144
+ query = select(Entity.permalink).where(Entity.file_path == Path(file_path).as_posix())
145
+ query = self._add_project_filter(query)
146
+ result = await self.execute_query(query, use_query_options=False)
147
+ return result.scalar_one_or_none()
148
+
149
+ async def get_all_permalinks(self) -> List[str]:
150
+ """Get all permalinks for this project.
151
+
152
+ Optimized for bulk operations - returns only permalink strings
153
+ without loading entities or relationships.
154
+
155
+ Returns:
156
+ List of all permalinks in the project
157
+ """
158
+ query = select(Entity.permalink)
159
+ query = self._add_project_filter(query)
160
+ result = await self.execute_query(query, use_query_options=False)
161
+ return list(result.scalars().all())
162
+
163
+ async def get_permalink_to_file_path_map(self) -> dict[str, str]:
164
+ """Get a mapping of permalink -> file_path for all entities.
165
+
166
+ Optimized for bulk permalink resolution - loads minimal data in one query.
167
+
168
+ Returns:
169
+ Dict mapping permalink to file_path
170
+ """
171
+ query = select(Entity.permalink, Entity.file_path)
172
+ query = self._add_project_filter(query)
173
+ result = await self.execute_query(query, use_query_options=False)
174
+ return {row.permalink: row.file_path for row in result.all()}
175
+
176
+ async def get_file_path_to_permalink_map(self) -> dict[str, str]:
177
+ """Get a mapping of file_path -> permalink for all entities.
178
+
179
+ Optimized for bulk permalink resolution - loads minimal data in one query.
180
+
181
+ Returns:
182
+ Dict mapping file_path to permalink
183
+ """
184
+ query = select(Entity.file_path, Entity.permalink)
185
+ query = self._add_project_filter(query)
186
+ result = await self.execute_query(query, use_query_options=False)
187
+ return {row.file_path: row.permalink for row in result.all()}
188
+
189
+ async def get_by_file_paths(
190
+ self, session: AsyncSession, file_paths: Sequence[Union[Path, str]]
191
+ ) -> List[Row[Any]]:
192
+ """Get file paths and checksums for multiple entities (optimized for change detection).
193
+
194
+ Only queries file_path and checksum columns, skips loading full entities and relationships.
195
+ This is much faster than loading complete Entity objects when you only need checksums.
196
+
197
+ Args:
198
+ session: Database session to use for the query
199
+ file_paths: List of file paths to query
200
+
201
+ Returns:
202
+ List of (file_path, checksum) tuples for matching entities
203
+ """
204
+ if not file_paths: # pragma: no cover
205
+ return [] # pragma: no cover
206
+
207
+ # Convert all paths to POSIX strings for consistent comparison
208
+ posix_paths = [Path(fp).as_posix() for fp in file_paths] # pragma: no cover
209
+
210
+ # Query ONLY file_path and checksum columns (not full Entity objects)
211
+ query = select(Entity.file_path, Entity.checksum).where( # pragma: no cover
212
+ Entity.file_path.in_(posix_paths)
213
+ )
214
+ query = self._add_project_filter(query) # pragma: no cover
215
+
216
+ result = await session.execute(query) # pragma: no cover
217
+ return list(result.all()) # pragma: no cover
218
+
66
219
  async def find_by_checksum(self, checksum: str) -> Sequence[Entity]:
67
220
  """Find entities with the given checksum.
68
221
 
@@ -80,6 +233,34 @@ class EntityRepository(Repository[Entity]):
80
233
  result = await self.execute_query(query, use_query_options=False)
81
234
  return list(result.scalars().all())
82
235
 
236
+ async def find_by_checksums(self, checksums: Sequence[str]) -> Sequence[Entity]:
237
+ """Find entities with any of the given checksums (batch query for move detection).
238
+
239
+ This is a batch-optimized version of find_by_checksum() that queries multiple checksums
240
+ in a single database query. Used for efficient move detection in cloud indexing.
241
+
242
+ Performance: For 1000 new files, this makes 1 query vs 1000 individual queries (~100x faster).
243
+
244
+ Example:
245
+ When processing new files, we check if any are actually moved files by finding
246
+ entities with matching checksums at different paths.
247
+
248
+ Args:
249
+ checksums: List of file content checksums to search for
250
+
251
+ Returns:
252
+ Sequence of entities with matching checksums (may be empty).
253
+ Multiple entities may have the same checksum if files were copied.
254
+ """
255
+ if not checksums: # pragma: no cover
256
+ return [] # pragma: no cover
257
+
258
+ # Query: SELECT * FROM entities WHERE checksum IN (checksum1, checksum2, ...)
259
+ query = self.select().where(Entity.checksum.in_(checksums)) # pragma: no cover
260
+ # Don't load relationships for move detection - we only need file_path and checksum
261
+ result = await self.execute_query(query, use_query_options=False) # pragma: no cover
262
+ return list(result.scalars().all()) # pragma: no cover
263
+
83
264
  async def delete_by_file_path(self, file_path: Union[Path, str]) -> bool:
84
265
  """Delete entity with the provided file_path.
85
266
 
@@ -155,8 +336,13 @@ class EntityRepository(Repository[Entity]):
155
336
 
156
337
  except IntegrityError as e:
157
338
  # Check if this is a FOREIGN KEY constraint failure
339
+ # SQLite: "FOREIGN KEY constraint failed"
340
+ # Postgres: "violates foreign key constraint"
158
341
  error_str = str(e)
159
- if "FOREIGN KEY constraint failed" in error_str:
342
+ if (
343
+ "FOREIGN KEY constraint failed" in error_str
344
+ or "violates foreign key constraint" in error_str
345
+ ):
160
346
  # Import locally to avoid circular dependency (repository -> services -> repository)
161
347
  from basic_memory.services.exceptions import SyncFatalError
162
348
 
@@ -310,5 +496,26 @@ class EntityRepository(Repository[Entity]):
310
496
 
311
497
  # Insert with unique permalink
312
498
  session.add(entity)
313
- await session.flush()
499
+ try:
500
+ await session.flush()
501
+ except IntegrityError as e: # pragma: no cover
502
+ # Check if this is a FOREIGN KEY constraint failure
503
+ # SQLite: "FOREIGN KEY constraint failed"
504
+ # Postgres: "violates foreign key constraint"
505
+ error_str = str(e)
506
+ if (
507
+ "FOREIGN KEY constraint failed" in error_str
508
+ or "violates foreign key constraint" in error_str
509
+ ):
510
+ # Import locally to avoid circular dependency (repository -> services -> repository)
511
+ from basic_memory.services.exceptions import SyncFatalError
512
+
513
+ # Project doesn't exist in database - this is a fatal sync error
514
+ raise SyncFatalError( # pragma: no cover
515
+ f"Cannot sync file '{entity.file_path}': "
516
+ f"project_id={entity.project_id} does not exist in database. "
517
+ f"The project may have been deleted. This sync will be terminated."
518
+ ) from e
519
+ # Re-raise if not a foreign key error
520
+ raise # pragma: no cover
314
521
  return entity
@@ -2,6 +2,7 @@
2
2
 
3
3
  from typing import Dict, List, Sequence
4
4
 
5
+
5
6
  from sqlalchemy import select
6
7
  from sqlalchemy.ext.asyncio import async_sessionmaker
7
8