basic-memory 0.7.0__py3-none-any.whl → 0.17.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of basic-memory might be problematic. Click here for more details.

Files changed (195)
  1. basic_memory/__init__.py +5 -1
  2. basic_memory/alembic/alembic.ini +119 -0
  3. basic_memory/alembic/env.py +130 -20
  4. basic_memory/alembic/migrations.py +4 -9
  5. basic_memory/alembic/versions/314f1ea54dc4_add_postgres_full_text_search_support_.py +131 -0
  6. basic_memory/alembic/versions/502b60eaa905_remove_required_from_entity_permalink.py +51 -0
  7. basic_memory/alembic/versions/5fe1ab1ccebe_add_projects_table.py +120 -0
  8. basic_memory/alembic/versions/647e7a75e2cd_project_constraint_fix.py +112 -0
  9. basic_memory/alembic/versions/6830751f5fb6_merge_multiple_heads.py +24 -0
  10. basic_memory/alembic/versions/9d9c1cb7d8f5_add_mtime_and_size_columns_to_entity_.py +49 -0
  11. basic_memory/alembic/versions/a1b2c3d4e5f6_fix_project_foreign_keys.py +49 -0
  12. basic_memory/alembic/versions/a2b3c4d5e6f7_add_search_index_entity_cascade.py +56 -0
  13. basic_memory/alembic/versions/b3c3938bacdb_relation_to_name_unique_index.py +44 -0
  14. basic_memory/alembic/versions/cc7172b46608_update_search_index_schema.py +113 -0
  15. basic_memory/alembic/versions/e7e1f4367280_add_scan_watermark_tracking_to_project.py +37 -0
  16. basic_memory/alembic/versions/f8a9b2c3d4e5_add_pg_trgm_for_fuzzy_link_resolution.py +239 -0
  17. basic_memory/alembic/versions/g9a0b3c4d5e6_add_external_id_to_project_and_entity.py +173 -0
  18. basic_memory/api/app.py +87 -20
  19. basic_memory/api/container.py +133 -0
  20. basic_memory/api/routers/__init__.py +4 -1
  21. basic_memory/api/routers/directory_router.py +84 -0
  22. basic_memory/api/routers/importer_router.py +152 -0
  23. basic_memory/api/routers/knowledge_router.py +180 -23
  24. basic_memory/api/routers/management_router.py +80 -0
  25. basic_memory/api/routers/memory_router.py +9 -64
  26. basic_memory/api/routers/project_router.py +460 -0
  27. basic_memory/api/routers/prompt_router.py +260 -0
  28. basic_memory/api/routers/resource_router.py +136 -11
  29. basic_memory/api/routers/search_router.py +5 -5
  30. basic_memory/api/routers/utils.py +169 -0
  31. basic_memory/api/template_loader.py +292 -0
  32. basic_memory/api/v2/__init__.py +35 -0
  33. basic_memory/api/v2/routers/__init__.py +21 -0
  34. basic_memory/api/v2/routers/directory_router.py +93 -0
  35. basic_memory/api/v2/routers/importer_router.py +181 -0
  36. basic_memory/api/v2/routers/knowledge_router.py +427 -0
  37. basic_memory/api/v2/routers/memory_router.py +130 -0
  38. basic_memory/api/v2/routers/project_router.py +359 -0
  39. basic_memory/api/v2/routers/prompt_router.py +269 -0
  40. basic_memory/api/v2/routers/resource_router.py +286 -0
  41. basic_memory/api/v2/routers/search_router.py +73 -0
  42. basic_memory/cli/app.py +80 -10
  43. basic_memory/cli/auth.py +300 -0
  44. basic_memory/cli/commands/__init__.py +15 -2
  45. basic_memory/cli/commands/cloud/__init__.py +6 -0
  46. basic_memory/cli/commands/cloud/api_client.py +127 -0
  47. basic_memory/cli/commands/cloud/bisync_commands.py +110 -0
  48. basic_memory/cli/commands/cloud/cloud_utils.py +108 -0
  49. basic_memory/cli/commands/cloud/core_commands.py +195 -0
  50. basic_memory/cli/commands/cloud/rclone_commands.py +397 -0
  51. basic_memory/cli/commands/cloud/rclone_config.py +110 -0
  52. basic_memory/cli/commands/cloud/rclone_installer.py +263 -0
  53. basic_memory/cli/commands/cloud/upload.py +240 -0
  54. basic_memory/cli/commands/cloud/upload_command.py +124 -0
  55. basic_memory/cli/commands/command_utils.py +99 -0
  56. basic_memory/cli/commands/db.py +87 -12
  57. basic_memory/cli/commands/format.py +198 -0
  58. basic_memory/cli/commands/import_chatgpt.py +47 -223
  59. basic_memory/cli/commands/import_claude_conversations.py +48 -171
  60. basic_memory/cli/commands/import_claude_projects.py +53 -160
  61. basic_memory/cli/commands/import_memory_json.py +55 -111
  62. basic_memory/cli/commands/mcp.py +67 -11
  63. basic_memory/cli/commands/project.py +889 -0
  64. basic_memory/cli/commands/status.py +52 -34
  65. basic_memory/cli/commands/telemetry.py +81 -0
  66. basic_memory/cli/commands/tool.py +341 -0
  67. basic_memory/cli/container.py +84 -0
  68. basic_memory/cli/main.py +14 -6
  69. basic_memory/config.py +580 -26
  70. basic_memory/db.py +285 -28
  71. basic_memory/deps/__init__.py +293 -0
  72. basic_memory/deps/config.py +26 -0
  73. basic_memory/deps/db.py +56 -0
  74. basic_memory/deps/importers.py +200 -0
  75. basic_memory/deps/projects.py +238 -0
  76. basic_memory/deps/repositories.py +179 -0
  77. basic_memory/deps/services.py +480 -0
  78. basic_memory/deps.py +16 -185
  79. basic_memory/file_utils.py +318 -54
  80. basic_memory/ignore_utils.py +297 -0
  81. basic_memory/importers/__init__.py +27 -0
  82. basic_memory/importers/base.py +100 -0
  83. basic_memory/importers/chatgpt_importer.py +245 -0
  84. basic_memory/importers/claude_conversations_importer.py +192 -0
  85. basic_memory/importers/claude_projects_importer.py +184 -0
  86. basic_memory/importers/memory_json_importer.py +128 -0
  87. basic_memory/importers/utils.py +61 -0
  88. basic_memory/markdown/entity_parser.py +182 -23
  89. basic_memory/markdown/markdown_processor.py +70 -7
  90. basic_memory/markdown/plugins.py +43 -23
  91. basic_memory/markdown/schemas.py +1 -1
  92. basic_memory/markdown/utils.py +38 -14
  93. basic_memory/mcp/async_client.py +135 -4
  94. basic_memory/mcp/clients/__init__.py +28 -0
  95. basic_memory/mcp/clients/directory.py +70 -0
  96. basic_memory/mcp/clients/knowledge.py +176 -0
  97. basic_memory/mcp/clients/memory.py +120 -0
  98. basic_memory/mcp/clients/project.py +89 -0
  99. basic_memory/mcp/clients/resource.py +71 -0
  100. basic_memory/mcp/clients/search.py +65 -0
  101. basic_memory/mcp/container.py +110 -0
  102. basic_memory/mcp/project_context.py +155 -0
  103. basic_memory/mcp/prompts/__init__.py +19 -0
  104. basic_memory/mcp/prompts/ai_assistant_guide.py +70 -0
  105. basic_memory/mcp/prompts/continue_conversation.py +62 -0
  106. basic_memory/mcp/prompts/recent_activity.py +188 -0
  107. basic_memory/mcp/prompts/search.py +57 -0
  108. basic_memory/mcp/prompts/utils.py +162 -0
  109. basic_memory/mcp/resources/ai_assistant_guide.md +283 -0
  110. basic_memory/mcp/resources/project_info.py +71 -0
  111. basic_memory/mcp/server.py +61 -9
  112. basic_memory/mcp/tools/__init__.py +33 -21
  113. basic_memory/mcp/tools/build_context.py +120 -0
  114. basic_memory/mcp/tools/canvas.py +152 -0
  115. basic_memory/mcp/tools/chatgpt_tools.py +190 -0
  116. basic_memory/mcp/tools/delete_note.py +249 -0
  117. basic_memory/mcp/tools/edit_note.py +325 -0
  118. basic_memory/mcp/tools/list_directory.py +157 -0
  119. basic_memory/mcp/tools/move_note.py +549 -0
  120. basic_memory/mcp/tools/project_management.py +204 -0
  121. basic_memory/mcp/tools/read_content.py +281 -0
  122. basic_memory/mcp/tools/read_note.py +265 -0
  123. basic_memory/mcp/tools/recent_activity.py +528 -0
  124. basic_memory/mcp/tools/search.py +377 -24
  125. basic_memory/mcp/tools/utils.py +402 -16
  126. basic_memory/mcp/tools/view_note.py +78 -0
  127. basic_memory/mcp/tools/write_note.py +230 -0
  128. basic_memory/models/__init__.py +3 -2
  129. basic_memory/models/knowledge.py +82 -17
  130. basic_memory/models/project.py +93 -0
  131. basic_memory/models/search.py +68 -8
  132. basic_memory/project_resolver.py +222 -0
  133. basic_memory/repository/__init__.py +2 -0
  134. basic_memory/repository/entity_repository.py +437 -8
  135. basic_memory/repository/observation_repository.py +36 -3
  136. basic_memory/repository/postgres_search_repository.py +451 -0
  137. basic_memory/repository/project_info_repository.py +10 -0
  138. basic_memory/repository/project_repository.py +140 -0
  139. basic_memory/repository/relation_repository.py +79 -4
  140. basic_memory/repository/repository.py +148 -29
  141. basic_memory/repository/search_index_row.py +95 -0
  142. basic_memory/repository/search_repository.py +79 -268
  143. basic_memory/repository/search_repository_base.py +241 -0
  144. basic_memory/repository/sqlite_search_repository.py +437 -0
  145. basic_memory/runtime.py +61 -0
  146. basic_memory/schemas/__init__.py +22 -9
  147. basic_memory/schemas/base.py +131 -12
  148. basic_memory/schemas/cloud.py +50 -0
  149. basic_memory/schemas/directory.py +31 -0
  150. basic_memory/schemas/importer.py +35 -0
  151. basic_memory/schemas/memory.py +194 -25
  152. basic_memory/schemas/project_info.py +213 -0
  153. basic_memory/schemas/prompt.py +90 -0
  154. basic_memory/schemas/request.py +56 -2
  155. basic_memory/schemas/response.py +85 -28
  156. basic_memory/schemas/search.py +36 -35
  157. basic_memory/schemas/sync_report.py +72 -0
  158. basic_memory/schemas/v2/__init__.py +27 -0
  159. basic_memory/schemas/v2/entity.py +133 -0
  160. basic_memory/schemas/v2/resource.py +47 -0
  161. basic_memory/services/__init__.py +2 -1
  162. basic_memory/services/context_service.py +451 -138
  163. basic_memory/services/directory_service.py +310 -0
  164. basic_memory/services/entity_service.py +636 -71
  165. basic_memory/services/exceptions.py +21 -0
  166. basic_memory/services/file_service.py +402 -33
  167. basic_memory/services/initialization.py +216 -0
  168. basic_memory/services/link_resolver.py +50 -56
  169. basic_memory/services/project_service.py +888 -0
  170. basic_memory/services/search_service.py +232 -37
  171. basic_memory/sync/__init__.py +4 -2
  172. basic_memory/sync/background_sync.py +26 -0
  173. basic_memory/sync/coordinator.py +160 -0
  174. basic_memory/sync/sync_service.py +1200 -109
  175. basic_memory/sync/watch_service.py +432 -135
  176. basic_memory/telemetry.py +249 -0
  177. basic_memory/templates/prompts/continue_conversation.hbs +110 -0
  178. basic_memory/templates/prompts/search.hbs +101 -0
  179. basic_memory/utils.py +407 -54
  180. basic_memory-0.17.4.dist-info/METADATA +617 -0
  181. basic_memory-0.17.4.dist-info/RECORD +193 -0
  182. {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/WHEEL +1 -1
  183. {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/entry_points.txt +1 -0
  184. basic_memory/alembic/README +0 -1
  185. basic_memory/cli/commands/sync.py +0 -206
  186. basic_memory/cli/commands/tools.py +0 -157
  187. basic_memory/mcp/tools/knowledge.py +0 -68
  188. basic_memory/mcp/tools/memory.py +0 -170
  189. basic_memory/mcp/tools/notes.py +0 -202
  190. basic_memory/schemas/discovery.py +0 -28
  191. basic_memory/sync/file_change_scanner.py +0 -158
  192. basic_memory/sync/utils.py +0 -31
  193. basic_memory-0.7.0.dist-info/METADATA +0 -378
  194. basic_memory-0.7.0.dist-info/RECORD +0 -82
  195. {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/licenses/LICENSE +0 -0
@@ -1,12 +1,18 @@
1
1
  """Repository for managing entities in the knowledge graph."""
2
2
 
3
3
  from pathlib import Path
4
- from typing import List, Optional, Sequence, Union
4
+ from typing import List, Optional, Sequence, Union, Any
5
5
 
6
+
7
+ from loguru import logger
8
+ from sqlalchemy import select
9
+ from sqlalchemy.exc import IntegrityError
6
10
  from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
7
11
  from sqlalchemy.orm import selectinload
8
12
  from sqlalchemy.orm.interfaces import LoaderOption
13
+ from sqlalchemy.engine import Row
9
14
 
15
+ from basic_memory import db
10
16
  from basic_memory.models.knowledge import Entity, Observation, Relation
11
17
  from basic_memory.repository.repository import Repository
12
18
 
@@ -18,9 +24,42 @@ class EntityRepository(Repository[Entity]):
18
24
  to strings before passing to repository methods.
19
25
  """
20
26
 
21
- def __init__(self, session_maker: async_sessionmaker[AsyncSession]):
22
- """Initialize with session maker."""
23
- super().__init__(session_maker, Entity)
27
+ def __init__(self, session_maker: async_sessionmaker[AsyncSession], project_id: int):
28
+ """Initialize with session maker and project_id filter.
29
+
30
+ Args:
31
+ session_maker: SQLAlchemy session maker
32
+ project_id: Project ID to filter all operations by
33
+ """
34
+ super().__init__(session_maker, Entity, project_id=project_id)
35
+
36
+ async def get_by_id(self, entity_id: int) -> Optional[Entity]: # pragma: no cover
37
+ """Get entity by numeric ID.
38
+
39
+ Args:
40
+ entity_id: Numeric entity ID
41
+
42
+ Returns:
43
+ Entity if found, None otherwise
44
+ """
45
+ async with db.scoped_session(self.session_maker) as session:
46
+ return await self.select_by_id(session, entity_id)
47
+
48
+ async def get_by_external_id(self, external_id: str) -> Optional[Entity]:
49
+ """Get entity by external UUID.
50
+
51
+ Args:
52
+ external_id: External UUID identifier
53
+
54
+ Returns:
55
+ Entity if found, None otherwise
56
+ """
57
+ query = (
58
+ self.select()
59
+ .where(Entity.external_id == external_id)
60
+ .options(*self.get_load_options())
61
+ )
62
+ return await self.find_one(query)
24
63
 
25
64
  async def get_by_permalink(self, permalink: str) -> Optional[Entity]:
26
65
  """Get entity by permalink.
@@ -31,14 +70,15 @@ class EntityRepository(Repository[Entity]):
31
70
  query = self.select().where(Entity.permalink == permalink).options(*self.get_load_options())
32
71
  return await self.find_one(query)
33
72
 
34
- async def get_by_title(self, title: str) -> Optional[Entity]:
73
+ async def get_by_title(self, title: str) -> Sequence[Entity]:
35
74
  """Get entity by title.
36
75
 
37
76
  Args:
38
77
  title: Title of the entity to find
39
78
  """
40
79
  query = self.select().where(Entity.title == title).options(*self.get_load_options())
41
- return await self.find_one(query)
80
+ result = await self.execute_query(query)
81
+ return list(result.scalars().all())
42
82
 
43
83
  async def get_by_file_path(self, file_path: Union[Path, str]) -> Optional[Entity]:
44
84
  """Get entity by file_path.
@@ -48,18 +88,186 @@ class EntityRepository(Repository[Entity]):
48
88
  """
49
89
  query = (
50
90
  self.select()
51
- .where(Entity.file_path == str(file_path))
91
+ .where(Entity.file_path == Path(file_path).as_posix())
52
92
  .options(*self.get_load_options())
53
93
  )
54
94
  return await self.find_one(query)
55
95
 
96
+ # -------------------------------------------------------------------------
97
+ # Lightweight methods for permalink resolution (no eager loading)
98
+ # -------------------------------------------------------------------------
99
+
100
+ async def permalink_exists(self, permalink: str) -> bool:
101
+ """Check if a permalink exists without loading the full entity.
102
+
103
+ This is much faster than get_by_permalink() as it skips eager loading
104
+ of observations and relations. Use for existence checks in bulk operations.
105
+
106
+ Args:
107
+ permalink: Permalink to check
108
+
109
+ Returns:
110
+ True if permalink exists, False otherwise
111
+ """
112
+ query = select(Entity.id).where(Entity.permalink == permalink).limit(1)
113
+ query = self._add_project_filter(query)
114
+ result = await self.execute_query(query, use_query_options=False)
115
+ return result.scalar_one_or_none() is not None
116
+
117
+ async def get_file_path_for_permalink(self, permalink: str) -> Optional[str]:
118
+ """Get the file_path for a permalink without loading the full entity.
119
+
120
+ Use when you only need the file_path, not the full entity with relations.
121
+
122
+ Args:
123
+ permalink: Permalink to look up
124
+
125
+ Returns:
126
+ file_path string if found, None otherwise
127
+ """
128
+ query = select(Entity.file_path).where(Entity.permalink == permalink)
129
+ query = self._add_project_filter(query)
130
+ result = await self.execute_query(query, use_query_options=False)
131
+ return result.scalar_one_or_none()
132
+
133
+ async def get_permalink_for_file_path(self, file_path: Union[Path, str]) -> Optional[str]:
134
+ """Get the permalink for a file_path without loading the full entity.
135
+
136
+ Use when you only need the permalink, not the full entity with relations.
137
+
138
+ Args:
139
+ file_path: File path to look up
140
+
141
+ Returns:
142
+ permalink string if found, None otherwise
143
+ """
144
+ query = select(Entity.permalink).where(Entity.file_path == Path(file_path).as_posix())
145
+ query = self._add_project_filter(query)
146
+ result = await self.execute_query(query, use_query_options=False)
147
+ return result.scalar_one_or_none()
148
+
149
+ async def get_all_permalinks(self) -> List[str]:
150
+ """Get all permalinks for this project.
151
+
152
+ Optimized for bulk operations - returns only permalink strings
153
+ without loading entities or relationships.
154
+
155
+ Returns:
156
+ List of all permalinks in the project
157
+ """
158
+ query = select(Entity.permalink)
159
+ query = self._add_project_filter(query)
160
+ result = await self.execute_query(query, use_query_options=False)
161
+ return list(result.scalars().all())
162
+
163
+ async def get_permalink_to_file_path_map(self) -> dict[str, str]:
164
+ """Get a mapping of permalink -> file_path for all entities.
165
+
166
+ Optimized for bulk permalink resolution - loads minimal data in one query.
167
+
168
+ Returns:
169
+ Dict mapping permalink to file_path
170
+ """
171
+ query = select(Entity.permalink, Entity.file_path)
172
+ query = self._add_project_filter(query)
173
+ result = await self.execute_query(query, use_query_options=False)
174
+ return {row.permalink: row.file_path for row in result.all()}
175
+
176
+ async def get_file_path_to_permalink_map(self) -> dict[str, str]:
177
+ """Get a mapping of file_path -> permalink for all entities.
178
+
179
+ Optimized for bulk permalink resolution - loads minimal data in one query.
180
+
181
+ Returns:
182
+ Dict mapping file_path to permalink
183
+ """
184
+ query = select(Entity.file_path, Entity.permalink)
185
+ query = self._add_project_filter(query)
186
+ result = await self.execute_query(query, use_query_options=False)
187
+ return {row.file_path: row.permalink for row in result.all()}
188
+
189
+ async def get_by_file_paths(
190
+ self, session: AsyncSession, file_paths: Sequence[Union[Path, str]]
191
+ ) -> List[Row[Any]]:
192
+ """Get file paths and checksums for multiple entities (optimized for change detection).
193
+
194
+ Only queries file_path and checksum columns, skips loading full entities and relationships.
195
+ This is much faster than loading complete Entity objects when you only need checksums.
196
+
197
+ Args:
198
+ session: Database session to use for the query
199
+ file_paths: List of file paths to query
200
+
201
+ Returns:
202
+ List of (file_path, checksum) tuples for matching entities
203
+ """
204
+ if not file_paths: # pragma: no cover
205
+ return [] # pragma: no cover
206
+
207
+ # Convert all paths to POSIX strings for consistent comparison
208
+ posix_paths = [Path(fp).as_posix() for fp in file_paths] # pragma: no cover
209
+
210
+ # Query ONLY file_path and checksum columns (not full Entity objects)
211
+ query = select(Entity.file_path, Entity.checksum).where( # pragma: no cover
212
+ Entity.file_path.in_(posix_paths)
213
+ )
214
+ query = self._add_project_filter(query) # pragma: no cover
215
+
216
+ result = await session.execute(query) # pragma: no cover
217
+ return list(result.all()) # pragma: no cover
218
+
219
+ async def find_by_checksum(self, checksum: str) -> Sequence[Entity]:
220
+ """Find entities with the given checksum.
221
+
222
+ Used for move detection - finds entities that may have been moved to a new path.
223
+ Multiple entities may have the same checksum if files were copied.
224
+
225
+ Args:
226
+ checksum: File content checksum to search for
227
+
228
+ Returns:
229
+ Sequence of entities with matching checksum (may be empty)
230
+ """
231
+ query = self.select().where(Entity.checksum == checksum)
232
+ # Don't load relationships for move detection - we only need file_path and checksum
233
+ result = await self.execute_query(query, use_query_options=False)
234
+ return list(result.scalars().all())
235
+
236
+ async def find_by_checksums(self, checksums: Sequence[str]) -> Sequence[Entity]:
237
+ """Find entities with any of the given checksums (batch query for move detection).
238
+
239
+ This is a batch-optimized version of find_by_checksum() that queries multiple checksums
240
+ in a single database query. Used for efficient move detection in cloud indexing.
241
+
242
+ Performance: For 1000 new files, this makes 1 query vs 1000 individual queries (~100x faster).
243
+
244
+ Example:
245
+ When processing new files, we check if any are actually moved files by finding
246
+ entities with matching checksums at different paths.
247
+
248
+ Args:
249
+ checksums: List of file content checksums to search for
250
+
251
+ Returns:
252
+ Sequence of entities with matching checksums (may be empty).
253
+ Multiple entities may have the same checksum if files were copied.
254
+ """
255
+ if not checksums: # pragma: no cover
256
+ return [] # pragma: no cover
257
+
258
+ # Query: SELECT * FROM entities WHERE checksum IN (checksum1, checksum2, ...)
259
+ query = self.select().where(Entity.checksum.in_(checksums)) # pragma: no cover
260
+ # Don't load relationships for move detection - we only need file_path and checksum
261
+ result = await self.execute_query(query, use_query_options=False) # pragma: no cover
262
+ return list(result.scalars().all()) # pragma: no cover
263
+
56
264
  async def delete_by_file_path(self, file_path: Union[Path, str]) -> bool:
57
265
  """Delete entity with the provided file_path.
58
266
 
59
267
  Args:
60
268
  file_path: Path to the entity file (will be converted to string internally)
61
269
  """
62
- return await self.delete_by_fields(file_path=str(file_path))
270
+ return await self.delete_by_fields(file_path=Path(file_path).as_posix())
63
271
 
64
272
  def get_load_options(self) -> List[LoaderOption]:
65
273
  """Get SQLAlchemy loader options for eager loading relationships."""
@@ -90,3 +298,224 @@ class EntityRepository(Repository[Entity]):
90
298
 
91
299
  result = await self.execute_query(query)
92
300
  return list(result.scalars().all())
301
+
302
+ async def upsert_entity(self, entity: Entity) -> Entity:
303
+ """Insert or update entity using simple try/catch with database-level conflict resolution.
304
+
305
+ Handles file_path race conditions by checking for existing entity on IntegrityError.
306
+ For permalink conflicts, generates a unique permalink with numeric suffix.
307
+
308
+ Args:
309
+ entity: The entity to insert or update
310
+
311
+ Returns:
312
+ The inserted or updated entity
313
+ """
314
+ async with db.scoped_session(self.session_maker) as session:
315
+ # Set project_id if applicable and not already set
316
+ self._set_project_id_if_needed(entity)
317
+
318
+ # Try simple insert first
319
+ try:
320
+ session.add(entity)
321
+ await session.flush()
322
+
323
+ # Return with relationships loaded
324
+ query = (
325
+ self.select()
326
+ .where(Entity.file_path == entity.file_path)
327
+ .options(*self.get_load_options())
328
+ )
329
+ result = await session.execute(query)
330
+ found = result.scalar_one_or_none()
331
+ if not found: # pragma: no cover
332
+ raise RuntimeError(
333
+ f"Failed to retrieve entity after insert: {entity.file_path}"
334
+ )
335
+ return found
336
+
337
+ except IntegrityError as e:
338
+ # Check if this is a FOREIGN KEY constraint failure
339
+ # SQLite: "FOREIGN KEY constraint failed"
340
+ # Postgres: "violates foreign key constraint"
341
+ error_str = str(e)
342
+ if (
343
+ "FOREIGN KEY constraint failed" in error_str
344
+ or "violates foreign key constraint" in error_str
345
+ ):
346
+ # Import locally to avoid circular dependency (repository -> services -> repository)
347
+ from basic_memory.services.exceptions import SyncFatalError
348
+
349
+ # Project doesn't exist in database - this is a fatal sync error
350
+ raise SyncFatalError(
351
+ f"Cannot sync file '{entity.file_path}': "
352
+ f"project_id={entity.project_id} does not exist in database. "
353
+ f"The project may have been deleted. This sync will be terminated."
354
+ ) from e
355
+
356
+ await session.rollback()
357
+
358
+ # Re-query after rollback to get a fresh, attached entity
359
+ existing_result = await session.execute(
360
+ select(Entity)
361
+ .where(
362
+ Entity.file_path == entity.file_path, Entity.project_id == entity.project_id
363
+ )
364
+ .options(*self.get_load_options())
365
+ )
366
+ existing_entity = existing_result.scalar_one_or_none()
367
+
368
+ if existing_entity:
369
+ # File path conflict - update the existing entity
370
+ logger.debug(
371
+ f"Resolving file_path conflict for {entity.file_path}, "
372
+ f"entity_id={existing_entity.id}, observations={len(entity.observations)}"
373
+ )
374
+ # Use merge to avoid session state conflicts
375
+ # Set the ID to update existing entity
376
+ entity.id = existing_entity.id
377
+
378
+ # Ensure observations reference the correct entity_id
379
+ for obs in entity.observations:
380
+ obs.entity_id = existing_entity.id
381
+ # Clear any existing ID to force INSERT as new observation
382
+ obs.id = None
383
+
384
+ # Merge the entity which will update the existing one
385
+ merged_entity = await session.merge(entity)
386
+
387
+ await session.commit()
388
+
389
+ # Re-query to get proper relationships loaded
390
+ final_result = await session.execute(
391
+ select(Entity)
392
+ .where(Entity.id == merged_entity.id)
393
+ .options(*self.get_load_options())
394
+ )
395
+ return final_result.scalar_one()
396
+
397
+ else:
398
+ # No file_path conflict - must be permalink conflict
399
+ # Generate unique permalink and retry
400
+ entity = await self._handle_permalink_conflict(entity, session)
401
+ return entity
402
+
403
+ async def get_all_file_paths(self) -> List[str]:
404
+ """Get all file paths for this project - optimized for deletion detection.
405
+
406
+ Returns only file_path strings without loading entities or relationships.
407
+ Used by streaming sync to detect deleted files efficiently.
408
+
409
+ Returns:
410
+ List of file_path strings for all entities in the project
411
+ """
412
+ query = select(Entity.file_path)
413
+ query = self._add_project_filter(query)
414
+
415
+ result = await self.execute_query(query, use_query_options=False)
416
+ return list(result.scalars().all())
417
+
418
+ async def get_distinct_directories(self) -> List[str]:
419
+ """Extract unique directory paths from file_path column.
420
+
421
+ Optimized method for getting directory structure without loading full entities
422
+ or relationships. Returns a sorted list of unique directory paths.
423
+
424
+ Returns:
425
+ List of unique directory paths (e.g., ["notes", "notes/meetings", "specs"])
426
+ """
427
+ # Query only file_path column, no entity objects or relationships
428
+ query = select(Entity.file_path).distinct()
429
+ query = self._add_project_filter(query)
430
+
431
+ # Execute with use_query_options=False to skip eager loading
432
+ result = await self.execute_query(query, use_query_options=False)
433
+ file_paths = [row for row in result.scalars().all()]
434
+
435
+ # Parse file paths to extract unique directories
436
+ directories = set()
437
+ for file_path in file_paths:
438
+ parts = [p for p in file_path.split("/") if p]
439
+ # Add all parent directories (exclude filename which is the last part)
440
+ for i in range(len(parts) - 1):
441
+ dir_path = "/".join(parts[: i + 1])
442
+ directories.add(dir_path)
443
+
444
+ return sorted(directories)
445
+
446
+ async def find_by_directory_prefix(self, directory_prefix: str) -> Sequence[Entity]:
447
+ """Find entities whose file_path starts with the given directory prefix.
448
+
449
+ Optimized method for listing directory contents without loading all entities.
450
+ Uses SQL LIKE pattern matching to filter entities by directory path.
451
+
452
+ Args:
453
+ directory_prefix: Directory path prefix (e.g., "docs", "docs/guides")
454
+ Empty string returns all entities (root directory)
455
+
456
+ Returns:
457
+ Sequence of entities in the specified directory and subdirectories
458
+ """
459
+ # Build SQL LIKE pattern
460
+ if directory_prefix == "" or directory_prefix == "/":
461
+ # Root directory - return all entities
462
+ return await self.find_all()
463
+
464
+ # Remove leading/trailing slashes for consistency
465
+ directory_prefix = directory_prefix.strip("/")
466
+
467
+ # Query entities with file_path starting with prefix
468
+ # Pattern matches "prefix/" to ensure we get files IN the directory,
469
+ # not just files whose names start with the prefix
470
+ pattern = f"{directory_prefix}/%"
471
+
472
+ query = self.select().where(Entity.file_path.like(pattern))
473
+
474
+ # Skip eager loading - we only need basic entity fields for directory trees
475
+ result = await self.execute_query(query, use_query_options=False)
476
+ return list(result.scalars().all())
477
+
478
+ async def _handle_permalink_conflict(self, entity: Entity, session: AsyncSession) -> Entity:
479
+ """Handle permalink conflicts by generating a unique permalink."""
480
+ base_permalink = entity.permalink
481
+ suffix = 1
482
+
483
+ # Find a unique permalink
484
+ while True:
485
+ test_permalink = f"{base_permalink}-{suffix}"
486
+ existing = await session.execute(
487
+ select(Entity).where(
488
+ Entity.permalink == test_permalink, Entity.project_id == entity.project_id
489
+ )
490
+ )
491
+ if existing.scalar_one_or_none() is None:
492
+ # Found unique permalink
493
+ entity.permalink = test_permalink
494
+ break
495
+ suffix += 1
496
+
497
+ # Insert with unique permalink
498
+ session.add(entity)
499
+ try:
500
+ await session.flush()
501
+ except IntegrityError as e: # pragma: no cover
502
+ # Check if this is a FOREIGN KEY constraint failure
503
+ # SQLite: "FOREIGN KEY constraint failed"
504
+ # Postgres: "violates foreign key constraint"
505
+ error_str = str(e)
506
+ if (
507
+ "FOREIGN KEY constraint failed" in error_str
508
+ or "violates foreign key constraint" in error_str
509
+ ):
510
+ # Import locally to avoid circular dependency (repository -> services -> repository)
511
+ from basic_memory.services.exceptions import SyncFatalError
512
+
513
+ # Project doesn't exist in database - this is a fatal sync error
514
+ raise SyncFatalError( # pragma: no cover
515
+ f"Cannot sync file '{entity.file_path}': "
516
+ f"project_id={entity.project_id} does not exist in database. "
517
+ f"The project may have been deleted. This sync will be terminated."
518
+ ) from e
519
+ # Re-raise if not a foreign key error
520
+ raise # pragma: no cover
521
+ return entity
@@ -1,6 +1,7 @@
1
1
  """Repository for managing Observation objects."""
2
2
 
3
- from typing import Sequence
3
+ from typing import Dict, List, Sequence
4
+
4
5
 
5
6
  from sqlalchemy import select
6
7
  from sqlalchemy.ext.asyncio import async_sessionmaker
@@ -12,8 +13,14 @@ from basic_memory.repository.repository import Repository
12
13
  class ObservationRepository(Repository[Observation]):
13
14
  """Repository for Observation model with memory-specific operations."""
14
15
 
15
- def __init__(self, session_maker: async_sessionmaker):
16
- super().__init__(session_maker, Observation)
16
+ def __init__(self, session_maker: async_sessionmaker, project_id: int):
17
+ """Initialize with session maker and project_id filter.
18
+
19
+ Args:
20
+ session_maker: SQLAlchemy session maker
21
+ project_id: Project ID to filter all operations by
22
+ """
23
+ super().__init__(session_maker, Observation, project_id=project_id)
17
24
 
18
25
  async def find_by_entity(self, entity_id: int) -> Sequence[Observation]:
19
26
  """Find all observations for a specific entity."""
@@ -38,3 +45,29 @@ class ObservationRepository(Repository[Observation]):
38
45
  query = select(Observation.category).distinct()
39
46
  result = await self.execute_query(query, use_query_options=False)
40
47
  return result.scalars().all()
48
+
49
+ async def find_by_entities(self, entity_ids: List[int]) -> Dict[int, List[Observation]]:
50
+ """Find all observations for multiple entities in a single query.
51
+
52
+ Args:
53
+ entity_ids: List of entity IDs to fetch observations for
54
+
55
+ Returns:
56
+ Dictionary mapping entity_id to list of observations
57
+ """
58
+ if not entity_ids: # pragma: no cover
59
+ return {}
60
+
61
+ # Query observations for all entities in the list
62
+ query = select(Observation).filter(Observation.entity_id.in_(entity_ids))
63
+ result = await self.execute_query(query)
64
+ observations = result.scalars().all()
65
+
66
+ # Group observations by entity_id
67
+ observations_by_entity = {}
68
+ for obs in observations:
69
+ if obs.entity_id not in observations_by_entity:
70
+ observations_by_entity[obs.entity_id] = []
71
+ observations_by_entity[obs.entity_id].append(obs)
72
+
73
+ return observations_by_entity