basic-memory 0.7.0__py3-none-any.whl → 0.17.4__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of basic-memory might be problematic. More details are available via the link on the original page.

Files changed (195)
  1. basic_memory/__init__.py +5 -1
  2. basic_memory/alembic/alembic.ini +119 -0
  3. basic_memory/alembic/env.py +130 -20
  4. basic_memory/alembic/migrations.py +4 -9
  5. basic_memory/alembic/versions/314f1ea54dc4_add_postgres_full_text_search_support_.py +131 -0
  6. basic_memory/alembic/versions/502b60eaa905_remove_required_from_entity_permalink.py +51 -0
  7. basic_memory/alembic/versions/5fe1ab1ccebe_add_projects_table.py +120 -0
  8. basic_memory/alembic/versions/647e7a75e2cd_project_constraint_fix.py +112 -0
  9. basic_memory/alembic/versions/6830751f5fb6_merge_multiple_heads.py +24 -0
  10. basic_memory/alembic/versions/9d9c1cb7d8f5_add_mtime_and_size_columns_to_entity_.py +49 -0
  11. basic_memory/alembic/versions/a1b2c3d4e5f6_fix_project_foreign_keys.py +49 -0
  12. basic_memory/alembic/versions/a2b3c4d5e6f7_add_search_index_entity_cascade.py +56 -0
  13. basic_memory/alembic/versions/b3c3938bacdb_relation_to_name_unique_index.py +44 -0
  14. basic_memory/alembic/versions/cc7172b46608_update_search_index_schema.py +113 -0
  15. basic_memory/alembic/versions/e7e1f4367280_add_scan_watermark_tracking_to_project.py +37 -0
  16. basic_memory/alembic/versions/f8a9b2c3d4e5_add_pg_trgm_for_fuzzy_link_resolution.py +239 -0
  17. basic_memory/alembic/versions/g9a0b3c4d5e6_add_external_id_to_project_and_entity.py +173 -0
  18. basic_memory/api/app.py +87 -20
  19. basic_memory/api/container.py +133 -0
  20. basic_memory/api/routers/__init__.py +4 -1
  21. basic_memory/api/routers/directory_router.py +84 -0
  22. basic_memory/api/routers/importer_router.py +152 -0
  23. basic_memory/api/routers/knowledge_router.py +180 -23
  24. basic_memory/api/routers/management_router.py +80 -0
  25. basic_memory/api/routers/memory_router.py +9 -64
  26. basic_memory/api/routers/project_router.py +460 -0
  27. basic_memory/api/routers/prompt_router.py +260 -0
  28. basic_memory/api/routers/resource_router.py +136 -11
  29. basic_memory/api/routers/search_router.py +5 -5
  30. basic_memory/api/routers/utils.py +169 -0
  31. basic_memory/api/template_loader.py +292 -0
  32. basic_memory/api/v2/__init__.py +35 -0
  33. basic_memory/api/v2/routers/__init__.py +21 -0
  34. basic_memory/api/v2/routers/directory_router.py +93 -0
  35. basic_memory/api/v2/routers/importer_router.py +181 -0
  36. basic_memory/api/v2/routers/knowledge_router.py +427 -0
  37. basic_memory/api/v2/routers/memory_router.py +130 -0
  38. basic_memory/api/v2/routers/project_router.py +359 -0
  39. basic_memory/api/v2/routers/prompt_router.py +269 -0
  40. basic_memory/api/v2/routers/resource_router.py +286 -0
  41. basic_memory/api/v2/routers/search_router.py +73 -0
  42. basic_memory/cli/app.py +80 -10
  43. basic_memory/cli/auth.py +300 -0
  44. basic_memory/cli/commands/__init__.py +15 -2
  45. basic_memory/cli/commands/cloud/__init__.py +6 -0
  46. basic_memory/cli/commands/cloud/api_client.py +127 -0
  47. basic_memory/cli/commands/cloud/bisync_commands.py +110 -0
  48. basic_memory/cli/commands/cloud/cloud_utils.py +108 -0
  49. basic_memory/cli/commands/cloud/core_commands.py +195 -0
  50. basic_memory/cli/commands/cloud/rclone_commands.py +397 -0
  51. basic_memory/cli/commands/cloud/rclone_config.py +110 -0
  52. basic_memory/cli/commands/cloud/rclone_installer.py +263 -0
  53. basic_memory/cli/commands/cloud/upload.py +240 -0
  54. basic_memory/cli/commands/cloud/upload_command.py +124 -0
  55. basic_memory/cli/commands/command_utils.py +99 -0
  56. basic_memory/cli/commands/db.py +87 -12
  57. basic_memory/cli/commands/format.py +198 -0
  58. basic_memory/cli/commands/import_chatgpt.py +47 -223
  59. basic_memory/cli/commands/import_claude_conversations.py +48 -171
  60. basic_memory/cli/commands/import_claude_projects.py +53 -160
  61. basic_memory/cli/commands/import_memory_json.py +55 -111
  62. basic_memory/cli/commands/mcp.py +67 -11
  63. basic_memory/cli/commands/project.py +889 -0
  64. basic_memory/cli/commands/status.py +52 -34
  65. basic_memory/cli/commands/telemetry.py +81 -0
  66. basic_memory/cli/commands/tool.py +341 -0
  67. basic_memory/cli/container.py +84 -0
  68. basic_memory/cli/main.py +14 -6
  69. basic_memory/config.py +580 -26
  70. basic_memory/db.py +285 -28
  71. basic_memory/deps/__init__.py +293 -0
  72. basic_memory/deps/config.py +26 -0
  73. basic_memory/deps/db.py +56 -0
  74. basic_memory/deps/importers.py +200 -0
  75. basic_memory/deps/projects.py +238 -0
  76. basic_memory/deps/repositories.py +179 -0
  77. basic_memory/deps/services.py +480 -0
  78. basic_memory/deps.py +16 -185
  79. basic_memory/file_utils.py +318 -54
  80. basic_memory/ignore_utils.py +297 -0
  81. basic_memory/importers/__init__.py +27 -0
  82. basic_memory/importers/base.py +100 -0
  83. basic_memory/importers/chatgpt_importer.py +245 -0
  84. basic_memory/importers/claude_conversations_importer.py +192 -0
  85. basic_memory/importers/claude_projects_importer.py +184 -0
  86. basic_memory/importers/memory_json_importer.py +128 -0
  87. basic_memory/importers/utils.py +61 -0
  88. basic_memory/markdown/entity_parser.py +182 -23
  89. basic_memory/markdown/markdown_processor.py +70 -7
  90. basic_memory/markdown/plugins.py +43 -23
  91. basic_memory/markdown/schemas.py +1 -1
  92. basic_memory/markdown/utils.py +38 -14
  93. basic_memory/mcp/async_client.py +135 -4
  94. basic_memory/mcp/clients/__init__.py +28 -0
  95. basic_memory/mcp/clients/directory.py +70 -0
  96. basic_memory/mcp/clients/knowledge.py +176 -0
  97. basic_memory/mcp/clients/memory.py +120 -0
  98. basic_memory/mcp/clients/project.py +89 -0
  99. basic_memory/mcp/clients/resource.py +71 -0
  100. basic_memory/mcp/clients/search.py +65 -0
  101. basic_memory/mcp/container.py +110 -0
  102. basic_memory/mcp/project_context.py +155 -0
  103. basic_memory/mcp/prompts/__init__.py +19 -0
  104. basic_memory/mcp/prompts/ai_assistant_guide.py +70 -0
  105. basic_memory/mcp/prompts/continue_conversation.py +62 -0
  106. basic_memory/mcp/prompts/recent_activity.py +188 -0
  107. basic_memory/mcp/prompts/search.py +57 -0
  108. basic_memory/mcp/prompts/utils.py +162 -0
  109. basic_memory/mcp/resources/ai_assistant_guide.md +283 -0
  110. basic_memory/mcp/resources/project_info.py +71 -0
  111. basic_memory/mcp/server.py +61 -9
  112. basic_memory/mcp/tools/__init__.py +33 -21
  113. basic_memory/mcp/tools/build_context.py +120 -0
  114. basic_memory/mcp/tools/canvas.py +152 -0
  115. basic_memory/mcp/tools/chatgpt_tools.py +190 -0
  116. basic_memory/mcp/tools/delete_note.py +249 -0
  117. basic_memory/mcp/tools/edit_note.py +325 -0
  118. basic_memory/mcp/tools/list_directory.py +157 -0
  119. basic_memory/mcp/tools/move_note.py +549 -0
  120. basic_memory/mcp/tools/project_management.py +204 -0
  121. basic_memory/mcp/tools/read_content.py +281 -0
  122. basic_memory/mcp/tools/read_note.py +265 -0
  123. basic_memory/mcp/tools/recent_activity.py +528 -0
  124. basic_memory/mcp/tools/search.py +377 -24
  125. basic_memory/mcp/tools/utils.py +402 -16
  126. basic_memory/mcp/tools/view_note.py +78 -0
  127. basic_memory/mcp/tools/write_note.py +230 -0
  128. basic_memory/models/__init__.py +3 -2
  129. basic_memory/models/knowledge.py +82 -17
  130. basic_memory/models/project.py +93 -0
  131. basic_memory/models/search.py +68 -8
  132. basic_memory/project_resolver.py +222 -0
  133. basic_memory/repository/__init__.py +2 -0
  134. basic_memory/repository/entity_repository.py +437 -8
  135. basic_memory/repository/observation_repository.py +36 -3
  136. basic_memory/repository/postgres_search_repository.py +451 -0
  137. basic_memory/repository/project_info_repository.py +10 -0
  138. basic_memory/repository/project_repository.py +140 -0
  139. basic_memory/repository/relation_repository.py +79 -4
  140. basic_memory/repository/repository.py +148 -29
  141. basic_memory/repository/search_index_row.py +95 -0
  142. basic_memory/repository/search_repository.py +79 -268
  143. basic_memory/repository/search_repository_base.py +241 -0
  144. basic_memory/repository/sqlite_search_repository.py +437 -0
  145. basic_memory/runtime.py +61 -0
  146. basic_memory/schemas/__init__.py +22 -9
  147. basic_memory/schemas/base.py +131 -12
  148. basic_memory/schemas/cloud.py +50 -0
  149. basic_memory/schemas/directory.py +31 -0
  150. basic_memory/schemas/importer.py +35 -0
  151. basic_memory/schemas/memory.py +194 -25
  152. basic_memory/schemas/project_info.py +213 -0
  153. basic_memory/schemas/prompt.py +90 -0
  154. basic_memory/schemas/request.py +56 -2
  155. basic_memory/schemas/response.py +85 -28
  156. basic_memory/schemas/search.py +36 -35
  157. basic_memory/schemas/sync_report.py +72 -0
  158. basic_memory/schemas/v2/__init__.py +27 -0
  159. basic_memory/schemas/v2/entity.py +133 -0
  160. basic_memory/schemas/v2/resource.py +47 -0
  161. basic_memory/services/__init__.py +2 -1
  162. basic_memory/services/context_service.py +451 -138
  163. basic_memory/services/directory_service.py +310 -0
  164. basic_memory/services/entity_service.py +636 -71
  165. basic_memory/services/exceptions.py +21 -0
  166. basic_memory/services/file_service.py +402 -33
  167. basic_memory/services/initialization.py +216 -0
  168. basic_memory/services/link_resolver.py +50 -56
  169. basic_memory/services/project_service.py +888 -0
  170. basic_memory/services/search_service.py +232 -37
  171. basic_memory/sync/__init__.py +4 -2
  172. basic_memory/sync/background_sync.py +26 -0
  173. basic_memory/sync/coordinator.py +160 -0
  174. basic_memory/sync/sync_service.py +1200 -109
  175. basic_memory/sync/watch_service.py +432 -135
  176. basic_memory/telemetry.py +249 -0
  177. basic_memory/templates/prompts/continue_conversation.hbs +110 -0
  178. basic_memory/templates/prompts/search.hbs +101 -0
  179. basic_memory/utils.py +407 -54
  180. basic_memory-0.17.4.dist-info/METADATA +617 -0
  181. basic_memory-0.17.4.dist-info/RECORD +193 -0
  182. {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/WHEEL +1 -1
  183. {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/entry_points.txt +1 -0
  184. basic_memory/alembic/README +0 -1
  185. basic_memory/cli/commands/sync.py +0 -206
  186. basic_memory/cli/commands/tools.py +0 -157
  187. basic_memory/mcp/tools/knowledge.py +0 -68
  188. basic_memory/mcp/tools/memory.py +0 -170
  189. basic_memory/mcp/tools/notes.py +0 -202
  190. basic_memory/schemas/discovery.py +0 -28
  191. basic_memory/sync/file_change_scanner.py +0 -158
  192. basic_memory/sync/utils.py +0 -31
  193. basic_memory-0.7.0.dist-info/METADATA +0 -378
  194. basic_memory-0.7.0.dist-info/RECORD +0 -82
  195. {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/licenses/LICENSE +0 -0
@@ -1,10 +1,14 @@
1
1
  """Service for search operations."""
2
2
 
3
+ import ast
3
4
  from datetime import datetime
4
5
  from typing import List, Optional, Set
5
6
 
7
+
8
+ from dateparser import parse
6
9
  from fastapi import BackgroundTasks
7
10
  from loguru import logger
11
+ from sqlalchemy import text
8
12
 
9
13
  from basic_memory.models import Entity
10
14
  from basic_memory.repository import EntityRepository
@@ -12,6 +16,21 @@ from basic_memory.repository.search_repository import SearchRepository, SearchIn
12
16
  from basic_memory.schemas.search import SearchQuery, SearchItemType
13
17
  from basic_memory.services import FileService
14
18
 
19
+ # Maximum size for content_stems field to stay under Postgres's 8KB index row limit.
20
+ # We use 6000 characters to leave headroom for other indexed columns and overhead.
21
+ MAX_CONTENT_STEMS_SIZE = 6000
22
+
23
+
24
+ def _mtime_to_datetime(entity: Entity) -> datetime:
25
+ """Convert entity mtime (file modification time) to datetime.
26
+
27
+ Returns the file's actual modification time, falling back to updated_at
28
+ if mtime is not available.
29
+ """
30
+ if entity.mtime:
31
+ return datetime.fromtimestamp(entity.mtime).astimezone()
32
+ return entity.updated_at
33
+
15
34
 
16
35
  class SearchService:
17
36
  """Service for search operations.
@@ -38,9 +57,10 @@ class SearchService:
38
57
 
39
58
  async def reindex_all(self, background_tasks: Optional[BackgroundTasks] = None) -> None:
40
59
  """Reindex all content from database."""
41
- logger.info("Starting full reindex")
42
60
 
61
+ logger.info("Starting full reindex")
43
62
  # Clear and recreate search index
63
+ await self.repository.execute_query(text("DROP TABLE IF EXISTS search_index"), params={})
44
64
  await self.init_search_index()
45
65
 
46
66
  # Reindex all entities
@@ -63,13 +83,13 @@ class SearchService:
63
83
  logger.debug("no criteria passed to query")
64
84
  return []
65
85
 
66
- logger.debug(f"Searching with query: {query}")
86
+ logger.trace(f"Searching with query: {query}")
67
87
 
68
88
  after_date = (
69
89
  (
70
90
  query.after_date
71
91
  if isinstance(query.after_date, datetime)
72
- else datetime.fromisoformat(query.after_date)
92
+ else parse(query.after_date)
73
93
  )
74
94
  if query.after_date
75
95
  else None
@@ -82,7 +102,7 @@ class SearchService:
82
102
  permalink_match=query.permalink_match,
83
103
  title=query.title,
84
104
  types=query.types,
85
- entity_types=query.entity_types,
105
+ search_item_types=query.entity_types,
86
106
  after_date=after_date,
87
107
  limit=limit,
88
108
  offset=offset,
@@ -109,84 +129,216 @@ class SearchService:
109
129
  # Add word boundaries
110
130
  variants.update(w.strip() for w in text.lower().split() if w.strip())
111
131
 
112
- # Add trigrams for fuzzy matching
113
- variants.update(text[i : i + 3].lower() for i in range(len(text) - 2))
132
+ # Trigrams disabled: They create massive search index bloat, increasing DB size significantly
133
+ # and slowing down indexing performance. FTS5 search works well without them.
134
+ # See: https://github.com/basicmachines-co/basic-memory/issues/351
135
+ # variants.update(text[i : i + 3].lower() for i in range(len(text) - 2))
114
136
 
115
137
  return variants
116
138
 
139
+ def _extract_entity_tags(self, entity: Entity) -> List[str]:
140
+ """Extract tags from entity metadata for search indexing.
141
+
142
+ Handles multiple tag formats:
143
+ - List format: ["tag1", "tag2"]
144
+ - String format: "['tag1', 'tag2']" or "[tag1, tag2]"
145
+ - Empty: [] or "[]"
146
+
147
+ Returns a list of tag strings for search indexing.
148
+ """
149
+ if not entity.entity_metadata or "tags" not in entity.entity_metadata:
150
+ return []
151
+
152
+ tags = entity.entity_metadata["tags"]
153
+
154
+ # Handle list format (preferred)
155
+ if isinstance(tags, list):
156
+ return [str(tag) for tag in tags if tag]
157
+
158
+ # Handle string format (legacy)
159
+ if isinstance(tags, str):
160
+ try:
161
+ # Parse string representation of list
162
+ parsed_tags = ast.literal_eval(tags)
163
+ if isinstance(parsed_tags, list):
164
+ return [str(tag) for tag in parsed_tags if tag]
165
+ except (ValueError, SyntaxError):
166
+ # If parsing fails, treat as single tag
167
+ return [tags] if tags.strip() else []
168
+
169
+ return [] # pragma: no cover
170
+
117
171
  async def index_entity(
118
172
  self,
119
173
  entity: Entity,
120
174
  background_tasks: Optional[BackgroundTasks] = None,
175
+ content: str | None = None,
176
+ ) -> None:
177
+ if background_tasks:
178
+ background_tasks.add_task(self.index_entity_data, entity, content)
179
+ else:
180
+ await self.index_entity_data(entity, content)
181
+
182
+ async def index_entity_data(
183
+ self,
184
+ entity: Entity,
185
+ content: str | None = None,
186
+ ) -> None:
187
+ logger.info(
188
+ f"[BackgroundTask] Starting search index for entity_id={entity.id} "
189
+ f"permalink={entity.permalink} project_id={entity.project_id}"
190
+ )
191
+ try:
192
+ # delete all search index data associated with entity
193
+ await self.repository.delete_by_entity_id(entity_id=entity.id)
194
+
195
+ # reindex
196
+ await self.index_entity_markdown(
197
+ entity, content
198
+ ) if entity.is_markdown else await self.index_entity_file(entity)
199
+
200
+ logger.info(
201
+ f"[BackgroundTask] Completed search index for entity_id={entity.id} "
202
+ f"permalink={entity.permalink}"
203
+ )
204
+ except Exception as e: # pragma: no cover
205
+ # Background task failure logging; exceptions are re-raised.
206
+ # Avoid forcing synthetic failures just for line coverage.
207
+ logger.error( # pragma: no cover
208
+ f"[BackgroundTask] Failed search index for entity_id={entity.id} "
209
+ f"permalink={entity.permalink} error={e}"
210
+ )
211
+ raise # pragma: no cover
212
+
213
+ async def index_entity_file(
214
+ self,
215
+ entity: Entity,
216
+ ) -> None:
217
+ # Index entity file with no content
218
+ await self.repository.index_item(
219
+ SearchIndexRow(
220
+ id=entity.id,
221
+ entity_id=entity.id,
222
+ type=SearchItemType.ENTITY.value,
223
+ title=entity.title,
224
+ permalink=entity.permalink, # Required for Postgres NOT NULL constraint
225
+ file_path=entity.file_path,
226
+ metadata={
227
+ "entity_type": entity.entity_type,
228
+ },
229
+ created_at=entity.created_at,
230
+ updated_at=_mtime_to_datetime(entity),
231
+ project_id=entity.project_id,
232
+ )
233
+ )
234
+
235
+ async def index_entity_markdown(
236
+ self,
237
+ entity: Entity,
238
+ content: str | None = None,
121
239
  ) -> None:
122
240
  """Index an entity and all its observations and relations.
123
241
 
242
+ Args:
243
+ entity: The entity to index
244
+ content: Optional pre-loaded content (avoids file read). If None, will read from file.
245
+
124
246
  Indexing structure:
125
247
  1. Entities
126
248
  - permalink: direct from entity (e.g., "specs/search")
127
249
  - file_path: physical file location
250
+ - project_id: project context for isolation
128
251
 
129
252
  2. Observations
130
253
  - permalink: entity permalink + /observations/id (e.g., "specs/search/observations/123")
131
254
  - file_path: parent entity's file (where observation is defined)
255
+ - project_id: inherited from parent entity
132
256
 
133
257
  3. Relations (only index outgoing relations defined in this file)
134
258
  - permalink: from_entity/relation_type/to_entity (e.g., "specs/search/implements/features/search-ui")
135
259
  - file_path: source entity's file (where relation is defined)
260
+ - project_id: inherited from source entity
136
261
 
137
262
  Each type gets its own row in the search index with appropriate metadata.
263
+ The project_id is automatically added by the repository when indexing.
138
264
  """
139
- if background_tasks:
140
- background_tasks.add_task(self.index_entity_data, entity)
141
- else:
142
- await self.index_entity_data(entity)
143
265
 
144
- async def index_entity_data(
145
- self,
146
- entity: Entity,
147
- ) -> None:
148
- """Actually perform the indexing."""
266
+ # Collect all search index rows to batch insert at the end
267
+ rows_to_index = []
149
268
 
150
- content_parts = []
269
+ content_stems = []
270
+ content_snippet = ""
151
271
  title_variants = self._generate_variants(entity.title)
152
- content_parts.extend(title_variants)
272
+ content_stems.extend(title_variants)
153
273
 
154
- content = await self.file_service.read_entity_content(entity)
274
+ # Use provided content or read from file
275
+ if content is None:
276
+ content = await self.file_service.read_entity_content(entity)
155
277
  if content:
156
- content_parts.append(content)
278
+ content_stems.append(content)
279
+ content_snippet = f"{content[:250]}"
157
280
 
158
- content_parts.extend(self._generate_variants(entity.permalink))
159
- content_parts.extend(self._generate_variants(entity.file_path))
281
+ if entity.permalink:
282
+ content_stems.extend(self._generate_variants(entity.permalink))
160
283
 
161
- entity_content = "\n".join(p for p in content_parts if p and p.strip())
284
+ content_stems.extend(self._generate_variants(entity.file_path))
162
285
 
163
- # Index entity
164
- await self.repository.index_item(
286
+ # Add entity tags from frontmatter to search content
287
+ entity_tags = self._extract_entity_tags(entity)
288
+ if entity_tags:
289
+ content_stems.extend(entity_tags)
290
+
291
+ entity_content_stems = "\n".join(p for p in content_stems if p and p.strip())
292
+
293
+ # Truncate to stay under Postgres's 8KB index row limit
294
+ if len(entity_content_stems) > MAX_CONTENT_STEMS_SIZE: # pragma: no cover
295
+ entity_content_stems = entity_content_stems[:MAX_CONTENT_STEMS_SIZE] # pragma: no cover
296
+
297
+ # Add entity row
298
+ rows_to_index.append(
165
299
  SearchIndexRow(
166
300
  id=entity.id,
167
301
  type=SearchItemType.ENTITY.value,
168
302
  title=entity.title,
169
- content=entity_content,
303
+ content_stems=entity_content_stems,
304
+ content_snippet=content_snippet,
170
305
  permalink=entity.permalink,
171
306
  file_path=entity.file_path,
307
+ entity_id=entity.id,
172
308
  metadata={
173
309
  "entity_type": entity.entity_type,
174
310
  },
175
311
  created_at=entity.created_at,
176
- updated_at=entity.updated_at,
312
+ updated_at=_mtime_to_datetime(entity),
313
+ project_id=entity.project_id,
177
314
  )
178
315
  )
179
316
 
180
- # Index each observation with permalink
317
+ # Add observation rows - dedupe by permalink to avoid unique constraint violations
318
+ # Two observations with same entity/category/content generate identical permalinks
319
+ seen_permalinks: set[str] = {entity.permalink} if entity.permalink else set()
181
320
  for obs in entity.observations:
321
+ obs_permalink = obs.permalink
322
+ if obs_permalink in seen_permalinks:
323
+ logger.debug(f"Skipping duplicate observation permalink: {obs_permalink}")
324
+ continue
325
+ seen_permalinks.add(obs_permalink)
326
+
182
327
  # Index with parent entity's file path since that's where it's defined
183
- await self.repository.index_item(
328
+ obs_content_stems = "\n".join(
329
+ p for p in self._generate_variants(obs.content) if p and p.strip()
330
+ )
331
+ # Truncate to stay under Postgres's 8KB index row limit
332
+ if len(obs_content_stems) > MAX_CONTENT_STEMS_SIZE: # pragma: no cover
333
+ obs_content_stems = obs_content_stems[:MAX_CONTENT_STEMS_SIZE] # pragma: no cover
334
+ rows_to_index.append(
184
335
  SearchIndexRow(
185
336
  id=obs.id,
186
337
  type=SearchItemType.OBSERVATION.value,
187
- title=f"{obs.category}: {obs.content[:50]}...",
188
- content=obs.content,
189
- permalink=obs.permalink,
338
+ title=f"{obs.category}: {obs.content[:100]}...",
339
+ content_stems=obs_content_stems,
340
+ content_snippet=obs.content,
341
+ permalink=obs_permalink,
190
342
  file_path=entity.file_path,
191
343
  category=obs.category,
192
344
  entity_id=entity.id,
@@ -194,11 +346,12 @@ class SearchService:
194
346
  "tags": obs.tags,
195
347
  },
196
348
  created_at=entity.created_at,
197
- updated_at=entity.updated_at,
349
+ updated_at=_mtime_to_datetime(entity),
350
+ project_id=entity.project_id,
198
351
  )
199
352
  )
200
353
 
201
- # Only index outgoing relations (ones defined in this file)
354
+ # Add relation rows (only outgoing relations defined in this file)
202
355
  for rel in entity.outgoing_relations:
203
356
  # Create descriptive title showing the relationship
204
357
  relation_title = (
@@ -207,21 +360,63 @@ class SearchService:
207
360
  else f"{rel.from_entity.title}"
208
361
  )
209
362
 
210
- await self.repository.index_item(
363
+ rel_content_stems = "\n".join(
364
+ p for p in self._generate_variants(relation_title) if p and p.strip()
365
+ )
366
+ rows_to_index.append(
211
367
  SearchIndexRow(
212
368
  id=rel.id,
213
369
  title=relation_title,
214
370
  permalink=rel.permalink,
371
+ content_stems=rel_content_stems,
215
372
  file_path=entity.file_path,
216
373
  type=SearchItemType.RELATION.value,
374
+ entity_id=entity.id,
217
375
  from_id=rel.from_id,
218
376
  to_id=rel.to_id,
219
377
  relation_type=rel.relation_type,
220
378
  created_at=entity.created_at,
221
- updated_at=entity.updated_at,
379
+ updated_at=_mtime_to_datetime(entity),
380
+ project_id=entity.project_id,
222
381
  )
223
382
  )
224
383
 
225
- async def delete_by_permalink(self, path_id: str):
384
+ # Batch insert all rows at once
385
+ await self.repository.bulk_index_items(rows_to_index)
386
+
387
+ async def delete_by_permalink(self, permalink: str):
388
+ """Delete an item from the search index."""
389
+ await self.repository.delete_by_permalink(permalink)
390
+
391
+ async def delete_by_entity_id(self, entity_id: int):
226
392
  """Delete an item from the search index."""
227
- await self.repository.delete_by_permalink(path_id)
393
+ await self.repository.delete_by_entity_id(entity_id)
394
+
395
+ async def handle_delete(self, entity: Entity):
396
+ """Handle complete entity deletion from search index including observations and relations.
397
+
398
+ This replicates the logic from sync_service.handle_delete() to properly clean up
399
+ all search index entries for an entity and its related data.
400
+ """
401
+ logger.debug(
402
+ f"Cleaning up search index for entity_id={entity.id}, file_path={entity.file_path}, "
403
+ f"observations={len(entity.observations)}, relations={len(entity.outgoing_relations)}"
404
+ )
405
+
406
+ # Clean up search index - same logic as sync_service.handle_delete()
407
+ permalinks = (
408
+ [entity.permalink]
409
+ + [o.permalink for o in entity.observations]
410
+ + [r.permalink for r in entity.outgoing_relations]
411
+ )
412
+
413
+ logger.debug(
414
+ f"Deleting search index entries for entity_id={entity.id}, "
415
+ f"index_entries={len(permalinks)}"
416
+ )
417
+
418
+ for permalink in permalinks:
419
+ if permalink:
420
+ await self.delete_by_permalink(permalink)
421
+ else:
422
+ await self.delete_by_entity_id(entity.id)
@@ -1,5 +1,7 @@
1
- from .file_change_scanner import FileChangeScanner
1
+ """Basic Memory sync services."""
2
+
3
+ from .coordinator import SyncCoordinator, SyncStatus
2
4
  from .sync_service import SyncService
3
5
  from .watch_service import WatchService
4
6
 
5
- __all__ = ["SyncService", "FileChangeScanner", "WatchService"]
7
+ __all__ = ["SyncService", "WatchService", "SyncCoordinator", "SyncStatus"]
@@ -0,0 +1,26 @@
1
+ import asyncio
2
+
3
+ from loguru import logger
4
+
5
+ from basic_memory.config import get_project_config
6
+ from basic_memory.sync import SyncService, WatchService
7
+
8
+
9
+ async def sync_and_watch(
10
+ sync_service: SyncService, watch_service: WatchService
11
+ ): # pragma: no cover
12
+ """Run sync and watch service."""
13
+
14
+ config = get_project_config()
15
+ logger.info(f"Starting watch service to sync file changes in dir: {config.home}")
16
+ # full sync
17
+ await sync_service.sync(config.home)
18
+
19
+ # watch changes
20
+ await watch_service.run()
21
+
22
+
23
+ async def create_background_sync_task(
24
+ sync_service: SyncService, watch_service: WatchService
25
+ ): # pragma: no cover
26
+ return asyncio.create_task(sync_and_watch(sync_service, watch_service))
@@ -0,0 +1,160 @@
1
+ """SyncCoordinator - centralized sync/watch lifecycle management.
2
+
3
+ This module provides a single coordinator that manages the lifecycle of
4
+ file synchronization and watch services across all entry points (API, MCP, CLI).
5
+
6
+ The coordinator handles:
7
+ - Starting/stopping watch service
8
+ - Scheduling background sync
9
+ - Reporting status
10
+ - Clean shutdown behavior
11
+ """
12
+
13
+ import asyncio
14
+ from dataclasses import dataclass, field
15
+ from enum import Enum, auto
16
+ from typing import Optional
17
+
18
+ from loguru import logger
19
+
20
+ from basic_memory.config import BasicMemoryConfig
21
+
22
+
23
+ class SyncStatus(Enum):
24
+ """Status of the sync coordinator."""
25
+
26
+ NOT_STARTED = auto()
27
+ STARTING = auto()
28
+ RUNNING = auto()
29
+ STOPPING = auto()
30
+ STOPPED = auto()
31
+ ERROR = auto()
32
+
33
+
34
+ @dataclass
35
+ class SyncCoordinator:
36
+ """Centralized coordinator for sync/watch lifecycle.
37
+
38
+ Manages the lifecycle of file synchronization services, providing:
39
+ - Unified start/stop interface
40
+ - Status tracking
41
+ - Clean shutdown with proper task cancellation
42
+
43
+ Args:
44
+ config: BasicMemoryConfig with sync settings
45
+ should_sync: Whether sync should be enabled (from container decision)
46
+ skip_reason: Human-readable reason if sync is skipped
47
+
48
+ Usage:
49
+ coordinator = SyncCoordinator(config=config, should_sync=True)
50
+ await coordinator.start()
51
+ # ... application runs ...
52
+ await coordinator.stop()
53
+ """
54
+
55
+ config: BasicMemoryConfig
56
+ should_sync: bool = True
57
+ skip_reason: Optional[str] = None
58
+
59
+ # Internal state (not constructor args)
60
+ _status: SyncStatus = field(default=SyncStatus.NOT_STARTED, init=False)
61
+ _sync_task: Optional[asyncio.Task] = field(default=None, init=False)
62
+
63
+ @property
64
+ def status(self) -> SyncStatus:
65
+ """Current status of the coordinator."""
66
+ return self._status
67
+
68
+ @property
69
+ def is_running(self) -> bool:
70
+ """Whether sync is currently running."""
71
+ return self._status == SyncStatus.RUNNING
72
+
73
+ async def start(self) -> None:
74
+ """Start the sync/watch service if enabled.
75
+
76
+ This is a non-blocking call that starts the sync task in the background.
77
+ Use stop() to cleanly shut down.
78
+ """
79
+ if not self.should_sync:
80
+ if self.skip_reason:
81
+ logger.info(f"{self.skip_reason} - skipping local file sync")
82
+ self._status = SyncStatus.STOPPED
83
+ return
84
+
85
+ if self._status in (SyncStatus.RUNNING, SyncStatus.STARTING):
86
+ logger.warning("Sync coordinator already running or starting")
87
+ return
88
+
89
+ self._status = SyncStatus.STARTING
90
+ logger.info("Starting file sync in background")
91
+
92
+ try:
93
+ # Deferred import to avoid circular dependency
94
+ from basic_memory.services.initialization import initialize_file_sync
95
+
96
+ async def _file_sync_runner() -> None: # pragma: no cover
97
+ """Run the file sync service."""
98
+ try:
99
+ await initialize_file_sync(self.config)
100
+ except asyncio.CancelledError:
101
+ logger.debug("File sync cancelled")
102
+ raise
103
+ except Exception as e:
104
+ logger.error(f"Error in file sync: {e}")
105
+ self._status = SyncStatus.ERROR
106
+ raise
107
+
108
+ self._sync_task = asyncio.create_task(_file_sync_runner())
109
+ self._status = SyncStatus.RUNNING
110
+ logger.info("Sync coordinator started successfully")
111
+
112
+ except Exception as e: # pragma: no cover
113
+ logger.error(f"Failed to start sync coordinator: {e}")
114
+ self._status = SyncStatus.ERROR
115
+ raise
116
+
117
+ async def stop(self) -> None:
118
+ """Stop the sync/watch service cleanly.
119
+
120
+ Cancels the background task and waits for it to complete.
121
+ Safe to call even if not running.
122
+ """
123
+ if self._status in (SyncStatus.NOT_STARTED, SyncStatus.STOPPED):
124
+ return
125
+
126
+ if self._sync_task is None: # pragma: no cover
127
+ self._status = SyncStatus.STOPPED
128
+ return
129
+
130
+ self._status = SyncStatus.STOPPING
131
+ logger.info("Stopping sync coordinator...")
132
+
133
+ self._sync_task.cancel()
134
+ try:
135
+ await self._sync_task
136
+ except asyncio.CancelledError:
137
+ logger.info("File sync task cancelled successfully")
138
+
139
+ self._sync_task = None
140
+ self._status = SyncStatus.STOPPED
141
+ logger.info("Sync coordinator stopped")
142
+
143
+ def get_status_info(self) -> dict:
144
+ """Get status information for reporting.
145
+
146
+ Returns:
147
+ Dictionary with status details for diagnostics
148
+ """
149
+ return {
150
+ "status": self._status.name,
151
+ "should_sync": self.should_sync,
152
+ "skip_reason": self.skip_reason,
153
+ "has_task": self._sync_task is not None,
154
+ }
155
+
156
+
157
+ __all__ = [
158
+ "SyncCoordinator",
159
+ "SyncStatus",
160
+ ]