basic-memory 0.7.0__py3-none-any.whl → 0.17.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of basic-memory might be problematic. Click here for more details.
- basic_memory/__init__.py +5 -1
- basic_memory/alembic/alembic.ini +119 -0
- basic_memory/alembic/env.py +130 -20
- basic_memory/alembic/migrations.py +4 -9
- basic_memory/alembic/versions/314f1ea54dc4_add_postgres_full_text_search_support_.py +131 -0
- basic_memory/alembic/versions/502b60eaa905_remove_required_from_entity_permalink.py +51 -0
- basic_memory/alembic/versions/5fe1ab1ccebe_add_projects_table.py +120 -0
- basic_memory/alembic/versions/647e7a75e2cd_project_constraint_fix.py +112 -0
- basic_memory/alembic/versions/6830751f5fb6_merge_multiple_heads.py +24 -0
- basic_memory/alembic/versions/9d9c1cb7d8f5_add_mtime_and_size_columns_to_entity_.py +49 -0
- basic_memory/alembic/versions/a1b2c3d4e5f6_fix_project_foreign_keys.py +49 -0
- basic_memory/alembic/versions/a2b3c4d5e6f7_add_search_index_entity_cascade.py +56 -0
- basic_memory/alembic/versions/b3c3938bacdb_relation_to_name_unique_index.py +44 -0
- basic_memory/alembic/versions/cc7172b46608_update_search_index_schema.py +113 -0
- basic_memory/alembic/versions/e7e1f4367280_add_scan_watermark_tracking_to_project.py +37 -0
- basic_memory/alembic/versions/f8a9b2c3d4e5_add_pg_trgm_for_fuzzy_link_resolution.py +239 -0
- basic_memory/alembic/versions/g9a0b3c4d5e6_add_external_id_to_project_and_entity.py +173 -0
- basic_memory/api/app.py +87 -20
- basic_memory/api/container.py +133 -0
- basic_memory/api/routers/__init__.py +4 -1
- basic_memory/api/routers/directory_router.py +84 -0
- basic_memory/api/routers/importer_router.py +152 -0
- basic_memory/api/routers/knowledge_router.py +180 -23
- basic_memory/api/routers/management_router.py +80 -0
- basic_memory/api/routers/memory_router.py +9 -64
- basic_memory/api/routers/project_router.py +460 -0
- basic_memory/api/routers/prompt_router.py +260 -0
- basic_memory/api/routers/resource_router.py +136 -11
- basic_memory/api/routers/search_router.py +5 -5
- basic_memory/api/routers/utils.py +169 -0
- basic_memory/api/template_loader.py +292 -0
- basic_memory/api/v2/__init__.py +35 -0
- basic_memory/api/v2/routers/__init__.py +21 -0
- basic_memory/api/v2/routers/directory_router.py +93 -0
- basic_memory/api/v2/routers/importer_router.py +181 -0
- basic_memory/api/v2/routers/knowledge_router.py +427 -0
- basic_memory/api/v2/routers/memory_router.py +130 -0
- basic_memory/api/v2/routers/project_router.py +359 -0
- basic_memory/api/v2/routers/prompt_router.py +269 -0
- basic_memory/api/v2/routers/resource_router.py +286 -0
- basic_memory/api/v2/routers/search_router.py +73 -0
- basic_memory/cli/app.py +80 -10
- basic_memory/cli/auth.py +300 -0
- basic_memory/cli/commands/__init__.py +15 -2
- basic_memory/cli/commands/cloud/__init__.py +6 -0
- basic_memory/cli/commands/cloud/api_client.py +127 -0
- basic_memory/cli/commands/cloud/bisync_commands.py +110 -0
- basic_memory/cli/commands/cloud/cloud_utils.py +108 -0
- basic_memory/cli/commands/cloud/core_commands.py +195 -0
- basic_memory/cli/commands/cloud/rclone_commands.py +397 -0
- basic_memory/cli/commands/cloud/rclone_config.py +110 -0
- basic_memory/cli/commands/cloud/rclone_installer.py +263 -0
- basic_memory/cli/commands/cloud/upload.py +240 -0
- basic_memory/cli/commands/cloud/upload_command.py +124 -0
- basic_memory/cli/commands/command_utils.py +99 -0
- basic_memory/cli/commands/db.py +87 -12
- basic_memory/cli/commands/format.py +198 -0
- basic_memory/cli/commands/import_chatgpt.py +47 -223
- basic_memory/cli/commands/import_claude_conversations.py +48 -171
- basic_memory/cli/commands/import_claude_projects.py +53 -160
- basic_memory/cli/commands/import_memory_json.py +55 -111
- basic_memory/cli/commands/mcp.py +67 -11
- basic_memory/cli/commands/project.py +889 -0
- basic_memory/cli/commands/status.py +52 -34
- basic_memory/cli/commands/telemetry.py +81 -0
- basic_memory/cli/commands/tool.py +341 -0
- basic_memory/cli/container.py +84 -0
- basic_memory/cli/main.py +14 -6
- basic_memory/config.py +580 -26
- basic_memory/db.py +285 -28
- basic_memory/deps/__init__.py +293 -0
- basic_memory/deps/config.py +26 -0
- basic_memory/deps/db.py +56 -0
- basic_memory/deps/importers.py +200 -0
- basic_memory/deps/projects.py +238 -0
- basic_memory/deps/repositories.py +179 -0
- basic_memory/deps/services.py +480 -0
- basic_memory/deps.py +16 -185
- basic_memory/file_utils.py +318 -54
- basic_memory/ignore_utils.py +297 -0
- basic_memory/importers/__init__.py +27 -0
- basic_memory/importers/base.py +100 -0
- basic_memory/importers/chatgpt_importer.py +245 -0
- basic_memory/importers/claude_conversations_importer.py +192 -0
- basic_memory/importers/claude_projects_importer.py +184 -0
- basic_memory/importers/memory_json_importer.py +128 -0
- basic_memory/importers/utils.py +61 -0
- basic_memory/markdown/entity_parser.py +182 -23
- basic_memory/markdown/markdown_processor.py +70 -7
- basic_memory/markdown/plugins.py +43 -23
- basic_memory/markdown/schemas.py +1 -1
- basic_memory/markdown/utils.py +38 -14
- basic_memory/mcp/async_client.py +135 -4
- basic_memory/mcp/clients/__init__.py +28 -0
- basic_memory/mcp/clients/directory.py +70 -0
- basic_memory/mcp/clients/knowledge.py +176 -0
- basic_memory/mcp/clients/memory.py +120 -0
- basic_memory/mcp/clients/project.py +89 -0
- basic_memory/mcp/clients/resource.py +71 -0
- basic_memory/mcp/clients/search.py +65 -0
- basic_memory/mcp/container.py +110 -0
- basic_memory/mcp/project_context.py +155 -0
- basic_memory/mcp/prompts/__init__.py +19 -0
- basic_memory/mcp/prompts/ai_assistant_guide.py +70 -0
- basic_memory/mcp/prompts/continue_conversation.py +62 -0
- basic_memory/mcp/prompts/recent_activity.py +188 -0
- basic_memory/mcp/prompts/search.py +57 -0
- basic_memory/mcp/prompts/utils.py +162 -0
- basic_memory/mcp/resources/ai_assistant_guide.md +283 -0
- basic_memory/mcp/resources/project_info.py +71 -0
- basic_memory/mcp/server.py +61 -9
- basic_memory/mcp/tools/__init__.py +33 -21
- basic_memory/mcp/tools/build_context.py +120 -0
- basic_memory/mcp/tools/canvas.py +152 -0
- basic_memory/mcp/tools/chatgpt_tools.py +190 -0
- basic_memory/mcp/tools/delete_note.py +249 -0
- basic_memory/mcp/tools/edit_note.py +325 -0
- basic_memory/mcp/tools/list_directory.py +157 -0
- basic_memory/mcp/tools/move_note.py +549 -0
- basic_memory/mcp/tools/project_management.py +204 -0
- basic_memory/mcp/tools/read_content.py +281 -0
- basic_memory/mcp/tools/read_note.py +265 -0
- basic_memory/mcp/tools/recent_activity.py +528 -0
- basic_memory/mcp/tools/search.py +377 -24
- basic_memory/mcp/tools/utils.py +402 -16
- basic_memory/mcp/tools/view_note.py +78 -0
- basic_memory/mcp/tools/write_note.py +230 -0
- basic_memory/models/__init__.py +3 -2
- basic_memory/models/knowledge.py +82 -17
- basic_memory/models/project.py +93 -0
- basic_memory/models/search.py +68 -8
- basic_memory/project_resolver.py +222 -0
- basic_memory/repository/__init__.py +2 -0
- basic_memory/repository/entity_repository.py +437 -8
- basic_memory/repository/observation_repository.py +36 -3
- basic_memory/repository/postgres_search_repository.py +451 -0
- basic_memory/repository/project_info_repository.py +10 -0
- basic_memory/repository/project_repository.py +140 -0
- basic_memory/repository/relation_repository.py +79 -4
- basic_memory/repository/repository.py +148 -29
- basic_memory/repository/search_index_row.py +95 -0
- basic_memory/repository/search_repository.py +79 -268
- basic_memory/repository/search_repository_base.py +241 -0
- basic_memory/repository/sqlite_search_repository.py +437 -0
- basic_memory/runtime.py +61 -0
- basic_memory/schemas/__init__.py +22 -9
- basic_memory/schemas/base.py +131 -12
- basic_memory/schemas/cloud.py +50 -0
- basic_memory/schemas/directory.py +31 -0
- basic_memory/schemas/importer.py +35 -0
- basic_memory/schemas/memory.py +194 -25
- basic_memory/schemas/project_info.py +213 -0
- basic_memory/schemas/prompt.py +90 -0
- basic_memory/schemas/request.py +56 -2
- basic_memory/schemas/response.py +85 -28
- basic_memory/schemas/search.py +36 -35
- basic_memory/schemas/sync_report.py +72 -0
- basic_memory/schemas/v2/__init__.py +27 -0
- basic_memory/schemas/v2/entity.py +133 -0
- basic_memory/schemas/v2/resource.py +47 -0
- basic_memory/services/__init__.py +2 -1
- basic_memory/services/context_service.py +451 -138
- basic_memory/services/directory_service.py +310 -0
- basic_memory/services/entity_service.py +636 -71
- basic_memory/services/exceptions.py +21 -0
- basic_memory/services/file_service.py +402 -33
- basic_memory/services/initialization.py +216 -0
- basic_memory/services/link_resolver.py +50 -56
- basic_memory/services/project_service.py +888 -0
- basic_memory/services/search_service.py +232 -37
- basic_memory/sync/__init__.py +4 -2
- basic_memory/sync/background_sync.py +26 -0
- basic_memory/sync/coordinator.py +160 -0
- basic_memory/sync/sync_service.py +1200 -109
- basic_memory/sync/watch_service.py +432 -135
- basic_memory/telemetry.py +249 -0
- basic_memory/templates/prompts/continue_conversation.hbs +110 -0
- basic_memory/templates/prompts/search.hbs +101 -0
- basic_memory/utils.py +407 -54
- basic_memory-0.17.4.dist-info/METADATA +617 -0
- basic_memory-0.17.4.dist-info/RECORD +193 -0
- {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/WHEEL +1 -1
- {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/entry_points.txt +1 -0
- basic_memory/alembic/README +0 -1
- basic_memory/cli/commands/sync.py +0 -206
- basic_memory/cli/commands/tools.py +0 -157
- basic_memory/mcp/tools/knowledge.py +0 -68
- basic_memory/mcp/tools/memory.py +0 -170
- basic_memory/mcp/tools/notes.py +0 -202
- basic_memory/schemas/discovery.py +0 -28
- basic_memory/sync/file_change_scanner.py +0 -158
- basic_memory/sync/utils.py +0 -31
- basic_memory-0.7.0.dist-info/METADATA +0 -378
- basic_memory-0.7.0.dist-info/RECORD +0 -82
- {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/licenses/LICENSE +0 -0
basic_memory/services/search_service.py
CHANGED
|
@@ -1,10 +1,14 @@
|
|
|
1
1
|
"""Service for search operations."""
|
|
2
2
|
|
|
3
|
+
import ast
|
|
3
4
|
from datetime import datetime
|
|
4
5
|
from typing import List, Optional, Set
|
|
5
6
|
|
|
7
|
+
|
|
8
|
+
from dateparser import parse
|
|
6
9
|
from fastapi import BackgroundTasks
|
|
7
10
|
from loguru import logger
|
|
11
|
+
from sqlalchemy import text
|
|
8
12
|
|
|
9
13
|
from basic_memory.models import Entity
|
|
10
14
|
from basic_memory.repository import EntityRepository
|
|
@@ -12,6 +16,21 @@ from basic_memory.repository.search_repository import SearchRepository, SearchIn
|
|
|
12
16
|
from basic_memory.schemas.search import SearchQuery, SearchItemType
|
|
13
17
|
from basic_memory.services import FileService
|
|
14
18
|
|
|
19
|
+
# Maximum size for content_stems field to stay under Postgres's 8KB index row limit.
|
|
20
|
+
# We use 6000 characters to leave headroom for other indexed columns and overhead.
|
|
21
|
+
MAX_CONTENT_STEMS_SIZE = 6000
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _mtime_to_datetime(entity: Entity) -> datetime:
|
|
25
|
+
"""Convert entity mtime (file modification time) to datetime.
|
|
26
|
+
|
|
27
|
+
Returns the file's actual modification time, falling back to updated_at
|
|
28
|
+
if mtime is not available.
|
|
29
|
+
"""
|
|
30
|
+
if entity.mtime:
|
|
31
|
+
return datetime.fromtimestamp(entity.mtime).astimezone()
|
|
32
|
+
return entity.updated_at
|
|
33
|
+
|
|
15
34
|
|
|
16
35
|
class SearchService:
|
|
17
36
|
"""Service for search operations.
|
|
@@ -38,9 +57,10 @@ class SearchService:
|
|
|
38
57
|
|
|
39
58
|
async def reindex_all(self, background_tasks: Optional[BackgroundTasks] = None) -> None:
|
|
40
59
|
"""Reindex all content from database."""
|
|
41
|
-
logger.info("Starting full reindex")
|
|
42
60
|
|
|
61
|
+
logger.info("Starting full reindex")
|
|
43
62
|
# Clear and recreate search index
|
|
63
|
+
await self.repository.execute_query(text("DROP TABLE IF EXISTS search_index"), params={})
|
|
44
64
|
await self.init_search_index()
|
|
45
65
|
|
|
46
66
|
# Reindex all entities
|
|
@@ -63,13 +83,13 @@ class SearchService:
|
|
|
63
83
|
logger.debug("no criteria passed to query")
|
|
64
84
|
return []
|
|
65
85
|
|
|
66
|
-
logger.
|
|
86
|
+
logger.trace(f"Searching with query: {query}")
|
|
67
87
|
|
|
68
88
|
after_date = (
|
|
69
89
|
(
|
|
70
90
|
query.after_date
|
|
71
91
|
if isinstance(query.after_date, datetime)
|
|
72
|
-
else
|
|
92
|
+
else parse(query.after_date)
|
|
73
93
|
)
|
|
74
94
|
if query.after_date
|
|
75
95
|
else None
|
|
@@ -82,7 +102,7 @@ class SearchService:
|
|
|
82
102
|
permalink_match=query.permalink_match,
|
|
83
103
|
title=query.title,
|
|
84
104
|
types=query.types,
|
|
85
|
-
|
|
105
|
+
search_item_types=query.entity_types,
|
|
86
106
|
after_date=after_date,
|
|
87
107
|
limit=limit,
|
|
88
108
|
offset=offset,
|
|
@@ -109,84 +129,216 @@ class SearchService:
|
|
|
109
129
|
# Add word boundaries
|
|
110
130
|
variants.update(w.strip() for w in text.lower().split() if w.strip())
|
|
111
131
|
|
|
112
|
-
#
|
|
113
|
-
|
|
132
|
+
# Trigrams disabled: They create massive search index bloat, increasing DB size significantly
|
|
133
|
+
# and slowing down indexing performance. FTS5 search works well without them.
|
|
134
|
+
# See: https://github.com/basicmachines-co/basic-memory/issues/351
|
|
135
|
+
# variants.update(text[i : i + 3].lower() for i in range(len(text) - 2))
|
|
114
136
|
|
|
115
137
|
return variants
|
|
116
138
|
|
|
139
|
+
def _extract_entity_tags(self, entity: Entity) -> List[str]:
|
|
140
|
+
"""Extract tags from entity metadata for search indexing.
|
|
141
|
+
|
|
142
|
+
Handles multiple tag formats:
|
|
143
|
+
- List format: ["tag1", "tag2"]
|
|
144
|
+
- String format: "['tag1', 'tag2']" or "[tag1, tag2]"
|
|
145
|
+
- Empty: [] or "[]"
|
|
146
|
+
|
|
147
|
+
Returns a list of tag strings for search indexing.
|
|
148
|
+
"""
|
|
149
|
+
if not entity.entity_metadata or "tags" not in entity.entity_metadata:
|
|
150
|
+
return []
|
|
151
|
+
|
|
152
|
+
tags = entity.entity_metadata["tags"]
|
|
153
|
+
|
|
154
|
+
# Handle list format (preferred)
|
|
155
|
+
if isinstance(tags, list):
|
|
156
|
+
return [str(tag) for tag in tags if tag]
|
|
157
|
+
|
|
158
|
+
# Handle string format (legacy)
|
|
159
|
+
if isinstance(tags, str):
|
|
160
|
+
try:
|
|
161
|
+
# Parse string representation of list
|
|
162
|
+
parsed_tags = ast.literal_eval(tags)
|
|
163
|
+
if isinstance(parsed_tags, list):
|
|
164
|
+
return [str(tag) for tag in parsed_tags if tag]
|
|
165
|
+
except (ValueError, SyntaxError):
|
|
166
|
+
# If parsing fails, treat as single tag
|
|
167
|
+
return [tags] if tags.strip() else []
|
|
168
|
+
|
|
169
|
+
return [] # pragma: no cover
|
|
170
|
+
|
|
117
171
|
async def index_entity(
|
|
118
172
|
self,
|
|
119
173
|
entity: Entity,
|
|
120
174
|
background_tasks: Optional[BackgroundTasks] = None,
|
|
175
|
+
content: str | None = None,
|
|
176
|
+
) -> None:
|
|
177
|
+
if background_tasks:
|
|
178
|
+
background_tasks.add_task(self.index_entity_data, entity, content)
|
|
179
|
+
else:
|
|
180
|
+
await self.index_entity_data(entity, content)
|
|
181
|
+
|
|
182
|
+
async def index_entity_data(
|
|
183
|
+
self,
|
|
184
|
+
entity: Entity,
|
|
185
|
+
content: str | None = None,
|
|
186
|
+
) -> None:
|
|
187
|
+
logger.info(
|
|
188
|
+
f"[BackgroundTask] Starting search index for entity_id={entity.id} "
|
|
189
|
+
f"permalink={entity.permalink} project_id={entity.project_id}"
|
|
190
|
+
)
|
|
191
|
+
try:
|
|
192
|
+
# delete all search index data associated with entity
|
|
193
|
+
await self.repository.delete_by_entity_id(entity_id=entity.id)
|
|
194
|
+
|
|
195
|
+
# reindex
|
|
196
|
+
await self.index_entity_markdown(
|
|
197
|
+
entity, content
|
|
198
|
+
) if entity.is_markdown else await self.index_entity_file(entity)
|
|
199
|
+
|
|
200
|
+
logger.info(
|
|
201
|
+
f"[BackgroundTask] Completed search index for entity_id={entity.id} "
|
|
202
|
+
f"permalink={entity.permalink}"
|
|
203
|
+
)
|
|
204
|
+
except Exception as e: # pragma: no cover
|
|
205
|
+
# Background task failure logging; exceptions are re-raised.
|
|
206
|
+
# Avoid forcing synthetic failures just for line coverage.
|
|
207
|
+
logger.error( # pragma: no cover
|
|
208
|
+
f"[BackgroundTask] Failed search index for entity_id={entity.id} "
|
|
209
|
+
f"permalink={entity.permalink} error={e}"
|
|
210
|
+
)
|
|
211
|
+
raise # pragma: no cover
|
|
212
|
+
|
|
213
|
+
async def index_entity_file(
|
|
214
|
+
self,
|
|
215
|
+
entity: Entity,
|
|
216
|
+
) -> None:
|
|
217
|
+
# Index entity file with no content
|
|
218
|
+
await self.repository.index_item(
|
|
219
|
+
SearchIndexRow(
|
|
220
|
+
id=entity.id,
|
|
221
|
+
entity_id=entity.id,
|
|
222
|
+
type=SearchItemType.ENTITY.value,
|
|
223
|
+
title=entity.title,
|
|
224
|
+
permalink=entity.permalink, # Required for Postgres NOT NULL constraint
|
|
225
|
+
file_path=entity.file_path,
|
|
226
|
+
metadata={
|
|
227
|
+
"entity_type": entity.entity_type,
|
|
228
|
+
},
|
|
229
|
+
created_at=entity.created_at,
|
|
230
|
+
updated_at=_mtime_to_datetime(entity),
|
|
231
|
+
project_id=entity.project_id,
|
|
232
|
+
)
|
|
233
|
+
)
|
|
234
|
+
|
|
235
|
+
async def index_entity_markdown(
|
|
236
|
+
self,
|
|
237
|
+
entity: Entity,
|
|
238
|
+
content: str | None = None,
|
|
121
239
|
) -> None:
|
|
122
240
|
"""Index an entity and all its observations and relations.
|
|
123
241
|
|
|
242
|
+
Args:
|
|
243
|
+
entity: The entity to index
|
|
244
|
+
content: Optional pre-loaded content (avoids file read). If None, will read from file.
|
|
245
|
+
|
|
124
246
|
Indexing structure:
|
|
125
247
|
1. Entities
|
|
126
248
|
- permalink: direct from entity (e.g., "specs/search")
|
|
127
249
|
- file_path: physical file location
|
|
250
|
+
- project_id: project context for isolation
|
|
128
251
|
|
|
129
252
|
2. Observations
|
|
130
253
|
- permalink: entity permalink + /observations/id (e.g., "specs/search/observations/123")
|
|
131
254
|
- file_path: parent entity's file (where observation is defined)
|
|
255
|
+
- project_id: inherited from parent entity
|
|
132
256
|
|
|
133
257
|
3. Relations (only index outgoing relations defined in this file)
|
|
134
258
|
- permalink: from_entity/relation_type/to_entity (e.g., "specs/search/implements/features/search-ui")
|
|
135
259
|
- file_path: source entity's file (where relation is defined)
|
|
260
|
+
- project_id: inherited from source entity
|
|
136
261
|
|
|
137
262
|
Each type gets its own row in the search index with appropriate metadata.
|
|
263
|
+
The project_id is automatically added by the repository when indexing.
|
|
138
264
|
"""
|
|
139
|
-
if background_tasks:
|
|
140
|
-
background_tasks.add_task(self.index_entity_data, entity)
|
|
141
|
-
else:
|
|
142
|
-
await self.index_entity_data(entity)
|
|
143
265
|
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
entity: Entity,
|
|
147
|
-
) -> None:
|
|
148
|
-
"""Actually perform the indexing."""
|
|
266
|
+
# Collect all search index rows to batch insert at the end
|
|
267
|
+
rows_to_index = []
|
|
149
268
|
|
|
150
|
-
|
|
269
|
+
content_stems = []
|
|
270
|
+
content_snippet = ""
|
|
151
271
|
title_variants = self._generate_variants(entity.title)
|
|
152
|
-
|
|
272
|
+
content_stems.extend(title_variants)
|
|
153
273
|
|
|
154
|
-
content
|
|
274
|
+
# Use provided content or read from file
|
|
275
|
+
if content is None:
|
|
276
|
+
content = await self.file_service.read_entity_content(entity)
|
|
155
277
|
if content:
|
|
156
|
-
|
|
278
|
+
content_stems.append(content)
|
|
279
|
+
content_snippet = f"{content[:250]}"
|
|
157
280
|
|
|
158
|
-
|
|
159
|
-
|
|
281
|
+
if entity.permalink:
|
|
282
|
+
content_stems.extend(self._generate_variants(entity.permalink))
|
|
160
283
|
|
|
161
|
-
|
|
284
|
+
content_stems.extend(self._generate_variants(entity.file_path))
|
|
162
285
|
|
|
163
|
-
#
|
|
164
|
-
|
|
286
|
+
# Add entity tags from frontmatter to search content
|
|
287
|
+
entity_tags = self._extract_entity_tags(entity)
|
|
288
|
+
if entity_tags:
|
|
289
|
+
content_stems.extend(entity_tags)
|
|
290
|
+
|
|
291
|
+
entity_content_stems = "\n".join(p for p in content_stems if p and p.strip())
|
|
292
|
+
|
|
293
|
+
# Truncate to stay under Postgres's 8KB index row limit
|
|
294
|
+
if len(entity_content_stems) > MAX_CONTENT_STEMS_SIZE: # pragma: no cover
|
|
295
|
+
entity_content_stems = entity_content_stems[:MAX_CONTENT_STEMS_SIZE] # pragma: no cover
|
|
296
|
+
|
|
297
|
+
# Add entity row
|
|
298
|
+
rows_to_index.append(
|
|
165
299
|
SearchIndexRow(
|
|
166
300
|
id=entity.id,
|
|
167
301
|
type=SearchItemType.ENTITY.value,
|
|
168
302
|
title=entity.title,
|
|
169
|
-
|
|
303
|
+
content_stems=entity_content_stems,
|
|
304
|
+
content_snippet=content_snippet,
|
|
170
305
|
permalink=entity.permalink,
|
|
171
306
|
file_path=entity.file_path,
|
|
307
|
+
entity_id=entity.id,
|
|
172
308
|
metadata={
|
|
173
309
|
"entity_type": entity.entity_type,
|
|
174
310
|
},
|
|
175
311
|
created_at=entity.created_at,
|
|
176
|
-
updated_at=entity
|
|
312
|
+
updated_at=_mtime_to_datetime(entity),
|
|
313
|
+
project_id=entity.project_id,
|
|
177
314
|
)
|
|
178
315
|
)
|
|
179
316
|
|
|
180
|
-
#
|
|
317
|
+
# Add observation rows - dedupe by permalink to avoid unique constraint violations
|
|
318
|
+
# Two observations with same entity/category/content generate identical permalinks
|
|
319
|
+
seen_permalinks: set[str] = {entity.permalink} if entity.permalink else set()
|
|
181
320
|
for obs in entity.observations:
|
|
321
|
+
obs_permalink = obs.permalink
|
|
322
|
+
if obs_permalink in seen_permalinks:
|
|
323
|
+
logger.debug(f"Skipping duplicate observation permalink: {obs_permalink}")
|
|
324
|
+
continue
|
|
325
|
+
seen_permalinks.add(obs_permalink)
|
|
326
|
+
|
|
182
327
|
# Index with parent entity's file path since that's where it's defined
|
|
183
|
-
|
|
328
|
+
obs_content_stems = "\n".join(
|
|
329
|
+
p for p in self._generate_variants(obs.content) if p and p.strip()
|
|
330
|
+
)
|
|
331
|
+
# Truncate to stay under Postgres's 8KB index row limit
|
|
332
|
+
if len(obs_content_stems) > MAX_CONTENT_STEMS_SIZE: # pragma: no cover
|
|
333
|
+
obs_content_stems = obs_content_stems[:MAX_CONTENT_STEMS_SIZE] # pragma: no cover
|
|
334
|
+
rows_to_index.append(
|
|
184
335
|
SearchIndexRow(
|
|
185
336
|
id=obs.id,
|
|
186
337
|
type=SearchItemType.OBSERVATION.value,
|
|
187
|
-
title=f"{obs.category}: {obs.content[:
|
|
188
|
-
|
|
189
|
-
|
|
338
|
+
title=f"{obs.category}: {obs.content[:100]}...",
|
|
339
|
+
content_stems=obs_content_stems,
|
|
340
|
+
content_snippet=obs.content,
|
|
341
|
+
permalink=obs_permalink,
|
|
190
342
|
file_path=entity.file_path,
|
|
191
343
|
category=obs.category,
|
|
192
344
|
entity_id=entity.id,
|
|
@@ -194,11 +346,12 @@ class SearchService:
|
|
|
194
346
|
"tags": obs.tags,
|
|
195
347
|
},
|
|
196
348
|
created_at=entity.created_at,
|
|
197
|
-
updated_at=entity
|
|
349
|
+
updated_at=_mtime_to_datetime(entity),
|
|
350
|
+
project_id=entity.project_id,
|
|
198
351
|
)
|
|
199
352
|
)
|
|
200
353
|
|
|
201
|
-
#
|
|
354
|
+
# Add relation rows (only outgoing relations defined in this file)
|
|
202
355
|
for rel in entity.outgoing_relations:
|
|
203
356
|
# Create descriptive title showing the relationship
|
|
204
357
|
relation_title = (
|
|
@@ -207,21 +360,63 @@ class SearchService:
|
|
|
207
360
|
else f"{rel.from_entity.title}"
|
|
208
361
|
)
|
|
209
362
|
|
|
210
|
-
|
|
363
|
+
rel_content_stems = "\n".join(
|
|
364
|
+
p for p in self._generate_variants(relation_title) if p and p.strip()
|
|
365
|
+
)
|
|
366
|
+
rows_to_index.append(
|
|
211
367
|
SearchIndexRow(
|
|
212
368
|
id=rel.id,
|
|
213
369
|
title=relation_title,
|
|
214
370
|
permalink=rel.permalink,
|
|
371
|
+
content_stems=rel_content_stems,
|
|
215
372
|
file_path=entity.file_path,
|
|
216
373
|
type=SearchItemType.RELATION.value,
|
|
374
|
+
entity_id=entity.id,
|
|
217
375
|
from_id=rel.from_id,
|
|
218
376
|
to_id=rel.to_id,
|
|
219
377
|
relation_type=rel.relation_type,
|
|
220
378
|
created_at=entity.created_at,
|
|
221
|
-
updated_at=entity
|
|
379
|
+
updated_at=_mtime_to_datetime(entity),
|
|
380
|
+
project_id=entity.project_id,
|
|
222
381
|
)
|
|
223
382
|
)
|
|
224
383
|
|
|
225
|
-
|
|
384
|
+
# Batch insert all rows at once
|
|
385
|
+
await self.repository.bulk_index_items(rows_to_index)
|
|
386
|
+
|
|
387
|
+
async def delete_by_permalink(self, permalink: str):
|
|
388
|
+
"""Delete an item from the search index."""
|
|
389
|
+
await self.repository.delete_by_permalink(permalink)
|
|
390
|
+
|
|
391
|
+
async def delete_by_entity_id(self, entity_id: int):
|
|
226
392
|
"""Delete an item from the search index."""
|
|
227
|
-
await self.repository.
|
|
393
|
+
await self.repository.delete_by_entity_id(entity_id)
|
|
394
|
+
|
|
395
|
+
async def handle_delete(self, entity: Entity):
|
|
396
|
+
"""Handle complete entity deletion from search index including observations and relations.
|
|
397
|
+
|
|
398
|
+
This replicates the logic from sync_service.handle_delete() to properly clean up
|
|
399
|
+
all search index entries for an entity and its related data.
|
|
400
|
+
"""
|
|
401
|
+
logger.debug(
|
|
402
|
+
f"Cleaning up search index for entity_id={entity.id}, file_path={entity.file_path}, "
|
|
403
|
+
f"observations={len(entity.observations)}, relations={len(entity.outgoing_relations)}"
|
|
404
|
+
)
|
|
405
|
+
|
|
406
|
+
# Clean up search index - same logic as sync_service.handle_delete()
|
|
407
|
+
permalinks = (
|
|
408
|
+
[entity.permalink]
|
|
409
|
+
+ [o.permalink for o in entity.observations]
|
|
410
|
+
+ [r.permalink for r in entity.outgoing_relations]
|
|
411
|
+
)
|
|
412
|
+
|
|
413
|
+
logger.debug(
|
|
414
|
+
f"Deleting search index entries for entity_id={entity.id}, "
|
|
415
|
+
f"index_entries={len(permalinks)}"
|
|
416
|
+
)
|
|
417
|
+
|
|
418
|
+
for permalink in permalinks:
|
|
419
|
+
if permalink:
|
|
420
|
+
await self.delete_by_permalink(permalink)
|
|
421
|
+
else:
|
|
422
|
+
await self.delete_by_entity_id(entity.id)
|
basic_memory/sync/__init__.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
|
-
|
|
1
|
+
"""Basic Memory sync services."""
|
|
2
|
+
|
|
3
|
+
from .coordinator import SyncCoordinator, SyncStatus
|
|
2
4
|
from .sync_service import SyncService
|
|
3
5
|
from .watch_service import WatchService
|
|
4
6
|
|
|
5
|
-
__all__ = ["SyncService", "
|
|
7
|
+
__all__ = ["SyncService", "WatchService", "SyncCoordinator", "SyncStatus"]
|
basic_memory/sync/background_sync.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
|
|
3
|
+
from loguru import logger
|
|
4
|
+
|
|
5
|
+
from basic_memory.config import get_project_config
|
|
6
|
+
from basic_memory.sync import SyncService, WatchService
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
async def sync_and_watch(
|
|
10
|
+
sync_service: SyncService, watch_service: WatchService
|
|
11
|
+
): # pragma: no cover
|
|
12
|
+
"""Run sync and watch service."""
|
|
13
|
+
|
|
14
|
+
config = get_project_config()
|
|
15
|
+
logger.info(f"Starting watch service to sync file changes in dir: {config.home}")
|
|
16
|
+
# full sync
|
|
17
|
+
await sync_service.sync(config.home)
|
|
18
|
+
|
|
19
|
+
# watch changes
|
|
20
|
+
await watch_service.run()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
async def create_background_sync_task(
|
|
24
|
+
sync_service: SyncService, watch_service: WatchService
|
|
25
|
+
): # pragma: no cover
|
|
26
|
+
return asyncio.create_task(sync_and_watch(sync_service, watch_service))
|
basic_memory/sync/coordinator.py
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
"""SyncCoordinator - centralized sync/watch lifecycle management.
|
|
2
|
+
|
|
3
|
+
This module provides a single coordinator that manages the lifecycle of
|
|
4
|
+
file synchronization and watch services across all entry points (API, MCP, CLI).
|
|
5
|
+
|
|
6
|
+
The coordinator handles:
|
|
7
|
+
- Starting/stopping watch service
|
|
8
|
+
- Scheduling background sync
|
|
9
|
+
- Reporting status
|
|
10
|
+
- Clean shutdown behavior
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import asyncio
|
|
14
|
+
from dataclasses import dataclass, field
|
|
15
|
+
from enum import Enum, auto
|
|
16
|
+
from typing import Optional
|
|
17
|
+
|
|
18
|
+
from loguru import logger
|
|
19
|
+
|
|
20
|
+
from basic_memory.config import BasicMemoryConfig
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class SyncStatus(Enum):
|
|
24
|
+
"""Status of the sync coordinator."""
|
|
25
|
+
|
|
26
|
+
NOT_STARTED = auto()
|
|
27
|
+
STARTING = auto()
|
|
28
|
+
RUNNING = auto()
|
|
29
|
+
STOPPING = auto()
|
|
30
|
+
STOPPED = auto()
|
|
31
|
+
ERROR = auto()
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
|
|
35
|
+
class SyncCoordinator:
|
|
36
|
+
"""Centralized coordinator for sync/watch lifecycle.
|
|
37
|
+
|
|
38
|
+
Manages the lifecycle of file synchronization services, providing:
|
|
39
|
+
- Unified start/stop interface
|
|
40
|
+
- Status tracking
|
|
41
|
+
- Clean shutdown with proper task cancellation
|
|
42
|
+
|
|
43
|
+
Args:
|
|
44
|
+
config: BasicMemoryConfig with sync settings
|
|
45
|
+
should_sync: Whether sync should be enabled (from container decision)
|
|
46
|
+
skip_reason: Human-readable reason if sync is skipped
|
|
47
|
+
|
|
48
|
+
Usage:
|
|
49
|
+
coordinator = SyncCoordinator(config=config, should_sync=True)
|
|
50
|
+
await coordinator.start()
|
|
51
|
+
# ... application runs ...
|
|
52
|
+
await coordinator.stop()
|
|
53
|
+
"""
|
|
54
|
+
|
|
55
|
+
config: BasicMemoryConfig
|
|
56
|
+
should_sync: bool = True
|
|
57
|
+
skip_reason: Optional[str] = None
|
|
58
|
+
|
|
59
|
+
# Internal state (not constructor args)
|
|
60
|
+
_status: SyncStatus = field(default=SyncStatus.NOT_STARTED, init=False)
|
|
61
|
+
_sync_task: Optional[asyncio.Task] = field(default=None, init=False)
|
|
62
|
+
|
|
63
|
+
@property
|
|
64
|
+
def status(self) -> SyncStatus:
|
|
65
|
+
"""Current status of the coordinator."""
|
|
66
|
+
return self._status
|
|
67
|
+
|
|
68
|
+
@property
|
|
69
|
+
def is_running(self) -> bool:
|
|
70
|
+
"""Whether sync is currently running."""
|
|
71
|
+
return self._status == SyncStatus.RUNNING
|
|
72
|
+
|
|
73
|
+
async def start(self) -> None:
|
|
74
|
+
"""Start the sync/watch service if enabled.
|
|
75
|
+
|
|
76
|
+
This is a non-blocking call that starts the sync task in the background.
|
|
77
|
+
Use stop() to cleanly shut down.
|
|
78
|
+
"""
|
|
79
|
+
if not self.should_sync:
|
|
80
|
+
if self.skip_reason:
|
|
81
|
+
logger.info(f"{self.skip_reason} - skipping local file sync")
|
|
82
|
+
self._status = SyncStatus.STOPPED
|
|
83
|
+
return
|
|
84
|
+
|
|
85
|
+
if self._status in (SyncStatus.RUNNING, SyncStatus.STARTING):
|
|
86
|
+
logger.warning("Sync coordinator already running or starting")
|
|
87
|
+
return
|
|
88
|
+
|
|
89
|
+
self._status = SyncStatus.STARTING
|
|
90
|
+
logger.info("Starting file sync in background")
|
|
91
|
+
|
|
92
|
+
try:
|
|
93
|
+
# Deferred import to avoid circular dependency
|
|
94
|
+
from basic_memory.services.initialization import initialize_file_sync
|
|
95
|
+
|
|
96
|
+
async def _file_sync_runner() -> None: # pragma: no cover
|
|
97
|
+
"""Run the file sync service."""
|
|
98
|
+
try:
|
|
99
|
+
await initialize_file_sync(self.config)
|
|
100
|
+
except asyncio.CancelledError:
|
|
101
|
+
logger.debug("File sync cancelled")
|
|
102
|
+
raise
|
|
103
|
+
except Exception as e:
|
|
104
|
+
logger.error(f"Error in file sync: {e}")
|
|
105
|
+
self._status = SyncStatus.ERROR
|
|
106
|
+
raise
|
|
107
|
+
|
|
108
|
+
self._sync_task = asyncio.create_task(_file_sync_runner())
|
|
109
|
+
self._status = SyncStatus.RUNNING
|
|
110
|
+
logger.info("Sync coordinator started successfully")
|
|
111
|
+
|
|
112
|
+
except Exception as e: # pragma: no cover
|
|
113
|
+
logger.error(f"Failed to start sync coordinator: {e}")
|
|
114
|
+
self._status = SyncStatus.ERROR
|
|
115
|
+
raise
|
|
116
|
+
|
|
117
|
+
async def stop(self) -> None:
|
|
118
|
+
"""Stop the sync/watch service cleanly.
|
|
119
|
+
|
|
120
|
+
Cancels the background task and waits for it to complete.
|
|
121
|
+
Safe to call even if not running.
|
|
122
|
+
"""
|
|
123
|
+
if self._status in (SyncStatus.NOT_STARTED, SyncStatus.STOPPED):
|
|
124
|
+
return
|
|
125
|
+
|
|
126
|
+
if self._sync_task is None: # pragma: no cover
|
|
127
|
+
self._status = SyncStatus.STOPPED
|
|
128
|
+
return
|
|
129
|
+
|
|
130
|
+
self._status = SyncStatus.STOPPING
|
|
131
|
+
logger.info("Stopping sync coordinator...")
|
|
132
|
+
|
|
133
|
+
self._sync_task.cancel()
|
|
134
|
+
try:
|
|
135
|
+
await self._sync_task
|
|
136
|
+
except asyncio.CancelledError:
|
|
137
|
+
logger.info("File sync task cancelled successfully")
|
|
138
|
+
|
|
139
|
+
self._sync_task = None
|
|
140
|
+
self._status = SyncStatus.STOPPED
|
|
141
|
+
logger.info("Sync coordinator stopped")
|
|
142
|
+
|
|
143
|
+
def get_status_info(self) -> dict:
|
|
144
|
+
"""Get status information for reporting.
|
|
145
|
+
|
|
146
|
+
Returns:
|
|
147
|
+
Dictionary with status details for diagnostics
|
|
148
|
+
"""
|
|
149
|
+
return {
|
|
150
|
+
"status": self._status.name,
|
|
151
|
+
"should_sync": self.should_sync,
|
|
152
|
+
"skip_reason": self.skip_reason,
|
|
153
|
+
"has_task": self._sync_task is not None,
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
__all__ = [
|
|
158
|
+
"SyncCoordinator",
|
|
159
|
+
"SyncStatus",
|
|
160
|
+
]
|