basic-memory 0.7.0__py3-none-any.whl → 0.17.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of basic-memory might be problematic. Click here for more details.
- basic_memory/__init__.py +5 -1
- basic_memory/alembic/alembic.ini +119 -0
- basic_memory/alembic/env.py +130 -20
- basic_memory/alembic/migrations.py +4 -9
- basic_memory/alembic/versions/314f1ea54dc4_add_postgres_full_text_search_support_.py +131 -0
- basic_memory/alembic/versions/502b60eaa905_remove_required_from_entity_permalink.py +51 -0
- basic_memory/alembic/versions/5fe1ab1ccebe_add_projects_table.py +120 -0
- basic_memory/alembic/versions/647e7a75e2cd_project_constraint_fix.py +112 -0
- basic_memory/alembic/versions/6830751f5fb6_merge_multiple_heads.py +24 -0
- basic_memory/alembic/versions/9d9c1cb7d8f5_add_mtime_and_size_columns_to_entity_.py +49 -0
- basic_memory/alembic/versions/a1b2c3d4e5f6_fix_project_foreign_keys.py +49 -0
- basic_memory/alembic/versions/a2b3c4d5e6f7_add_search_index_entity_cascade.py +56 -0
- basic_memory/alembic/versions/b3c3938bacdb_relation_to_name_unique_index.py +44 -0
- basic_memory/alembic/versions/cc7172b46608_update_search_index_schema.py +113 -0
- basic_memory/alembic/versions/e7e1f4367280_add_scan_watermark_tracking_to_project.py +37 -0
- basic_memory/alembic/versions/f8a9b2c3d4e5_add_pg_trgm_for_fuzzy_link_resolution.py +239 -0
- basic_memory/alembic/versions/g9a0b3c4d5e6_add_external_id_to_project_and_entity.py +173 -0
- basic_memory/api/app.py +87 -20
- basic_memory/api/container.py +133 -0
- basic_memory/api/routers/__init__.py +4 -1
- basic_memory/api/routers/directory_router.py +84 -0
- basic_memory/api/routers/importer_router.py +152 -0
- basic_memory/api/routers/knowledge_router.py +180 -23
- basic_memory/api/routers/management_router.py +80 -0
- basic_memory/api/routers/memory_router.py +9 -64
- basic_memory/api/routers/project_router.py +460 -0
- basic_memory/api/routers/prompt_router.py +260 -0
- basic_memory/api/routers/resource_router.py +136 -11
- basic_memory/api/routers/search_router.py +5 -5
- basic_memory/api/routers/utils.py +169 -0
- basic_memory/api/template_loader.py +292 -0
- basic_memory/api/v2/__init__.py +35 -0
- basic_memory/api/v2/routers/__init__.py +21 -0
- basic_memory/api/v2/routers/directory_router.py +93 -0
- basic_memory/api/v2/routers/importer_router.py +181 -0
- basic_memory/api/v2/routers/knowledge_router.py +427 -0
- basic_memory/api/v2/routers/memory_router.py +130 -0
- basic_memory/api/v2/routers/project_router.py +359 -0
- basic_memory/api/v2/routers/prompt_router.py +269 -0
- basic_memory/api/v2/routers/resource_router.py +286 -0
- basic_memory/api/v2/routers/search_router.py +73 -0
- basic_memory/cli/app.py +80 -10
- basic_memory/cli/auth.py +300 -0
- basic_memory/cli/commands/__init__.py +15 -2
- basic_memory/cli/commands/cloud/__init__.py +6 -0
- basic_memory/cli/commands/cloud/api_client.py +127 -0
- basic_memory/cli/commands/cloud/bisync_commands.py +110 -0
- basic_memory/cli/commands/cloud/cloud_utils.py +108 -0
- basic_memory/cli/commands/cloud/core_commands.py +195 -0
- basic_memory/cli/commands/cloud/rclone_commands.py +397 -0
- basic_memory/cli/commands/cloud/rclone_config.py +110 -0
- basic_memory/cli/commands/cloud/rclone_installer.py +263 -0
- basic_memory/cli/commands/cloud/upload.py +240 -0
- basic_memory/cli/commands/cloud/upload_command.py +124 -0
- basic_memory/cli/commands/command_utils.py +99 -0
- basic_memory/cli/commands/db.py +87 -12
- basic_memory/cli/commands/format.py +198 -0
- basic_memory/cli/commands/import_chatgpt.py +47 -223
- basic_memory/cli/commands/import_claude_conversations.py +48 -171
- basic_memory/cli/commands/import_claude_projects.py +53 -160
- basic_memory/cli/commands/import_memory_json.py +55 -111
- basic_memory/cli/commands/mcp.py +67 -11
- basic_memory/cli/commands/project.py +889 -0
- basic_memory/cli/commands/status.py +52 -34
- basic_memory/cli/commands/telemetry.py +81 -0
- basic_memory/cli/commands/tool.py +341 -0
- basic_memory/cli/container.py +84 -0
- basic_memory/cli/main.py +14 -6
- basic_memory/config.py +580 -26
- basic_memory/db.py +285 -28
- basic_memory/deps/__init__.py +293 -0
- basic_memory/deps/config.py +26 -0
- basic_memory/deps/db.py +56 -0
- basic_memory/deps/importers.py +200 -0
- basic_memory/deps/projects.py +238 -0
- basic_memory/deps/repositories.py +179 -0
- basic_memory/deps/services.py +480 -0
- basic_memory/deps.py +16 -185
- basic_memory/file_utils.py +318 -54
- basic_memory/ignore_utils.py +297 -0
- basic_memory/importers/__init__.py +27 -0
- basic_memory/importers/base.py +100 -0
- basic_memory/importers/chatgpt_importer.py +245 -0
- basic_memory/importers/claude_conversations_importer.py +192 -0
- basic_memory/importers/claude_projects_importer.py +184 -0
- basic_memory/importers/memory_json_importer.py +128 -0
- basic_memory/importers/utils.py +61 -0
- basic_memory/markdown/entity_parser.py +182 -23
- basic_memory/markdown/markdown_processor.py +70 -7
- basic_memory/markdown/plugins.py +43 -23
- basic_memory/markdown/schemas.py +1 -1
- basic_memory/markdown/utils.py +38 -14
- basic_memory/mcp/async_client.py +135 -4
- basic_memory/mcp/clients/__init__.py +28 -0
- basic_memory/mcp/clients/directory.py +70 -0
- basic_memory/mcp/clients/knowledge.py +176 -0
- basic_memory/mcp/clients/memory.py +120 -0
- basic_memory/mcp/clients/project.py +89 -0
- basic_memory/mcp/clients/resource.py +71 -0
- basic_memory/mcp/clients/search.py +65 -0
- basic_memory/mcp/container.py +110 -0
- basic_memory/mcp/project_context.py +155 -0
- basic_memory/mcp/prompts/__init__.py +19 -0
- basic_memory/mcp/prompts/ai_assistant_guide.py +70 -0
- basic_memory/mcp/prompts/continue_conversation.py +62 -0
- basic_memory/mcp/prompts/recent_activity.py +188 -0
- basic_memory/mcp/prompts/search.py +57 -0
- basic_memory/mcp/prompts/utils.py +162 -0
- basic_memory/mcp/resources/ai_assistant_guide.md +283 -0
- basic_memory/mcp/resources/project_info.py +71 -0
- basic_memory/mcp/server.py +61 -9
- basic_memory/mcp/tools/__init__.py +33 -21
- basic_memory/mcp/tools/build_context.py +120 -0
- basic_memory/mcp/tools/canvas.py +152 -0
- basic_memory/mcp/tools/chatgpt_tools.py +190 -0
- basic_memory/mcp/tools/delete_note.py +249 -0
- basic_memory/mcp/tools/edit_note.py +325 -0
- basic_memory/mcp/tools/list_directory.py +157 -0
- basic_memory/mcp/tools/move_note.py +549 -0
- basic_memory/mcp/tools/project_management.py +204 -0
- basic_memory/mcp/tools/read_content.py +281 -0
- basic_memory/mcp/tools/read_note.py +265 -0
- basic_memory/mcp/tools/recent_activity.py +528 -0
- basic_memory/mcp/tools/search.py +377 -24
- basic_memory/mcp/tools/utils.py +402 -16
- basic_memory/mcp/tools/view_note.py +78 -0
- basic_memory/mcp/tools/write_note.py +230 -0
- basic_memory/models/__init__.py +3 -2
- basic_memory/models/knowledge.py +82 -17
- basic_memory/models/project.py +93 -0
- basic_memory/models/search.py +68 -8
- basic_memory/project_resolver.py +222 -0
- basic_memory/repository/__init__.py +2 -0
- basic_memory/repository/entity_repository.py +437 -8
- basic_memory/repository/observation_repository.py +36 -3
- basic_memory/repository/postgres_search_repository.py +451 -0
- basic_memory/repository/project_info_repository.py +10 -0
- basic_memory/repository/project_repository.py +140 -0
- basic_memory/repository/relation_repository.py +79 -4
- basic_memory/repository/repository.py +148 -29
- basic_memory/repository/search_index_row.py +95 -0
- basic_memory/repository/search_repository.py +79 -268
- basic_memory/repository/search_repository_base.py +241 -0
- basic_memory/repository/sqlite_search_repository.py +437 -0
- basic_memory/runtime.py +61 -0
- basic_memory/schemas/__init__.py +22 -9
- basic_memory/schemas/base.py +131 -12
- basic_memory/schemas/cloud.py +50 -0
- basic_memory/schemas/directory.py +31 -0
- basic_memory/schemas/importer.py +35 -0
- basic_memory/schemas/memory.py +194 -25
- basic_memory/schemas/project_info.py +213 -0
- basic_memory/schemas/prompt.py +90 -0
- basic_memory/schemas/request.py +56 -2
- basic_memory/schemas/response.py +85 -28
- basic_memory/schemas/search.py +36 -35
- basic_memory/schemas/sync_report.py +72 -0
- basic_memory/schemas/v2/__init__.py +27 -0
- basic_memory/schemas/v2/entity.py +133 -0
- basic_memory/schemas/v2/resource.py +47 -0
- basic_memory/services/__init__.py +2 -1
- basic_memory/services/context_service.py +451 -138
- basic_memory/services/directory_service.py +310 -0
- basic_memory/services/entity_service.py +636 -71
- basic_memory/services/exceptions.py +21 -0
- basic_memory/services/file_service.py +402 -33
- basic_memory/services/initialization.py +216 -0
- basic_memory/services/link_resolver.py +50 -56
- basic_memory/services/project_service.py +888 -0
- basic_memory/services/search_service.py +232 -37
- basic_memory/sync/__init__.py +4 -2
- basic_memory/sync/background_sync.py +26 -0
- basic_memory/sync/coordinator.py +160 -0
- basic_memory/sync/sync_service.py +1200 -109
- basic_memory/sync/watch_service.py +432 -135
- basic_memory/telemetry.py +249 -0
- basic_memory/templates/prompts/continue_conversation.hbs +110 -0
- basic_memory/templates/prompts/search.hbs +101 -0
- basic_memory/utils.py +407 -54
- basic_memory-0.17.4.dist-info/METADATA +617 -0
- basic_memory-0.17.4.dist-info/RECORD +193 -0
- {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/WHEEL +1 -1
- {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/entry_points.txt +1 -0
- basic_memory/alembic/README +0 -1
- basic_memory/cli/commands/sync.py +0 -206
- basic_memory/cli/commands/tools.py +0 -157
- basic_memory/mcp/tools/knowledge.py +0 -68
- basic_memory/mcp/tools/memory.py +0 -170
- basic_memory/mcp/tools/notes.py +0 -202
- basic_memory/schemas/discovery.py +0 -28
- basic_memory/sync/file_change_scanner.py +0 -158
- basic_memory/sync/utils.py +0 -31
- basic_memory-0.7.0.dist-info/METADATA +0 -378
- basic_memory-0.7.0.dist-info/RECORD +0 -82
- {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,108 +1,35 @@
|
|
|
1
|
-
"""Repository for search operations.
|
|
1
|
+
"""Repository for search operations.
|
|
2
|
+
|
|
3
|
+
This module provides the search repository interface.
|
|
4
|
+
The actual repository implementations are backend-specific:
|
|
5
|
+
- SQLiteSearchRepository: Uses FTS5 virtual tables
|
|
6
|
+
- PostgresSearchRepository: Uses tsvector/tsquery with GIN indexes
|
|
7
|
+
"""
|
|
2
8
|
|
|
3
|
-
import json
|
|
4
|
-
import time
|
|
5
|
-
from dataclasses import dataclass
|
|
6
9
|
from datetime import datetime
|
|
7
|
-
from typing import List, Optional,
|
|
10
|
+
from typing import List, Optional, Protocol
|
|
8
11
|
|
|
9
|
-
from
|
|
10
|
-
from sqlalchemy import text, Executable, Result
|
|
12
|
+
from sqlalchemy import Result
|
|
11
13
|
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
|
12
14
|
|
|
13
|
-
from basic_memory import
|
|
14
|
-
from basic_memory.
|
|
15
|
+
from basic_memory.config import ConfigManager, DatabaseBackend
|
|
16
|
+
from basic_memory.repository.postgres_search_repository import PostgresSearchRepository
|
|
17
|
+
from basic_memory.repository.search_index_row import SearchIndexRow
|
|
18
|
+
from basic_memory.repository.sqlite_search_repository import SQLiteSearchRepository
|
|
15
19
|
from basic_memory.schemas.search import SearchItemType
|
|
16
20
|
|
|
17
21
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
"""Search result with score and metadata."""
|
|
21
|
-
|
|
22
|
-
id: int
|
|
23
|
-
type: str
|
|
24
|
-
permalink: str
|
|
25
|
-
file_path: str
|
|
26
|
-
metadata: Optional[dict] = None
|
|
27
|
-
|
|
28
|
-
# date values
|
|
29
|
-
created_at: Optional[datetime] = None
|
|
30
|
-
updated_at: Optional[datetime] = None
|
|
31
|
-
|
|
32
|
-
# assigned in result
|
|
33
|
-
score: Optional[float] = None
|
|
34
|
-
|
|
35
|
-
# Type-specific fields
|
|
36
|
-
title: Optional[str] = None # entity
|
|
37
|
-
content: Optional[str] = None # entity, observation
|
|
38
|
-
entity_id: Optional[int] = None # observations
|
|
39
|
-
category: Optional[str] = None # observations
|
|
40
|
-
from_id: Optional[int] = None # relations
|
|
41
|
-
to_id: Optional[int] = None # relations
|
|
42
|
-
relation_type: Optional[str] = None # relations
|
|
43
|
-
|
|
44
|
-
def to_insert(self):
|
|
45
|
-
return {
|
|
46
|
-
"id": self.id,
|
|
47
|
-
"title": self.title,
|
|
48
|
-
"content": self.content,
|
|
49
|
-
"permalink": self.permalink,
|
|
50
|
-
"file_path": self.file_path,
|
|
51
|
-
"type": self.type,
|
|
52
|
-
"metadata": json.dumps(self.metadata),
|
|
53
|
-
"from_id": self.from_id,
|
|
54
|
-
"to_id": self.to_id,
|
|
55
|
-
"relation_type": self.relation_type,
|
|
56
|
-
"entity_id": self.entity_id,
|
|
57
|
-
"category": self.category,
|
|
58
|
-
"created_at": self.created_at if self.created_at else None,
|
|
59
|
-
"updated_at": self.updated_at if self.updated_at else None,
|
|
60
|
-
}
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
class SearchRepository:
|
|
64
|
-
"""Repository for search index operations."""
|
|
65
|
-
|
|
66
|
-
def __init__(self, session_maker: async_sessionmaker[AsyncSession]):
|
|
67
|
-
self.session_maker = session_maker
|
|
22
|
+
class SearchRepository(Protocol):
|
|
23
|
+
"""Protocol defining the search repository interface.
|
|
68
24
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
logger.info("Initializing search index")
|
|
72
|
-
try:
|
|
73
|
-
async with db.scoped_session(self.session_maker) as session:
|
|
74
|
-
await session.execute(CREATE_SEARCH_INDEX)
|
|
75
|
-
await session.commit()
|
|
76
|
-
except Exception as e: # pragma: no cover
|
|
77
|
-
logger.error(f"Error initializing search index: {e}")
|
|
78
|
-
raise e
|
|
25
|
+
Both SQLite and Postgres implementations must satisfy this protocol.
|
|
26
|
+
"""
|
|
79
27
|
|
|
80
|
-
|
|
81
|
-
"""Prepare a search term for FTS5 query.
|
|
28
|
+
project_id: int
|
|
82
29
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
For FTS5:
|
|
88
|
-
- Special characters and phrases need to be quoted
|
|
89
|
-
- Terms with spaces or special chars need quotes
|
|
90
|
-
"""
|
|
91
|
-
if "*" in term:
|
|
92
|
-
return term
|
|
93
|
-
|
|
94
|
-
# List of special characters that need quoting (excluding *)
|
|
95
|
-
special_chars = ["/", "-", ".", " ", "(", ")", "[", "]", '"', "'"]
|
|
96
|
-
|
|
97
|
-
# Check if term contains any special characters
|
|
98
|
-
needs_quotes = any(c in term for c in special_chars)
|
|
99
|
-
|
|
100
|
-
if needs_quotes:
|
|
101
|
-
# If the term already contains quotes, escape them and add a wildcard
|
|
102
|
-
term = term.replace('"', '""')
|
|
103
|
-
term = f'"{term}"*'
|
|
104
|
-
|
|
105
|
-
return term
|
|
30
|
+
async def init_search_index(self) -> None:
|
|
31
|
+
"""Initialize the search index schema."""
|
|
32
|
+
...
|
|
106
33
|
|
|
107
34
|
async def search(
|
|
108
35
|
self,
|
|
@@ -110,181 +37,65 @@ class SearchRepository:
|
|
|
110
37
|
permalink: Optional[str] = None,
|
|
111
38
|
permalink_match: Optional[str] = None,
|
|
112
39
|
title: Optional[str] = None,
|
|
113
|
-
types: Optional[List[
|
|
40
|
+
types: Optional[List[str]] = None,
|
|
114
41
|
after_date: Optional[datetime] = None,
|
|
115
|
-
|
|
42
|
+
search_item_types: Optional[List[SearchItemType]] = None,
|
|
116
43
|
limit: int = 10,
|
|
117
44
|
offset: int = 0,
|
|
118
45
|
) -> List[SearchIndexRow]:
|
|
119
|
-
"""Search across
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
# Build WHERE clause
|
|
176
|
-
where_clause = " AND ".join(conditions) if conditions else "1=1"
|
|
177
|
-
|
|
178
|
-
sql = f"""
|
|
179
|
-
SELECT
|
|
180
|
-
id,
|
|
181
|
-
title,
|
|
182
|
-
permalink,
|
|
183
|
-
file_path,
|
|
184
|
-
type,
|
|
185
|
-
metadata,
|
|
186
|
-
from_id,
|
|
187
|
-
to_id,
|
|
188
|
-
relation_type,
|
|
189
|
-
entity_id,
|
|
190
|
-
content,
|
|
191
|
-
category,
|
|
192
|
-
created_at,
|
|
193
|
-
updated_at,
|
|
194
|
-
bm25(search_index) as score
|
|
195
|
-
FROM search_index
|
|
196
|
-
WHERE {where_clause}
|
|
197
|
-
ORDER BY score ASC {order_by_clause}
|
|
198
|
-
LIMIT :limit
|
|
199
|
-
OFFSET :offset
|
|
200
|
-
"""
|
|
201
|
-
|
|
202
|
-
logger.debug(f"Search {sql} params: {params}")
|
|
203
|
-
async with db.scoped_session(self.session_maker) as session:
|
|
204
|
-
result = await session.execute(text(sql), params)
|
|
205
|
-
rows = result.fetchall()
|
|
206
|
-
|
|
207
|
-
results = [
|
|
208
|
-
SearchIndexRow(
|
|
209
|
-
id=row.id,
|
|
210
|
-
title=row.title,
|
|
211
|
-
permalink=row.permalink,
|
|
212
|
-
file_path=row.file_path,
|
|
213
|
-
type=row.type,
|
|
214
|
-
score=row.score,
|
|
215
|
-
metadata=json.loads(row.metadata),
|
|
216
|
-
from_id=row.from_id,
|
|
217
|
-
to_id=row.to_id,
|
|
218
|
-
relation_type=row.relation_type,
|
|
219
|
-
entity_id=row.entity_id,
|
|
220
|
-
content=row.content,
|
|
221
|
-
category=row.category,
|
|
222
|
-
created_at=row.created_at,
|
|
223
|
-
updated_at=row.updated_at,
|
|
224
|
-
)
|
|
225
|
-
for row in rows
|
|
226
|
-
]
|
|
227
|
-
|
|
228
|
-
logger.debug(f"Found {len(results)} search results")
|
|
229
|
-
for r in results:
|
|
230
|
-
logger.debug(
|
|
231
|
-
f"Search result: type:{r.type} title: {r.title} permalink: {r.permalink} score: {r.score}"
|
|
232
|
-
)
|
|
233
|
-
|
|
234
|
-
return results
|
|
235
|
-
|
|
236
|
-
async def index_item(
|
|
237
|
-
self,
|
|
238
|
-
search_index_row: SearchIndexRow,
|
|
239
|
-
):
|
|
240
|
-
"""Index or update a single item."""
|
|
241
|
-
async with db.scoped_session(self.session_maker) as session:
|
|
242
|
-
# Delete existing record if any
|
|
243
|
-
await session.execute(
|
|
244
|
-
text("DELETE FROM search_index WHERE permalink = :permalink"),
|
|
245
|
-
{"permalink": search_index_row.permalink},
|
|
246
|
-
)
|
|
247
|
-
|
|
248
|
-
# Insert new record
|
|
249
|
-
await session.execute(
|
|
250
|
-
text("""
|
|
251
|
-
INSERT INTO search_index (
|
|
252
|
-
id, title, content, permalink, file_path, type, metadata,
|
|
253
|
-
from_id, to_id, relation_type,
|
|
254
|
-
entity_id, category,
|
|
255
|
-
created_at, updated_at
|
|
256
|
-
) VALUES (
|
|
257
|
-
:id, :title, :content, :permalink, :file_path, :type, :metadata,
|
|
258
|
-
:from_id, :to_id, :relation_type,
|
|
259
|
-
:entity_id, :category,
|
|
260
|
-
:created_at, :updated_at
|
|
261
|
-
)
|
|
262
|
-
"""),
|
|
263
|
-
search_index_row.to_insert(),
|
|
264
|
-
)
|
|
265
|
-
logger.debug(f"indexed row {search_index_row}")
|
|
266
|
-
await session.commit()
|
|
267
|
-
|
|
268
|
-
async def delete_by_permalink(self, permalink: str):
|
|
269
|
-
"""Delete an item from the search index."""
|
|
270
|
-
async with db.scoped_session(self.session_maker) as session:
|
|
271
|
-
await session.execute(
|
|
272
|
-
text("DELETE FROM search_index WHERE permalink = :permalink"),
|
|
273
|
-
{"permalink": permalink},
|
|
274
|
-
)
|
|
275
|
-
await session.commit()
|
|
276
|
-
|
|
277
|
-
async def execute_query(
|
|
278
|
-
self,
|
|
279
|
-
query: Executable,
|
|
280
|
-
params: Dict[str, Any],
|
|
281
|
-
) -> Result[Any]:
|
|
282
|
-
"""Execute a query asynchronously."""
|
|
283
|
-
# logger.debug(f"Executing query: {query}, params: {params}")
|
|
284
|
-
async with db.scoped_session(self.session_maker) as session:
|
|
285
|
-
start_time = time.perf_counter()
|
|
286
|
-
result = await session.execute(query, params)
|
|
287
|
-
end_time = time.perf_counter()
|
|
288
|
-
elapsed_time = end_time - start_time
|
|
289
|
-
logger.debug(f"Query executed successfully in {elapsed_time:.2f}s.")
|
|
290
|
-
return result
|
|
46
|
+
"""Search across indexed content."""
|
|
47
|
+
...
|
|
48
|
+
|
|
49
|
+
async def index_item(self, search_index_row: SearchIndexRow) -> None:
|
|
50
|
+
"""Index a single item."""
|
|
51
|
+
...
|
|
52
|
+
|
|
53
|
+
async def bulk_index_items(self, search_index_rows: List[SearchIndexRow]) -> None:
|
|
54
|
+
"""Index multiple items in a batch."""
|
|
55
|
+
...
|
|
56
|
+
|
|
57
|
+
async def delete_by_permalink(self, permalink: str) -> None:
|
|
58
|
+
"""Delete item by permalink."""
|
|
59
|
+
...
|
|
60
|
+
|
|
61
|
+
async def delete_by_entity_id(self, entity_id: int) -> None:
|
|
62
|
+
"""Delete items by entity ID."""
|
|
63
|
+
...
|
|
64
|
+
|
|
65
|
+
async def execute_query(self, query, params: dict) -> Result:
|
|
66
|
+
"""Execute a raw SQL query."""
|
|
67
|
+
...
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def create_search_repository(
|
|
71
|
+
session_maker: async_sessionmaker[AsyncSession],
|
|
72
|
+
project_id: int,
|
|
73
|
+
database_backend: Optional[DatabaseBackend] = None,
|
|
74
|
+
) -> SearchRepository:
|
|
75
|
+
"""Factory function to create the appropriate search repository based on database backend.
|
|
76
|
+
|
|
77
|
+
Args:
|
|
78
|
+
session_maker: SQLAlchemy async session maker
|
|
79
|
+
project_id: Project ID for the repository
|
|
80
|
+
database_backend: Optional explicit backend. If not provided, reads from ConfigManager.
|
|
81
|
+
Prefer passing explicitly from composition roots.
|
|
82
|
+
|
|
83
|
+
Returns:
|
|
84
|
+
SearchRepository: Backend-appropriate search repository instance
|
|
85
|
+
"""
|
|
86
|
+
# Prefer explicit parameter; fall back to ConfigManager for backwards compatibility
|
|
87
|
+
if database_backend is None:
|
|
88
|
+
config = ConfigManager().config
|
|
89
|
+
database_backend = config.database_backend
|
|
90
|
+
|
|
91
|
+
if database_backend == DatabaseBackend.POSTGRES: # pragma: no cover
|
|
92
|
+
return PostgresSearchRepository(session_maker, project_id=project_id) # pragma: no cover
|
|
93
|
+
else:
|
|
94
|
+
return SQLiteSearchRepository(session_maker, project_id=project_id)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
__all__ = [
|
|
98
|
+
"SearchRepository",
|
|
99
|
+
"SearchIndexRow",
|
|
100
|
+
"create_search_repository",
|
|
101
|
+
]
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
"""Abstract base class for search repository implementations."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import Any, Dict, List, Optional
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
from loguru import logger
|
|
9
|
+
from sqlalchemy import Executable, Result, text
|
|
10
|
+
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
|
11
|
+
|
|
12
|
+
from basic_memory import db
|
|
13
|
+
from basic_memory.schemas.search import SearchItemType
|
|
14
|
+
from basic_memory.repository.search_index_row import SearchIndexRow
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class SearchRepositoryBase(ABC):
|
|
18
|
+
"""Abstract base class for backend-specific search repository implementations.
|
|
19
|
+
|
|
20
|
+
This class defines the common interface that all search repositories must implement,
|
|
21
|
+
regardless of whether they use SQLite FTS5 or Postgres tsvector for full-text search.
|
|
22
|
+
|
|
23
|
+
Concrete implementations:
|
|
24
|
+
- SQLiteSearchRepository: Uses FTS5 virtual tables with MATCH queries
|
|
25
|
+
- PostgresSearchRepository: Uses tsvector/tsquery with GIN indexes
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
def __init__(self, session_maker: async_sessionmaker[AsyncSession], project_id: int):
|
|
29
|
+
"""Initialize with session maker and project_id filter.
|
|
30
|
+
|
|
31
|
+
Args:
|
|
32
|
+
session_maker: SQLAlchemy session maker
|
|
33
|
+
project_id: Project ID to filter all operations by
|
|
34
|
+
|
|
35
|
+
Raises:
|
|
36
|
+
ValueError: If project_id is None or invalid
|
|
37
|
+
"""
|
|
38
|
+
if project_id is None or project_id <= 0: # pragma: no cover
|
|
39
|
+
raise ValueError("A valid project_id is required for SearchRepository")
|
|
40
|
+
|
|
41
|
+
self.session_maker = session_maker
|
|
42
|
+
self.project_id = project_id
|
|
43
|
+
|
|
44
|
+
@abstractmethod
|
|
45
|
+
async def init_search_index(self) -> None:
|
|
46
|
+
"""Create or recreate the search index.
|
|
47
|
+
|
|
48
|
+
Backend-specific implementations:
|
|
49
|
+
- SQLite: CREATE VIRTUAL TABLE using FTS5
|
|
50
|
+
- Postgres: CREATE TABLE with tsvector column and GIN indexes
|
|
51
|
+
"""
|
|
52
|
+
pass
|
|
53
|
+
|
|
54
|
+
@abstractmethod
|
|
55
|
+
def _prepare_search_term(self, term: str, is_prefix: bool = True) -> str:
|
|
56
|
+
"""Prepare a search term for backend-specific query syntax.
|
|
57
|
+
|
|
58
|
+
Args:
|
|
59
|
+
term: The search term to prepare
|
|
60
|
+
is_prefix: Whether to add prefix search capability
|
|
61
|
+
|
|
62
|
+
Returns:
|
|
63
|
+
Formatted search term for the backend
|
|
64
|
+
|
|
65
|
+
Backend-specific implementations:
|
|
66
|
+
- SQLite: Quotes FTS5 special characters, adds * wildcards
|
|
67
|
+
- Postgres: Converts to tsquery syntax with :* prefix operator
|
|
68
|
+
"""
|
|
69
|
+
pass
|
|
70
|
+
|
|
71
|
+
@abstractmethod
|
|
72
|
+
async def search(
|
|
73
|
+
self,
|
|
74
|
+
search_text: Optional[str] = None,
|
|
75
|
+
permalink: Optional[str] = None,
|
|
76
|
+
permalink_match: Optional[str] = None,
|
|
77
|
+
title: Optional[str] = None,
|
|
78
|
+
types: Optional[List[str]] = None,
|
|
79
|
+
after_date: Optional[datetime] = None,
|
|
80
|
+
search_item_types: Optional[List[SearchItemType]] = None,
|
|
81
|
+
limit: int = 10,
|
|
82
|
+
offset: int = 0,
|
|
83
|
+
) -> List[SearchIndexRow]:
|
|
84
|
+
"""Search across all indexed content.
|
|
85
|
+
|
|
86
|
+
Args:
|
|
87
|
+
search_text: Full-text search across title and content
|
|
88
|
+
permalink: Exact permalink match
|
|
89
|
+
permalink_match: Permalink pattern match (supports *)
|
|
90
|
+
title: Title search
|
|
91
|
+
types: Filter by entity types (from metadata.entity_type)
|
|
92
|
+
after_date: Filter by created_at > after_date
|
|
93
|
+
search_item_types: Filter by SearchItemType (ENTITY, OBSERVATION, RELATION)
|
|
94
|
+
limit: Maximum results to return
|
|
95
|
+
offset: Number of results to skip
|
|
96
|
+
|
|
97
|
+
Returns:
|
|
98
|
+
List of SearchIndexRow results with relevance scores
|
|
99
|
+
|
|
100
|
+
Backend-specific implementations:
|
|
101
|
+
- SQLite: Uses MATCH operator and bm25() for scoring
|
|
102
|
+
- Postgres: Uses @@ operator and ts_rank() for scoring
|
|
103
|
+
"""
|
|
104
|
+
pass
|
|
105
|
+
|
|
106
|
+
async def index_item(self, search_index_row: SearchIndexRow) -> None:
|
|
107
|
+
"""Index or update a single item.
|
|
108
|
+
|
|
109
|
+
This implementation is shared across backends as it uses standard SQL INSERT.
|
|
110
|
+
"""
|
|
111
|
+
|
|
112
|
+
async with db.scoped_session(self.session_maker) as session:
|
|
113
|
+
# Delete existing record if any
|
|
114
|
+
await session.execute(
|
|
115
|
+
text(
|
|
116
|
+
"DELETE FROM search_index WHERE permalink = :permalink AND project_id = :project_id"
|
|
117
|
+
),
|
|
118
|
+
{"permalink": search_index_row.permalink, "project_id": self.project_id},
|
|
119
|
+
)
|
|
120
|
+
|
|
121
|
+
# When using text() raw SQL, always serialize JSON to string
|
|
122
|
+
# Both SQLite (TEXT) and Postgres (JSONB) accept JSON strings in raw SQL
|
|
123
|
+
# The database driver/column type will handle conversion
|
|
124
|
+
insert_data = search_index_row.to_insert(serialize_json=True)
|
|
125
|
+
insert_data["project_id"] = self.project_id
|
|
126
|
+
|
|
127
|
+
# Insert new record
|
|
128
|
+
await session.execute(
|
|
129
|
+
text("""
|
|
130
|
+
INSERT INTO search_index (
|
|
131
|
+
id, title, content_stems, content_snippet, permalink, file_path, type, metadata,
|
|
132
|
+
from_id, to_id, relation_type,
|
|
133
|
+
entity_id, category,
|
|
134
|
+
created_at, updated_at,
|
|
135
|
+
project_id
|
|
136
|
+
) VALUES (
|
|
137
|
+
:id, :title, :content_stems, :content_snippet, :permalink, :file_path, :type, :metadata,
|
|
138
|
+
:from_id, :to_id, :relation_type,
|
|
139
|
+
:entity_id, :category,
|
|
140
|
+
:created_at, :updated_at,
|
|
141
|
+
:project_id
|
|
142
|
+
)
|
|
143
|
+
"""),
|
|
144
|
+
insert_data,
|
|
145
|
+
)
|
|
146
|
+
logger.debug(f"indexed row {search_index_row}")
|
|
147
|
+
await session.commit()
|
|
148
|
+
|
|
149
|
+
async def bulk_index_items(self, search_index_rows: List[SearchIndexRow]) -> None:
|
|
150
|
+
"""Index multiple items in a single batch operation.
|
|
151
|
+
|
|
152
|
+
This implementation is shared across backends as it uses standard SQL INSERT.
|
|
153
|
+
|
|
154
|
+
Note: This method assumes that any existing records for the entity_id
|
|
155
|
+
have already been deleted (typically via delete_by_entity_id).
|
|
156
|
+
|
|
157
|
+
Args:
|
|
158
|
+
search_index_rows: List of SearchIndexRow objects to index
|
|
159
|
+
"""
|
|
160
|
+
|
|
161
|
+
if not search_index_rows: # pragma: no cover
|
|
162
|
+
return # pragma: no cover
|
|
163
|
+
|
|
164
|
+
async with db.scoped_session(self.session_maker) as session:
|
|
165
|
+
# When using text() raw SQL, always serialize JSON to string
|
|
166
|
+
# Both SQLite (TEXT) and Postgres (JSONB) accept JSON strings in raw SQL
|
|
167
|
+
# The database driver/column type will handle conversion
|
|
168
|
+
insert_data_list = []
|
|
169
|
+
for row in search_index_rows:
|
|
170
|
+
insert_data = row.to_insert(serialize_json=True)
|
|
171
|
+
insert_data["project_id"] = self.project_id
|
|
172
|
+
insert_data_list.append(insert_data)
|
|
173
|
+
|
|
174
|
+
# Batch insert all records using executemany
|
|
175
|
+
await session.execute(
|
|
176
|
+
text("""
|
|
177
|
+
INSERT INTO search_index (
|
|
178
|
+
id, title, content_stems, content_snippet, permalink, file_path, type, metadata,
|
|
179
|
+
from_id, to_id, relation_type,
|
|
180
|
+
entity_id, category,
|
|
181
|
+
created_at, updated_at,
|
|
182
|
+
project_id
|
|
183
|
+
) VALUES (
|
|
184
|
+
:id, :title, :content_stems, :content_snippet, :permalink, :file_path, :type, :metadata,
|
|
185
|
+
:from_id, :to_id, :relation_type,
|
|
186
|
+
:entity_id, :category,
|
|
187
|
+
:created_at, :updated_at,
|
|
188
|
+
:project_id
|
|
189
|
+
)
|
|
190
|
+
"""),
|
|
191
|
+
insert_data_list,
|
|
192
|
+
)
|
|
193
|
+
logger.debug(f"Bulk indexed {len(search_index_rows)} rows")
|
|
194
|
+
await session.commit()
|
|
195
|
+
|
|
196
|
+
async def delete_by_entity_id(self, entity_id: int) -> None:
|
|
197
|
+
"""Delete all search index entries for an entity.
|
|
198
|
+
|
|
199
|
+
This implementation is shared across backends as it uses standard SQL DELETE.
|
|
200
|
+
"""
|
|
201
|
+
async with db.scoped_session(self.session_maker) as session:
|
|
202
|
+
await session.execute(
|
|
203
|
+
text(
|
|
204
|
+
"DELETE FROM search_index WHERE entity_id = :entity_id AND project_id = :project_id"
|
|
205
|
+
),
|
|
206
|
+
{"entity_id": entity_id, "project_id": self.project_id},
|
|
207
|
+
)
|
|
208
|
+
await session.commit()
|
|
209
|
+
|
|
210
|
+
async def delete_by_permalink(self, permalink: str) -> None:
|
|
211
|
+
"""Delete a search index entry by permalink.
|
|
212
|
+
|
|
213
|
+
This implementation is shared across backends as it uses standard SQL DELETE.
|
|
214
|
+
"""
|
|
215
|
+
async with db.scoped_session(self.session_maker) as session:
|
|
216
|
+
await session.execute(
|
|
217
|
+
text(
|
|
218
|
+
"DELETE FROM search_index WHERE permalink = :permalink AND project_id = :project_id"
|
|
219
|
+
),
|
|
220
|
+
{"permalink": permalink, "project_id": self.project_id},
|
|
221
|
+
)
|
|
222
|
+
await session.commit()
|
|
223
|
+
|
|
224
|
+
async def execute_query(
|
|
225
|
+
self,
|
|
226
|
+
query: Executable,
|
|
227
|
+
params: Dict[str, Any],
|
|
228
|
+
) -> Result[Any]:
|
|
229
|
+
"""Execute a query asynchronously.
|
|
230
|
+
|
|
231
|
+
This implementation is shared across backends for utility query execution.
|
|
232
|
+
"""
|
|
233
|
+
import time
|
|
234
|
+
|
|
235
|
+
async with db.scoped_session(self.session_maker) as session:
|
|
236
|
+
start_time = time.perf_counter()
|
|
237
|
+
result = await session.execute(query, params)
|
|
238
|
+
end_time = time.perf_counter()
|
|
239
|
+
elapsed_time = end_time - start_time
|
|
240
|
+
logger.debug(f"Query executed successfully in {elapsed_time:.2f}s.")
|
|
241
|
+
return result
|