basic-memory 0.7.0__py3-none-any.whl → 0.17.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of basic-memory might be problematic. Click here for more details.

Files changed (195)
  1. basic_memory/__init__.py +5 -1
  2. basic_memory/alembic/alembic.ini +119 -0
  3. basic_memory/alembic/env.py +130 -20
  4. basic_memory/alembic/migrations.py +4 -9
  5. basic_memory/alembic/versions/314f1ea54dc4_add_postgres_full_text_search_support_.py +131 -0
  6. basic_memory/alembic/versions/502b60eaa905_remove_required_from_entity_permalink.py +51 -0
  7. basic_memory/alembic/versions/5fe1ab1ccebe_add_projects_table.py +120 -0
  8. basic_memory/alembic/versions/647e7a75e2cd_project_constraint_fix.py +112 -0
  9. basic_memory/alembic/versions/6830751f5fb6_merge_multiple_heads.py +24 -0
  10. basic_memory/alembic/versions/9d9c1cb7d8f5_add_mtime_and_size_columns_to_entity_.py +49 -0
  11. basic_memory/alembic/versions/a1b2c3d4e5f6_fix_project_foreign_keys.py +49 -0
  12. basic_memory/alembic/versions/a2b3c4d5e6f7_add_search_index_entity_cascade.py +56 -0
  13. basic_memory/alembic/versions/b3c3938bacdb_relation_to_name_unique_index.py +44 -0
  14. basic_memory/alembic/versions/cc7172b46608_update_search_index_schema.py +113 -0
  15. basic_memory/alembic/versions/e7e1f4367280_add_scan_watermark_tracking_to_project.py +37 -0
  16. basic_memory/alembic/versions/f8a9b2c3d4e5_add_pg_trgm_for_fuzzy_link_resolution.py +239 -0
  17. basic_memory/alembic/versions/g9a0b3c4d5e6_add_external_id_to_project_and_entity.py +173 -0
  18. basic_memory/api/app.py +87 -20
  19. basic_memory/api/container.py +133 -0
  20. basic_memory/api/routers/__init__.py +4 -1
  21. basic_memory/api/routers/directory_router.py +84 -0
  22. basic_memory/api/routers/importer_router.py +152 -0
  23. basic_memory/api/routers/knowledge_router.py +180 -23
  24. basic_memory/api/routers/management_router.py +80 -0
  25. basic_memory/api/routers/memory_router.py +9 -64
  26. basic_memory/api/routers/project_router.py +460 -0
  27. basic_memory/api/routers/prompt_router.py +260 -0
  28. basic_memory/api/routers/resource_router.py +136 -11
  29. basic_memory/api/routers/search_router.py +5 -5
  30. basic_memory/api/routers/utils.py +169 -0
  31. basic_memory/api/template_loader.py +292 -0
  32. basic_memory/api/v2/__init__.py +35 -0
  33. basic_memory/api/v2/routers/__init__.py +21 -0
  34. basic_memory/api/v2/routers/directory_router.py +93 -0
  35. basic_memory/api/v2/routers/importer_router.py +181 -0
  36. basic_memory/api/v2/routers/knowledge_router.py +427 -0
  37. basic_memory/api/v2/routers/memory_router.py +130 -0
  38. basic_memory/api/v2/routers/project_router.py +359 -0
  39. basic_memory/api/v2/routers/prompt_router.py +269 -0
  40. basic_memory/api/v2/routers/resource_router.py +286 -0
  41. basic_memory/api/v2/routers/search_router.py +73 -0
  42. basic_memory/cli/app.py +80 -10
  43. basic_memory/cli/auth.py +300 -0
  44. basic_memory/cli/commands/__init__.py +15 -2
  45. basic_memory/cli/commands/cloud/__init__.py +6 -0
  46. basic_memory/cli/commands/cloud/api_client.py +127 -0
  47. basic_memory/cli/commands/cloud/bisync_commands.py +110 -0
  48. basic_memory/cli/commands/cloud/cloud_utils.py +108 -0
  49. basic_memory/cli/commands/cloud/core_commands.py +195 -0
  50. basic_memory/cli/commands/cloud/rclone_commands.py +397 -0
  51. basic_memory/cli/commands/cloud/rclone_config.py +110 -0
  52. basic_memory/cli/commands/cloud/rclone_installer.py +263 -0
  53. basic_memory/cli/commands/cloud/upload.py +240 -0
  54. basic_memory/cli/commands/cloud/upload_command.py +124 -0
  55. basic_memory/cli/commands/command_utils.py +99 -0
  56. basic_memory/cli/commands/db.py +87 -12
  57. basic_memory/cli/commands/format.py +198 -0
  58. basic_memory/cli/commands/import_chatgpt.py +47 -223
  59. basic_memory/cli/commands/import_claude_conversations.py +48 -171
  60. basic_memory/cli/commands/import_claude_projects.py +53 -160
  61. basic_memory/cli/commands/import_memory_json.py +55 -111
  62. basic_memory/cli/commands/mcp.py +67 -11
  63. basic_memory/cli/commands/project.py +889 -0
  64. basic_memory/cli/commands/status.py +52 -34
  65. basic_memory/cli/commands/telemetry.py +81 -0
  66. basic_memory/cli/commands/tool.py +341 -0
  67. basic_memory/cli/container.py +84 -0
  68. basic_memory/cli/main.py +14 -6
  69. basic_memory/config.py +580 -26
  70. basic_memory/db.py +285 -28
  71. basic_memory/deps/__init__.py +293 -0
  72. basic_memory/deps/config.py +26 -0
  73. basic_memory/deps/db.py +56 -0
  74. basic_memory/deps/importers.py +200 -0
  75. basic_memory/deps/projects.py +238 -0
  76. basic_memory/deps/repositories.py +179 -0
  77. basic_memory/deps/services.py +480 -0
  78. basic_memory/deps.py +16 -185
  79. basic_memory/file_utils.py +318 -54
  80. basic_memory/ignore_utils.py +297 -0
  81. basic_memory/importers/__init__.py +27 -0
  82. basic_memory/importers/base.py +100 -0
  83. basic_memory/importers/chatgpt_importer.py +245 -0
  84. basic_memory/importers/claude_conversations_importer.py +192 -0
  85. basic_memory/importers/claude_projects_importer.py +184 -0
  86. basic_memory/importers/memory_json_importer.py +128 -0
  87. basic_memory/importers/utils.py +61 -0
  88. basic_memory/markdown/entity_parser.py +182 -23
  89. basic_memory/markdown/markdown_processor.py +70 -7
  90. basic_memory/markdown/plugins.py +43 -23
  91. basic_memory/markdown/schemas.py +1 -1
  92. basic_memory/markdown/utils.py +38 -14
  93. basic_memory/mcp/async_client.py +135 -4
  94. basic_memory/mcp/clients/__init__.py +28 -0
  95. basic_memory/mcp/clients/directory.py +70 -0
  96. basic_memory/mcp/clients/knowledge.py +176 -0
  97. basic_memory/mcp/clients/memory.py +120 -0
  98. basic_memory/mcp/clients/project.py +89 -0
  99. basic_memory/mcp/clients/resource.py +71 -0
  100. basic_memory/mcp/clients/search.py +65 -0
  101. basic_memory/mcp/container.py +110 -0
  102. basic_memory/mcp/project_context.py +155 -0
  103. basic_memory/mcp/prompts/__init__.py +19 -0
  104. basic_memory/mcp/prompts/ai_assistant_guide.py +70 -0
  105. basic_memory/mcp/prompts/continue_conversation.py +62 -0
  106. basic_memory/mcp/prompts/recent_activity.py +188 -0
  107. basic_memory/mcp/prompts/search.py +57 -0
  108. basic_memory/mcp/prompts/utils.py +162 -0
  109. basic_memory/mcp/resources/ai_assistant_guide.md +283 -0
  110. basic_memory/mcp/resources/project_info.py +71 -0
  111. basic_memory/mcp/server.py +61 -9
  112. basic_memory/mcp/tools/__init__.py +33 -21
  113. basic_memory/mcp/tools/build_context.py +120 -0
  114. basic_memory/mcp/tools/canvas.py +152 -0
  115. basic_memory/mcp/tools/chatgpt_tools.py +190 -0
  116. basic_memory/mcp/tools/delete_note.py +249 -0
  117. basic_memory/mcp/tools/edit_note.py +325 -0
  118. basic_memory/mcp/tools/list_directory.py +157 -0
  119. basic_memory/mcp/tools/move_note.py +549 -0
  120. basic_memory/mcp/tools/project_management.py +204 -0
  121. basic_memory/mcp/tools/read_content.py +281 -0
  122. basic_memory/mcp/tools/read_note.py +265 -0
  123. basic_memory/mcp/tools/recent_activity.py +528 -0
  124. basic_memory/mcp/tools/search.py +377 -24
  125. basic_memory/mcp/tools/utils.py +402 -16
  126. basic_memory/mcp/tools/view_note.py +78 -0
  127. basic_memory/mcp/tools/write_note.py +230 -0
  128. basic_memory/models/__init__.py +3 -2
  129. basic_memory/models/knowledge.py +82 -17
  130. basic_memory/models/project.py +93 -0
  131. basic_memory/models/search.py +68 -8
  132. basic_memory/project_resolver.py +222 -0
  133. basic_memory/repository/__init__.py +2 -0
  134. basic_memory/repository/entity_repository.py +437 -8
  135. basic_memory/repository/observation_repository.py +36 -3
  136. basic_memory/repository/postgres_search_repository.py +451 -0
  137. basic_memory/repository/project_info_repository.py +10 -0
  138. basic_memory/repository/project_repository.py +140 -0
  139. basic_memory/repository/relation_repository.py +79 -4
  140. basic_memory/repository/repository.py +148 -29
  141. basic_memory/repository/search_index_row.py +95 -0
  142. basic_memory/repository/search_repository.py +79 -268
  143. basic_memory/repository/search_repository_base.py +241 -0
  144. basic_memory/repository/sqlite_search_repository.py +437 -0
  145. basic_memory/runtime.py +61 -0
  146. basic_memory/schemas/__init__.py +22 -9
  147. basic_memory/schemas/base.py +131 -12
  148. basic_memory/schemas/cloud.py +50 -0
  149. basic_memory/schemas/directory.py +31 -0
  150. basic_memory/schemas/importer.py +35 -0
  151. basic_memory/schemas/memory.py +194 -25
  152. basic_memory/schemas/project_info.py +213 -0
  153. basic_memory/schemas/prompt.py +90 -0
  154. basic_memory/schemas/request.py +56 -2
  155. basic_memory/schemas/response.py +85 -28
  156. basic_memory/schemas/search.py +36 -35
  157. basic_memory/schemas/sync_report.py +72 -0
  158. basic_memory/schemas/v2/__init__.py +27 -0
  159. basic_memory/schemas/v2/entity.py +133 -0
  160. basic_memory/schemas/v2/resource.py +47 -0
  161. basic_memory/services/__init__.py +2 -1
  162. basic_memory/services/context_service.py +451 -138
  163. basic_memory/services/directory_service.py +310 -0
  164. basic_memory/services/entity_service.py +636 -71
  165. basic_memory/services/exceptions.py +21 -0
  166. basic_memory/services/file_service.py +402 -33
  167. basic_memory/services/initialization.py +216 -0
  168. basic_memory/services/link_resolver.py +50 -56
  169. basic_memory/services/project_service.py +888 -0
  170. basic_memory/services/search_service.py +232 -37
  171. basic_memory/sync/__init__.py +4 -2
  172. basic_memory/sync/background_sync.py +26 -0
  173. basic_memory/sync/coordinator.py +160 -0
  174. basic_memory/sync/sync_service.py +1200 -109
  175. basic_memory/sync/watch_service.py +432 -135
  176. basic_memory/telemetry.py +249 -0
  177. basic_memory/templates/prompts/continue_conversation.hbs +110 -0
  178. basic_memory/templates/prompts/search.hbs +101 -0
  179. basic_memory/utils.py +407 -54
  180. basic_memory-0.17.4.dist-info/METADATA +617 -0
  181. basic_memory-0.17.4.dist-info/RECORD +193 -0
  182. {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/WHEEL +1 -1
  183. {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/entry_points.txt +1 -0
  184. basic_memory/alembic/README +0 -1
  185. basic_memory/cli/commands/sync.py +0 -206
  186. basic_memory/cli/commands/tools.py +0 -157
  187. basic_memory/mcp/tools/knowledge.py +0 -68
  188. basic_memory/mcp/tools/memory.py +0 -170
  189. basic_memory/mcp/tools/notes.py +0 -202
  190. basic_memory/schemas/discovery.py +0 -28
  191. basic_memory/sync/file_change_scanner.py +0 -158
  192. basic_memory/sync/utils.py +0 -31
  193. basic_memory-0.7.0.dist-info/METADATA +0 -378
  194. basic_memory-0.7.0.dist-info/RECORD +0 -82
  195. {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/licenses/LICENSE +0 -0
@@ -1,108 +1,35 @@
1
- """Repository for search operations."""
1
+ """Repository for search operations.
2
+
3
+ This module provides the search repository interface.
4
+ The actual repository implementations are backend-specific:
5
+ - SQLiteSearchRepository: Uses FTS5 virtual tables
6
+ - PostgresSearchRepository: Uses tsvector/tsquery with GIN indexes
7
+ """
2
8
 
3
- import json
4
- import time
5
- from dataclasses import dataclass
6
9
  from datetime import datetime
7
- from typing import List, Optional, Any, Dict
10
+ from typing import List, Optional, Protocol
8
11
 
9
- from loguru import logger
10
- from sqlalchemy import text, Executable, Result
12
+ from sqlalchemy import Result
11
13
  from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
12
14
 
13
- from basic_memory import db
14
- from basic_memory.models.search import CREATE_SEARCH_INDEX
15
+ from basic_memory.config import ConfigManager, DatabaseBackend
16
+ from basic_memory.repository.postgres_search_repository import PostgresSearchRepository
17
+ from basic_memory.repository.search_index_row import SearchIndexRow
18
+ from basic_memory.repository.sqlite_search_repository import SQLiteSearchRepository
15
19
  from basic_memory.schemas.search import SearchItemType
16
20
 
17
21
 
18
- @dataclass
19
- class SearchIndexRow:
20
- """Search result with score and metadata."""
21
-
22
- id: int
23
- type: str
24
- permalink: str
25
- file_path: str
26
- metadata: Optional[dict] = None
27
-
28
- # date values
29
- created_at: Optional[datetime] = None
30
- updated_at: Optional[datetime] = None
31
-
32
- # assigned in result
33
- score: Optional[float] = None
34
-
35
- # Type-specific fields
36
- title: Optional[str] = None # entity
37
- content: Optional[str] = None # entity, observation
38
- entity_id: Optional[int] = None # observations
39
- category: Optional[str] = None # observations
40
- from_id: Optional[int] = None # relations
41
- to_id: Optional[int] = None # relations
42
- relation_type: Optional[str] = None # relations
43
-
44
- def to_insert(self):
45
- return {
46
- "id": self.id,
47
- "title": self.title,
48
- "content": self.content,
49
- "permalink": self.permalink,
50
- "file_path": self.file_path,
51
- "type": self.type,
52
- "metadata": json.dumps(self.metadata),
53
- "from_id": self.from_id,
54
- "to_id": self.to_id,
55
- "relation_type": self.relation_type,
56
- "entity_id": self.entity_id,
57
- "category": self.category,
58
- "created_at": self.created_at if self.created_at else None,
59
- "updated_at": self.updated_at if self.updated_at else None,
60
- }
61
-
62
-
63
- class SearchRepository:
64
- """Repository for search index operations."""
65
-
66
- def __init__(self, session_maker: async_sessionmaker[AsyncSession]):
67
- self.session_maker = session_maker
22
+ class SearchRepository(Protocol):
23
+ """Protocol defining the search repository interface.
68
24
 
69
- async def init_search_index(self):
70
- """Create or recreate the search index."""
71
- logger.info("Initializing search index")
72
- try:
73
- async with db.scoped_session(self.session_maker) as session:
74
- await session.execute(CREATE_SEARCH_INDEX)
75
- await session.commit()
76
- except Exception as e: # pragma: no cover
77
- logger.error(f"Error initializing search index: {e}")
78
- raise e
25
+ Both SQLite and Postgres implementations must satisfy this protocol.
26
+ """
79
27
 
80
- def _prepare_search_term(self, term: str, is_prefix: bool = True) -> str:
81
- """Prepare a search term for FTS5 query.
28
+ project_id: int
82
29
 
83
- Args:
84
- term: The search term to prepare
85
- is_prefix: Whether to add prefix search capability (* suffix)
86
-
87
- For FTS5:
88
- - Special characters and phrases need to be quoted
89
- - Terms with spaces or special chars need quotes
90
- """
91
- if "*" in term:
92
- return term
93
-
94
- # List of special characters that need quoting (excluding *)
95
- special_chars = ["/", "-", ".", " ", "(", ")", "[", "]", '"', "'"]
96
-
97
- # Check if term contains any special characters
98
- needs_quotes = any(c in term for c in special_chars)
99
-
100
- if needs_quotes:
101
- # If the term already contains quotes, escape them and add a wildcard
102
- term = term.replace('"', '""')
103
- term = f'"{term}"*'
104
-
105
- return term
30
+ async def init_search_index(self) -> None:
31
+ """Initialize the search index schema."""
32
+ ...
106
33
 
107
34
  async def search(
108
35
  self,
@@ -110,181 +37,65 @@ class SearchRepository:
110
37
  permalink: Optional[str] = None,
111
38
  permalink_match: Optional[str] = None,
112
39
  title: Optional[str] = None,
113
- types: Optional[List[SearchItemType]] = None,
40
+ types: Optional[List[str]] = None,
114
41
  after_date: Optional[datetime] = None,
115
- entity_types: Optional[List[str]] = None,
42
+ search_item_types: Optional[List[SearchItemType]] = None,
116
43
  limit: int = 10,
117
44
  offset: int = 0,
118
45
  ) -> List[SearchIndexRow]:
119
- """Search across all indexed content with fuzzy matching."""
120
- conditions = []
121
- params = {}
122
- order_by_clause = ""
123
-
124
- # Handle text search for title and content
125
- if search_text:
126
- search_text = self._prepare_search_term(search_text.strip())
127
- params["text"] = search_text
128
- conditions.append("(title MATCH :text OR content MATCH :text)")
129
-
130
- # Handle title match search
131
- if title:
132
- title_text = self._prepare_search_term(title.strip())
133
- params["text"] = title_text
134
- conditions.append("title MATCH :text")
135
-
136
- # Handle permalink exact search
137
- if permalink:
138
- params["permalink"] = permalink
139
- conditions.append("permalink = :permalink")
140
-
141
- # Handle permalink match search, supports *
142
- if permalink_match:
143
- # Clean and prepare permalink for FTS5 GLOB match
144
- permalink_text = self._prepare_search_term(
145
- permalink_match.lower().strip(), is_prefix=False
146
- )
147
- params["permalink"] = permalink_text
148
- if "*" in permalink_match:
149
- conditions.append("permalink GLOB :permalink")
150
- else:
151
- conditions.append("permalink MATCH :permalink")
152
-
153
- # Handle type filter
154
- if types:
155
- type_list = ", ".join(f"'{t.value}'" for t in types)
156
- conditions.append(f"type IN ({type_list})")
157
-
158
- # Handle entity type filter
159
- if entity_types:
160
- entity_type_list = ", ".join(f"'{t}'" for t in entity_types)
161
- conditions.append(f"json_extract(metadata, '$.entity_type') IN ({entity_type_list})")
162
-
163
- # Handle date filter using datetime() for proper comparison
164
- if after_date:
165
- params["after_date"] = after_date
166
- conditions.append("datetime(created_at) > datetime(:after_date)")
167
-
168
- # order by most recent first
169
- order_by_clause = ", updated_at DESC"
170
-
171
- # set limit on search query
172
- params["limit"] = limit
173
- params["offset"] = offset
174
-
175
- # Build WHERE clause
176
- where_clause = " AND ".join(conditions) if conditions else "1=1"
177
-
178
- sql = f"""
179
- SELECT
180
- id,
181
- title,
182
- permalink,
183
- file_path,
184
- type,
185
- metadata,
186
- from_id,
187
- to_id,
188
- relation_type,
189
- entity_id,
190
- content,
191
- category,
192
- created_at,
193
- updated_at,
194
- bm25(search_index) as score
195
- FROM search_index
196
- WHERE {where_clause}
197
- ORDER BY score ASC {order_by_clause}
198
- LIMIT :limit
199
- OFFSET :offset
200
- """
201
-
202
- logger.debug(f"Search {sql} params: {params}")
203
- async with db.scoped_session(self.session_maker) as session:
204
- result = await session.execute(text(sql), params)
205
- rows = result.fetchall()
206
-
207
- results = [
208
- SearchIndexRow(
209
- id=row.id,
210
- title=row.title,
211
- permalink=row.permalink,
212
- file_path=row.file_path,
213
- type=row.type,
214
- score=row.score,
215
- metadata=json.loads(row.metadata),
216
- from_id=row.from_id,
217
- to_id=row.to_id,
218
- relation_type=row.relation_type,
219
- entity_id=row.entity_id,
220
- content=row.content,
221
- category=row.category,
222
- created_at=row.created_at,
223
- updated_at=row.updated_at,
224
- )
225
- for row in rows
226
- ]
227
-
228
- logger.debug(f"Found {len(results)} search results")
229
- for r in results:
230
- logger.debug(
231
- f"Search result: type:{r.type} title: {r.title} permalink: {r.permalink} score: {r.score}"
232
- )
233
-
234
- return results
235
-
236
- async def index_item(
237
- self,
238
- search_index_row: SearchIndexRow,
239
- ):
240
- """Index or update a single item."""
241
- async with db.scoped_session(self.session_maker) as session:
242
- # Delete existing record if any
243
- await session.execute(
244
- text("DELETE FROM search_index WHERE permalink = :permalink"),
245
- {"permalink": search_index_row.permalink},
246
- )
247
-
248
- # Insert new record
249
- await session.execute(
250
- text("""
251
- INSERT INTO search_index (
252
- id, title, content, permalink, file_path, type, metadata,
253
- from_id, to_id, relation_type,
254
- entity_id, category,
255
- created_at, updated_at
256
- ) VALUES (
257
- :id, :title, :content, :permalink, :file_path, :type, :metadata,
258
- :from_id, :to_id, :relation_type,
259
- :entity_id, :category,
260
- :created_at, :updated_at
261
- )
262
- """),
263
- search_index_row.to_insert(),
264
- )
265
- logger.debug(f"indexed row {search_index_row}")
266
- await session.commit()
267
-
268
- async def delete_by_permalink(self, permalink: str):
269
- """Delete an item from the search index."""
270
- async with db.scoped_session(self.session_maker) as session:
271
- await session.execute(
272
- text("DELETE FROM search_index WHERE permalink = :permalink"),
273
- {"permalink": permalink},
274
- )
275
- await session.commit()
276
-
277
- async def execute_query(
278
- self,
279
- query: Executable,
280
- params: Dict[str, Any],
281
- ) -> Result[Any]:
282
- """Execute a query asynchronously."""
283
- # logger.debug(f"Executing query: {query}, params: {params}")
284
- async with db.scoped_session(self.session_maker) as session:
285
- start_time = time.perf_counter()
286
- result = await session.execute(query, params)
287
- end_time = time.perf_counter()
288
- elapsed_time = end_time - start_time
289
- logger.debug(f"Query executed successfully in {elapsed_time:.2f}s.")
290
- return result
46
+ """Search across indexed content."""
47
+ ...
48
+
49
+ async def index_item(self, search_index_row: SearchIndexRow) -> None:
50
+ """Index a single item."""
51
+ ...
52
+
53
+ async def bulk_index_items(self, search_index_rows: List[SearchIndexRow]) -> None:
54
+ """Index multiple items in a batch."""
55
+ ...
56
+
57
+ async def delete_by_permalink(self, permalink: str) -> None:
58
+ """Delete item by permalink."""
59
+ ...
60
+
61
+ async def delete_by_entity_id(self, entity_id: int) -> None:
62
+ """Delete items by entity ID."""
63
+ ...
64
+
65
+ async def execute_query(self, query, params: dict) -> Result:
66
+ """Execute a raw SQL query."""
67
+ ...
68
+
69
+
70
+ def create_search_repository(
71
+ session_maker: async_sessionmaker[AsyncSession],
72
+ project_id: int,
73
+ database_backend: Optional[DatabaseBackend] = None,
74
+ ) -> SearchRepository:
75
+ """Factory function to create the appropriate search repository based on database backend.
76
+
77
+ Args:
78
+ session_maker: SQLAlchemy async session maker
79
+ project_id: Project ID for the repository
80
+ database_backend: Optional explicit backend. If not provided, reads from ConfigManager.
81
+ Prefer passing explicitly from composition roots.
82
+
83
+ Returns:
84
+ SearchRepository: Backend-appropriate search repository instance
85
+ """
86
+ # Prefer explicit parameter; fall back to ConfigManager for backwards compatibility
87
+ if database_backend is None:
88
+ config = ConfigManager().config
89
+ database_backend = config.database_backend
90
+
91
+ if database_backend == DatabaseBackend.POSTGRES: # pragma: no cover
92
+ return PostgresSearchRepository(session_maker, project_id=project_id) # pragma: no cover
93
+ else:
94
+ return SQLiteSearchRepository(session_maker, project_id=project_id)
95
+
96
+
97
+ __all__ = [
98
+ "SearchRepository",
99
+ "SearchIndexRow",
100
+ "create_search_repository",
101
+ ]
@@ -0,0 +1,241 @@
1
+ """Abstract base class for search repository implementations."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from datetime import datetime
5
+ from typing import Any, Dict, List, Optional
6
+
7
+
8
+ from loguru import logger
9
+ from sqlalchemy import Executable, Result, text
10
+ from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
11
+
12
+ from basic_memory import db
13
+ from basic_memory.schemas.search import SearchItemType
14
+ from basic_memory.repository.search_index_row import SearchIndexRow
15
+
16
+
17
+ class SearchRepositoryBase(ABC):
18
+ """Abstract base class for backend-specific search repository implementations.
19
+
20
+ This class defines the common interface that all search repositories must implement,
21
+ regardless of whether they use SQLite FTS5 or Postgres tsvector for full-text search.
22
+
23
+ Concrete implementations:
24
+ - SQLiteSearchRepository: Uses FTS5 virtual tables with MATCH queries
25
+ - PostgresSearchRepository: Uses tsvector/tsquery with GIN indexes
26
+ """
27
+
28
+ def __init__(self, session_maker: async_sessionmaker[AsyncSession], project_id: int):
29
+ """Initialize with session maker and project_id filter.
30
+
31
+ Args:
32
+ session_maker: SQLAlchemy session maker
33
+ project_id: Project ID to filter all operations by
34
+
35
+ Raises:
36
+ ValueError: If project_id is None or invalid
37
+ """
38
+ if project_id is None or project_id <= 0: # pragma: no cover
39
+ raise ValueError("A valid project_id is required for SearchRepository")
40
+
41
+ self.session_maker = session_maker
42
+ self.project_id = project_id
43
+
44
+ @abstractmethod
45
+ async def init_search_index(self) -> None:
46
+ """Create or recreate the search index.
47
+
48
+ Backend-specific implementations:
49
+ - SQLite: CREATE VIRTUAL TABLE using FTS5
50
+ - Postgres: CREATE TABLE with tsvector column and GIN indexes
51
+ """
52
+ pass
53
+
54
+ @abstractmethod
55
+ def _prepare_search_term(self, term: str, is_prefix: bool = True) -> str:
56
+ """Prepare a search term for backend-specific query syntax.
57
+
58
+ Args:
59
+ term: The search term to prepare
60
+ is_prefix: Whether to add prefix search capability
61
+
62
+ Returns:
63
+ Formatted search term for the backend
64
+
65
+ Backend-specific implementations:
66
+ - SQLite: Quotes FTS5 special characters, adds * wildcards
67
+ - Postgres: Converts to tsquery syntax with :* prefix operator
68
+ """
69
+ pass
70
+
71
+ @abstractmethod
72
+ async def search(
73
+ self,
74
+ search_text: Optional[str] = None,
75
+ permalink: Optional[str] = None,
76
+ permalink_match: Optional[str] = None,
77
+ title: Optional[str] = None,
78
+ types: Optional[List[str]] = None,
79
+ after_date: Optional[datetime] = None,
80
+ search_item_types: Optional[List[SearchItemType]] = None,
81
+ limit: int = 10,
82
+ offset: int = 0,
83
+ ) -> List[SearchIndexRow]:
84
+ """Search across all indexed content.
85
+
86
+ Args:
87
+ search_text: Full-text search across title and content
88
+ permalink: Exact permalink match
89
+ permalink_match: Permalink pattern match (supports *)
90
+ title: Title search
91
+ types: Filter by entity types (from metadata.entity_type)
92
+ after_date: Filter by created_at > after_date
93
+ search_item_types: Filter by SearchItemType (ENTITY, OBSERVATION, RELATION)
94
+ limit: Maximum results to return
95
+ offset: Number of results to skip
96
+
97
+ Returns:
98
+ List of SearchIndexRow results with relevance scores
99
+
100
+ Backend-specific implementations:
101
+ - SQLite: Uses MATCH operator and bm25() for scoring
102
+ - Postgres: Uses @@ operator and ts_rank() for scoring
103
+ """
104
+ pass
105
+
106
+ async def index_item(self, search_index_row: SearchIndexRow) -> None:
107
+ """Index or update a single item.
108
+
109
+ This implementation is shared across backends as it uses standard SQL INSERT.
110
+ """
111
+
112
+ async with db.scoped_session(self.session_maker) as session:
113
+ # Delete existing record if any
114
+ await session.execute(
115
+ text(
116
+ "DELETE FROM search_index WHERE permalink = :permalink AND project_id = :project_id"
117
+ ),
118
+ {"permalink": search_index_row.permalink, "project_id": self.project_id},
119
+ )
120
+
121
+ # When using text() raw SQL, always serialize JSON to string
122
+ # Both SQLite (TEXT) and Postgres (JSONB) accept JSON strings in raw SQL
123
+ # The database driver/column type will handle conversion
124
+ insert_data = search_index_row.to_insert(serialize_json=True)
125
+ insert_data["project_id"] = self.project_id
126
+
127
+ # Insert new record
128
+ await session.execute(
129
+ text("""
130
+ INSERT INTO search_index (
131
+ id, title, content_stems, content_snippet, permalink, file_path, type, metadata,
132
+ from_id, to_id, relation_type,
133
+ entity_id, category,
134
+ created_at, updated_at,
135
+ project_id
136
+ ) VALUES (
137
+ :id, :title, :content_stems, :content_snippet, :permalink, :file_path, :type, :metadata,
138
+ :from_id, :to_id, :relation_type,
139
+ :entity_id, :category,
140
+ :created_at, :updated_at,
141
+ :project_id
142
+ )
143
+ """),
144
+ insert_data,
145
+ )
146
+ logger.debug(f"indexed row {search_index_row}")
147
+ await session.commit()
148
+
149
+ async def bulk_index_items(self, search_index_rows: List[SearchIndexRow]) -> None:
150
+ """Index multiple items in a single batch operation.
151
+
152
+ This implementation is shared across backends as it uses standard SQL INSERT.
153
+
154
+ Note: This method assumes that any existing records for the entity_id
155
+ have already been deleted (typically via delete_by_entity_id).
156
+
157
+ Args:
158
+ search_index_rows: List of SearchIndexRow objects to index
159
+ """
160
+
161
+ if not search_index_rows: # pragma: no cover
162
+ return # pragma: no cover
163
+
164
+ async with db.scoped_session(self.session_maker) as session:
165
+ # When using text() raw SQL, always serialize JSON to string
166
+ # Both SQLite (TEXT) and Postgres (JSONB) accept JSON strings in raw SQL
167
+ # The database driver/column type will handle conversion
168
+ insert_data_list = []
169
+ for row in search_index_rows:
170
+ insert_data = row.to_insert(serialize_json=True)
171
+ insert_data["project_id"] = self.project_id
172
+ insert_data_list.append(insert_data)
173
+
174
+ # Batch insert all records using executemany
175
+ await session.execute(
176
+ text("""
177
+ INSERT INTO search_index (
178
+ id, title, content_stems, content_snippet, permalink, file_path, type, metadata,
179
+ from_id, to_id, relation_type,
180
+ entity_id, category,
181
+ created_at, updated_at,
182
+ project_id
183
+ ) VALUES (
184
+ :id, :title, :content_stems, :content_snippet, :permalink, :file_path, :type, :metadata,
185
+ :from_id, :to_id, :relation_type,
186
+ :entity_id, :category,
187
+ :created_at, :updated_at,
188
+ :project_id
189
+ )
190
+ """),
191
+ insert_data_list,
192
+ )
193
+ logger.debug(f"Bulk indexed {len(search_index_rows)} rows")
194
+ await session.commit()
195
+
196
+ async def delete_by_entity_id(self, entity_id: int) -> None:
197
+ """Delete all search index entries for an entity.
198
+
199
+ This implementation is shared across backends as it uses standard SQL DELETE.
200
+ """
201
+ async with db.scoped_session(self.session_maker) as session:
202
+ await session.execute(
203
+ text(
204
+ "DELETE FROM search_index WHERE entity_id = :entity_id AND project_id = :project_id"
205
+ ),
206
+ {"entity_id": entity_id, "project_id": self.project_id},
207
+ )
208
+ await session.commit()
209
+
210
+ async def delete_by_permalink(self, permalink: str) -> None:
211
+ """Delete a search index entry by permalink.
212
+
213
+ This implementation is shared across backends as it uses standard SQL DELETE.
214
+ """
215
+ async with db.scoped_session(self.session_maker) as session:
216
+ await session.execute(
217
+ text(
218
+ "DELETE FROM search_index WHERE permalink = :permalink AND project_id = :project_id"
219
+ ),
220
+ {"permalink": permalink, "project_id": self.project_id},
221
+ )
222
+ await session.commit()
223
+
224
+ async def execute_query(
225
+ self,
226
+ query: Executable,
227
+ params: Dict[str, Any],
228
+ ) -> Result[Any]:
229
+ """Execute a query asynchronously.
230
+
231
+ This implementation is shared across backends for utility query execution.
232
+ """
233
+ import time
234
+
235
+ async with db.scoped_session(self.session_maker) as session:
236
+ start_time = time.perf_counter()
237
+ result = await session.execute(query, params)
238
+ end_time = time.perf_counter()
239
+ elapsed_time = end_time - start_time
240
+ logger.debug(f"Query executed successfully in {elapsed_time:.2f}s.")
241
+ return result