basic-memory 0.16.1__py3-none-any.whl → 0.17.4__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of basic-memory might be problematic. Click here for more details.

Files changed (143)
  1. basic_memory/__init__.py +1 -1
  2. basic_memory/alembic/env.py +112 -26
  3. basic_memory/alembic/versions/314f1ea54dc4_add_postgres_full_text_search_support_.py +131 -0
  4. basic_memory/alembic/versions/5fe1ab1ccebe_add_projects_table.py +15 -3
  5. basic_memory/alembic/versions/647e7a75e2cd_project_constraint_fix.py +44 -36
  6. basic_memory/alembic/versions/6830751f5fb6_merge_multiple_heads.py +24 -0
  7. basic_memory/alembic/versions/a2b3c4d5e6f7_add_search_index_entity_cascade.py +56 -0
  8. basic_memory/alembic/versions/cc7172b46608_update_search_index_schema.py +13 -0
  9. basic_memory/alembic/versions/f8a9b2c3d4e5_add_pg_trgm_for_fuzzy_link_resolution.py +239 -0
  10. basic_memory/alembic/versions/g9a0b3c4d5e6_add_external_id_to_project_and_entity.py +173 -0
  11. basic_memory/api/app.py +45 -24
  12. basic_memory/api/container.py +133 -0
  13. basic_memory/api/routers/knowledge_router.py +17 -5
  14. basic_memory/api/routers/project_router.py +68 -14
  15. basic_memory/api/routers/resource_router.py +37 -27
  16. basic_memory/api/routers/utils.py +53 -14
  17. basic_memory/api/v2/__init__.py +35 -0
  18. basic_memory/api/v2/routers/__init__.py +21 -0
  19. basic_memory/api/v2/routers/directory_router.py +93 -0
  20. basic_memory/api/v2/routers/importer_router.py +181 -0
  21. basic_memory/api/v2/routers/knowledge_router.py +427 -0
  22. basic_memory/api/v2/routers/memory_router.py +130 -0
  23. basic_memory/api/v2/routers/project_router.py +359 -0
  24. basic_memory/api/v2/routers/prompt_router.py +269 -0
  25. basic_memory/api/v2/routers/resource_router.py +286 -0
  26. basic_memory/api/v2/routers/search_router.py +73 -0
  27. basic_memory/cli/app.py +43 -7
  28. basic_memory/cli/auth.py +27 -4
  29. basic_memory/cli/commands/__init__.py +3 -1
  30. basic_memory/cli/commands/cloud/api_client.py +20 -5
  31. basic_memory/cli/commands/cloud/cloud_utils.py +13 -6
  32. basic_memory/cli/commands/cloud/rclone_commands.py +110 -14
  33. basic_memory/cli/commands/cloud/rclone_installer.py +18 -4
  34. basic_memory/cli/commands/cloud/upload.py +10 -3
  35. basic_memory/cli/commands/command_utils.py +52 -4
  36. basic_memory/cli/commands/db.py +78 -19
  37. basic_memory/cli/commands/format.py +198 -0
  38. basic_memory/cli/commands/import_chatgpt.py +12 -8
  39. basic_memory/cli/commands/import_claude_conversations.py +12 -8
  40. basic_memory/cli/commands/import_claude_projects.py +12 -8
  41. basic_memory/cli/commands/import_memory_json.py +12 -8
  42. basic_memory/cli/commands/mcp.py +8 -26
  43. basic_memory/cli/commands/project.py +22 -9
  44. basic_memory/cli/commands/status.py +3 -2
  45. basic_memory/cli/commands/telemetry.py +81 -0
  46. basic_memory/cli/container.py +84 -0
  47. basic_memory/cli/main.py +7 -0
  48. basic_memory/config.py +177 -77
  49. basic_memory/db.py +183 -77
  50. basic_memory/deps/__init__.py +293 -0
  51. basic_memory/deps/config.py +26 -0
  52. basic_memory/deps/db.py +56 -0
  53. basic_memory/deps/importers.py +200 -0
  54. basic_memory/deps/projects.py +238 -0
  55. basic_memory/deps/repositories.py +179 -0
  56. basic_memory/deps/services.py +480 -0
  57. basic_memory/deps.py +14 -409
  58. basic_memory/file_utils.py +212 -3
  59. basic_memory/ignore_utils.py +5 -5
  60. basic_memory/importers/base.py +40 -19
  61. basic_memory/importers/chatgpt_importer.py +17 -4
  62. basic_memory/importers/claude_conversations_importer.py +27 -12
  63. basic_memory/importers/claude_projects_importer.py +50 -14
  64. basic_memory/importers/memory_json_importer.py +36 -16
  65. basic_memory/importers/utils.py +5 -2
  66. basic_memory/markdown/entity_parser.py +62 -23
  67. basic_memory/markdown/markdown_processor.py +67 -4
  68. basic_memory/markdown/plugins.py +4 -2
  69. basic_memory/markdown/utils.py +10 -1
  70. basic_memory/mcp/async_client.py +1 -0
  71. basic_memory/mcp/clients/__init__.py +28 -0
  72. basic_memory/mcp/clients/directory.py +70 -0
  73. basic_memory/mcp/clients/knowledge.py +176 -0
  74. basic_memory/mcp/clients/memory.py +120 -0
  75. basic_memory/mcp/clients/project.py +89 -0
  76. basic_memory/mcp/clients/resource.py +71 -0
  77. basic_memory/mcp/clients/search.py +65 -0
  78. basic_memory/mcp/container.py +110 -0
  79. basic_memory/mcp/project_context.py +47 -33
  80. basic_memory/mcp/prompts/ai_assistant_guide.py +2 -2
  81. basic_memory/mcp/prompts/recent_activity.py +2 -2
  82. basic_memory/mcp/prompts/utils.py +3 -3
  83. basic_memory/mcp/server.py +58 -0
  84. basic_memory/mcp/tools/build_context.py +14 -14
  85. basic_memory/mcp/tools/canvas.py +34 -12
  86. basic_memory/mcp/tools/chatgpt_tools.py +4 -1
  87. basic_memory/mcp/tools/delete_note.py +31 -7
  88. basic_memory/mcp/tools/edit_note.py +14 -9
  89. basic_memory/mcp/tools/list_directory.py +7 -17
  90. basic_memory/mcp/tools/move_note.py +35 -31
  91. basic_memory/mcp/tools/project_management.py +29 -25
  92. basic_memory/mcp/tools/read_content.py +13 -3
  93. basic_memory/mcp/tools/read_note.py +24 -14
  94. basic_memory/mcp/tools/recent_activity.py +32 -38
  95. basic_memory/mcp/tools/search.py +17 -10
  96. basic_memory/mcp/tools/utils.py +28 -0
  97. basic_memory/mcp/tools/view_note.py +2 -1
  98. basic_memory/mcp/tools/write_note.py +37 -14
  99. basic_memory/models/knowledge.py +15 -2
  100. basic_memory/models/project.py +7 -1
  101. basic_memory/models/search.py +58 -2
  102. basic_memory/project_resolver.py +222 -0
  103. basic_memory/repository/entity_repository.py +210 -3
  104. basic_memory/repository/observation_repository.py +1 -0
  105. basic_memory/repository/postgres_search_repository.py +451 -0
  106. basic_memory/repository/project_repository.py +38 -1
  107. basic_memory/repository/relation_repository.py +58 -2
  108. basic_memory/repository/repository.py +1 -0
  109. basic_memory/repository/search_index_row.py +95 -0
  110. basic_memory/repository/search_repository.py +77 -615
  111. basic_memory/repository/search_repository_base.py +241 -0
  112. basic_memory/repository/sqlite_search_repository.py +437 -0
  113. basic_memory/runtime.py +61 -0
  114. basic_memory/schemas/base.py +36 -6
  115. basic_memory/schemas/directory.py +2 -1
  116. basic_memory/schemas/memory.py +9 -2
  117. basic_memory/schemas/project_info.py +2 -0
  118. basic_memory/schemas/response.py +84 -27
  119. basic_memory/schemas/search.py +5 -0
  120. basic_memory/schemas/sync_report.py +1 -1
  121. basic_memory/schemas/v2/__init__.py +27 -0
  122. basic_memory/schemas/v2/entity.py +133 -0
  123. basic_memory/schemas/v2/resource.py +47 -0
  124. basic_memory/services/context_service.py +219 -43
  125. basic_memory/services/directory_service.py +26 -11
  126. basic_memory/services/entity_service.py +68 -33
  127. basic_memory/services/file_service.py +131 -16
  128. basic_memory/services/initialization.py +51 -26
  129. basic_memory/services/link_resolver.py +1 -0
  130. basic_memory/services/project_service.py +68 -43
  131. basic_memory/services/search_service.py +75 -16
  132. basic_memory/sync/__init__.py +2 -1
  133. basic_memory/sync/coordinator.py +160 -0
  134. basic_memory/sync/sync_service.py +135 -115
  135. basic_memory/sync/watch_service.py +32 -12
  136. basic_memory/telemetry.py +249 -0
  137. basic_memory/utils.py +96 -75
  138. {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/METADATA +129 -5
  139. basic_memory-0.17.4.dist-info/RECORD +193 -0
  140. {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/WHEEL +1 -1
  141. basic_memory-0.16.1.dist-info/RECORD +0 -148
  142. {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/entry_points.txt +0 -0
  143. {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/licenses/LICENSE +0 -0
@@ -1,365 +1,35 @@
1
- """Repository for search operations."""
1
+ """Repository for search operations.
2
+
3
+ This module provides the search repository interface.
4
+ The actual repository implementations are backend-specific:
5
+ - SQLiteSearchRepository: Uses FTS5 virtual tables
6
+ - PostgresSearchRepository: Uses tsvector/tsquery with GIN indexes
7
+ """
2
8
 
3
- import json
4
- import re
5
- import time
6
- from dataclasses import dataclass
7
9
  from datetime import datetime
8
- from typing import Any, Dict, List, Optional
9
- from pathlib import Path
10
+ from typing import List, Optional, Protocol
10
11
 
11
- from loguru import logger
12
- from sqlalchemy import Executable, Result, text
12
+ from sqlalchemy import Result
13
13
  from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
14
14
 
15
- from basic_memory import db
16
- from basic_memory.models.search import CREATE_SEARCH_INDEX
15
+ from basic_memory.config import ConfigManager, DatabaseBackend
16
+ from basic_memory.repository.postgres_search_repository import PostgresSearchRepository
17
+ from basic_memory.repository.search_index_row import SearchIndexRow
18
+ from basic_memory.repository.sqlite_search_repository import SQLiteSearchRepository
17
19
  from basic_memory.schemas.search import SearchItemType
18
20
 
19
21
 
20
- @dataclass
21
- class SearchIndexRow:
22
- """Search result with score and metadata."""
23
-
24
- project_id: int
25
- id: int
26
- type: str
27
- file_path: str
28
-
29
- # date values
30
- created_at: datetime
31
- updated_at: datetime
32
-
33
- permalink: Optional[str] = None
34
- metadata: Optional[dict] = None
35
-
36
- # assigned in result
37
- score: Optional[float] = None
38
-
39
- # Type-specific fields
40
- title: Optional[str] = None # entity
41
- content_stems: Optional[str] = None # entity, observation
42
- content_snippet: Optional[str] = None # entity, observation
43
- entity_id: Optional[int] = None # observations
44
- category: Optional[str] = None # observations
45
- from_id: Optional[int] = None # relations
46
- to_id: Optional[int] = None # relations
47
- relation_type: Optional[str] = None # relations
48
-
49
- @property
50
- def content(self):
51
- return self.content_snippet
52
-
53
- @property
54
- def directory(self) -> str:
55
- """Extract directory part from file_path.
56
-
57
- For a file at "projects/notes/ideas.md", returns "/projects/notes"
58
- For a file at root level "README.md", returns "/"
59
- """
60
- if not self.type == SearchItemType.ENTITY.value and not self.file_path:
61
- return ""
62
-
63
- # Normalize path separators to handle both Windows (\) and Unix (/) paths
64
- normalized_path = Path(self.file_path).as_posix()
65
-
66
- # Split the path by slashes
67
- parts = normalized_path.split("/")
68
-
69
- # If there's only one part (e.g., "README.md"), it's at the root
70
- if len(parts) <= 1:
71
- return "/"
72
-
73
- # Join all parts except the last one (filename)
74
- directory_path = "/".join(parts[:-1])
75
- return f"/{directory_path}"
76
-
77
- def to_insert(self):
78
- return {
79
- "id": self.id,
80
- "title": self.title,
81
- "content_stems": self.content_stems,
82
- "content_snippet": self.content_snippet,
83
- "permalink": self.permalink,
84
- "file_path": self.file_path,
85
- "type": self.type,
86
- "metadata": json.dumps(self.metadata),
87
- "from_id": self.from_id,
88
- "to_id": self.to_id,
89
- "relation_type": self.relation_type,
90
- "entity_id": self.entity_id,
91
- "category": self.category,
92
- "created_at": self.created_at if self.created_at else None,
93
- "updated_at": self.updated_at if self.updated_at else None,
94
- "project_id": self.project_id,
95
- }
96
-
97
-
98
- class SearchRepository:
99
- """Repository for search index operations."""
100
-
101
- def __init__(self, session_maker: async_sessionmaker[AsyncSession], project_id: int):
102
- """Initialize with session maker and project_id filter.
103
-
104
- Args:
105
- session_maker: SQLAlchemy session maker
106
- project_id: Project ID to filter all operations by
107
-
108
- Raises:
109
- ValueError: If project_id is None or invalid
110
- """
111
- if project_id is None or project_id <= 0: # pragma: no cover
112
- raise ValueError("A valid project_id is required for SearchRepository")
113
-
114
- self.session_maker = session_maker
115
- self.project_id = project_id
116
-
117
- async def init_search_index(self):
118
- """Create or recreate the search index."""
119
- logger.info("Initializing search index")
120
- try:
121
- async with db.scoped_session(self.session_maker) as session:
122
- await session.execute(CREATE_SEARCH_INDEX)
123
- await session.commit()
124
- except Exception as e: # pragma: no cover
125
- logger.error(f"Error initializing search index: {e}")
126
- raise e
127
-
128
- def _prepare_boolean_query(self, query: str) -> str:
129
- """Prepare a Boolean query by quoting individual terms while preserving operators.
130
-
131
- Args:
132
- query: A Boolean query like "tier1-test AND unicode" or "(hello OR world) NOT test"
133
-
134
- Returns:
135
- A properly formatted Boolean query with quoted terms that need quoting
136
- """
137
- # Define Boolean operators and their boundaries
138
- boolean_pattern = r"(\bAND\b|\bOR\b|\bNOT\b)"
139
-
140
- # Split the query by Boolean operators, keeping the operators
141
- parts = re.split(boolean_pattern, query)
142
-
143
- processed_parts = []
144
- for part in parts:
145
- part = part.strip()
146
- if not part:
147
- continue
148
-
149
- # If it's a Boolean operator, keep it as is
150
- if part in ["AND", "OR", "NOT"]:
151
- processed_parts.append(part)
152
- else:
153
- # Handle parentheses specially - they should be preserved for grouping
154
- if "(" in part or ")" in part:
155
- # Parse parenthetical expressions carefully
156
- processed_part = self._prepare_parenthetical_term(part)
157
- processed_parts.append(processed_part)
158
- else:
159
- # This is a search term - for Boolean queries, don't add prefix wildcards
160
- prepared_term = self._prepare_single_term(part, is_prefix=False)
161
- processed_parts.append(prepared_term)
162
-
163
- return " ".join(processed_parts)
164
-
165
- def _prepare_parenthetical_term(self, term: str) -> str:
166
- """Prepare a term that contains parentheses, preserving the parentheses for grouping.
167
-
168
- Args:
169
- term: A term that may contain parentheses like "(hello" or "world)" or "(hello OR world)"
170
-
171
- Returns:
172
- A properly formatted term with parentheses preserved
173
- """
174
- # Handle terms that start/end with parentheses but may contain quotable content
175
- result = ""
176
- i = 0
177
- while i < len(term):
178
- if term[i] in "()":
179
- # Preserve parentheses as-is
180
- result += term[i]
181
- i += 1
182
- else:
183
- # Find the next parenthesis or end of string
184
- start = i
185
- while i < len(term) and term[i] not in "()":
186
- i += 1
187
-
188
- # Extract the content between parentheses
189
- content = term[start:i].strip()
190
- if content:
191
- # Only quote if it actually needs quoting (has hyphens, special chars, etc)
192
- # but don't quote if it's just simple words
193
- if self._needs_quoting(content):
194
- escaped_content = content.replace('"', '""')
195
- result += f'"{escaped_content}"'
196
- else:
197
- result += content
198
-
199
- return result
200
-
201
- def _needs_quoting(self, term: str) -> bool:
202
- """Check if a term needs to be quoted for FTS5 safety.
203
-
204
- Args:
205
- term: The term to check
206
-
207
- Returns:
208
- True if the term should be quoted
209
- """
210
- if not term or not term.strip():
211
- return False
212
-
213
- # Characters that indicate we should quote (excluding parentheses which are valid syntax)
214
- needs_quoting_chars = [
215
- " ",
216
- ".",
217
- ":",
218
- ";",
219
- ",",
220
- "<",
221
- ">",
222
- "?",
223
- "/",
224
- "-",
225
- "'",
226
- '"',
227
- "[",
228
- "]",
229
- "{",
230
- "}",
231
- "+",
232
- "!",
233
- "@",
234
- "#",
235
- "$",
236
- "%",
237
- "^",
238
- "&",
239
- "=",
240
- "|",
241
- "\\",
242
- "~",
243
- "`",
244
- ]
245
-
246
- return any(c in term for c in needs_quoting_chars)
247
-
248
- def _prepare_single_term(self, term: str, is_prefix: bool = True) -> str:
249
- """Prepare a single search term (no Boolean operators).
250
-
251
- Args:
252
- term: A single search term
253
- is_prefix: Whether to add prefix search capability (* suffix)
22
+ class SearchRepository(Protocol):
23
+ """Protocol defining the search repository interface.
254
24
 
255
- Returns:
256
- A properly formatted single term
257
- """
258
- if not term or not term.strip():
259
- return term
25
+ Both SQLite and Postgres implementations must satisfy this protocol.
26
+ """
260
27
 
261
- term = term.strip()
262
-
263
- # Check if term is already a proper wildcard pattern (alphanumeric + *)
264
- # e.g., "hello*", "test*world" - these should be left alone
265
- if "*" in term and all(c.isalnum() or c in "*_-" for c in term):
266
- return term
267
-
268
- # Characters that can cause FTS5 syntax errors when used as operators
269
- # We're more conservative here - only quote when we detect problematic patterns
270
- problematic_chars = [
271
- '"',
272
- "'",
273
- "(",
274
- ")",
275
- "[",
276
- "]",
277
- "{",
278
- "}",
279
- "+",
280
- "!",
281
- "@",
282
- "#",
283
- "$",
284
- "%",
285
- "^",
286
- "&",
287
- "=",
288
- "|",
289
- "\\",
290
- "~",
291
- "`",
292
- ]
293
-
294
- # Characters that indicate we should quote (spaces, dots, colons, etc.)
295
- # Adding hyphens here because FTS5 can have issues with hyphens followed by wildcards
296
- needs_quoting_chars = [" ", ".", ":", ";", ",", "<", ">", "?", "/", "-"]
297
-
298
- # Check if term needs quoting
299
- has_problematic = any(c in term for c in problematic_chars)
300
- has_spaces_or_special = any(c in term for c in needs_quoting_chars)
301
-
302
- if has_problematic or has_spaces_or_special:
303
- # Handle multi-word queries differently from special character queries
304
- if " " in term and not any(c in term for c in problematic_chars):
305
- # Check if any individual word contains special characters that need quoting
306
- words = term.strip().split()
307
- has_special_in_words = any(
308
- any(c in word for c in needs_quoting_chars if c != " ") for word in words
309
- )
310
-
311
- if not has_special_in_words:
312
- # For multi-word queries with simple words (like "emoji unicode"),
313
- # use boolean AND to handle word order variations
314
- if is_prefix:
315
- # Add prefix wildcard to each word for better matching
316
- prepared_words = [f"{word}*" for word in words if word]
317
- else:
318
- prepared_words = words
319
- term = " AND ".join(prepared_words)
320
- else:
321
- # If any word has special characters, quote the entire phrase
322
- escaped_term = term.replace('"', '""')
323
- if is_prefix and not ("/" in term and term.endswith(".md")):
324
- term = f'"{escaped_term}"*'
325
- else:
326
- term = f'"{escaped_term}"'
327
- else:
328
- # For terms with problematic characters or file paths, use exact phrase matching
329
- # Escape any existing quotes by doubling them
330
- escaped_term = term.replace('"', '""')
331
- # Quote the entire term to handle special characters safely
332
- if is_prefix and not ("/" in term and term.endswith(".md")):
333
- # For search terms (not file paths), add prefix matching
334
- term = f'"{escaped_term}"*'
335
- else:
336
- # For file paths, use exact matching
337
- term = f'"{escaped_term}"'
338
- elif is_prefix:
339
- # Only add wildcard for simple terms without special characters
340
- term = f"{term}*"
341
-
342
- return term
343
-
344
- def _prepare_search_term(self, term: str, is_prefix: bool = True) -> str:
345
- """Prepare a search term for FTS5 query.
346
-
347
- Args:
348
- term: The search term to prepare
349
- is_prefix: Whether to add prefix search capability (* suffix)
350
-
351
- For FTS5:
352
- - Boolean operators (AND, OR, NOT) are preserved for complex queries
353
- - Terms with FTS5 special characters are quoted to prevent syntax errors
354
- - Simple terms get prefix wildcards for better matching
355
- """
356
- # Check for explicit boolean operators - if present, process as Boolean query
357
- boolean_operators = [" AND ", " OR ", " NOT "]
358
- if any(op in f" {term} " for op in boolean_operators):
359
- return self._prepare_boolean_query(term)
28
+ project_id: int
360
29
 
361
- # For non-Boolean queries, use the single term preparation logic
362
- return self._prepare_single_term(term, is_prefix)
30
+ async def init_search_index(self) -> None:
31
+ """Initialize the search index schema."""
32
+ ...
363
33
 
364
34
  async def search(
365
35
  self,
@@ -373,267 +43,59 @@ class SearchRepository:
373
43
  limit: int = 10,
374
44
  offset: int = 0,
375
45
  ) -> List[SearchIndexRow]:
376
- """Search across all indexed content with fuzzy matching."""
377
- conditions = []
378
- params = {}
379
- order_by_clause = ""
380
-
381
- # Handle text search for title and content
382
- if search_text:
383
- # Skip FTS for wildcard-only queries that would cause "unknown special query" errors
384
- if search_text.strip() == "*" or search_text.strip() == "":
385
- # For wildcard searches, don't add any text conditions - return all results
386
- pass
387
- else:
388
- # Use _prepare_search_term to handle both Boolean and non-Boolean queries
389
- processed_text = self._prepare_search_term(search_text.strip())
390
- params["text"] = processed_text
391
- conditions.append("(title MATCH :text OR content_stems MATCH :text)")
392
-
393
- # Handle title match search
394
- if title:
395
- title_text = self._prepare_search_term(title.strip(), is_prefix=False)
396
- params["title_text"] = title_text
397
- conditions.append("title MATCH :title_text")
398
-
399
- # Handle permalink exact search
400
- if permalink:
401
- params["permalink"] = permalink
402
- conditions.append("permalink = :permalink")
403
-
404
- # Handle permalink match search, supports *
405
- if permalink_match:
406
- # For GLOB patterns, don't use _prepare_search_term as it will quote slashes
407
- # GLOB patterns need to preserve their syntax
408
- permalink_text = permalink_match.lower().strip()
409
- params["permalink"] = permalink_text
410
- if "*" in permalink_match:
411
- conditions.append("permalink GLOB :permalink")
412
- else:
413
- # For exact matches without *, we can use FTS5 MATCH
414
- # but only prepare the term if it doesn't look like a path
415
- if "/" in permalink_text:
416
- conditions.append("permalink = :permalink")
417
- else:
418
- permalink_text = self._prepare_search_term(permalink_text, is_prefix=False)
419
- params["permalink"] = permalink_text
420
- conditions.append("permalink MATCH :permalink")
421
-
422
- # Handle entity type filter
423
- if search_item_types:
424
- type_list = ", ".join(f"'{t.value}'" for t in search_item_types)
425
- conditions.append(f"type IN ({type_list})")
426
-
427
- # Handle type filter
428
- if types:
429
- type_list = ", ".join(f"'{t}'" for t in types)
430
- conditions.append(f"json_extract(metadata, '$.entity_type') IN ({type_list})")
431
-
432
- # Handle date filter using datetime() for proper comparison
433
- if after_date:
434
- params["after_date"] = after_date
435
- conditions.append("datetime(created_at) > datetime(:after_date)")
436
-
437
- # order by most recent first
438
- order_by_clause = ", updated_at DESC"
439
-
440
- # Always filter by project_id
441
- params["project_id"] = self.project_id
442
- conditions.append("project_id = :project_id")
443
-
444
- # set limit on search query
445
- params["limit"] = limit
446
- params["offset"] = offset
447
-
448
- # Build WHERE clause
449
- where_clause = " AND ".join(conditions) if conditions else "1=1"
450
-
451
- sql = f"""
452
- SELECT
453
- project_id,
454
- id,
455
- title,
456
- permalink,
457
- file_path,
458
- type,
459
- metadata,
460
- from_id,
461
- to_id,
462
- relation_type,
463
- entity_id,
464
- content_snippet,
465
- category,
466
- created_at,
467
- updated_at,
468
- bm25(search_index) as score
469
- FROM search_index
470
- WHERE {where_clause}
471
- ORDER BY score ASC {order_by_clause}
472
- LIMIT :limit
473
- OFFSET :offset
474
- """
475
-
476
- logger.trace(f"Search {sql} params: {params}")
477
- try:
478
- async with db.scoped_session(self.session_maker) as session:
479
- result = await session.execute(text(sql), params)
480
- rows = result.fetchall()
481
- except Exception as e:
482
- # Handle FTS5 syntax errors and provide user-friendly feedback
483
- if "fts5: syntax error" in str(e).lower(): # pragma: no cover
484
- logger.warning(f"FTS5 syntax error for search term: {search_text}, error: {e}")
485
- # Return empty results rather than crashing
486
- return []
487
- else:
488
- # Re-raise other database errors
489
- logger.error(f"Database error during search: {e}")
490
- raise
491
-
492
- results = [
493
- SearchIndexRow(
494
- project_id=self.project_id,
495
- id=row.id,
496
- title=row.title,
497
- permalink=row.permalink,
498
- file_path=row.file_path,
499
- type=row.type,
500
- score=row.score,
501
- metadata=json.loads(row.metadata),
502
- from_id=row.from_id,
503
- to_id=row.to_id,
504
- relation_type=row.relation_type,
505
- entity_id=row.entity_id,
506
- content_snippet=row.content_snippet,
507
- category=row.category,
508
- created_at=row.created_at,
509
- updated_at=row.updated_at,
510
- )
511
- for row in rows
512
- ]
513
-
514
- logger.trace(f"Found {len(results)} search results")
515
- for r in results:
516
- logger.trace(
517
- f"Search result: project_id: {r.project_id} type:{r.type} title: {r.title} permalink: {r.permalink} score: {r.score}"
518
- )
519
-
520
- return results
521
-
522
- async def index_item(
523
- self,
524
- search_index_row: SearchIndexRow,
525
- ):
526
- """Index or update a single item."""
527
- async with db.scoped_session(self.session_maker) as session:
528
- # Delete existing record if any
529
- await session.execute(
530
- text(
531
- "DELETE FROM search_index WHERE permalink = :permalink AND project_id = :project_id"
532
- ),
533
- {"permalink": search_index_row.permalink, "project_id": self.project_id},
534
- )
535
-
536
- # Prepare data for insert with project_id
537
- insert_data = search_index_row.to_insert()
538
- insert_data["project_id"] = self.project_id
539
-
540
- # Insert new record
541
- await session.execute(
542
- text("""
543
- INSERT INTO search_index (
544
- id, title, content_stems, content_snippet, permalink, file_path, type, metadata,
545
- from_id, to_id, relation_type,
546
- entity_id, category,
547
- created_at, updated_at,
548
- project_id
549
- ) VALUES (
550
- :id, :title, :content_stems, :content_snippet, :permalink, :file_path, :type, :metadata,
551
- :from_id, :to_id, :relation_type,
552
- :entity_id, :category,
553
- :created_at, :updated_at,
554
- :project_id
555
- )
556
- """),
557
- insert_data,
558
- )
559
- logger.debug(f"indexed row {search_index_row}")
560
- await session.commit()
561
-
562
- async def bulk_index_items(self, search_index_rows: List[SearchIndexRow]):
563
- """Index multiple items in a single batch operation.
564
-
565
- Note: This method assumes that any existing records for the entity_id
566
- have already been deleted (typically via delete_by_entity_id).
567
-
568
- Args:
569
- search_index_rows: List of SearchIndexRow objects to index
570
- """
571
- if not search_index_rows:
572
- return
573
-
574
- async with db.scoped_session(self.session_maker) as session:
575
- # Prepare all insert data with project_id
576
- insert_data_list = []
577
- for row in search_index_rows:
578
- insert_data = row.to_insert()
579
- insert_data["project_id"] = self.project_id
580
- insert_data_list.append(insert_data)
581
-
582
- # Batch insert all records using executemany
583
- await session.execute(
584
- text("""
585
- INSERT INTO search_index (
586
- id, title, content_stems, content_snippet, permalink, file_path, type, metadata,
587
- from_id, to_id, relation_type,
588
- entity_id, category,
589
- created_at, updated_at,
590
- project_id
591
- ) VALUES (
592
- :id, :title, :content_stems, :content_snippet, :permalink, :file_path, :type, :metadata,
593
- :from_id, :to_id, :relation_type,
594
- :entity_id, :category,
595
- :created_at, :updated_at,
596
- :project_id
597
- )
598
- """),
599
- insert_data_list,
600
- )
601
- logger.debug(f"Bulk indexed {len(search_index_rows)} rows")
602
- await session.commit()
603
-
604
- async def delete_by_entity_id(self, entity_id: int):
605
- """Delete an item from the search index by entity_id."""
606
- async with db.scoped_session(self.session_maker) as session:
607
- await session.execute(
608
- text(
609
- "DELETE FROM search_index WHERE entity_id = :entity_id AND project_id = :project_id"
610
- ),
611
- {"entity_id": entity_id, "project_id": self.project_id},
612
- )
613
- await session.commit()
614
-
615
- async def delete_by_permalink(self, permalink: str):
616
- """Delete an item from the search index."""
617
- async with db.scoped_session(self.session_maker) as session:
618
- await session.execute(
619
- text(
620
- "DELETE FROM search_index WHERE permalink = :permalink AND project_id = :project_id"
621
- ),
622
- {"permalink": permalink, "project_id": self.project_id},
623
- )
624
- await session.commit()
625
-
626
- async def execute_query(
627
- self,
628
- query: Executable,
629
- params: Dict[str, Any],
630
- ) -> Result[Any]:
631
- """Execute a query asynchronously."""
632
- # logger.debug(f"Executing query: {query}, params: {params}")
633
- async with db.scoped_session(self.session_maker) as session:
634
- start_time = time.perf_counter()
635
- result = await session.execute(query, params)
636
- end_time = time.perf_counter()
637
- elapsed_time = end_time - start_time
638
- logger.debug(f"Query executed successfully in {elapsed_time:.2f}s.")
639
- return result
46
+ """Search across indexed content."""
47
+ ...
48
+
49
+ async def index_item(self, search_index_row: SearchIndexRow) -> None:
50
+ """Index a single item."""
51
+ ...
52
+
53
+ async def bulk_index_items(self, search_index_rows: List[SearchIndexRow]) -> None:
54
+ """Index multiple items in a batch."""
55
+ ...
56
+
57
+ async def delete_by_permalink(self, permalink: str) -> None:
58
+ """Delete item by permalink."""
59
+ ...
60
+
61
+ async def delete_by_entity_id(self, entity_id: int) -> None:
62
+ """Delete items by entity ID."""
63
+ ...
64
+
65
+ async def execute_query(self, query, params: dict) -> Result:
66
+ """Execute a raw SQL query."""
67
+ ...
68
+
69
+
70
+ def create_search_repository(
71
+ session_maker: async_sessionmaker[AsyncSession],
72
+ project_id: int,
73
+ database_backend: Optional[DatabaseBackend] = None,
74
+ ) -> SearchRepository:
75
+ """Factory function to create the appropriate search repository based on database backend.
76
+
77
+ Args:
78
+ session_maker: SQLAlchemy async session maker
79
+ project_id: Project ID for the repository
80
+ database_backend: Optional explicit backend. If not provided, reads from ConfigManager.
81
+ Prefer passing explicitly from composition roots.
82
+
83
+ Returns:
84
+ SearchRepository: Backend-appropriate search repository instance
85
+ """
86
+ # Prefer explicit parameter; fall back to ConfigManager for backwards compatibility
87
+ if database_backend is None:
88
+ config = ConfigManager().config
89
+ database_backend = config.database_backend
90
+
91
+ if database_backend == DatabaseBackend.POSTGRES: # pragma: no cover
92
+ return PostgresSearchRepository(session_maker, project_id=project_id) # pragma: no cover
93
+ else:
94
+ return SQLiteSearchRepository(session_maker, project_id=project_id)
95
+
96
+
97
+ __all__ = [
98
+ "SearchRepository",
99
+ "SearchIndexRow",
100
+ "create_search_repository",
101
+ ]