basic-memory 0.7.0__py3-none-any.whl → 0.16.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of basic-memory might be problematic. Click here for more details.

Files changed (150)
  1. basic_memory/__init__.py +5 -1
  2. basic_memory/alembic/alembic.ini +119 -0
  3. basic_memory/alembic/env.py +27 -3
  4. basic_memory/alembic/migrations.py +4 -9
  5. basic_memory/alembic/versions/502b60eaa905_remove_required_from_entity_permalink.py +51 -0
  6. basic_memory/alembic/versions/5fe1ab1ccebe_add_projects_table.py +108 -0
  7. basic_memory/alembic/versions/647e7a75e2cd_project_constraint_fix.py +104 -0
  8. basic_memory/alembic/versions/9d9c1cb7d8f5_add_mtime_and_size_columns_to_entity_.py +49 -0
  9. basic_memory/alembic/versions/a1b2c3d4e5f6_fix_project_foreign_keys.py +49 -0
  10. basic_memory/alembic/versions/b3c3938bacdb_relation_to_name_unique_index.py +44 -0
  11. basic_memory/alembic/versions/cc7172b46608_update_search_index_schema.py +100 -0
  12. basic_memory/alembic/versions/e7e1f4367280_add_scan_watermark_tracking_to_project.py +37 -0
  13. basic_memory/api/app.py +64 -18
  14. basic_memory/api/routers/__init__.py +4 -1
  15. basic_memory/api/routers/directory_router.py +84 -0
  16. basic_memory/api/routers/importer_router.py +152 -0
  17. basic_memory/api/routers/knowledge_router.py +166 -21
  18. basic_memory/api/routers/management_router.py +80 -0
  19. basic_memory/api/routers/memory_router.py +9 -64
  20. basic_memory/api/routers/project_router.py +406 -0
  21. basic_memory/api/routers/prompt_router.py +260 -0
  22. basic_memory/api/routers/resource_router.py +119 -4
  23. basic_memory/api/routers/search_router.py +5 -5
  24. basic_memory/api/routers/utils.py +130 -0
  25. basic_memory/api/template_loader.py +292 -0
  26. basic_memory/cli/app.py +43 -9
  27. basic_memory/cli/auth.py +277 -0
  28. basic_memory/cli/commands/__init__.py +13 -2
  29. basic_memory/cli/commands/cloud/__init__.py +6 -0
  30. basic_memory/cli/commands/cloud/api_client.py +112 -0
  31. basic_memory/cli/commands/cloud/bisync_commands.py +110 -0
  32. basic_memory/cli/commands/cloud/cloud_utils.py +101 -0
  33. basic_memory/cli/commands/cloud/core_commands.py +195 -0
  34. basic_memory/cli/commands/cloud/rclone_commands.py +301 -0
  35. basic_memory/cli/commands/cloud/rclone_config.py +110 -0
  36. basic_memory/cli/commands/cloud/rclone_installer.py +249 -0
  37. basic_memory/cli/commands/cloud/upload.py +233 -0
  38. basic_memory/cli/commands/cloud/upload_command.py +124 -0
  39. basic_memory/cli/commands/command_utils.py +51 -0
  40. basic_memory/cli/commands/db.py +28 -12
  41. basic_memory/cli/commands/import_chatgpt.py +40 -220
  42. basic_memory/cli/commands/import_claude_conversations.py +41 -168
  43. basic_memory/cli/commands/import_claude_projects.py +46 -157
  44. basic_memory/cli/commands/import_memory_json.py +48 -108
  45. basic_memory/cli/commands/mcp.py +84 -10
  46. basic_memory/cli/commands/project.py +876 -0
  47. basic_memory/cli/commands/status.py +50 -33
  48. basic_memory/cli/commands/tool.py +341 -0
  49. basic_memory/cli/main.py +8 -7
  50. basic_memory/config.py +477 -23
  51. basic_memory/db.py +168 -17
  52. basic_memory/deps.py +251 -25
  53. basic_memory/file_utils.py +113 -58
  54. basic_memory/ignore_utils.py +297 -0
  55. basic_memory/importers/__init__.py +27 -0
  56. basic_memory/importers/base.py +79 -0
  57. basic_memory/importers/chatgpt_importer.py +232 -0
  58. basic_memory/importers/claude_conversations_importer.py +177 -0
  59. basic_memory/importers/claude_projects_importer.py +148 -0
  60. basic_memory/importers/memory_json_importer.py +108 -0
  61. basic_memory/importers/utils.py +58 -0
  62. basic_memory/markdown/entity_parser.py +143 -23
  63. basic_memory/markdown/markdown_processor.py +3 -3
  64. basic_memory/markdown/plugins.py +39 -21
  65. basic_memory/markdown/schemas.py +1 -1
  66. basic_memory/markdown/utils.py +28 -13
  67. basic_memory/mcp/async_client.py +134 -4
  68. basic_memory/mcp/project_context.py +141 -0
  69. basic_memory/mcp/prompts/__init__.py +19 -0
  70. basic_memory/mcp/prompts/ai_assistant_guide.py +70 -0
  71. basic_memory/mcp/prompts/continue_conversation.py +62 -0
  72. basic_memory/mcp/prompts/recent_activity.py +188 -0
  73. basic_memory/mcp/prompts/search.py +57 -0
  74. basic_memory/mcp/prompts/utils.py +162 -0
  75. basic_memory/mcp/resources/ai_assistant_guide.md +283 -0
  76. basic_memory/mcp/resources/project_info.py +71 -0
  77. basic_memory/mcp/server.py +7 -13
  78. basic_memory/mcp/tools/__init__.py +33 -21
  79. basic_memory/mcp/tools/build_context.py +120 -0
  80. basic_memory/mcp/tools/canvas.py +130 -0
  81. basic_memory/mcp/tools/chatgpt_tools.py +187 -0
  82. basic_memory/mcp/tools/delete_note.py +225 -0
  83. basic_memory/mcp/tools/edit_note.py +320 -0
  84. basic_memory/mcp/tools/list_directory.py +167 -0
  85. basic_memory/mcp/tools/move_note.py +545 -0
  86. basic_memory/mcp/tools/project_management.py +200 -0
  87. basic_memory/mcp/tools/read_content.py +271 -0
  88. basic_memory/mcp/tools/read_note.py +255 -0
  89. basic_memory/mcp/tools/recent_activity.py +534 -0
  90. basic_memory/mcp/tools/search.py +369 -23
  91. basic_memory/mcp/tools/utils.py +374 -16
  92. basic_memory/mcp/tools/view_note.py +77 -0
  93. basic_memory/mcp/tools/write_note.py +207 -0
  94. basic_memory/models/__init__.py +3 -2
  95. basic_memory/models/knowledge.py +67 -15
  96. basic_memory/models/project.py +87 -0
  97. basic_memory/models/search.py +10 -6
  98. basic_memory/repository/__init__.py +2 -0
  99. basic_memory/repository/entity_repository.py +229 -7
  100. basic_memory/repository/observation_repository.py +35 -3
  101. basic_memory/repository/project_info_repository.py +10 -0
  102. basic_memory/repository/project_repository.py +103 -0
  103. basic_memory/repository/relation_repository.py +21 -2
  104. basic_memory/repository/repository.py +147 -29
  105. basic_memory/repository/search_repository.py +411 -62
  106. basic_memory/schemas/__init__.py +22 -9
  107. basic_memory/schemas/base.py +97 -8
  108. basic_memory/schemas/cloud.py +50 -0
  109. basic_memory/schemas/directory.py +30 -0
  110. basic_memory/schemas/importer.py +35 -0
  111. basic_memory/schemas/memory.py +187 -25
  112. basic_memory/schemas/project_info.py +211 -0
  113. basic_memory/schemas/prompt.py +90 -0
  114. basic_memory/schemas/request.py +56 -2
  115. basic_memory/schemas/response.py +1 -1
  116. basic_memory/schemas/search.py +31 -35
  117. basic_memory/schemas/sync_report.py +72 -0
  118. basic_memory/services/__init__.py +2 -1
  119. basic_memory/services/context_service.py +241 -104
  120. basic_memory/services/directory_service.py +295 -0
  121. basic_memory/services/entity_service.py +590 -60
  122. basic_memory/services/exceptions.py +21 -0
  123. basic_memory/services/file_service.py +284 -30
  124. basic_memory/services/initialization.py +191 -0
  125. basic_memory/services/link_resolver.py +49 -56
  126. basic_memory/services/project_service.py +863 -0
  127. basic_memory/services/search_service.py +168 -32
  128. basic_memory/sync/__init__.py +3 -2
  129. basic_memory/sync/background_sync.py +26 -0
  130. basic_memory/sync/sync_service.py +1180 -109
  131. basic_memory/sync/watch_service.py +412 -135
  132. basic_memory/templates/prompts/continue_conversation.hbs +110 -0
  133. basic_memory/templates/prompts/search.hbs +101 -0
  134. basic_memory/utils.py +383 -51
  135. basic_memory-0.16.1.dist-info/METADATA +493 -0
  136. basic_memory-0.16.1.dist-info/RECORD +148 -0
  137. {basic_memory-0.7.0.dist-info → basic_memory-0.16.1.dist-info}/entry_points.txt +1 -0
  138. basic_memory/alembic/README +0 -1
  139. basic_memory/cli/commands/sync.py +0 -206
  140. basic_memory/cli/commands/tools.py +0 -157
  141. basic_memory/mcp/tools/knowledge.py +0 -68
  142. basic_memory/mcp/tools/memory.py +0 -170
  143. basic_memory/mcp/tools/notes.py +0 -202
  144. basic_memory/schemas/discovery.py +0 -28
  145. basic_memory/sync/file_change_scanner.py +0 -158
  146. basic_memory/sync/utils.py +0 -31
  147. basic_memory-0.7.0.dist-info/METADATA +0 -378
  148. basic_memory-0.7.0.dist-info/RECORD +0 -82
  149. {basic_memory-0.7.0.dist-info → basic_memory-0.16.1.dist-info}/WHEEL +0 -0
  150. {basic_memory-0.7.0.dist-info → basic_memory-0.16.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,13 +1,15 @@
1
1
  """Repository for search operations."""
2
2
 
3
3
  import json
4
+ import re
4
5
  import time
5
6
  from dataclasses import dataclass
6
7
  from datetime import datetime
7
- from typing import List, Optional, Any, Dict
8
+ from typing import Any, Dict, List, Optional
9
+ from pathlib import Path
8
10
 
9
11
  from loguru import logger
10
- from sqlalchemy import text, Executable, Result
12
+ from sqlalchemy import Executable, Result, text
11
13
  from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
12
14
 
13
15
  from basic_memory import db
@@ -19,33 +21,65 @@ from basic_memory.schemas.search import SearchItemType
19
21
  class SearchIndexRow:
20
22
  """Search result with score and metadata."""
21
23
 
24
+ project_id: int
22
25
  id: int
23
26
  type: str
24
- permalink: str
25
27
  file_path: str
26
- metadata: Optional[dict] = None
27
28
 
28
29
  # date values
29
- created_at: Optional[datetime] = None
30
- updated_at: Optional[datetime] = None
30
+ created_at: datetime
31
+ updated_at: datetime
32
+
33
+ permalink: Optional[str] = None
34
+ metadata: Optional[dict] = None
31
35
 
32
36
  # assigned in result
33
37
  score: Optional[float] = None
34
38
 
35
39
  # Type-specific fields
36
40
  title: Optional[str] = None # entity
37
- content: Optional[str] = None # entity, observation
41
+ content_stems: Optional[str] = None # entity, observation
42
+ content_snippet: Optional[str] = None # entity, observation
38
43
  entity_id: Optional[int] = None # observations
39
44
  category: Optional[str] = None # observations
40
45
  from_id: Optional[int] = None # relations
41
46
  to_id: Optional[int] = None # relations
42
47
  relation_type: Optional[str] = None # relations
43
48
 
49
+ @property
50
+ def content(self):
51
+ return self.content_snippet
52
+
53
+ @property
54
+ def directory(self) -> str:
55
+ """Extract directory part from file_path.
56
+
57
+ For a file at "projects/notes/ideas.md", returns "/projects/notes"
58
+ For a file at root level "README.md", returns "/"
59
+ """
60
+ if not self.type == SearchItemType.ENTITY.value and not self.file_path:
61
+ return ""
62
+
63
+ # Normalize path separators to handle both Windows (\) and Unix (/) paths
64
+ normalized_path = Path(self.file_path).as_posix()
65
+
66
+ # Split the path by slashes
67
+ parts = normalized_path.split("/")
68
+
69
+ # If there's only one part (e.g., "README.md"), it's at the root
70
+ if len(parts) <= 1:
71
+ return "/"
72
+
73
+ # Join all parts except the last one (filename)
74
+ directory_path = "/".join(parts[:-1])
75
+ return f"/{directory_path}"
76
+
44
77
  def to_insert(self):
45
78
  return {
46
79
  "id": self.id,
47
80
  "title": self.title,
48
- "content": self.content,
81
+ "content_stems": self.content_stems,
82
+ "content_snippet": self.content_snippet,
49
83
  "permalink": self.permalink,
50
84
  "file_path": self.file_path,
51
85
  "type": self.type,
@@ -57,14 +91,28 @@ class SearchIndexRow:
57
91
  "category": self.category,
58
92
  "created_at": self.created_at if self.created_at else None,
59
93
  "updated_at": self.updated_at if self.updated_at else None,
94
+ "project_id": self.project_id,
60
95
  }
61
96
 
62
97
 
63
98
  class SearchRepository:
64
99
  """Repository for search index operations."""
65
100
 
66
- def __init__(self, session_maker: async_sessionmaker[AsyncSession]):
101
+ def __init__(self, session_maker: async_sessionmaker[AsyncSession], project_id: int):
102
+ """Initialize with session maker and project_id filter.
103
+
104
+ Args:
105
+ session_maker: SQLAlchemy session maker
106
+ project_id: Project ID to filter all operations by
107
+
108
+ Raises:
109
+ ValueError: If project_id is None or invalid
110
+ """
111
+ if project_id is None or project_id <= 0: # pragma: no cover
112
+ raise ValueError("A valid project_id is required for SearchRepository")
113
+
67
114
  self.session_maker = session_maker
115
+ self.project_id = project_id
68
116
 
69
117
  async def init_search_index(self):
70
118
  """Create or recreate the search index."""
@@ -77,42 +125,251 @@ class SearchRepository:
77
125
  logger.error(f"Error initializing search index: {e}")
78
126
  raise e
79
127
 
80
- def _prepare_search_term(self, term: str, is_prefix: bool = True) -> str:
81
- """Prepare a search term for FTS5 query.
128
+ def _prepare_boolean_query(self, query: str) -> str:
129
+ """Prepare a Boolean query by quoting individual terms while preserving operators.
82
130
 
83
131
  Args:
84
- term: The search term to prepare
132
+ query: A Boolean query like "tier1-test AND unicode" or "(hello OR world) NOT test"
133
+
134
+ Returns:
135
+ A properly formatted Boolean query with quoted terms that need quoting
136
+ """
137
+ # Define Boolean operators and their boundaries
138
+ boolean_pattern = r"(\bAND\b|\bOR\b|\bNOT\b)"
139
+
140
+ # Split the query by Boolean operators, keeping the operators
141
+ parts = re.split(boolean_pattern, query)
142
+
143
+ processed_parts = []
144
+ for part in parts:
145
+ part = part.strip()
146
+ if not part:
147
+ continue
148
+
149
+ # If it's a Boolean operator, keep it as is
150
+ if part in ["AND", "OR", "NOT"]:
151
+ processed_parts.append(part)
152
+ else:
153
+ # Handle parentheses specially - they should be preserved for grouping
154
+ if "(" in part or ")" in part:
155
+ # Parse parenthetical expressions carefully
156
+ processed_part = self._prepare_parenthetical_term(part)
157
+ processed_parts.append(processed_part)
158
+ else:
159
+ # This is a search term - for Boolean queries, don't add prefix wildcards
160
+ prepared_term = self._prepare_single_term(part, is_prefix=False)
161
+ processed_parts.append(prepared_term)
162
+
163
+ return " ".join(processed_parts)
164
+
165
+ def _prepare_parenthetical_term(self, term: str) -> str:
166
+ """Prepare a term that contains parentheses, preserving the parentheses for grouping.
167
+
168
+ Args:
169
+ term: A term that may contain parentheses like "(hello" or "world)" or "(hello OR world)"
170
+
171
+ Returns:
172
+ A properly formatted term with parentheses preserved
173
+ """
174
+ # Handle terms that start/end with parentheses but may contain quotable content
175
+ result = ""
176
+ i = 0
177
+ while i < len(term):
178
+ if term[i] in "()":
179
+ # Preserve parentheses as-is
180
+ result += term[i]
181
+ i += 1
182
+ else:
183
+ # Find the next parenthesis or end of string
184
+ start = i
185
+ while i < len(term) and term[i] not in "()":
186
+ i += 1
187
+
188
+ # Extract the content between parentheses
189
+ content = term[start:i].strip()
190
+ if content:
191
+ # Only quote if it actually needs quoting (has hyphens, special chars, etc)
192
+ # but don't quote if it's just simple words
193
+ if self._needs_quoting(content):
194
+ escaped_content = content.replace('"', '""')
195
+ result += f'"{escaped_content}"'
196
+ else:
197
+ result += content
198
+
199
+ return result
200
+
201
+ def _needs_quoting(self, term: str) -> bool:
202
+ """Check if a term needs to be quoted for FTS5 safety.
203
+
204
+ Args:
205
+ term: The term to check
206
+
207
+ Returns:
208
+ True if the term should be quoted
209
+ """
210
+ if not term or not term.strip():
211
+ return False
212
+
213
+ # Characters that indicate we should quote (excluding parentheses which are valid syntax)
214
+ needs_quoting_chars = [
215
+ " ",
216
+ ".",
217
+ ":",
218
+ ";",
219
+ ",",
220
+ "<",
221
+ ">",
222
+ "?",
223
+ "/",
224
+ "-",
225
+ "'",
226
+ '"',
227
+ "[",
228
+ "]",
229
+ "{",
230
+ "}",
231
+ "+",
232
+ "!",
233
+ "@",
234
+ "#",
235
+ "$",
236
+ "%",
237
+ "^",
238
+ "&",
239
+ "=",
240
+ "|",
241
+ "\\",
242
+ "~",
243
+ "`",
244
+ ]
245
+
246
+ return any(c in term for c in needs_quoting_chars)
247
+
248
+ def _prepare_single_term(self, term: str, is_prefix: bool = True) -> str:
249
+ """Prepare a single search term (no Boolean operators).
250
+
251
+ Args:
252
+ term: A single search term
85
253
  is_prefix: Whether to add prefix search capability (* suffix)
86
254
 
87
- For FTS5:
88
- - Special characters and phrases need to be quoted
89
- - Terms with spaces or special chars need quotes
255
+ Returns:
256
+ A properly formatted single term
90
257
  """
91
- if "*" in term:
258
+ if not term or not term.strip():
92
259
  return term
93
260
 
94
- # List of special characters that need quoting (excluding *)
95
- special_chars = ["/", "-", ".", " ", "(", ")", "[", "]", '"', "'"]
261
+ term = term.strip()
96
262
 
97
- # Check if term contains any special characters
98
- needs_quotes = any(c in term for c in special_chars)
263
+ # Check if term is already a proper wildcard pattern (alphanumeric + *)
264
+ # e.g., "hello*", "test*world" - these should be left alone
265
+ if "*" in term and all(c.isalnum() or c in "*_-" for c in term):
266
+ return term
267
+
268
+ # Characters that can cause FTS5 syntax errors when used as operators
269
+ # We're more conservative here - only quote when we detect problematic patterns
270
+ problematic_chars = [
271
+ '"',
272
+ "'",
273
+ "(",
274
+ ")",
275
+ "[",
276
+ "]",
277
+ "{",
278
+ "}",
279
+ "+",
280
+ "!",
281
+ "@",
282
+ "#",
283
+ "$",
284
+ "%",
285
+ "^",
286
+ "&",
287
+ "=",
288
+ "|",
289
+ "\\",
290
+ "~",
291
+ "`",
292
+ ]
99
293
 
100
- if needs_quotes:
101
- # If the term already contains quotes, escape them and add a wildcard
102
- term = term.replace('"', '""')
103
- term = f'"{term}"*'
294
+ # Characters that indicate we should quote (spaces, dots, colons, etc.)
295
+ # Adding hyphens here because FTS5 can have issues with hyphens followed by wildcards
296
+ needs_quoting_chars = [" ", ".", ":", ";", ",", "<", ">", "?", "/", "-"]
297
+
298
+ # Check if term needs quoting
299
+ has_problematic = any(c in term for c in problematic_chars)
300
+ has_spaces_or_special = any(c in term for c in needs_quoting_chars)
301
+
302
+ if has_problematic or has_spaces_or_special:
303
+ # Handle multi-word queries differently from special character queries
304
+ if " " in term and not any(c in term for c in problematic_chars):
305
+ # Check if any individual word contains special characters that need quoting
306
+ words = term.strip().split()
307
+ has_special_in_words = any(
308
+ any(c in word for c in needs_quoting_chars if c != " ") for word in words
309
+ )
310
+
311
+ if not has_special_in_words:
312
+ # For multi-word queries with simple words (like "emoji unicode"),
313
+ # use boolean AND to handle word order variations
314
+ if is_prefix:
315
+ # Add prefix wildcard to each word for better matching
316
+ prepared_words = [f"{word}*" for word in words if word]
317
+ else:
318
+ prepared_words = words
319
+ term = " AND ".join(prepared_words)
320
+ else:
321
+ # If any word has special characters, quote the entire phrase
322
+ escaped_term = term.replace('"', '""')
323
+ if is_prefix and not ("/" in term and term.endswith(".md")):
324
+ term = f'"{escaped_term}"*'
325
+ else:
326
+ term = f'"{escaped_term}"'
327
+ else:
328
+ # For terms with problematic characters or file paths, use exact phrase matching
329
+ # Escape any existing quotes by doubling them
330
+ escaped_term = term.replace('"', '""')
331
+ # Quote the entire term to handle special characters safely
332
+ if is_prefix and not ("/" in term and term.endswith(".md")):
333
+ # For search terms (not file paths), add prefix matching
334
+ term = f'"{escaped_term}"*'
335
+ else:
336
+ # For file paths, use exact matching
337
+ term = f'"{escaped_term}"'
338
+ elif is_prefix:
339
+ # Only add wildcard for simple terms without special characters
340
+ term = f"{term}*"
104
341
 
105
342
  return term
106
343
 
344
+ def _prepare_search_term(self, term: str, is_prefix: bool = True) -> str:
345
+ """Prepare a search term for FTS5 query.
346
+
347
+ Args:
348
+ term: The search term to prepare
349
+ is_prefix: Whether to add prefix search capability (* suffix)
350
+
351
+ For FTS5:
352
+ - Boolean operators (AND, OR, NOT) are preserved for complex queries
353
+ - Terms with FTS5 special characters are quoted to prevent syntax errors
354
+ - Simple terms get prefix wildcards for better matching
355
+ """
356
+ # Check for explicit boolean operators - if present, process as Boolean query
357
+ boolean_operators = [" AND ", " OR ", " NOT "]
358
+ if any(op in f" {term} " for op in boolean_operators):
359
+ return self._prepare_boolean_query(term)
360
+
361
+ # For non-Boolean queries, use the single term preparation logic
362
+ return self._prepare_single_term(term, is_prefix)
363
+
107
364
  async def search(
108
365
  self,
109
366
  search_text: Optional[str] = None,
110
367
  permalink: Optional[str] = None,
111
368
  permalink_match: Optional[str] = None,
112
369
  title: Optional[str] = None,
113
- types: Optional[List[SearchItemType]] = None,
370
+ types: Optional[List[str]] = None,
114
371
  after_date: Optional[datetime] = None,
115
- entity_types: Optional[List[str]] = None,
372
+ search_item_types: Optional[List[SearchItemType]] = None,
116
373
  limit: int = 10,
117
374
  offset: int = 0,
118
375
  ) -> List[SearchIndexRow]:
@@ -123,15 +380,21 @@ class SearchRepository:
123
380
 
124
381
  # Handle text search for title and content
125
382
  if search_text:
126
- search_text = self._prepare_search_term(search_text.strip())
127
- params["text"] = search_text
128
- conditions.append("(title MATCH :text OR content MATCH :text)")
383
+ # Skip FTS for wildcard-only queries that would cause "unknown special query" errors
384
+ if search_text.strip() == "*" or search_text.strip() == "":
385
+ # For wildcard searches, don't add any text conditions - return all results
386
+ pass
387
+ else:
388
+ # Use _prepare_search_term to handle both Boolean and non-Boolean queries
389
+ processed_text = self._prepare_search_term(search_text.strip())
390
+ params["text"] = processed_text
391
+ conditions.append("(title MATCH :text OR content_stems MATCH :text)")
129
392
 
130
393
  # Handle title match search
131
394
  if title:
132
- title_text = self._prepare_search_term(title.strip())
133
- params["text"] = title_text
134
- conditions.append("title MATCH :text")
395
+ title_text = self._prepare_search_term(title.strip(), is_prefix=False)
396
+ params["title_text"] = title_text
397
+ conditions.append("title MATCH :title_text")
135
398
 
136
399
  # Handle permalink exact search
137
400
  if permalink:
@@ -140,25 +403,31 @@ class SearchRepository:
140
403
 
141
404
  # Handle permalink match search, supports *
142
405
  if permalink_match:
143
- # Clean and prepare permalink for FTS5 GLOB match
144
- permalink_text = self._prepare_search_term(
145
- permalink_match.lower().strip(), is_prefix=False
146
- )
406
+ # For GLOB patterns, don't use _prepare_search_term as it will quote slashes
407
+ # GLOB patterns need to preserve their syntax
408
+ permalink_text = permalink_match.lower().strip()
147
409
  params["permalink"] = permalink_text
148
410
  if "*" in permalink_match:
149
411
  conditions.append("permalink GLOB :permalink")
150
412
  else:
151
- conditions.append("permalink MATCH :permalink")
413
+ # For exact matches without *, we can use FTS5 MATCH
414
+ # but only prepare the term if it doesn't look like a path
415
+ if "/" in permalink_text:
416
+ conditions.append("permalink = :permalink")
417
+ else:
418
+ permalink_text = self._prepare_search_term(permalink_text, is_prefix=False)
419
+ params["permalink"] = permalink_text
420
+ conditions.append("permalink MATCH :permalink")
152
421
 
153
- # Handle type filter
154
- if types:
155
- type_list = ", ".join(f"'{t.value}'" for t in types)
422
+ # Handle entity type filter
423
+ if search_item_types:
424
+ type_list = ", ".join(f"'{t.value}'" for t in search_item_types)
156
425
  conditions.append(f"type IN ({type_list})")
157
426
 
158
- # Handle entity type filter
159
- if entity_types:
160
- entity_type_list = ", ".join(f"'{t}'" for t in entity_types)
161
- conditions.append(f"json_extract(metadata, '$.entity_type') IN ({entity_type_list})")
427
+ # Handle type filter
428
+ if types:
429
+ type_list = ", ".join(f"'{t}'" for t in types)
430
+ conditions.append(f"json_extract(metadata, '$.entity_type') IN ({type_list})")
162
431
 
163
432
  # Handle date filter using datetime() for proper comparison
164
433
  if after_date:
@@ -168,6 +437,10 @@ class SearchRepository:
168
437
  # order by most recent first
169
438
  order_by_clause = ", updated_at DESC"
170
439
 
440
+ # Always filter by project_id
441
+ params["project_id"] = self.project_id
442
+ conditions.append("project_id = :project_id")
443
+
171
444
  # set limit on search query
172
445
  params["limit"] = limit
173
446
  params["offset"] = offset
@@ -177,6 +450,7 @@ class SearchRepository:
177
450
 
178
451
  sql = f"""
179
452
  SELECT
453
+ project_id,
180
454
  id,
181
455
  title,
182
456
  permalink,
@@ -187,7 +461,7 @@ class SearchRepository:
187
461
  to_id,
188
462
  relation_type,
189
463
  entity_id,
190
- content,
464
+ content_snippet,
191
465
  category,
192
466
  created_at,
193
467
  updated_at,
@@ -199,13 +473,25 @@ class SearchRepository:
199
473
  OFFSET :offset
200
474
  """
201
475
 
202
- logger.debug(f"Search {sql} params: {params}")
203
- async with db.scoped_session(self.session_maker) as session:
204
- result = await session.execute(text(sql), params)
205
- rows = result.fetchall()
476
+ logger.trace(f"Search {sql} params: {params}")
477
+ try:
478
+ async with db.scoped_session(self.session_maker) as session:
479
+ result = await session.execute(text(sql), params)
480
+ rows = result.fetchall()
481
+ except Exception as e:
482
+ # Handle FTS5 syntax errors and provide user-friendly feedback
483
+ if "fts5: syntax error" in str(e).lower(): # pragma: no cover
484
+ logger.warning(f"FTS5 syntax error for search term: {search_text}, error: {e}")
485
+ # Return empty results rather than crashing
486
+ return []
487
+ else:
488
+ # Re-raise other database errors
489
+ logger.error(f"Database error during search: {e}")
490
+ raise
206
491
 
207
492
  results = [
208
493
  SearchIndexRow(
494
+ project_id=self.project_id,
209
495
  id=row.id,
210
496
  title=row.title,
211
497
  permalink=row.permalink,
@@ -217,7 +503,7 @@ class SearchRepository:
217
503
  to_id=row.to_id,
218
504
  relation_type=row.relation_type,
219
505
  entity_id=row.entity_id,
220
- content=row.content,
506
+ content_snippet=row.content_snippet,
221
507
  category=row.category,
222
508
  created_at=row.created_at,
223
509
  updated_at=row.updated_at,
@@ -225,10 +511,10 @@ class SearchRepository:
225
511
  for row in rows
226
512
  ]
227
513
 
228
- logger.debug(f"Found {len(results)} search results")
514
+ logger.trace(f"Found {len(results)} search results")
229
515
  for r in results:
230
- logger.debug(
231
- f"Search result: type:{r.type} title: {r.title} permalink: {r.permalink} score: {r.score}"
516
+ logger.trace(
517
+ f"Search result: project_id: {r.project_id} type:{r.type} title: {r.title} permalink: {r.permalink} score: {r.score}"
232
518
  )
233
519
 
234
520
  return results
@@ -241,36 +527,99 @@ class SearchRepository:
241
527
  async with db.scoped_session(self.session_maker) as session:
242
528
  # Delete existing record if any
243
529
  await session.execute(
244
- text("DELETE FROM search_index WHERE permalink = :permalink"),
245
- {"permalink": search_index_row.permalink},
530
+ text(
531
+ "DELETE FROM search_index WHERE permalink = :permalink AND project_id = :project_id"
532
+ ),
533
+ {"permalink": search_index_row.permalink, "project_id": self.project_id},
246
534
  )
247
535
 
536
+ # Prepare data for insert with project_id
537
+ insert_data = search_index_row.to_insert()
538
+ insert_data["project_id"] = self.project_id
539
+
248
540
  # Insert new record
249
541
  await session.execute(
250
542
  text("""
251
543
  INSERT INTO search_index (
252
- id, title, content, permalink, file_path, type, metadata,
544
+ id, title, content_stems, content_snippet, permalink, file_path, type, metadata,
253
545
  from_id, to_id, relation_type,
254
546
  entity_id, category,
255
- created_at, updated_at
547
+ created_at, updated_at,
548
+ project_id
256
549
  ) VALUES (
257
- :id, :title, :content, :permalink, :file_path, :type, :metadata,
550
+ :id, :title, :content_stems, :content_snippet, :permalink, :file_path, :type, :metadata,
258
551
  :from_id, :to_id, :relation_type,
259
552
  :entity_id, :category,
260
- :created_at, :updated_at
553
+ :created_at, :updated_at,
554
+ :project_id
261
555
  )
262
556
  """),
263
- search_index_row.to_insert(),
557
+ insert_data,
264
558
  )
265
559
  logger.debug(f"indexed row {search_index_row}")
266
560
  await session.commit()
267
561
 
562
+ async def bulk_index_items(self, search_index_rows: List[SearchIndexRow]):
563
+ """Index multiple items in a single batch operation.
564
+
565
+ Note: This method assumes that any existing records for the entity_id
566
+ have already been deleted (typically via delete_by_entity_id).
567
+
568
+ Args:
569
+ search_index_rows: List of SearchIndexRow objects to index
570
+ """
571
+ if not search_index_rows:
572
+ return
573
+
574
+ async with db.scoped_session(self.session_maker) as session:
575
+ # Prepare all insert data with project_id
576
+ insert_data_list = []
577
+ for row in search_index_rows:
578
+ insert_data = row.to_insert()
579
+ insert_data["project_id"] = self.project_id
580
+ insert_data_list.append(insert_data)
581
+
582
+ # Batch insert all records using executemany
583
+ await session.execute(
584
+ text("""
585
+ INSERT INTO search_index (
586
+ id, title, content_stems, content_snippet, permalink, file_path, type, metadata,
587
+ from_id, to_id, relation_type,
588
+ entity_id, category,
589
+ created_at, updated_at,
590
+ project_id
591
+ ) VALUES (
592
+ :id, :title, :content_stems, :content_snippet, :permalink, :file_path, :type, :metadata,
593
+ :from_id, :to_id, :relation_type,
594
+ :entity_id, :category,
595
+ :created_at, :updated_at,
596
+ :project_id
597
+ )
598
+ """),
599
+ insert_data_list,
600
+ )
601
+ logger.debug(f"Bulk indexed {len(search_index_rows)} rows")
602
+ await session.commit()
603
+
604
+ async def delete_by_entity_id(self, entity_id: int):
605
+ """Delete an item from the search index by entity_id."""
606
+ async with db.scoped_session(self.session_maker) as session:
607
+ await session.execute(
608
+ text(
609
+ "DELETE FROM search_index WHERE entity_id = :entity_id AND project_id = :project_id"
610
+ ),
611
+ {"entity_id": entity_id, "project_id": self.project_id},
612
+ )
613
+ await session.commit()
614
+
268
615
  async def delete_by_permalink(self, permalink: str):
269
616
  """Delete an item from the search index."""
270
617
  async with db.scoped_session(self.session_maker) as session:
271
618
  await session.execute(
272
- text("DELETE FROM search_index WHERE permalink = :permalink"),
273
- {"permalink": permalink},
619
+ text(
620
+ "DELETE FROM search_index WHERE permalink = :permalink AND project_id = :project_id"
621
+ ),
622
+ {"permalink": permalink, "project_id": self.project_id},
274
623
  )
275
624
  await session.commit()
276
625