basic-memory 0.2.12__py3-none-any.whl → 0.16.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of basic-memory might be problematic. Click here for more details.

Files changed (149) hide show
  1. basic_memory/__init__.py +5 -1
  2. basic_memory/alembic/alembic.ini +119 -0
  3. basic_memory/alembic/env.py +27 -3
  4. basic_memory/alembic/migrations.py +4 -9
  5. basic_memory/alembic/versions/502b60eaa905_remove_required_from_entity_permalink.py +51 -0
  6. basic_memory/alembic/versions/5fe1ab1ccebe_add_projects_table.py +108 -0
  7. basic_memory/alembic/versions/647e7a75e2cd_project_constraint_fix.py +104 -0
  8. basic_memory/alembic/versions/9d9c1cb7d8f5_add_mtime_and_size_columns_to_entity_.py +49 -0
  9. basic_memory/alembic/versions/a1b2c3d4e5f6_fix_project_foreign_keys.py +49 -0
  10. basic_memory/alembic/versions/b3c3938bacdb_relation_to_name_unique_index.py +44 -0
  11. basic_memory/alembic/versions/cc7172b46608_update_search_index_schema.py +100 -0
  12. basic_memory/alembic/versions/e7e1f4367280_add_scan_watermark_tracking_to_project.py +37 -0
  13. basic_memory/api/app.py +63 -31
  14. basic_memory/api/routers/__init__.py +4 -1
  15. basic_memory/api/routers/directory_router.py +84 -0
  16. basic_memory/api/routers/importer_router.py +152 -0
  17. basic_memory/api/routers/knowledge_router.py +165 -28
  18. basic_memory/api/routers/management_router.py +80 -0
  19. basic_memory/api/routers/memory_router.py +28 -67
  20. basic_memory/api/routers/project_router.py +406 -0
  21. basic_memory/api/routers/prompt_router.py +260 -0
  22. basic_memory/api/routers/resource_router.py +219 -14
  23. basic_memory/api/routers/search_router.py +21 -13
  24. basic_memory/api/routers/utils.py +130 -0
  25. basic_memory/api/template_loader.py +292 -0
  26. basic_memory/cli/app.py +52 -1
  27. basic_memory/cli/auth.py +277 -0
  28. basic_memory/cli/commands/__init__.py +13 -2
  29. basic_memory/cli/commands/cloud/__init__.py +6 -0
  30. basic_memory/cli/commands/cloud/api_client.py +112 -0
  31. basic_memory/cli/commands/cloud/bisync_commands.py +110 -0
  32. basic_memory/cli/commands/cloud/cloud_utils.py +101 -0
  33. basic_memory/cli/commands/cloud/core_commands.py +195 -0
  34. basic_memory/cli/commands/cloud/rclone_commands.py +301 -0
  35. basic_memory/cli/commands/cloud/rclone_config.py +110 -0
  36. basic_memory/cli/commands/cloud/rclone_installer.py +249 -0
  37. basic_memory/cli/commands/cloud/upload.py +233 -0
  38. basic_memory/cli/commands/cloud/upload_command.py +124 -0
  39. basic_memory/cli/commands/command_utils.py +51 -0
  40. basic_memory/cli/commands/db.py +26 -7
  41. basic_memory/cli/commands/import_chatgpt.py +83 -0
  42. basic_memory/cli/commands/import_claude_conversations.py +86 -0
  43. basic_memory/cli/commands/import_claude_projects.py +85 -0
  44. basic_memory/cli/commands/import_memory_json.py +35 -92
  45. basic_memory/cli/commands/mcp.py +84 -10
  46. basic_memory/cli/commands/project.py +876 -0
  47. basic_memory/cli/commands/status.py +47 -30
  48. basic_memory/cli/commands/tool.py +341 -0
  49. basic_memory/cli/main.py +13 -6
  50. basic_memory/config.py +481 -22
  51. basic_memory/db.py +192 -32
  52. basic_memory/deps.py +252 -22
  53. basic_memory/file_utils.py +113 -58
  54. basic_memory/ignore_utils.py +297 -0
  55. basic_memory/importers/__init__.py +27 -0
  56. basic_memory/importers/base.py +79 -0
  57. basic_memory/importers/chatgpt_importer.py +232 -0
  58. basic_memory/importers/claude_conversations_importer.py +177 -0
  59. basic_memory/importers/claude_projects_importer.py +148 -0
  60. basic_memory/importers/memory_json_importer.py +108 -0
  61. basic_memory/importers/utils.py +58 -0
  62. basic_memory/markdown/entity_parser.py +143 -23
  63. basic_memory/markdown/markdown_processor.py +3 -3
  64. basic_memory/markdown/plugins.py +39 -21
  65. basic_memory/markdown/schemas.py +1 -1
  66. basic_memory/markdown/utils.py +28 -13
  67. basic_memory/mcp/async_client.py +134 -4
  68. basic_memory/mcp/project_context.py +141 -0
  69. basic_memory/mcp/prompts/__init__.py +19 -0
  70. basic_memory/mcp/prompts/ai_assistant_guide.py +70 -0
  71. basic_memory/mcp/prompts/continue_conversation.py +62 -0
  72. basic_memory/mcp/prompts/recent_activity.py +188 -0
  73. basic_memory/mcp/prompts/search.py +57 -0
  74. basic_memory/mcp/prompts/utils.py +162 -0
  75. basic_memory/mcp/resources/ai_assistant_guide.md +283 -0
  76. basic_memory/mcp/resources/project_info.py +71 -0
  77. basic_memory/mcp/server.py +7 -13
  78. basic_memory/mcp/tools/__init__.py +33 -21
  79. basic_memory/mcp/tools/build_context.py +120 -0
  80. basic_memory/mcp/tools/canvas.py +130 -0
  81. basic_memory/mcp/tools/chatgpt_tools.py +187 -0
  82. basic_memory/mcp/tools/delete_note.py +225 -0
  83. basic_memory/mcp/tools/edit_note.py +320 -0
  84. basic_memory/mcp/tools/list_directory.py +167 -0
  85. basic_memory/mcp/tools/move_note.py +545 -0
  86. basic_memory/mcp/tools/project_management.py +200 -0
  87. basic_memory/mcp/tools/read_content.py +271 -0
  88. basic_memory/mcp/tools/read_note.py +255 -0
  89. basic_memory/mcp/tools/recent_activity.py +534 -0
  90. basic_memory/mcp/tools/search.py +369 -14
  91. basic_memory/mcp/tools/utils.py +374 -16
  92. basic_memory/mcp/tools/view_note.py +77 -0
  93. basic_memory/mcp/tools/write_note.py +207 -0
  94. basic_memory/models/__init__.py +3 -2
  95. basic_memory/models/knowledge.py +67 -15
  96. basic_memory/models/project.py +87 -0
  97. basic_memory/models/search.py +10 -6
  98. basic_memory/repository/__init__.py +2 -0
  99. basic_memory/repository/entity_repository.py +229 -7
  100. basic_memory/repository/observation_repository.py +35 -3
  101. basic_memory/repository/project_info_repository.py +10 -0
  102. basic_memory/repository/project_repository.py +103 -0
  103. basic_memory/repository/relation_repository.py +21 -2
  104. basic_memory/repository/repository.py +147 -29
  105. basic_memory/repository/search_repository.py +437 -59
  106. basic_memory/schemas/__init__.py +22 -9
  107. basic_memory/schemas/base.py +97 -8
  108. basic_memory/schemas/cloud.py +50 -0
  109. basic_memory/schemas/directory.py +30 -0
  110. basic_memory/schemas/importer.py +35 -0
  111. basic_memory/schemas/memory.py +188 -23
  112. basic_memory/schemas/project_info.py +211 -0
  113. basic_memory/schemas/prompt.py +90 -0
  114. basic_memory/schemas/request.py +57 -3
  115. basic_memory/schemas/response.py +9 -1
  116. basic_memory/schemas/search.py +33 -35
  117. basic_memory/schemas/sync_report.py +72 -0
  118. basic_memory/services/__init__.py +2 -1
  119. basic_memory/services/context_service.py +251 -106
  120. basic_memory/services/directory_service.py +295 -0
  121. basic_memory/services/entity_service.py +595 -60
  122. basic_memory/services/exceptions.py +21 -0
  123. basic_memory/services/file_service.py +284 -30
  124. basic_memory/services/initialization.py +191 -0
  125. basic_memory/services/link_resolver.py +50 -56
  126. basic_memory/services/project_service.py +863 -0
  127. basic_memory/services/search_service.py +172 -34
  128. basic_memory/sync/__init__.py +3 -2
  129. basic_memory/sync/background_sync.py +26 -0
  130. basic_memory/sync/sync_service.py +1176 -96
  131. basic_memory/sync/watch_service.py +412 -135
  132. basic_memory/templates/prompts/continue_conversation.hbs +110 -0
  133. basic_memory/templates/prompts/search.hbs +101 -0
  134. basic_memory/utils.py +388 -28
  135. basic_memory-0.16.1.dist-info/METADATA +493 -0
  136. basic_memory-0.16.1.dist-info/RECORD +148 -0
  137. {basic_memory-0.2.12.dist-info → basic_memory-0.16.1.dist-info}/entry_points.txt +1 -0
  138. basic_memory/alembic/README +0 -1
  139. basic_memory/cli/commands/sync.py +0 -203
  140. basic_memory/mcp/tools/knowledge.py +0 -56
  141. basic_memory/mcp/tools/memory.py +0 -151
  142. basic_memory/mcp/tools/notes.py +0 -122
  143. basic_memory/schemas/discovery.py +0 -28
  144. basic_memory/sync/file_change_scanner.py +0 -158
  145. basic_memory/sync/utils.py +0 -34
  146. basic_memory-0.2.12.dist-info/METADATA +0 -291
  147. basic_memory-0.2.12.dist-info/RECORD +0 -78
  148. {basic_memory-0.2.12.dist-info → basic_memory-0.16.1.dist-info}/WHEEL +0 -0
  149. {basic_memory-0.2.12.dist-info → basic_memory-0.16.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,13 +1,15 @@
1
1
  """Repository for search operations."""
2
2
 
3
3
  import json
4
+ import re
4
5
  import time
5
6
  from dataclasses import dataclass
6
7
  from datetime import datetime
7
- from typing import List, Optional, Any, Dict
8
+ from typing import Any, Dict, List, Optional
9
+ from pathlib import Path
8
10
 
9
11
  from loguru import logger
10
- from sqlalchemy import text, Executable, Result
12
+ from sqlalchemy import Executable, Result, text
11
13
  from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
12
14
 
13
15
  from basic_memory import db
@@ -19,33 +21,65 @@ from basic_memory.schemas.search import SearchItemType
19
21
  class SearchIndexRow:
20
22
  """Search result with score and metadata."""
21
23
 
24
+ project_id: int
22
25
  id: int
23
26
  type: str
24
- permalink: str
25
27
  file_path: str
26
- metadata: Optional[dict] = None
27
28
 
28
29
  # date values
29
- created_at: Optional[datetime] = None
30
- updated_at: Optional[datetime] = None
30
+ created_at: datetime
31
+ updated_at: datetime
32
+
33
+ permalink: Optional[str] = None
34
+ metadata: Optional[dict] = None
31
35
 
32
36
  # assigned in result
33
37
  score: Optional[float] = None
34
38
 
35
39
  # Type-specific fields
36
40
  title: Optional[str] = None # entity
37
- content: Optional[str] = None # entity, observation
41
+ content_stems: Optional[str] = None # entity, observation
42
+ content_snippet: Optional[str] = None # entity, observation
38
43
  entity_id: Optional[int] = None # observations
39
44
  category: Optional[str] = None # observations
40
45
  from_id: Optional[int] = None # relations
41
46
  to_id: Optional[int] = None # relations
42
47
  relation_type: Optional[str] = None # relations
43
48
 
49
+ @property
50
+ def content(self):
51
+ return self.content_snippet
52
+
53
+ @property
54
+ def directory(self) -> str:
55
+ """Extract directory part from file_path.
56
+
57
+ For a file at "projects/notes/ideas.md", returns "/projects/notes"
58
+ For a file at root level "README.md", returns "/"
59
+ """
60
+ if not self.type == SearchItemType.ENTITY.value and not self.file_path:
61
+ return ""
62
+
63
+ # Normalize path separators to handle both Windows (\) and Unix (/) paths
64
+ normalized_path = Path(self.file_path).as_posix()
65
+
66
+ # Split the path by slashes
67
+ parts = normalized_path.split("/")
68
+
69
+ # If there's only one part (e.g., "README.md"), it's at the root
70
+ if len(parts) <= 1:
71
+ return "/"
72
+
73
+ # Join all parts except the last one (filename)
74
+ directory_path = "/".join(parts[:-1])
75
+ return f"/{directory_path}"
76
+
44
77
  def to_insert(self):
45
78
  return {
46
79
  "id": self.id,
47
80
  "title": self.title,
48
- "content": self.content,
81
+ "content_stems": self.content_stems,
82
+ "content_snippet": self.content_snippet,
49
83
  "permalink": self.permalink,
50
84
  "file_path": self.file_path,
51
85
  "type": self.type,
@@ -57,47 +91,287 @@ class SearchIndexRow:
57
91
  "category": self.category,
58
92
  "created_at": self.created_at if self.created_at else None,
59
93
  "updated_at": self.updated_at if self.updated_at else None,
94
+ "project_id": self.project_id,
60
95
  }
61
96
 
62
97
 
63
98
  class SearchRepository:
64
99
  """Repository for search index operations."""
65
100
 
66
- def __init__(self, session_maker: async_sessionmaker[AsyncSession]):
101
+ def __init__(self, session_maker: async_sessionmaker[AsyncSession], project_id: int):
102
+ """Initialize with session maker and project_id filter.
103
+
104
+ Args:
105
+ session_maker: SQLAlchemy session maker
106
+ project_id: Project ID to filter all operations by
107
+
108
+ Raises:
109
+ ValueError: If project_id is None or invalid
110
+ """
111
+ if project_id is None or project_id <= 0: # pragma: no cover
112
+ raise ValueError("A valid project_id is required for SearchRepository")
113
+
67
114
  self.session_maker = session_maker
115
+ self.project_id = project_id
68
116
 
69
117
  async def init_search_index(self):
70
118
  """Create or recreate the search index."""
71
-
72
119
  logger.info("Initializing search index")
73
- async with db.scoped_session(self.session_maker) as session:
74
- await session.execute(CREATE_SEARCH_INDEX)
75
- await session.commit()
120
+ try:
121
+ async with db.scoped_session(self.session_maker) as session:
122
+ await session.execute(CREATE_SEARCH_INDEX)
123
+ await session.commit()
124
+ except Exception as e: # pragma: no cover
125
+ logger.error(f"Error initializing search index: {e}")
126
+ raise e
127
+
128
+ def _prepare_boolean_query(self, query: str) -> str:
129
+ """Prepare a Boolean query by quoting individual terms while preserving operators.
130
+
131
+ Args:
132
+ query: A Boolean query like "tier1-test AND unicode" or "(hello OR world) NOT test"
133
+
134
+ Returns:
135
+ A properly formatted Boolean query with quoted terms that need quoting
136
+ """
137
+ # Define Boolean operators and their boundaries
138
+ boolean_pattern = r"(\bAND\b|\bOR\b|\bNOT\b)"
139
+
140
+ # Split the query by Boolean operators, keeping the operators
141
+ parts = re.split(boolean_pattern, query)
142
+
143
+ processed_parts = []
144
+ for part in parts:
145
+ part = part.strip()
146
+ if not part:
147
+ continue
148
+
149
+ # If it's a Boolean operator, keep it as is
150
+ if part in ["AND", "OR", "NOT"]:
151
+ processed_parts.append(part)
152
+ else:
153
+ # Handle parentheses specially - they should be preserved for grouping
154
+ if "(" in part or ")" in part:
155
+ # Parse parenthetical expressions carefully
156
+ processed_part = self._prepare_parenthetical_term(part)
157
+ processed_parts.append(processed_part)
158
+ else:
159
+ # This is a search term - for Boolean queries, don't add prefix wildcards
160
+ prepared_term = self._prepare_single_term(part, is_prefix=False)
161
+ processed_parts.append(prepared_term)
162
+
163
+ return " ".join(processed_parts)
164
+
165
+ def _prepare_parenthetical_term(self, term: str) -> str:
166
+ """Prepare a term that contains parentheses, preserving the parentheses for grouping.
167
+
168
+ Args:
169
+ term: A term that may contain parentheses like "(hello" or "world)" or "(hello OR world)"
170
+
171
+ Returns:
172
+ A properly formatted term with parentheses preserved
173
+ """
174
+ # Handle terms that start/end with parentheses but may contain quotable content
175
+ result = ""
176
+ i = 0
177
+ while i < len(term):
178
+ if term[i] in "()":
179
+ # Preserve parentheses as-is
180
+ result += term[i]
181
+ i += 1
182
+ else:
183
+ # Find the next parenthesis or end of string
184
+ start = i
185
+ while i < len(term) and term[i] not in "()":
186
+ i += 1
187
+
188
+ # Extract the content between parentheses
189
+ content = term[start:i].strip()
190
+ if content:
191
+ # Only quote if it actually needs quoting (has hyphens, special chars, etc)
192
+ # but don't quote if it's just simple words
193
+ if self._needs_quoting(content):
194
+ escaped_content = content.replace('"', '""')
195
+ result += f'"{escaped_content}"'
196
+ else:
197
+ result += content
198
+
199
+ return result
200
+
201
+ def _needs_quoting(self, term: str) -> bool:
202
+ """Check if a term needs to be quoted for FTS5 safety.
203
+
204
+ Args:
205
+ term: The term to check
206
+
207
+ Returns:
208
+ True if the term should be quoted
209
+ """
210
+ if not term or not term.strip():
211
+ return False
212
+
213
+ # Characters that indicate we should quote (excluding parentheses which are valid syntax)
214
+ needs_quoting_chars = [
215
+ " ",
216
+ ".",
217
+ ":",
218
+ ";",
219
+ ",",
220
+ "<",
221
+ ">",
222
+ "?",
223
+ "/",
224
+ "-",
225
+ "'",
226
+ '"',
227
+ "[",
228
+ "]",
229
+ "{",
230
+ "}",
231
+ "+",
232
+ "!",
233
+ "@",
234
+ "#",
235
+ "$",
236
+ "%",
237
+ "^",
238
+ "&",
239
+ "=",
240
+ "|",
241
+ "\\",
242
+ "~",
243
+ "`",
244
+ ]
245
+
246
+ return any(c in term for c in needs_quoting_chars)
247
+
248
+ def _prepare_single_term(self, term: str, is_prefix: bool = True) -> str:
249
+ """Prepare a single search term (no Boolean operators).
250
+
251
+ Args:
252
+ term: A single search term
253
+ is_prefix: Whether to add prefix search capability (* suffix)
76
254
 
77
- def _quote_search_term(self, term: str) -> str:
78
- """Add quotes if term contains special characters.
79
- For FTS5, special characters and phrases need to be quoted to be treated as a single token.
255
+ Returns:
256
+ A properly formatted single term
80
257
  """
81
- # List of special characters that need quoting
82
- special_chars = ["/", "*", "-", ".", " ", "(", ")", "[", "]", '"', "'"]
83
-
84
- # Check if term contains any special characters
85
- if any(c in term for c in special_chars):
86
- # If the term already contains quotes, escape them
87
- term = term.replace('"', '""')
88
- return f'"{term}"'
258
+ if not term or not term.strip():
259
+ return term
260
+
261
+ term = term.strip()
262
+
263
+ # Check if term is already a proper wildcard pattern (alphanumeric + *)
264
+ # e.g., "hello*", "test*world" - these should be left alone
265
+ if "*" in term and all(c.isalnum() or c in "*_-" for c in term):
266
+ return term
267
+
268
+ # Characters that can cause FTS5 syntax errors when used as operators
269
+ # We're more conservative here - only quote when we detect problematic patterns
270
+ problematic_chars = [
271
+ '"',
272
+ "'",
273
+ "(",
274
+ ")",
275
+ "[",
276
+ "]",
277
+ "{",
278
+ "}",
279
+ "+",
280
+ "!",
281
+ "@",
282
+ "#",
283
+ "$",
284
+ "%",
285
+ "^",
286
+ "&",
287
+ "=",
288
+ "|",
289
+ "\\",
290
+ "~",
291
+ "`",
292
+ ]
293
+
294
+ # Characters that indicate we should quote (spaces, dots, colons, etc.)
295
+ # Adding hyphens here because FTS5 can have issues with hyphens followed by wildcards
296
+ needs_quoting_chars = [" ", ".", ":", ";", ",", "<", ">", "?", "/", "-"]
297
+
298
+ # Check if term needs quoting
299
+ has_problematic = any(c in term for c in problematic_chars)
300
+ has_spaces_or_special = any(c in term for c in needs_quoting_chars)
301
+
302
+ if has_problematic or has_spaces_or_special:
303
+ # Handle multi-word queries differently from special character queries
304
+ if " " in term and not any(c in term for c in problematic_chars):
305
+ # Check if any individual word contains special characters that need quoting
306
+ words = term.strip().split()
307
+ has_special_in_words = any(
308
+ any(c in word for c in needs_quoting_chars if c != " ") for word in words
309
+ )
310
+
311
+ if not has_special_in_words:
312
+ # For multi-word queries with simple words (like "emoji unicode"),
313
+ # use boolean AND to handle word order variations
314
+ if is_prefix:
315
+ # Add prefix wildcard to each word for better matching
316
+ prepared_words = [f"{word}*" for word in words if word]
317
+ else:
318
+ prepared_words = words
319
+ term = " AND ".join(prepared_words)
320
+ else:
321
+ # If any word has special characters, quote the entire phrase
322
+ escaped_term = term.replace('"', '""')
323
+ if is_prefix and not ("/" in term and term.endswith(".md")):
324
+ term = f'"{escaped_term}"*'
325
+ else:
326
+ term = f'"{escaped_term}"'
327
+ else:
328
+ # For terms with problematic characters or file paths, use exact phrase matching
329
+ # Escape any existing quotes by doubling them
330
+ escaped_term = term.replace('"', '""')
331
+ # Quote the entire term to handle special characters safely
332
+ if is_prefix and not ("/" in term and term.endswith(".md")):
333
+ # For search terms (not file paths), add prefix matching
334
+ term = f'"{escaped_term}"*'
335
+ else:
336
+ # For file paths, use exact matching
337
+ term = f'"{escaped_term}"'
338
+ elif is_prefix:
339
+ # Only add wildcard for simple terms without special characters
340
+ term = f"{term}*"
341
+
89
342
  return term
90
343
 
344
+ def _prepare_search_term(self, term: str, is_prefix: bool = True) -> str:
345
+ """Prepare a search term for FTS5 query.
346
+
347
+ Args:
348
+ term: The search term to prepare
349
+ is_prefix: Whether to add prefix search capability (* suffix)
350
+
351
+ For FTS5:
352
+ - Boolean operators (AND, OR, NOT) are preserved for complex queries
353
+ - Terms with FTS5 special characters are quoted to prevent syntax errors
354
+ - Simple terms get prefix wildcards for better matching
355
+ """
356
+ # Check for explicit boolean operators - if present, process as Boolean query
357
+ boolean_operators = [" AND ", " OR ", " NOT "]
358
+ if any(op in f" {term} " for op in boolean_operators):
359
+ return self._prepare_boolean_query(term)
360
+
361
+ # For non-Boolean queries, use the single term preparation logic
362
+ return self._prepare_single_term(term, is_prefix)
363
+
91
364
  async def search(
92
365
  self,
93
366
  search_text: Optional[str] = None,
94
367
  permalink: Optional[str] = None,
95
368
  permalink_match: Optional[str] = None,
96
369
  title: Optional[str] = None,
97
- types: Optional[List[SearchItemType]] = None,
370
+ types: Optional[List[str]] = None,
98
371
  after_date: Optional[datetime] = None,
99
- entity_types: Optional[List[str]] = None,
372
+ search_item_types: Optional[List[SearchItemType]] = None,
100
373
  limit: int = 10,
374
+ offset: int = 0,
101
375
  ) -> List[SearchIndexRow]:
102
376
  """Search across all indexed content with fuzzy matching."""
103
377
  conditions = []
@@ -106,15 +380,21 @@ class SearchRepository:
106
380
 
107
381
  # Handle text search for title and content
108
382
  if search_text:
109
- search_text = self._quote_search_term(search_text.lower().strip())
110
- params["text"] = f"{search_text}*"
111
- conditions.append("(title MATCH :text OR content MATCH :text)")
383
+ # Skip FTS for wildcard-only queries that would cause "unknown special query" errors
384
+ if search_text.strip() == "*" or search_text.strip() == "":
385
+ # For wildcard searches, don't add any text conditions - return all results
386
+ pass
387
+ else:
388
+ # Use _prepare_search_term to handle both Boolean and non-Boolean queries
389
+ processed_text = self._prepare_search_term(search_text.strip())
390
+ params["text"] = processed_text
391
+ conditions.append("(title MATCH :text OR content_stems MATCH :text)")
112
392
 
113
393
  # Handle title match search
114
394
  if title:
115
- title_text = self._quote_search_term(title.lower().strip())
116
- params["text"] = f"{title_text}*"
117
- conditions.append("title MATCH :text")
395
+ title_text = self._prepare_search_term(title.strip(), is_prefix=False)
396
+ params["title_text"] = title_text
397
+ conditions.append("title MATCH :title_text")
118
398
 
119
399
  # Handle permalink exact search
120
400
  if permalink:
@@ -123,18 +403,31 @@ class SearchRepository:
123
403
 
124
404
  # Handle permalink match search, supports *
125
405
  if permalink_match:
126
- params["permalink"] = self._quote_search_term(permalink_match)
127
- conditions.append("permalink MATCH :permalink")
406
+ # For GLOB patterns, don't use _prepare_search_term as it will quote slashes
407
+ # GLOB patterns need to preserve their syntax
408
+ permalink_text = permalink_match.lower().strip()
409
+ params["permalink"] = permalink_text
410
+ if "*" in permalink_match:
411
+ conditions.append("permalink GLOB :permalink")
412
+ else:
413
+ # For exact matches without *, we can use FTS5 MATCH
414
+ # but only prepare the term if it doesn't look like a path
415
+ if "/" in permalink_text:
416
+ conditions.append("permalink = :permalink")
417
+ else:
418
+ permalink_text = self._prepare_search_term(permalink_text, is_prefix=False)
419
+ params["permalink"] = permalink_text
420
+ conditions.append("permalink MATCH :permalink")
128
421
 
129
- # Handle type filter
130
- if types:
131
- type_list = ", ".join(f"'{t.value}'" for t in types)
422
+ # Handle entity type filter
423
+ if search_item_types:
424
+ type_list = ", ".join(f"'{t.value}'" for t in search_item_types)
132
425
  conditions.append(f"type IN ({type_list})")
133
426
 
134
- # Handle entity type filter
135
- if entity_types:
136
- entity_type_list = ", ".join(f"'{t}'" for t in entity_types)
137
- conditions.append(f"json_extract(metadata, '$.entity_type') IN ({entity_type_list})")
427
+ # Handle type filter
428
+ if types:
429
+ type_list = ", ".join(f"'{t}'" for t in types)
430
+ conditions.append(f"json_extract(metadata, '$.entity_type') IN ({type_list})")
138
431
 
139
432
  # Handle date filter using datetime() for proper comparison
140
433
  if after_date:
@@ -144,14 +437,20 @@ class SearchRepository:
144
437
  # order by most recent first
145
438
  order_by_clause = ", updated_at DESC"
146
439
 
440
+ # Always filter by project_id
441
+ params["project_id"] = self.project_id
442
+ conditions.append("project_id = :project_id")
443
+
147
444
  # set limit on search query
148
445
  params["limit"] = limit
446
+ params["offset"] = offset
149
447
 
150
448
  # Build WHERE clause
151
449
  where_clause = " AND ".join(conditions) if conditions else "1=1"
152
450
 
153
451
  sql = f"""
154
452
  SELECT
453
+ project_id,
155
454
  id,
156
455
  title,
157
456
  permalink,
@@ -162,7 +461,7 @@ class SearchRepository:
162
461
  to_id,
163
462
  relation_type,
164
463
  entity_id,
165
- content,
464
+ content_snippet,
166
465
  category,
167
466
  created_at,
168
467
  updated_at,
@@ -171,15 +470,28 @@ class SearchRepository:
171
470
  WHERE {where_clause}
172
471
  ORDER BY score ASC {order_by_clause}
173
472
  LIMIT :limit
473
+ OFFSET :offset
174
474
  """
175
475
 
176
- # logger.debug(f"Search {sql} params: {params}")
177
- async with db.scoped_session(self.session_maker) as session:
178
- result = await session.execute(text(sql), params)
179
- rows = result.fetchall()
476
+ logger.trace(f"Search {sql} params: {params}")
477
+ try:
478
+ async with db.scoped_session(self.session_maker) as session:
479
+ result = await session.execute(text(sql), params)
480
+ rows = result.fetchall()
481
+ except Exception as e:
482
+ # Handle FTS5 syntax errors and provide user-friendly feedback
483
+ if "fts5: syntax error" in str(e).lower(): # pragma: no cover
484
+ logger.warning(f"FTS5 syntax error for search term: {search_text}, error: {e}")
485
+ # Return empty results rather than crashing
486
+ return []
487
+ else:
488
+ # Re-raise other database errors
489
+ logger.error(f"Database error during search: {e}")
490
+ raise
180
491
 
181
492
  results = [
182
493
  SearchIndexRow(
494
+ project_id=self.project_id,
183
495
  id=row.id,
184
496
  title=row.title,
185
497
  permalink=row.permalink,
@@ -191,7 +503,7 @@ class SearchRepository:
191
503
  to_id=row.to_id,
192
504
  relation_type=row.relation_type,
193
505
  entity_id=row.entity_id,
194
- content=row.content,
506
+ content_snippet=row.content_snippet,
195
507
  category=row.category,
196
508
  created_at=row.created_at,
197
509
  updated_at=row.updated_at,
@@ -199,8 +511,11 @@ class SearchRepository:
199
511
  for row in rows
200
512
  ]
201
513
 
202
- # for r in results:
203
- # logger.debug(f"Search result: type:{r.type} title: {r.title} permalink: {r.permalink} score: {r.score}")
514
+ logger.trace(f"Found {len(results)} search results")
515
+ for r in results:
516
+ logger.trace(
517
+ f"Search result: project_id: {r.project_id} type:{r.type} title: {r.title} permalink: {r.permalink} score: {r.score}"
518
+ )
204
519
 
205
520
  return results
206
521
 
@@ -212,36 +527,99 @@ class SearchRepository:
212
527
  async with db.scoped_session(self.session_maker) as session:
213
528
  # Delete existing record if any
214
529
  await session.execute(
215
- text("DELETE FROM search_index WHERE permalink = :permalink"),
216
- {"permalink": search_index_row.permalink},
530
+ text(
531
+ "DELETE FROM search_index WHERE permalink = :permalink AND project_id = :project_id"
532
+ ),
533
+ {"permalink": search_index_row.permalink, "project_id": self.project_id},
217
534
  )
218
535
 
536
+ # Prepare data for insert with project_id
537
+ insert_data = search_index_row.to_insert()
538
+ insert_data["project_id"] = self.project_id
539
+
219
540
  # Insert new record
220
541
  await session.execute(
221
542
  text("""
222
543
  INSERT INTO search_index (
223
- id, title, content, permalink, file_path, type, metadata,
544
+ id, title, content_stems, content_snippet, permalink, file_path, type, metadata,
224
545
  from_id, to_id, relation_type,
225
546
  entity_id, category,
226
- created_at, updated_at
547
+ created_at, updated_at,
548
+ project_id
227
549
  ) VALUES (
228
- :id, :title, :content, :permalink, :file_path, :type, :metadata,
550
+ :id, :title, :content_stems, :content_snippet, :permalink, :file_path, :type, :metadata,
229
551
  :from_id, :to_id, :relation_type,
230
552
  :entity_id, :category,
231
- :created_at, :updated_at
553
+ :created_at, :updated_at,
554
+ :project_id
232
555
  )
233
556
  """),
234
- search_index_row.to_insert(),
557
+ insert_data,
558
+ )
559
+ logger.debug(f"indexed row {search_index_row}")
560
+ await session.commit()
561
+
562
+ async def bulk_index_items(self, search_index_rows: List[SearchIndexRow]):
563
+ """Index multiple items in a single batch operation.
564
+
565
+ Note: This method assumes that any existing records for the entity_id
566
+ have already been deleted (typically via delete_by_entity_id).
567
+
568
+ Args:
569
+ search_index_rows: List of SearchIndexRow objects to index
570
+ """
571
+ if not search_index_rows:
572
+ return
573
+
574
+ async with db.scoped_session(self.session_maker) as session:
575
+ # Prepare all insert data with project_id
576
+ insert_data_list = []
577
+ for row in search_index_rows:
578
+ insert_data = row.to_insert()
579
+ insert_data["project_id"] = self.project_id
580
+ insert_data_list.append(insert_data)
581
+
582
+ # Batch insert all records using executemany
583
+ await session.execute(
584
+ text("""
585
+ INSERT INTO search_index (
586
+ id, title, content_stems, content_snippet, permalink, file_path, type, metadata,
587
+ from_id, to_id, relation_type,
588
+ entity_id, category,
589
+ created_at, updated_at,
590
+ project_id
591
+ ) VALUES (
592
+ :id, :title, :content_stems, :content_snippet, :permalink, :file_path, :type, :metadata,
593
+ :from_id, :to_id, :relation_type,
594
+ :entity_id, :category,
595
+ :created_at, :updated_at,
596
+ :project_id
597
+ )
598
+ """),
599
+ insert_data_list,
600
+ )
601
+ logger.debug(f"Bulk indexed {len(search_index_rows)} rows")
602
+ await session.commit()
603
+
604
+ async def delete_by_entity_id(self, entity_id: int):
605
+ """Delete an item from the search index by entity_id."""
606
+ async with db.scoped_session(self.session_maker) as session:
607
+ await session.execute(
608
+ text(
609
+ "DELETE FROM search_index WHERE entity_id = :entity_id AND project_id = :project_id"
610
+ ),
611
+ {"entity_id": entity_id, "project_id": self.project_id},
235
612
  )
236
- logger.debug(f"indexed permalink {search_index_row.permalink}")
237
613
  await session.commit()
238
614
 
239
615
  async def delete_by_permalink(self, permalink: str):
240
616
  """Delete an item from the search index."""
241
617
  async with db.scoped_session(self.session_maker) as session:
242
618
  await session.execute(
243
- text("DELETE FROM search_index WHERE permalink = :permalink"),
244
- {"permalink": permalink},
619
+ text(
620
+ "DELETE FROM search_index WHERE permalink = :permalink AND project_id = :project_id"
621
+ ),
622
+ {"permalink": permalink, "project_id": self.project_id},
245
623
  )
246
624
  await session.commit()
247
625