basic-memory 0.16.1__py3-none-any.whl → 0.17.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of basic-memory might be problematic. Click here for more details.

Files changed (143) hide show
  1. basic_memory/__init__.py +1 -1
  2. basic_memory/alembic/env.py +112 -26
  3. basic_memory/alembic/versions/314f1ea54dc4_add_postgres_full_text_search_support_.py +131 -0
  4. basic_memory/alembic/versions/5fe1ab1ccebe_add_projects_table.py +15 -3
  5. basic_memory/alembic/versions/647e7a75e2cd_project_constraint_fix.py +44 -36
  6. basic_memory/alembic/versions/6830751f5fb6_merge_multiple_heads.py +24 -0
  7. basic_memory/alembic/versions/a2b3c4d5e6f7_add_search_index_entity_cascade.py +56 -0
  8. basic_memory/alembic/versions/cc7172b46608_update_search_index_schema.py +13 -0
  9. basic_memory/alembic/versions/f8a9b2c3d4e5_add_pg_trgm_for_fuzzy_link_resolution.py +239 -0
  10. basic_memory/alembic/versions/g9a0b3c4d5e6_add_external_id_to_project_and_entity.py +173 -0
  11. basic_memory/api/app.py +45 -24
  12. basic_memory/api/container.py +133 -0
  13. basic_memory/api/routers/knowledge_router.py +17 -5
  14. basic_memory/api/routers/project_router.py +68 -14
  15. basic_memory/api/routers/resource_router.py +37 -27
  16. basic_memory/api/routers/utils.py +53 -14
  17. basic_memory/api/v2/__init__.py +35 -0
  18. basic_memory/api/v2/routers/__init__.py +21 -0
  19. basic_memory/api/v2/routers/directory_router.py +93 -0
  20. basic_memory/api/v2/routers/importer_router.py +181 -0
  21. basic_memory/api/v2/routers/knowledge_router.py +427 -0
  22. basic_memory/api/v2/routers/memory_router.py +130 -0
  23. basic_memory/api/v2/routers/project_router.py +359 -0
  24. basic_memory/api/v2/routers/prompt_router.py +269 -0
  25. basic_memory/api/v2/routers/resource_router.py +286 -0
  26. basic_memory/api/v2/routers/search_router.py +73 -0
  27. basic_memory/cli/app.py +43 -7
  28. basic_memory/cli/auth.py +27 -4
  29. basic_memory/cli/commands/__init__.py +3 -1
  30. basic_memory/cli/commands/cloud/api_client.py +20 -5
  31. basic_memory/cli/commands/cloud/cloud_utils.py +13 -6
  32. basic_memory/cli/commands/cloud/rclone_commands.py +110 -14
  33. basic_memory/cli/commands/cloud/rclone_installer.py +18 -4
  34. basic_memory/cli/commands/cloud/upload.py +10 -3
  35. basic_memory/cli/commands/command_utils.py +52 -4
  36. basic_memory/cli/commands/db.py +78 -19
  37. basic_memory/cli/commands/format.py +198 -0
  38. basic_memory/cli/commands/import_chatgpt.py +12 -8
  39. basic_memory/cli/commands/import_claude_conversations.py +12 -8
  40. basic_memory/cli/commands/import_claude_projects.py +12 -8
  41. basic_memory/cli/commands/import_memory_json.py +12 -8
  42. basic_memory/cli/commands/mcp.py +8 -26
  43. basic_memory/cli/commands/project.py +22 -9
  44. basic_memory/cli/commands/status.py +3 -2
  45. basic_memory/cli/commands/telemetry.py +81 -0
  46. basic_memory/cli/container.py +84 -0
  47. basic_memory/cli/main.py +7 -0
  48. basic_memory/config.py +177 -77
  49. basic_memory/db.py +183 -77
  50. basic_memory/deps/__init__.py +293 -0
  51. basic_memory/deps/config.py +26 -0
  52. basic_memory/deps/db.py +56 -0
  53. basic_memory/deps/importers.py +200 -0
  54. basic_memory/deps/projects.py +238 -0
  55. basic_memory/deps/repositories.py +179 -0
  56. basic_memory/deps/services.py +480 -0
  57. basic_memory/deps.py +14 -409
  58. basic_memory/file_utils.py +212 -3
  59. basic_memory/ignore_utils.py +5 -5
  60. basic_memory/importers/base.py +40 -19
  61. basic_memory/importers/chatgpt_importer.py +17 -4
  62. basic_memory/importers/claude_conversations_importer.py +27 -12
  63. basic_memory/importers/claude_projects_importer.py +50 -14
  64. basic_memory/importers/memory_json_importer.py +36 -16
  65. basic_memory/importers/utils.py +5 -2
  66. basic_memory/markdown/entity_parser.py +62 -23
  67. basic_memory/markdown/markdown_processor.py +67 -4
  68. basic_memory/markdown/plugins.py +4 -2
  69. basic_memory/markdown/utils.py +10 -1
  70. basic_memory/mcp/async_client.py +1 -0
  71. basic_memory/mcp/clients/__init__.py +28 -0
  72. basic_memory/mcp/clients/directory.py +70 -0
  73. basic_memory/mcp/clients/knowledge.py +176 -0
  74. basic_memory/mcp/clients/memory.py +120 -0
  75. basic_memory/mcp/clients/project.py +89 -0
  76. basic_memory/mcp/clients/resource.py +71 -0
  77. basic_memory/mcp/clients/search.py +65 -0
  78. basic_memory/mcp/container.py +110 -0
  79. basic_memory/mcp/project_context.py +47 -33
  80. basic_memory/mcp/prompts/ai_assistant_guide.py +2 -2
  81. basic_memory/mcp/prompts/recent_activity.py +2 -2
  82. basic_memory/mcp/prompts/utils.py +3 -3
  83. basic_memory/mcp/server.py +58 -0
  84. basic_memory/mcp/tools/build_context.py +14 -14
  85. basic_memory/mcp/tools/canvas.py +34 -12
  86. basic_memory/mcp/tools/chatgpt_tools.py +4 -1
  87. basic_memory/mcp/tools/delete_note.py +31 -7
  88. basic_memory/mcp/tools/edit_note.py +14 -9
  89. basic_memory/mcp/tools/list_directory.py +7 -17
  90. basic_memory/mcp/tools/move_note.py +35 -31
  91. basic_memory/mcp/tools/project_management.py +29 -25
  92. basic_memory/mcp/tools/read_content.py +13 -3
  93. basic_memory/mcp/tools/read_note.py +24 -14
  94. basic_memory/mcp/tools/recent_activity.py +32 -38
  95. basic_memory/mcp/tools/search.py +17 -10
  96. basic_memory/mcp/tools/utils.py +28 -0
  97. basic_memory/mcp/tools/view_note.py +2 -1
  98. basic_memory/mcp/tools/write_note.py +37 -14
  99. basic_memory/models/knowledge.py +15 -2
  100. basic_memory/models/project.py +7 -1
  101. basic_memory/models/search.py +58 -2
  102. basic_memory/project_resolver.py +222 -0
  103. basic_memory/repository/entity_repository.py +210 -3
  104. basic_memory/repository/observation_repository.py +1 -0
  105. basic_memory/repository/postgres_search_repository.py +451 -0
  106. basic_memory/repository/project_repository.py +38 -1
  107. basic_memory/repository/relation_repository.py +58 -2
  108. basic_memory/repository/repository.py +1 -0
  109. basic_memory/repository/search_index_row.py +95 -0
  110. basic_memory/repository/search_repository.py +77 -615
  111. basic_memory/repository/search_repository_base.py +241 -0
  112. basic_memory/repository/sqlite_search_repository.py +437 -0
  113. basic_memory/runtime.py +61 -0
  114. basic_memory/schemas/base.py +36 -6
  115. basic_memory/schemas/directory.py +2 -1
  116. basic_memory/schemas/memory.py +9 -2
  117. basic_memory/schemas/project_info.py +2 -0
  118. basic_memory/schemas/response.py +84 -27
  119. basic_memory/schemas/search.py +5 -0
  120. basic_memory/schemas/sync_report.py +1 -1
  121. basic_memory/schemas/v2/__init__.py +27 -0
  122. basic_memory/schemas/v2/entity.py +133 -0
  123. basic_memory/schemas/v2/resource.py +47 -0
  124. basic_memory/services/context_service.py +219 -43
  125. basic_memory/services/directory_service.py +26 -11
  126. basic_memory/services/entity_service.py +68 -33
  127. basic_memory/services/file_service.py +131 -16
  128. basic_memory/services/initialization.py +51 -26
  129. basic_memory/services/link_resolver.py +1 -0
  130. basic_memory/services/project_service.py +68 -43
  131. basic_memory/services/search_service.py +75 -16
  132. basic_memory/sync/__init__.py +2 -1
  133. basic_memory/sync/coordinator.py +160 -0
  134. basic_memory/sync/sync_service.py +135 -115
  135. basic_memory/sync/watch_service.py +32 -12
  136. basic_memory/telemetry.py +249 -0
  137. basic_memory/utils.py +96 -75
  138. {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/METADATA +129 -5
  139. basic_memory-0.17.4.dist-info/RECORD +193 -0
  140. {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/WHEEL +1 -1
  141. basic_memory-0.16.1.dist-info/RECORD +0 -148
  142. {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/entry_points.txt +0 -0
  143. {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,241 @@
1
+ """Abstract base class for search repository implementations."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from datetime import datetime
5
+ from typing import Any, Dict, List, Optional
6
+
7
+
8
+ from loguru import logger
9
+ from sqlalchemy import Executable, Result, text
10
+ from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
11
+
12
+ from basic_memory import db
13
+ from basic_memory.schemas.search import SearchItemType
14
+ from basic_memory.repository.search_index_row import SearchIndexRow
15
+
16
+
17
+ class SearchRepositoryBase(ABC):
18
+ """Abstract base class for backend-specific search repository implementations.
19
+
20
+ This class defines the common interface that all search repositories must implement,
21
+ regardless of whether they use SQLite FTS5 or Postgres tsvector for full-text search.
22
+
23
+ Concrete implementations:
24
+ - SQLiteSearchRepository: Uses FTS5 virtual tables with MATCH queries
25
+ - PostgresSearchRepository: Uses tsvector/tsquery with GIN indexes
26
+ """
27
+
28
def __init__(self, session_maker: async_sessionmaker[AsyncSession], project_id: int):
    """Bind the repository to a session factory and a single project.

    Args:
        session_maker: Async SQLAlchemy session factory used for every query.
        project_id: Identifier of the project all operations are scoped to.

    Raises:
        ValueError: When project_id is None or not a positive number.
    """
    project_is_invalid = project_id is None or project_id <= 0
    if project_is_invalid:  # pragma: no cover
        raise ValueError("A valid project_id is required for SearchRepository")

    self.project_id = project_id
    self.session_maker = session_maker
43
+
44
@abstractmethod
async def init_search_index(self) -> None:
    """Build (or rebuild) the backing search index.

    Each backend supplies its own DDL:
    - SQLite: CREATE VIRTUAL TABLE using FTS5
    - Postgres: CREATE TABLE with tsvector column and GIN indexes
    """
    ...
53
+
54
@abstractmethod
def _prepare_search_term(self, term: str, is_prefix: bool = True) -> str:
    """Translate a raw search term into the backend's query syntax.

    Args:
        term: Raw term supplied by the caller.
        is_prefix: When True, the result should also match prefixes of the term.

    Returns:
        The term rewritten for the concrete backend.

    Backend notes:
    - SQLite: Quotes FTS5 special characters, adds * wildcards
    - Postgres: Converts to tsquery syntax with :* prefix operator
    """
    ...
70
+
71
@abstractmethod
async def search(
    self,
    search_text: Optional[str] = None,
    permalink: Optional[str] = None,
    permalink_match: Optional[str] = None,
    title: Optional[str] = None,
    types: Optional[List[str]] = None,
    after_date: Optional[datetime] = None,
    search_item_types: Optional[List[SearchItemType]] = None,
    limit: int = 10,
    offset: int = 0,
) -> List[SearchIndexRow]:
    """Query the index; every filter is optional and filters combine.

    Args:
        search_text: Full-text query over title and content.
        permalink: Permalink that must match exactly.
        permalink_match: Permalink pattern; ``*`` acts as a wildcard.
        title: Query restricted to the title.
        types: Entity types to keep (read from metadata.entity_type).
        after_date: Keep only rows with created_at later than this value.
        search_item_types: SearchItemType values to keep
            (ENTITY, OBSERVATION, RELATION).
        limit: Maximum number of rows returned.
        offset: Number of leading rows skipped.

    Returns:
        Matching SearchIndexRow objects carrying relevance scores.

    Backend notes:
    - SQLite: Uses MATCH operator and bm25() for scoring
    - Postgres: Uses @@ operator and ts_rank() for scoring
    """
    ...
105
+
106
async def index_item(self, search_index_row: SearchIndexRow) -> None:
    """Replace the index entry for one row within a single session.

    Any row with the same permalink in this project is deleted first, then
    the new row is inserted and the session is committed. Only plain SQL
    DELETE/INSERT is used, so the method is shared by all backends.

    Args:
        search_index_row: Row to write; its project_id is overridden with
            the repository's own project_id.
    """
    remove_existing = text(
        "DELETE FROM search_index WHERE permalink = :permalink AND project_id = :project_id"
    )
    insert_row = text("""
        INSERT INTO search_index (
            id, title, content_stems, content_snippet, permalink, file_path, type, metadata,
            from_id, to_id, relation_type,
            entity_id, category,
            created_at, updated_at,
            project_id
        ) VALUES (
            :id, :title, :content_stems, :content_snippet, :permalink, :file_path, :type, :metadata,
            :from_id, :to_id, :relation_type,
            :entity_id, :category,
            :created_at, :updated_at,
            :project_id
        )
    """)

    async with db.scoped_session(self.session_maker) as session:
        await session.execute(
            remove_existing,
            {"permalink": search_index_row.permalink, "project_id": self.project_id},
        )

        # Raw text() SQL gets JSON as a serialized string; the column type
        # (TEXT on SQLite, JSONB on Postgres) is left to convert it.
        row_values = search_index_row.to_insert(serialize_json=True)
        row_values["project_id"] = self.project_id

        await session.execute(insert_row, row_values)
        logger.debug(f"indexed row {search_index_row}")
        await session.commit()
148
+
149
async def bulk_index_items(self, search_index_rows: List[SearchIndexRow]) -> None:
    """Insert many rows with one executemany-style statement.

    Existing rows are NOT removed here: callers are expected to have cleared
    stale entries beforehand (typically via delete_by_entity_id). Only plain
    SQL INSERT is used, so the method is shared by all backends.

    Args:
        search_index_rows: Rows to insert; an empty list is a no-op.
    """
    if not search_index_rows:  # pragma: no cover
        return  # pragma: no cover

    def as_parameters(row: SearchIndexRow) -> dict:
        # Raw text() SQL gets JSON as a serialized string; the column type
        # (TEXT on SQLite, JSONB on Postgres) is left to convert it.
        values = row.to_insert(serialize_json=True)
        values["project_id"] = self.project_id
        return values

    insert_rows = text("""
        INSERT INTO search_index (
            id, title, content_stems, content_snippet, permalink, file_path, type, metadata,
            from_id, to_id, relation_type,
            entity_id, category,
            created_at, updated_at,
            project_id
        ) VALUES (
            :id, :title, :content_stems, :content_snippet, :permalink, :file_path, :type, :metadata,
            :from_id, :to_id, :relation_type,
            :entity_id, :category,
            :created_at, :updated_at,
            :project_id
        )
    """)

    async with db.scoped_session(self.session_maker) as session:
        parameter_sets = [as_parameters(row) for row in search_index_rows]

        # Passing a list of dicts makes SQLAlchemy run the statement per row.
        await session.execute(insert_rows, parameter_sets)
        logger.debug(f"Bulk indexed {len(search_index_rows)} rows")
        await session.commit()
195
+
196
async def delete_by_entity_id(self, entity_id: int) -> None:
    """Remove every index row of this project that belongs to an entity.

    Uses a plain SQL DELETE, so it is shared by all backends.

    Args:
        entity_id: Value matched against the entity_id column.
    """
    statement = text(
        "DELETE FROM search_index WHERE entity_id = :entity_id AND project_id = :project_id"
    )
    bindings = {"entity_id": entity_id, "project_id": self.project_id}

    async with db.scoped_session(self.session_maker) as session:
        await session.execute(statement, bindings)
        await session.commit()
209
+
210
async def delete_by_permalink(self, permalink: str) -> None:
    """Remove the index row(s) of this project with the given permalink.

    Uses a plain SQL DELETE, so it is shared by all backends.

    Args:
        permalink: Value matched exactly against the permalink column.
    """
    statement = text(
        "DELETE FROM search_index WHERE permalink = :permalink AND project_id = :project_id"
    )
    bindings = {"permalink": permalink, "project_id": self.project_id}

    async with db.scoped_session(self.session_maker) as session:
        await session.execute(statement, bindings)
        await session.commit()
223
+
224
async def execute_query(
    self,
    query: Executable,
    params: Dict[str, Any],
) -> Result[Any]:
    """Run an arbitrary statement in a scoped session and time it.

    Shared by all backends as a utility for ad-hoc queries.

    Args:
        query: SQLAlchemy executable to run.
        params: Bound parameters passed along with the statement.

    Returns:
        The Result produced by ``session.execute``.
    """
    import time

    async with db.scoped_session(self.session_maker) as session:
        started = time.perf_counter()
        outcome = await session.execute(query, params)
        duration = time.perf_counter() - started
        logger.debug(f"Query executed successfully in {duration:.2f}s.")
        return outcome
@@ -0,0 +1,437 @@
1
+ """SQLite FTS5-based search repository implementation."""
2
+
3
+ import json
4
+ import re
5
+ from datetime import datetime
6
+ from typing import List, Optional
7
+
8
+
9
+ from loguru import logger
10
+ from sqlalchemy import text
11
+
12
+ from basic_memory import db
13
+ from basic_memory.models.search import CREATE_SEARCH_INDEX
14
+ from basic_memory.repository.search_index_row import SearchIndexRow
15
+ from basic_memory.repository.search_repository_base import SearchRepositoryBase
16
+ from basic_memory.schemas.search import SearchItemType
17
+
18
+
19
+ class SQLiteSearchRepository(SearchRepositoryBase):
20
+ """SQLite FTS5 implementation of search repository.
21
+
22
+ Uses SQLite's FTS5 virtual tables for full-text search with:
23
+ - MATCH operator for queries
24
+ - bm25() function for relevance scoring
25
+ - Special character quoting for syntax safety
26
+ - Prefix wildcard matching with *
27
+ """
28
+
29
async def init_search_index(self):
    """Run the CREATE_SEARCH_INDEX statement and commit it.

    NOTE(review): CREATE_SEARCH_INDEX is defined in basic_memory.models.search;
    it is presumably a CREATE VIRTUAL TABLE IF NOT EXISTS statement, so that
    existing indexed data survives restarts. Confirm against that module.

    Raises:
        Exception: Any error from the session is logged and re-raised.
    """
    logger.info("Initializing SQLite FTS5 search index")
    try:
        async with db.scoped_session(self.session_maker) as session:
            await session.execute(CREATE_SEARCH_INDEX)
            await session.commit()
    except Exception as err:  # pragma: no cover
        logger.error(f"Error initializing search index: {err}")
        raise err
44
+
45
+ def _prepare_boolean_query(self, query: str) -> str:
46
+ """Prepare a Boolean query by quoting individual terms while preserving operators.
47
+
48
+ Args:
49
+ query: A Boolean query like "tier1-test AND unicode" or "(hello OR world) NOT test"
50
+
51
+ Returns:
52
+ A properly formatted Boolean query with quoted terms that need quoting
53
+ """
54
+ # Define Boolean operators and their boundaries
55
+ boolean_pattern = r"(\bAND\b|\bOR\b|\bNOT\b)"
56
+
57
+ # Split the query by Boolean operators, keeping the operators
58
+ parts = re.split(boolean_pattern, query)
59
+
60
+ processed_parts = []
61
+ for part in parts:
62
+ part = part.strip()
63
+ if not part:
64
+ continue
65
+
66
+ # If it's a Boolean operator, keep it as is
67
+ if part in ["AND", "OR", "NOT"]:
68
+ processed_parts.append(part)
69
+ else:
70
+ # Handle parentheses specially - they should be preserved for grouping
71
+ if "(" in part or ")" in part:
72
+ # Parse parenthetical expressions carefully
73
+ processed_part = self._prepare_parenthetical_term(part)
74
+ processed_parts.append(processed_part)
75
+ else:
76
+ # This is a search term - for Boolean queries, don't add prefix wildcards
77
+ prepared_term = self._prepare_single_term(part, is_prefix=False)
78
+ processed_parts.append(prepared_term)
79
+
80
+ return " ".join(processed_parts)
81
+
82
+ def _prepare_parenthetical_term(self, term: str) -> str:
83
+ """Prepare a term that contains parentheses, preserving the parentheses for grouping.
84
+
85
+ Args:
86
+ term: A term that may contain parentheses like "(hello" or "world)" or "(hello OR world)"
87
+
88
+ Returns:
89
+ A properly formatted term with parentheses preserved
90
+ """
91
+ # Handle terms that start/end with parentheses but may contain quotable content
92
+ result = ""
93
+ i = 0
94
+ while i < len(term):
95
+ if term[i] in "()":
96
+ # Preserve parentheses as-is
97
+ result += term[i]
98
+ i += 1
99
+ else:
100
+ # Find the next parenthesis or end of string
101
+ start = i
102
+ while i < len(term) and term[i] not in "()":
103
+ i += 1
104
+
105
+ # Extract the content between parentheses
106
+ content = term[start:i].strip()
107
+ if content:
108
+ # Only quote if it actually needs quoting (has hyphens, special chars, etc)
109
+ # but don't quote if it's just simple words
110
+ if self._needs_quoting(content):
111
+ escaped_content = content.replace('"', '""')
112
+ result += f'"{escaped_content}"'
113
+ else:
114
+ result += content
115
+
116
+ return result
117
+
118
+ def _needs_quoting(self, term: str) -> bool:
119
+ """Check if a term needs to be quoted for FTS5 safety.
120
+
121
+ Args:
122
+ term: The term to check
123
+
124
+ Returns:
125
+ True if the term should be quoted
126
+ """
127
+ if not term or not term.strip():
128
+ return False
129
+
130
+ # Characters that indicate we should quote (excluding parentheses which are valid syntax)
131
+ needs_quoting_chars = [
132
+ " ",
133
+ ".",
134
+ ":",
135
+ ";",
136
+ ",",
137
+ "<",
138
+ ">",
139
+ "?",
140
+ "/",
141
+ "-",
142
+ "'",
143
+ '"',
144
+ "[",
145
+ "]",
146
+ "{",
147
+ "}",
148
+ "+",
149
+ "!",
150
+ "@",
151
+ "#",
152
+ "$",
153
+ "%",
154
+ "^",
155
+ "&",
156
+ "=",
157
+ "|",
158
+ "\\",
159
+ "~",
160
+ "`",
161
+ ]
162
+
163
+ return any(c in term for c in needs_quoting_chars)
164
+
165
+ def _prepare_single_term(self, term: str, is_prefix: bool = True) -> str:
166
+ """Prepare a single search term (no Boolean operators).
167
+
168
+ Args:
169
+ term: A single search term
170
+ is_prefix: Whether to add prefix search capability (* suffix)
171
+
172
+ Returns:
173
+ A properly formatted single term
174
+ """
175
+ if not term or not term.strip():
176
+ return term
177
+
178
+ term = term.strip()
179
+
180
+ # Check if term is already a proper wildcard pattern (alphanumeric + *)
181
+ # e.g., "hello*", "test*world" - these should be left alone
182
+ if "*" in term and all(c.isalnum() or c in "*_-" for c in term):
183
+ return term
184
+
185
+ # Characters that can cause FTS5 syntax errors when used as operators
186
+ # We're more conservative here - only quote when we detect problematic patterns
187
+ problematic_chars = [
188
+ '"',
189
+ "'",
190
+ "(",
191
+ ")",
192
+ "[",
193
+ "]",
194
+ "{",
195
+ "}",
196
+ "+",
197
+ "!",
198
+ "@",
199
+ "#",
200
+ "$",
201
+ "%",
202
+ "^",
203
+ "&",
204
+ "=",
205
+ "|",
206
+ "\\",
207
+ "~",
208
+ "`",
209
+ ]
210
+
211
+ # Characters that indicate we should quote (spaces, dots, colons, etc.)
212
+ # Adding hyphens here because FTS5 can have issues with hyphens followed by wildcards
213
+ needs_quoting_chars = [" ", ".", ":", ";", ",", "<", ">", "?", "/", "-"]
214
+
215
+ # Check if term needs quoting
216
+ has_problematic = any(c in term for c in problematic_chars)
217
+ has_spaces_or_special = any(c in term for c in needs_quoting_chars)
218
+
219
+ if has_problematic or has_spaces_or_special:
220
+ # Handle multi-word queries differently from special character queries
221
+ if " " in term and not any(c in term for c in problematic_chars):
222
+ # Check if any individual word contains special characters that need quoting
223
+ words = term.strip().split()
224
+ has_special_in_words = any(
225
+ any(c in word for c in needs_quoting_chars if c != " ") for word in words
226
+ )
227
+
228
+ if not has_special_in_words:
229
+ # For multi-word queries with simple words (like "emoji unicode"),
230
+ # use boolean AND to handle word order variations
231
+ if is_prefix:
232
+ # Add prefix wildcard to each word for better matching
233
+ prepared_words = [f"{word}*" for word in words if word]
234
+ else:
235
+ prepared_words = words
236
+ term = " AND ".join(prepared_words)
237
+ else:
238
+ # If any word has special characters, quote the entire phrase
239
+ escaped_term = term.replace('"', '""')
240
+ if is_prefix and not ("/" in term and term.endswith(".md")):
241
+ term = f'"{escaped_term}"*'
242
+ else:
243
+ term = f'"{escaped_term}"' # pragma: no cover
244
+ else:
245
+ # For terms with problematic characters or file paths, use exact phrase matching
246
+ # Escape any existing quotes by doubling them
247
+ escaped_term = term.replace('"', '""')
248
+ # Quote the entire term to handle special characters safely
249
+ if is_prefix and not ("/" in term and term.endswith(".md")):
250
+ # For search terms (not file paths), add prefix matching
251
+ term = f'"{escaped_term}"*'
252
+ else:
253
+ # For file paths, use exact matching
254
+ term = f'"{escaped_term}"'
255
+ elif is_prefix:
256
+ # Only add wildcard for simple terms without special characters
257
+ term = f"{term}*"
258
+
259
+ return term
260
+
261
+ def _prepare_search_term(self, term: str, is_prefix: bool = True) -> str:
262
+ """Prepare a search term for FTS5 query.
263
+
264
+ Args:
265
+ term: The search term to prepare
266
+ is_prefix: Whether to add prefix search capability (* suffix)
267
+
268
+ For FTS5:
269
+ - Boolean operators (AND, OR, NOT) are preserved for complex queries
270
+ - Terms with FTS5 special characters are quoted to prevent syntax errors
271
+ - Simple terms get prefix wildcards for better matching
272
+ """
273
+ # Check for explicit boolean operators - if present, process as Boolean query
274
+ boolean_operators = [" AND ", " OR ", " NOT "]
275
+ if any(op in f" {term} " for op in boolean_operators):
276
+ return self._prepare_boolean_query(term)
277
+
278
+ # For non-Boolean queries, use the single term preparation logic
279
+ return self._prepare_single_term(term, is_prefix)
280
+
281
async def search(
    self,
    search_text: Optional[str] = None,
    permalink: Optional[str] = None,
    permalink_match: Optional[str] = None,
    title: Optional[str] = None,
    types: Optional[List[str]] = None,
    after_date: Optional[datetime] = None,
    search_item_types: Optional[List[SearchItemType]] = None,
    limit: int = 10,
    offset: int = 0,
) -> List[SearchIndexRow]:
    """Search across all indexed content using SQLite FTS5.

    All supplied filters are combined with AND, and results are always
    restricted to this repository's project_id.

    Args:
        search_text: Full-text query matched against title and content_stems.
            A value of "*" or only whitespace adds no text condition.
        permalink: Permalink that must match exactly.
        permalink_match: Permalink pattern. It is lower-cased and stripped;
            with a ``*`` it is matched via GLOB, with a ``/`` via equality,
            otherwise via FTS5 MATCH.
        title: Query matched against the title column (no prefix wildcard).
        types: Entity types compared with ``metadata.$.entity_type``.
        after_date: Keep rows whose created_at is later than this value; also
            adds ``updated_at DESC`` as a secondary sort key.
        search_item_types: SearchItemType values compared with the type column.
        limit: Maximum number of rows returned.
        offset: Number of leading rows skipped.

    Returns:
        SearchIndexRow objects ordered by ascending bm25 score. An FTS5
        syntax error yields an empty list instead of an exception.

    Raises:
        Exception: Any database error other than an FTS5 syntax error is
            logged and re-raised.
    """
    conditions: List[str] = []
    params: dict = {}
    order_by_clause = ""

    # Full-text search over title and content.
    if search_text:
        stripped_text = search_text.strip()
        # A lone "*" would trigger an FTS5 "unknown special query" error, so
        # wildcard-only / blank input means "no text filter".
        if stripped_text and stripped_text != "*":
            params["text"] = self._prepare_search_term(stripped_text)
            conditions.append("(title MATCH :text OR content_stems MATCH :text)")

    # Title-only match.
    if title:
        params["title_text"] = self._prepare_search_term(title.strip(), is_prefix=False)
        conditions.append("title MATCH :title_text")

    # Exact permalink.
    if permalink:
        params["permalink"] = permalink
        conditions.append("permalink = :permalink")

    # Permalink pattern. It is bound under its own parameter name so that it
    # cannot overwrite the exact-match value when both filters are supplied.
    if permalink_match:
        # GLOB patterns and paths must keep their syntax, so they are not run
        # through _prepare_search_term (which would quote the slashes).
        permalink_text = permalink_match.lower().strip()
        if "*" in permalink_match:
            conditions.append("permalink GLOB :permalink_match")
        elif "/" in permalink_text:
            conditions.append("permalink = :permalink_match")
        else:
            permalink_text = self._prepare_search_term(permalink_text, is_prefix=False)
            conditions.append("permalink MATCH :permalink_match")
        params["permalink_match"] = permalink_text

    # Search item type filter. Values are bound rather than interpolated so
    # that no caller-supplied text becomes part of the SQL statement.
    if search_item_types:
        placeholders = []
        for position, item_type in enumerate(search_item_types):
            key = f"item_type_{position}"
            params[key] = str(item_type.value)
            placeholders.append(f":{key}")
        conditions.append(f"type IN ({', '.join(placeholders)})")

    # Entity type filter, bound for the same reason: a type containing a
    # single quote previously broke (or could inject into) the statement.
    if types:
        placeholders = []
        for position, entity_type in enumerate(types):
            key = f"entity_type_{position}"
            params[key] = str(entity_type)
            placeholders.append(f":{key}")
        conditions.append(
            f"json_extract(metadata, '$.entity_type') IN ({', '.join(placeholders)})"
        )

    # Date filter; datetime() normalizes both sides before comparing.
    if after_date:
        params["after_date"] = after_date
        conditions.append("datetime(created_at) > datetime(:after_date)")

        # Among equally scored rows, show the most recently updated first.
        order_by_clause = ", updated_at DESC"

    # Always scope to this repository's project.
    params["project_id"] = self.project_id
    conditions.append("project_id = :project_id")

    params["limit"] = limit
    params["offset"] = offset

    where_clause = " AND ".join(conditions) if conditions else "1=1"

    sql = f"""
        SELECT
            project_id,
            id,
            title,
            permalink,
            file_path,
            type,
            metadata,
            from_id,
            to_id,
            relation_type,
            entity_id,
            content_snippet,
            category,
            created_at,
            updated_at,
            bm25(search_index) as score
        FROM search_index
        WHERE {where_clause}
        ORDER BY score ASC {order_by_clause}
        LIMIT :limit
        OFFSET :offset
    """

    logger.trace(f"Search {sql} params: {params}")
    try:
        async with db.scoped_session(self.session_maker) as session:
            result = await session.execute(text(sql), params)
            rows = result.fetchall()
    except Exception as e:
        # A malformed FTS5 query is a user-input problem: report no results
        # rather than crashing. Everything else is a real database error.
        if "fts5: syntax error" in str(e).lower():  # pragma: no cover
            logger.warning(f"FTS5 syntax error for search term: {search_text}, error: {e}")
            return []
        else:
            logger.error(f"Database error during search: {e}")
            raise

    results = [
        SearchIndexRow(
            project_id=self.project_id,
            id=row.id,
            title=row.title,
            permalink=row.permalink,
            file_path=row.file_path,
            type=row.type,
            score=row.score,
            # metadata is decoded from its stored JSON text.
            metadata=json.loads(row.metadata) if row.metadata else {},
            from_id=row.from_id,
            to_id=row.to_id,
            relation_type=row.relation_type,
            entity_id=row.entity_id,
            content_snippet=row.content_snippet,
            category=row.category,
            created_at=row.created_at,
            updated_at=row.updated_at,
        )
        for row in rows
    ]

    logger.trace(f"Found {len(results)} search results")
    for r in results:
        logger.trace(
            f"Search result: project_id: {r.project_id} type:{r.type} title: {r.title} permalink: {r.permalink} score: {r.score}"
        )

    return results