basic-memory 0.7.0__py3-none-any.whl → 0.16.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of basic-memory might be problematic. Click here for more details.
- basic_memory/__init__.py +5 -1
- basic_memory/alembic/alembic.ini +119 -0
- basic_memory/alembic/env.py +27 -3
- basic_memory/alembic/migrations.py +4 -9
- basic_memory/alembic/versions/502b60eaa905_remove_required_from_entity_permalink.py +51 -0
- basic_memory/alembic/versions/5fe1ab1ccebe_add_projects_table.py +108 -0
- basic_memory/alembic/versions/647e7a75e2cd_project_constraint_fix.py +104 -0
- basic_memory/alembic/versions/9d9c1cb7d8f5_add_mtime_and_size_columns_to_entity_.py +49 -0
- basic_memory/alembic/versions/a1b2c3d4e5f6_fix_project_foreign_keys.py +49 -0
- basic_memory/alembic/versions/b3c3938bacdb_relation_to_name_unique_index.py +44 -0
- basic_memory/alembic/versions/cc7172b46608_update_search_index_schema.py +100 -0
- basic_memory/alembic/versions/e7e1f4367280_add_scan_watermark_tracking_to_project.py +37 -0
- basic_memory/api/app.py +64 -18
- basic_memory/api/routers/__init__.py +4 -1
- basic_memory/api/routers/directory_router.py +84 -0
- basic_memory/api/routers/importer_router.py +152 -0
- basic_memory/api/routers/knowledge_router.py +166 -21
- basic_memory/api/routers/management_router.py +80 -0
- basic_memory/api/routers/memory_router.py +9 -64
- basic_memory/api/routers/project_router.py +406 -0
- basic_memory/api/routers/prompt_router.py +260 -0
- basic_memory/api/routers/resource_router.py +119 -4
- basic_memory/api/routers/search_router.py +5 -5
- basic_memory/api/routers/utils.py +130 -0
- basic_memory/api/template_loader.py +292 -0
- basic_memory/cli/app.py +43 -9
- basic_memory/cli/auth.py +277 -0
- basic_memory/cli/commands/__init__.py +13 -2
- basic_memory/cli/commands/cloud/__init__.py +6 -0
- basic_memory/cli/commands/cloud/api_client.py +112 -0
- basic_memory/cli/commands/cloud/bisync_commands.py +110 -0
- basic_memory/cli/commands/cloud/cloud_utils.py +101 -0
- basic_memory/cli/commands/cloud/core_commands.py +195 -0
- basic_memory/cli/commands/cloud/rclone_commands.py +301 -0
- basic_memory/cli/commands/cloud/rclone_config.py +110 -0
- basic_memory/cli/commands/cloud/rclone_installer.py +249 -0
- basic_memory/cli/commands/cloud/upload.py +233 -0
- basic_memory/cli/commands/cloud/upload_command.py +124 -0
- basic_memory/cli/commands/command_utils.py +51 -0
- basic_memory/cli/commands/db.py +28 -12
- basic_memory/cli/commands/import_chatgpt.py +40 -220
- basic_memory/cli/commands/import_claude_conversations.py +41 -168
- basic_memory/cli/commands/import_claude_projects.py +46 -157
- basic_memory/cli/commands/import_memory_json.py +48 -108
- basic_memory/cli/commands/mcp.py +84 -10
- basic_memory/cli/commands/project.py +876 -0
- basic_memory/cli/commands/status.py +50 -33
- basic_memory/cli/commands/tool.py +341 -0
- basic_memory/cli/main.py +8 -7
- basic_memory/config.py +477 -23
- basic_memory/db.py +168 -17
- basic_memory/deps.py +251 -25
- basic_memory/file_utils.py +113 -58
- basic_memory/ignore_utils.py +297 -0
- basic_memory/importers/__init__.py +27 -0
- basic_memory/importers/base.py +79 -0
- basic_memory/importers/chatgpt_importer.py +232 -0
- basic_memory/importers/claude_conversations_importer.py +177 -0
- basic_memory/importers/claude_projects_importer.py +148 -0
- basic_memory/importers/memory_json_importer.py +108 -0
- basic_memory/importers/utils.py +58 -0
- basic_memory/markdown/entity_parser.py +143 -23
- basic_memory/markdown/markdown_processor.py +3 -3
- basic_memory/markdown/plugins.py +39 -21
- basic_memory/markdown/schemas.py +1 -1
- basic_memory/markdown/utils.py +28 -13
- basic_memory/mcp/async_client.py +134 -4
- basic_memory/mcp/project_context.py +141 -0
- basic_memory/mcp/prompts/__init__.py +19 -0
- basic_memory/mcp/prompts/ai_assistant_guide.py +70 -0
- basic_memory/mcp/prompts/continue_conversation.py +62 -0
- basic_memory/mcp/prompts/recent_activity.py +188 -0
- basic_memory/mcp/prompts/search.py +57 -0
- basic_memory/mcp/prompts/utils.py +162 -0
- basic_memory/mcp/resources/ai_assistant_guide.md +283 -0
- basic_memory/mcp/resources/project_info.py +71 -0
- basic_memory/mcp/server.py +7 -13
- basic_memory/mcp/tools/__init__.py +33 -21
- basic_memory/mcp/tools/build_context.py +120 -0
- basic_memory/mcp/tools/canvas.py +130 -0
- basic_memory/mcp/tools/chatgpt_tools.py +187 -0
- basic_memory/mcp/tools/delete_note.py +225 -0
- basic_memory/mcp/tools/edit_note.py +320 -0
- basic_memory/mcp/tools/list_directory.py +167 -0
- basic_memory/mcp/tools/move_note.py +545 -0
- basic_memory/mcp/tools/project_management.py +200 -0
- basic_memory/mcp/tools/read_content.py +271 -0
- basic_memory/mcp/tools/read_note.py +255 -0
- basic_memory/mcp/tools/recent_activity.py +534 -0
- basic_memory/mcp/tools/search.py +369 -23
- basic_memory/mcp/tools/utils.py +374 -16
- basic_memory/mcp/tools/view_note.py +77 -0
- basic_memory/mcp/tools/write_note.py +207 -0
- basic_memory/models/__init__.py +3 -2
- basic_memory/models/knowledge.py +67 -15
- basic_memory/models/project.py +87 -0
- basic_memory/models/search.py +10 -6
- basic_memory/repository/__init__.py +2 -0
- basic_memory/repository/entity_repository.py +229 -7
- basic_memory/repository/observation_repository.py +35 -3
- basic_memory/repository/project_info_repository.py +10 -0
- basic_memory/repository/project_repository.py +103 -0
- basic_memory/repository/relation_repository.py +21 -2
- basic_memory/repository/repository.py +147 -29
- basic_memory/repository/search_repository.py +411 -62
- basic_memory/schemas/__init__.py +22 -9
- basic_memory/schemas/base.py +97 -8
- basic_memory/schemas/cloud.py +50 -0
- basic_memory/schemas/directory.py +30 -0
- basic_memory/schemas/importer.py +35 -0
- basic_memory/schemas/memory.py +187 -25
- basic_memory/schemas/project_info.py +211 -0
- basic_memory/schemas/prompt.py +90 -0
- basic_memory/schemas/request.py +56 -2
- basic_memory/schemas/response.py +1 -1
- basic_memory/schemas/search.py +31 -35
- basic_memory/schemas/sync_report.py +72 -0
- basic_memory/services/__init__.py +2 -1
- basic_memory/services/context_service.py +241 -104
- basic_memory/services/directory_service.py +295 -0
- basic_memory/services/entity_service.py +590 -60
- basic_memory/services/exceptions.py +21 -0
- basic_memory/services/file_service.py +284 -30
- basic_memory/services/initialization.py +191 -0
- basic_memory/services/link_resolver.py +49 -56
- basic_memory/services/project_service.py +863 -0
- basic_memory/services/search_service.py +168 -32
- basic_memory/sync/__init__.py +3 -2
- basic_memory/sync/background_sync.py +26 -0
- basic_memory/sync/sync_service.py +1180 -109
- basic_memory/sync/watch_service.py +412 -135
- basic_memory/templates/prompts/continue_conversation.hbs +110 -0
- basic_memory/templates/prompts/search.hbs +101 -0
- basic_memory/utils.py +383 -51
- basic_memory-0.16.1.dist-info/METADATA +493 -0
- basic_memory-0.16.1.dist-info/RECORD +148 -0
- {basic_memory-0.7.0.dist-info → basic_memory-0.16.1.dist-info}/entry_points.txt +1 -0
- basic_memory/alembic/README +0 -1
- basic_memory/cli/commands/sync.py +0 -206
- basic_memory/cli/commands/tools.py +0 -157
- basic_memory/mcp/tools/knowledge.py +0 -68
- basic_memory/mcp/tools/memory.py +0 -170
- basic_memory/mcp/tools/notes.py +0 -202
- basic_memory/schemas/discovery.py +0 -28
- basic_memory/sync/file_change_scanner.py +0 -158
- basic_memory/sync/utils.py +0 -31
- basic_memory-0.7.0.dist-info/METADATA +0 -378
- basic_memory-0.7.0.dist-info/RECORD +0 -82
- {basic_memory-0.7.0.dist-info → basic_memory-0.16.1.dist-info}/WHEEL +0 -0
- {basic_memory-0.7.0.dist-info → basic_memory-0.16.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,13 +1,15 @@
|
|
|
1
1
|
"""Repository for search operations."""
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
|
+
import re
|
|
4
5
|
import time
|
|
5
6
|
from dataclasses import dataclass
|
|
6
7
|
from datetime import datetime
|
|
7
|
-
from typing import
|
|
8
|
+
from typing import Any, Dict, List, Optional
|
|
9
|
+
from pathlib import Path
|
|
8
10
|
|
|
9
11
|
from loguru import logger
|
|
10
|
-
from sqlalchemy import
|
|
12
|
+
from sqlalchemy import Executable, Result, text
|
|
11
13
|
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
|
12
14
|
|
|
13
15
|
from basic_memory import db
|
|
@@ -19,33 +21,65 @@ from basic_memory.schemas.search import SearchItemType
|
|
|
19
21
|
class SearchIndexRow:
|
|
20
22
|
"""Search result with score and metadata."""
|
|
21
23
|
|
|
24
|
+
project_id: int
|
|
22
25
|
id: int
|
|
23
26
|
type: str
|
|
24
|
-
permalink: str
|
|
25
27
|
file_path: str
|
|
26
|
-
metadata: Optional[dict] = None
|
|
27
28
|
|
|
28
29
|
# date values
|
|
29
|
-
created_at:
|
|
30
|
-
updated_at:
|
|
30
|
+
created_at: datetime
|
|
31
|
+
updated_at: datetime
|
|
32
|
+
|
|
33
|
+
permalink: Optional[str] = None
|
|
34
|
+
metadata: Optional[dict] = None
|
|
31
35
|
|
|
32
36
|
# assigned in result
|
|
33
37
|
score: Optional[float] = None
|
|
34
38
|
|
|
35
39
|
# Type-specific fields
|
|
36
40
|
title: Optional[str] = None # entity
|
|
37
|
-
|
|
41
|
+
content_stems: Optional[str] = None # entity, observation
|
|
42
|
+
content_snippet: Optional[str] = None # entity, observation
|
|
38
43
|
entity_id: Optional[int] = None # observations
|
|
39
44
|
category: Optional[str] = None # observations
|
|
40
45
|
from_id: Optional[int] = None # relations
|
|
41
46
|
to_id: Optional[int] = None # relations
|
|
42
47
|
relation_type: Optional[str] = None # relations
|
|
43
48
|
|
|
49
|
+
@property
|
|
50
|
+
def content(self):
|
|
51
|
+
return self.content_snippet
|
|
52
|
+
|
|
53
|
+
@property
|
|
54
|
+
def directory(self) -> str:
|
|
55
|
+
"""Extract directory part from file_path.
|
|
56
|
+
|
|
57
|
+
For a file at "projects/notes/ideas.md", returns "/projects/notes"
|
|
58
|
+
For a file at root level "README.md", returns "/"
|
|
59
|
+
"""
|
|
60
|
+
if not self.type == SearchItemType.ENTITY.value and not self.file_path:
|
|
61
|
+
return ""
|
|
62
|
+
|
|
63
|
+
# Normalize path separators to handle both Windows (\) and Unix (/) paths
|
|
64
|
+
normalized_path = Path(self.file_path).as_posix()
|
|
65
|
+
|
|
66
|
+
# Split the path by slashes
|
|
67
|
+
parts = normalized_path.split("/")
|
|
68
|
+
|
|
69
|
+
# If there's only one part (e.g., "README.md"), it's at the root
|
|
70
|
+
if len(parts) <= 1:
|
|
71
|
+
return "/"
|
|
72
|
+
|
|
73
|
+
# Join all parts except the last one (filename)
|
|
74
|
+
directory_path = "/".join(parts[:-1])
|
|
75
|
+
return f"/{directory_path}"
|
|
76
|
+
|
|
44
77
|
def to_insert(self):
|
|
45
78
|
return {
|
|
46
79
|
"id": self.id,
|
|
47
80
|
"title": self.title,
|
|
48
|
-
"
|
|
81
|
+
"content_stems": self.content_stems,
|
|
82
|
+
"content_snippet": self.content_snippet,
|
|
49
83
|
"permalink": self.permalink,
|
|
50
84
|
"file_path": self.file_path,
|
|
51
85
|
"type": self.type,
|
|
@@ -57,14 +91,28 @@ class SearchIndexRow:
|
|
|
57
91
|
"category": self.category,
|
|
58
92
|
"created_at": self.created_at if self.created_at else None,
|
|
59
93
|
"updated_at": self.updated_at if self.updated_at else None,
|
|
94
|
+
"project_id": self.project_id,
|
|
60
95
|
}
|
|
61
96
|
|
|
62
97
|
|
|
63
98
|
class SearchRepository:
|
|
64
99
|
"""Repository for search index operations."""
|
|
65
100
|
|
|
66
|
-
def __init__(self, session_maker: async_sessionmaker[AsyncSession]):
|
|
101
|
+
def __init__(self, session_maker: async_sessionmaker[AsyncSession], project_id: int):
|
|
102
|
+
"""Initialize with session maker and project_id filter.
|
|
103
|
+
|
|
104
|
+
Args:
|
|
105
|
+
session_maker: SQLAlchemy session maker
|
|
106
|
+
project_id: Project ID to filter all operations by
|
|
107
|
+
|
|
108
|
+
Raises:
|
|
109
|
+
ValueError: If project_id is None or invalid
|
|
110
|
+
"""
|
|
111
|
+
if project_id is None or project_id <= 0: # pragma: no cover
|
|
112
|
+
raise ValueError("A valid project_id is required for SearchRepository")
|
|
113
|
+
|
|
67
114
|
self.session_maker = session_maker
|
|
115
|
+
self.project_id = project_id
|
|
68
116
|
|
|
69
117
|
async def init_search_index(self):
|
|
70
118
|
"""Create or recreate the search index."""
|
|
@@ -77,42 +125,251 @@ class SearchRepository:
|
|
|
77
125
|
logger.error(f"Error initializing search index: {e}")
|
|
78
126
|
raise e
|
|
79
127
|
|
|
80
|
-
def
|
|
81
|
-
"""Prepare a
|
|
128
|
+
def _prepare_boolean_query(self, query: str) -> str:
|
|
129
|
+
"""Prepare a Boolean query by quoting individual terms while preserving operators.
|
|
82
130
|
|
|
83
131
|
Args:
|
|
84
|
-
|
|
132
|
+
query: A Boolean query like "tier1-test AND unicode" or "(hello OR world) NOT test"
|
|
133
|
+
|
|
134
|
+
Returns:
|
|
135
|
+
A properly formatted Boolean query with quoted terms that need quoting
|
|
136
|
+
"""
|
|
137
|
+
# Define Boolean operators and their boundaries
|
|
138
|
+
boolean_pattern = r"(\bAND\b|\bOR\b|\bNOT\b)"
|
|
139
|
+
|
|
140
|
+
# Split the query by Boolean operators, keeping the operators
|
|
141
|
+
parts = re.split(boolean_pattern, query)
|
|
142
|
+
|
|
143
|
+
processed_parts = []
|
|
144
|
+
for part in parts:
|
|
145
|
+
part = part.strip()
|
|
146
|
+
if not part:
|
|
147
|
+
continue
|
|
148
|
+
|
|
149
|
+
# If it's a Boolean operator, keep it as is
|
|
150
|
+
if part in ["AND", "OR", "NOT"]:
|
|
151
|
+
processed_parts.append(part)
|
|
152
|
+
else:
|
|
153
|
+
# Handle parentheses specially - they should be preserved for grouping
|
|
154
|
+
if "(" in part or ")" in part:
|
|
155
|
+
# Parse parenthetical expressions carefully
|
|
156
|
+
processed_part = self._prepare_parenthetical_term(part)
|
|
157
|
+
processed_parts.append(processed_part)
|
|
158
|
+
else:
|
|
159
|
+
# This is a search term - for Boolean queries, don't add prefix wildcards
|
|
160
|
+
prepared_term = self._prepare_single_term(part, is_prefix=False)
|
|
161
|
+
processed_parts.append(prepared_term)
|
|
162
|
+
|
|
163
|
+
return " ".join(processed_parts)
|
|
164
|
+
|
|
165
|
+
def _prepare_parenthetical_term(self, term: str) -> str:
|
|
166
|
+
"""Prepare a term that contains parentheses, preserving the parentheses for grouping.
|
|
167
|
+
|
|
168
|
+
Args:
|
|
169
|
+
term: A term that may contain parentheses like "(hello" or "world)" or "(hello OR world)"
|
|
170
|
+
|
|
171
|
+
Returns:
|
|
172
|
+
A properly formatted term with parentheses preserved
|
|
173
|
+
"""
|
|
174
|
+
# Handle terms that start/end with parentheses but may contain quotable content
|
|
175
|
+
result = ""
|
|
176
|
+
i = 0
|
|
177
|
+
while i < len(term):
|
|
178
|
+
if term[i] in "()":
|
|
179
|
+
# Preserve parentheses as-is
|
|
180
|
+
result += term[i]
|
|
181
|
+
i += 1
|
|
182
|
+
else:
|
|
183
|
+
# Find the next parenthesis or end of string
|
|
184
|
+
start = i
|
|
185
|
+
while i < len(term) and term[i] not in "()":
|
|
186
|
+
i += 1
|
|
187
|
+
|
|
188
|
+
# Extract the content between parentheses
|
|
189
|
+
content = term[start:i].strip()
|
|
190
|
+
if content:
|
|
191
|
+
# Only quote if it actually needs quoting (has hyphens, special chars, etc)
|
|
192
|
+
# but don't quote if it's just simple words
|
|
193
|
+
if self._needs_quoting(content):
|
|
194
|
+
escaped_content = content.replace('"', '""')
|
|
195
|
+
result += f'"{escaped_content}"'
|
|
196
|
+
else:
|
|
197
|
+
result += content
|
|
198
|
+
|
|
199
|
+
return result
|
|
200
|
+
|
|
201
|
+
def _needs_quoting(self, term: str) -> bool:
|
|
202
|
+
"""Check if a term needs to be quoted for FTS5 safety.
|
|
203
|
+
|
|
204
|
+
Args:
|
|
205
|
+
term: The term to check
|
|
206
|
+
|
|
207
|
+
Returns:
|
|
208
|
+
True if the term should be quoted
|
|
209
|
+
"""
|
|
210
|
+
if not term or not term.strip():
|
|
211
|
+
return False
|
|
212
|
+
|
|
213
|
+
# Characters that indicate we should quote (excluding parentheses which are valid syntax)
|
|
214
|
+
needs_quoting_chars = [
|
|
215
|
+
" ",
|
|
216
|
+
".",
|
|
217
|
+
":",
|
|
218
|
+
";",
|
|
219
|
+
",",
|
|
220
|
+
"<",
|
|
221
|
+
">",
|
|
222
|
+
"?",
|
|
223
|
+
"/",
|
|
224
|
+
"-",
|
|
225
|
+
"'",
|
|
226
|
+
'"',
|
|
227
|
+
"[",
|
|
228
|
+
"]",
|
|
229
|
+
"{",
|
|
230
|
+
"}",
|
|
231
|
+
"+",
|
|
232
|
+
"!",
|
|
233
|
+
"@",
|
|
234
|
+
"#",
|
|
235
|
+
"$",
|
|
236
|
+
"%",
|
|
237
|
+
"^",
|
|
238
|
+
"&",
|
|
239
|
+
"=",
|
|
240
|
+
"|",
|
|
241
|
+
"\\",
|
|
242
|
+
"~",
|
|
243
|
+
"`",
|
|
244
|
+
]
|
|
245
|
+
|
|
246
|
+
return any(c in term for c in needs_quoting_chars)
|
|
247
|
+
|
|
248
|
+
def _prepare_single_term(self, term: str, is_prefix: bool = True) -> str:
|
|
249
|
+
"""Prepare a single search term (no Boolean operators).
|
|
250
|
+
|
|
251
|
+
Args:
|
|
252
|
+
term: A single search term
|
|
85
253
|
is_prefix: Whether to add prefix search capability (* suffix)
|
|
86
254
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
- Terms with spaces or special chars need quotes
|
|
255
|
+
Returns:
|
|
256
|
+
A properly formatted single term
|
|
90
257
|
"""
|
|
91
|
-
if
|
|
258
|
+
if not term or not term.strip():
|
|
92
259
|
return term
|
|
93
260
|
|
|
94
|
-
|
|
95
|
-
special_chars = ["/", "-", ".", " ", "(", ")", "[", "]", '"', "'"]
|
|
261
|
+
term = term.strip()
|
|
96
262
|
|
|
97
|
-
# Check if term
|
|
98
|
-
|
|
263
|
+
# Check if term is already a proper wildcard pattern (alphanumeric + *)
|
|
264
|
+
# e.g., "hello*", "test*world" - these should be left alone
|
|
265
|
+
if "*" in term and all(c.isalnum() or c in "*_-" for c in term):
|
|
266
|
+
return term
|
|
267
|
+
|
|
268
|
+
# Characters that can cause FTS5 syntax errors when used as operators
|
|
269
|
+
# We're more conservative here - only quote when we detect problematic patterns
|
|
270
|
+
problematic_chars = [
|
|
271
|
+
'"',
|
|
272
|
+
"'",
|
|
273
|
+
"(",
|
|
274
|
+
")",
|
|
275
|
+
"[",
|
|
276
|
+
"]",
|
|
277
|
+
"{",
|
|
278
|
+
"}",
|
|
279
|
+
"+",
|
|
280
|
+
"!",
|
|
281
|
+
"@",
|
|
282
|
+
"#",
|
|
283
|
+
"$",
|
|
284
|
+
"%",
|
|
285
|
+
"^",
|
|
286
|
+
"&",
|
|
287
|
+
"=",
|
|
288
|
+
"|",
|
|
289
|
+
"\\",
|
|
290
|
+
"~",
|
|
291
|
+
"`",
|
|
292
|
+
]
|
|
99
293
|
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
294
|
+
# Characters that indicate we should quote (spaces, dots, colons, etc.)
|
|
295
|
+
# Adding hyphens here because FTS5 can have issues with hyphens followed by wildcards
|
|
296
|
+
needs_quoting_chars = [" ", ".", ":", ";", ",", "<", ">", "?", "/", "-"]
|
|
297
|
+
|
|
298
|
+
# Check if term needs quoting
|
|
299
|
+
has_problematic = any(c in term for c in problematic_chars)
|
|
300
|
+
has_spaces_or_special = any(c in term for c in needs_quoting_chars)
|
|
301
|
+
|
|
302
|
+
if has_problematic or has_spaces_or_special:
|
|
303
|
+
# Handle multi-word queries differently from special character queries
|
|
304
|
+
if " " in term and not any(c in term for c in problematic_chars):
|
|
305
|
+
# Check if any individual word contains special characters that need quoting
|
|
306
|
+
words = term.strip().split()
|
|
307
|
+
has_special_in_words = any(
|
|
308
|
+
any(c in word for c in needs_quoting_chars if c != " ") for word in words
|
|
309
|
+
)
|
|
310
|
+
|
|
311
|
+
if not has_special_in_words:
|
|
312
|
+
# For multi-word queries with simple words (like "emoji unicode"),
|
|
313
|
+
# use boolean AND to handle word order variations
|
|
314
|
+
if is_prefix:
|
|
315
|
+
# Add prefix wildcard to each word for better matching
|
|
316
|
+
prepared_words = [f"{word}*" for word in words if word]
|
|
317
|
+
else:
|
|
318
|
+
prepared_words = words
|
|
319
|
+
term = " AND ".join(prepared_words)
|
|
320
|
+
else:
|
|
321
|
+
# If any word has special characters, quote the entire phrase
|
|
322
|
+
escaped_term = term.replace('"', '""')
|
|
323
|
+
if is_prefix and not ("/" in term and term.endswith(".md")):
|
|
324
|
+
term = f'"{escaped_term}"*'
|
|
325
|
+
else:
|
|
326
|
+
term = f'"{escaped_term}"'
|
|
327
|
+
else:
|
|
328
|
+
# For terms with problematic characters or file paths, use exact phrase matching
|
|
329
|
+
# Escape any existing quotes by doubling them
|
|
330
|
+
escaped_term = term.replace('"', '""')
|
|
331
|
+
# Quote the entire term to handle special characters safely
|
|
332
|
+
if is_prefix and not ("/" in term and term.endswith(".md")):
|
|
333
|
+
# For search terms (not file paths), add prefix matching
|
|
334
|
+
term = f'"{escaped_term}"*'
|
|
335
|
+
else:
|
|
336
|
+
# For file paths, use exact matching
|
|
337
|
+
term = f'"{escaped_term}"'
|
|
338
|
+
elif is_prefix:
|
|
339
|
+
# Only add wildcard for simple terms without special characters
|
|
340
|
+
term = f"{term}*"
|
|
104
341
|
|
|
105
342
|
return term
|
|
106
343
|
|
|
344
|
+
def _prepare_search_term(self, term: str, is_prefix: bool = True) -> str:
|
|
345
|
+
"""Prepare a search term for FTS5 query.
|
|
346
|
+
|
|
347
|
+
Args:
|
|
348
|
+
term: The search term to prepare
|
|
349
|
+
is_prefix: Whether to add prefix search capability (* suffix)
|
|
350
|
+
|
|
351
|
+
For FTS5:
|
|
352
|
+
- Boolean operators (AND, OR, NOT) are preserved for complex queries
|
|
353
|
+
- Terms with FTS5 special characters are quoted to prevent syntax errors
|
|
354
|
+
- Simple terms get prefix wildcards for better matching
|
|
355
|
+
"""
|
|
356
|
+
# Check for explicit boolean operators - if present, process as Boolean query
|
|
357
|
+
boolean_operators = [" AND ", " OR ", " NOT "]
|
|
358
|
+
if any(op in f" {term} " for op in boolean_operators):
|
|
359
|
+
return self._prepare_boolean_query(term)
|
|
360
|
+
|
|
361
|
+
# For non-Boolean queries, use the single term preparation logic
|
|
362
|
+
return self._prepare_single_term(term, is_prefix)
|
|
363
|
+
|
|
107
364
|
async def search(
|
|
108
365
|
self,
|
|
109
366
|
search_text: Optional[str] = None,
|
|
110
367
|
permalink: Optional[str] = None,
|
|
111
368
|
permalink_match: Optional[str] = None,
|
|
112
369
|
title: Optional[str] = None,
|
|
113
|
-
types: Optional[List[
|
|
370
|
+
types: Optional[List[str]] = None,
|
|
114
371
|
after_date: Optional[datetime] = None,
|
|
115
|
-
|
|
372
|
+
search_item_types: Optional[List[SearchItemType]] = None,
|
|
116
373
|
limit: int = 10,
|
|
117
374
|
offset: int = 0,
|
|
118
375
|
) -> List[SearchIndexRow]:
|
|
@@ -123,15 +380,21 @@ class SearchRepository:
|
|
|
123
380
|
|
|
124
381
|
# Handle text search for title and content
|
|
125
382
|
if search_text:
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
383
|
+
# Skip FTS for wildcard-only queries that would cause "unknown special query" errors
|
|
384
|
+
if search_text.strip() == "*" or search_text.strip() == "":
|
|
385
|
+
# For wildcard searches, don't add any text conditions - return all results
|
|
386
|
+
pass
|
|
387
|
+
else:
|
|
388
|
+
# Use _prepare_search_term to handle both Boolean and non-Boolean queries
|
|
389
|
+
processed_text = self._prepare_search_term(search_text.strip())
|
|
390
|
+
params["text"] = processed_text
|
|
391
|
+
conditions.append("(title MATCH :text OR content_stems MATCH :text)")
|
|
129
392
|
|
|
130
393
|
# Handle title match search
|
|
131
394
|
if title:
|
|
132
|
-
title_text = self._prepare_search_term(title.strip())
|
|
133
|
-
params["
|
|
134
|
-
conditions.append("title MATCH :
|
|
395
|
+
title_text = self._prepare_search_term(title.strip(), is_prefix=False)
|
|
396
|
+
params["title_text"] = title_text
|
|
397
|
+
conditions.append("title MATCH :title_text")
|
|
135
398
|
|
|
136
399
|
# Handle permalink exact search
|
|
137
400
|
if permalink:
|
|
@@ -140,25 +403,31 @@ class SearchRepository:
|
|
|
140
403
|
|
|
141
404
|
# Handle permalink match search, supports *
|
|
142
405
|
if permalink_match:
|
|
143
|
-
#
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
)
|
|
406
|
+
# For GLOB patterns, don't use _prepare_search_term as it will quote slashes
|
|
407
|
+
# GLOB patterns need to preserve their syntax
|
|
408
|
+
permalink_text = permalink_match.lower().strip()
|
|
147
409
|
params["permalink"] = permalink_text
|
|
148
410
|
if "*" in permalink_match:
|
|
149
411
|
conditions.append("permalink GLOB :permalink")
|
|
150
412
|
else:
|
|
151
|
-
|
|
413
|
+
# For exact matches without *, we can use FTS5 MATCH
|
|
414
|
+
# but only prepare the term if it doesn't look like a path
|
|
415
|
+
if "/" in permalink_text:
|
|
416
|
+
conditions.append("permalink = :permalink")
|
|
417
|
+
else:
|
|
418
|
+
permalink_text = self._prepare_search_term(permalink_text, is_prefix=False)
|
|
419
|
+
params["permalink"] = permalink_text
|
|
420
|
+
conditions.append("permalink MATCH :permalink")
|
|
152
421
|
|
|
153
|
-
# Handle type filter
|
|
154
|
-
if
|
|
155
|
-
type_list = ", ".join(f"'{t.value}'" for t in
|
|
422
|
+
# Handle entity type filter
|
|
423
|
+
if search_item_types:
|
|
424
|
+
type_list = ", ".join(f"'{t.value}'" for t in search_item_types)
|
|
156
425
|
conditions.append(f"type IN ({type_list})")
|
|
157
426
|
|
|
158
|
-
# Handle
|
|
159
|
-
if
|
|
160
|
-
|
|
161
|
-
conditions.append(f"json_extract(metadata, '$.entity_type') IN ({
|
|
427
|
+
# Handle type filter
|
|
428
|
+
if types:
|
|
429
|
+
type_list = ", ".join(f"'{t}'" for t in types)
|
|
430
|
+
conditions.append(f"json_extract(metadata, '$.entity_type') IN ({type_list})")
|
|
162
431
|
|
|
163
432
|
# Handle date filter using datetime() for proper comparison
|
|
164
433
|
if after_date:
|
|
@@ -168,6 +437,10 @@ class SearchRepository:
|
|
|
168
437
|
# order by most recent first
|
|
169
438
|
order_by_clause = ", updated_at DESC"
|
|
170
439
|
|
|
440
|
+
# Always filter by project_id
|
|
441
|
+
params["project_id"] = self.project_id
|
|
442
|
+
conditions.append("project_id = :project_id")
|
|
443
|
+
|
|
171
444
|
# set limit on search query
|
|
172
445
|
params["limit"] = limit
|
|
173
446
|
params["offset"] = offset
|
|
@@ -177,6 +450,7 @@ class SearchRepository:
|
|
|
177
450
|
|
|
178
451
|
sql = f"""
|
|
179
452
|
SELECT
|
|
453
|
+
project_id,
|
|
180
454
|
id,
|
|
181
455
|
title,
|
|
182
456
|
permalink,
|
|
@@ -187,7 +461,7 @@ class SearchRepository:
|
|
|
187
461
|
to_id,
|
|
188
462
|
relation_type,
|
|
189
463
|
entity_id,
|
|
190
|
-
|
|
464
|
+
content_snippet,
|
|
191
465
|
category,
|
|
192
466
|
created_at,
|
|
193
467
|
updated_at,
|
|
@@ -199,13 +473,25 @@ class SearchRepository:
|
|
|
199
473
|
OFFSET :offset
|
|
200
474
|
"""
|
|
201
475
|
|
|
202
|
-
logger.
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
476
|
+
logger.trace(f"Search {sql} params: {params}")
|
|
477
|
+
try:
|
|
478
|
+
async with db.scoped_session(self.session_maker) as session:
|
|
479
|
+
result = await session.execute(text(sql), params)
|
|
480
|
+
rows = result.fetchall()
|
|
481
|
+
except Exception as e:
|
|
482
|
+
# Handle FTS5 syntax errors and provide user-friendly feedback
|
|
483
|
+
if "fts5: syntax error" in str(e).lower(): # pragma: no cover
|
|
484
|
+
logger.warning(f"FTS5 syntax error for search term: {search_text}, error: {e}")
|
|
485
|
+
# Return empty results rather than crashing
|
|
486
|
+
return []
|
|
487
|
+
else:
|
|
488
|
+
# Re-raise other database errors
|
|
489
|
+
logger.error(f"Database error during search: {e}")
|
|
490
|
+
raise
|
|
206
491
|
|
|
207
492
|
results = [
|
|
208
493
|
SearchIndexRow(
|
|
494
|
+
project_id=self.project_id,
|
|
209
495
|
id=row.id,
|
|
210
496
|
title=row.title,
|
|
211
497
|
permalink=row.permalink,
|
|
@@ -217,7 +503,7 @@ class SearchRepository:
|
|
|
217
503
|
to_id=row.to_id,
|
|
218
504
|
relation_type=row.relation_type,
|
|
219
505
|
entity_id=row.entity_id,
|
|
220
|
-
|
|
506
|
+
content_snippet=row.content_snippet,
|
|
221
507
|
category=row.category,
|
|
222
508
|
created_at=row.created_at,
|
|
223
509
|
updated_at=row.updated_at,
|
|
@@ -225,10 +511,10 @@ class SearchRepository:
|
|
|
225
511
|
for row in rows
|
|
226
512
|
]
|
|
227
513
|
|
|
228
|
-
logger.
|
|
514
|
+
logger.trace(f"Found {len(results)} search results")
|
|
229
515
|
for r in results:
|
|
230
|
-
logger.
|
|
231
|
-
f"Search result: type:{r.type} title: {r.title} permalink: {r.permalink} score: {r.score}"
|
|
516
|
+
logger.trace(
|
|
517
|
+
f"Search result: project_id: {r.project_id} type:{r.type} title: {r.title} permalink: {r.permalink} score: {r.score}"
|
|
232
518
|
)
|
|
233
519
|
|
|
234
520
|
return results
|
|
@@ -241,36 +527,99 @@ class SearchRepository:
|
|
|
241
527
|
async with db.scoped_session(self.session_maker) as session:
|
|
242
528
|
# Delete existing record if any
|
|
243
529
|
await session.execute(
|
|
244
|
-
text(
|
|
245
|
-
|
|
530
|
+
text(
|
|
531
|
+
"DELETE FROM search_index WHERE permalink = :permalink AND project_id = :project_id"
|
|
532
|
+
),
|
|
533
|
+
{"permalink": search_index_row.permalink, "project_id": self.project_id},
|
|
246
534
|
)
|
|
247
535
|
|
|
536
|
+
# Prepare data for insert with project_id
|
|
537
|
+
insert_data = search_index_row.to_insert()
|
|
538
|
+
insert_data["project_id"] = self.project_id
|
|
539
|
+
|
|
248
540
|
# Insert new record
|
|
249
541
|
await session.execute(
|
|
250
542
|
text("""
|
|
251
543
|
INSERT INTO search_index (
|
|
252
|
-
id, title,
|
|
544
|
+
id, title, content_stems, content_snippet, permalink, file_path, type, metadata,
|
|
253
545
|
from_id, to_id, relation_type,
|
|
254
546
|
entity_id, category,
|
|
255
|
-
created_at, updated_at
|
|
547
|
+
created_at, updated_at,
|
|
548
|
+
project_id
|
|
256
549
|
) VALUES (
|
|
257
|
-
:id, :title, :
|
|
550
|
+
:id, :title, :content_stems, :content_snippet, :permalink, :file_path, :type, :metadata,
|
|
258
551
|
:from_id, :to_id, :relation_type,
|
|
259
552
|
:entity_id, :category,
|
|
260
|
-
:created_at, :updated_at
|
|
553
|
+
:created_at, :updated_at,
|
|
554
|
+
:project_id
|
|
261
555
|
)
|
|
262
556
|
"""),
|
|
263
|
-
|
|
557
|
+
insert_data,
|
|
264
558
|
)
|
|
265
559
|
logger.debug(f"indexed row {search_index_row}")
|
|
266
560
|
await session.commit()
|
|
267
561
|
|
|
562
|
+
async def bulk_index_items(self, search_index_rows: List[SearchIndexRow]):
|
|
563
|
+
"""Index multiple items in a single batch operation.
|
|
564
|
+
|
|
565
|
+
Note: This method assumes that any existing records for the entity_id
|
|
566
|
+
have already been deleted (typically via delete_by_entity_id).
|
|
567
|
+
|
|
568
|
+
Args:
|
|
569
|
+
search_index_rows: List of SearchIndexRow objects to index
|
|
570
|
+
"""
|
|
571
|
+
if not search_index_rows:
|
|
572
|
+
return
|
|
573
|
+
|
|
574
|
+
async with db.scoped_session(self.session_maker) as session:
|
|
575
|
+
# Prepare all insert data with project_id
|
|
576
|
+
insert_data_list = []
|
|
577
|
+
for row in search_index_rows:
|
|
578
|
+
insert_data = row.to_insert()
|
|
579
|
+
insert_data["project_id"] = self.project_id
|
|
580
|
+
insert_data_list.append(insert_data)
|
|
581
|
+
|
|
582
|
+
# Batch insert all records using executemany
|
|
583
|
+
await session.execute(
|
|
584
|
+
text("""
|
|
585
|
+
INSERT INTO search_index (
|
|
586
|
+
id, title, content_stems, content_snippet, permalink, file_path, type, metadata,
|
|
587
|
+
from_id, to_id, relation_type,
|
|
588
|
+
entity_id, category,
|
|
589
|
+
created_at, updated_at,
|
|
590
|
+
project_id
|
|
591
|
+
) VALUES (
|
|
592
|
+
:id, :title, :content_stems, :content_snippet, :permalink, :file_path, :type, :metadata,
|
|
593
|
+
:from_id, :to_id, :relation_type,
|
|
594
|
+
:entity_id, :category,
|
|
595
|
+
:created_at, :updated_at,
|
|
596
|
+
:project_id
|
|
597
|
+
)
|
|
598
|
+
"""),
|
|
599
|
+
insert_data_list,
|
|
600
|
+
)
|
|
601
|
+
logger.debug(f"Bulk indexed {len(search_index_rows)} rows")
|
|
602
|
+
await session.commit()
|
|
603
|
+
|
|
604
|
+
async def delete_by_entity_id(self, entity_id: int):
|
|
605
|
+
"""Delete an item from the search index by entity_id."""
|
|
606
|
+
async with db.scoped_session(self.session_maker) as session:
|
|
607
|
+
await session.execute(
|
|
608
|
+
text(
|
|
609
|
+
"DELETE FROM search_index WHERE entity_id = :entity_id AND project_id = :project_id"
|
|
610
|
+
),
|
|
611
|
+
{"entity_id": entity_id, "project_id": self.project_id},
|
|
612
|
+
)
|
|
613
|
+
await session.commit()
|
|
614
|
+
|
|
268
615
|
async def delete_by_permalink(self, permalink: str):
|
|
269
616
|
"""Delete an item from the search index."""
|
|
270
617
|
async with db.scoped_session(self.session_maker) as session:
|
|
271
618
|
await session.execute(
|
|
272
|
-
text(
|
|
273
|
-
|
|
619
|
+
text(
|
|
620
|
+
"DELETE FROM search_index WHERE permalink = :permalink AND project_id = :project_id"
|
|
621
|
+
),
|
|
622
|
+
{"permalink": permalink, "project_id": self.project_id},
|
|
274
623
|
)
|
|
275
624
|
await session.commit()
|
|
276
625
|
|