basic-memory 0.16.1__py3-none-any.whl → 0.17.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of basic-memory might be problematic. Click here for more details.
- basic_memory/__init__.py +1 -1
- basic_memory/alembic/env.py +112 -26
- basic_memory/alembic/versions/314f1ea54dc4_add_postgres_full_text_search_support_.py +131 -0
- basic_memory/alembic/versions/5fe1ab1ccebe_add_projects_table.py +15 -3
- basic_memory/alembic/versions/647e7a75e2cd_project_constraint_fix.py +44 -36
- basic_memory/alembic/versions/6830751f5fb6_merge_multiple_heads.py +24 -0
- basic_memory/alembic/versions/a2b3c4d5e6f7_add_search_index_entity_cascade.py +56 -0
- basic_memory/alembic/versions/cc7172b46608_update_search_index_schema.py +13 -0
- basic_memory/alembic/versions/f8a9b2c3d4e5_add_pg_trgm_for_fuzzy_link_resolution.py +239 -0
- basic_memory/alembic/versions/g9a0b3c4d5e6_add_external_id_to_project_and_entity.py +173 -0
- basic_memory/api/app.py +45 -24
- basic_memory/api/container.py +133 -0
- basic_memory/api/routers/knowledge_router.py +17 -5
- basic_memory/api/routers/project_router.py +68 -14
- basic_memory/api/routers/resource_router.py +37 -27
- basic_memory/api/routers/utils.py +53 -14
- basic_memory/api/v2/__init__.py +35 -0
- basic_memory/api/v2/routers/__init__.py +21 -0
- basic_memory/api/v2/routers/directory_router.py +93 -0
- basic_memory/api/v2/routers/importer_router.py +181 -0
- basic_memory/api/v2/routers/knowledge_router.py +427 -0
- basic_memory/api/v2/routers/memory_router.py +130 -0
- basic_memory/api/v2/routers/project_router.py +359 -0
- basic_memory/api/v2/routers/prompt_router.py +269 -0
- basic_memory/api/v2/routers/resource_router.py +286 -0
- basic_memory/api/v2/routers/search_router.py +73 -0
- basic_memory/cli/app.py +43 -7
- basic_memory/cli/auth.py +27 -4
- basic_memory/cli/commands/__init__.py +3 -1
- basic_memory/cli/commands/cloud/api_client.py +20 -5
- basic_memory/cli/commands/cloud/cloud_utils.py +13 -6
- basic_memory/cli/commands/cloud/rclone_commands.py +110 -14
- basic_memory/cli/commands/cloud/rclone_installer.py +18 -4
- basic_memory/cli/commands/cloud/upload.py +10 -3
- basic_memory/cli/commands/command_utils.py +52 -4
- basic_memory/cli/commands/db.py +78 -19
- basic_memory/cli/commands/format.py +198 -0
- basic_memory/cli/commands/import_chatgpt.py +12 -8
- basic_memory/cli/commands/import_claude_conversations.py +12 -8
- basic_memory/cli/commands/import_claude_projects.py +12 -8
- basic_memory/cli/commands/import_memory_json.py +12 -8
- basic_memory/cli/commands/mcp.py +8 -26
- basic_memory/cli/commands/project.py +22 -9
- basic_memory/cli/commands/status.py +3 -2
- basic_memory/cli/commands/telemetry.py +81 -0
- basic_memory/cli/container.py +84 -0
- basic_memory/cli/main.py +7 -0
- basic_memory/config.py +177 -77
- basic_memory/db.py +183 -77
- basic_memory/deps/__init__.py +293 -0
- basic_memory/deps/config.py +26 -0
- basic_memory/deps/db.py +56 -0
- basic_memory/deps/importers.py +200 -0
- basic_memory/deps/projects.py +238 -0
- basic_memory/deps/repositories.py +179 -0
- basic_memory/deps/services.py +480 -0
- basic_memory/deps.py +14 -409
- basic_memory/file_utils.py +212 -3
- basic_memory/ignore_utils.py +5 -5
- basic_memory/importers/base.py +40 -19
- basic_memory/importers/chatgpt_importer.py +17 -4
- basic_memory/importers/claude_conversations_importer.py +27 -12
- basic_memory/importers/claude_projects_importer.py +50 -14
- basic_memory/importers/memory_json_importer.py +36 -16
- basic_memory/importers/utils.py +5 -2
- basic_memory/markdown/entity_parser.py +62 -23
- basic_memory/markdown/markdown_processor.py +67 -4
- basic_memory/markdown/plugins.py +4 -2
- basic_memory/markdown/utils.py +10 -1
- basic_memory/mcp/async_client.py +1 -0
- basic_memory/mcp/clients/__init__.py +28 -0
- basic_memory/mcp/clients/directory.py +70 -0
- basic_memory/mcp/clients/knowledge.py +176 -0
- basic_memory/mcp/clients/memory.py +120 -0
- basic_memory/mcp/clients/project.py +89 -0
- basic_memory/mcp/clients/resource.py +71 -0
- basic_memory/mcp/clients/search.py +65 -0
- basic_memory/mcp/container.py +110 -0
- basic_memory/mcp/project_context.py +47 -33
- basic_memory/mcp/prompts/ai_assistant_guide.py +2 -2
- basic_memory/mcp/prompts/recent_activity.py +2 -2
- basic_memory/mcp/prompts/utils.py +3 -3
- basic_memory/mcp/server.py +58 -0
- basic_memory/mcp/tools/build_context.py +14 -14
- basic_memory/mcp/tools/canvas.py +34 -12
- basic_memory/mcp/tools/chatgpt_tools.py +4 -1
- basic_memory/mcp/tools/delete_note.py +31 -7
- basic_memory/mcp/tools/edit_note.py +14 -9
- basic_memory/mcp/tools/list_directory.py +7 -17
- basic_memory/mcp/tools/move_note.py +35 -31
- basic_memory/mcp/tools/project_management.py +29 -25
- basic_memory/mcp/tools/read_content.py +13 -3
- basic_memory/mcp/tools/read_note.py +24 -14
- basic_memory/mcp/tools/recent_activity.py +32 -38
- basic_memory/mcp/tools/search.py +17 -10
- basic_memory/mcp/tools/utils.py +28 -0
- basic_memory/mcp/tools/view_note.py +2 -1
- basic_memory/mcp/tools/write_note.py +37 -14
- basic_memory/models/knowledge.py +15 -2
- basic_memory/models/project.py +7 -1
- basic_memory/models/search.py +58 -2
- basic_memory/project_resolver.py +222 -0
- basic_memory/repository/entity_repository.py +210 -3
- basic_memory/repository/observation_repository.py +1 -0
- basic_memory/repository/postgres_search_repository.py +451 -0
- basic_memory/repository/project_repository.py +38 -1
- basic_memory/repository/relation_repository.py +58 -2
- basic_memory/repository/repository.py +1 -0
- basic_memory/repository/search_index_row.py +95 -0
- basic_memory/repository/search_repository.py +77 -615
- basic_memory/repository/search_repository_base.py +241 -0
- basic_memory/repository/sqlite_search_repository.py +437 -0
- basic_memory/runtime.py +61 -0
- basic_memory/schemas/base.py +36 -6
- basic_memory/schemas/directory.py +2 -1
- basic_memory/schemas/memory.py +9 -2
- basic_memory/schemas/project_info.py +2 -0
- basic_memory/schemas/response.py +84 -27
- basic_memory/schemas/search.py +5 -0
- basic_memory/schemas/sync_report.py +1 -1
- basic_memory/schemas/v2/__init__.py +27 -0
- basic_memory/schemas/v2/entity.py +133 -0
- basic_memory/schemas/v2/resource.py +47 -0
- basic_memory/services/context_service.py +219 -43
- basic_memory/services/directory_service.py +26 -11
- basic_memory/services/entity_service.py +68 -33
- basic_memory/services/file_service.py +131 -16
- basic_memory/services/initialization.py +51 -26
- basic_memory/services/link_resolver.py +1 -0
- basic_memory/services/project_service.py +68 -43
- basic_memory/services/search_service.py +75 -16
- basic_memory/sync/__init__.py +2 -1
- basic_memory/sync/coordinator.py +160 -0
- basic_memory/sync/sync_service.py +135 -115
- basic_memory/sync/watch_service.py +32 -12
- basic_memory/telemetry.py +249 -0
- basic_memory/utils.py +96 -75
- {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/METADATA +129 -5
- basic_memory-0.17.4.dist-info/RECORD +193 -0
- {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/WHEEL +1 -1
- basic_memory-0.16.1.dist-info/RECORD +0 -148
- {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/entry_points.txt +0 -0
- {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,451 @@
|
|
|
1
|
+
"""PostgreSQL tsvector-based search repository implementation."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import re
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from typing import List, Optional
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
from loguru import logger
|
|
10
|
+
from sqlalchemy import text
|
|
11
|
+
|
|
12
|
+
from basic_memory import db
|
|
13
|
+
from basic_memory.repository.search_index_row import SearchIndexRow
|
|
14
|
+
from basic_memory.repository.search_repository_base import SearchRepositoryBase
|
|
15
|
+
from basic_memory.schemas.search import SearchItemType
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class PostgresSearchRepository(SearchRepositoryBase):
|
|
19
|
+
"""PostgreSQL tsvector implementation of search repository.
|
|
20
|
+
|
|
21
|
+
Uses PostgreSQL's full-text search capabilities with:
|
|
22
|
+
- tsvector for document representation
|
|
23
|
+
- tsquery for query representation
|
|
24
|
+
- GIN indexes for performance
|
|
25
|
+
- ts_rank() function for relevance scoring
|
|
26
|
+
- JSONB containment operators for metadata search
|
|
27
|
+
|
|
28
|
+
Note: This implementation uses UPSERT patterns (INSERT ... ON CONFLICT) instead of
|
|
29
|
+
delete-then-insert to handle race conditions during parallel entity indexing.
|
|
30
|
+
The partial unique index uix_search_index_permalink_project prevents duplicate
|
|
31
|
+
permalinks per project.
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
async def init_search_index(self):
    """No-op: the Postgres search schema is owned by Alembic migrations.

    The migrations create everything this method would otherwise build:
    the search_index table, the generated textsearchable_index_col tsvector
    column, a GIN index on that column, and a GIN (jsonb_path_ops) index on
    the metadata column.
    """
    logger.info("PostgreSQL search index initialization handled by migrations")
|
|
48
|
+
|
|
49
|
+
async def index_item(self, search_index_row: SearchIndexRow) -> None:
    """Index or update a single item using UPSERT.

    Uses INSERT ... ON CONFLICT to handle race conditions during parallel
    entity indexing. The partial unique index uix_search_index_permalink_project
    on (permalink, project_id) WHERE permalink IS NOT NULL prevents duplicate
    permalinks.

    For rows with non-null permalinks (entities), conflicts are resolved by
    updating the existing row in place. For rows with null permalinks, the
    partial index does not apply, so no conflict occurs and a plain insert
    happens.

    Args:
        search_index_row: Fully populated row to write into search_index.
    """
    async with db.scoped_session(self.session_maker) as session:
        # Serialize JSON for raw SQL — serialize_json=True makes metadata a
        # JSON string the driver can bind directly into the text() statement.
        insert_data = search_index_row.to_insert(serialize_json=True)
        # This repository is scoped to a single project; stamp every row.
        insert_data["project_id"] = self.project_id

        # Use upsert to handle race conditions during parallel indexing.
        # ON CONFLICT (permalink, project_id) matches the partial unique index
        # uix_search_index_permalink_project WHERE permalink IS NOT NULL.
        # For rows with NULL permalinks, no conflict occurs (partial index
        # doesn't apply).
        await session.execute(
            text("""
                INSERT INTO search_index (
                    id, title, content_stems, content_snippet, permalink, file_path, type, metadata,
                    from_id, to_id, relation_type,
                    entity_id, category,
                    created_at, updated_at,
                    project_id
                ) VALUES (
                    :id, :title, :content_stems, :content_snippet, :permalink, :file_path, :type, :metadata,
                    :from_id, :to_id, :relation_type,
                    :entity_id, :category,
                    :created_at, :updated_at,
                    :project_id
                )
                ON CONFLICT (permalink, project_id) WHERE permalink IS NOT NULL DO UPDATE SET
                    id = EXCLUDED.id,
                    title = EXCLUDED.title,
                    content_stems = EXCLUDED.content_stems,
                    content_snippet = EXCLUDED.content_snippet,
                    file_path = EXCLUDED.file_path,
                    type = EXCLUDED.type,
                    metadata = EXCLUDED.metadata,
                    from_id = EXCLUDED.from_id,
                    to_id = EXCLUDED.to_id,
                    relation_type = EXCLUDED.relation_type,
                    entity_id = EXCLUDED.entity_id,
                    category = EXCLUDED.category,
                    created_at = EXCLUDED.created_at,
                    updated_at = EXCLUDED.updated_at
            """),
            insert_data,
        )
        logger.debug(f"indexed row {search_index_row}")
        await session.commit()
|
|
105
|
+
|
|
106
|
+
def _prepare_search_term(self, term: str, is_prefix: bool = True) -> str:
    """Prepare a search term for Postgres tsquery format.

    Dispatches queries containing explicit Boolean operators (AND/OR/NOT as
    standalone words) to _prepare_boolean_query; everything else goes through
    _prepare_single_term.

    Args:
        term: The raw search term.
        is_prefix: Whether single terms should get prefix matching (:* suffix).

    Returns:
        A tsquery-formatted search expression.
    """
    # Pad with spaces so an operator at the very start or end of the term is
    # still recognized as a standalone word.
    padded = f" {term} "
    if " AND " in padded or " OR " in padded or " NOT " in padded:
        return self._prepare_boolean_query(term)
    return self._prepare_single_term(term, is_prefix)
|
|
128
|
+
|
|
129
|
+
def _prepare_boolean_query(self, query: str) -> str:
|
|
130
|
+
"""Convert Boolean query to tsquery format.
|
|
131
|
+
|
|
132
|
+
Args:
|
|
133
|
+
query: A Boolean query like "coffee AND brewing" or "(pour OR french) AND press"
|
|
134
|
+
|
|
135
|
+
Returns:
|
|
136
|
+
tsquery-formatted string with & (AND), | (OR), ! (NOT) operators
|
|
137
|
+
|
|
138
|
+
Examples:
|
|
139
|
+
"coffee AND brewing" -> "coffee & brewing"
|
|
140
|
+
"(pour OR french) AND press" -> "(pour | french) & press"
|
|
141
|
+
"coffee NOT decaf" -> "coffee & !decaf"
|
|
142
|
+
"""
|
|
143
|
+
# Replace Boolean operators with tsquery operators
|
|
144
|
+
# Keep parentheses for grouping
|
|
145
|
+
result = query
|
|
146
|
+
result = re.sub(r"\bAND\b", "&", result)
|
|
147
|
+
result = re.sub(r"\bOR\b", "|", result)
|
|
148
|
+
# NOT must be converted to "& !" and the ! must be attached to the following term
|
|
149
|
+
# "Python NOT Django" -> "Python & !Django"
|
|
150
|
+
result = re.sub(r"\bNOT\s+", "& !", result)
|
|
151
|
+
|
|
152
|
+
return result
|
|
153
|
+
|
|
154
|
+
def _prepare_single_term(self, term: str, is_prefix: bool = True) -> str:
|
|
155
|
+
"""Prepare a single search term for tsquery.
|
|
156
|
+
|
|
157
|
+
Args:
|
|
158
|
+
term: A single search term
|
|
159
|
+
is_prefix: Whether to add prefix search capability (:* suffix)
|
|
160
|
+
|
|
161
|
+
Returns:
|
|
162
|
+
A properly formatted single term for tsquery
|
|
163
|
+
|
|
164
|
+
For Postgres tsquery:
|
|
165
|
+
- Multi-word queries become "word1 & word2"
|
|
166
|
+
- Prefix matching uses ":*" suffix (e.g., "coff:*")
|
|
167
|
+
- Special characters that need escaping: & | ! ( ) :
|
|
168
|
+
"""
|
|
169
|
+
if not term or not term.strip():
|
|
170
|
+
return term
|
|
171
|
+
|
|
172
|
+
term = term.strip()
|
|
173
|
+
|
|
174
|
+
# Check if term is already a wildcard pattern
|
|
175
|
+
if "*" in term:
|
|
176
|
+
# Replace * with :* for Postgres prefix matching
|
|
177
|
+
return term.replace("*", ":*")
|
|
178
|
+
|
|
179
|
+
# Remove tsquery special characters from the search term
|
|
180
|
+
# These characters have special meaning in tsquery and cause syntax errors
|
|
181
|
+
# if not used as operators
|
|
182
|
+
special_chars = ["&", "|", "!", "(", ")", ":"]
|
|
183
|
+
cleaned_term = term
|
|
184
|
+
for char in special_chars:
|
|
185
|
+
cleaned_term = cleaned_term.replace(char, " ")
|
|
186
|
+
|
|
187
|
+
# Handle multi-word queries
|
|
188
|
+
if " " in cleaned_term:
|
|
189
|
+
words = [w for w in cleaned_term.split() if w.strip()]
|
|
190
|
+
if not words:
|
|
191
|
+
# All characters were special chars, search won't match anything
|
|
192
|
+
# Return a safe search term that won't cause syntax errors
|
|
193
|
+
return "NOSPECIALCHARS:*"
|
|
194
|
+
if is_prefix:
|
|
195
|
+
# Add prefix matching to each word
|
|
196
|
+
prepared_words = [f"{word}:*" for word in words]
|
|
197
|
+
else:
|
|
198
|
+
prepared_words = words
|
|
199
|
+
# Join with AND operator
|
|
200
|
+
return " & ".join(prepared_words)
|
|
201
|
+
|
|
202
|
+
# Single word
|
|
203
|
+
cleaned_term = cleaned_term.strip()
|
|
204
|
+
if is_prefix:
|
|
205
|
+
return f"{cleaned_term}:*"
|
|
206
|
+
else:
|
|
207
|
+
return cleaned_term
|
|
208
|
+
|
|
209
|
+
async def search(
    self,
    search_text: Optional[str] = None,
    permalink: Optional[str] = None,
    permalink_match: Optional[str] = None,
    title: Optional[str] = None,
    types: Optional[List[str]] = None,
    after_date: Optional[datetime] = None,
    search_item_types: Optional[List[SearchItemType]] = None,
    limit: int = 10,
    offset: int = 0,
) -> List[SearchIndexRow]:
    """Search across all indexed content using PostgreSQL tsvector.

    Args:
        search_text: Free-text query matched against the tsvector column.
            "*" or empty/whitespace means "match everything".
        permalink: Exact permalink to match.
        permalink_match: Permalink pattern; "*" acts as a wildcard (LIKE).
        title: Text query matched against the title only.
        types: Entity type names filtered via JSONB metadata containment.
        after_date: Only rows created after this date; also adds a
            most-recently-updated ordering tiebreak.
        search_item_types: Restrict results to these SearchItemType values.
        limit: Maximum number of rows to return.
        offset: Number of rows to skip (pagination).

    Returns:
        List of SearchIndexRow results ordered by ts_rank score.
    """
    conditions = []
    params = {}
    order_by_clause = ""

    # Handle text search for title and content using tsvector
    if search_text:
        if search_text.strip() == "*" or search_text.strip() == "":
            # For wildcard searches, don't add any text conditions
            pass
        else:
            # Prepare search term for tsquery
            processed_text = self._prepare_search_term(search_text.strip())
            params["text"] = processed_text
            # Use @@ operator for tsvector matching
            conditions.append("textsearchable_index_col @@ to_tsquery('english', :text)")

    # Handle title search
    if title:
        title_text = self._prepare_search_term(title.strip(), is_prefix=False)
        params["title_text"] = title_text
        conditions.append("to_tsvector('english', title) @@ to_tsquery('english', :title_text)")

    # Handle permalink exact search
    if permalink:
        params["permalink"] = permalink
        conditions.append("permalink = :permalink")

    # Handle permalink pattern match
    if permalink_match:
        permalink_text = permalink_match.lower().strip()
        params["permalink"] = permalink_text
        if "*" in permalink_match:
            # Use LIKE for pattern matching in Postgres; convert * to %
            params["permalink"] = permalink_text.replace("*", "%")
            conditions.append("permalink LIKE :permalink")
        else:
            conditions.append("permalink = :permalink")

    # Handle search item type filter.
    # SECURITY: bind each value as a parameter instead of interpolating it
    # into the SQL text, so crafted filter values cannot inject SQL.
    if search_item_types:
        placeholders = []
        for i, item_type in enumerate(search_item_types):
            key = f"item_type_{i}"
            params[key] = item_type.value
            placeholders.append(f":{key}")
        conditions.append(f"type IN ({', '.join(placeholders)})")

    # Handle entity type filter using JSONB containment.
    # SECURITY: the containment document is built with json.dumps and bound
    # as a parameter (cast to jsonb) rather than f-string-interpolated, so
    # user-supplied type names cannot break out of the literal.
    if types:
        type_conditions = []
        for i, entity_type in enumerate(types):
            key = f"entity_type_{i}"
            params[key] = json.dumps({"entity_type": entity_type})
            type_conditions.append(f"metadata @> CAST(:{key} AS jsonb)")
        conditions.append(f"({' OR '.join(type_conditions)})")

    # Handle date filter
    if after_date:
        params["after_date"] = after_date
        conditions.append("created_at > :after_date")
        # order by most recent first
        order_by_clause = ", updated_at DESC"

    # Always filter by project_id
    params["project_id"] = self.project_id
    conditions.append("project_id = :project_id")

    # set limit and offset
    params["limit"] = limit
    params["offset"] = offset

    # Build WHERE clause
    where_clause = " AND ".join(conditions) if conditions else "1=1"

    # Build SQL with ts_rank() for scoring. Without a text query the rank is
    # meaningless, so a constant 0 is selected instead.
    if search_text and search_text.strip() and search_text.strip() != "*":
        score_expr = "ts_rank(textsearchable_index_col, to_tsquery('english', :text))"
    else:
        score_expr = "0"

    sql = f"""
        SELECT
            project_id,
            id,
            title,
            permalink,
            file_path,
            type,
            metadata,
            from_id,
            to_id,
            relation_type,
            entity_id,
            content_snippet,
            category,
            created_at,
            updated_at,
            {score_expr} as score
        FROM search_index
        WHERE {where_clause}
        ORDER BY score DESC, id ASC {order_by_clause}
        LIMIT :limit
        OFFSET :offset
    """

    logger.trace(f"Search {sql} params: {params}")
    try:
        async with db.scoped_session(self.session_maker) as session:
            result = await session.execute(text(sql), params)
            rows = result.fetchall()
    except Exception as e:
        # Handle tsquery syntax errors (and only those).
        #
        # Important: Postgres errors for other failures (e.g. missing table)
        # will still mention `to_tsquery(...)` in the SQL text, so checking
        # for the substring "tsquery" is too broad.
        msg = str(e).lower()
        if (
            "syntax error in tsquery" in msg
            or "invalid input syntax for type tsquery" in msg
            or "no operand in tsquery" in msg
            or "no operator in tsquery" in msg
        ):
            logger.warning(f"tsquery syntax error for search term: {search_text}, error: {e}")
            return []

        # Re-raise other database errors
        logger.error(f"Database error during search: {e}")
        raise

    results = [
        SearchIndexRow(
            project_id=self.project_id,
            id=row.id,
            title=row.title,
            permalink=row.permalink,
            file_path=row.file_path,
            type=row.type,
            score=float(row.score) if row.score else 0.0,
            metadata=(
                row.metadata
                if isinstance(row.metadata, dict)
                else (json.loads(row.metadata) if row.metadata else {})
            ),
            from_id=row.from_id,
            to_id=row.to_id,
            relation_type=row.relation_type,
            entity_id=row.entity_id,
            content_snippet=row.content_snippet,
            category=row.category,
            created_at=row.created_at,
            updated_at=row.updated_at,
        )
        for row in rows
    ]

    logger.trace(f"Found {len(results)} search results")
    for r in results:
        logger.trace(
            f"Search result: project_id: {r.project_id} type:{r.type} title: {r.title} permalink: {r.permalink} score: {r.score}"
        )

    return results
|
|
383
|
+
|
|
384
|
+
async def bulk_index_items(self, search_index_rows: List[SearchIndexRow]) -> None:
    """Index multiple items in a single batch operation using UPSERT.

    Uses INSERT ... ON CONFLICT to handle race conditions during parallel
    entity indexing. The partial unique index uix_search_index_permalink_project
    on (permalink, project_id) WHERE permalink IS NOT NULL prevents duplicate
    permalinks.

    For rows with non-null permalinks (entities), conflicts are resolved by
    updating the existing row. For rows with null permalinks (observations,
    relations), the partial index doesn't apply and they are inserted directly.

    Args:
        search_index_rows: List of SearchIndexRow objects to index
    """

    # Nothing to do for an empty batch; avoids an empty executemany call.
    if not search_index_rows:
        return

    async with db.scoped_session(self.session_maker) as session:
        # When using text() raw SQL, always serialize JSON to string.
        # Both SQLite (TEXT) and Postgres (JSONB) accept JSON strings in raw
        # SQL; the database driver/column type will handle conversion.
        insert_data_list = []
        for row in search_index_rows:
            insert_data = row.to_insert(serialize_json=True)
            # Stamp every row with this repository's project scope.
            insert_data["project_id"] = self.project_id
            insert_data_list.append(insert_data)

        # Use upsert to handle race conditions during parallel indexing.
        # ON CONFLICT (permalink, project_id) matches the partial unique index
        # uix_search_index_permalink_project WHERE permalink IS NOT NULL.
        # For rows with NULL permalinks (observations, relations), no conflict
        # occurs. Passing a list of dicts makes SQLAlchemy run this as one
        # executemany batch.
        await session.execute(
            text("""
                INSERT INTO search_index (
                    id, title, content_stems, content_snippet, permalink, file_path, type, metadata,
                    from_id, to_id, relation_type,
                    entity_id, category,
                    created_at, updated_at,
                    project_id
                ) VALUES (
                    :id, :title, :content_stems, :content_snippet, :permalink, :file_path, :type, :metadata,
                    :from_id, :to_id, :relation_type,
                    :entity_id, :category,
                    :created_at, :updated_at,
                    :project_id
                )
                ON CONFLICT (permalink, project_id) WHERE permalink IS NOT NULL DO UPDATE SET
                    id = EXCLUDED.id,
                    title = EXCLUDED.title,
                    content_stems = EXCLUDED.content_stems,
                    content_snippet = EXCLUDED.content_snippet,
                    file_path = EXCLUDED.file_path,
                    type = EXCLUDED.type,
                    metadata = EXCLUDED.metadata,
                    from_id = EXCLUDED.from_id,
                    to_id = EXCLUDED.to_id,
                    relation_type = EXCLUDED.relation_type,
                    entity_id = EXCLUDED.entity_id,
                    category = EXCLUDED.category,
                    created_at = EXCLUDED.created_at,
                    updated_at = EXCLUDED.updated_at
            """),
            insert_data_list,
        )
        logger.debug(f"Bulk indexed {len(search_index_rows)} rows")
        await session.commit()
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
from typing import Optional, Sequence, Union
|
|
5
5
|
|
|
6
|
+
|
|
6
7
|
from sqlalchemy import text
|
|
7
8
|
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
|
8
9
|
|
|
@@ -23,7 +24,7 @@ class ProjectRepository(Repository[Project]):
|
|
|
23
24
|
super().__init__(session_maker, Project)
|
|
24
25
|
|
|
25
26
|
async def get_by_name(self, name: str) -> Optional[Project]:
|
|
26
|
-
"""Get project by name.
|
|
27
|
+
"""Get project by name (exact match).
|
|
27
28
|
|
|
28
29
|
Args:
|
|
29
30
|
name: Unique name of the project
|
|
@@ -31,6 +32,18 @@ class ProjectRepository(Repository[Project]):
|
|
|
31
32
|
query = self.select().where(Project.name == name)
|
|
32
33
|
return await self.find_one(query)
|
|
33
34
|
|
|
35
|
+
async def get_by_name_case_insensitive(self, name: str) -> Optional[Project]:
    """Get project by name (case-insensitive exact match).

    Args:
        name: Project name (case-insensitive)

    Returns:
        Project if found, None otherwise
    """
    # ILIKE treats % and _ as wildcards, so a raw ilike(name) is a pattern
    # match, not an exact match — a name like "my_project" would also match
    # "myXproject". Escape the wildcards (and the escape char itself) to get
    # a literal, case-insensitive comparison.
    escaped = name.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    query = self.select().where(Project.name.ilike(escaped, escape="\\"))
    return await self.find_one(query)
|
|
46
|
+
|
|
34
47
|
async def get_by_permalink(self, permalink: str) -> Optional[Project]:
|
|
35
48
|
"""Get project by permalink.
|
|
36
49
|
|
|
@@ -49,6 +62,30 @@ class ProjectRepository(Repository[Project]):
|
|
|
49
62
|
query = self.select().where(Project.path == Path(path).as_posix())
|
|
50
63
|
return await self.find_one(query)
|
|
51
64
|
|
|
65
|
+
async def get_by_id(self, project_id: int) -> Optional[Project]:
    """Get project by numeric ID.

    Args:
        project_id: Numeric project ID (primary key)

    Returns:
        Project if found, None otherwise
    """
    # Primary-key lookup goes through select_by_id, which needs an explicit
    # session rather than the query-builder helpers used by the other getters.
    async with db.scoped_session(self.session_maker) as session:
        return await self.select_by_id(session, project_id)
|
|
76
|
+
|
|
77
|
+
async def get_by_external_id(self, external_id: str) -> Optional[Project]:
    """Fetch the project whose external UUID equals *external_id*.

    Args:
        external_id: External UUID identifier

    Returns:
        Project if found, None otherwise
    """
    lookup = self.select().where(Project.external_id == external_id)
    return await self.find_one(lookup)
|
|
88
|
+
|
|
52
89
|
async def get_default_project(self) -> Optional[Project]:
|
|
53
90
|
"""Get the default project (the one marked as is_default=True)."""
|
|
54
91
|
query = self.select().where(Project.is_default.is_not(None))
|
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
"""Repository for managing Relation objects."""
|
|
2
2
|
|
|
3
|
-
from sqlalchemy import and_, delete
|
|
4
3
|
from typing import Sequence, List, Optional
|
|
5
4
|
|
|
6
|
-
|
|
5
|
+
|
|
6
|
+
from sqlalchemy import and_, delete, select
|
|
7
|
+
from sqlalchemy.dialects.postgresql import insert as pg_insert
|
|
8
|
+
from sqlalchemy.dialects.sqlite import insert as sqlite_insert
|
|
7
9
|
from sqlalchemy.ext.asyncio import async_sessionmaker
|
|
8
10
|
from sqlalchemy.orm import selectinload, aliased
|
|
9
11
|
from sqlalchemy.orm.interfaces import LoaderOption
|
|
@@ -86,5 +88,59 @@ class RelationRepository(Repository[Relation]):
|
|
|
86
88
|
result = await self.execute_query(query)
|
|
87
89
|
return result.scalars().all()
|
|
88
90
|
|
|
91
|
+
async def add_all_ignore_duplicates(self, relations: List[Relation]) -> int:
    """Bulk insert relations, ignoring duplicates.

    Uses ON CONFLICT DO NOTHING to skip relations that would violate the
    unique constraint on (from_id, to_name, relation_type). This is useful
    for bulk operations where the same link may appear multiple times in
    a document.

    Works with both SQLite and PostgreSQL dialects.

    Args:
        relations: List of Relation objects to insert

    Returns:
        Number of relations actually inserted (excludes duplicates)
    """
    if not relations:
        return 0

    # Convert Relation objects to dicts for insert
    values = [
        {
            # Fall back to the repository's project scope when the relation
            # doesn't carry its own project_id.
            "project_id": r.project_id if r.project_id else self.project_id,
            "from_id": r.from_id,
            "to_id": r.to_id,
            "to_name": r.to_name,
            "relation_type": r.relation_type,
            "context": r.context,
        }
        for r in relations
    ]

    async with db.scoped_session(self.session_maker) as session:
        # Check dialect to use appropriate insert
        dialect_name = session.bind.dialect.name if session.bind else "sqlite"

        if dialect_name == "postgresql":  # pragma: no cover
            # PostgreSQL: use RETURNING to count inserted rows
            # (rowcount is 0 for ON CONFLICT DO NOTHING)
            stmt = (  # pragma: no cover
                pg_insert(Relation)
                .values(values)
                .on_conflict_do_nothing()
                .returning(Relation.id)
            )
            result = await session.execute(stmt)  # pragma: no cover
            return len(result.fetchall())  # pragma: no cover
        else:
            # SQLite: rowcount works correctly
            stmt = sqlite_insert(Relation).values(values)
            stmt = stmt.on_conflict_do_nothing()
            result = await session.execute(stmt)
            # Some drivers report rowcount as -1; clamp that to 0.
            return result.rowcount if result.rowcount > 0 else 0
|
|
144
|
+
|
|
89
145
|
def get_load_options(self) -> List[LoaderOption]:
|
|
90
146
|
return [selectinload(Relation.from_entity), selectinload(Relation.to_entity)]
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
"""Search index data structures."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from typing import Optional
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from basic_memory.schemas.search import SearchItemType
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class SearchIndexRow:
    """One row of the search index: a search result with score and metadata.

    A row represents an entity, observation, or relation; the
    type-specific fields below are populated according to ``type``.
    """

    project_id: int
    id: int
    type: str
    file_path: str

    # date values
    created_at: datetime
    updated_at: datetime

    permalink: Optional[str] = None
    metadata: Optional[dict] = None

    # assigned in result
    score: Optional[float] = None

    # Type-specific fields
    title: Optional[str] = None  # entity
    content_stems: Optional[str] = None  # entity, observation
    content_snippet: Optional[str] = None  # entity, observation
    entity_id: Optional[int] = None  # observations
    category: Optional[str] = None  # observations
    from_id: Optional[int] = None  # relations
    to_id: Optional[int] = None  # relations
    relation_type: Optional[str] = None  # relations

    @property
    def content(self) -> Optional[str]:
        """Alias for ``content_snippet`` kept for caller convenience."""
        return self.content_snippet

    @property
    def directory(self) -> str:
        """Extract directory part from file_path.

        For a file at "projects/notes/ideas.md", returns "/projects/notes"
        For a file at root level "README.md", returns "/"

        NOTE(review): the guard below returns "" only when the row is NOT an
        entity AND file_path is empty; ``or`` may have been intended (directory
        is presumably only meaningful for entities) — confirm before changing,
        since non-entity rows with a file_path currently fall through to the
        path logic. Behavior preserved here.
        """
        if not self.type == SearchItemType.ENTITY.value and not self.file_path:
            return ""

        # Normalize path separators to handle both Windows (\) and Unix (/) paths
        normalized_path = Path(self.file_path).as_posix()

        # Split the path by slashes
        parts = normalized_path.split("/")

        # If there's only one part (e.g., "README.md"), it's at the root
        if len(parts) <= 1:
            return "/"

        # Join all parts except the last one (filename)
        directory_path = "/".join(parts[:-1])
        return f"/{directory_path}"

    def to_insert(self, serialize_json: bool = True) -> dict:
        """Convert to dict for database insertion.

        Args:
            serialize_json: If True, converts metadata dict to JSON string (for SQLite).
                If False, keeps metadata as dict (for Postgres JSONB).

        Returns:
            Column-name -> value mapping covering every indexed column.
        """
        return {
            "id": self.id,
            "title": self.title,
            "content_stems": self.content_stems,
            "content_snippet": self.content_snippet,
            "permalink": self.permalink,
            "file_path": self.file_path,
            "type": self.type,
            # Serialize only when asked to AND there is something to serialize;
            # a None/empty metadata passes through unchanged in both modes.
            "metadata": json.dumps(self.metadata)
            if serialize_json and self.metadata
            else self.metadata,
            "from_id": self.from_id,
            "to_id": self.to_id,
            "relation_type": self.relation_type,
            "entity_id": self.entity_id,
            "category": self.category,
            # A datetime instance is always truthy, so the original
            # `x if x else None` was redundant — pass the values through.
            "created_at": self.created_at,
            "updated_at": self.updated_at,
            "project_id": self.project_id,
        }
|