basic-memory 0.17.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- basic_memory/__init__.py +7 -0
- basic_memory/alembic/alembic.ini +119 -0
- basic_memory/alembic/env.py +185 -0
- basic_memory/alembic/migrations.py +24 -0
- basic_memory/alembic/script.py.mako +26 -0
- basic_memory/alembic/versions/314f1ea54dc4_add_postgres_full_text_search_support_.py +131 -0
- basic_memory/alembic/versions/3dae7c7b1564_initial_schema.py +93 -0
- basic_memory/alembic/versions/502b60eaa905_remove_required_from_entity_permalink.py +51 -0
- basic_memory/alembic/versions/5fe1ab1ccebe_add_projects_table.py +120 -0
- basic_memory/alembic/versions/647e7a75e2cd_project_constraint_fix.py +112 -0
- basic_memory/alembic/versions/9d9c1cb7d8f5_add_mtime_and_size_columns_to_entity_.py +49 -0
- basic_memory/alembic/versions/a1b2c3d4e5f6_fix_project_foreign_keys.py +49 -0
- basic_memory/alembic/versions/a2b3c4d5e6f7_add_search_index_entity_cascade.py +56 -0
- basic_memory/alembic/versions/b3c3938bacdb_relation_to_name_unique_index.py +44 -0
- basic_memory/alembic/versions/cc7172b46608_update_search_index_schema.py +113 -0
- basic_memory/alembic/versions/e7e1f4367280_add_scan_watermark_tracking_to_project.py +37 -0
- basic_memory/alembic/versions/f8a9b2c3d4e5_add_pg_trgm_for_fuzzy_link_resolution.py +239 -0
- basic_memory/api/__init__.py +5 -0
- basic_memory/api/app.py +131 -0
- basic_memory/api/routers/__init__.py +11 -0
- basic_memory/api/routers/directory_router.py +84 -0
- basic_memory/api/routers/importer_router.py +152 -0
- basic_memory/api/routers/knowledge_router.py +318 -0
- basic_memory/api/routers/management_router.py +80 -0
- basic_memory/api/routers/memory_router.py +90 -0
- basic_memory/api/routers/project_router.py +448 -0
- basic_memory/api/routers/prompt_router.py +260 -0
- basic_memory/api/routers/resource_router.py +249 -0
- basic_memory/api/routers/search_router.py +36 -0
- basic_memory/api/routers/utils.py +169 -0
- basic_memory/api/template_loader.py +292 -0
- basic_memory/api/v2/__init__.py +35 -0
- basic_memory/api/v2/routers/__init__.py +21 -0
- basic_memory/api/v2/routers/directory_router.py +93 -0
- basic_memory/api/v2/routers/importer_router.py +182 -0
- basic_memory/api/v2/routers/knowledge_router.py +413 -0
- basic_memory/api/v2/routers/memory_router.py +130 -0
- basic_memory/api/v2/routers/project_router.py +342 -0
- basic_memory/api/v2/routers/prompt_router.py +270 -0
- basic_memory/api/v2/routers/resource_router.py +286 -0
- basic_memory/api/v2/routers/search_router.py +73 -0
- basic_memory/cli/__init__.py +1 -0
- basic_memory/cli/app.py +84 -0
- basic_memory/cli/auth.py +277 -0
- basic_memory/cli/commands/__init__.py +18 -0
- basic_memory/cli/commands/cloud/__init__.py +6 -0
- basic_memory/cli/commands/cloud/api_client.py +112 -0
- basic_memory/cli/commands/cloud/bisync_commands.py +110 -0
- basic_memory/cli/commands/cloud/cloud_utils.py +101 -0
- basic_memory/cli/commands/cloud/core_commands.py +195 -0
- basic_memory/cli/commands/cloud/rclone_commands.py +371 -0
- basic_memory/cli/commands/cloud/rclone_config.py +110 -0
- basic_memory/cli/commands/cloud/rclone_installer.py +263 -0
- basic_memory/cli/commands/cloud/upload.py +233 -0
- basic_memory/cli/commands/cloud/upload_command.py +124 -0
- basic_memory/cli/commands/command_utils.py +77 -0
- basic_memory/cli/commands/db.py +44 -0
- basic_memory/cli/commands/format.py +198 -0
- basic_memory/cli/commands/import_chatgpt.py +84 -0
- basic_memory/cli/commands/import_claude_conversations.py +87 -0
- basic_memory/cli/commands/import_claude_projects.py +86 -0
- basic_memory/cli/commands/import_memory_json.py +87 -0
- basic_memory/cli/commands/mcp.py +76 -0
- basic_memory/cli/commands/project.py +889 -0
- basic_memory/cli/commands/status.py +174 -0
- basic_memory/cli/commands/telemetry.py +81 -0
- basic_memory/cli/commands/tool.py +341 -0
- basic_memory/cli/main.py +28 -0
- basic_memory/config.py +616 -0
- basic_memory/db.py +394 -0
- basic_memory/deps.py +705 -0
- basic_memory/file_utils.py +478 -0
- basic_memory/ignore_utils.py +297 -0
- basic_memory/importers/__init__.py +27 -0
- basic_memory/importers/base.py +79 -0
- basic_memory/importers/chatgpt_importer.py +232 -0
- basic_memory/importers/claude_conversations_importer.py +180 -0
- basic_memory/importers/claude_projects_importer.py +148 -0
- basic_memory/importers/memory_json_importer.py +108 -0
- basic_memory/importers/utils.py +61 -0
- basic_memory/markdown/__init__.py +21 -0
- basic_memory/markdown/entity_parser.py +279 -0
- basic_memory/markdown/markdown_processor.py +160 -0
- basic_memory/markdown/plugins.py +242 -0
- basic_memory/markdown/schemas.py +70 -0
- basic_memory/markdown/utils.py +117 -0
- basic_memory/mcp/__init__.py +1 -0
- basic_memory/mcp/async_client.py +139 -0
- basic_memory/mcp/project_context.py +141 -0
- basic_memory/mcp/prompts/__init__.py +19 -0
- basic_memory/mcp/prompts/ai_assistant_guide.py +70 -0
- basic_memory/mcp/prompts/continue_conversation.py +62 -0
- basic_memory/mcp/prompts/recent_activity.py +188 -0
- basic_memory/mcp/prompts/search.py +57 -0
- basic_memory/mcp/prompts/utils.py +162 -0
- basic_memory/mcp/resources/ai_assistant_guide.md +283 -0
- basic_memory/mcp/resources/project_info.py +71 -0
- basic_memory/mcp/server.py +81 -0
- basic_memory/mcp/tools/__init__.py +48 -0
- basic_memory/mcp/tools/build_context.py +120 -0
- basic_memory/mcp/tools/canvas.py +152 -0
- basic_memory/mcp/tools/chatgpt_tools.py +190 -0
- basic_memory/mcp/tools/delete_note.py +242 -0
- basic_memory/mcp/tools/edit_note.py +324 -0
- basic_memory/mcp/tools/list_directory.py +168 -0
- basic_memory/mcp/tools/move_note.py +551 -0
- basic_memory/mcp/tools/project_management.py +201 -0
- basic_memory/mcp/tools/read_content.py +281 -0
- basic_memory/mcp/tools/read_note.py +267 -0
- basic_memory/mcp/tools/recent_activity.py +534 -0
- basic_memory/mcp/tools/search.py +385 -0
- basic_memory/mcp/tools/utils.py +540 -0
- basic_memory/mcp/tools/view_note.py +78 -0
- basic_memory/mcp/tools/write_note.py +230 -0
- basic_memory/models/__init__.py +15 -0
- basic_memory/models/base.py +10 -0
- basic_memory/models/knowledge.py +226 -0
- basic_memory/models/project.py +87 -0
- basic_memory/models/search.py +85 -0
- basic_memory/repository/__init__.py +11 -0
- basic_memory/repository/entity_repository.py +503 -0
- basic_memory/repository/observation_repository.py +73 -0
- basic_memory/repository/postgres_search_repository.py +379 -0
- basic_memory/repository/project_info_repository.py +10 -0
- basic_memory/repository/project_repository.py +128 -0
- basic_memory/repository/relation_repository.py +146 -0
- basic_memory/repository/repository.py +385 -0
- basic_memory/repository/search_index_row.py +95 -0
- basic_memory/repository/search_repository.py +94 -0
- basic_memory/repository/search_repository_base.py +241 -0
- basic_memory/repository/sqlite_search_repository.py +439 -0
- basic_memory/schemas/__init__.py +86 -0
- basic_memory/schemas/base.py +297 -0
- basic_memory/schemas/cloud.py +50 -0
- basic_memory/schemas/delete.py +37 -0
- basic_memory/schemas/directory.py +30 -0
- basic_memory/schemas/importer.py +35 -0
- basic_memory/schemas/memory.py +285 -0
- basic_memory/schemas/project_info.py +212 -0
- basic_memory/schemas/prompt.py +90 -0
- basic_memory/schemas/request.py +112 -0
- basic_memory/schemas/response.py +229 -0
- basic_memory/schemas/search.py +117 -0
- basic_memory/schemas/sync_report.py +72 -0
- basic_memory/schemas/v2/__init__.py +27 -0
- basic_memory/schemas/v2/entity.py +129 -0
- basic_memory/schemas/v2/resource.py +46 -0
- basic_memory/services/__init__.py +8 -0
- basic_memory/services/context_service.py +601 -0
- basic_memory/services/directory_service.py +308 -0
- basic_memory/services/entity_service.py +864 -0
- basic_memory/services/exceptions.py +37 -0
- basic_memory/services/file_service.py +541 -0
- basic_memory/services/initialization.py +216 -0
- basic_memory/services/link_resolver.py +121 -0
- basic_memory/services/project_service.py +880 -0
- basic_memory/services/search_service.py +404 -0
- basic_memory/services/service.py +15 -0
- basic_memory/sync/__init__.py +6 -0
- basic_memory/sync/background_sync.py +26 -0
- basic_memory/sync/sync_service.py +1259 -0
- basic_memory/sync/watch_service.py +510 -0
- basic_memory/telemetry.py +249 -0
- basic_memory/templates/prompts/continue_conversation.hbs +110 -0
- basic_memory/templates/prompts/search.hbs +101 -0
- basic_memory/utils.py +468 -0
- basic_memory-0.17.1.dist-info/METADATA +617 -0
- basic_memory-0.17.1.dist-info/RECORD +171 -0
- basic_memory-0.17.1.dist-info/WHEEL +4 -0
- basic_memory-0.17.1.dist-info/entry_points.txt +3 -0
- basic_memory-0.17.1.dist-info/licenses/LICENSE +661 -0
|
@@ -0,0 +1,379 @@
|
|
|
1
|
+
"""PostgreSQL tsvector-based search repository implementation."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import re
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from typing import List, Optional
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
from loguru import logger
|
|
10
|
+
from sqlalchemy import text
|
|
11
|
+
|
|
12
|
+
from basic_memory import db
|
|
13
|
+
from basic_memory.repository.search_index_row import SearchIndexRow
|
|
14
|
+
from basic_memory.repository.search_repository_base import SearchRepositoryBase
|
|
15
|
+
from basic_memory.schemas.search import SearchItemType
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class PostgresSearchRepository(SearchRepositoryBase):
    """PostgreSQL tsvector implementation of search repository.

    Uses PostgreSQL's full-text search capabilities with:
    - tsvector for document representation
    - tsquery for query representation
    - GIN indexes for performance
    - ts_rank() function for relevance scoring
    - JSONB containment operators for metadata search
    """

    async def init_search_index(self):
        """Create Postgres table with tsvector column and GIN indexes.

        Note: This is handled by Alembic migrations. This method is a no-op
        for Postgres as the schema is created via migrations.
        """
        logger.info("PostgreSQL search index initialization handled by migrations")
        # Table creation is done via Alembic migrations.
        # This includes:
        # - CREATE TABLE search_index (...)
        # - ADD COLUMN textsearchable_index_col tsvector GENERATED ALWAYS AS (...)
        # - CREATE INDEX USING GIN on textsearchable_index_col
        # - CREATE INDEX USING GIN on metadata jsonb_path_ops

    def _prepare_search_term(self, term: str, is_prefix: bool = True) -> str:
        """Prepare a search term for tsquery format.

        Args:
            term: The search term to prepare
            is_prefix: Whether to add prefix search capability (:* operator)

        Returns:
            Formatted search term for tsquery

        For Postgres:
        - Boolean operators are converted to tsquery format (&, |, !)
        - Prefix matching uses the :* operator
        - Terms are sanitized to prevent tsquery syntax errors
        """
        # Check for explicit boolean operators (padded with spaces so a term
        # like "BRAND" is not mistaken for the AND operator)
        boolean_operators = [" AND ", " OR ", " NOT "]
        if any(op in f" {term} " for op in boolean_operators):
            return self._prepare_boolean_query(term)

        # For non-Boolean queries, prepare single term
        return self._prepare_single_term(term, is_prefix)

    def _prepare_boolean_query(self, query: str) -> str:
        """Convert Boolean query to tsquery format.

        Args:
            query: A Boolean query like "coffee AND brewing" or "(pour OR french) AND press"

        Returns:
            tsquery-formatted string with & (AND), | (OR), ! (NOT) operators

        Examples:
            "coffee AND brewing" -> "coffee & brewing"
            "(pour OR french) AND press" -> "(pour | french) & press"
            "coffee NOT decaf" -> "coffee & !decaf"
        """
        # Replace Boolean operators with tsquery operators.
        # Parentheses are kept as-is for grouping.
        result = query
        result = re.sub(r"\bAND\b", "&", result)
        result = re.sub(r"\bOR\b", "|", result)
        # NOT must be converted to "& !" and the ! must be attached to the following term
        # "Python NOT Django" -> "Python & !Django"
        result = re.sub(r"\bNOT\s+", "& !", result)

        return result

    def _prepare_single_term(self, term: str, is_prefix: bool = True) -> str:
        """Prepare a single search term for tsquery.

        Args:
            term: A single search term
            is_prefix: Whether to add prefix search capability (:* suffix)

        Returns:
            A properly formatted single term for tsquery

        For Postgres tsquery:
        - Multi-word queries become "word1 & word2"
        - Prefix matching uses ":*" suffix (e.g., "coff:*")
        - Special characters that need escaping: & | ! ( ) :
        """
        if not term or not term.strip():
            return term

        term = term.strip()

        # Check if term is already a wildcard pattern
        if "*" in term:
            # Replace * with :* for Postgres prefix matching
            return term.replace("*", ":*")

        # Remove tsquery special characters from the search term.
        # These characters have special meaning in tsquery and cause syntax
        # errors if not used as operators.
        special_chars = ["&", "|", "!", "(", ")", ":"]
        cleaned_term = term
        for char in special_chars:
            cleaned_term = cleaned_term.replace(char, " ")

        # Handle multi-word queries
        if " " in cleaned_term:
            words = [w for w in cleaned_term.split() if w.strip()]
            if not words:
                # All characters were special chars, search won't match anything.
                # Return a safe search term that won't cause syntax errors.
                return "NOSPECIALCHARS:*"
            if is_prefix:
                # Add prefix matching to each word
                prepared_words = [f"{word}:*" for word in words]
            else:
                prepared_words = words
            # Join with AND operator
            return " & ".join(prepared_words)

        # Single word
        cleaned_term = cleaned_term.strip()
        if not cleaned_term:
            return "NOSPECIALCHARS:*"
        if is_prefix:
            return f"{cleaned_term}:*"
        else:
            return cleaned_term

    async def search(
        self,
        search_text: Optional[str] = None,
        permalink: Optional[str] = None,
        permalink_match: Optional[str] = None,
        title: Optional[str] = None,
        types: Optional[List[str]] = None,
        after_date: Optional[datetime] = None,
        search_item_types: Optional[List[SearchItemType]] = None,
        limit: int = 10,
        offset: int = 0,
    ) -> List[SearchIndexRow]:
        """Search across all indexed content using PostgreSQL tsvector.

        Args:
            search_text: Free-text query, matched against the tsvector column.
                "*" or empty means "match everything".
            permalink: Exact permalink match.
            permalink_match: Permalink pattern; "*" wildcards become SQL LIKE "%".
            title: Text matched against the title column only (no prefix match).
            types: Entity-type values matched via JSONB containment on metadata.
            after_date: Only return rows created after this timestamp.
            search_item_types: Restrict to these SearchItemType values.
            limit: Maximum number of rows returned.
            offset: Number of rows to skip (pagination).

        Returns:
            List of SearchIndexRow ordered by relevance score (ts_rank).
        """
        conditions = []
        params: dict = {}
        order_by_clause = ""

        # Handle text search for title and content using tsvector
        if search_text:
            if search_text.strip() == "*" or search_text.strip() == "":
                # For wildcard searches, don't add any text conditions
                pass
            else:
                # Prepare search term for tsquery
                processed_text = self._prepare_search_term(search_text.strip())
                params["text"] = processed_text
                # Use @@ operator for tsvector matching
                conditions.append("textsearchable_index_col @@ to_tsquery('english', :text)")

        # Handle title search
        if title:
            title_text = self._prepare_search_term(title.strip(), is_prefix=False)
            params["title_text"] = title_text
            conditions.append("to_tsvector('english', title) @@ to_tsquery('english', :title_text)")

        # Handle permalink exact search
        if permalink:
            params["permalink"] = permalink
            conditions.append("permalink = :permalink")

        # Handle permalink pattern match
        if permalink_match:
            permalink_text = permalink_match.lower().strip()
            if "*" in permalink_match:
                # Use LIKE for pattern matching in Postgres.
                # Convert * to % for SQL LIKE.
                permalink_pattern = permalink_text.replace("*", "%")
                params["permalink"] = permalink_pattern
                conditions.append("permalink LIKE :permalink")
            else:
                params["permalink"] = permalink_text
                conditions.append("permalink = :permalink")

        # Handle search item type filter.
        # Bind each enum value as a parameter instead of interpolating it into
        # the SQL string.
        if search_item_types:
            item_type_placeholders = []
            for i, item_type in enumerate(search_item_types):
                key = f"item_type_{i}"
                params[key] = item_type.value
                item_type_placeholders.append(f":{key}")
            conditions.append(f"type IN ({', '.join(item_type_placeholders)})")

        # Handle entity type filter using JSONB containment.
        # The containment document is bound as a parameter and cast to jsonb,
        # rather than interpolated into the SQL string — `types` values are
        # caller-supplied, and interpolation would be a SQL injection vector.
        if types:
            type_conditions = []
            for i, entity_type in enumerate(types):
                key = f"entity_type_{i}"
                params[key] = json.dumps({"entity_type": entity_type})
                type_conditions.append(f"metadata @> CAST(:{key} AS jsonb)")
            conditions.append(f"({' OR '.join(type_conditions)})")

        # Handle date filter
        if after_date:
            params["after_date"] = after_date
            conditions.append("created_at > :after_date")
            # order by most recent first (applies after score/id ordering)
            order_by_clause = ", updated_at DESC"

        # Always filter by project_id
        params["project_id"] = self.project_id
        conditions.append("project_id = :project_id")

        # set limit and offset
        params["limit"] = limit
        params["offset"] = offset

        # Build WHERE clause
        where_clause = " AND ".join(conditions) if conditions else "1=1"

        # Build SQL with ts_rank() for scoring.
        # Note: If no text search, score will be NULL, so we use COALESCE to default to 0
        if search_text and search_text.strip() and search_text.strip() != "*":
            score_expr = "ts_rank(textsearchable_index_col, to_tsquery('english', :text))"
        else:
            score_expr = "0"

        sql = f"""
            SELECT
                project_id,
                id,
                title,
                permalink,
                file_path,
                type,
                metadata,
                from_id,
                to_id,
                relation_type,
                entity_id,
                content_snippet,
                category,
                created_at,
                updated_at,
                {score_expr} as score
            FROM search_index
            WHERE {where_clause}
            ORDER BY score DESC, id ASC {order_by_clause}
            LIMIT :limit
            OFFSET :offset
        """

        logger.trace(f"Search {sql} params: {params}")
        try:
            async with db.scoped_session(self.session_maker) as session:
                result = await session.execute(text(sql), params)
                rows = result.fetchall()
        except Exception as e:
            # Handle tsquery syntax errors
            if "tsquery" in str(e).lower() or "syntax error" in str(e).lower():  # pragma: no cover
                logger.warning(f"tsquery syntax error for search term: {search_text}, error: {e}")
                # Return empty results rather than crashing
                return []
            else:
                # Re-raise other database errors
                logger.error(f"Database error during search: {e}")
                raise

        results = [
            SearchIndexRow(
                project_id=self.project_id,
                id=row.id,
                title=row.title,
                permalink=row.permalink,
                file_path=row.file_path,
                type=row.type,
                score=float(row.score) if row.score else 0.0,
                metadata=(
                    row.metadata
                    if isinstance(row.metadata, dict)
                    else (json.loads(row.metadata) if row.metadata else {})
                ),
                from_id=row.from_id,
                to_id=row.to_id,
                relation_type=row.relation_type,
                entity_id=row.entity_id,
                content_snippet=row.content_snippet,
                category=row.category,
                created_at=row.created_at,
                updated_at=row.updated_at,
            )
            for row in rows
        ]

        logger.trace(f"Found {len(results)} search results")
        for r in results:
            logger.trace(
                f"Search result: project_id: {r.project_id} type:{r.type} title: {r.title} permalink: {r.permalink} score: {r.score}"
            )

        return results

    async def bulk_index_items(self, search_index_rows: List[SearchIndexRow]) -> None:
        """Index multiple items in a single batch operation using UPSERT.

        Uses INSERT ... ON CONFLICT DO UPDATE to handle re-indexing of existing
        entities (e.g., during forward reference resolution) without requiring
        a separate delete operation. This eliminates race conditions between
        delete and insert operations in separate transactions.

        Args:
            search_index_rows: List of SearchIndexRow objects to index
        """

        if not search_index_rows:
            return

        async with db.scoped_session(self.session_maker) as session:
            # When using text() raw SQL, always serialize JSON to string.
            # Both SQLite (TEXT) and Postgres (JSONB) accept JSON strings in raw SQL;
            # the database driver/column type will handle conversion.
            insert_data_list = []
            for row in search_index_rows:
                insert_data = row.to_insert(serialize_json=True)
                insert_data["project_id"] = self.project_id
                insert_data_list.append(insert_data)

            # Use UPSERT (INSERT ... ON CONFLICT) to handle re-indexing.
            # Primary key is (id, type, project_id).
            # This handles race conditions during forward reference resolution
            # where an entity might be re-indexed before the delete commits.
            # Syntax works for both SQLite 3.24+ and PostgreSQL.
            await session.execute(
                text("""
                    INSERT INTO search_index (
                        id, title, content_stems, content_snippet, permalink, file_path, type, metadata,
                        from_id, to_id, relation_type,
                        entity_id, category,
                        created_at, updated_at,
                        project_id
                    ) VALUES (
                        :id, :title, :content_stems, :content_snippet, :permalink, :file_path, :type, :metadata,
                        :from_id, :to_id, :relation_type,
                        :entity_id, :category,
                        :created_at, :updated_at,
                        :project_id
                    )
                    ON CONFLICT (id, type, project_id) DO UPDATE SET
                        title = EXCLUDED.title,
                        content_stems = EXCLUDED.content_stems,
                        content_snippet = EXCLUDED.content_snippet,
                        permalink = EXCLUDED.permalink,
                        file_path = EXCLUDED.file_path,
                        metadata = EXCLUDED.metadata,
                        from_id = EXCLUDED.from_id,
                        to_id = EXCLUDED.to_id,
                        relation_type = EXCLUDED.relation_type,
                        entity_id = EXCLUDED.entity_id,
                        category = EXCLUDED.category,
                        created_at = EXCLUDED.created_at,
                        updated_at = EXCLUDED.updated_at
                """),
                insert_data_list,
            )
            logger.debug(f"Bulk indexed {len(search_index_rows)} rows")
            await session.commit()
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
from basic_memory.repository.repository import Repository
|
|
2
|
+
from basic_memory.models.project import Project
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class ProjectInfoRepository(Repository):
    """Repository used for project statistics queries.

    Carries no model-specific behavior of its own; it simply anchors the
    generic Repository machinery to the Project model so ad-hoc statistics
    queries can reuse the shared session handling.
    """

    def __init__(self, session_maker):
        """Bind the generic repository to the Project model."""
        super().__init__(session_maker, Project)
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""Repository for managing projects in Basic Memory."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Optional, Sequence, Union
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
from sqlalchemy import text
|
|
8
|
+
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
|
9
|
+
|
|
10
|
+
from basic_memory import db
|
|
11
|
+
from basic_memory.models.project import Project
|
|
12
|
+
from basic_memory.repository.repository import Repository
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ProjectRepository(Repository[Project]):
    """Repository for Project model.

    Projects represent collections of knowledge entities grouped together.
    Each entity, observation, and relation belongs to a specific project.
    """

    def __init__(self, session_maker: async_sessionmaker[AsyncSession]):
        """Initialize with session maker."""
        super().__init__(session_maker, Project)

    async def get_by_name(self, name: str) -> Optional[Project]:
        """Look up a project by its exact (case-sensitive) name.

        Args:
            name: Unique name of the project
        """
        stmt = self.select().where(Project.name == name)
        return await self.find_one(stmt)

    async def get_by_name_case_insensitive(self, name: str) -> Optional[Project]:
        """Look up a project by name, ignoring case.

        Args:
            name: Project name (case-insensitive)

        Returns:
            Project if found, None otherwise
        """
        stmt = self.select().where(Project.name.ilike(name))
        return await self.find_one(stmt)

    async def get_by_permalink(self, permalink: str) -> Optional[Project]:
        """Look up a project by its URL-friendly permalink.

        Args:
            permalink: URL-friendly identifier for the project
        """
        stmt = self.select().where(Project.permalink == permalink)
        return await self.find_one(stmt)

    async def get_by_path(self, path: Union[Path, str]) -> Optional[Project]:
        """Look up a project by its filesystem path.

        Args:
            path: Path to the project directory; normalized to a POSIX-style
                string before comparison so str and Path inputs behave alike.
        """
        stmt = self.select().where(Project.path == Path(path).as_posix())
        return await self.find_one(stmt)

    async def get_by_id(self, project_id: int) -> Optional[Project]:
        """Look up a project by its numeric primary key.

        Args:
            project_id: Numeric project ID

        Returns:
            Project if found, None otherwise
        """
        async with db.scoped_session(self.session_maker) as session:
            return await self.select_by_id(session, project_id)

    async def get_default_project(self) -> Optional[Project]:
        """Return the project flagged as default.

        The default flag uses a NULL-vs-non-NULL convention (see
        set_as_default), so this matches any row where is_default is set.
        """
        stmt = self.select().where(Project.is_default.is_not(None))
        return await self.find_one(stmt)

    async def get_active_projects(self) -> Sequence[Project]:
        """Return every project whose is_active flag is set."""
        stmt = self.select().where(Project.is_active == True)  # noqa: E712
        result = await self.execute_query(stmt)
        return list(result.scalars().all())

    async def set_as_default(self, project_id: int) -> Optional[Project]:
        """Make one project the default, clearing the flag everywhere else.

        Args:
            project_id: ID of the project to set as default

        Returns:
            The updated project if found, None otherwise
        """
        async with db.scoped_session(self.session_maker) as session:
            # Step 1: clear the flag on every project via direct SQL so the
            # unique NULL-based convention holds before we set the new one.
            await session.execute(
                text("UPDATE project SET is_default = NULL WHERE is_default IS NOT NULL")
            )
            await session.flush()

            # Step 2: flag the requested project as default.
            target_project = await self.select_by_id(session, project_id)
            if target_project is None:
                return None  # pragma: no cover
            target_project.is_default = True
            await session.flush()
            return target_project

    async def update_path(self, project_id: int, new_path: str) -> Optional[Project]:
        """Change the filesystem path stored for a project.

        Args:
            project_id: ID of the project to update
            new_path: New filesystem path for the project

        Returns:
            The updated project if found, None otherwise
        """
        async with db.scoped_session(self.session_maker) as session:
            project = await self.select_by_id(session, project_id)
            if project is None:
                return None
            project.path = new_path
            await session.flush()
            return project
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
"""Repository for managing Relation objects."""
|
|
2
|
+
|
|
3
|
+
from typing import Sequence, List, Optional
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
from sqlalchemy import and_, delete, select
|
|
7
|
+
from sqlalchemy.dialects.postgresql import insert as pg_insert
|
|
8
|
+
from sqlalchemy.dialects.sqlite import insert as sqlite_insert
|
|
9
|
+
from sqlalchemy.ext.asyncio import async_sessionmaker
|
|
10
|
+
from sqlalchemy.orm import selectinload, aliased
|
|
11
|
+
from sqlalchemy.orm.interfaces import LoaderOption
|
|
12
|
+
|
|
13
|
+
from basic_memory import db
|
|
14
|
+
from basic_memory.models import Relation, Entity
|
|
15
|
+
from basic_memory.repository.repository import Repository
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class RelationRepository(Repository[Relation]):
|
|
19
|
+
"""Repository for Relation model with memory-specific operations."""
|
|
20
|
+
|
|
21
|
+
def __init__(self, session_maker: async_sessionmaker, project_id: int):
    """Create a relation repository scoped to a single project.

    Args:
        session_maker: SQLAlchemy async session factory
        project_id: Project ID applied as a filter to every operation
    """
    # Delegate to the generic repository with the Relation model bound.
    super().__init__(session_maker, Relation, project_id=project_id)
|
|
29
|
+
|
|
30
|
+
async def find_relation(
|
|
31
|
+
self, from_permalink: str, to_permalink: str, relation_type: str
|
|
32
|
+
) -> Optional[Relation]:
|
|
33
|
+
"""Find a relation by its from and to path IDs."""
|
|
34
|
+
from_entity = aliased(Entity)
|
|
35
|
+
to_entity = aliased(Entity)
|
|
36
|
+
|
|
37
|
+
query = (
|
|
38
|
+
select(Relation)
|
|
39
|
+
.join(from_entity, Relation.from_id == from_entity.id)
|
|
40
|
+
.join(to_entity, Relation.to_id == to_entity.id)
|
|
41
|
+
.where(
|
|
42
|
+
and_(
|
|
43
|
+
from_entity.permalink == from_permalink,
|
|
44
|
+
to_entity.permalink == to_permalink,
|
|
45
|
+
Relation.relation_type == relation_type,
|
|
46
|
+
)
|
|
47
|
+
)
|
|
48
|
+
)
|
|
49
|
+
return await self.find_one(query)
|
|
50
|
+
|
|
51
|
+
async def find_by_entities(self, from_id: int, to_id: int) -> Sequence[Relation]:
|
|
52
|
+
"""Find all relations between two entities."""
|
|
53
|
+
query = select(Relation).where((Relation.from_id == from_id) & (Relation.to_id == to_id))
|
|
54
|
+
result = await self.execute_query(query)
|
|
55
|
+
return result.scalars().all()
|
|
56
|
+
|
|
57
|
+
async def find_by_type(self, relation_type: str) -> Sequence[Relation]:
|
|
58
|
+
"""Find all relations of a specific type."""
|
|
59
|
+
query = select(Relation).filter(Relation.relation_type == relation_type)
|
|
60
|
+
result = await self.execute_query(query)
|
|
61
|
+
return result.scalars().all()
|
|
62
|
+
|
|
63
|
+
async def delete_outgoing_relations_from_entity(self, entity_id: int) -> None:
|
|
64
|
+
"""Delete outgoing relations for an entity.
|
|
65
|
+
|
|
66
|
+
Only deletes relations where this entity is the source (from_id),
|
|
67
|
+
as these are the ones owned by this entity's markdown file.
|
|
68
|
+
"""
|
|
69
|
+
async with db.scoped_session(self.session_maker) as session:
|
|
70
|
+
await session.execute(delete(Relation).where(Relation.from_id == entity_id))
|
|
71
|
+
|
|
72
|
+
async def find_unresolved_relations(self) -> Sequence[Relation]:
|
|
73
|
+
"""Find all unresolved relations, where to_id is null."""
|
|
74
|
+
query = select(Relation).filter(Relation.to_id.is_(None))
|
|
75
|
+
result = await self.execute_query(query)
|
|
76
|
+
return result.scalars().all()
|
|
77
|
+
|
|
78
|
+
async def find_unresolved_relations_for_entity(self, entity_id: int) -> Sequence[Relation]:
|
|
79
|
+
"""Find unresolved relations for a specific entity.
|
|
80
|
+
|
|
81
|
+
Args:
|
|
82
|
+
entity_id: The entity whose unresolved outgoing relations to find.
|
|
83
|
+
|
|
84
|
+
Returns:
|
|
85
|
+
List of unresolved relations where this entity is the source.
|
|
86
|
+
"""
|
|
87
|
+
query = select(Relation).filter(Relation.from_id == entity_id, Relation.to_id.is_(None))
|
|
88
|
+
result = await self.execute_query(query)
|
|
89
|
+
return result.scalars().all()
|
|
90
|
+
|
|
91
|
+
async def add_all_ignore_duplicates(self, relations: List[Relation]) -> int:
|
|
92
|
+
"""Bulk insert relations, ignoring duplicates.
|
|
93
|
+
|
|
94
|
+
Uses ON CONFLICT DO NOTHING to skip relations that would violate the
|
|
95
|
+
unique constraint on (from_id, to_name, relation_type). This is useful
|
|
96
|
+
for bulk operations where the same link may appear multiple times in
|
|
97
|
+
a document.
|
|
98
|
+
|
|
99
|
+
Works with both SQLite and PostgreSQL dialects.
|
|
100
|
+
|
|
101
|
+
Args:
|
|
102
|
+
relations: List of Relation objects to insert
|
|
103
|
+
|
|
104
|
+
Returns:
|
|
105
|
+
Number of relations actually inserted (excludes duplicates)
|
|
106
|
+
"""
|
|
107
|
+
if not relations:
|
|
108
|
+
return 0
|
|
109
|
+
|
|
110
|
+
# Convert Relation objects to dicts for insert
|
|
111
|
+
values = [
|
|
112
|
+
{
|
|
113
|
+
"project_id": r.project_id if r.project_id else self.project_id,
|
|
114
|
+
"from_id": r.from_id,
|
|
115
|
+
"to_id": r.to_id,
|
|
116
|
+
"to_name": r.to_name,
|
|
117
|
+
"relation_type": r.relation_type,
|
|
118
|
+
"context": r.context,
|
|
119
|
+
}
|
|
120
|
+
for r in relations
|
|
121
|
+
]
|
|
122
|
+
|
|
123
|
+
async with db.scoped_session(self.session_maker) as session:
|
|
124
|
+
# Check dialect to use appropriate insert
|
|
125
|
+
dialect_name = session.bind.dialect.name if session.bind else "sqlite"
|
|
126
|
+
|
|
127
|
+
if dialect_name == "postgresql":
|
|
128
|
+
# PostgreSQL: use RETURNING to count inserted rows
|
|
129
|
+
# (rowcount is 0 for ON CONFLICT DO NOTHING)
|
|
130
|
+
stmt = (
|
|
131
|
+
pg_insert(Relation)
|
|
132
|
+
.values(values)
|
|
133
|
+
.on_conflict_do_nothing()
|
|
134
|
+
.returning(Relation.id)
|
|
135
|
+
)
|
|
136
|
+
result = await session.execute(stmt)
|
|
137
|
+
return len(result.fetchall())
|
|
138
|
+
else:
|
|
139
|
+
# SQLite: rowcount works correctly
|
|
140
|
+
stmt = sqlite_insert(Relation).values(values)
|
|
141
|
+
stmt = stmt.on_conflict_do_nothing()
|
|
142
|
+
result = await session.execute(stmt)
|
|
143
|
+
return result.rowcount if result.rowcount > 0 else 0
|
|
144
|
+
|
|
145
|
+
def get_load_options(self) -> List[LoaderOption]:
|
|
146
|
+
return [selectinload(Relation.from_entity), selectinload(Relation.to_entity)]
|