basic-memory 0.7.0__py3-none-any.whl → 0.17.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of basic-memory might be problematic. Click here for more details.

Files changed (195) hide show
  1. basic_memory/__init__.py +5 -1
  2. basic_memory/alembic/alembic.ini +119 -0
  3. basic_memory/alembic/env.py +130 -20
  4. basic_memory/alembic/migrations.py +4 -9
  5. basic_memory/alembic/versions/314f1ea54dc4_add_postgres_full_text_search_support_.py +131 -0
  6. basic_memory/alembic/versions/502b60eaa905_remove_required_from_entity_permalink.py +51 -0
  7. basic_memory/alembic/versions/5fe1ab1ccebe_add_projects_table.py +120 -0
  8. basic_memory/alembic/versions/647e7a75e2cd_project_constraint_fix.py +112 -0
  9. basic_memory/alembic/versions/6830751f5fb6_merge_multiple_heads.py +24 -0
  10. basic_memory/alembic/versions/9d9c1cb7d8f5_add_mtime_and_size_columns_to_entity_.py +49 -0
  11. basic_memory/alembic/versions/a1b2c3d4e5f6_fix_project_foreign_keys.py +49 -0
  12. basic_memory/alembic/versions/a2b3c4d5e6f7_add_search_index_entity_cascade.py +56 -0
  13. basic_memory/alembic/versions/b3c3938bacdb_relation_to_name_unique_index.py +44 -0
  14. basic_memory/alembic/versions/cc7172b46608_update_search_index_schema.py +113 -0
  15. basic_memory/alembic/versions/e7e1f4367280_add_scan_watermark_tracking_to_project.py +37 -0
  16. basic_memory/alembic/versions/f8a9b2c3d4e5_add_pg_trgm_for_fuzzy_link_resolution.py +239 -0
  17. basic_memory/alembic/versions/g9a0b3c4d5e6_add_external_id_to_project_and_entity.py +173 -0
  18. basic_memory/api/app.py +87 -20
  19. basic_memory/api/container.py +133 -0
  20. basic_memory/api/routers/__init__.py +4 -1
  21. basic_memory/api/routers/directory_router.py +84 -0
  22. basic_memory/api/routers/importer_router.py +152 -0
  23. basic_memory/api/routers/knowledge_router.py +180 -23
  24. basic_memory/api/routers/management_router.py +80 -0
  25. basic_memory/api/routers/memory_router.py +9 -64
  26. basic_memory/api/routers/project_router.py +460 -0
  27. basic_memory/api/routers/prompt_router.py +260 -0
  28. basic_memory/api/routers/resource_router.py +136 -11
  29. basic_memory/api/routers/search_router.py +5 -5
  30. basic_memory/api/routers/utils.py +169 -0
  31. basic_memory/api/template_loader.py +292 -0
  32. basic_memory/api/v2/__init__.py +35 -0
  33. basic_memory/api/v2/routers/__init__.py +21 -0
  34. basic_memory/api/v2/routers/directory_router.py +93 -0
  35. basic_memory/api/v2/routers/importer_router.py +181 -0
  36. basic_memory/api/v2/routers/knowledge_router.py +427 -0
  37. basic_memory/api/v2/routers/memory_router.py +130 -0
  38. basic_memory/api/v2/routers/project_router.py +359 -0
  39. basic_memory/api/v2/routers/prompt_router.py +269 -0
  40. basic_memory/api/v2/routers/resource_router.py +286 -0
  41. basic_memory/api/v2/routers/search_router.py +73 -0
  42. basic_memory/cli/app.py +80 -10
  43. basic_memory/cli/auth.py +300 -0
  44. basic_memory/cli/commands/__init__.py +15 -2
  45. basic_memory/cli/commands/cloud/__init__.py +6 -0
  46. basic_memory/cli/commands/cloud/api_client.py +127 -0
  47. basic_memory/cli/commands/cloud/bisync_commands.py +110 -0
  48. basic_memory/cli/commands/cloud/cloud_utils.py +108 -0
  49. basic_memory/cli/commands/cloud/core_commands.py +195 -0
  50. basic_memory/cli/commands/cloud/rclone_commands.py +397 -0
  51. basic_memory/cli/commands/cloud/rclone_config.py +110 -0
  52. basic_memory/cli/commands/cloud/rclone_installer.py +263 -0
  53. basic_memory/cli/commands/cloud/upload.py +240 -0
  54. basic_memory/cli/commands/cloud/upload_command.py +124 -0
  55. basic_memory/cli/commands/command_utils.py +99 -0
  56. basic_memory/cli/commands/db.py +87 -12
  57. basic_memory/cli/commands/format.py +198 -0
  58. basic_memory/cli/commands/import_chatgpt.py +47 -223
  59. basic_memory/cli/commands/import_claude_conversations.py +48 -171
  60. basic_memory/cli/commands/import_claude_projects.py +53 -160
  61. basic_memory/cli/commands/import_memory_json.py +55 -111
  62. basic_memory/cli/commands/mcp.py +67 -11
  63. basic_memory/cli/commands/project.py +889 -0
  64. basic_memory/cli/commands/status.py +52 -34
  65. basic_memory/cli/commands/telemetry.py +81 -0
  66. basic_memory/cli/commands/tool.py +341 -0
  67. basic_memory/cli/container.py +84 -0
  68. basic_memory/cli/main.py +14 -6
  69. basic_memory/config.py +580 -26
  70. basic_memory/db.py +285 -28
  71. basic_memory/deps/__init__.py +293 -0
  72. basic_memory/deps/config.py +26 -0
  73. basic_memory/deps/db.py +56 -0
  74. basic_memory/deps/importers.py +200 -0
  75. basic_memory/deps/projects.py +238 -0
  76. basic_memory/deps/repositories.py +179 -0
  77. basic_memory/deps/services.py +480 -0
  78. basic_memory/deps.py +16 -185
  79. basic_memory/file_utils.py +318 -54
  80. basic_memory/ignore_utils.py +297 -0
  81. basic_memory/importers/__init__.py +27 -0
  82. basic_memory/importers/base.py +100 -0
  83. basic_memory/importers/chatgpt_importer.py +245 -0
  84. basic_memory/importers/claude_conversations_importer.py +192 -0
  85. basic_memory/importers/claude_projects_importer.py +184 -0
  86. basic_memory/importers/memory_json_importer.py +128 -0
  87. basic_memory/importers/utils.py +61 -0
  88. basic_memory/markdown/entity_parser.py +182 -23
  89. basic_memory/markdown/markdown_processor.py +70 -7
  90. basic_memory/markdown/plugins.py +43 -23
  91. basic_memory/markdown/schemas.py +1 -1
  92. basic_memory/markdown/utils.py +38 -14
  93. basic_memory/mcp/async_client.py +135 -4
  94. basic_memory/mcp/clients/__init__.py +28 -0
  95. basic_memory/mcp/clients/directory.py +70 -0
  96. basic_memory/mcp/clients/knowledge.py +176 -0
  97. basic_memory/mcp/clients/memory.py +120 -0
  98. basic_memory/mcp/clients/project.py +89 -0
  99. basic_memory/mcp/clients/resource.py +71 -0
  100. basic_memory/mcp/clients/search.py +65 -0
  101. basic_memory/mcp/container.py +110 -0
  102. basic_memory/mcp/project_context.py +155 -0
  103. basic_memory/mcp/prompts/__init__.py +19 -0
  104. basic_memory/mcp/prompts/ai_assistant_guide.py +70 -0
  105. basic_memory/mcp/prompts/continue_conversation.py +62 -0
  106. basic_memory/mcp/prompts/recent_activity.py +188 -0
  107. basic_memory/mcp/prompts/search.py +57 -0
  108. basic_memory/mcp/prompts/utils.py +162 -0
  109. basic_memory/mcp/resources/ai_assistant_guide.md +283 -0
  110. basic_memory/mcp/resources/project_info.py +71 -0
  111. basic_memory/mcp/server.py +61 -9
  112. basic_memory/mcp/tools/__init__.py +33 -21
  113. basic_memory/mcp/tools/build_context.py +120 -0
  114. basic_memory/mcp/tools/canvas.py +152 -0
  115. basic_memory/mcp/tools/chatgpt_tools.py +190 -0
  116. basic_memory/mcp/tools/delete_note.py +249 -0
  117. basic_memory/mcp/tools/edit_note.py +325 -0
  118. basic_memory/mcp/tools/list_directory.py +157 -0
  119. basic_memory/mcp/tools/move_note.py +549 -0
  120. basic_memory/mcp/tools/project_management.py +204 -0
  121. basic_memory/mcp/tools/read_content.py +281 -0
  122. basic_memory/mcp/tools/read_note.py +265 -0
  123. basic_memory/mcp/tools/recent_activity.py +528 -0
  124. basic_memory/mcp/tools/search.py +377 -24
  125. basic_memory/mcp/tools/utils.py +402 -16
  126. basic_memory/mcp/tools/view_note.py +78 -0
  127. basic_memory/mcp/tools/write_note.py +230 -0
  128. basic_memory/models/__init__.py +3 -2
  129. basic_memory/models/knowledge.py +82 -17
  130. basic_memory/models/project.py +93 -0
  131. basic_memory/models/search.py +68 -8
  132. basic_memory/project_resolver.py +222 -0
  133. basic_memory/repository/__init__.py +2 -0
  134. basic_memory/repository/entity_repository.py +437 -8
  135. basic_memory/repository/observation_repository.py +36 -3
  136. basic_memory/repository/postgres_search_repository.py +451 -0
  137. basic_memory/repository/project_info_repository.py +10 -0
  138. basic_memory/repository/project_repository.py +140 -0
  139. basic_memory/repository/relation_repository.py +79 -4
  140. basic_memory/repository/repository.py +148 -29
  141. basic_memory/repository/search_index_row.py +95 -0
  142. basic_memory/repository/search_repository.py +79 -268
  143. basic_memory/repository/search_repository_base.py +241 -0
  144. basic_memory/repository/sqlite_search_repository.py +437 -0
  145. basic_memory/runtime.py +61 -0
  146. basic_memory/schemas/__init__.py +22 -9
  147. basic_memory/schemas/base.py +131 -12
  148. basic_memory/schemas/cloud.py +50 -0
  149. basic_memory/schemas/directory.py +31 -0
  150. basic_memory/schemas/importer.py +35 -0
  151. basic_memory/schemas/memory.py +194 -25
  152. basic_memory/schemas/project_info.py +213 -0
  153. basic_memory/schemas/prompt.py +90 -0
  154. basic_memory/schemas/request.py +56 -2
  155. basic_memory/schemas/response.py +85 -28
  156. basic_memory/schemas/search.py +36 -35
  157. basic_memory/schemas/sync_report.py +72 -0
  158. basic_memory/schemas/v2/__init__.py +27 -0
  159. basic_memory/schemas/v2/entity.py +133 -0
  160. basic_memory/schemas/v2/resource.py +47 -0
  161. basic_memory/services/__init__.py +2 -1
  162. basic_memory/services/context_service.py +451 -138
  163. basic_memory/services/directory_service.py +310 -0
  164. basic_memory/services/entity_service.py +636 -71
  165. basic_memory/services/exceptions.py +21 -0
  166. basic_memory/services/file_service.py +402 -33
  167. basic_memory/services/initialization.py +216 -0
  168. basic_memory/services/link_resolver.py +50 -56
  169. basic_memory/services/project_service.py +888 -0
  170. basic_memory/services/search_service.py +232 -37
  171. basic_memory/sync/__init__.py +4 -2
  172. basic_memory/sync/background_sync.py +26 -0
  173. basic_memory/sync/coordinator.py +160 -0
  174. basic_memory/sync/sync_service.py +1200 -109
  175. basic_memory/sync/watch_service.py +432 -135
  176. basic_memory/telemetry.py +249 -0
  177. basic_memory/templates/prompts/continue_conversation.hbs +110 -0
  178. basic_memory/templates/prompts/search.hbs +101 -0
  179. basic_memory/utils.py +407 -54
  180. basic_memory-0.17.4.dist-info/METADATA +617 -0
  181. basic_memory-0.17.4.dist-info/RECORD +193 -0
  182. {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/WHEEL +1 -1
  183. {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/entry_points.txt +1 -0
  184. basic_memory/alembic/README +0 -1
  185. basic_memory/cli/commands/sync.py +0 -206
  186. basic_memory/cli/commands/tools.py +0 -157
  187. basic_memory/mcp/tools/knowledge.py +0 -68
  188. basic_memory/mcp/tools/memory.py +0 -170
  189. basic_memory/mcp/tools/notes.py +0 -202
  190. basic_memory/schemas/discovery.py +0 -28
  191. basic_memory/sync/file_change_scanner.py +0 -158
  192. basic_memory/sync/utils.py +0 -31
  193. basic_memory-0.7.0.dist-info/METADATA +0 -378
  194. basic_memory-0.7.0.dist-info/RECORD +0 -82
  195. {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,451 @@
1
+ """PostgreSQL tsvector-based search repository implementation."""
2
+
3
+ import json
4
+ import re
5
+ from datetime import datetime
6
+ from typing import List, Optional
7
+
8
+
9
+ from loguru import logger
10
+ from sqlalchemy import text
11
+
12
+ from basic_memory import db
13
+ from basic_memory.repository.search_index_row import SearchIndexRow
14
+ from basic_memory.repository.search_repository_base import SearchRepositoryBase
15
+ from basic_memory.schemas.search import SearchItemType
16
+
17
+
18
+ class PostgresSearchRepository(SearchRepositoryBase):
19
+ """PostgreSQL tsvector implementation of search repository.
20
+
21
+ Uses PostgreSQL's full-text search capabilities with:
22
+ - tsvector for document representation
23
+ - tsquery for query representation
24
+ - GIN indexes for performance
25
+ - ts_rank() function for relevance scoring
26
+ - JSONB containment operators for metadata search
27
+
28
+ Note: This implementation uses UPSERT patterns (INSERT ... ON CONFLICT) instead of
29
+ delete-then-insert to handle race conditions during parallel entity indexing.
30
+ The partial unique index uix_search_index_permalink_project prevents duplicate
31
+ permalinks per project.
32
+ """
33
+
34
async def init_search_index(self):
    """Satisfy the repository interface without touching the schema.

    For Postgres the search schema is expected to come from the Alembic
    migrations, so this method only logs that fact and creates nothing.
    """
    # Schema objects left to the migrations (none of them are created here):
    #   * CREATE TABLE search_index (...)
    #   * the generated tsvector column textsearchable_index_col
    #   * a GIN index on textsearchable_index_col
    #   * a GIN (jsonb_path_ops) index on metadata
    logger.info("PostgreSQL search index initialization handled by migrations")
    return None
48
+
49
async def index_item(self, search_index_row: SearchIndexRow) -> None:
    """Write one row into ``search_index``, overwriting it on a permalink conflict.

    The statement is an ``INSERT ... ON CONFLICT (permalink, project_id)
    WHERE permalink IS NOT NULL DO UPDATE``, which targets the partial unique
    index ``uix_search_index_permalink_project``. A row whose permalink already
    exists for this project is updated in place, so parallel indexers do not
    fail on duplicates. Rows with a NULL permalink are outside that partial
    index and are simply inserted.

    Args:
        search_index_row: The row to index. Its ``project_id`` is replaced by
            this repository's ``project_id`` before the write.
    """
    upsert_statement = text("""
        INSERT INTO search_index (
            id, title, content_stems, content_snippet, permalink, file_path, type, metadata,
            from_id, to_id, relation_type,
            entity_id, category,
            created_at, updated_at,
            project_id
        ) VALUES (
            :id, :title, :content_stems, :content_snippet, :permalink, :file_path, :type, :metadata,
            :from_id, :to_id, :relation_type,
            :entity_id, :category,
            :created_at, :updated_at,
            :project_id
        )
        ON CONFLICT (permalink, project_id) WHERE permalink IS NOT NULL DO UPDATE SET
            id = EXCLUDED.id,
            title = EXCLUDED.title,
            content_stems = EXCLUDED.content_stems,
            content_snippet = EXCLUDED.content_snippet,
            file_path = EXCLUDED.file_path,
            type = EXCLUDED.type,
            metadata = EXCLUDED.metadata,
            from_id = EXCLUDED.from_id,
            to_id = EXCLUDED.to_id,
            relation_type = EXCLUDED.relation_type,
            entity_id = EXCLUDED.entity_id,
            category = EXCLUDED.category,
            created_at = EXCLUDED.created_at,
            updated_at = EXCLUDED.updated_at
    """)

    async with db.scoped_session(self.session_maker) as session:
        # JSON fields are serialized because the statement is raw SQL.
        row_values = search_index_row.to_insert(serialize_json=True)
        row_values["project_id"] = self.project_id

        await session.execute(upsert_statement, row_values)
        logger.debug(f"indexed row {search_index_row}")
        await session.commit()
105
+
106
+ def _prepare_search_term(self, term: str, is_prefix: bool = True) -> str:
107
+ """Prepare a search term for tsquery format.
108
+
109
+ Args:
110
+ term: The search term to prepare
111
+ is_prefix: Whether to add prefix search capability (:* operator)
112
+
113
+ Returns:
114
+ Formatted search term for tsquery
115
+
116
+ For Postgres:
117
+ - Boolean operators are converted to tsquery format (&, |, !)
118
+ - Prefix matching uses the :* operator
119
+ - Terms are sanitized to prevent tsquery syntax errors
120
+ """
121
+ # Check for explicit boolean operators
122
+ boolean_operators = [" AND ", " OR ", " NOT "]
123
+ if any(op in f" {term} " for op in boolean_operators):
124
+ return self._prepare_boolean_query(term)
125
+
126
+ # For non-Boolean queries, prepare single term
127
+ return self._prepare_single_term(term, is_prefix)
128
+
129
+ def _prepare_boolean_query(self, query: str) -> str:
130
+ """Convert Boolean query to tsquery format.
131
+
132
+ Args:
133
+ query: A Boolean query like "coffee AND brewing" or "(pour OR french) AND press"
134
+
135
+ Returns:
136
+ tsquery-formatted string with & (AND), | (OR), ! (NOT) operators
137
+
138
+ Examples:
139
+ "coffee AND brewing" -> "coffee & brewing"
140
+ "(pour OR french) AND press" -> "(pour | french) & press"
141
+ "coffee NOT decaf" -> "coffee & !decaf"
142
+ """
143
+ # Replace Boolean operators with tsquery operators
144
+ # Keep parentheses for grouping
145
+ result = query
146
+ result = re.sub(r"\bAND\b", "&", result)
147
+ result = re.sub(r"\bOR\b", "|", result)
148
+ # NOT must be converted to "& !" and the ! must be attached to the following term
149
+ # "Python NOT Django" -> "Python & !Django"
150
+ result = re.sub(r"\bNOT\s+", "& !", result)
151
+
152
+ return result
153
+
154
+ def _prepare_single_term(self, term: str, is_prefix: bool = True) -> str:
155
+ """Prepare a single search term for tsquery.
156
+
157
+ Args:
158
+ term: A single search term
159
+ is_prefix: Whether to add prefix search capability (:* suffix)
160
+
161
+ Returns:
162
+ A properly formatted single term for tsquery
163
+
164
+ For Postgres tsquery:
165
+ - Multi-word queries become "word1 & word2"
166
+ - Prefix matching uses ":*" suffix (e.g., "coff:*")
167
+ - Special characters that need escaping: & | ! ( ) :
168
+ """
169
+ if not term or not term.strip():
170
+ return term
171
+
172
+ term = term.strip()
173
+
174
+ # Check if term is already a wildcard pattern
175
+ if "*" in term:
176
+ # Replace * with :* for Postgres prefix matching
177
+ return term.replace("*", ":*")
178
+
179
+ # Remove tsquery special characters from the search term
180
+ # These characters have special meaning in tsquery and cause syntax errors
181
+ # if not used as operators
182
+ special_chars = ["&", "|", "!", "(", ")", ":"]
183
+ cleaned_term = term
184
+ for char in special_chars:
185
+ cleaned_term = cleaned_term.replace(char, " ")
186
+
187
+ # Handle multi-word queries
188
+ if " " in cleaned_term:
189
+ words = [w for w in cleaned_term.split() if w.strip()]
190
+ if not words:
191
+ # All characters were special chars, search won't match anything
192
+ # Return a safe search term that won't cause syntax errors
193
+ return "NOSPECIALCHARS:*"
194
+ if is_prefix:
195
+ # Add prefix matching to each word
196
+ prepared_words = [f"{word}:*" for word in words]
197
+ else:
198
+ prepared_words = words
199
+ # Join with AND operator
200
+ return " & ".join(prepared_words)
201
+
202
+ # Single word
203
+ cleaned_term = cleaned_term.strip()
204
+ if is_prefix:
205
+ return f"{cleaned_term}:*"
206
+ else:
207
+ return cleaned_term
208
+
209
async def search(
    self,
    search_text: Optional[str] = None,
    permalink: Optional[str] = None,
    permalink_match: Optional[str] = None,
    title: Optional[str] = None,
    types: Optional[List[str]] = None,
    after_date: Optional[datetime] = None,
    search_item_types: Optional[List[SearchItemType]] = None,
    limit: int = 10,
    offset: int = 0,
) -> List[SearchIndexRow]:
    """Search across all indexed content using PostgreSQL tsvector.

    All supplied filters are combined with AND, and results are always
    restricted to ``self.project_id``. Every caller-supplied value is sent as
    a bound parameter; nothing from the arguments is interpolated into the
    SQL text.

    Args:
        search_text: Free-text query matched against ``textsearchable_index_col``.
            Blank text or ``"*"`` adds no text condition.
        permalink: Exact permalink to match.
        permalink_match: Permalink matched after lowercasing and stripping. If it
            contains ``*`` it is used as a LIKE pattern (``*`` becomes ``%``).
        title: Query matched against the title only, without prefix matching.
        types: Entity types; a row matches when its metadata contains any of
            them as ``entity_type``.
        after_date: Keep only rows created after this timestamp.
        search_item_types: Restrict the ``type`` column to these item types.
        limit: Maximum number of rows to return.
        offset: Number of rows to skip.

    Returns:
        Matching rows ordered by score (highest first), then id. The score is
        ``ts_rank`` for text queries and 0 otherwise. An empty list is returned
        when PostgreSQL rejects the generated tsquery.

    Raises:
        Exception: Any database error other than a tsquery syntax error is
            logged and re-raised.
    """
    conditions = []
    params = {}
    order_by_clause = ""

    # "*" and blank text both mean "no text filter".
    stripped_text = search_text.strip() if search_text else ""
    has_text_query = bool(stripped_text) and stripped_text != "*"

    # Handle text search for title and content using tsvector
    if has_text_query:
        params["text"] = self._prepare_search_term(stripped_text)
        # Use @@ operator for tsvector matching
        conditions.append("textsearchable_index_col @@ to_tsquery('english', :text)")
        score_expr = "ts_rank(textsearchable_index_col, to_tsquery('english', :text))"
    else:
        # No text query means there is nothing to rank.
        score_expr = "0"

    # Handle title search
    if title:
        params["title_text"] = self._prepare_search_term(title.strip(), is_prefix=False)
        conditions.append("to_tsvector('english', title) @@ to_tsquery('english', :title_text)")

    # Handle permalink exact search
    if permalink:
        params["permalink"] = permalink
        conditions.append("permalink = :permalink")

    # Handle permalink pattern match. It uses its own bind name so that it
    # cannot overwrite the exact-match value when both filters are given.
    if permalink_match:
        permalink_text = permalink_match.lower().strip()
        if "*" in permalink_match:
            # Convert * to % for SQL LIKE
            params["permalink_match"] = permalink_text.replace("*", "%")
            conditions.append("permalink LIKE :permalink_match")
        else:
            params["permalink_match"] = permalink_text
            conditions.append("permalink = :permalink_match")

    # Handle search item type filter
    if search_item_types:
        placeholders = []
        for index, item_type in enumerate(search_item_types):
            key = f"item_type_{index}"
            params[key] = item_type.value
            placeholders.append(f":{key}")
        conditions.append(f"type IN ({', '.join(placeholders)})")

    # Handle entity type filter using JSONB containment (@>)
    if types:
        type_conditions = []
        for index, entity_type in enumerate(types):
            key = f"entity_type_{index}"
            # json.dumps handles quoting; the value is bound, never spliced
            # into the SQL string.
            params[key] = json.dumps({"entity_type": entity_type})
            type_conditions.append(f"metadata @> CAST(:{key} AS jsonb)")
        conditions.append(f"({' OR '.join(type_conditions)})")

    # Handle date filter
    if after_date:
        params["after_date"] = after_date
        conditions.append("created_at > :after_date")
        # Appended after "score DESC, id ASC", so recency only breaks
        # whatever ties remain after those two keys.
        order_by_clause = ", updated_at DESC"

    # Always filter by project_id
    params["project_id"] = self.project_id
    conditions.append("project_id = :project_id")

    # set limit and offset
    params["limit"] = limit
    params["offset"] = offset

    # Build WHERE clause
    where_clause = " AND ".join(conditions) if conditions else "1=1"

    sql = f"""
        SELECT
            project_id,
            id,
            title,
            permalink,
            file_path,
            type,
            metadata,
            from_id,
            to_id,
            relation_type,
            entity_id,
            content_snippet,
            category,
            created_at,
            updated_at,
            {score_expr} as score
        FROM search_index
        WHERE {where_clause}
        ORDER BY score DESC, id ASC {order_by_clause}
        LIMIT :limit
        OFFSET :offset
    """

    logger.trace(f"Search {sql} params: {params}")
    try:
        async with db.scoped_session(self.session_maker) as session:
            result = await session.execute(text(sql), params)
            rows = result.fetchall()
    except Exception as e:
        # Handle tsquery syntax errors (and only those).
        #
        # Important: Postgres errors for other failures (e.g. missing table) will still mention
        # `to_tsquery(...)` in the SQL text, so checking for the substring "tsquery" is too broad.
        msg = str(e).lower()
        if (
            "syntax error in tsquery" in msg
            or "invalid input syntax for type tsquery" in msg
            or "no operand in tsquery" in msg
            or "no operator in tsquery" in msg
        ):
            logger.warning(f"tsquery syntax error for search term: {search_text}, error: {e}")
            return []

        # Re-raise other database errors
        logger.error(f"Database error during search: {e}")
        raise

    results = [
        SearchIndexRow(
            project_id=self.project_id,
            id=row.id,
            title=row.title,
            permalink=row.permalink,
            file_path=row.file_path,
            type=row.type,
            score=float(row.score) if row.score else 0.0,
            # metadata may arrive already decoded (dict) or as a JSON string.
            metadata=(
                row.metadata
                if isinstance(row.metadata, dict)
                else (json.loads(row.metadata) if row.metadata else {})
            ),
            from_id=row.from_id,
            to_id=row.to_id,
            relation_type=row.relation_type,
            entity_id=row.entity_id,
            content_snippet=row.content_snippet,
            category=row.category,
            created_at=row.created_at,
            updated_at=row.updated_at,
        )
        for row in rows
    ]

    logger.trace(f"Found {len(results)} search results")
    for r in results:
        logger.trace(
            f"Search result: project_id: {r.project_id} type:{r.type} title: {r.title} permalink: {r.permalink} score: {r.score}"
        )

    return results
383
+
384
async def bulk_index_items(self, search_index_rows: List[SearchIndexRow]) -> None:
    """Upsert a batch of rows into ``search_index`` with one execute call.

    The statement is an ``INSERT ... ON CONFLICT (permalink, project_id)
    WHERE permalink IS NOT NULL DO UPDATE``, targeting the partial unique
    index ``uix_search_index_permalink_project``. Rows whose permalink already
    exists for this project are updated in place, which keeps parallel
    indexing from failing on duplicates. Rows with a NULL permalink
    (observations, relations) are outside that partial index and are inserted
    directly.

    Args:
        search_index_rows: List of SearchIndexRow objects to index. An empty
            list is a no-op.
    """
    if not search_index_rows:
        return

    upsert_statement = text("""
        INSERT INTO search_index (
            id, title, content_stems, content_snippet, permalink, file_path, type, metadata,
            from_id, to_id, relation_type,
            entity_id, category,
            created_at, updated_at,
            project_id
        ) VALUES (
            :id, :title, :content_stems, :content_snippet, :permalink, :file_path, :type, :metadata,
            :from_id, :to_id, :relation_type,
            :entity_id, :category,
            :created_at, :updated_at,
            :project_id
        )
        ON CONFLICT (permalink, project_id) WHERE permalink IS NOT NULL DO UPDATE SET
            id = EXCLUDED.id,
            title = EXCLUDED.title,
            content_stems = EXCLUDED.content_stems,
            content_snippet = EXCLUDED.content_snippet,
            file_path = EXCLUDED.file_path,
            type = EXCLUDED.type,
            metadata = EXCLUDED.metadata,
            from_id = EXCLUDED.from_id,
            to_id = EXCLUDED.to_id,
            relation_type = EXCLUDED.relation_type,
            entity_id = EXCLUDED.entity_id,
            category = EXCLUDED.category,
            created_at = EXCLUDED.created_at,
            updated_at = EXCLUDED.updated_at
    """)

    async with db.scoped_session(self.session_maker) as session:
        # JSON fields are serialized to strings because the statement is raw
        # SQL; every row is stamped with this repository's project.
        batch = [
            {**row.to_insert(serialize_json=True), "project_id": self.project_id}
            for row in search_index_rows
        ]

        await session.execute(upsert_statement, batch)
        logger.debug(f"Bulk indexed {len(search_index_rows)} rows")
        await session.commit()
@@ -0,0 +1,10 @@
1
+ from basic_memory.repository.repository import Repository
2
+ from basic_memory.models.project import Project
3
+
4
+
5
class ProjectInfoRepository(Repository):
    """Repository for statistics queries.

    Defines no query methods of its own; it only constructs the base
    ``Repository`` with the ``Project`` model.
    """

    def __init__(self, session_maker) -> None:
        # Initialize with Project model as a reference
        super().__init__(session_maker, Project)
@@ -0,0 +1,140 @@
1
+ """Repository for managing projects in Basic Memory."""
2
+
3
+ from pathlib import Path
4
+ from typing import Optional, Sequence, Union
5
+
6
+
7
+ from sqlalchemy import text
8
+ from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
9
+
10
+ from basic_memory import db
11
+ from basic_memory.models.project import Project
12
+ from basic_memory.repository.repository import Repository
13
+
14
+
15
+ class ProjectRepository(Repository[Project]):
16
+ """Repository for Project model.
17
+
18
+ Projects represent collections of knowledge entities grouped together.
19
+ Each entity, observation, and relation belongs to a specific project.
20
+ """
21
+
22
def __init__(self, session_maker: async_sessionmaker[AsyncSession]):
    """Initialize with session maker.

    Args:
        session_maker: Async session factory, passed to the base
            ``Repository`` together with the ``Project`` model.
    """
    super().__init__(session_maker, Project)
25
+
26
async def get_by_name(self, name: str) -> Optional[Project]:
    """Look up a project whose name equals ``name`` exactly.

    Args:
        name: Unique name of the project
    """
    return await self.find_one(self.select().where(Project.name == name))
34
+
35
async def get_by_name_case_insensitive(self, name: str) -> Optional[Project]:
    """Get project by name (case-insensitive match).

    The comparison uses ILIKE, where ``%`` and ``_`` are wildcards. They are
    escaped here so the lookup is a case-insensitive *exact* match. Without
    the escaping, a name such as ``my_project`` would also match
    ``my-project``.

    Args:
        name: Project name (case-insensitive)

    Returns:
        Project if found, None otherwise
    """
    escape_char = "/"
    # Escape the escape character first so it is not doubled twice.
    literal_name = (
        name.replace(escape_char, escape_char * 2)
        .replace("%", f"{escape_char}%")
        .replace("_", f"{escape_char}_")
    )
    query = self.select().where(Project.name.ilike(literal_name, escape=escape_char))
    return await self.find_one(query)
46
+
47
async def get_by_permalink(self, permalink: str) -> Optional[Project]:
    """Look up a project whose permalink equals ``permalink``.

    Args:
        permalink: URL-friendly identifier for the project
    """
    return await self.find_one(self.select().where(Project.permalink == permalink))
55
+
56
async def get_by_path(self, path: Union[Path, str]) -> Optional[Project]:
    """Look up a project by its filesystem path.

    Args:
        path: Path to the project directory. It is converted to a POSIX-style
            string (forward slashes) before being compared with the stored path.
    """
    posix_path = Path(path).as_posix()
    matching = self.select().where(Project.path == posix_path)
    return await self.find_one(matching)
64
+
65
async def get_by_id(self, project_id: int) -> Optional[Project]:
    """Fetch a project by its numeric ID inside a scoped session.

    Args:
        project_id: Numeric project ID

    Returns:
        Project if found, None otherwise
    """
    async with db.scoped_session(self.session_maker) as session:
        found = await self.select_by_id(session, project_id)
        return found
76
+
77
async def get_by_external_id(self, external_id: str) -> Optional[Project]:
    """Look up a project whose ``external_id`` equals the given value.

    Args:
        external_id: External UUID identifier

    Returns:
        Project if found, None otherwise
    """
    return await self.find_one(self.select().where(Project.external_id == external_id))
88
+
89
async def get_default_project(self) -> Optional[Project]:
    """Return the default project, if one is set.

    The filter is ``is_default IS NOT NULL`` rather than ``= TRUE``;
    ``set_as_default`` clears the flag by writing NULL.
    """
    has_default_flag = Project.is_default.is_not(None)
    return await self.find_one(self.select().where(has_default_flag))
93
+
94
async def get_active_projects(self) -> Sequence[Project]:
    """Return every project whose ``is_active`` flag is true, as a list."""
    # "== True" is intentional: it builds a SQL comparison, not a Python one.
    active_only = self.select().where(Project.is_active == True)  # noqa: E712
    rows = await self.execute_query(active_only)
    return list(rows.scalars().all())
99
+
100
async def set_as_default(self, project_id: int) -> Optional[Project]:
    """Set a project as the default and unset previous default.

    The target is looked up *before* any flag is cleared. If ``project_id``
    does not exist, nothing is modified and the current default stays in
    place.

    Args:
        project_id: ID of the project to set as default

    Returns:
        The updated project if found, None otherwise
    """
    async with db.scoped_session(self.session_maker) as session:
        target_project = await self.select_by_id(session, project_id)
        if not target_project:
            # Unknown project: leave the existing default untouched.
            return None

        # Clear the default flag on every *other* project using direct SQL.
        # The target row is excluded so the object loaded above never goes
        # stale; otherwise re-assigning True to an already-default project
        # would be a no-op for the ORM and the flag would be lost.
        await session.execute(
            text(
                "UPDATE project SET is_default = NULL "
                "WHERE is_default IS NOT NULL AND id != :project_id"
            ),
            {"project_id": project_id},
        )
        await session.flush()

        # Set the new default project
        target_project.is_default = True
        await session.flush()
        return target_project
123
+
124
async def update_path(self, project_id: int, new_path: str) -> Optional[Project]:
    """Change the stored filesystem path of a project.

    Args:
        project_id: ID of the project to update
        new_path: New filesystem path for the project (stored as given)

    Returns:
        The updated project if found, None otherwise
    """
    async with db.scoped_session(self.session_maker) as session:
        existing = await self.select_by_id(session, project_id)
        if not existing:
            return None

        existing.path = new_path
        await session.flush()
        return existing