basic-memory 0.16.1__py3-none-any.whl → 0.17.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of basic-memory might be problematic; see the registry advisory for details.

Files changed (143)
  1. basic_memory/__init__.py +1 -1
  2. basic_memory/alembic/env.py +112 -26
  3. basic_memory/alembic/versions/314f1ea54dc4_add_postgres_full_text_search_support_.py +131 -0
  4. basic_memory/alembic/versions/5fe1ab1ccebe_add_projects_table.py +15 -3
  5. basic_memory/alembic/versions/647e7a75e2cd_project_constraint_fix.py +44 -36
  6. basic_memory/alembic/versions/6830751f5fb6_merge_multiple_heads.py +24 -0
  7. basic_memory/alembic/versions/a2b3c4d5e6f7_add_search_index_entity_cascade.py +56 -0
  8. basic_memory/alembic/versions/cc7172b46608_update_search_index_schema.py +13 -0
  9. basic_memory/alembic/versions/f8a9b2c3d4e5_add_pg_trgm_for_fuzzy_link_resolution.py +239 -0
  10. basic_memory/alembic/versions/g9a0b3c4d5e6_add_external_id_to_project_and_entity.py +173 -0
  11. basic_memory/api/app.py +45 -24
  12. basic_memory/api/container.py +133 -0
  13. basic_memory/api/routers/knowledge_router.py +17 -5
  14. basic_memory/api/routers/project_router.py +68 -14
  15. basic_memory/api/routers/resource_router.py +37 -27
  16. basic_memory/api/routers/utils.py +53 -14
  17. basic_memory/api/v2/__init__.py +35 -0
  18. basic_memory/api/v2/routers/__init__.py +21 -0
  19. basic_memory/api/v2/routers/directory_router.py +93 -0
  20. basic_memory/api/v2/routers/importer_router.py +181 -0
  21. basic_memory/api/v2/routers/knowledge_router.py +427 -0
  22. basic_memory/api/v2/routers/memory_router.py +130 -0
  23. basic_memory/api/v2/routers/project_router.py +359 -0
  24. basic_memory/api/v2/routers/prompt_router.py +269 -0
  25. basic_memory/api/v2/routers/resource_router.py +286 -0
  26. basic_memory/api/v2/routers/search_router.py +73 -0
  27. basic_memory/cli/app.py +43 -7
  28. basic_memory/cli/auth.py +27 -4
  29. basic_memory/cli/commands/__init__.py +3 -1
  30. basic_memory/cli/commands/cloud/api_client.py +20 -5
  31. basic_memory/cli/commands/cloud/cloud_utils.py +13 -6
  32. basic_memory/cli/commands/cloud/rclone_commands.py +110 -14
  33. basic_memory/cli/commands/cloud/rclone_installer.py +18 -4
  34. basic_memory/cli/commands/cloud/upload.py +10 -3
  35. basic_memory/cli/commands/command_utils.py +52 -4
  36. basic_memory/cli/commands/db.py +78 -19
  37. basic_memory/cli/commands/format.py +198 -0
  38. basic_memory/cli/commands/import_chatgpt.py +12 -8
  39. basic_memory/cli/commands/import_claude_conversations.py +12 -8
  40. basic_memory/cli/commands/import_claude_projects.py +12 -8
  41. basic_memory/cli/commands/import_memory_json.py +12 -8
  42. basic_memory/cli/commands/mcp.py +8 -26
  43. basic_memory/cli/commands/project.py +22 -9
  44. basic_memory/cli/commands/status.py +3 -2
  45. basic_memory/cli/commands/telemetry.py +81 -0
  46. basic_memory/cli/container.py +84 -0
  47. basic_memory/cli/main.py +7 -0
  48. basic_memory/config.py +177 -77
  49. basic_memory/db.py +183 -77
  50. basic_memory/deps/__init__.py +293 -0
  51. basic_memory/deps/config.py +26 -0
  52. basic_memory/deps/db.py +56 -0
  53. basic_memory/deps/importers.py +200 -0
  54. basic_memory/deps/projects.py +238 -0
  55. basic_memory/deps/repositories.py +179 -0
  56. basic_memory/deps/services.py +480 -0
  57. basic_memory/deps.py +14 -409
  58. basic_memory/file_utils.py +212 -3
  59. basic_memory/ignore_utils.py +5 -5
  60. basic_memory/importers/base.py +40 -19
  61. basic_memory/importers/chatgpt_importer.py +17 -4
  62. basic_memory/importers/claude_conversations_importer.py +27 -12
  63. basic_memory/importers/claude_projects_importer.py +50 -14
  64. basic_memory/importers/memory_json_importer.py +36 -16
  65. basic_memory/importers/utils.py +5 -2
  66. basic_memory/markdown/entity_parser.py +62 -23
  67. basic_memory/markdown/markdown_processor.py +67 -4
  68. basic_memory/markdown/plugins.py +4 -2
  69. basic_memory/markdown/utils.py +10 -1
  70. basic_memory/mcp/async_client.py +1 -0
  71. basic_memory/mcp/clients/__init__.py +28 -0
  72. basic_memory/mcp/clients/directory.py +70 -0
  73. basic_memory/mcp/clients/knowledge.py +176 -0
  74. basic_memory/mcp/clients/memory.py +120 -0
  75. basic_memory/mcp/clients/project.py +89 -0
  76. basic_memory/mcp/clients/resource.py +71 -0
  77. basic_memory/mcp/clients/search.py +65 -0
  78. basic_memory/mcp/container.py +110 -0
  79. basic_memory/mcp/project_context.py +47 -33
  80. basic_memory/mcp/prompts/ai_assistant_guide.py +2 -2
  81. basic_memory/mcp/prompts/recent_activity.py +2 -2
  82. basic_memory/mcp/prompts/utils.py +3 -3
  83. basic_memory/mcp/server.py +58 -0
  84. basic_memory/mcp/tools/build_context.py +14 -14
  85. basic_memory/mcp/tools/canvas.py +34 -12
  86. basic_memory/mcp/tools/chatgpt_tools.py +4 -1
  87. basic_memory/mcp/tools/delete_note.py +31 -7
  88. basic_memory/mcp/tools/edit_note.py +14 -9
  89. basic_memory/mcp/tools/list_directory.py +7 -17
  90. basic_memory/mcp/tools/move_note.py +35 -31
  91. basic_memory/mcp/tools/project_management.py +29 -25
  92. basic_memory/mcp/tools/read_content.py +13 -3
  93. basic_memory/mcp/tools/read_note.py +24 -14
  94. basic_memory/mcp/tools/recent_activity.py +32 -38
  95. basic_memory/mcp/tools/search.py +17 -10
  96. basic_memory/mcp/tools/utils.py +28 -0
  97. basic_memory/mcp/tools/view_note.py +2 -1
  98. basic_memory/mcp/tools/write_note.py +37 -14
  99. basic_memory/models/knowledge.py +15 -2
  100. basic_memory/models/project.py +7 -1
  101. basic_memory/models/search.py +58 -2
  102. basic_memory/project_resolver.py +222 -0
  103. basic_memory/repository/entity_repository.py +210 -3
  104. basic_memory/repository/observation_repository.py +1 -0
  105. basic_memory/repository/postgres_search_repository.py +451 -0
  106. basic_memory/repository/project_repository.py +38 -1
  107. basic_memory/repository/relation_repository.py +58 -2
  108. basic_memory/repository/repository.py +1 -0
  109. basic_memory/repository/search_index_row.py +95 -0
  110. basic_memory/repository/search_repository.py +77 -615
  111. basic_memory/repository/search_repository_base.py +241 -0
  112. basic_memory/repository/sqlite_search_repository.py +437 -0
  113. basic_memory/runtime.py +61 -0
  114. basic_memory/schemas/base.py +36 -6
  115. basic_memory/schemas/directory.py +2 -1
  116. basic_memory/schemas/memory.py +9 -2
  117. basic_memory/schemas/project_info.py +2 -0
  118. basic_memory/schemas/response.py +84 -27
  119. basic_memory/schemas/search.py +5 -0
  120. basic_memory/schemas/sync_report.py +1 -1
  121. basic_memory/schemas/v2/__init__.py +27 -0
  122. basic_memory/schemas/v2/entity.py +133 -0
  123. basic_memory/schemas/v2/resource.py +47 -0
  124. basic_memory/services/context_service.py +219 -43
  125. basic_memory/services/directory_service.py +26 -11
  126. basic_memory/services/entity_service.py +68 -33
  127. basic_memory/services/file_service.py +131 -16
  128. basic_memory/services/initialization.py +51 -26
  129. basic_memory/services/link_resolver.py +1 -0
  130. basic_memory/services/project_service.py +68 -43
  131. basic_memory/services/search_service.py +75 -16
  132. basic_memory/sync/__init__.py +2 -1
  133. basic_memory/sync/coordinator.py +160 -0
  134. basic_memory/sync/sync_service.py +135 -115
  135. basic_memory/sync/watch_service.py +32 -12
  136. basic_memory/telemetry.py +249 -0
  137. basic_memory/utils.py +96 -75
  138. {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/METADATA +129 -5
  139. basic_memory-0.17.4.dist-info/RECORD +193 -0
  140. {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/WHEEL +1 -1
  141. basic_memory-0.16.1.dist-info/RECORD +0 -148
  142. {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/entry_points.txt +0 -0
  143. {basic_memory-0.16.1.dist-info → basic_memory-0.17.4.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,451 @@
1
+ """PostgreSQL tsvector-based search repository implementation."""
2
+
3
+ import json
4
+ import re
5
+ from datetime import datetime
6
+ from typing import List, Optional
7
+
8
+
9
+ from loguru import logger
10
+ from sqlalchemy import text
11
+
12
+ from basic_memory import db
13
+ from basic_memory.repository.search_index_row import SearchIndexRow
14
+ from basic_memory.repository.search_repository_base import SearchRepositoryBase
15
+ from basic_memory.schemas.search import SearchItemType
16
+
17
+
18
class PostgresSearchRepository(SearchRepositoryBase):
    """PostgreSQL tsvector implementation of search repository.

    Uses PostgreSQL's full-text search capabilities with:
    - tsvector for document representation
    - tsquery for query representation
    - GIN indexes for performance
    - ts_rank() function for relevance scoring
    - JSONB containment operators for metadata search

    Note: This implementation uses UPSERT patterns (INSERT ... ON CONFLICT) instead of
    delete-then-insert to handle race conditions during parallel entity indexing.
    The partial unique index uix_search_index_permalink_project prevents duplicate
    permalinks per project.
    """

    # Shared UPSERT statement used by both index_item() and bulk_index_items().
    # ON CONFLICT (permalink, project_id) matches the partial unique index
    # uix_search_index_permalink_project WHERE permalink IS NOT NULL.
    # Rows with a NULL permalink (observations, relations) never conflict on the
    # partial index and are inserted directly.
    _UPSERT_SQL = text("""
        INSERT INTO search_index (
            id, title, content_stems, content_snippet, permalink, file_path, type, metadata,
            from_id, to_id, relation_type,
            entity_id, category,
            created_at, updated_at,
            project_id
        ) VALUES (
            :id, :title, :content_stems, :content_snippet, :permalink, :file_path, :type, :metadata,
            :from_id, :to_id, :relation_type,
            :entity_id, :category,
            :created_at, :updated_at,
            :project_id
        )
        ON CONFLICT (permalink, project_id) WHERE permalink IS NOT NULL DO UPDATE SET
            id = EXCLUDED.id,
            title = EXCLUDED.title,
            content_stems = EXCLUDED.content_stems,
            content_snippet = EXCLUDED.content_snippet,
            file_path = EXCLUDED.file_path,
            type = EXCLUDED.type,
            metadata = EXCLUDED.metadata,
            from_id = EXCLUDED.from_id,
            to_id = EXCLUDED.to_id,
            relation_type = EXCLUDED.relation_type,
            entity_id = EXCLUDED.entity_id,
            category = EXCLUDED.category,
            created_at = EXCLUDED.created_at,
            updated_at = EXCLUDED.updated_at
    """)

    async def init_search_index(self):
        """No-op for Postgres: schema creation is handled by Alembic migrations.

        The migrations create:
        - CREATE TABLE search_index (...)
        - ADD COLUMN textsearchable_index_col tsvector GENERATED ALWAYS AS (...)
        - CREATE INDEX USING GIN on textsearchable_index_col
        - CREATE INDEX USING GIN on metadata jsonb_path_ops
        """
        logger.info("PostgreSQL search index initialization handled by migrations")

    async def index_item(self, search_index_row: SearchIndexRow) -> None:
        """Index or update a single item using UPSERT.

        Uses INSERT ... ON CONFLICT to handle race conditions during parallel
        entity indexing. For rows with non-null permalinks (entities), conflicts
        are resolved by updating the existing row; rows with null permalinks
        never conflict on the partial unique index.

        Args:
            search_index_row: The row to index.
        """
        async with db.scoped_session(self.session_maker) as session:
            # Serialize JSON for raw SQL: both TEXT and JSONB accept JSON strings.
            insert_data = search_index_row.to_insert(serialize_json=True)
            insert_data["project_id"] = self.project_id
            await session.execute(self._UPSERT_SQL, insert_data)
            logger.debug(f"indexed row {search_index_row}")
            await session.commit()

    def _prepare_search_term(self, term: str, is_prefix: bool = True) -> str:
        """Prepare a search term for tsquery format.

        Args:
            term: The search term to prepare
            is_prefix: Whether to add prefix search capability (:* operator)

        Returns:
            Formatted search term for tsquery

        For Postgres:
        - Boolean operators are converted to tsquery format (&, |, !)
        - Prefix matching uses the :* operator
        - Terms are sanitized to prevent tsquery syntax errors
        """
        # Explicit boolean operators take the boolean-translation path.
        boolean_operators = [" AND ", " OR ", " NOT "]
        if any(op in f" {term} " for op in boolean_operators):
            return self._prepare_boolean_query(term)

        # For non-Boolean queries, prepare a single (possibly multi-word) term.
        return self._prepare_single_term(term, is_prefix)

    def _prepare_boolean_query(self, query: str) -> str:
        """Convert Boolean query to tsquery format.

        Args:
            query: A Boolean query like "coffee AND brewing" or "(pour OR french) AND press"

        Returns:
            tsquery-formatted string with & (AND), | (OR), ! (NOT) operators

        Examples:
            "coffee AND brewing" -> "coffee & brewing"
            "(pour OR french) AND press" -> "(pour | french) & press"
            "coffee NOT decaf" -> "coffee & !decaf"
        """
        # Replace Boolean operators with tsquery operators; parentheses are
        # preserved for grouping.
        result = query
        result = re.sub(r"\bAND\b", "&", result)
        result = re.sub(r"\bOR\b", "|", result)
        # NOT becomes "& !" with the ! attached to the following term:
        # "Python NOT Django" -> "Python & !Django"
        result = re.sub(r"\bNOT\s+", "& !", result)
        return result

    def _prepare_single_term(self, term: str, is_prefix: bool = True) -> str:
        """Prepare a single search term for tsquery.

        Args:
            term: A single search term
            is_prefix: Whether to add prefix search capability (:* suffix)

        Returns:
            A properly formatted single term for tsquery

        For Postgres tsquery:
        - Multi-word queries become "word1 & word2"
        - Prefix matching uses ":*" suffix (e.g., "coff:*")
        - Special characters that need escaping: & | ! ( ) :
        """
        if not term or not term.strip():
            return term

        term = term.strip()

        # Already a wildcard pattern: translate * to Postgres prefix syntax.
        if "*" in term:
            return term.replace("*", ":*")

        # Strip tsquery special characters; they cause syntax errors when not
        # used as operators.
        special_chars = ["&", "|", "!", "(", ")", ":"]
        cleaned_term = term
        for char in special_chars:
            cleaned_term = cleaned_term.replace(char, " ")

        # Multi-word query: AND the words together (optionally prefix-matched).
        if " " in cleaned_term:
            words = [w for w in cleaned_term.split() if w.strip()]
            if not words:
                # Input was entirely special characters; return a safe term
                # that cannot raise a tsquery syntax error (and won't match).
                return "NOSPECIALCHARS:*"
            if is_prefix:
                prepared_words = [f"{word}:*" for word in words]
            else:
                prepared_words = words
            return " & ".join(prepared_words)

        # Single word.
        cleaned_term = cleaned_term.strip()
        if is_prefix:
            return f"{cleaned_term}:*"
        return cleaned_term

    async def search(
        self,
        search_text: Optional[str] = None,
        permalink: Optional[str] = None,
        permalink_match: Optional[str] = None,
        title: Optional[str] = None,
        types: Optional[List[str]] = None,
        after_date: Optional[datetime] = None,
        search_item_types: Optional[List[SearchItemType]] = None,
        limit: int = 10,
        offset: int = 0,
    ) -> List[SearchIndexRow]:
        """Search across all indexed content using PostgreSQL tsvector.

        All caller-supplied filter values are bound as query parameters rather
        than interpolated into the SQL text, so filter values cannot inject SQL.
        """
        conditions: List[str] = []
        params: dict = {}
        order_by_clause = ""

        # Full-text search over title + content via tsvector.
        if search_text:
            stripped = search_text.strip()
            if stripped and stripped != "*":
                # "*"/empty are wildcard searches: no text condition at all.
                params["text"] = self._prepare_search_term(stripped)
                conditions.append("textsearchable_index_col @@ to_tsquery('english', :text)")

        # Title-only search (no prefix matching).
        if title:
            params["title_text"] = self._prepare_search_term(title.strip(), is_prefix=False)
            conditions.append(
                "to_tsvector('english', title) @@ to_tsquery('english', :title_text)"
            )

        # Exact permalink match.
        if permalink:
            params["permalink"] = permalink
            conditions.append("permalink = :permalink")

        # Permalink pattern match: * is translated to SQL LIKE's %.
        if permalink_match:
            permalink_text = permalink_match.lower().strip()
            if "*" in permalink_match:
                params["permalink"] = permalink_text.replace("*", "%")
                conditions.append("permalink LIKE :permalink")
            else:
                params["permalink"] = permalink_text
                conditions.append("permalink = :permalink")

        # Item-type filter. Values are bound as parameters (not interpolated)
        # to keep user-influenced input out of the SQL text.
        if search_item_types:
            placeholders = []
            for i, item_type in enumerate(search_item_types):
                key = f"item_type_{i}"
                params[key] = item_type.value
                placeholders.append(f":{key}")
            conditions.append(f"type IN ({', '.join(placeholders)})")

        # Entity-type filter via JSONB containment (@>), using the GIN
        # jsonb_path_ops index. Each type value is bound as a JSON parameter
        # and cast server-side, preventing injection through type strings.
        if types:
            type_conditions = []
            for i, entity_type in enumerate(types):
                key = f"entity_type_{i}"
                params[key] = json.dumps({"entity_type": entity_type})
                type_conditions.append(f"metadata @> CAST(:{key} AS JSONB)")
            conditions.append(f"({' OR '.join(type_conditions)})")

        # Date filter; most recent first when filtering by date.
        if after_date:
            params["after_date"] = after_date
            conditions.append("created_at > :after_date")
            order_by_clause = ", updated_at DESC"

        # Always scope to the current project.
        params["project_id"] = self.project_id
        conditions.append("project_id = :project_id")

        params["limit"] = limit
        params["offset"] = offset

        where_clause = " AND ".join(conditions) if conditions else "1=1"

        # ts_rank() scoring only applies when a text condition was added;
        # otherwise use a constant 0 so ORDER BY still works.
        if "text" in params:
            score_expr = "ts_rank(textsearchable_index_col, to_tsquery('english', :text))"
        else:
            score_expr = "0"

        sql = f"""
            SELECT
                project_id,
                id,
                title,
                permalink,
                file_path,
                type,
                metadata,
                from_id,
                to_id,
                relation_type,
                entity_id,
                content_snippet,
                category,
                created_at,
                updated_at,
                {score_expr} as score
            FROM search_index
            WHERE {where_clause}
            ORDER BY score DESC, id ASC {order_by_clause}
            LIMIT :limit
            OFFSET :offset
        """

        logger.trace(f"Search {sql} params: {params}")
        try:
            async with db.scoped_session(self.session_maker) as session:
                result = await session.execute(text(sql), params)
                rows = result.fetchall()
        except Exception as e:
            # Handle tsquery syntax errors (and only those).
            #
            # Important: Postgres errors for other failures (e.g. missing table)
            # will still mention `to_tsquery(...)` in the SQL text, so checking
            # for the substring "tsquery" alone would be too broad.
            msg = str(e).lower()
            if (
                "syntax error in tsquery" in msg
                or "invalid input syntax for type tsquery" in msg
                or "no operand in tsquery" in msg
                or "no operator in tsquery" in msg
            ):
                logger.warning(f"tsquery syntax error for search term: {search_text}, error: {e}")
                return []

            # Re-raise other database errors.
            logger.error(f"Database error during search: {e}")
            raise

        results = [
            SearchIndexRow(
                project_id=self.project_id,
                id=row.id,
                title=row.title,
                permalink=row.permalink,
                file_path=row.file_path,
                type=row.type,
                score=float(row.score) if row.score else 0.0,
                metadata=(
                    row.metadata
                    if isinstance(row.metadata, dict)
                    else (json.loads(row.metadata) if row.metadata else {})
                ),
                from_id=row.from_id,
                to_id=row.to_id,
                relation_type=row.relation_type,
                entity_id=row.entity_id,
                content_snippet=row.content_snippet,
                category=row.category,
                created_at=row.created_at,
                updated_at=row.updated_at,
            )
            for row in rows
        ]

        logger.trace(f"Found {len(results)} search results")
        for r in results:
            logger.trace(
                f"Search result: project_id: {r.project_id} type:{r.type} title: {r.title} permalink: {r.permalink} score: {r.score}"
            )

        return results

    async def bulk_index_items(self, search_index_rows: List[SearchIndexRow]) -> None:
        """Index multiple items in a single batch operation using UPSERT.

        Uses the same INSERT ... ON CONFLICT statement as index_item() to handle
        race conditions during parallel entity indexing. Rows with non-null
        permalinks (entities) are updated on conflict; rows with null permalinks
        (observations, relations) never conflict on the partial index and are
        inserted directly.

        Args:
            search_index_rows: List of SearchIndexRow objects to index
        """
        if not search_index_rows:
            return

        async with db.scoped_session(self.session_maker) as session:
            # When using text() raw SQL, always serialize JSON to string:
            # both SQLite (TEXT) and Postgres (JSONB) accept JSON strings,
            # and the driver/column type handles conversion.
            insert_data_list = []
            for row in search_index_rows:
                insert_data = row.to_insert(serialize_json=True)
                insert_data["project_id"] = self.project_id
                insert_data_list.append(insert_data)

            # executemany-style UPSERT over the shared statement.
            await session.execute(self._UPSERT_SQL, insert_data_list)
            logger.debug(f"Bulk indexed {len(search_index_rows)} rows")
            await session.commit()
@@ -3,6 +3,7 @@
3
3
  from pathlib import Path
4
4
  from typing import Optional, Sequence, Union
5
5
 
6
+
6
7
  from sqlalchemy import text
7
8
  from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
8
9
 
@@ -23,7 +24,7 @@ class ProjectRepository(Repository[Project]):
23
24
  super().__init__(session_maker, Project)
24
25
 
25
26
  async def get_by_name(self, name: str) -> Optional[Project]:
26
- """Get project by name.
27
+ """Get project by name (exact match).
27
28
 
28
29
  Args:
29
30
  name: Unique name of the project
@@ -31,6 +32,18 @@ class ProjectRepository(Repository[Project]):
31
32
  query = self.select().where(Project.name == name)
32
33
  return await self.find_one(query)
33
34
 
35
+ async def get_by_name_case_insensitive(self, name: str) -> Optional[Project]:
36
+ """Get project by name (case-insensitive match).
37
+
38
+ Args:
39
+ name: Project name (case-insensitive)
40
+
41
+ Returns:
42
+ Project if found, None otherwise
43
+ """
44
+ query = self.select().where(Project.name.ilike(name))
45
+ return await self.find_one(query)
46
+
34
47
  async def get_by_permalink(self, permalink: str) -> Optional[Project]:
35
48
  """Get project by permalink.
36
49
 
@@ -49,6 +62,30 @@ class ProjectRepository(Repository[Project]):
49
62
  query = self.select().where(Project.path == Path(path).as_posix())
50
63
  return await self.find_one(query)
51
64
 
65
+ async def get_by_id(self, project_id: int) -> Optional[Project]:
66
+ """Get project by numeric ID.
67
+
68
+ Args:
69
+ project_id: Numeric project ID
70
+
71
+ Returns:
72
+ Project if found, None otherwise
73
+ """
74
+ async with db.scoped_session(self.session_maker) as session:
75
+ return await self.select_by_id(session, project_id)
76
+
77
+ async def get_by_external_id(self, external_id: str) -> Optional[Project]:
78
+ """Get project by external UUID.
79
+
80
+ Args:
81
+ external_id: External UUID identifier
82
+
83
+ Returns:
84
+ Project if found, None otherwise
85
+ """
86
+ query = self.select().where(Project.external_id == external_id)
87
+ return await self.find_one(query)
88
+
52
89
  async def get_default_project(self) -> Optional[Project]:
53
90
  """Get the default project (the one marked as is_default=True)."""
54
91
  query = self.select().where(Project.is_default.is_not(None))
@@ -1,9 +1,11 @@
1
1
  """Repository for managing Relation objects."""
2
2
 
3
- from sqlalchemy import and_, delete
4
3
  from typing import Sequence, List, Optional
5
4
 
6
- from sqlalchemy import select
5
+
6
+ from sqlalchemy import and_, delete, select
7
+ from sqlalchemy.dialects.postgresql import insert as pg_insert
8
+ from sqlalchemy.dialects.sqlite import insert as sqlite_insert
7
9
  from sqlalchemy.ext.asyncio import async_sessionmaker
8
10
  from sqlalchemy.orm import selectinload, aliased
9
11
  from sqlalchemy.orm.interfaces import LoaderOption
@@ -86,5 +88,59 @@ class RelationRepository(Repository[Relation]):
86
88
  result = await self.execute_query(query)
87
89
  return result.scalars().all()
88
90
 
91
+ async def add_all_ignore_duplicates(self, relations: List[Relation]) -> int:
92
+ """Bulk insert relations, ignoring duplicates.
93
+
94
+ Uses ON CONFLICT DO NOTHING to skip relations that would violate the
95
+ unique constraint on (from_id, to_name, relation_type). This is useful
96
+ for bulk operations where the same link may appear multiple times in
97
+ a document.
98
+
99
+ Works with both SQLite and PostgreSQL dialects.
100
+
101
+ Args:
102
+ relations: List of Relation objects to insert
103
+
104
+ Returns:
105
+ Number of relations actually inserted (excludes duplicates)
106
+ """
107
+ if not relations:
108
+ return 0
109
+
110
+ # Convert Relation objects to dicts for insert
111
+ values = [
112
+ {
113
+ "project_id": r.project_id if r.project_id else self.project_id,
114
+ "from_id": r.from_id,
115
+ "to_id": r.to_id,
116
+ "to_name": r.to_name,
117
+ "relation_type": r.relation_type,
118
+ "context": r.context,
119
+ }
120
+ for r in relations
121
+ ]
122
+
123
+ async with db.scoped_session(self.session_maker) as session:
124
+ # Check dialect to use appropriate insert
125
+ dialect_name = session.bind.dialect.name if session.bind else "sqlite"
126
+
127
+ if dialect_name == "postgresql": # pragma: no cover
128
+ # PostgreSQL: use RETURNING to count inserted rows
129
+ # (rowcount is 0 for ON CONFLICT DO NOTHING)
130
+ stmt = ( # pragma: no cover
131
+ pg_insert(Relation)
132
+ .values(values)
133
+ .on_conflict_do_nothing()
134
+ .returning(Relation.id)
135
+ )
136
+ result = await session.execute(stmt) # pragma: no cover
137
+ return len(result.fetchall()) # pragma: no cover
138
+ else:
139
+ # SQLite: rowcount works correctly
140
+ stmt = sqlite_insert(Relation).values(values)
141
+ stmt = stmt.on_conflict_do_nothing()
142
+ result = await session.execute(stmt)
143
+ return result.rowcount if result.rowcount > 0 else 0
144
+
89
145
  def get_load_options(self) -> List[LoaderOption]:
90
146
  return [selectinload(Relation.from_entity), selectinload(Relation.to_entity)]
@@ -2,6 +2,7 @@
2
2
 
3
3
  from typing import Type, Optional, Any, Sequence, TypeVar, List, Dict
4
4
 
5
+
5
6
  from loguru import logger
6
7
  from sqlalchemy import (
7
8
  select,
@@ -0,0 +1,95 @@
1
+ """Search index data structures."""
2
+
3
+ import json
4
+ from dataclasses import dataclass
5
+ from datetime import datetime
6
+ from typing import Optional
7
+ from pathlib import Path
8
+
9
+ from basic_memory.schemas.search import SearchItemType
10
+
11
+
12
@dataclass
class SearchIndexRow:
    """Search result with score and metadata."""

    project_id: int
    id: int
    type: str
    file_path: str

    # date values
    created_at: datetime
    updated_at: datetime

    permalink: Optional[str] = None
    metadata: Optional[dict] = None

    # assigned in result
    score: Optional[float] = None

    # Type-specific fields
    title: Optional[str] = None  # entity
    content_stems: Optional[str] = None  # entity, observation
    content_snippet: Optional[str] = None  # entity, observation
    entity_id: Optional[int] = None  # observations
    category: Optional[str] = None  # observations
    from_id: Optional[int] = None  # relations
    to_id: Optional[int] = None  # relations
    relation_type: Optional[str] = None  # relations

    @property
    def content(self):
        """Alias for the stored content snippet."""
        return self.content_snippet

    @property
    def directory(self) -> str:
        """Directory portion of file_path, with a leading slash.

        "projects/notes/ideas.md" -> "/projects/notes"; a root-level file
        such as "README.md" -> "/".

        NOTE(review): by operator precedence the guard below reads as
        (type != entity) AND (no file_path) — confirm that an OR was not
        intended before changing it.
        """
        if not self.type == SearchItemType.ENTITY.value and not self.file_path:
            return ""

        # Normalize separators so Windows (\) and Unix (/) paths behave alike.
        posix_path = Path(self.file_path).as_posix()
        segments = posix_path.split("/")

        # A bare filename lives at the root.
        if len(segments) <= 1:
            return "/"

        # Everything except the trailing filename, rooted with a slash.
        return "/" + "/".join(segments[:-1])

    def to_insert(self, serialize_json: bool = True):
        """Build the dict of column values for a database insert.

        Args:
            serialize_json: If True, converts metadata dict to JSON string
                (for SQLite). If False, keeps metadata as dict (for Postgres
                JSONB).
        """
        meta = (
            json.dumps(self.metadata)
            if serialize_json and self.metadata
            else self.metadata
        )
        return {
            "id": self.id,
            "title": self.title,
            "content_stems": self.content_stems,
            "content_snippet": self.content_snippet,
            "permalink": self.permalink,
            "file_path": self.file_path,
            "type": self.type,
            "metadata": meta,
            "from_id": self.from_id,
            "to_id": self.to_id,
            "relation_type": self.relation_type,
            "entity_id": self.entity_id,
            "category": self.category,
            "created_at": self.created_at if self.created_at else None,
            "updated_at": self.updated_at if self.updated_at else None,
            "project_id": self.project_id,
        }
+ }