memorisdk 2.0.1__py3-none-any.whl → 2.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of memorisdk might be problematic.

Files changed (62)
  1. memori/__init__.py +3 -3
  2. memori/agents/conscious_agent.py +289 -77
  3. memori/agents/memory_agent.py +19 -9
  4. memori/agents/retrieval_agent.py +59 -51
  5. memori/config/manager.py +7 -7
  6. memori/config/memory_manager.py +25 -25
  7. memori/config/settings.py +13 -6
  8. memori/core/conversation.py +15 -15
  9. memori/core/database.py +14 -13
  10. memori/core/memory.py +376 -105
  11. memori/core/providers.py +25 -25
  12. memori/database/__init__.py +11 -0
  13. memori/database/adapters/__init__.py +11 -0
  14. memori/database/adapters/mongodb_adapter.py +739 -0
  15. memori/database/adapters/mysql_adapter.py +8 -8
  16. memori/database/adapters/postgresql_adapter.py +6 -6
  17. memori/database/adapters/sqlite_adapter.py +6 -6
  18. memori/database/auto_creator.py +8 -9
  19. memori/database/connection_utils.py +5 -5
  20. memori/database/connectors/__init__.py +11 -0
  21. memori/database/connectors/base_connector.py +18 -19
  22. memori/database/connectors/mongodb_connector.py +654 -0
  23. memori/database/connectors/mysql_connector.py +13 -15
  24. memori/database/connectors/postgres_connector.py +12 -12
  25. memori/database/connectors/sqlite_connector.py +11 -11
  26. memori/database/models.py +2 -2
  27. memori/database/mongodb_manager.py +1484 -0
  28. memori/database/queries/base_queries.py +3 -4
  29. memori/database/queries/chat_queries.py +3 -5
  30. memori/database/queries/entity_queries.py +3 -5
  31. memori/database/queries/memory_queries.py +3 -5
  32. memori/database/query_translator.py +11 -11
  33. memori/database/schema_generators/__init__.py +11 -0
  34. memori/database/schema_generators/mongodb_schema_generator.py +666 -0
  35. memori/database/schema_generators/mysql_schema_generator.py +2 -4
  36. memori/database/search/__init__.py +11 -0
  37. memori/database/search/mongodb_search_adapter.py +653 -0
  38. memori/database/search/mysql_search_adapter.py +8 -8
  39. memori/database/search/sqlite_search_adapter.py +6 -6
  40. memori/database/search_service.py +17 -17
  41. memori/database/sqlalchemy_manager.py +10 -12
  42. memori/integrations/__init__.py +1 -1
  43. memori/integrations/anthropic_integration.py +1 -3
  44. memori/integrations/litellm_integration.py +23 -6
  45. memori/integrations/openai_integration.py +31 -3
  46. memori/tools/memory_tool.py +10 -9
  47. memori/utils/exceptions.py +58 -58
  48. memori/utils/helpers.py +11 -12
  49. memori/utils/input_validator.py +10 -12
  50. memori/utils/logging.py +4 -4
  51. memori/utils/pydantic_models.py +57 -57
  52. memori/utils/query_builder.py +20 -20
  53. memori/utils/security_audit.py +28 -28
  54. memori/utils/security_integration.py +9 -9
  55. memori/utils/transaction_manager.py +20 -19
  56. memori/utils/validators.py +6 -6
  57. {memorisdk-2.0.1.dist-info → memorisdk-2.1.1.dist-info}/METADATA +23 -12
  58. memorisdk-2.1.1.dist-info/RECORD +71 -0
  59. memorisdk-2.0.1.dist-info/RECORD +0 -66
  60. {memorisdk-2.0.1.dist-info → memorisdk-2.1.1.dist-info}/WHEEL +0 -0
  61. {memorisdk-2.0.1.dist-info → memorisdk-2.1.1.dist-info}/licenses/LICENSE +0 -0
  62. {memorisdk-2.0.1.dist-info → memorisdk-2.1.1.dist-info}/top_level.txt +0 -0
memori/database/schema_generators/mysql_schema_generator.py
@@ -3,8 +3,6 @@ MySQL schema generator for Memori v2.0
 Converts SQLite schema to MySQL-compatible schema with FULLTEXT search
 """
 
-from typing import Dict, List
-
 from ..connectors.base_connector import BaseSchemaGenerator, DatabaseType
 
 
@@ -14,7 +12,7 @@ class MySQLSchemaGenerator(BaseSchemaGenerator):
     def __init__(self):
         super().__init__(DatabaseType.MYSQL)
 
-    def get_data_type_mappings(self) -> Dict[str, str]:
+    def get_data_type_mappings(self) -> dict[str, str]:
         """Get MySQL-specific data type mappings from SQLite"""
         return {
             "TEXT": "TEXT",
@@ -204,7 +202,7 @@ ALTER TABLE long_term_memory ADD FULLTEXT INDEX ft_long_term_topic (topic);
         ]
         return "\n".join(schema_parts)
 
-    def get_migration_queries(self) -> List[str]:
+    def get_migration_queries(self) -> list[str]:
         """Get queries to migrate from SQLite to MySQL"""
         return [
             # Note: These would be used for data migration from SQLite to MySQL
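
The only substantive change in this file is the move from typing.Dict/typing.List to the built-in generics dict/list (PEP 585). A minimal, illustrative comparison of the two annotation styles (not taken from the package):

# Illustrative only: the PEP 585 equivalence behind the annotation changes above.
# Built-in generics like dict[str, str] require Python 3.9+.
from typing import Dict, List

def legacy_mappings() -> Dict[str, str]:   # pre-2.1.1 style
    return {"TEXT": "TEXT"}

def modern_mappings() -> dict[str, str]:   # 2.1.1 style, no typing import needed
    return {"TEXT": "TEXT"}

def migration_queries() -> list[str]:      # same change applied to List[str]
    return []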
memori/database/search/__init__.py
@@ -5,4 +5,15 @@ Search adapters for different database backends
 from .mysql_search_adapter import MySQLSearchAdapter
 from .sqlite_search_adapter import SQLiteSearchAdapter
 
+try:
+    from .mongodb_search_adapter import MongoDBSearchAdapter
+
+    MONGODB_SEARCH_AVAILABLE = True
+except ImportError:
+    MongoDBSearchAdapter = None  # type: ignore
+    MONGODB_SEARCH_AVAILABLE = False
+
 __all__ = ["SQLiteSearchAdapter", "MySQLSearchAdapter"]
+
+if MONGODB_SEARCH_AVAILABLE:
+    __all__.append("MongoDBSearchAdapter")
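
This makes the MongoDB adapter an optional import: it is only exported when pymongo (and the adapter module) can be imported. A hedged sketch of how calling code might consume the new flag (illustrative, not part of the diff):

# Illustrative sketch: guarding on the optional MongoDB export.
from memori.database import search

if search.MONGODB_SEARCH_AVAILABLE:
    adapter_cls = search.MongoDBSearchAdapter  # present only when pymongo is installed
else:
    adapter_cls = None  # fall back to the SQLite/MySQL search adapters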
memori/database/search/mongodb_search_adapter.py (new file)
@@ -0,0 +1,653 @@
+"""
+MongoDB-specific search adapter with Atlas Vector Search support
+"""
+
+from datetime import datetime, timezone
+from typing import Any
+
+from loguru import logger
+
+try:
+    import pymongo  # noqa: F401
+    from pymongo.collection import Collection  # noqa: F401
+    from pymongo.errors import OperationFailure
+
+    PYMONGO_AVAILABLE = True
+except ImportError:
+    PYMONGO_AVAILABLE = False
+
+from ...utils.exceptions import ValidationError
+from ...utils.input_validator import DatabaseInputValidator
+from ..connectors.base_connector import BaseSearchAdapter
+from ..connectors.mongodb_connector import MongoDBConnector
+
+
+class MongoDBSearchAdapter(BaseSearchAdapter):
+    """MongoDB-specific search implementation with Atlas Vector Search support"""
+
+    def __init__(self, connector: MongoDBConnector):
+        """Initialize MongoDB search adapter"""
+        if not PYMONGO_AVAILABLE:
+            raise ImportError(
+                "pymongo is required for MongoDB support. Install with: pip install pymongo"
+            )
+
+        super().__init__(connector)
+        self.mongodb_connector = connector
+        self.database = connector.get_database()
+
+        # Collection references
+        self.short_term_collection = connector.get_collection("short_term_memory")
+        self.long_term_collection = connector.get_collection("long_term_memory")
+
+        # Check capabilities
+        self._vector_search_available = None
+        self._text_search_available = None
+
+    def execute_fulltext_search(
+        self,
+        query: str,
+        namespace: str = "default",
+        category_filter: list[str] | None = None,
+        limit: int = 10,
+    ) -> list[dict[str, Any]]:
+        """Execute MongoDB text search with proper validation"""
+        try:
+            # Validate all parameters
+            validated = DatabaseInputValidator.validate_search_params(
+                query, namespace, category_filter, limit
+            )
+
+            # Check if text search is available
+            if not self._check_text_search_available():
+                logger.debug("Text search not available, falling back to regex search")
+                return self.execute_fallback_search(
+                    validated["query"],
+                    validated["namespace"],
+                    validated["category_filter"],
+                    validated["limit"],
+                )
+
+            # Execute MongoDB text search
+            return self._execute_mongodb_text_search(
+                validated["query"],
+                validated["namespace"],
+                validated["category_filter"],
+                validated["limit"],
+            )
+
+        except ValidationError as e:
+            logger.error(f"Invalid search parameters: {e}")
+            return []
+        except Exception as e:
+            logger.error(f"MongoDB text search failed: {e}")
+            # Fallback to regex search on error
+            return self.execute_fallback_search(
+                query, namespace, category_filter, limit
+            )
+
+    def _execute_mongodb_text_search(
+        self,
+        query: str,
+        namespace: str,
+        category_filter: list[str] | None,
+        limit: int,
+    ) -> list[dict[str, Any]]:
+        """Execute MongoDB $text search across collections"""
+        results = []
+
+        # Search both collections
+        collections = [
+            (self.short_term_collection, "short_term"),
+            (self.long_term_collection, "long_term"),
+        ]
+
+        for collection, memory_type in collections:
+            try:
+                # Build search filter
+                search_filter: dict[str, Any] = {
+                    "$text": {"$search": query},
+                    "namespace": namespace,
+                }
+
+                if category_filter:
+                    search_filter["category_primary"] = {"$in": category_filter}
+
+                # For short-term memories, exclude expired ones
+                if memory_type == "short_term":
+                    search_filter["$or"] = [
+                        {"expires_at": {"$exists": False}},
+                        {"expires_at": None},
+                        {"expires_at": {"$gt": datetime.now(timezone.utc)}},
+                    ]
+
+                # Execute search with text score
+                cursor = (
+                    collection.find(search_filter, {"score": {"$meta": "textScore"}})
+                    .sort([("score", {"$meta": "textScore"}), ("importance_score", -1)])
+                    .limit(limit)
+                )
+
+                # Process results
+                for document in cursor:
+                    memory = self._convert_document_to_memory(document)
+                    memory["memory_type"] = memory_type
+                    memory["search_strategy"] = "mongodb_text"
+                    memory["text_score"] = document.get("score", 0)
+                    results.append(memory)
+
+            except Exception as e:
+                logger.warning(f"Text search failed for {memory_type}: {e}")
+                continue
+
+        # Sort by text score and importance
+        results.sort(
+            key=lambda x: (x.get("text_score", 0), x.get("importance_score", 0)),
+            reverse=True,
+        )
+
+        return results[:limit]
+
+    def execute_vector_search(
+        self,
+        query_vector: list[float],
+        namespace: str = "default",
+        category_filter: list[str] | None = None,
+        limit: int = 10,
+        similarity_threshold: float = 0.7,
+    ) -> list[dict[str, Any]]:
+        """Execute MongoDB Atlas Vector Search"""
+        try:
+            if not self._check_vector_search_available():
+                logger.warning("Vector search not available in this MongoDB deployment")
+                return []
+
+            # Validate inputs
+            if not query_vector or not isinstance(query_vector, list):
+                raise ValueError("query_vector must be a non-empty list of floats")
+
+            # Build vector search pipeline
+            pipeline = self._build_vector_search_pipeline(
+                query_vector, namespace, category_filter, limit, similarity_threshold
+            )
+
+            # Execute vector search on long-term memory (primary collection for vectors)
+            try:
+                cursor = self.long_term_collection.aggregate(pipeline)
+                results = []
+
+                for document in cursor:
+                    memory = self._convert_document_to_memory(document)
+                    memory["memory_type"] = "long_term"
+                    memory["search_strategy"] = "vector_search"
+                    memory["vector_score"] = document.get("score", 0)
+                    results.append(memory)
+
+                logger.debug(f"Vector search returned {len(results)} results")
+                return results
+
+            except OperationFailure as e:
+                if "vector search" in str(e).lower():
+                    logger.error(f"Vector search not configured properly: {e}")
+                    return []
+                else:
+                    raise
+
+        except Exception as e:
+            logger.error(f"Vector search failed: {e}")
+            return []
+
+    def _build_vector_search_pipeline(
+        self,
+        query_vector: list[float],
+        namespace: str,
+        category_filter: list[str] | None,
+        limit: int,
+        similarity_threshold: float,
+    ) -> list[dict[str, Any]]:
+        """Build MongoDB aggregation pipeline for vector search"""
+        pipeline = [
+            # Vector search stage (Atlas only)
+            {
+                "$vectorSearch": {
+                    "index": "vector_search_index",  # Must be created in Atlas
+                    "path": "embedding_vector",
+                    "queryVector": query_vector,
+                    "numCandidates": min(limit * 10, 1000),  # Search more candidates
+                    "limit": limit * 2,  # Get more results to filter
+                }
+            },
+            # Add similarity score
+            {"$addFields": {"score": {"$meta": "vectorSearchScore"}}},
+            # Filter by similarity threshold
+            {
+                "$match": {
+                    "score": {"$gte": similarity_threshold},
+                    "namespace": namespace,
+                }
+            },
+        ]
+
+        # Add category filter if specified
+        if category_filter:
+            pipeline.append({"$match": {"category_primary": {"$in": category_filter}}})
+
+        # Final projection and limit
+        pipeline.extend(
+            [
+                {
+                    "$project": {
+                        "_id": 1,
+                        "memory_id": 1,
+                        "searchable_content": 1,
+                        "summary": 1,
+                        "importance_score": 1,
+                        "category_primary": 1,
+                        "namespace": 1,
+                        "classification": 1,
+                        "topic": 1,
+                        "created_at": 1,
+                        "confidence_score": 1,
+                        "score": 1,
+                    }
+                },
+                {"$limit": limit},
+            ]
+        )
+
+        return pipeline
+
+    def execute_hybrid_search(
+        self,
+        query: str,
+        query_vector: list[float] | None = None,
+        namespace: str = "default",
+        category_filter: list[str] | None = None,
+        limit: int = 10,
+        text_weight: float = 0.5,
+        vector_weight: float = 0.5,
+    ) -> list[dict[str, Any]]:
+        """Execute hybrid search combining text and vector search"""
+        try:
+            text_results = []
+            vector_results = []
+
+            # Execute text search
+            if query:
+                text_results = self.execute_fulltext_search(
+                    query, namespace, category_filter, limit * 2
+                )
+
+            # Execute vector search if available and vector provided
+            if query_vector and self._check_vector_search_available():
+                vector_results = self.execute_vector_search(
+                    query_vector, namespace, category_filter, limit * 2
+                )
+
+            # Combine and score results
+            return self._combine_search_results(
+                text_results, vector_results, text_weight, vector_weight, limit
+            )
+
+        except Exception as e:
+            logger.error(f"Hybrid search failed: {e}")
+            # Fallback to text search only
+            return self.execute_fulltext_search(
+                query, namespace, category_filter, limit
+            )
+
+    def _combine_search_results(
+        self,
+        text_results: list[dict[str, Any]],
+        vector_results: list[dict[str, Any]],
+        text_weight: float,
+        vector_weight: float,
+        limit: int,
+    ) -> list[dict[str, Any]]:
+        """Combine text and vector search results with weighted scoring"""
+        # Create lookup for faster deduplication
+        seen_memories = {}
+        combined_results = []
+
+        # Process text results
+        for result in text_results:
+            memory_id = result.get("memory_id")
+            if memory_id:
+                text_score = result.get("text_score", 0)
+                importance_score = result.get("importance_score", 0)
+
+                combined_score = (text_score * text_weight) + (importance_score * 0.1)
+
+                result["combined_score"] = combined_score
+                result["has_text_match"] = True
+                result["has_vector_match"] = False
+
+                seen_memories[memory_id] = result
+                combined_results.append(result)
+
+        # Process vector results
+        for result in vector_results:
+            memory_id = result.get("memory_id")
+            if memory_id:
+                vector_score = result.get("vector_score", 0)
+                importance_score = result.get("importance_score", 0)
+
+                if memory_id in seen_memories:
+                    # Update existing result with vector score
+                    existing = seen_memories[memory_id]
+                    existing_combined = existing.get("combined_score", 0)
+                    vector_combined = (vector_score * vector_weight) + (
+                        importance_score * 0.1
+                    )
+
+                    # Combine scores
+                    existing["combined_score"] = existing_combined + vector_combined
+                    existing["has_vector_match"] = True
+                    existing["vector_score"] = vector_score
+                    existing["search_strategy"] = "hybrid"
+                else:
+                    # New result from vector search
+                    combined_score = (vector_score * vector_weight) + (
+                        importance_score * 0.1
+                    )
+
+                    result["combined_score"] = combined_score
+                    result["has_text_match"] = False
+                    result["has_vector_match"] = True
+
+                    seen_memories[memory_id] = result
+                    combined_results.append(result)
+
+        # Sort by combined score
+        combined_results.sort(key=lambda x: x.get("combined_score", 0), reverse=True)
+
+        logger.debug(
+            f"Hybrid search combined {len(text_results)} text + {len(vector_results)} vector results"
+        )
+        return combined_results[:limit]
+
+    def create_search_indexes(self) -> list[str]:
+        """Create MongoDB-specific search indexes"""
+        indexes_created = []
+
+        try:
+            # Create text indexes
+            collections = [
+                (self.short_term_collection, "short_term_memory"),
+                (self.long_term_collection, "long_term_memory"),
+            ]
+
+            for collection, collection_name in collections:
+                try:
+                    # Create text index for full-text search
+                    collection.create_index(
+                        [("searchable_content", "text"), ("summary", "text")],
+                        name=f"{collection_name}_text_search",
+                        background=True,
+                    )
+
+                    indexes_created.append(f"{collection_name}_text_search")
+                    logger.info(f"Created text index for {collection_name}")
+
+                except Exception as e:
+                    logger.warning(
+                        f"Failed to create text index for {collection_name}: {e}"
+                    )
+
+            # Note about vector indexes
+            if self.mongodb_connector.supports_vector_search():
+                logger.info(
+                    "Vector search is supported. Create vector indexes via MongoDB Atlas UI or Admin API."
+                )
+                indexes_created.append("vector_search_index (manual creation required)")
+            else:
+                logger.info("Vector search not supported in this deployment")
+
+            return indexes_created
+
+        except Exception as e:
+            logger.error(f"Failed to create search indexes: {e}")
+            return indexes_created
+
+    def translate_search_query(self, query: str) -> str:
+        """Translate search query to MongoDB text search syntax"""
+        if not query or not query.strip():
+            return '""'  # Empty query
+
+        # MongoDB text search supports:
+        # - Phrase search: "exact phrase"
+        # - Term search: term1 term2
+        # - Negation: -unwanted
+        # - OR operations: term1 OR term2
+
+        # For safety, we'll do minimal processing
+        sanitized = query.strip()
+
+        # If query contains special characters, wrap in quotes for phrase search
+        if any(char in sanitized for char in ['"', "(", ")", "-", "|"]):
+            # Remove existing quotes and wrap the whole thing
+            sanitized = sanitized.replace('"', "")
+            return f'"{sanitized}"'
+
+        return sanitized
+
+    def execute_fallback_search(
+        self,
+        query: str,
+        namespace: str = "default",
+        category_filter: list[str] | None = None,
+        limit: int = 10,
+    ) -> list[dict[str, Any]]:
+        """Execute regex-based fallback search for MongoDB"""
+        try:
+            results = []
+
+            # Create case-insensitive regex pattern
+            regex_pattern = {"$regex": query, "$options": "i"}
+
+            collections = [
+                (self.short_term_collection, "short_term"),
+                (self.long_term_collection, "long_term"),
+            ]
+
+            for collection, memory_type in collections:
+                try:
+                    # Build search filter using regex
+                    search_filter = {
+                        "$or": [
+                            {"searchable_content": regex_pattern},
+                            {"summary": regex_pattern},
+                        ],
+                        "namespace": namespace,
+                    }
+
+                    if category_filter:
+                        search_filter["category_primary"] = {"$in": category_filter}
+
+                    # For short-term memories, exclude expired ones
+                    if memory_type == "short_term":
+                        search_filter["$and"] = [
+                            {"$or": search_filter["$or"]},
+                            {"namespace": namespace},
+                            {
+                                "$or": [
+                                    {"expires_at": {"$exists": False}},
+                                    {"expires_at": None},
+                                    {"expires_at": {"$gt": datetime.now(timezone.utc)}},
+                                ]
+                            },
+                        ]
+                        # Remove the top-level filters since they're now in $and
+                        del search_filter["$or"]
+                        del search_filter["namespace"]
+
+                        if category_filter:
+                            search_filter["$and"].append(
+                                {"category_primary": {"$in": category_filter}}
+                            )
+                            del search_filter["category_primary"]
+
+                    # Execute regex search
+                    cursor = (
+                        collection.find(search_filter)
+                        .sort([("importance_score", -1), ("created_at", -1)])
+                        .limit(limit)
+                    )
+
+                    for document in cursor:
+                        memory = self._convert_document_to_memory(document)
+                        memory["memory_type"] = memory_type
+                        memory["search_strategy"] = "regex_fallback"
+                        results.append(memory)
+
+                except Exception as e:
+                    logger.warning(f"Regex search failed for {memory_type}: {e}")
+                    continue
+
+            # Sort by importance score
+            results.sort(key=lambda x: x.get("importance_score", 0), reverse=True)
+
+            logger.debug(f"Regex fallback search returned {len(results)} results")
+            return results[:limit]
+
+        except Exception as e:
+            logger.error(f"Fallback search failed: {e}")
+            return []
+
+    def _convert_document_to_memory(self, document: dict[str, Any]) -> dict[str, Any]:
+        """Convert MongoDB document to memory format"""
+        if not document:
+            return {}
+
+        memory = document.copy()
+
+        # Convert ObjectId to string
+        if "_id" in memory:
+            memory["_id"] = str(memory["_id"])
+
+        # Convert datetime objects to ISO strings for JSON compatibility
+        datetime_fields = [
+            "created_at",
+            "expires_at",
+            "last_accessed",
+            "extraction_timestamp",
+        ]
+        for field in datetime_fields:
+            if field in memory and isinstance(memory[field], datetime):
+                memory[field] = memory[field].isoformat()
+
+        return memory
+
+    def _check_text_search_available(self) -> bool:
+        """Check if MongoDB text search is available"""
+        if self._text_search_available is not None:
+            return self._text_search_available
+
+        try:
+            # Try to get text indexes
+            indexes = list(self.short_term_collection.list_indexes())
+            self._text_search_available = any(
+                "text" in str(index.get("key", {})) for index in indexes
+            )
+        except Exception:
+            self._text_search_available = False
+
+        return self._text_search_available
+
+    def _check_vector_search_available(self) -> bool:
+        """Check if MongoDB Atlas Vector Search is available"""
+        if self._vector_search_available is not None:
+            return self._vector_search_available
+
+        try:
+            # This is a comprehensive check for vector search availability
+            self._vector_search_available = (
+                self.mongodb_connector.supports_vector_search()
+            )
+
+            # Additional check: try to see if we have vector search indexes
+            if self._vector_search_available:
+                try:
+                    # Try a simple vector search to see if indexes exist
+                    # This is a minimal test query
+                    test_pipeline = [
+                        {
+                            "$vectorSearch": {
+                                "index": "vector_search_index",
+                                "path": "embedding_vector",
+                                "queryVector": [0.0] * 1536,  # Dummy vector
+                                "numCandidates": 1,
+                                "limit": 1,
+                            }
+                        },
+                        {"$limit": 0},  # Don't return any results
+                    ]
+
+                    # If this doesn't throw an error, vector search is properly configured
+                    list(self.long_term_collection.aggregate(test_pipeline))
+                    logger.debug("Vector search is available and configured")
+
+                except OperationFailure as e:
+                    if "vector search" in str(e).lower() or "index" in str(e).lower():
+                        logger.warning(
+                            "Vector search is supported but not configured (missing indexes)"
+                        )
+                        self._vector_search_available = False
+                    else:
+                        # Other errors might still allow vector search
+                        pass
+
+        except Exception:
+            self._vector_search_available = False
+
+        return self._vector_search_available
+
+    def optimize_search_performance(self):
+        """Optimize MongoDB search performance"""
+        try:
+            # Update collection statistics for better query planning
+            collections = [self.short_term_collection, self.long_term_collection]
+
+            for collection in collections:
+                try:
+                    # MongoDB doesn't have ANALYZE like SQL, but we can:
+                    # 1. Ensure indexes are being used effectively
+                    # 2. Check for slow operations
+
+                    # Get collection stats
+                    stats = self.database.command("collStats", collection.name)
+                    logger.debug(
+                        f"Collection {collection.name} stats: {stats.get('count', 0)} documents"
+                    )
+
+                    # List indexes to ensure they exist
+                    indexes = list(collection.list_indexes())
+                    logger.debug(
+                        f"Collection {collection.name} has {len(indexes)} indexes"
+                    )
+
+                except Exception as e:
+                    logger.warning(f"Failed to get stats for {collection.name}: {e}")
+
+            logger.info("MongoDB search optimization completed")
+
+        except Exception as e:
+            logger.warning(f"MongoDB search optimization failed: {e}")
+
+    def get_search_capabilities(self) -> dict[str, Any]:
+        """Get MongoDB search capabilities"""
+        return {
+            "text_search": self._check_text_search_available(),
+            "vector_search": self._check_vector_search_available(),
+            "regex_search": True,  # Always available in MongoDB
+            "faceted_search": True,  # MongoDB aggregation supports faceting
+            "geospatial_search": True,  # MongoDB has good geospatial support
+            "full_text_operators": [
+                "$text",  # Text search
+                "$regex",  # Pattern matching
+                "$search",  # Atlas Search (if available)
+            ],
+            "supported_similarity_metrics": ["cosine", "euclidean", "dotProduct"],
+            "max_vector_dimensions": 2048,  # Atlas limit
+            "hybrid_search": True,
+        }
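
For context, a hedged usage sketch of the new adapter. Only the adapter methods shown in the diff are taken from it; the MongoDBConnector constructor arguments and the embedding vector are assumptions for illustration.

# Illustrative sketch only. Assumes a MongoDBConnector that can be built from a
# connection string; the actual constructor signature is not shown in this diff.
from memori.database.connectors.mongodb_connector import MongoDBConnector
from memori.database.search.mongodb_search_adapter import MongoDBSearchAdapter

connector = MongoDBConnector("mongodb://localhost:27017/memori")  # hypothetical args
adapter = MongoDBSearchAdapter(connector)

adapter.create_search_indexes()          # creates text indexes; the Atlas vector index is manual
caps = adapter.get_search_capabilities()

results = adapter.execute_fulltext_search("project deadlines", namespace="default", limit=5)
if caps["vector_search"]:
    results = adapter.execute_hybrid_search(
        query="project deadlines",
        query_vector=[0.0] * 1536,  # placeholder; real vectors come from an embedding model
        limit=5,
    )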