memorisdk 2.0.1__py3-none-any.whl → 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of memorisdk might be problematic. Click here for more details.
- memori/__init__.py +3 -3
- memori/agents/conscious_agent.py +289 -77
- memori/agents/memory_agent.py +19 -9
- memori/agents/retrieval_agent.py +59 -51
- memori/config/manager.py +7 -7
- memori/config/memory_manager.py +25 -25
- memori/config/settings.py +13 -6
- memori/core/conversation.py +15 -15
- memori/core/database.py +14 -13
- memori/core/memory.py +376 -105
- memori/core/providers.py +25 -25
- memori/database/__init__.py +11 -0
- memori/database/adapters/__init__.py +11 -0
- memori/database/adapters/mongodb_adapter.py +739 -0
- memori/database/adapters/mysql_adapter.py +8 -8
- memori/database/adapters/postgresql_adapter.py +6 -6
- memori/database/adapters/sqlite_adapter.py +6 -6
- memori/database/auto_creator.py +8 -9
- memori/database/connection_utils.py +5 -5
- memori/database/connectors/__init__.py +11 -0
- memori/database/connectors/base_connector.py +18 -19
- memori/database/connectors/mongodb_connector.py +527 -0
- memori/database/connectors/mysql_connector.py +13 -15
- memori/database/connectors/postgres_connector.py +12 -12
- memori/database/connectors/sqlite_connector.py +11 -11
- memori/database/models.py +2 -2
- memori/database/mongodb_manager.py +1402 -0
- memori/database/queries/base_queries.py +3 -4
- memori/database/queries/chat_queries.py +3 -5
- memori/database/queries/entity_queries.py +3 -5
- memori/database/queries/memory_queries.py +3 -5
- memori/database/query_translator.py +11 -11
- memori/database/schema_generators/__init__.py +11 -0
- memori/database/schema_generators/mongodb_schema_generator.py +666 -0
- memori/database/schema_generators/mysql_schema_generator.py +2 -4
- memori/database/search/__init__.py +11 -0
- memori/database/search/mongodb_search_adapter.py +653 -0
- memori/database/search/mysql_search_adapter.py +8 -8
- memori/database/search/sqlite_search_adapter.py +6 -6
- memori/database/search_service.py +17 -17
- memori/database/sqlalchemy_manager.py +10 -12
- memori/integrations/__init__.py +1 -1
- memori/integrations/anthropic_integration.py +1 -3
- memori/integrations/litellm_integration.py +23 -6
- memori/integrations/openai_integration.py +31 -3
- memori/tools/memory_tool.py +10 -9
- memori/utils/exceptions.py +58 -58
- memori/utils/helpers.py +11 -12
- memori/utils/input_validator.py +10 -12
- memori/utils/logging.py +4 -4
- memori/utils/pydantic_models.py +57 -57
- memori/utils/query_builder.py +20 -20
- memori/utils/security_audit.py +28 -28
- memori/utils/security_integration.py +9 -9
- memori/utils/transaction_manager.py +20 -19
- memori/utils/validators.py +6 -6
- {memorisdk-2.0.1.dist-info → memorisdk-2.1.0.dist-info}/METADATA +22 -12
- memorisdk-2.1.0.dist-info/RECORD +71 -0
- memorisdk-2.0.1.dist-info/RECORD +0 -66
- {memorisdk-2.0.1.dist-info → memorisdk-2.1.0.dist-info}/WHEEL +0 -0
- {memorisdk-2.0.1.dist-info → memorisdk-2.1.0.dist-info}/licenses/LICENSE +0 -0
- {memorisdk-2.0.1.dist-info → memorisdk-2.1.0.dist-info}/top_level.txt +0 -0
|
@@ -3,8 +3,6 @@ MySQL schema generator for Memori v2.0
|
|
|
3
3
|
Converts SQLite schema to MySQL-compatible schema with FULLTEXT search
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
|
-
from typing import Dict, List
|
|
7
|
-
|
|
8
6
|
from ..connectors.base_connector import BaseSchemaGenerator, DatabaseType
|
|
9
7
|
|
|
10
8
|
|
|
@@ -14,7 +12,7 @@ class MySQLSchemaGenerator(BaseSchemaGenerator):
|
|
|
14
12
|
def __init__(self):
|
|
15
13
|
super().__init__(DatabaseType.MYSQL)
|
|
16
14
|
|
|
17
|
-
def get_data_type_mappings(self) ->
|
|
15
|
+
def get_data_type_mappings(self) -> dict[str, str]:
|
|
18
16
|
"""Get MySQL-specific data type mappings from SQLite"""
|
|
19
17
|
return {
|
|
20
18
|
"TEXT": "TEXT",
|
|
@@ -204,7 +202,7 @@ ALTER TABLE long_term_memory ADD FULLTEXT INDEX ft_long_term_topic (topic);
|
|
|
204
202
|
]
|
|
205
203
|
return "\n".join(schema_parts)
|
|
206
204
|
|
|
207
|
-
def get_migration_queries(self) ->
|
|
205
|
+
def get_migration_queries(self) -> list[str]:
|
|
208
206
|
"""Get queries to migrate from SQLite to MySQL"""
|
|
209
207
|
return [
|
|
210
208
|
# Note: These would be used for data migration from SQLite to MySQL
|
|
@@ -5,4 +5,15 @@ Search adapters for different database backends
|
|
|
5
5
|
from .mysql_search_adapter import MySQLSearchAdapter
|
|
6
6
|
from .sqlite_search_adapter import SQLiteSearchAdapter
|
|
7
7
|
|
|
8
|
+
try:
|
|
9
|
+
from .mongodb_search_adapter import MongoDBSearchAdapter
|
|
10
|
+
|
|
11
|
+
MONGODB_SEARCH_AVAILABLE = True
|
|
12
|
+
except ImportError:
|
|
13
|
+
MongoDBSearchAdapter = None # type: ignore
|
|
14
|
+
MONGODB_SEARCH_AVAILABLE = False
|
|
15
|
+
|
|
8
16
|
__all__ = ["SQLiteSearchAdapter", "MySQLSearchAdapter"]
|
|
17
|
+
|
|
18
|
+
if MONGODB_SEARCH_AVAILABLE:
|
|
19
|
+
__all__.append("MongoDBSearchAdapter")
|
|
@@ -0,0 +1,653 @@
|
|
|
1
|
+
"""
|
|
2
|
+
MongoDB-specific search adapter with Atlas Vector Search support
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from loguru import logger
|
|
9
|
+
|
|
10
|
+
try:
|
|
11
|
+
import pymongo # noqa: F401
|
|
12
|
+
from pymongo.collection import Collection # noqa: F401
|
|
13
|
+
from pymongo.errors import OperationFailure
|
|
14
|
+
|
|
15
|
+
PYMONGO_AVAILABLE = True
|
|
16
|
+
except ImportError:
|
|
17
|
+
PYMONGO_AVAILABLE = False
|
|
18
|
+
|
|
19
|
+
from ...utils.exceptions import ValidationError
|
|
20
|
+
from ...utils.input_validator import DatabaseInputValidator
|
|
21
|
+
from ..connectors.base_connector import BaseSearchAdapter
|
|
22
|
+
from ..connectors.mongodb_connector import MongoDBConnector
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class MongoDBSearchAdapter(BaseSearchAdapter):
|
|
26
|
+
"""MongoDB-specific search implementation with Atlas Vector Search support"""
|
|
27
|
+
|
|
28
|
+
def __init__(self, connector: MongoDBConnector):
    """Initialize MongoDB search adapter"""
    # Fail fast with an actionable message when the optional driver is absent.
    if not PYMONGO_AVAILABLE:
        raise ImportError(
            "pymongo is required for MongoDB support. Install with: pip install pymongo"
        )

    super().__init__(connector)
    self.mongodb_connector = connector
    self.database = connector.get_database()

    # Collection handles shared by every search strategy.
    self.short_term_collection = connector.get_collection("short_term_memory")
    self.long_term_collection = connector.get_collection("long_term_memory")

    # Capability flags, probed lazily on first use (None = not yet checked).
    self._vector_search_available = None
    self._text_search_available = None
|
|
46
|
+
|
|
47
|
+
def execute_fulltext_search(
    self,
    query: str,
    namespace: str = "default",
    category_filter: list[str] | None = None,
    limit: int = 10,
) -> list[dict[str, Any]]:
    """Execute MongoDB text search with proper validation.

    Validates all parameters, then prefers an indexed $text search and
    degrades to a regex scan when no text index is available. Returns an
    empty list on invalid input; other failures fall back to regex search.
    """
    try:
        params = DatabaseInputValidator.validate_search_params(
            query, namespace, category_filter, limit
        )

        if self._check_text_search_available():
            return self._execute_mongodb_text_search(
                params["query"],
                params["namespace"],
                params["category_filter"],
                params["limit"],
            )

        logger.debug("Text search not available, falling back to regex search")
        return self.execute_fallback_search(
            params["query"],
            params["namespace"],
            params["category_filter"],
            params["limit"],
        )

    except ValidationError as e:
        logger.error(f"Invalid search parameters: {e}")
        return []
    except Exception as e:
        logger.error(f"MongoDB text search failed: {e}")
        # Fallback deliberately receives the raw arguments: validation itself
        # may be part of what failed above.
        return self.execute_fallback_search(query, namespace, category_filter, limit)
|
|
88
|
+
|
|
89
|
+
def _execute_mongodb_text_search(
    self,
    query: str,
    namespace: str,
    category_filter: list[str] | None,
    limit: int,
) -> list[dict[str, Any]]:
    """Execute MongoDB $text search across collections.

    Queries both the short-term and long-term collections, tags each hit with
    its origin and text score, then returns the top `limit` hits ordered by
    (text_score, importance_score) descending.
    """
    hits: list[dict[str, Any]] = []

    for collection, memory_type in (
        (self.short_term_collection, "short_term"),
        (self.long_term_collection, "long_term"),
    ):
        try:
            criteria: dict[str, Any] = {
                "$text": {"$search": query},
                "namespace": namespace,
            }
            if category_filter:
                criteria["category_primary"] = {"$in": category_filter}

            # Short-term memories can carry an expiry; skip anything expired.
            if memory_type == "short_term":
                criteria["$or"] = [
                    {"expires_at": {"$exists": False}},
                    {"expires_at": None},
                    {"expires_at": {"$gt": datetime.now(timezone.utc)}},
                ]

            # Project the relevance score so it can be sorted on and surfaced.
            cursor = (
                collection.find(criteria, {"score": {"$meta": "textScore"}})
                .sort([("score", {"$meta": "textScore"}), ("importance_score", -1)])
                .limit(limit)
            )

            for document in cursor:
                memory = self._convert_document_to_memory(document)
                memory["memory_type"] = memory_type
                memory["search_strategy"] = "mongodb_text"
                memory["text_score"] = document.get("score", 0)
                hits.append(memory)

        except Exception as e:
            logger.warning(f"Text search failed for {memory_type}: {e}")
            continue

    # Re-rank the merged hit list: relevance first, importance as tiebreaker.
    hits.sort(
        key=lambda m: (m.get("text_score", 0), m.get("importance_score", 0)),
        reverse=True,
    )
    return hits[:limit]
|
|
150
|
+
|
|
151
|
+
def execute_vector_search(
    self,
    query_vector: list[float],
    namespace: str = "default",
    category_filter: list[str] | None = None,
    limit: int = 10,
    similarity_threshold: float = 0.7,
) -> list[dict[str, Any]]:
    """Execute MongoDB Atlas Vector Search.

    Runs a $vectorSearch aggregation against the long-term collection (the
    only collection storing embeddings). Returns [] when vector search is
    unavailable, misconfigured, or any other error occurs; raises ValueError
    for an empty/non-list query vector (caught and logged here).
    """
    try:
        if not self._check_vector_search_available():
            logger.warning("Vector search not available in this MongoDB deployment")
            return []

        if not query_vector or not isinstance(query_vector, list):
            raise ValueError("query_vector must be a non-empty list of floats")

        pipeline = self._build_vector_search_pipeline(
            query_vector, namespace, category_filter, limit, similarity_threshold
        )

        try:
            matches: list[dict[str, Any]] = []
            for document in self.long_term_collection.aggregate(pipeline):
                memory = self._convert_document_to_memory(document)
                memory["memory_type"] = "long_term"
                memory["search_strategy"] = "vector_search"
                memory["vector_score"] = document.get("score", 0)
                matches.append(memory)

            logger.debug(f"Vector search returned {len(matches)} results")
            return matches

        except OperationFailure as e:
            # A missing/misnamed Atlas index surfaces as an OperationFailure
            # mentioning vector search; anything else is re-raised.
            if "vector search" not in str(e).lower():
                raise
            logger.error(f"Vector search not configured properly: {e}")
            return []

    except Exception as e:
        logger.error(f"Vector search failed: {e}")
        return []
|
|
199
|
+
|
|
200
|
+
def _build_vector_search_pipeline(
|
|
201
|
+
self,
|
|
202
|
+
query_vector: list[float],
|
|
203
|
+
namespace: str,
|
|
204
|
+
category_filter: list[str] | None,
|
|
205
|
+
limit: int,
|
|
206
|
+
similarity_threshold: float,
|
|
207
|
+
) -> list[dict[str, Any]]:
|
|
208
|
+
"""Build MongoDB aggregation pipeline for vector search"""
|
|
209
|
+
pipeline = [
|
|
210
|
+
# Vector search stage (Atlas only)
|
|
211
|
+
{
|
|
212
|
+
"$vectorSearch": {
|
|
213
|
+
"index": "vector_search_index", # Must be created in Atlas
|
|
214
|
+
"path": "embedding_vector",
|
|
215
|
+
"queryVector": query_vector,
|
|
216
|
+
"numCandidates": min(limit * 10, 1000), # Search more candidates
|
|
217
|
+
"limit": limit * 2, # Get more results to filter
|
|
218
|
+
}
|
|
219
|
+
},
|
|
220
|
+
# Add similarity score
|
|
221
|
+
{"$addFields": {"score": {"$meta": "vectorSearchScore"}}},
|
|
222
|
+
# Filter by similarity threshold
|
|
223
|
+
{
|
|
224
|
+
"$match": {
|
|
225
|
+
"score": {"$gte": similarity_threshold},
|
|
226
|
+
"namespace": namespace,
|
|
227
|
+
}
|
|
228
|
+
},
|
|
229
|
+
]
|
|
230
|
+
|
|
231
|
+
# Add category filter if specified
|
|
232
|
+
if category_filter:
|
|
233
|
+
pipeline.append({"$match": {"category_primary": {"$in": category_filter}}})
|
|
234
|
+
|
|
235
|
+
# Final projection and limit
|
|
236
|
+
pipeline.extend(
|
|
237
|
+
[
|
|
238
|
+
{
|
|
239
|
+
"$project": {
|
|
240
|
+
"_id": 1,
|
|
241
|
+
"memory_id": 1,
|
|
242
|
+
"searchable_content": 1,
|
|
243
|
+
"summary": 1,
|
|
244
|
+
"importance_score": 1,
|
|
245
|
+
"category_primary": 1,
|
|
246
|
+
"namespace": 1,
|
|
247
|
+
"classification": 1,
|
|
248
|
+
"topic": 1,
|
|
249
|
+
"created_at": 1,
|
|
250
|
+
"confidence_score": 1,
|
|
251
|
+
"score": 1,
|
|
252
|
+
}
|
|
253
|
+
},
|
|
254
|
+
{"$limit": limit},
|
|
255
|
+
]
|
|
256
|
+
)
|
|
257
|
+
|
|
258
|
+
return pipeline
|
|
259
|
+
|
|
260
|
+
def execute_hybrid_search(
    self,
    query: str,
    query_vector: list[float] | None = None,
    namespace: str = "default",
    category_filter: list[str] | None = None,
    limit: int = 10,
    text_weight: float = 0.5,
    vector_weight: float = 0.5,
) -> list[dict[str, Any]]:
    """Execute hybrid search combining text and vector search.

    Runs each leg that has input available, over-fetching (limit * 2) so the
    merged ranking has enough candidates, then delegates weighted merging to
    _combine_search_results. On failure, degrades to a text-only search.
    """
    try:
        text_results = (
            self.execute_fulltext_search(query, namespace, category_filter, limit * 2)
            if query
            else []
        )

        vector_results = (
            self.execute_vector_search(
                query_vector, namespace, category_filter, limit * 2
            )
            if query_vector and self._check_vector_search_available()
            else []
        )

        return self._combine_search_results(
            text_results, vector_results, text_weight, vector_weight, limit
        )

    except Exception as e:
        logger.error(f"Hybrid search failed: {e}")
        return self.execute_fulltext_search(query, namespace, category_filter, limit)
|
|
298
|
+
|
|
299
|
+
def _combine_search_results(
    self,
    text_results: list[dict[str, Any]],
    vector_results: list[dict[str, Any]],
    text_weight: float,
    vector_weight: float,
    limit: int,
) -> list[dict[str, Any]]:
    """Combine text and vector search results with weighted scoring.

    Results are deduplicated by memory_id; a memory found by both legs gets
    the sum of both weighted scores and strategy "hybrid". Entries without a
    memory_id are dropped. Returns the top `limit` by combined_score.
    """
    by_id: dict[Any, dict[str, Any]] = {}
    merged: list[dict[str, Any]] = []

    # First pass: seed the merged list with text hits.
    for hit in text_results:
        memory_id = hit.get("memory_id")
        if not memory_id:
            continue
        hit["combined_score"] = (
            hit.get("text_score", 0) * text_weight
            + hit.get("importance_score", 0) * 0.1
        )
        hit["has_text_match"] = True
        hit["has_vector_match"] = False
        by_id[memory_id] = hit
        merged.append(hit)

    # Second pass: fold in vector hits, boosting duplicates.
    for hit in vector_results:
        memory_id = hit.get("memory_id")
        if not memory_id:
            continue

        vector_part = (
            hit.get("vector_score", 0) * vector_weight
            + hit.get("importance_score", 0) * 0.1
        )

        if memory_id in by_id:
            # NOTE(review): importance contributes 0.1x per leg, so a dual
            # match counts it twice — presumably an intentional boost; confirm.
            existing = by_id[memory_id]
            existing["combined_score"] = (
                existing.get("combined_score", 0) + vector_part
            )
            existing["has_vector_match"] = True
            existing["vector_score"] = hit.get("vector_score", 0)
            existing["search_strategy"] = "hybrid"
        else:
            hit["combined_score"] = vector_part
            hit["has_text_match"] = False
            hit["has_vector_match"] = True
            by_id[memory_id] = hit
            merged.append(hit)

    merged.sort(key=lambda m: m.get("combined_score", 0), reverse=True)

    logger.debug(
        f"Hybrid search combined {len(text_results)} text + {len(vector_results)} vector results"
    )
    return merged[:limit]
|
|
368
|
+
|
|
369
|
+
def create_search_indexes(self) -> list[str]:
    """Create MongoDB-specific search indexes.

    Builds the compound text index (searchable_content + summary) on both
    memory collections. Vector indexes cannot be created through pymongo, so
    availability is only reported. Returns the names of indexes handled; a
    per-collection failure is logged and skipped.
    """
    created: list[str] = []

    try:
        for collection, collection_name in (
            (self.short_term_collection, "short_term_memory"),
            (self.long_term_collection, "long_term_memory"),
        ):
            try:
                # Backing index for the $text search strategy.
                collection.create_index(
                    [("searchable_content", "text"), ("summary", "text")],
                    name=f"{collection_name}_text_search",
                    background=True,
                )
                created.append(f"{collection_name}_text_search")
                logger.info(f"Created text index for {collection_name}")
            except Exception as e:
                logger.warning(
                    f"Failed to create text index for {collection_name}: {e}"
                )

        # Vector indexes must be provisioned via Atlas tooling, not pymongo.
        if self.mongodb_connector.supports_vector_search():
            logger.info(
                "Vector search is supported. Create vector indexes via MongoDB Atlas UI or Admin API."
            )
            created.append("vector_search_index (manual creation required)")
        else:
            logger.info("Vector search not supported in this deployment")

        return created

    except Exception as e:
        logger.error(f"Failed to create search indexes: {e}")
        return created
|
|
411
|
+
|
|
412
|
+
def translate_search_query(self, query: str) -> str:
    """Translate search query to MongoDB text search syntax.

    MongoDB $text supports phrases ("..."), bare terms, negation (-term) and
    OR — but only minimal, safe processing is done here: blank input becomes
    an empty phrase, and any query containing operator characters is demoted
    to a single quoted phrase so it cannot change search semantics.
    """
    if not query or not query.strip():
        return '""'  # Empty query

    cleaned = query.strip()

    # Operator characters present? Strip embedded quotes and phrase-quote
    # the whole query instead of letting them act as operators.
    if any(ch in cleaned for ch in ('"', "(", ")", "-", "|")):
        return '"{}"'.format(cleaned.replace('"', ""))

    return cleaned
|
|
433
|
+
|
|
434
|
+
def execute_fallback_search(
    self,
    query: str,
    namespace: str = "default",
    category_filter: list[str] | None = None,
    limit: int = 10,
) -> list[dict[str, Any]]:
    """Execute regex-based fallback search for MongoDB.

    Used when no text index exists (or $text search failed). Performs a
    case-insensitive literal substring match over searchable_content and
    summary in both memory collections, returning the top `limit` hits by
    importance_score. Per-collection failures are logged and skipped; any
    other failure returns [].
    """
    import re  # local: only needed on this fallback path

    try:
        results: list[dict[str, Any]] = []

        # FIX: escape the user-supplied query so regex metacharacters are
        # matched literally. The raw query can arrive unvalidated (error
        # fallback path), and interpolating it into $regex unescaped allows
        # regex injection, server-side pattern errors, and pathological
        # backtracking on hostile input.
        regex_pattern = {"$regex": re.escape(query), "$options": "i"}

        collections = [
            (self.short_term_collection, "short_term"),
            (self.long_term_collection, "long_term"),
        ]

        for collection, memory_type in collections:
            try:
                content_match = [
                    {"searchable_content": regex_pattern},
                    {"summary": regex_pattern},
                ]

                if memory_type == "short_term":
                    # Short-term documents need both the content $or and an
                    # expiry $or; nest them under $and so they don't collide.
                    clauses: list[dict[str, Any]] = [
                        {"$or": content_match},
                        {"namespace": namespace},
                        {
                            "$or": [
                                {"expires_at": {"$exists": False}},
                                {"expires_at": None},
                                {"expires_at": {"$gt": datetime.now(timezone.utc)}},
                            ]
                        },
                    ]
                    if category_filter:
                        clauses.append(
                            {"category_primary": {"$in": category_filter}}
                        )
                    search_filter: dict[str, Any] = {"$and": clauses}
                else:
                    search_filter = {
                        "$or": content_match,
                        "namespace": namespace,
                    }
                    if category_filter:
                        search_filter["category_primary"] = {"$in": category_filter}

                cursor = (
                    collection.find(search_filter)
                    .sort([("importance_score", -1), ("created_at", -1)])
                    .limit(limit)
                )

                for document in cursor:
                    memory = self._convert_document_to_memory(document)
                    memory["memory_type"] = memory_type
                    memory["search_strategy"] = "regex_fallback"
                    results.append(memory)

            except Exception as e:
                logger.warning(f"Regex search failed for {memory_type}: {e}")
                continue

        # Rank the merged hits by importance before truncating.
        results.sort(key=lambda x: x.get("importance_score", 0), reverse=True)

        logger.debug(f"Regex fallback search returned {len(results)} results")
        return results[:limit]

    except Exception as e:
        logger.error(f"Fallback search failed: {e}")
        return []
|
|
516
|
+
|
|
517
|
+
def _convert_document_to_memory(self, document: dict[str, Any]) -> dict[str, Any]:
|
|
518
|
+
"""Convert MongoDB document to memory format"""
|
|
519
|
+
if not document:
|
|
520
|
+
return {}
|
|
521
|
+
|
|
522
|
+
memory = document.copy()
|
|
523
|
+
|
|
524
|
+
# Convert ObjectId to string
|
|
525
|
+
if "_id" in memory:
|
|
526
|
+
memory["_id"] = str(memory["_id"])
|
|
527
|
+
|
|
528
|
+
# Convert datetime objects to ISO strings for JSON compatibility
|
|
529
|
+
datetime_fields = [
|
|
530
|
+
"created_at",
|
|
531
|
+
"expires_at",
|
|
532
|
+
"last_accessed",
|
|
533
|
+
"extraction_timestamp",
|
|
534
|
+
]
|
|
535
|
+
for field in datetime_fields:
|
|
536
|
+
if field in memory and isinstance(memory[field], datetime):
|
|
537
|
+
memory[field] = memory[field].isoformat()
|
|
538
|
+
|
|
539
|
+
return memory
|
|
540
|
+
|
|
541
|
+
def _check_text_search_available(self) -> bool:
    """Check if MongoDB text search is available.

    Probes once for a text index on the short-term collection and caches
    the answer in self._text_search_available; any probe error is treated
    as "not available".
    """
    if self._text_search_available is None:
        try:
            # $text only works when a text index exists on the collection.
            indexes = list(self.short_term_collection.list_indexes())
            self._text_search_available = any(
                "text" in str(idx.get("key", {})) for idx in indexes
            )
        except Exception:
            self._text_search_available = False

    return self._text_search_available
|
|
556
|
+
|
|
557
|
+
def _check_vector_search_available(self) -> bool:
    """Check if MongoDB Atlas Vector Search is available.

    Two-stage probe, cached in self._vector_search_available:
    1. deployment capability via the connector;
    2. if capable, a zero-result $vectorSearch to confirm the index exists.
    An OperationFailure mentioning the index or vector search flips the flag
    to False; other OperationFailures are inconclusive and leave it True.
    """
    if self._vector_search_available is not None:
        return self._vector_search_available

    try:
        self._vector_search_available = (
            self.mongodb_connector.supports_vector_search()
        )

        if self._vector_search_available:
            probe = [
                {
                    "$vectorSearch": {
                        "index": "vector_search_index",
                        "path": "embedding_vector",
                        "queryVector": [0.0] * 1536,  # dummy vector
                        "numCandidates": 1,
                        "limit": 1,
                    }
                },
                {"$limit": 0},  # Don't return any results
            ]

            try:
                # Success (no exception) means indexes are wired up.
                list(self.long_term_collection.aggregate(probe))
                logger.debug("Vector search is available and configured")
            except OperationFailure as e:
                message = str(e).lower()
                if "vector search" in message or "index" in message:
                    logger.warning(
                        "Vector search is supported but not configured (missing indexes)"
                    )
                    self._vector_search_available = False
                # Any other OperationFailure is inconclusive: keep the
                # deployment-level answer rather than disabling outright.

    except Exception:
        self._vector_search_available = False

    return self._vector_search_available
|
|
604
|
+
|
|
605
|
+
def optimize_search_performance(self):
    """Optimize MongoDB search performance.

    MongoDB has no SQL-style ANALYZE, so this is diagnostic only: it logs
    collStats document counts and the index count for both memory
    collections. All failures are logged as warnings, never raised.
    """
    try:
        for collection in (self.short_term_collection, self.long_term_collection):
            try:
                stats = self.database.command("collStats", collection.name)
                logger.debug(
                    f"Collection {collection.name} stats: {stats.get('count', 0)} documents"
                )

                # Confirm the expected indexes actually exist.
                indexes = list(collection.list_indexes())
                logger.debug(
                    f"Collection {collection.name} has {len(indexes)} indexes"
                )
            except Exception as e:
                logger.warning(f"Failed to get stats for {collection.name}: {e}")

        logger.info("MongoDB search optimization completed")

    except Exception as e:
        logger.warning(f"MongoDB search optimization failed: {e}")
|
|
636
|
+
|
|
637
|
+
def get_search_capabilities(self) -> dict[str, Any]:
    """Get MongoDB search capabilities.

    Text/vector availability is probed live via the cached checks; the
    remaining entries describe core MongoDB features.
    """
    capabilities: dict[str, Any] = {
        "text_search": self._check_text_search_available(),
        "vector_search": self._check_vector_search_available(),
        "regex_search": True,  # Always available in MongoDB
        "faceted_search": True,  # MongoDB aggregation supports faceting
        "geospatial_search": True,  # MongoDB has good geospatial support
        "full_text_operators": [
            "$text",  # Text search
            "$regex",  # Pattern matching
            "$search",  # Atlas Search (if available)
        ],
        "supported_similarity_metrics": ["cosine", "euclidean", "dotProduct"],
        # NOTE(review): stated Atlas limit; newer Atlas tiers may allow more — verify.
        "max_vector_dimensions": 2048,
        "hybrid_search": True,
    }
    return capabilities
|