memorisdk 2.0.0__py3-none-any.whl → 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of memorisdk might be problematic. Click here for more details.
- memori/__init__.py +3 -3
- memori/agents/conscious_agent.py +289 -77
- memori/agents/memory_agent.py +19 -9
- memori/agents/retrieval_agent.py +138 -63
- memori/config/manager.py +7 -7
- memori/config/memory_manager.py +25 -25
- memori/config/settings.py +13 -6
- memori/core/conversation.py +15 -15
- memori/core/database.py +14 -13
- memori/core/memory.py +438 -123
- memori/core/providers.py +25 -25
- memori/database/__init__.py +11 -0
- memori/database/adapters/__init__.py +11 -0
- memori/database/adapters/mongodb_adapter.py +739 -0
- memori/database/adapters/mysql_adapter.py +8 -8
- memori/database/adapters/postgresql_adapter.py +6 -6
- memori/database/adapters/sqlite_adapter.py +6 -6
- memori/database/auto_creator.py +8 -9
- memori/database/connection_utils.py +5 -5
- memori/database/connectors/__init__.py +11 -0
- memori/database/connectors/base_connector.py +18 -19
- memori/database/connectors/mongodb_connector.py +527 -0
- memori/database/connectors/mysql_connector.py +13 -15
- memori/database/connectors/postgres_connector.py +12 -12
- memori/database/connectors/sqlite_connector.py +11 -11
- memori/database/models.py +2 -2
- memori/database/mongodb_manager.py +1402 -0
- memori/database/queries/base_queries.py +3 -4
- memori/database/queries/chat_queries.py +3 -5
- memori/database/queries/entity_queries.py +3 -5
- memori/database/queries/memory_queries.py +3 -5
- memori/database/query_translator.py +11 -11
- memori/database/schema_generators/__init__.py +11 -0
- memori/database/schema_generators/mongodb_schema_generator.py +666 -0
- memori/database/schema_generators/mysql_schema_generator.py +2 -4
- memori/database/search/__init__.py +11 -0
- memori/database/search/mongodb_search_adapter.py +653 -0
- memori/database/search/mysql_search_adapter.py +8 -8
- memori/database/search/sqlite_search_adapter.py +6 -6
- memori/database/search_service.py +218 -66
- memori/database/sqlalchemy_manager.py +72 -25
- memori/integrations/__init__.py +1 -1
- memori/integrations/anthropic_integration.py +1 -3
- memori/integrations/litellm_integration.py +23 -6
- memori/integrations/openai_integration.py +31 -3
- memori/tools/memory_tool.py +104 -13
- memori/utils/exceptions.py +58 -58
- memori/utils/helpers.py +11 -12
- memori/utils/input_validator.py +10 -12
- memori/utils/logging.py +4 -4
- memori/utils/pydantic_models.py +57 -57
- memori/utils/query_builder.py +20 -20
- memori/utils/security_audit.py +28 -28
- memori/utils/security_integration.py +9 -9
- memori/utils/transaction_manager.py +20 -19
- memori/utils/validators.py +6 -6
- {memorisdk-2.0.0.dist-info → memorisdk-2.1.0.dist-info}/METADATA +36 -20
- memorisdk-2.1.0.dist-info/RECORD +71 -0
- memori/scripts/llm_text.py +0 -50
- memorisdk-2.0.0.dist-info/RECORD +0 -67
- {memorisdk-2.0.0.dist-info → memorisdk-2.1.0.dist-info}/WHEEL +0 -0
- {memorisdk-2.0.0.dist-info → memorisdk-2.1.0.dist-info}/licenses/LICENSE +0 -0
- {memorisdk-2.0.0.dist-info → memorisdk-2.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,666 @@
|
|
|
1
|
+
"""
|
|
2
|
+
MongoDB schema generator for Memori
|
|
3
|
+
Defines collections, validation rules, and indexes for MongoDB
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from ..connectors.base_connector import BaseSchemaGenerator, DatabaseType
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class MongoDBSchemaGenerator(BaseSchemaGenerator):
|
|
12
|
+
"""MongoDB-specific schema generator"""
|
|
13
|
+
|
|
14
|
+
def __init__(self):
    """Initialise the base schema generator with the MongoDB database type."""
    target_database = DatabaseType.MONGODB
    super().__init__(target_database)
|
|
16
|
+
|
|
17
|
+
def generate_core_schema(self) -> str:
    """
    Return human-readable documentation of the expected document layout.

    The text covers the three collections (chat_history, short_term_memory,
    long_term_memory). It is documentation only; nothing here is sent to
    MongoDB.
    """
    title = "\n# MongoDB Collections Schema for Memori\n"

    # Each section starts with a blank line and ends right after its closing
    # brace, so plain concatenation reproduces the single-literal layout.
    chat_history_doc = """
## Collection: chat_history
Purpose: Store chat interactions between users and AI
Expected Document Structure:
{
"_id": ObjectId,
"chat_id": "string (unique)",
"user_input": "string",
"ai_output": "string",
"model": "string",
"timestamp": ISODate,
"session_id": "string",
"namespace": "string (default: 'default')",
"tokens_used": "number",
"metadata": "object (optional)"
}
"""

    short_term_doc = """
## Collection: short_term_memory
Purpose: Store temporary memories with expiration
Expected Document Structure:
{
"_id": ObjectId,
"memory_id": "string (unique)",
"chat_id": "string (optional, reference to chat_history)",
"processed_data": "object",
"importance_score": "number (0.0-1.0)",
"category_primary": "string",
"retention_type": "string (default: 'short_term')",
"namespace": "string (default: 'default')",
"created_at": ISODate,
"expires_at": "ISODate (optional)",
"access_count": "number (default: 0)",
"last_accessed": "ISODate (optional)",
"searchable_content": "string",
"summary": "string",
"is_permanent_context": "boolean (default: false)"
}
"""

    long_term_doc = """
## Collection: long_term_memory
Purpose: Store persistent memories with enhanced metadata
Expected Document Structure:
{
"_id": ObjectId,
"memory_id": "string (unique)",
"original_chat_id": "string (optional)",
"processed_data": "object",
"importance_score": "number (0.0-1.0)",
"category_primary": "string",
"retention_type": "string (default: 'long_term')",
"namespace": "string (default: 'default')",
"created_at": ISODate,
"access_count": "number (default: 0)",
"last_accessed": "ISODate (optional)",
"searchable_content": "string",
"summary": "string",
"novelty_score": "number (0.0-1.0, default: 0.5)",
"relevance_score": "number (0.0-1.0, default: 0.5)",
"actionability_score": "number (0.0-1.0, default: 0.5)",

// Enhanced Classification Fields
"classification": "string (default: 'conversational')",
"memory_importance": "string (default: 'medium')",
"topic": "string (optional)",
"entities_json": "array (default: [])",
"keywords_json": "array (default: [])",

// Conscious Context Flags
"is_user_context": "boolean (default: false)",
"is_preference": "boolean (default: false)",
"is_skill_knowledge": "boolean (default: false)",
"is_current_project": "boolean (default: false)",
"promotion_eligible": "boolean (default: false)",

// Memory Management
"duplicate_of": "string (optional)",
"supersedes_json": "array (default: [])",
"related_memories_json": "array (default: [])",

// Technical Metadata
"confidence_score": "number (0.0-1.0, default: 0.8)",
"extraction_timestamp": ISODate,
"classification_reason": "string (optional)",

// Processing Status
"processed_for_duplicates": "boolean (default: false)",
"conscious_processed": "boolean (default: false)",

// Vector Search Support (MongoDB Atlas)
"embedding_vector": "array<number> (optional, for vector search)"
}
"""

    return title + chat_history_doc + short_term_doc + long_term_doc
|
|
115
|
+
|
|
116
|
+
def generate_indexes(self) -> str:
    """
    Generate MongoDB index creation documentation.

    Returns a plain-text listing of the indexes expected on each collection.
    The listing is kept in step with the specifications returned by
    ``generate_indexes_schema``: every index defined there has a line here,
    including the chat_id, permanent-context, access-pattern,
    memory-importance and score indexes that were previously undocumented.
    Existing lines are left unchanged so consumers matching on them keep
    working.
    """
    return """
# MongoDB Indexes for Memori Collections

## Indexes for chat_history collection:
- chat_id (unique)
- namespace + session_id (compound)
- timestamp (descending)
- model

## Indexes for short_term_memory collection:
- memory_id (unique)
- namespace + category_primary + importance_score (compound, descending on score)
- expires_at
- created_at (descending)
- chat_id (sparse)
- text index on searchable_content + summary
- is_permanent_context
- access_count + last_accessed (compound, descending)

## Indexes for long_term_memory collection:
- memory_id (unique)
- namespace + category_primary + importance_score (compound, descending on score)
- classification
- topic
- created_at (descending)
- text index on searchable_content + summary
- is_user_context + is_preference + is_skill_knowledge + promotion_eligible (compound)
- conscious_processed
- processed_for_duplicates
- confidence_score
- memory_importance
- novelty_score + relevance_score + actionability_score (compound, descending)
- access_count + last_accessed (compound, descending)

## Vector Search Index (Atlas only):
- embedding_vector (vector search index for similarity search)
"""
|
|
149
|
+
|
|
150
|
+
def generate_search_setup(self) -> str:
    """
    Return documentation describing how search is set up for MongoDB.

    Covers the text indexes, the Atlas-only vector index settings, and the
    list of search strategies. The result is documentation text only.
    """
    heading = "\n# MongoDB Search Configuration\n"

    text_indexes = """
## Text Search Indexes:
MongoDB text indexes are automatically created for:
- short_term_memory: searchable_content, summary
- long_term_memory: searchable_content, summary
"""

    vector_search = """
## Vector Search (MongoDB Atlas only):
For vector similarity search, create a vector search index on the 'embedding_vector' field:
- Field: embedding_vector
- Type: vector
- Dimensions: 1536 (or your embedding dimension)
- Similarity: cosine (or euclidean/dotProduct)

Vector search indexes must be created through MongoDB Atlas UI or Atlas Admin API.
"""

    strategies = """
## Search Strategies:
1. Text Search: Use MongoDB $text operator for full-text search
2. Regex Search: Fallback using $regex for pattern matching
3. Vector Search: Use Atlas Vector Search for semantic similarity (if available)
"""

    # Sections already carry their own leading blank line and trailing newline.
    return "".join((heading, text_indexes, vector_search, strategies))
|
|
174
|
+
|
|
175
|
+
def get_data_type_mappings(self) -> dict[str, str]:
    """Return the data type mapping for MongoDB; every type name maps to itself."""
    type_names = (
        "string",
        "number",
        "boolean",
        "date",
        "object",
        "array",
        "objectId",
    )
    return {type_name: type_name for type_name in type_names}
|
|
186
|
+
|
|
187
|
+
def generate_collections_schema(self) -> dict[str, dict[str, Any]]:
    """
    Build the validation options for each MongoDB collection.

    Returns a dict keyed by collection name (chat_history, short_term_memory,
    long_term_memory). Each value holds a ``$jsonSchema`` validator together
    with ``validationAction`` "warn" and ``validationLevel`` "moderate".
    A fresh structure is built on every call.
    """

    def prop(bson_type, description, **constraints):
        # Key order follows the hand-written layout: type, constraints, description.
        return {"bsonType": bson_type, **constraints, "description": description}

    def text(description):
        return prop("string", description)

    def flag(description):
        return prop("bool", description)

    def moment(description):
        return prop("date", description)

    def optional_moment(description):
        return prop(["date", "null"], description)

    def document(description):
        return prop("object", description)

    def listing(description):
        return prop("array", description)

    def counter(description):
        return prop("int", description, minimum=0)

    def score(description):
        return prop("double", description, minimum=0.0, maximum=1.0)

    def collection(required, properties):
        return {
            "validator": {
                "$jsonSchema": {
                    "bsonType": "object",
                    "required": list(required),
                    "properties": properties,
                }
            },
            "validationAction": "warn",  # Use "error" for strict validation
            "validationLevel": "moderate",
        }

    # Both memory collections require the same fields; each gets its own list.
    memory_required = (
        "memory_id",
        "processed_data",
        "importance_score",
        "category_primary",
        "namespace",
        "searchable_content",
        "summary",
    )

    chat_history = collection(
        (
            "chat_id",
            "user_input",
            "ai_output",
            "model",
            "timestamp",
            "session_id",
            "namespace",
        ),
        {
            "chat_id": text("Unique chat interaction identifier"),
            "user_input": text("User's input message"),
            "ai_output": text("AI's response message"),
            "model": text("AI model used for response"),
            "timestamp": moment("Interaction timestamp"),
            "session_id": text("Session identifier"),
            "namespace": text("Memory namespace"),
            "tokens_used": counter("Number of tokens used"),
            "metadata": document("Additional metadata"),
        },
    )

    short_term_memory = collection(
        memory_required,
        {
            "memory_id": text("Unique memory identifier"),
            "chat_id": text("Reference to chat interaction"),
            "processed_data": document("Processed memory data"),
            "importance_score": score("Memory importance score"),
            "category_primary": text("Primary memory category"),
            "retention_type": text("Memory retention type"),
            "namespace": text("Memory namespace"),
            "created_at": moment("Memory creation timestamp"),
            "expires_at": optional_moment("Memory expiration timestamp"),
            "access_count": counter("Memory access count"),
            "last_accessed": optional_moment("Last access timestamp"),
            "searchable_content": text("Searchable text content"),
            "summary": text("Memory summary"),
            "is_permanent_context": flag("Whether memory is permanent context"),
        },
    )

    long_term_memory = collection(
        memory_required,
        {
            "memory_id": text("Unique memory identifier"),
            "original_chat_id": text("Original chat interaction reference"),
            "processed_data": document("Processed memory data"),
            "importance_score": score("Memory importance score"),
            "category_primary": text("Primary memory category"),
            "retention_type": text("Memory retention type"),
            "namespace": text("Memory namespace"),
            "created_at": moment("Memory creation timestamp"),
            "access_count": counter("Memory access count"),
            "searchable_content": text("Searchable text content"),
            "summary": text("Memory summary"),
            "novelty_score": score("Memory novelty score"),
            "relevance_score": score("Memory relevance score"),
            "actionability_score": score("Memory actionability score"),
            "classification": text("Memory classification"),
            "memory_importance": prop(
                "string",
                "Memory importance level",
                enum=["low", "medium", "high", "critical"],
            ),
            "topic": text("Memory topic"),
            "entities_json": listing("Extracted entities"),
            "keywords_json": listing("Extracted keywords"),
            "is_user_context": flag("Whether memory is user context"),
            "is_preference": flag("Whether memory is user preference"),
            "is_skill_knowledge": flag("Whether memory is skill knowledge"),
            "is_current_project": flag("Whether memory relates to current project"),
            "promotion_eligible": flag("Whether memory is eligible for promotion"),
            "duplicate_of": text("Reference to original if duplicate"),
            "supersedes_json": listing("Memories this supersedes"),
            "related_memories_json": listing("Related memory references"),
            "confidence_score": score("Memory confidence score"),
            "extraction_timestamp": moment("Data extraction timestamp"),
            "classification_reason": text("Reason for classification"),
            "processed_for_duplicates": flag("Whether processed for duplicates"),
            "conscious_processed": flag("Whether consciously processed"),
            "embedding_vector": prop(
                "array",
                "Vector embedding for similarity search",
                items={"bsonType": "double"},
            ),
        },
    )

    return {
        "chat_history": chat_history,
        "short_term_memory": short_term_memory,
        "long_term_memory": long_term_memory,
    }
|
|
492
|
+
|
|
493
|
+
def generate_indexes_schema(self) -> dict[str, list[dict[str, Any]]]:
    """
    Build the index specifications for every MongoDB collection.

    Returns a dict keyed by collection name. Each value is a list of specs
    with a "keys" list of (field, direction) tuples, a "name", and optional
    "unique" / "sparse" flags. Directions are 1, -1 or "text".
    """
    asc, desc, full_text = 1, -1, "text"

    def index(name, *keys, **options):
        # Layout of a spec: keys first, then name, then any option flags.
        return {"keys": list(keys), "name": name, **options}

    def memory_id_index():
        return index("idx_memory_id", ("memory_id", asc), unique=True)

    def ranking_index():
        return index(
            "idx_namespace_category_importance",
            ("namespace", asc),
            ("category_primary", asc),
            ("importance_score", desc),
        )

    def created_at_index():
        return index("idx_created_at", ("created_at", desc))

    def text_search_index():
        return index(
            "idx_text_search",
            ("searchable_content", full_text),
            ("summary", full_text),
        )

    def access_pattern_index():
        return index(
            "idx_access_pattern",
            ("access_count", desc),
            ("last_accessed", desc),
        )

    chat_history = [
        index("idx_chat_id", ("chat_id", asc), unique=True),
        index("idx_namespace_session", ("namespace", asc), ("session_id", asc)),
        index("idx_timestamp", ("timestamp", desc)),
        index("idx_model", ("model", asc)),
    ]

    short_term_memory = [
        memory_id_index(),
        ranking_index(),
        index("idx_expires_at", ("expires_at", asc), sparse=True),
        created_at_index(),
        index("idx_chat_id", ("chat_id", asc), sparse=True),
        text_search_index(),
        index("idx_permanent_context", ("is_permanent_context", asc)),
        access_pattern_index(),
    ]

    long_term_memory = [
        memory_id_index(),
        ranking_index(),
        index("idx_classification", ("classification", asc)),
        index("idx_topic", ("topic", asc), sparse=True),
        created_at_index(),
        text_search_index(),
        index(
            "idx_conscious_flags",
            ("is_user_context", asc),
            ("is_preference", asc),
            ("is_skill_knowledge", asc),
            ("promotion_eligible", asc),
        ),
        index("idx_conscious_processed", ("conscious_processed", asc)),
        index("idx_duplicates_processed", ("processed_for_duplicates", asc)),
        index("idx_confidence", ("confidence_score", desc)),
        index("idx_memory_importance", ("memory_importance", asc)),
        index(
            "idx_scores",
            ("novelty_score", desc),
            ("relevance_score", desc),
            ("actionability_score", desc),
        ),
        access_pattern_index(),
    ]

    return {
        "chat_history": chat_history,
        "short_term_memory": short_term_memory,
        "long_term_memory": long_term_memory,
    }
|
|
581
|
+
|
|
582
|
+
def generate_vector_search_config(
    self, dimensions: int = 1536, similarity: str = "cosine"
) -> dict[str, Any]:
    """
    Generate vector search configuration for MongoDB Atlas.

    Args:
        dimensions: Length of the stored embedding vectors. Defaults to 1536
            (OpenAI ada-002 dimensions).
        similarity: Similarity function for the index. Defaults to "cosine".

    Returns:
        A dict with the target "collection", the "vector_index" name and
        definition, and a "search_pipeline" template. The pipeline's
        "queryVector" is the placeholder string "<<QUERY_VECTOR>>" and must
        be replaced with a real vector before the pipeline is run.

    Note:
        The vector length is emitted as "numDimensions", the key that Atlas
        Vector Search expects for fields of type "vector". The earlier
        "dimensions" key belongs to the legacy knnVector mapping.
    """
    index_name = "vector_search_index"
    vector_path = "embedding_vector"

    vector_field = {
        "path": vector_path,
        "type": "vector",
        "similarity": similarity,
        "numDimensions": dimensions,
    }

    vector_search_stage = {
        "$vectorSearch": {
            "index": index_name,
            "path": vector_path,
            "queryVector": "<<QUERY_VECTOR>>",  # Placeholder
            "numCandidates": 100,
            "limit": 10,
        }
    }

    projection_stage = {
        "$project": {
            "memory_id": 1,
            "searchable_content": 1,
            "summary": 1,
            "importance_score": 1,
            "category_primary": 1,
            "namespace": 1,
            "score": {"$meta": "vectorSearchScore"},
        }
    }

    return {
        "collection": "long_term_memory",
        "vector_index": {
            "name": index_name,
            "definition": {"fields": [vector_field]},
        },
        "search_pipeline": [vector_search_stage, projection_stage],
    }
|
|
622
|
+
|
|
623
|
+
def generate_full_schema(self) -> str:
    """
    Assemble the complete MongoDB schema documentation.

    Joins a two-line header, the output of generate_core_schema(),
    generate_indexes() and generate_search_setup() (in that order, each
    preceded by an empty line), and a closing three-line note, using
    newlines as separators.
    """
    lines = [
        "# MongoDB Schema for Memori v2.0",
        "# Complete database schema with collections, validation, and indexes",
    ]

    sections = (
        self.generate_core_schema(),
        self.generate_indexes(),
        self.generate_search_setup(),
    )
    for section in sections:
        lines.append("")
        lines.append(section)

    lines.append("")
    lines.extend(
        (
            "# Note: This is documentation only. MongoDB collections and indexes",
            "# are created programmatically by the MongoDBConnector and MongoDBAdapter.",
            "# Vector search indexes must be created via MongoDB Atlas UI or Admin API.",
        )
    )
    return "\n".join(lines)
|
|
640
|
+
|
|
641
|
+
def get_migration_strategy(self) -> dict[str, Any]:
    """
    Describe the strategy for migrating from SQL databases to MongoDB.

    Returns a dict with the overall "approach", the ordered "steps", the
    "considerations" to keep in mind, and suggested "tools". The content is
    descriptive only; no migration is performed.
    """
    ordered_steps = [
        "Extract data from source SQL database",
        "Transform data to MongoDB document format",
        "Handle data type conversions (timestamps, JSON, etc.)",
        "Load data into MongoDB collections",
        "Create indexes after data load",
        "Validate data integrity",
    ]
    design_notes = [
        "SQL foreign keys become document references or embedded documents",
        "JSON fields in SQL become native objects in MongoDB",
        "SQL joins become MongoDB aggregation pipelines or embedded documents",
        "Index strategy differs significantly between SQL and MongoDB",
        "Vector embeddings can be stored natively in MongoDB documents",
    ]
    suggested_tools = [
        "MongoDB Compass for visual schema design",
        "MongoDB Database Tools for import/export",
        "Custom ETL scripts for complex transformations",
        "MongoDB Atlas Data Lake for large-scale migrations",
    ]

    strategy: dict[str, Any] = {"approach": "ETL Pipeline"}
    strategy["steps"] = ordered_steps
    strategy["considerations"] = design_notes
    strategy["tools"] = suggested_tools
    return strategy
|