memorisdk-2.0.1-py3-none-any.whl → memorisdk-2.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of memorisdk might be problematic. Click here for more details.

Files changed (62):
  1. memori/__init__.py +3 -3
  2. memori/agents/conscious_agent.py +289 -77
  3. memori/agents/memory_agent.py +19 -9
  4. memori/agents/retrieval_agent.py +59 -51
  5. memori/config/manager.py +7 -7
  6. memori/config/memory_manager.py +25 -25
  7. memori/config/settings.py +13 -6
  8. memori/core/conversation.py +15 -15
  9. memori/core/database.py +14 -13
  10. memori/core/memory.py +376 -105
  11. memori/core/providers.py +25 -25
  12. memori/database/__init__.py +11 -0
  13. memori/database/adapters/__init__.py +11 -0
  14. memori/database/adapters/mongodb_adapter.py +739 -0
  15. memori/database/adapters/mysql_adapter.py +8 -8
  16. memori/database/adapters/postgresql_adapter.py +6 -6
  17. memori/database/adapters/sqlite_adapter.py +6 -6
  18. memori/database/auto_creator.py +8 -9
  19. memori/database/connection_utils.py +5 -5
  20. memori/database/connectors/__init__.py +11 -0
  21. memori/database/connectors/base_connector.py +18 -19
  22. memori/database/connectors/mongodb_connector.py +527 -0
  23. memori/database/connectors/mysql_connector.py +13 -15
  24. memori/database/connectors/postgres_connector.py +12 -12
  25. memori/database/connectors/sqlite_connector.py +11 -11
  26. memori/database/models.py +2 -2
  27. memori/database/mongodb_manager.py +1402 -0
  28. memori/database/queries/base_queries.py +3 -4
  29. memori/database/queries/chat_queries.py +3 -5
  30. memori/database/queries/entity_queries.py +3 -5
  31. memori/database/queries/memory_queries.py +3 -5
  32. memori/database/query_translator.py +11 -11
  33. memori/database/schema_generators/__init__.py +11 -0
  34. memori/database/schema_generators/mongodb_schema_generator.py +666 -0
  35. memori/database/schema_generators/mysql_schema_generator.py +2 -4
  36. memori/database/search/__init__.py +11 -0
  37. memori/database/search/mongodb_search_adapter.py +653 -0
  38. memori/database/search/mysql_search_adapter.py +8 -8
  39. memori/database/search/sqlite_search_adapter.py +6 -6
  40. memori/database/search_service.py +17 -17
  41. memori/database/sqlalchemy_manager.py +10 -12
  42. memori/integrations/__init__.py +1 -1
  43. memori/integrations/anthropic_integration.py +1 -3
  44. memori/integrations/litellm_integration.py +23 -6
  45. memori/integrations/openai_integration.py +31 -3
  46. memori/tools/memory_tool.py +10 -9
  47. memori/utils/exceptions.py +58 -58
  48. memori/utils/helpers.py +11 -12
  49. memori/utils/input_validator.py +10 -12
  50. memori/utils/logging.py +4 -4
  51. memori/utils/pydantic_models.py +57 -57
  52. memori/utils/query_builder.py +20 -20
  53. memori/utils/security_audit.py +28 -28
  54. memori/utils/security_integration.py +9 -9
  55. memori/utils/transaction_manager.py +20 -19
  56. memori/utils/validators.py +6 -6
  57. {memorisdk-2.0.1.dist-info → memorisdk-2.1.0.dist-info}/METADATA +22 -12
  58. memorisdk-2.1.0.dist-info/RECORD +71 -0
  59. memorisdk-2.0.1.dist-info/RECORD +0 -66
  60. {memorisdk-2.0.1.dist-info → memorisdk-2.1.0.dist-info}/WHEEL +0 -0
  61. {memorisdk-2.0.1.dist-info → memorisdk-2.1.0.dist-info}/licenses/LICENSE +0 -0
  62. {memorisdk-2.0.1.dist-info → memorisdk-2.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,666 @@
1
+ """
2
+ MongoDB schema generator for Memori
3
+ Defines collections, validation rules, and indexes for MongoDB
4
+ """
5
+
6
+ from typing import Any
7
+
8
+ from ..connectors.base_connector import BaseSchemaGenerator, DatabaseType
9
+
10
+
11
+ class MongoDBSchemaGenerator(BaseSchemaGenerator):
12
+ """MongoDB-specific schema generator"""
13
+
14
def __init__(self) -> None:
    """Initialise the generator, passing DatabaseType.MONGODB to the base class."""
    super().__init__(DatabaseType.MONGODB)
16
+
17
def generate_core_schema(self) -> str:
    """
    Return human-readable documentation of the expected document layout.

    The text describes the chat_history, short_term_memory and
    long_term_memory collections. It is documentation only: nothing in
    this method creates collections or enforces the structure.
    """
    title = "\n# MongoDB Collections Schema for Memori\n"

    chat_history_doc = """
## Collection: chat_history
Purpose: Store chat interactions between users and AI
Expected Document Structure:
{
"_id": ObjectId,
"chat_id": "string (unique)",
"user_input": "string",
"ai_output": "string",
"model": "string",
"timestamp": ISODate,
"session_id": "string",
"namespace": "string (default: 'default')",
"tokens_used": "number",
"metadata": "object (optional)"
}
"""

    short_term_doc = """
## Collection: short_term_memory
Purpose: Store temporary memories with expiration
Expected Document Structure:
{
"_id": ObjectId,
"memory_id": "string (unique)",
"chat_id": "string (optional, reference to chat_history)",
"processed_data": "object",
"importance_score": "number (0.0-1.0)",
"category_primary": "string",
"retention_type": "string (default: 'short_term')",
"namespace": "string (default: 'default')",
"created_at": ISODate,
"expires_at": "ISODate (optional)",
"access_count": "number (default: 0)",
"last_accessed": "ISODate (optional)",
"searchable_content": "string",
"summary": "string",
"is_permanent_context": "boolean (default: false)"
}
"""

    long_term_doc = """
## Collection: long_term_memory
Purpose: Store persistent memories with enhanced metadata
Expected Document Structure:
{
"_id": ObjectId,
"memory_id": "string (unique)",
"original_chat_id": "string (optional)",
"processed_data": "object",
"importance_score": "number (0.0-1.0)",
"category_primary": "string",
"retention_type": "string (default: 'long_term')",
"namespace": "string (default: 'default')",
"created_at": ISODate,
"access_count": "number (default: 0)",
"last_accessed": "ISODate (optional)",
"searchable_content": "string",
"summary": "string",
"novelty_score": "number (0.0-1.0, default: 0.5)",
"relevance_score": "number (0.0-1.0, default: 0.5)",
"actionability_score": "number (0.0-1.0, default: 0.5)",

// Enhanced Classification Fields
"classification": "string (default: 'conversational')",
"memory_importance": "string (default: 'medium')",
"topic": "string (optional)",
"entities_json": "array (default: [])",
"keywords_json": "array (default: [])",

// Conscious Context Flags
"is_user_context": "boolean (default: false)",
"is_preference": "boolean (default: false)",
"is_skill_knowledge": "boolean (default: false)",
"is_current_project": "boolean (default: false)",
"promotion_eligible": "boolean (default: false)",

// Memory Management
"duplicate_of": "string (optional)",
"supersedes_json": "array (default: [])",
"related_memories_json": "array (default: [])",

// Technical Metadata
"confidence_score": "number (0.0-1.0, default: 0.8)",
"extraction_timestamp": ISODate,
"classification_reason": "string (optional)",

// Processing Status
"processed_for_duplicates": "boolean (default: false)",
"conscious_processed": "boolean (default: false)",

// Vector Search Support (MongoDB Atlas)
"embedding_vector": "array<number> (optional, for vector search)"
}
"""

    # Each section starts with a newline and ends with one, so plain
    # concatenation yields exactly one blank line between sections.
    return title + chat_history_doc + short_term_doc + long_term_doc
115
+
116
def generate_indexes(self) -> str:
    """
    Generate MongoDB index creation documentation.

    Returns a plain-text listing of the indexes per collection. The
    listing is kept in step with the specifications returned by
    generate_indexes_schema(): the short_term_memory entries for chat_id,
    is_permanent_context and the access pattern, and the long_term_memory
    entries for memory_importance, the score compound and the access
    pattern, were previously defined there but missing from this text.
    """
    return """
# MongoDB Indexes for Memori Collections

## Indexes for chat_history collection:
- chat_id (unique)
- namespace + session_id (compound)
- timestamp (descending)
- model

## Indexes for short_term_memory collection:
- memory_id (unique)
- namespace + category_primary + importance_score (compound, descending on score)
- expires_at
- created_at (descending)
- text index on searchable_content + summary
- chat_id (sparse)
- is_permanent_context
- access_count + last_accessed (compound, descending)

## Indexes for long_term_memory collection:
- memory_id (unique)
- namespace + category_primary + importance_score (compound, descending on score)
- classification
- topic
- created_at (descending)
- text index on searchable_content + summary
- is_user_context + is_preference + is_skill_knowledge + promotion_eligible (compound)
- conscious_processed
- processed_for_duplicates
- confidence_score
- memory_importance
- novelty_score + relevance_score + actionability_score (compound, descending)
- access_count + last_accessed (compound, descending)

## Vector Search Index (Atlas only):
- embedding_vector (vector search index for similarity search)
"""
149
+
150
def generate_search_setup(self) -> str:
    """
    Return documentation describing how search is set up for MongoDB.

    Covers the text indexes, the Atlas-only vector search index and the
    search strategies, as one newline-separated string.
    """
    doc_lines = (
        "",
        "# MongoDB Search Configuration",
        "",
        "## Text Search Indexes:",
        "MongoDB text indexes are automatically created for:",
        "- short_term_memory: searchable_content, summary",
        "- long_term_memory: searchable_content, summary",
        "",
        "## Vector Search (MongoDB Atlas only):",
        "For vector similarity search, create a vector search index on the 'embedding_vector' field:",
        "- Field: embedding_vector",
        "- Type: vector",
        "- Dimensions: 1536 (or your embedding dimension)",
        "- Similarity: cosine (or euclidean/dotProduct)",
        "",
        "Vector search indexes must be created through MongoDB Atlas UI or Atlas Admin API.",
        "",
        "## Search Strategies:",
        "1. Text Search: Use MongoDB $text operator for full-text search",
        "2. Regex Search: Fallback using $regex for pattern matching",
        "3. Vector Search: Use Atlas Vector Search for semantic similarity (if available)",
        # The empty first and last entries give the leading and trailing newline.
        "",
    )
    return "\n".join(doc_lines)
174
+
175
def get_data_type_mappings(self) -> dict[str, str]:
    """Return the type-name mapping for MongoDB; every name maps to itself."""
    type_names = (
        "string",
        "number",
        "boolean",
        "date",
        "object",
        "array",
        "objectId",
    )
    return {type_name: type_name for type_name in type_names}
186
+
187
def generate_collections_schema(self) -> dict[str, dict[str, Any]]:
    """
    Generate MongoDB collections with validation schemas.

    Returns a mapping of collection name to its options: a "validator"
    holding a "$jsonSchema" document, plus "validationAction" and
    "validationLevel". A fresh structure is built on every call.

    The long_term_memory validator now also describes "last_accessed"
    (date or null), matching the short_term_memory validator; the field
    was previously absent from the long-term properties.
    """

    def field(bson_type: Any, description: str, **constraints: Any) -> dict[str, Any]:
        # Constraints sit between bsonType and description so key order
        # matches the hand-written layout (bsonType, limits, description).
        return {"bsonType": bson_type, **constraints, "description": description}

    def unit_score(description: str) -> dict[str, Any]:
        # A double constrained to the closed interval [0.0, 1.0].
        return field("double", description, minimum=0.0, maximum=1.0)

    def counter(description: str) -> dict[str, Any]:
        # A non-negative integer.
        return field("int", description, minimum=0)

    def collection(required: list[str], properties: dict[str, Any]) -> dict[str, Any]:
        return {
            "validator": {
                "$jsonSchema": {
                    "bsonType": "object",
                    "required": required,
                    "properties": properties,
                }
            },
            "validationAction": "warn",  # Use "error" for strict validation
            "validationLevel": "moderate",
        }

    # Both memory collections require the same fields; each gets its own
    # list copy so the returned structures share no mutable state.
    memory_required = (
        "memory_id",
        "processed_data",
        "importance_score",
        "category_primary",
        "namespace",
        "searchable_content",
        "summary",
    )

    chat_history = collection(
        [
            "chat_id",
            "user_input",
            "ai_output",
            "model",
            "timestamp",
            "session_id",
            "namespace",
        ],
        {
            "chat_id": field("string", "Unique chat interaction identifier"),
            "user_input": field("string", "User's input message"),
            "ai_output": field("string", "AI's response message"),
            "model": field("string", "AI model used for response"),
            "timestamp": field("date", "Interaction timestamp"),
            "session_id": field("string", "Session identifier"),
            "namespace": field("string", "Memory namespace"),
            "tokens_used": counter("Number of tokens used"),
            "metadata": field("object", "Additional metadata"),
        },
    )

    short_term_memory = collection(
        list(memory_required),
        {
            "memory_id": field("string", "Unique memory identifier"),
            "chat_id": field("string", "Reference to chat interaction"),
            "processed_data": field("object", "Processed memory data"),
            "importance_score": unit_score("Memory importance score"),
            "category_primary": field("string", "Primary memory category"),
            "retention_type": field("string", "Memory retention type"),
            "namespace": field("string", "Memory namespace"),
            "created_at": field("date", "Memory creation timestamp"),
            "expires_at": field(["date", "null"], "Memory expiration timestamp"),
            "access_count": counter("Memory access count"),
            "last_accessed": field(["date", "null"], "Last access timestamp"),
            "searchable_content": field("string", "Searchable text content"),
            "summary": field("string", "Memory summary"),
            "is_permanent_context": field(
                "bool", "Whether memory is permanent context"
            ),
        },
    )

    long_term_memory = collection(
        list(memory_required),
        {
            "memory_id": field("string", "Unique memory identifier"),
            "original_chat_id": field(
                "string", "Original chat interaction reference"
            ),
            "processed_data": field("object", "Processed memory data"),
            "importance_score": unit_score("Memory importance score"),
            "category_primary": field("string", "Primary memory category"),
            "retention_type": field("string", "Memory retention type"),
            "namespace": field("string", "Memory namespace"),
            "created_at": field("date", "Memory creation timestamp"),
            "access_count": counter("Memory access count"),
            # Added for parity with the short_term_memory validator.
            "last_accessed": field(["date", "null"], "Last access timestamp"),
            "searchable_content": field("string", "Searchable text content"),
            "summary": field("string", "Memory summary"),
            "novelty_score": unit_score("Memory novelty score"),
            "relevance_score": unit_score("Memory relevance score"),
            "actionability_score": unit_score("Memory actionability score"),
            "classification": field("string", "Memory classification"),
            "memory_importance": field(
                "string",
                "Memory importance level",
                enum=["low", "medium", "high", "critical"],
            ),
            "topic": field("string", "Memory topic"),
            "entities_json": field("array", "Extracted entities"),
            "keywords_json": field("array", "Extracted keywords"),
            "is_user_context": field("bool", "Whether memory is user context"),
            "is_preference": field("bool", "Whether memory is user preference"),
            "is_skill_knowledge": field(
                "bool", "Whether memory is skill knowledge"
            ),
            "is_current_project": field(
                "bool", "Whether memory relates to current project"
            ),
            "promotion_eligible": field(
                "bool", "Whether memory is eligible for promotion"
            ),
            "duplicate_of": field("string", "Reference to original if duplicate"),
            "supersedes_json": field("array", "Memories this supersedes"),
            "related_memories_json": field("array", "Related memory references"),
            "confidence_score": unit_score("Memory confidence score"),
            "extraction_timestamp": field("date", "Data extraction timestamp"),
            "classification_reason": field("string", "Reason for classification"),
            "processed_for_duplicates": field(
                "bool", "Whether processed for duplicates"
            ),
            "conscious_processed": field("bool", "Whether consciously processed"),
            "embedding_vector": field(
                "array",
                "Vector embedding for similarity search",
                items={"bsonType": "double"},
            ),
        },
    )

    return {
        "chat_history": chat_history,
        "short_term_memory": short_term_memory,
        "long_term_memory": long_term_memory,
    }
492
+
493
def generate_indexes_schema(self) -> dict[str, list[dict[str, Any]]]:
    """
    Return the index specifications for every Memori collection.

    Each specification is a dict with "keys" (a list of
    (field, direction-or-type) tuples), "name", and optionally "unique"
    or "sparse". The result maps collection name to its list of specs.
    """
    ascending, descending, text = 1, -1, "text"

    def spec(name: str, *keys: tuple, **options: Any) -> dict[str, Any]:
        # Key order is keys, name, then options.
        return {"keys": list(keys), "name": name, **options}

    chat_history_indexes = [
        spec("idx_chat_id", ("chat_id", ascending), unique=True),
        spec(
            "idx_namespace_session",
            ("namespace", ascending),
            ("session_id", ascending),
        ),
        spec("idx_timestamp", ("timestamp", descending)),
        spec("idx_model", ("model", ascending)),
    ]

    short_term_indexes = [
        spec("idx_memory_id", ("memory_id", ascending), unique=True),
        spec(
            "idx_namespace_category_importance",
            ("namespace", ascending),
            ("category_primary", ascending),
            ("importance_score", descending),
        ),
        spec("idx_expires_at", ("expires_at", ascending), sparse=True),
        spec("idx_created_at", ("created_at", descending)),
        spec("idx_chat_id", ("chat_id", ascending), sparse=True),
        spec("idx_text_search", ("searchable_content", text), ("summary", text)),
        spec("idx_permanent_context", ("is_permanent_context", ascending)),
        spec(
            "idx_access_pattern",
            ("access_count", descending),
            ("last_accessed", descending),
        ),
    ]

    long_term_indexes = [
        spec("idx_memory_id", ("memory_id", ascending), unique=True),
        spec(
            "idx_namespace_category_importance",
            ("namespace", ascending),
            ("category_primary", ascending),
            ("importance_score", descending),
        ),
        spec("idx_classification", ("classification", ascending)),
        spec("idx_topic", ("topic", ascending), sparse=True),
        spec("idx_created_at", ("created_at", descending)),
        spec("idx_text_search", ("searchable_content", text), ("summary", text)),
        spec(
            "idx_conscious_flags",
            ("is_user_context", ascending),
            ("is_preference", ascending),
            ("is_skill_knowledge", ascending),
            ("promotion_eligible", ascending),
        ),
        spec("idx_conscious_processed", ("conscious_processed", ascending)),
        spec("idx_duplicates_processed", ("processed_for_duplicates", ascending)),
        spec("idx_confidence", ("confidence_score", descending)),
        spec("idx_memory_importance", ("memory_importance", ascending)),
        spec(
            "idx_scores",
            ("novelty_score", descending),
            ("relevance_score", descending),
            ("actionability_score", descending),
        ),
        spec(
            "idx_access_pattern",
            ("access_count", descending),
            ("last_accessed", descending),
        ),
    ]

    return {
        "chat_history": chat_history_indexes,
        "short_term_memory": short_term_indexes,
        "long_term_memory": long_term_indexes,
    }
581
+
582
def generate_vector_search_config(self) -> dict[str, Any]:
    """
    Generate vector search configuration for MongoDB Atlas.

    Returns a dict with three entries:
      - "collection": the collection the index is defined on.
      - "vector_index": the index name and its definition.
      - "search_pipeline": an aggregation pipeline template whose
        "queryVector" is a placeholder string to be substituted by the
        caller before the pipeline is run.

    The field definition uses "numDimensions", the key Atlas Vector
    Search expects for fields of type "vector"; the previous key
    "dimensions" belongs to the older knnVector mapping format.
    """
    index_name = "vector_search_index"
    vector_path = "embedding_vector"

    return {
        "collection": "long_term_memory",
        "vector_index": {
            "name": index_name,
            "definition": {
                "fields": [
                    {
                        "path": vector_path,
                        "type": "vector",
                        "similarity": "cosine",
                        "numDimensions": 1536,  # OpenAI ada-002 dimensions
                    }
                ]
            },
        },
        "search_pipeline": [
            {
                "$vectorSearch": {
                    "index": index_name,
                    "path": vector_path,
                    "queryVector": "<<QUERY_VECTOR>>",  # Placeholder
                    "numCandidates": 100,
                    "limit": 10,
                }
            },
            {
                "$project": {
                    "memory_id": 1,
                    "searchable_content": 1,
                    "summary": 1,
                    "importance_score": 1,
                    "category_primary": 1,
                    "namespace": 1,
                    "score": {"$meta": "vectorSearchScore"},
                }
            },
        ],
    }
622
+
623
def generate_full_schema(self) -> str:
    """
    Return the complete MongoDB schema documentation as one string.

    The result is a two-line banner, then the output of
    generate_core_schema(), generate_indexes() and
    generate_search_setup() (each preceded by an empty line), then an
    empty line and a three-line closing note, all joined with newlines.
    """
    banner = (
        "# MongoDB Schema for Memori v2.0",
        "# Complete database schema with collections, validation, and indexes",
    )
    closing_note = (
        "# Note: This is documentation only. MongoDB collections and indexes",
        "# are created programmatically by the MongoDBConnector and MongoDBAdapter.",
        "# Vector search indexes must be created via MongoDB Atlas UI or Admin API.",
    )

    pieces = list(banner)
    for section in (
        self.generate_core_schema(),
        self.generate_indexes(),
        self.generate_search_setup(),
    ):
        # An empty entry before each section becomes a separating newline.
        pieces.append("")
        pieces.append(section)
    pieces.append("")
    pieces.extend(closing_note)

    return "\n".join(pieces)
640
+
641
def get_migration_strategy(self) -> dict[str, Any]:
    """
    Describe how to migrate from a SQL database to MongoDB.

    Returns a dict with the overall "approach" plus lists of "steps",
    "considerations" and "tools". The content is descriptive text only.
    """
    migration_steps = [
        "Extract data from source SQL database",
        "Transform data to MongoDB document format",
        "Handle data type conversions (timestamps, JSON, etc.)",
        "Load data into MongoDB collections",
        "Create indexes after data load",
        "Validate data integrity",
    ]
    design_considerations = [
        "SQL foreign keys become document references or embedded documents",
        "JSON fields in SQL become native objects in MongoDB",
        "SQL joins become MongoDB aggregation pipelines or embedded documents",
        "Index strategy differs significantly between SQL and MongoDB",
        "Vector embeddings can be stored natively in MongoDB documents",
    ]
    suggested_tools = [
        "MongoDB Compass for visual schema design",
        "MongoDB Database Tools for import/export",
        "Custom ETL scripts for complex transformations",
        "MongoDB Atlas Data Lake for large-scale migrations",
    ]

    strategy: dict[str, Any] = {"approach": "ETL Pipeline"}
    strategy["steps"] = migration_steps
    strategy["considerations"] = design_considerations
    strategy["tools"] = suggested_tools
    return strategy