memorisdk 1.0.1__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

This version of memorisdk has been flagged as potentially problematic.

Files changed (46)
  1. memori/__init__.py +24 -8
  2. memori/agents/conscious_agent.py +252 -414
  3. memori/agents/memory_agent.py +487 -224
  4. memori/agents/retrieval_agent.py +416 -60
  5. memori/config/memory_manager.py +323 -0
  6. memori/core/conversation.py +393 -0
  7. memori/core/database.py +386 -371
  8. memori/core/memory.py +1676 -534
  9. memori/core/providers.py +217 -0
  10. memori/database/adapters/__init__.py +10 -0
  11. memori/database/adapters/mysql_adapter.py +331 -0
  12. memori/database/adapters/postgresql_adapter.py +291 -0
  13. memori/database/adapters/sqlite_adapter.py +229 -0
  14. memori/database/auto_creator.py +320 -0
  15. memori/database/connection_utils.py +207 -0
  16. memori/database/connectors/base_connector.py +283 -0
  17. memori/database/connectors/mysql_connector.py +240 -18
  18. memori/database/connectors/postgres_connector.py +277 -4
  19. memori/database/connectors/sqlite_connector.py +178 -3
  20. memori/database/models.py +400 -0
  21. memori/database/queries/base_queries.py +1 -1
  22. memori/database/queries/memory_queries.py +91 -2
  23. memori/database/query_translator.py +222 -0
  24. memori/database/schema_generators/__init__.py +7 -0
  25. memori/database/schema_generators/mysql_schema_generator.py +215 -0
  26. memori/database/search/__init__.py +8 -0
  27. memori/database/search/mysql_search_adapter.py +255 -0
  28. memori/database/search/sqlite_search_adapter.py +180 -0
  29. memori/database/search_service.py +548 -0
  30. memori/database/sqlalchemy_manager.py +839 -0
  31. memori/integrations/__init__.py +36 -11
  32. memori/integrations/litellm_integration.py +340 -6
  33. memori/integrations/openai_integration.py +506 -240
  34. memori/utils/input_validator.py +395 -0
  35. memori/utils/pydantic_models.py +138 -36
  36. memori/utils/query_builder.py +530 -0
  37. memori/utils/security_audit.py +594 -0
  38. memori/utils/security_integration.py +339 -0
  39. memori/utils/transaction_manager.py +547 -0
  40. {memorisdk-1.0.1.dist-info → memorisdk-2.0.0.dist-info}/METADATA +144 -34
  41. memorisdk-2.0.0.dist-info/RECORD +67 -0
  42. memorisdk-1.0.1.dist-info/RECORD +0 -44
  43. memorisdk-1.0.1.dist-info/entry_points.txt +0 -2
  44. {memorisdk-1.0.1.dist-info → memorisdk-2.0.0.dist-info}/WHEEL +0 -0
  45. {memorisdk-1.0.1.dist-info → memorisdk-2.0.0.dist-info}/licenses/LICENSE +0 -0
  46. {memorisdk-1.0.1.dist-info → memorisdk-2.0.0.dist-info}/top_level.txt +0 -0
memori/agents/memory_agent.py
@@ -1,100 +1,156 @@
 """
-Memory Agent - The heart of Memori v1.0
-Pydantic-based memory processing using OpenAI Structured Outputs
+Memory Agent - Async Pydantic-based conversation processing
+
+This agent processes conversations and extracts structured information with
+enhanced classification and conscious context detection.
 """

+import json
 from datetime import datetime
-from typing import Any, Dict, Optional
+from typing import TYPE_CHECKING, Any, Dict, List, Optional

 import openai
 from loguru import logger

+if TYPE_CHECKING:
+    from ..core.providers import ProviderConfig
+
 from ..utils.pydantic_models import (
     ConversationContext,
-    MemoryCategoryType,
-    ProcessedMemory,
-    RetentionType,
+    MemoryClassification,
+    MemoryImportanceLevel,
+    ProcessedLongTermMemory,
 )


 class MemoryAgent:
     """
-    Pydantic-based memory agent for intelligent conversation processing.
-    Uses OpenAI Structured Outputs for reliable, structured memory extraction.
+    Async Memory Agent for processing conversations with enhanced classification
     """

-    SYSTEM_PROMPT = """You are an advanced Memory Processing Agent responsible for analyzing conversations and extracting structured information for long-term storage.
-
-Your primary functions:
-1. **Categorize Memory Type**: Classify information as fact, preference, skill, context, or rule
-2. **Extract Entities**: Identify people, technologies, topics, skills, projects, and keywords
-3. **Score Importance**: Determine retention type and various importance dimensions
-4. **Create Searchable Content**: Generate optimized summaries and searchable text
-5. **Make Storage Decisions**: Decide what should be stored and why
-
-**CATEGORIZATION GUIDELINES:**
-- **fact**: Factual information, definitions, technical details, specific data points
-- **preference**: User preferences, likes/dislikes, settings, personal choices, opinions
-- **skill**: Skills, abilities, competencies, learning progress, expertise levels
-- **context**: Project context, work environment, current situations, background info
-- **rule**: Rules, policies, procedures, guidelines, constraints, "should/must" statements
-
-**RETENTION GUIDELINES:**
-- **short_term**: Recent activities, temporary information, casual mentions (expires ~7 days)
-- **long_term**: Important information, learned skills, preferences, significant context
-- **permanent**: Critical rules, core preferences, essential facts, major milestones
-
-**ENTITY EXTRACTION:**
-Focus on extracting specific, searchable entities that would be useful for future retrieval:
-- People: Names, roles, relationships
-- Technologies: Tools, libraries, platforms, programming languages
-- Topics: Subjects, domains, areas of interest
-- Skills: Abilities, competencies, learning areas
-- Projects: Named projects, repositories, initiatives
-- Keywords: Important terms for search and categorization
-
-**IMPORTANCE SCORING:**
-Consider multiple dimensions:
-- Overall importance (0.0-1.0): How crucial is this information?
-- Novelty (0.0-1.0): How new or unique is this information?
-- Relevance (0.0-1.0): How relevant to the user's current interests/work?
-- Actionability (0.0-1.0): How actionable or useful is this information?
-
-Be thorough but practical. Focus on information that would genuinely help in future conversations."""
-
-    def __init__(self, api_key: Optional[str] = None, model: str = "gpt-4o"):
+    def __init__(
+        self,
+        api_key: Optional[str] = None,
+        model: Optional[str] = None,
+        provider_config: Optional["ProviderConfig"] = None,
+    ):
         """
-        Initialize Memory Agent with OpenAI configuration
+        Initialize Memory Agent with LLM provider configuration

         Args:
-            api_key: OpenAI API key (if None, uses environment variable)
-            model: OpenAI model to use for structured output (gpt-4o recommended)
+            api_key: API key (deprecated, use provider_config)
+            model: Model to use for structured output (defaults to 'gpt-4o' if not specified)
+            provider_config: Provider configuration for LLM client
         """
-        self.client = openai.OpenAI(api_key=api_key)
-        self.model = model
+        if provider_config:
+            # Use provider configuration to create clients
+            self.client = provider_config.create_client()
+            self.async_client = provider_config.create_async_client()
+            # Use provided model, fallback to provider config model, then default to gpt-4o
+            self.model = model or provider_config.model or "gpt-4o"
+            logger.debug(f"Memory agent initialized with model: {self.model}")
+            self.provider_config = provider_config
+        else:
+            # Backward compatibility: use api_key directly
+            self.client = openai.OpenAI(api_key=api_key)
+            self.async_client = openai.AsyncOpenAI(api_key=api_key)
+            self.model = model or "gpt-4o"
+            self.provider_config = None
+
+        # Determine if we're using a local/custom endpoint that might not support structured outputs
+        self._supports_structured_outputs = self._detect_structured_output_support()
+
+    SYSTEM_PROMPT = """You are an advanced Memory Processing Agent responsible for analyzing conversations and extracting structured information with intelligent classification and conscious context detection.

-    async def process_conversation(
+Your primary functions:
+1. **Intelligent Classification**: Categorize memories with enhanced classification system
+2. **Conscious Context Detection**: Identify user context information for immediate promotion
+3. **Entity Extraction**: Extract comprehensive entities and keywords
+4. **Deduplication**: Identify and handle duplicate information
+5. **Context Filtering**: Determine what should be stored vs filtered out
+
+**ENHANCED CLASSIFICATION SYSTEM:**
+
+**CONSCIOUS_INFO** (Auto-promote to short-term context):
+- User's name, location, job, personal details
+- Current projects, technologies they work with
+- Preferences, work style, communication style
+- Skills, expertise, learning goals
+- Important personal context for AI interaction
+
+**ESSENTIAL**:
+- Core facts that define user's context
+- Important preferences and opinions
+- Key skills and knowledge areas
+- Critical project information
+
+**CONTEXTUAL**:
+- Current work context
+- Ongoing projects and goals
+- Environmental setup and tools
+
+**CONVERSATIONAL**:
+- Regular discussions and questions
+- Explanations and clarifications
+- Problem-solving conversations
+
+**REFERENCE**:
+- Code examples and technical references
+- Documentation and resources
+- Learning materials
+
+**PERSONAL**:
+- Life events and personal information
+- Relationships and social context
+- Personal interests and hobbies
+
+**IMPORTANCE LEVELS:**
+- **CRITICAL**: Must never be lost
+- **HIGH**: Very important for context
+- **MEDIUM**: Useful to remember
+- **LOW**: Nice to have context
+
+**CONSCIOUS CONTEXT DETECTION:**
+Mark is_user_context=True for:
+- Personal identifiers (name, location, role)
+- Work context (job, company, projects)
+- Technical preferences (languages, tools, frameworks)
+- Communication style and preferences
+- Skills and expertise areas
+- Learning goals and interests
+
+Set promotion_eligible=True for memories that should be immediately available in short-term context for all future conversations.
+
+**PROCESSING RULES:**
+1. AVOID DUPLICATES: Check if similar information already exists
+2. MERGE SIMILAR: Combine related information when appropriate
+3. FILTER UNNECESSARY: Skip trivial greetings, acknowledgments
+4. EXTRACT ENTITIES: Identify people, places, technologies, projects
+5. ASSESS IMPORTANCE: Rate based on relevance to user context
+6. FLAG USER CONTEXT: Mark information for conscious promotion
+
+Focus on extracting information that would genuinely help provide better context and assistance in future conversations."""
+
+    async def process_conversation_async(
         self,
         chat_id: str,
         user_input: str,
         ai_output: str,
         context: Optional[ConversationContext] = None,
-        mem_prompt: Optional[str] = None,
-        filters: Optional[Dict[str, Any]] = None,
-    ) -> ProcessedMemory:
+        existing_memories: Optional[List[str]] = None,
+    ) -> ProcessedLongTermMemory:
         """
-        Process a conversation using OpenAI Structured Outputs
+        Async conversation processing with classification and conscious context detection

         Args:
             chat_id: Conversation ID
             user_input: User's input message
             ai_output: AI's response
             context: Additional conversation context
-            mem_prompt: Optional memory filtering prompt
-            filters: Memory filters to apply
+            existing_memories: List of existing memory summaries for deduplication

         Returns:
-            Structured processed memory
+            Processed memory with classification and conscious flags
         """
         try:
             # Prepare conversation content
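
The reworked constructor above keeps the old `api_key` path working while routing new code through `ProviderConfig`. A minimal usage sketch follows; `ProviderConfig`'s own constructor lives in the new memori/core/providers.py, which is not shown in this diff, so its keyword arguments below are assumptions for illustration only.

    # Hypothetical usage of the MemoryAgent constructor shown above.
    from memori.agents.memory_agent import MemoryAgent
    from memori.core.providers import ProviderConfig

    # Backward-compatible path: api_key only, model defaults to "gpt-4o".
    agent = MemoryAgent(api_key="sk-...")

    # Provider-config path, e.g. an OpenAI-compatible local endpoint.
    # These ProviderConfig kwargs are assumed; see memori/core/providers.py.
    config = ProviderConfig(
        base_url="http://localhost:11434/v1",  # assumed kwarg
        api_key="ollama",                      # assumed kwarg
        model="llama3.1",                      # assumed kwarg
    )
    agent = MemoryAgent(provider_config=config)
    # A localhost base_url makes _detect_structured_output_support() return
    # False, so this agent uses the JSON fallback parser instead of
    # structured outputs.
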
@@ -102,8 +158,14 @@ Be thorough but practical. Focus on information that would genuinely help in future conversations."""

             # Build system prompt
             system_prompt = self.SYSTEM_PROMPT
-            if mem_prompt:
-                system_prompt += f"\n\nSPECIAL FOCUS: {mem_prompt}"
+
+            # Add deduplication context
+            if existing_memories:
+                dedup_context = (
+                    "\n\nEXISTING MEMORIES (for deduplication):\n"
+                    + "\n".join(existing_memories[:10])
+                )
+                system_prompt += dedup_context

             # Prepare context information
             context_info = ""
@@ -117,206 +179,407 @@ CONVERSATION CONTEXT:
 - Topic Thread: {context.topic_thread or 'General conversation'}
 """

-            # Call OpenAI Structured Outputs
-            completion = self.client.beta.chat.completions.parse(
-                model=self.model,
-                messages=[
-                    {"role": "system", "content": system_prompt},
-                    {
-                        "role": "user",
-                        "content": f"Process this conversation for memory storage:\n\n{conversation_text}\n{context_info}",
-                    },
-                ],
-                response_format=ProcessedMemory,
-                temperature=0.1,  # Low temperature for consistent processing
-            )
-
-            # Handle potential refusal
-            if completion.choices[0].message.refusal:
-                logger.warning(
-                    f"Memory processing refused for chat {chat_id}: {completion.choices[0].message.refusal}"
-                )
-                return self._create_empty_memory(
-                    chat_id, "Processing refused for safety reasons"
+            # Try structured outputs first, fall back to manual parsing
+            processed_memory = None
+
+            if self._supports_structured_outputs:
+                try:
+                    # Call OpenAI Structured Outputs (async)
+                    completion = await self.async_client.beta.chat.completions.parse(
+                        model=self.model,
+                        messages=[
+                            {"role": "system", "content": system_prompt},
+                            {
+                                "role": "user",
+                                "content": f"Process this conversation for enhanced memory storage:\n\n{conversation_text}\n{context_info}",
+                            },
+                        ],
+                        response_format=ProcessedLongTermMemory,
+                        temperature=0.1,  # Low temperature for consistent processing
+                    )
+
+                    # Handle potential refusal
+                    if completion.choices[0].message.refusal:
+                        logger.warning(
+                            f"Memory processing refused for chat {chat_id}: {completion.choices[0].message.refusal}"
+                        )
+                        return self._create_empty_long_term_memory(
+                            chat_id, "Processing refused for safety reasons"
+                        )
+
+                    processed_memory = completion.choices[0].message.parsed
+                    processed_memory.conversation_id = chat_id
+                    processed_memory.extraction_timestamp = datetime.now()
+
+                except Exception as e:
+                    logger.warning(
+                        f"Structured outputs failed for {chat_id}, falling back to manual parsing: {e}"
+                    )
+                    self._supports_structured_outputs = (
+                        False  # Disable for future calls
+                    )
+                    processed_memory = None
+
+            # Fallback to manual parsing if structured outputs failed or not supported
+            if processed_memory is None:
+                processed_memory = await self._process_with_fallback_parsing(
+                    chat_id, system_prompt, conversation_text, context_info
                 )

-            processed_memory = completion.choices[0].message.parsed
-
-            # Apply filters if provided
-            if filters and not self._passes_filters(processed_memory, filters):
-                processed_memory.should_store = False
-                processed_memory.storage_reasoning = (
-                    "Filtered out based on memory filters"
-                )
-
-            # Add processing metadata
-            processed_memory.processing_metadata = {
-                "chat_id": chat_id,
-                "model": self.model,
-                "processed_at": datetime.now().isoformat(),
-                "agent_version": "v1.0_pydantic",
-            }
-
             logger.debug(
-                f"Processed conversation {chat_id}: category={processed_memory.category.primary_category}, should_store={processed_memory.should_store}"
+                f"Processed conversation {chat_id}: "
+                f"classification={processed_memory.classification}, "
+                f"importance={processed_memory.importance}, "
+                f"conscious_context={processed_memory.is_user_context}, "
+                f"promotion_eligible={processed_memory.promotion_eligible}"
             )
+
             return processed_memory

         except Exception as e:
             logger.error(f"Memory agent processing failed for {chat_id}: {e}")
-            return self._create_empty_memory(chat_id, f"Processing failed: {str(e)}")
+            return self._create_empty_long_term_memory(
+                chat_id, f"Processing failed: {str(e)}"
+            )
+
+    def _create_empty_long_term_memory(
+        self, chat_id: str, reason: str
+    ) -> ProcessedLongTermMemory:
+        """Create an empty long-term memory object for error cases"""
+        return ProcessedLongTermMemory(
+            content="Processing failed",
+            summary="Processing failed",
+            classification=MemoryClassification.CONVERSATIONAL,
+            importance=MemoryImportanceLevel.LOW,
+            conversation_id=chat_id,
+            classification_reason=reason,
+            confidence_score=0.0,
+            extraction_timestamp=datetime.now(),
+        )

-    def process_conversation_sync(
+    # === DEDUPLICATION & FILTERING METHODS ===
+
+    async def detect_duplicates(
         self,
-        chat_id: str,
-        user_input: str,
-        ai_output: str,
-        context: Optional[ConversationContext] = None,
-        mem_prompt: Optional[str] = None,
-        filters: Optional[Dict[str, Any]] = None,
-    ) -> ProcessedMemory:
+        new_memory: ProcessedLongTermMemory,
+        existing_memories: List[ProcessedLongTermMemory],
+        similarity_threshold: float = 0.8,
+    ) -> Optional[str]:
         """
-        Synchronous version of process_conversation for compatibility
+        Detect if new memory is a duplicate of existing memories
+
+        Args:
+            new_memory: New memory to check
+            existing_memories: List of existing memories to compare against
+            similarity_threshold: Threshold for considering memories similar
+
+        Returns:
+            Memory ID of duplicate if found, None otherwise
+        """
+        # Simple text similarity check - could be enhanced with embeddings
+        new_content = new_memory.content.lower().strip()
+        new_summary = new_memory.summary.lower().strip()
+
+        for existing in existing_memories:
+            existing_content = existing.content.lower().strip()
+            existing_summary = existing.summary.lower().strip()
+
+            # Check content similarity
+            content_similarity = self._calculate_similarity(
+                new_content, existing_content
+            )
+            summary_similarity = self._calculate_similarity(
+                new_summary, existing_summary
+            )
+
+            # Average similarity score
+            avg_similarity = (content_similarity + summary_similarity) / 2
+
+            if avg_similarity >= similarity_threshold:
+                logger.info(
+                    f"Duplicate detected: {avg_similarity:.2f} similarity with {existing.conversation_id}"
+                )
+                return existing.conversation_id
+
+        return None
+
+    def _calculate_similarity(self, text1: str, text2: str) -> float:
+        """
+        Simple text similarity calculation using word overlap
+        Could be enhanced with more sophisticated methods
+        """
+        if not text1 or not text2:
+            return 0.0
+
+        # Simple word-based similarity
+        words1 = set(text1.split())
+        words2 = set(text2.split())
+
+        if not words1 or not words2:
+            return 0.0
+
+        intersection = len(words1.intersection(words2))
+        union = len(words1.union(words2))
+
+        return intersection / union if union > 0 else 0.0
+
+    def _detect_structured_output_support(self) -> bool:
+        """
+        Detect if the current provider/endpoint supports OpenAI structured outputs
+
+        Returns:
+            True if structured outputs are likely supported, False otherwise
         """
         try:
-            # Prepare conversation content
-            conversation_text = f"User: {user_input}\nAssistant: {ai_output}"
+            # Check if we have a provider config with custom base_url
+            if self.provider_config and hasattr(self.provider_config, "base_url"):
+                base_url = self.provider_config.base_url
+                if base_url:
+                    # Local/custom endpoints typically don't support beta features
+                    if "localhost" in base_url or "127.0.0.1" in base_url:
+                        logger.debug(
+                            f"Detected local endpoint ({base_url}), disabling structured outputs"
+                        )
+                        return False
+                    # Custom endpoints that aren't OpenAI
+                    if "api.openai.com" not in base_url:
+                        logger.debug(
+                            f"Detected custom endpoint ({base_url}), disabling structured outputs"
+                        )
+                        return False
+
+            # Check for Azure endpoints (they may or may not support beta features)
+            if self.provider_config and hasattr(self.provider_config, "api_type"):
+                if self.provider_config.api_type == "azure":
+                    logger.debug(
+                        "Detected Azure endpoint, enabling structured outputs (may need manual verification)"
+                    )
+                    return (
+                        True  # Azure may support it, let it try and fallback if needed
+                    )
+                elif self.provider_config.api_type in ["custom", "openai_compatible"]:
+                    logger.debug(
+                        f"Detected {self.provider_config.api_type} endpoint, disabling structured outputs"
+                    )
+                    return False
+
+            # Default: assume OpenAI endpoint supports structured outputs
+            logger.debug("Assuming OpenAI endpoint, enabling structured outputs")
+            return True

-            # Build system prompt
-            system_prompt = self.SYSTEM_PROMPT
-            if mem_prompt:
-                system_prompt += f"\n\nSPECIAL FOCUS: {mem_prompt}"
+        except Exception as e:
+            logger.debug(
+                f"Error detecting structured output support: {e}, defaulting to enabled"
+            )
+            return True

-            # Prepare context information
-            context_info = ""
-            if context:
-                context_info = f"""
-CONVERSATION CONTEXT:
-- Session: {context.session_id}
-- Model: {context.model_used}
-- User Projects: {', '.join(context.current_projects) if context.current_projects else 'None specified'}
-- Relevant Skills: {', '.join(context.relevant_skills) if context.relevant_skills else 'None specified'}
-- Topic Thread: {context.topic_thread or 'General conversation'}
-"""
+    async def _process_with_fallback_parsing(
+        self,
+        chat_id: str,
+        system_prompt: str,
+        conversation_text: str,
+        context_info: str,
+    ) -> ProcessedLongTermMemory:
+        """
+        Process conversation using regular chat completions with manual JSON parsing
+
+        This method works with any OpenAI-compatible API that supports chat completions
+        but doesn't support structured outputs (like Ollama, local models, etc.)
+        """
+        try:
+            # Enhanced system prompt for JSON output
+            json_system_prompt = (
+                system_prompt
+                + "\n\nIMPORTANT: You MUST respond with a valid JSON object that matches this exact schema:\n"
+            )
+            json_system_prompt += self._get_json_schema_prompt()
+            json_system_prompt += "\n\nRespond ONLY with the JSON object, no additional text or formatting."

-            # Call OpenAI Structured Outputs
-            completion = self.client.beta.chat.completions.parse(
+            # Call regular chat completions
+            completion = await self.async_client.chat.completions.create(
                 model=self.model,
                 messages=[
-                    {"role": "system", "content": system_prompt},
+                    {"role": "system", "content": json_system_prompt},
                     {
                         "role": "user",
-                        "content": f"Process this conversation for memory storage:\n\n{conversation_text}\n{context_info}",
+                        "content": f"Process this conversation for enhanced memory storage:\n\n{conversation_text}\n{context_info}",
                     },
                 ],
-                response_format=ProcessedMemory,
-                temperature=0.1,
+                temperature=0.1,  # Low temperature for consistent processing
+                max_tokens=2000,  # Ensure enough tokens for full response
             )

-            # Handle potential refusal
-            if completion.choices[0].message.refusal:
-                logger.warning(
-                    f"Memory processing refused for chat {chat_id}: {completion.choices[0].message.refusal}"
-                )
-                return self._create_empty_memory(
-                    chat_id, "Processing refused for safety reasons"
+            # Extract and parse JSON response
+            response_text = completion.choices[0].message.content
+            if not response_text:
+                raise ValueError("Empty response from model")
+
+            # Clean up response (remove markdown formatting if present)
+            response_text = response_text.strip()
+            if response_text.startswith("```json"):
+                response_text = response_text[7:]
+            if response_text.startswith("```"):
+                response_text = response_text[3:]
+            if response_text.endswith("```"):
+                response_text = response_text[:-3]
+            response_text = response_text.strip()
+
+            # Parse JSON
+            try:
+                parsed_data = json.loads(response_text)
+            except json.JSONDecodeError as e:
+                logger.error(f"Failed to parse JSON response for {chat_id}: {e}")
+                logger.debug(f"Raw response: {response_text}")
+                return self._create_empty_long_term_memory(
+                    chat_id, f"JSON parsing failed: {e}"
                 )

-            processed_memory = completion.choices[0].message.parsed
+            # Convert to ProcessedLongTermMemory object with validation and defaults
+            processed_memory = self._create_memory_from_dict(parsed_data, chat_id)

-            # Apply filters if provided
-            if filters and not self._passes_filters(processed_memory, filters):
-                processed_memory.should_store = False
-                processed_memory.storage_reasoning = (
-                    "Filtered out based on memory filters"
-                )
+            logger.debug(
+                f"Successfully parsed memory using fallback method for {chat_id}"
+            )
+            return processed_memory

-            # Add processing metadata
-            processed_memory.processing_metadata = {
-                "chat_id": chat_id,
-                "model": self.model,
-                "processed_at": datetime.now().isoformat(),
-                "agent_version": "v1.0_pydantic",
-            }
+        except Exception as e:
+            logger.error(f"Fallback memory processing failed for {chat_id}: {e}")
+            return self._create_empty_long_term_memory(
+                chat_id, f"Fallback processing failed: {str(e)}"
+            )

-            logger.debug(
-                f"Processed conversation {chat_id}: category={processed_memory.category.primary_category}, should_store={processed_memory.should_store}"
+    def _get_json_schema_prompt(self) -> str:
+        """
+        Get JSON schema description for manual parsing
+        """
+        return """{
+    "content": "string - The actual memory content",
+    "summary": "string - Concise summary for search",
+    "classification": "string - One of: essential, contextual, conversational, reference, personal, conscious-info",
+    "importance": "string - One of: critical, high, medium, low",
+    "topic": "string or null - Main topic/subject",
+    "entities": ["array of strings - People, places, technologies mentioned"],
+    "keywords": ["array of strings - Key terms for search"],
+    "is_user_context": "boolean - Contains user personal info",
+    "is_preference": "boolean - User preference/opinion",
+    "is_skill_knowledge": "boolean - User's abilities/expertise",
+    "is_current_project": "boolean - Current work context",
+    "classification_reason": "string - Why this classification was chosen",
+    "confidence_score": "number - AI confidence in extraction (0.0-1.0)",
+    "promotion_eligible": "boolean - Should be promoted to short-term"
+}"""
+
+    def _create_memory_from_dict(
+        self, data: Dict[str, Any], chat_id: str
+    ) -> ProcessedLongTermMemory:
+        """
+        Create ProcessedLongTermMemory from dictionary with proper validation and defaults
+        """
+        try:
+            # Import here to avoid circular imports
+            from ..utils.pydantic_models import (
+                MemoryClassification,
+                MemoryImportanceLevel,
             )
+
+            # Validate and convert classification
+            classification_str = (
+                data.get("classification", "conversational").lower().replace("_", "-")
+            )
+            try:
+                classification = MemoryClassification(classification_str)
+            except ValueError:
+                logger.warning(
+                    f"Invalid classification '{classification_str}', using 'conversational'"
+                )
+                classification = MemoryClassification.CONVERSATIONAL
+
+            # Validate and convert importance
+            importance_str = data.get("importance", "medium").lower()
+            try:
+                importance = MemoryImportanceLevel(importance_str)
+            except ValueError:
+                logger.warning(f"Invalid importance '{importance_str}', using 'medium'")
+                importance = MemoryImportanceLevel.MEDIUM
+
+            # Create memory object with proper validation
+            processed_memory = ProcessedLongTermMemory(
+                content=data.get("content", "No content extracted"),
+                summary=data.get("summary", "No summary available"),
+                classification=classification,
+                importance=importance,
+                topic=data.get("topic"),
+                entities=data.get("entities", []),
+                keywords=data.get("keywords", []),
+                is_user_context=bool(data.get("is_user_context", False)),
+                is_preference=bool(data.get("is_preference", False)),
+                is_skill_knowledge=bool(data.get("is_skill_knowledge", False)),
+                is_current_project=bool(data.get("is_current_project", False)),
+                conversation_id=chat_id,
+                confidence_score=float(data.get("confidence_score", 0.7)),
+                classification_reason=data.get(
+                    "classification_reason", "Extracted via fallback parsing"
+                ),
+                promotion_eligible=bool(data.get("promotion_eligible", False)),
+                extraction_timestamp=datetime.now(),
+            )
+
             return processed_memory

         except Exception as e:
-            logger.error(f"Memory agent processing failed for {chat_id}: {e}")
-            return self._create_empty_memory(chat_id, f"Processing failed: {str(e)}")
+            logger.error(f"Error creating memory from dict: {e}")
+            return self._create_empty_long_term_memory(
+                chat_id, f"Memory creation failed: {str(e)}"
+            )

-    def _passes_filters(self, memory: ProcessedMemory, filters: Dict[str, Any]) -> bool:
-        """Check if processed memory passes configured filters"""
+    def should_filter_memory(
+        self, memory: ProcessedLongTermMemory, filters: Optional[Dict[str, Any]] = None
+    ) -> bool:
+        """
+        Determine if memory should be filtered out

-        # Include keywords filter
-        if "include_keywords" in filters:
-            include_keywords = filters["include_keywords"]
-            content_lower = memory.searchable_content.lower()
-            if not any(
-                keyword.lower() in content_lower for keyword in include_keywords
-            ):
-                return False
+        Args:
+            memory: Memory to check
+            filters: Optional filtering criteria

-        # Exclude keywords filter
-        if "exclude_keywords" in filters:
-            exclude_keywords = filters["exclude_keywords"]
-            content_lower = memory.searchable_content.lower()
-            if any(keyword.lower() in content_lower for keyword in exclude_keywords):
-                return False
+        Returns:
+            True if memory should be filtered out, False otherwise
+        """
+        if not filters:
+            return False

-        # Minimum importance filter
+        # Classification filter
+        if "exclude_classifications" in filters:
+            if memory.classification in filters["exclude_classifications"]:
+                return True
+
+        # Importance filter
         if "min_importance" in filters:
-            if memory.importance.importance_score < filters["min_importance"]:
-                return False
-
-        # Category filter
-        if "allowed_categories" in filters:
-            if memory.category.primary_category not in filters["allowed_categories"]:
-                return False
-
-        return True
-
-    def _create_empty_memory(self, chat_id: str, reason: str) -> ProcessedMemory:
-        """Create an empty memory object for error cases"""
-        from ..utils.pydantic_models import (
-            ExtractedEntities,
-            MemoryCategory,
-            MemoryCategoryType,
-            MemoryImportance,
-            RetentionType,
-        )
+            importance_map = {"critical": 4, "high": 3, "medium": 2, "low": 1}

-        return ProcessedMemory(
-            category=MemoryCategory(
-                primary_category=MemoryCategoryType.fact,
-                confidence_score=0.0,
-                reasoning="Failed to process",
-            ),
-            entities=ExtractedEntities(),
-            importance=MemoryImportance(
-                importance_score=0.0,
-                retention_type=RetentionType.short_term,
-                reasoning="Processing failed",
-            ),
-            summary="Processing failed",
-            searchable_content="",
-            should_store=False,
-            storage_reasoning=reason,
-            processing_metadata={"chat_id": chat_id, "error": reason},
-        )
+            min_level = importance_map.get(filters["min_importance"], 1)
+            memory_level = importance_map.get(memory.importance, 1)

-    def determine_storage_location(self, processed_memory: ProcessedMemory) -> str:
-        """Determine appropriate storage location based on memory properties"""
+            if memory_level < min_level:
+                return True

-        if processed_memory.category.primary_category == MemoryCategoryType.rule:
-            return "rules_memory"
+        # Confidence filter
+        if "min_confidence" in filters:
+            if memory.confidence_score < filters["min_confidence"]:
+                return True

-        if processed_memory.importance.retention_type == RetentionType.permanent:
-            return "long_term_memory"
-        elif processed_memory.importance.retention_type == RetentionType.long_term:
-            return "long_term_memory"
-        else:
-            return "short_term_memory"
+        # Content filters
+        if "exclude_keywords" in filters:
+            content_lower = memory.content.lower()
+            if any(
+                keyword.lower() in content_lower
+                for keyword in filters["exclude_keywords"]
+            ):
+                return True
+
+        # Length filter
+        if "min_content_length" in filters:
+            if len(memory.content.strip()) < filters["min_content_length"]:
+                return True
+
+        return False
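
Taken together, the new methods form an async pipeline: extract, filter, deduplicate. A minimal end-to-end sketch follows, using only the signatures visible in this diff; the surrounding wiring and the example filter thresholds are assumptions for illustration.

    import asyncio

    from memori.agents.memory_agent import MemoryAgent

    async def main():
        agent = MemoryAgent(api_key="sk-...")  # backward-compatible constructor

        # Extract a structured memory from one conversation turn.
        memory = await agent.process_conversation_async(
            chat_id="chat-001",
            user_input="I'm Sam, a Python developer building a CLI tool.",
            ai_output="Nice to meet you, Sam! Happy to help with the CLI.",
            existing_memories=["User prefers concise answers"],
        )

        # Drop low-value results via the new filter hook (thresholds illustrative).
        if agent.should_filter_memory(
            memory, {"min_importance": "medium", "min_confidence": 0.5}
        ):
            return

        # Word-overlap deduplication against previously stored memories
        # (an empty list here, so no duplicate can be found).
        duplicate_of = await agent.detect_duplicates(memory, existing_memories=[])
        if duplicate_of is None:
            print(memory.classification, memory.importance, memory.summary)

    asyncio.run(main())

Note that `_calculate_similarity` is a Jaccard word-overlap score: "python web app" vs "python cli app" share 2 of 4 unique words, scoring 0.5, which falls below the default 0.8 duplicate threshold.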