ambivo-agents 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,606 @@
1
+ # ambivo_agents/core/history.py
2
+ """
3
+ BaseAgentHistoryMixin - Shared conversation context functionality for agents
4
+ Provides standardized methods for agents to access and use conversation history
5
+ """
6
+
7
+ import re
8
+ import logging
9
+ from abc import ABC, abstractmethod
10
+ from typing import Dict, List, Any, Optional, Callable, Union, Pattern
11
+ from dataclasses import dataclass, field
12
+ from datetime import datetime, timedelta
13
+ from enum import Enum
14
+
15
+
16
class ContextType(Enum):
    """Kinds of context that can be extracted from conversation history.

    Each member's value is the string key used when extracted context is
    serialized alongside a message (see ``extracted_context`` in the history
    helpers).
    """

    URL = "url"
    FILE_PATH = "file_path"
    KNOWLEDGE_BASE = "knowledge_base"
    SEARCH_TERM = "search_term"
    DOCUMENT_NAME = "document_name"
    MEDIA_FILE = "media_file"
    CODE_REFERENCE = "code_reference"
    CUSTOM = "custom"
26
+
27
+
28
@dataclass
class ContextItem:
    """A single piece of context extracted from a conversation message."""

    # The extracted text itself (e.g. a URL or a file name).
    value: str
    # Which extractor category produced the value.
    context_type: ContextType
    # Text of the message the value was found in (callers may truncate it).
    source_message: str
    # When the source message was recorded.
    timestamp: datetime
    # Extraction confidence; defaults to 1.0.
    confidence: float = 1.0
    # Free-form extra information; a fresh dict is created per instance.
    metadata: Dict[str, Any] = field(default_factory=dict)
37
+
38
+
39
@dataclass
class ConversationState:
    """Tracks the current state of a conversation.

    All fields are optional or default to empty containers, so a bare
    ``ConversationState()`` represents a conversation with no known state.
    """

    # Resource (URL or file path) most recently referenced.
    current_resource: Optional[str] = None
    # Operation currently being performed, when the caller supplied one.
    current_operation: Optional[str] = None
    # Intent detected in the most recent message, if any.
    last_intent: Optional[str] = None
    # File paths seen so far, in first-seen order.
    working_files: List[str] = field(default_factory=list)
    # Knowledge base names seen so far, in first-seen order.
    knowledge_bases: List[str] = field(default_factory=list)
    # Free-form extra state; a fresh dict is created per instance.
    metadata: Dict[str, Any] = field(default_factory=dict)
48
+
49
+
50
class BaseAgentHistoryMixin:
    """
    Mixin providing conversation history and context functionality for agents.

    This mixin adds the ability for agents to:
    1. Extract context from conversation history
    2. Maintain conversation state
    3. Detect intent and resolve missing context
    4. Provide standardized history-aware processing

    The history helpers read ``self.memory`` (an object exposing
    ``get_recent_messages(limit=..., conversation_id=...)``) and
    ``self.context`` (read for its optional ``conversation_id`` attribute);
    both are expected to be provided by the host agent class.

    Usage:
        class MyAgent(BaseAgent, BaseAgentHistoryMixin):
            def __init__(self, **kwargs):
                super().__init__(**kwargs)
                self.setup_history_mixin()
    """

    # Patterns backing the default extractors, compiled once per class.
    _URL_PATTERN = re.compile(r'https?://[^\s<>"{}|\\^`\[\]]+', re.IGNORECASE)
    # A lookahead (rather than a consuming group) keeps the whitespace that
    # terminates a path out of the extracted value.
    _FILE_PATH_PATTERN = re.compile(r'[^\s]+\.[a-zA-Z0-9]{2,4}(?=\s|$)')
    _KNOWLEDGE_BASE_PATTERN = re.compile(
        r'\b[a-zA-Z][a-zA-Z0-9_-]*(?:_kb|_base|_knowledge)\b', re.IGNORECASE
    )

    # Words that usually point back at something mentioned earlier.
    _REFERENCE_PRONOUNS = ('that', 'this', 'it', 'them', 'those', 'these')

    def setup_history_mixin(self):
        """Initialize the history mixin - call this in agent __init__"""
        self.conversation_state = ConversationState()
        self.context_extractors: Dict[ContextType, Callable] = {}
        self.intent_keywords: Dict[str, List[str]] = {}
        self.logger = logging.getLogger(f"{self.__class__.__name__}-History")

        # Register default extractors
        self._register_default_extractors()

        # Register default intent keywords
        self._register_default_intents()

    def _register_default_extractors(self):
        """Register default context extractors"""
        # URL extractor (works for various URL types)
        self.register_context_extractor(ContextType.URL, self._URL_PATTERN.findall)

        # File path extractor (token ending in a 2-4 character extension)
        self.register_context_extractor(ContextType.FILE_PATH, self._FILE_PATH_PATTERN.findall)

        # Knowledge base name extractor (names ending in _kb/_base/_knowledge)
        self.register_context_extractor(
            ContextType.KNOWLEDGE_BASE, self._KNOWLEDGE_BASE_PATTERN.findall
        )

    def _register_default_intents(self):
        """Register default intent keywords for common operations"""
        self.intent_keywords = {
            'download': ['download', 'get', 'fetch', 'retrieve', 'save'],
            'upload': ['upload', 'ingest', 'add', 'import', 'insert'],
            'query': ['search', 'find', 'query', 'look', 'check'],
            'process': ['convert', 'transform', 'process', 'extract', 'generate'],
            'analyze': ['analyze', 'examine', 'inspect', 'review', 'evaluate'],
            'modify': ['edit', 'change', 'update', 'modify', 'alter'],
            'delete': ['delete', 'remove', 'clear', 'drop', 'destroy']
        }

    @staticmethod
    def _has_term(text: str, term: str, whole_word: bool = False) -> bool:
        """
        Check whether ``term`` occurs in ``text`` starting at a word boundary.

        Args:
            text: Text to search (callers lower-case it first)
            term: Literal word or phrase to look for
            whole_word: When True the term must also end at a word boundary

        Returns:
            True if the term is found
        """
        pattern = r'(?<!\w)' + re.escape(term)
        if whole_word:
            pattern += r'(?!\w)'
        return re.search(pattern, text) is not None

    @staticmethod
    def _parse_timestamp(raw: Any) -> datetime:
        """
        Convert a stored message timestamp into a datetime.

        Args:
            raw: A datetime, an ISO-8601 string, or anything else

        Returns:
            The parsed datetime, or the current time when ``raw`` is missing
            or cannot be parsed
        """
        if isinstance(raw, datetime):
            return raw
        if isinstance(raw, str):
            try:
                return datetime.fromisoformat(raw)
            except ValueError:
                pass
        return datetime.now()

    # ========================
    # CONTEXT EXTRACTOR METHODS
    # ========================

    def register_context_extractor(self, context_type: "ContextType",
                                   extractor_func: Callable[[str], List[str]]):
        """
        Register a custom context extractor function

        Registering a second extractor for the same type replaces the first.

        Args:
            context_type: Type of context this extractor handles
            extractor_func: Function that takes text and returns list of extracted items
        """
        self.context_extractors[context_type] = extractor_func

    def extract_context_from_text(self, text: str, context_type: "ContextType") -> List[str]:
        """
        Extract specific type of context from text

        Args:
            text: Text to extract context from
            context_type: Type of context to extract

        Returns:
            List of extracted context items; empty when no extractor is
            registered for the type or the extractor raises
        """
        extractor = self.context_extractors.get(context_type)
        if not extractor:
            self.logger.warning(f"No extractor registered for context type: {context_type}")
            return []

        try:
            return extractor(text)
        except Exception as e:
            # Extractors are user-supplied; a failing one must not break the agent.
            self.logger.error(f"Error extracting {context_type} from text: {e}")
            return []

    def extract_all_context_from_text(self, text: str) -> "Dict[ContextType, List[str]]":
        """
        Extract all types of context from text

        Args:
            text: Text to extract context from

        Returns:
            Dictionary mapping context types to extracted items; types that
            yield nothing are omitted
        """
        context = {}
        for context_type in self.context_extractors:
            items = self.extract_context_from_text(text, context_type)
            if items:
                context[context_type] = items
        return context

    # ========================
    # CONVERSATION HISTORY METHODS
    # ========================

    def get_conversation_history_with_context(self,
                                              limit: int = 10,
                                              context_types: "Optional[List[ContextType]]" = None
                                              ) -> List[Dict[str, Any]]:
        """
        Get conversation history with extracted context

        Args:
            limit: Maximum number of messages to retrieve
            context_types: Specific context types to extract (None for all)

        Returns:
            List of messages (dict entries only), each copied and extended
            with ``extracted_context`` (keyed by context type value) and
            ``has_context``. Empty when no memory manager is available or
            retrieval fails.
        """
        try:
            if not hasattr(self, 'memory') or not self.memory:
                self.logger.warning("No memory manager available")
                return []

            # Get raw history
            history = self.memory.get_recent_messages(
                limit=limit,
                conversation_id=getattr(self.context, 'conversation_id', None)
            )

            # Enrich with context
            enriched_history = []
            for msg in history:
                if not isinstance(msg, dict):
                    continue

                # Treat missing or None content as empty text.
                content = msg.get('content') or ''

                if context_types:
                    extracted_context = {}
                    for ctx_type in context_types:
                        items = self.extract_context_from_text(content, ctx_type)
                        if items:
                            extracted_context[ctx_type.value] = items
                else:
                    # Convert enum keys to strings for JSON serialization
                    extracted_context = {
                        ctx_type.value: items
                        for ctx_type, items in self.extract_all_context_from_text(content).items()
                    }

                enriched_history.append({
                    **msg,
                    'extracted_context': extracted_context,
                    'has_context': bool(extracted_context)
                })

            return enriched_history

        except Exception as e:
            self.logger.error(f"Error getting conversation history with context: {e}")
            return []

    def get_recent_context_items(self,
                                 context_type: "ContextType",
                                 limit: int = 5,
                                 max_messages: int = 10) -> "List[ContextItem]":
        """
        Get recent context items of specific type from conversation history

        Args:
            context_type: Type of context to retrieve
            limit: Maximum number of context items to return
            max_messages: Maximum number of messages to search through

        Returns:
            List of ContextItem objects, most recent first; empty when
            ``limit`` is not positive or history retrieval fails
        """
        if limit <= 0:
            return []

        try:
            history = self.get_conversation_history_with_context(
                limit=max_messages,
                context_types=[context_type]
            )

            context_items = []
            # NOTE(review): assumes the memory manager returns messages
            # oldest-first, so reversing yields most recent first - confirm.
            for msg in reversed(history):
                if not msg.get('has_context'):
                    continue

                for item in msg.get('extracted_context', {}).get(context_type.value, []):
                    context_items.append(ContextItem(
                        value=item,
                        context_type=context_type,
                        source_message=str(msg.get('content') or '')[:100],
                        # A malformed timestamp must not discard every item.
                        timestamp=self._parse_timestamp(msg.get('timestamp')),
                        metadata={'message_id': msg.get('id')}
                    ))
                    if len(context_items) >= limit:
                        return context_items

            return context_items

        except Exception as e:
            self.logger.error(f"Error getting recent context items: {e}")
            return []

    def get_most_recent_context(self, context_type: "ContextType") -> Optional[str]:
        """
        Get the most recent context item of specific type

        Args:
            context_type: Type of context to retrieve

        Returns:
            Most recent context item or None
        """
        items = self.get_recent_context_items(context_type, limit=1)
        return items[0].value if items else None

    # ========================
    # INTENT DETECTION METHODS
    # ========================

    def detect_intent(self, message: str) -> Optional[str]:
        """
        Detect user intent from message

        Keywords match case-insensitively at the start of a word, so
        "downloading" counts for "download" but "together" does not count
        for "get". Intents are tried in registration order and the first
        match wins.

        Args:
            message: Message to analyze

        Returns:
            Detected intent or None
        """
        content_lower = message.lower()

        for intent, keywords in self.intent_keywords.items():
            if any(self._has_term(content_lower, keyword) for keyword in keywords):
                return intent

        return None

    def has_intent_without_context(self, message: str,
                                   required_context_types: "List[ContextType]") -> bool:
        """
        Check if message has intent but lacks required context

        Args:
            message: Message to analyze
            required_context_types: Context types required for this intent

        Returns:
            True if intent detected but at least one required context type
            is missing from the message
        """
        if not self.detect_intent(message):
            return False

        return any(
            not self.extract_context_from_text(message, context_type)
            for context_type in required_context_types
        )

    def should_check_history(self, message: str, context_types: "List[ContextType]") -> bool:
        """
        Determine if agent should check conversation history for context

        Args:
            message: Current message
            context_types: Context types that might be needed

        Returns:
            True if the message contains a referring pronoun, has an intent
            but lacks required context, or is shorter than six words
        """
        # Pronouns must match as whole words; a raw substring test would
        # fire on "with", "write", "item", ... and make this always true.
        content_lower = message.lower()
        has_pronouns = any(
            self._has_term(content_lower, pronoun, whole_word=True)
            for pronoun in self._REFERENCE_PRONOUNS
        )

        # Check for intent without context
        has_intent_without_context = self.has_intent_without_context(message, context_types)

        # Check for short messages (likely missing context)
        is_short_message = len(message.split()) < 6

        return has_pronouns or has_intent_without_context or is_short_message

    # ========================
    # CONVERSATION STATE METHODS
    # ========================

    def update_conversation_state(self, message: str, operation: Optional[str] = None):
        """
        Update conversation state based on current message

        Args:
            message: Current message
            operation: Operation being performed (optional)
        """
        state = self.conversation_state

        # Update last intent (reset to None when no intent is detected)
        state.last_intent = self.detect_intent(message)

        # Update current operation
        if operation:
            state.current_operation = operation

        # Extract and update resources
        all_context = self.extract_all_context_from_text(message)
        urls = all_context.get(ContextType.URL, [])
        file_paths = all_context.get(ContextType.FILE_PATH, [])

        # Update current resource (prioritize URLs, then files)
        if urls:
            state.current_resource = urls[0]
        elif file_paths:
            state.current_resource = file_paths[0]

        # Update working files
        for file_path in file_paths:
            if file_path not in state.working_files:
                state.working_files.append(file_path)

        # Update knowledge bases
        for kb in all_context.get(ContextType.KNOWLEDGE_BASE, []):
            if kb not in state.knowledge_bases:
                state.knowledge_bases.append(kb)

    def get_conversation_state(self) -> "ConversationState":
        """Get current conversation state"""
        return self.conversation_state

    def clear_conversation_state(self):
        """Clear conversation state"""
        self.conversation_state = ConversationState()

    # ========================
    # HIGH-LEVEL HELPER METHODS
    # ========================

    def resolve_context_for_message(self,
                                    message: str,
                                    required_context_types: "List[ContextType]"
                                    ) -> "Dict[ContextType, Optional[str]]":
        """
        Resolve context for a message by checking current message and history

        Args:
            message: Current message
            required_context_types: Types of context needed

        Returns:
            Dictionary mapping context types to resolved values (None when a
            type is found neither in the message nor in history)
        """
        # First, try to extract from current message
        current_context = self.extract_all_context_from_text(message)

        resolved_context = {}
        for context_type in required_context_types:
            found = current_context.get(context_type)
            if found:
                # Found in current message
                resolved_context[context_type] = found[0]
            else:
                # Not found, check history
                resolved_context[context_type] = self.get_most_recent_context(context_type)

        return resolved_context

    def process_message_with_context_resolution(self,
                                                message: str,
                                                required_context_types: "List[ContextType]",
                                                processor_func: Callable) -> Any:
        """
        Process a message with automatic context resolution

        Args:
            message: Message to process
            required_context_types: Context types needed for processing
            processor_func: Function called as
                ``processor_func(message, resolved_context)``

        Returns:
            Result from processor_func
        """
        # Resolve context
        resolved_context = self.resolve_context_for_message(message, required_context_types)

        # Update conversation state
        self.update_conversation_state(message)

        # Call processor with resolved context
        return processor_func(message, resolved_context)

    # ========================
    # DEBUGGING AND INTROSPECTION
    # ========================

    def debug_conversation_context(self) -> Dict[str, Any]:
        """
        Get debugging information about conversation context

        Returns:
            Dictionary with debugging information, or ``{'error': ...}`` if
            gathering it fails
        """
        try:
            recent_history = self.get_conversation_history_with_context(limit=5)
            state = self.conversation_state

            return {
                'conversation_state': {
                    'current_resource': state.current_resource,
                    'current_operation': state.current_operation,
                    'last_intent': state.last_intent,
                    'working_files': state.working_files,
                    'knowledge_bases': state.knowledge_bases
                },
                'registered_extractors': list(self.context_extractors.keys()),
                'intent_keywords': self.intent_keywords,
                'recent_context_summary': {
                    'messages_with_context': sum(
                        1 for msg in recent_history if msg.get('has_context')
                    ),
                    'total_messages': len(recent_history),
                    # Sorted so the debug output is stable between calls.
                    'context_types_found': sorted({
                        ctx_type
                        for msg in recent_history
                        for ctx_type in msg.get('extracted_context', {})
                    })
                }
            }
        except Exception as e:
            return {'error': str(e)}
497
+
498
+
499
+ # ========================
500
+ # SPECIALIZED MIXINS FOR SPECIFIC AGENT TYPES
501
+ # ========================
502
+
503
class MediaAgentHistoryMixin(BaseAgentHistoryMixin):
    """Specialized history mixin for media processing agents"""

    # The trailing (?!\w) requires the extension to end the word, so a token
    # such as "clip.movie" is not truncated into a bogus "clip.mov".
    _MEDIA_FILE_PATTERN = re.compile(
        r'[^\s]+\.(?:mp4|avi|mov|mkv|mp3|wav|flac|aac|m4a|webm|ogg)(?!\w)',
        re.IGNORECASE
    )

    def setup_history_mixin(self):
        """Initialize the base mixin, then add media extractors and intents"""
        super().setup_history_mixin()

        # Register media-specific extractors
        self.register_context_extractor(
            ContextType.MEDIA_FILE,
            self._MEDIA_FILE_PATTERN.findall
        )

        # Add media-specific intents
        # NOTE(review): these are added after the default intents and
        # detect_intent returns the first intent whose keyword matches, so
        # e.g. "get audio" resolves to 'download' and "convert video" to
        # 'process' before these entries are reached - confirm the intended
        # priority.
        self.intent_keywords.update({
            'extract_audio': ['extract audio', 'get audio', 'audio from'],
            'convert_video': ['convert video', 'convert to', 'change format'],
            'resize': ['resize', 'scale', 'change size'],
            'trim': ['trim', 'cut', 'clip'],
            'thumbnail': ['thumbnail', 'screenshot', 'frame']
        })

    def get_recent_media_file(self) -> Optional[str]:
        """Get most recent media file from conversation"""
        return self.get_most_recent_context(ContextType.MEDIA_FILE)
527
+
528
+
529
class KnowledgeBaseAgentHistoryMixin(BaseAgentHistoryMixin):
    """Specialized history mixin for knowledge base agents"""

    # The trailing (?!\w) requires the extension to end the word, so
    # "data.jsonl" or "notes.mdx" are not truncated into "data.json"/"notes.md".
    _DOCUMENT_NAME_PATTERN = re.compile(
        r'[^\s]+\.(?:pdf|docx|txt|md|html|csv|json)(?!\w)',
        re.IGNORECASE
    )

    def setup_history_mixin(self):
        """Initialize the base mixin, then add KB extractors and intents"""
        super().setup_history_mixin()

        # Register KB-specific extractors
        self.register_context_extractor(
            ContextType.DOCUMENT_NAME,
            self._DOCUMENT_NAME_PATTERN.findall
        )

        # Add KB-specific intents
        # NOTE(review): these are added after the default intents and
        # detect_intent returns the first intent whose keyword matches, so
        # "ingest"/"upload"/"import" resolve to 'upload' and "query"/"search"
        # to 'query' before these entries are reached - confirm the intended
        # priority.
        self.intent_keywords.update({
            'ingest': ['ingest', 'upload', 'add document', 'import'],
            'query_kb': ['query', 'search', 'find in', 'ask'],
            'create_kb': ['create knowledge base', 'new kb', 'make kb']
        })

    def get_current_knowledge_base(self) -> Optional[str]:
        """Get current knowledge base from state or history

        Prefers the knowledge base most recently added to the conversation
        state and falls back to the conversation history otherwise.
        """
        known = self.conversation_state.knowledge_bases
        if known:
            return known[-1]
        return self.get_most_recent_context(ContextType.KNOWLEDGE_BASE)

    def get_recent_document(self) -> Optional[str]:
        """Get most recent document from conversation"""
        return self.get_most_recent_context(ContextType.DOCUMENT_NAME)
557
+
558
+
559
class WebAgentHistoryMixin(BaseAgentHistoryMixin):
    """Specialized history mixin for web-related agents (search, scraping)"""

    # Each pattern captures the text following a search phrase. The capture
    # ends at a sentence-ending period (one followed by whitespace or end of
    # line) or at end of line, so periods inside tokens such as "3.12" or
    # "example.com" do not cut the term short.
    _SEARCH_TERM_PATTERNS = tuple(
        re.compile(prefix + r' (.+?)(?:\.(?=\s|$)|$)', re.IGNORECASE | re.MULTILINE)
        for prefix in (r'search for', r'find', r'look up')
    )

    def setup_history_mixin(self):
        """Initialize the base mixin, then add web extractors and intents"""
        super().setup_history_mixin()

        # Register web-specific extractors
        self.register_context_extractor(
            ContextType.SEARCH_TERM,
            self._extract_search_terms
        )

        # Add web-specific intents
        # NOTE(review): these are added after the default intents and
        # detect_intent returns the first intent whose keyword matches, so
        # "search"/"find" resolve to the default 'query' intent before the
        # 'search' entry is reached - confirm the intended priority.
        self.intent_keywords.update({
            'search': ['search', 'find', 'look up', 'search for'],
            'scrape': ['scrape', 'extract from', 'crawl'],
            'news_search': ['news', 'latest', 'recent'],
            'academic_search': ['research', 'academic', 'papers']
        })

    def _extract_search_terms(self, text: str) -> List[str]:
        """
        Extract search terms from text

        Double-quoted phrases take precedence. Otherwise the text following
        "search for", "find" or "look up" is used, trying those phrases in
        that order and returning the matches of the first that hits.

        Args:
            text: Text to extract search terms from

        Returns:
            List of search terms, empty when none are found
        """
        # Look for quoted phrases first
        quoted_terms = re.findall(r'"([^"]+)"', text)
        if quoted_terms:
            return quoted_terms

        # Extract terms after search keywords
        for pattern in self._SEARCH_TERM_PATTERNS:
            terms = [match.strip() for match in pattern.findall(text)]
            terms = [term for term in terms if term]  # drop whitespace-only captures
            if terms:
                return terms

        return []

    def get_recent_search_term(self) -> Optional[str]:
        """Get most recent search term from conversation"""
        return self.get_most_recent_context(ContextType.SEARCH_TERM)

    def get_recent_url(self) -> Optional[str]:
        """Get most recent URL from conversation"""
        return self.get_most_recent_context(ContextType.URL)