mcal-ai 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mcal/mcal.py ADDED
@@ -0,0 +1,1697 @@
1
+ """
2
+ MCAL: Memory-Context Alignment Layer
3
+
4
+ Main interface providing a standalone memory architecture for AI agents.
5
+
6
+ Architecture:
7
+ ┌─────────────────────────────────────────────┐
8
+ │ MCAL │
9
+ │ ┌─────────────────────────────────────┐ │
10
+ │ │ UnifiedExtractor (Single-pass) │ │
11
+ │ │ - Entities, Goals, Decisions │ │
12
+ │ │ - Relationships, Embeddings │ │
13
+ │ └─────────────────────────────────────┘ │
14
+ │ │ │
15
+ │ ▼ │
16
+ │ ┌─────────────────────────────────────┐ │
17
+ │ │ UnifiedGraph + VectorIndex │ │
18
+ │ │ (Graph storage + semantic search) │ │
19
+ │ └─────────────────────────────────────┘ │
20
+ └─────────────────────────────────────────────┘
21
+
22
+ Usage:
23
+ from mcal import MCAL
24
+
25
+ # Initialize (no external memory service required)
26
+ mcal = MCAL(
27
+ openai_api_key="..." # or anthropic_api_key, or use bedrock
28
+ )
29
+
30
+ # Add conversation (extracts facts + intents + reasoning)
31
+ result = await mcal.add(
32
+ messages=[
33
+ {"role": "user", "content": "Let's build a fraud detection system"},
34
+ {"role": "assistant", "content": "Great! What's your data source?"}
35
+ ],
36
+ user_id="user_123"
37
+ )
38
+
39
+ # Search with goal-awareness
40
+ context = await mcal.search(
41
+ query="What database should we use?",
42
+ user_id="user_123"
43
+ )
44
+
45
+ # Get assembled context for LLM
46
+ prompt_context = await mcal.get_context(
47
+ query="What's our next step?",
48
+ user_id="user_123",
49
+ max_tokens=4000
50
+ )
51
+ """
52
+
53
+ from __future__ import annotations
54
+
55
+ import asyncio
56
+ import logging
57
+ import time
58
+ from typing import Any, Optional, AsyncIterator
59
+ from dataclasses import dataclass, field
60
+
61
+ from anthropic import Anthropic
62
+ from openai import OpenAI, RateLimitError as OpenAIRateLimitError, APIError as OpenAIAPIError
63
+
64
+ from pathlib import Path
65
+
66
+ from .backends import MemoryBackend, MemoryEntry # MemoryEntry still needed for compatibility
67
+ from .core.models import (
68
+ IntentGraph,
69
+ IntentNode,
70
+ IntentType,
71
+ IntentStatus,
72
+ DecisionTrail,
73
+ Turn,
74
+ Session,
75
+ RetrievalConfig,
76
+ )
77
+ from .core.intent_tracker import IntentTracker
78
+ from .core.reasoning_store import ReasoningStore
79
+ from .core.goal_retriever import ContextAssembler
80
+ from .core.storage import MCALStorage
81
+ from .core.extraction_cache import ExtractionCache, CacheStats
82
+ from .core.unified_extractor import (
83
+ UnifiedExtractor,
84
+ UnifiedGraph,
85
+ NodeType,
86
+ EdgeType,
87
+ graph_to_memories,
88
+ memories_to_context_string,
89
+ )
90
+ from .core.retry import (
91
+ llm_retry,
92
+ classify_http_error,
93
+ LLMRateLimitError,
94
+ LLMServerError,
95
+ )
96
+ from .providers.bedrock import BedrockProvider
97
+ from .core.streaming import (
98
+ StreamEvent,
99
+ StreamEventType,
100
+ ExtractionPhase,
101
+ StreamProgress,
102
+ event_started,
103
+ event_phase_started,
104
+ event_phase_complete,
105
+ event_fact_extracted,
106
+ event_intent_extracted,
107
+ event_decision_extracted,
108
+ event_cache_hit,
109
+ event_error,
110
+ event_complete,
111
+ )
112
+
113
+ logger = logging.getLogger(__name__)
114
+
115
+
116
+ # =============================================================================
117
+ # LLM Client Wrapper
118
+ # =============================================================================
119
+
120
+ class AnthropicClient:
121
+ """Wrapper around Anthropic client for MCAL components."""
122
+
123
+ def __init__(self, api_key: str, model: str = "claude-sonnet-4-20250514"):
124
+ self.client = Anthropic(api_key=api_key)
125
+ self.model = model
126
+
127
+ @llm_retry(max_attempts=3, min_wait=1.0, max_wait=10.0)
128
+ async def complete(self, prompt: str, system: Optional[str] = None) -> str:
129
+ """
130
+ Generate completion with automatic retry on transient failures.
131
+
132
+ Retries on rate limits and server errors with exponential backoff.
133
+ """
134
+ messages = [{"role": "user", "content": prompt}]
135
+
136
+ try:
137
+ response = self.client.messages.create(
138
+ model=self.model,
139
+ max_tokens=4096,
140
+ system=system or "You are a helpful assistant.",
141
+ messages=messages
142
+ )
143
+ return response.content[0].text
144
+ except Exception as e:
145
+ # Anthropic SDK raises specific exceptions we can classify
146
+ error_str = str(e).lower()
147
+ if 'rate' in error_str or '429' in error_str:
148
+ raise LLMRateLimitError(f"Anthropic rate limit: {e}")
149
+ elif '500' in error_str or '502' in error_str or '503' in error_str or '504' in error_str:
150
+ raise LLMServerError(f"Anthropic server error: {e}")
151
+ raise
152
+
153
+
154
+ class OpenAIClient:
155
+ """Wrapper around OpenAI client for MCAL components."""
156
+
157
+ def __init__(self, api_key: str, model: str = "gpt-4o"):
158
+ self.client = OpenAI(api_key=api_key)
159
+ self.model = model
160
+ # Token tracking
161
+ self.total_prompt_tokens = 0
162
+ self.total_completion_tokens = 0
163
+
164
+ @llm_retry(max_attempts=3, min_wait=1.0, max_wait=10.0)
165
+ async def complete(self, prompt: str, system: Optional[str] = None) -> str:
166
+ """
167
+ Generate completion with automatic retry on transient failures.
168
+
169
+ Retries on rate limits (429) and server errors (5xx) with exponential backoff.
170
+ """
171
+ messages = [{"role": "user", "content": prompt}]
172
+ if system:
173
+ messages.insert(0, {"role": "system", "content": system})
174
+
175
+ try:
176
+ response = self.client.chat.completions.create(
177
+ model=self.model,
178
+ max_tokens=4096,
179
+ messages=messages
180
+ )
181
+
182
+ # Track token usage
183
+ if response.usage:
184
+ self.total_prompt_tokens += response.usage.prompt_tokens
185
+ self.total_completion_tokens += response.usage.completion_tokens
186
+
187
+ return response.choices[0].message.content
188
+ except OpenAIRateLimitError as e:
189
+ raise LLMRateLimitError(f"OpenAI rate limit: {e}")
190
+ except OpenAIAPIError as e:
191
+ # Check for server errors
192
+ if hasattr(e, 'status_code') and e.status_code in (500, 502, 503, 504):
193
+ raise LLMServerError(f"OpenAI server error ({e.status_code}): {e}")
194
+ raise
195
+
196
+ def get_token_usage(self) -> dict:
197
+ """Get cumulative token usage."""
198
+ return {
199
+ "prompt_tokens": self.total_prompt_tokens,
200
+ "completion_tokens": self.total_completion_tokens,
201
+ "total_tokens": self.total_prompt_tokens + self.total_completion_tokens
202
+ }
203
+
204
+ def reset_token_usage(self):
205
+ """Reset token counters."""
206
+ self.total_prompt_tokens = 0
207
+ self.total_completion_tokens = 0
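The counters above make simple per-run cost accounting possible. An illustrative use of the two accessors defined on this class (values are placeholders):

    client = OpenAIClient(api_key="sk-...", model="gpt-4o")
    # ... after one or more `await client.complete(...)` calls ...
    usage = client.get_token_usage()
    print(f"{usage['total_tokens']} tokens "
          f"({usage['prompt_tokens']} prompt / {usage['completion_tokens']} completion)")
    client.reset_token_usage()  # start a fresh accounting window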
208
+
209
+ class BedrockProviderWrapper:
210
+ """Wrapper around Bedrock provider for MCAL components."""
211
+
212
+ def __init__(self, model: str = "llama-3.3-70b", region: str = "us-east-1"):
213
+ self.provider = BedrockProvider(model=model, region=region)
214
+ self.model = model
215
+
216
+ async def complete(self, prompt: str, system: Optional[str] = None) -> str:
217
+ """Generate completion."""
218
+ messages = []
219
+
220
+ # This wrapper sends only user-role messages - merge the system prompt into the user prompt
221
+ if system:
222
+ combined_prompt = f"{system}\n\n{prompt}"
223
+ messages = [{"role": "user", "content": combined_prompt}]
224
+ else:
225
+ messages = [{"role": "user", "content": prompt}]
226
+
227
+ return await self.provider.generate(messages)
228
+
229
+
230
+ class TieredBedrockProvider:
231
+ """
232
+ Tiered Bedrock provider that routes to fast/smart models based on task complexity.
233
+
234
+ Fast model (8B): Simple tasks like intent detection, graph updates
235
+ Smart model (70B): Complex tasks like decision extraction, reasoning
236
+ """
237
+
238
+ def __init__(
239
+ self,
240
+ fast_model: str = "llama-3.1-8b",
241
+ smart_model: str = "llama-3.3-70b",
242
+ region: str = "us-east-1"
243
+ ):
244
+ self.fast_provider = BedrockProvider(model=fast_model, region=region)
245
+ self.smart_provider = BedrockProvider(model=smart_model, region=region)
246
+ self.fast_model = fast_model
247
+ self.smart_model = smart_model
248
+
249
+ # Track usage for metrics
250
+ self.fast_calls = 0
251
+ self.smart_calls = 0
252
+
253
+ async def complete(
254
+ self,
255
+ prompt: str,
256
+ system: Optional[str] = None,
257
+ use_smart: bool = False
258
+ ) -> str:
259
+ """
260
+ Generate completion using appropriate model tier.
261
+
262
+ Args:
263
+ prompt: The prompt to send
264
+ system: System prompt (merged with user prompt)
265
+ use_smart: Force use of smart model (for complex tasks)
266
+ """
267
+ # Build messages
268
+ if system:
269
+ combined_prompt = f"{system}\n\n{prompt}"
270
+ messages = [{"role": "user", "content": combined_prompt}]
271
+ else:
272
+ messages = [{"role": "user", "content": prompt}]
273
+
274
+ # Route to appropriate model
275
+ if use_smart:
276
+ self.smart_calls += 1
277
+ logger.debug(f"Using SMART model ({self.smart_model})")
278
+ return await self.smart_provider.generate(messages)
279
+ else:
280
+ self.fast_calls += 1
281
+ logger.debug(f"Using FAST model ({self.fast_model})")
282
+ return await self.fast_provider.generate(messages)
283
+
284
+ def get_usage_stats(self) -> dict:
285
+ """Get model usage statistics."""
286
+ total = self.fast_calls + self.smart_calls
287
+ return {
288
+ "fast_calls": self.fast_calls,
289
+ "smart_calls": self.smart_calls,
290
+ "total_calls": total,
291
+ "fast_ratio": self.fast_calls / total if total > 0 else 0,
292
+ }
293
+
294
+ def get_token_usage(self) -> dict:
295
+ """Get total token usage from both providers."""
296
+ fast_usage = self.fast_provider.get_token_usage()
297
+ smart_usage = self.smart_provider.get_token_usage()
298
+ return {
299
+ "prompt_tokens": fast_usage["prompt_tokens"] + smart_usage["prompt_tokens"],
300
+ "completion_tokens": fast_usage["completion_tokens"] + smart_usage["completion_tokens"],
301
+ "total_tokens": fast_usage["total_tokens"] + smart_usage["total_tokens"],
302
+ "fast_tokens": fast_usage["total_tokens"],
303
+ "smart_tokens": smart_usage["total_tokens"],
304
+ }
305
+
306
+ def reset_token_usage(self):
307
+ """Reset token counters on both providers."""
308
+ self.fast_provider.reset_token_usage()
309
+ self.smart_provider.reset_token_usage()
310
+
311
+
312
+ class TieredLLMAdapter:
313
+ """
314
+ Adapter that wraps TieredBedrockProvider and pins use_smart for a specific task type.
315
+
316
+ Used to pass different "views" of the tiered provider to different components:
317
+ - IntentTracker gets a "fast" view (use_smart=False)
318
+ - ReasoningStore gets a "smart" view (use_smart=True)
319
+ """
320
+
321
+ def __init__(self, provider: TieredBedrockProvider, use_smart: bool):
322
+ self._provider = provider
323
+ self._use_smart = use_smart
324
+
325
+ async def complete(self, prompt: str, system: Optional[str] = None) -> str:
326
+ """Generate completion using the fixed model tier."""
327
+ return await self._provider.complete(prompt, system, use_smart=self._use_smart)
328
+
329
+
330
+
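Taken together, the tiered provider and the adapter give each component a fixed-tier view of a single shared provider. A usage sketch (requires AWS Bedrock credentials in the environment; prompts are illustrative):

    import asyncio

    async def demo_tiered_routing() -> None:
        provider = TieredBedrockProvider(fast_model="llama-3.1-8b", smart_model="llama-3.3-70b")
        fast_view = TieredLLMAdapter(provider, use_smart=False)   # cheap tier, e.g. intent detection
        smart_view = TieredLLMAdapter(provider, use_smart=True)   # smart tier, e.g. decision extraction

        await fast_view.complete("Classify this user intent: 'add auth to the API'")
        await smart_view.complete("Extract the decision and rationale from the transcript above.")

        print(provider.get_usage_stats())
        # e.g. {'fast_calls': 1, 'smart_calls': 1, 'total_calls': 2, 'fast_ratio': 0.5}

    # asyncio.run(demo_tiered_routing())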
331
+ # =============================================================================
332
+ # MCAL Result Types
333
+ # =============================================================================
334
+
335
+ @dataclass
336
+ class TimingMetrics:
337
+ """Timing metrics for performance monitoring."""
338
+ total_ms: int = 0
339
+ facts_ms: int = 0
340
+ intents_ms: int = 0
341
+ decisions_ms: int = 0
342
+ parallel_savings_ms: int = 0 # Time saved by parallelization
343
+
344
+
345
+ @dataclass
346
+ class AddResult:
347
+ """Result from adding messages to MCAL."""
348
+ # From Mem0
349
+ facts: list[MemoryEntry] = field(default_factory=list)
350
+
351
+ # From MCAL (novel)
352
+ intent_graph: Optional[IntentGraph] = None
353
+ decisions: list[DecisionTrail] = field(default_factory=list)
354
+
355
+ # From Unified Extractor (Issue #19)
356
+ unified_graph: Optional[UnifiedGraph] = None
357
+
358
+ # Performance metrics
359
+ timing: Optional[TimingMetrics] = None
360
+
361
+ @property
362
+ def summary(self) -> dict:
363
+ """Quick summary of what was extracted."""
364
+ summary = {
365
+ "facts_count": len(self.facts),
366
+ "intents_count": len(self.intent_graph.nodes) if self.intent_graph else 0,
367
+ "decisions_count": len(self.decisions),
368
+ }
369
+ # Add unified graph stats if present
370
+ if self.unified_graph:
371
+ graph_summary = self.unified_graph.summary()
372
+ summary["unified_graph"] = {
373
+ "nodes": graph_summary["total_nodes"],
374
+ "edges": graph_summary["total_edges"],
375
+ "decisions": graph_summary["decisions"],
376
+ "goals": graph_summary["goals"],
377
+ "actions": graph_summary["actions"],
378
+ }
379
+ if self.timing:
380
+ summary["timing_ms"] = {
381
+ "total": self.timing.total_ms,
382
+ "facts": self.timing.facts_ms,
383
+ "intents": self.timing.intents_ms,
384
+ "decisions": self.timing.decisions_ms,
385
+ "parallel_savings": self.timing.parallel_savings_ms,
386
+ }
387
+ return summary
388
+
389
+
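For orientation, a typical `summary` dict on the unified-extractor path (keys come from the property above; values are illustrative):

    {
        "facts_count": 12,
        "intents_count": 0,    # unified path stores goals in the graph, not intent_graph
        "decisions_count": 0,
        "unified_graph": {"nodes": 12, "edges": 9, "decisions": 2, "goals": 3, "actions": 4},
        "timing_ms": {"total": 1840, "facts": 1840, "intents": 0, "decisions": 0, "parallel_savings": 0},
    }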
390
+ @dataclass
391
+ class SearchResult:
392
+ """Result from searching MCAL."""
393
+ # From Mem0 (re-ranked)
394
+ memories: list[MemoryEntry] = field(default_factory=list)
395
+
396
+ # From MCAL (enriched)
397
+ active_goals: list[IntentNode] = field(default_factory=list)
398
+ relevant_decisions: list[DecisionTrail] = field(default_factory=list)
399
+
400
+ # Assembled context
401
+ context: Optional[str] = None
402
+
403
+ # =============================================================================
404
+ # Main MCAL Class
405
+ # =============================================================================
406
+
407
+ class MCAL:
408
+ """
409
+ Memory-Context Alignment Layer.
410
+
411
+ Standalone memory layer providing reasoning preservation and goal-aware retrieval.
412
+
413
+ Three Pillars:
414
+ 1. Intent Graph Preservation - Track goal hierarchies
415
+ 2. Reasoning Chain Storage - Store WHY decisions were made
416
+ 3. Goal-Aware Retrieval - Retrieve based on objectives, not just similarity
417
+ """
418
+
419
+ def __init__(
420
+ self,
421
+ anthropic_api_key: Optional[str] = None,
422
+ openai_api_key: Optional[str] = None,
423
+ model: Optional[str] = None,
424
+ llm_provider: str = "openai",
425
+ bedrock_model: str = "llama-3.3-70b",
426
+ bedrock_region: str = "us-east-1",
427
+ storage_path: Optional[str] = None,
428
+ enable_persistence: bool = True,
429
+ enable_tiered_models: bool = False,
430
+ bedrock_fast_model: str = "llama-3.1-8b",
431
+ bedrock_smart_model: str = "llama-3.3-70b",
432
+ enable_extraction_cache: bool = False,
433
+ cache_ttl_seconds: int = 86400,
434
+ # Deprecated parameters (Issue #53 - kept for backward compatibility)
435
+ mem0_config: Optional[dict] = None,
436
+ mem0_api_key: Optional[str] = None,
437
+ use_standalone_backend: bool = False,
438
+ use_unified_extractor: bool = True,
439
+ ):
440
+ """
441
+ Initialize MCAL.
442
+
443
+ Args:
444
+ anthropic_api_key: Anthropic API key for LLM calls (optional)
445
+ openai_api_key: OpenAI API key for LLM calls (optional)
446
+ model: Model to use for extraction (defaults based on provider)
447
+ llm_provider: LLM provider to use ("openai", "anthropic", or "bedrock")
448
+ bedrock_model: Bedrock model to use (default: llama-3.3-70b)
449
+ bedrock_region: AWS region for Bedrock (default: us-east-1)
450
+ storage_path: Path for persistent storage (default: ~/.mcal)
451
+ enable_persistence: Enable cross-session persistence (default: True)
452
+ enable_tiered_models: Use fast/smart model routing (bedrock only)
453
+ bedrock_fast_model: Fast model for simple tasks (default: llama-3.1-8b)
454
+ bedrock_smart_model: Smart model for complex tasks (default: llama-3.3-70b)
455
+ enable_extraction_cache: Enable caching of extracted state (Issue #9)
456
+ cache_ttl_seconds: Time-to-live for cache entries (default: 24h)
457
+
458
+ Deprecated Args (Issue #53 - will be removed in v1.0):
459
+ mem0_config: Ignored - MCAL is now standalone
460
+ mem0_api_key: Ignored - MCAL is now standalone
461
+ use_standalone_backend: Ignored - always standalone
462
+ use_unified_extractor: Ignored - always uses unified extractor
463
+ """
464
+ self._enable_tiered_models = enable_tiered_models
465
+ self._enable_extraction_cache = enable_extraction_cache
466
+ self._use_unified_extractor = True # Always use unified extractor (Issue #53)
467
+
468
+ # Issue #53: Deprecation warnings for removed parameters
469
+ import warnings
470
+ if mem0_config is not None:
471
+ warnings.warn(
472
+ "mem0_config is deprecated and ignored. MCAL v1.0 is fully standalone.",
473
+ DeprecationWarning,
474
+ stacklevel=2
475
+ )
476
+ if mem0_api_key is not None:
477
+ warnings.warn(
478
+ "mem0_api_key is deprecated and ignored. MCAL v1.0 is fully standalone.",
479
+ DeprecationWarning,
480
+ stacklevel=2
481
+ )
482
+ if use_standalone_backend:
483
+ warnings.warn(
484
+ "use_standalone_backend is deprecated and ignored. MCAL is always standalone.",
485
+ DeprecationWarning,
486
+ stacklevel=2
487
+ )
488
+ if not use_unified_extractor:
489
+ warnings.warn(
490
+ "use_unified_extractor=False is deprecated. Legacy 3-pillar extraction "
491
+ "has been removed. MCAL now always uses unified extraction.",
492
+ DeprecationWarning,
493
+ stacklevel=2
494
+ )
495
+
496
+ # Initialize extraction cache (Issue #9)
497
+ if enable_extraction_cache:
498
+ cache_path = None
499
+ if storage_path:
500
+ cache_path = Path(storage_path) / "extraction_cache.json"
501
+ elif enable_persistence:
502
+ cache_path = Path.home() / ".mcal" / "extraction_cache.json"
503
+ self._extraction_cache = ExtractionCache(
504
+ persist_path=cache_path,
505
+ ttl_seconds=cache_ttl_seconds
506
+ )
507
+ logger.info(f"Extraction cache enabled (TTL: {cache_ttl_seconds}s)")
508
+ else:
509
+ self._extraction_cache = None
510
+
511
+ # Initialize LLM client based on provider
512
+ if llm_provider == "openai":
513
+ if not openai_api_key:
514
+ raise ValueError("openai_api_key required when llm_provider='openai'")
515
+ model = model or "gpt-4o"
516
+ self._llm = OpenAIClient(api_key=openai_api_key, model=model)
517
+ elif llm_provider == "anthropic":
518
+ if not anthropic_api_key:
519
+ raise ValueError("anthropic_api_key required when llm_provider='anthropic'")
520
+ model = model or "claude-sonnet-4-20250514"
521
+ self._llm = AnthropicClient(api_key=anthropic_api_key, model=model)
522
+ elif llm_provider == "bedrock":
523
+ # Bedrock uses AWS credentials from environment
524
+ if enable_tiered_models:
525
+ self._llm = TieredBedrockProvider(
526
+ fast_model=bedrock_fast_model,
527
+ smart_model=bedrock_smart_model,
528
+ region=bedrock_region
529
+ )
530
+ logger.info(f"Using tiered Bedrock: fast={bedrock_fast_model}, smart={bedrock_smart_model}")
531
+ else:
532
+ self._llm = BedrockProviderWrapper(
533
+ model=bedrock_model,
534
+ region=bedrock_region
535
+ )
536
+ else:
537
+ raise ValueError(f"Unknown llm_provider: {llm_provider}. Use 'openai', 'anthropic', or 'bedrock'")
538
+
539
+ # Issue #53: No memory backend needed - MCAL is standalone
540
+ # UnifiedGraph handles all storage and search
541
+
542
+ # Initialize MCAL components (for legacy 3-pillar mode compatibility)
543
+ # With tiered models, use fast LLM for intents, smart LLM for decisions
544
+ if enable_tiered_models and isinstance(self._llm, TieredBedrockProvider):
545
+ intent_llm = TieredLLMAdapter(self._llm, use_smart=False) # Fast model for intents
546
+ decision_llm = TieredLLMAdapter(self._llm, use_smart=True) # Smart model for decisions
547
+ self._intent_tracker = IntentTracker(intent_llm)
548
+ self._reasoning_store = ReasoningStore(decision_llm)
549
+ logger.info("Tiered model routing: intents→fast, decisions→smart")
550
+ else:
551
+ self._intent_tracker = IntentTracker(self._llm)
552
+ self._reasoning_store = ReasoningStore(self._llm)
553
+ self._context_assembler = ContextAssembler()
554
+
555
+ # Initialize unified extractor (Issue #19 - 52x token reduction)
556
+ if self._use_unified_extractor:  # always True (Issue #53)
557
+ self._unified_extractor = UnifiedExtractor(self._llm)
558
+ self._user_graphs: dict[str, UnifiedGraph] = {} # Per-user unified graphs
559
+ logger.info("Unified extractor enabled (single-pass extraction)")
560
+ else:
561
+ self._unified_extractor = None
562
+ self._user_graphs = {}
563
+
564
+ # Initialize persistent storage
565
+ self._enable_persistence = enable_persistence
566
+ if enable_persistence:
567
+ storage_base = Path(storage_path) if storage_path else None
568
+ self._storage = MCALStorage(base_path=storage_base)
569
+ logger.info(f"Persistence enabled at {self._storage.base_path}")
570
+ else:
571
+ self._storage = None
572
+ logger.info("Persistence disabled (in-memory only)")
573
+
574
+ # Session management (in-memory cache)
575
+ self._sessions: dict[str, Session] = {}
576
+ self._user_intents: dict[str, IntentGraph] = {}
577
+
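For example, a cache- and persistence-enabled configuration using the flags documented above (path and TTL are illustrative):

    mcal = MCAL(
        openai_api_key="...",
        enable_extraction_cache=True,   # Issue #9: reuse extraction for already-seen messages
        cache_ttl_seconds=3600,         # 1 hour instead of the 24h default
        storage_path="/var/lib/mcal",   # graphs + extraction_cache.json live here
    )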
578
+ # Issue #53: _extract_facts_async removed - no longer needed with unified extractor
579
+
580
+ async def _extract_intents_async(
581
+ self,
582
+ turns: list[Turn],
583
+ user_id: str,
584
+ session_id: Optional[str]
585
+ ) -> IntentGraph:
586
+ """Extract or update intent graph."""
587
+ # Load existing graph from persistent storage if not in memory
588
+ if user_id not in self._user_intents and self._enable_persistence and self._storage:
589
+ stored_graph = self._storage.load_intent_graph(user_id)
590
+ if stored_graph:
591
+ self._user_intents[user_id] = stored_graph
592
+ logger.info(f"Loaded existing intent graph for user {user_id} from storage")
593
+
594
+ # Get or create user's intent graph
595
+ if user_id in self._user_intents:
596
+ # Update existing graph
597
+ for turn in turns:
598
+ self._user_intents[user_id] = await self._intent_tracker.update_intent(
599
+ turn, self._user_intents[user_id]
600
+ )
601
+ else:
602
+ # Create new graph
603
+ self._user_intents[user_id] = await self._intent_tracker.extract_intents(
604
+ turns, session_id=session_id
605
+ )
606
+
607
+ return self._user_intents[user_id]
608
+
609
+ async def _extract_decisions_async(
610
+ self,
611
+ turns: list[Turn],
612
+ user_id: str,
613
+ intent_graph: Optional[IntentGraph]
614
+ ) -> list[DecisionTrail]:
615
+ """Extract decisions with active goals context."""
616
+ # Load existing decisions from persistent storage
617
+ existing_decisions = []
618
+ if self._enable_persistence and self._storage:
619
+ existing_decisions = self._storage.load_decisions(user_id)
620
+ if existing_decisions:
621
+ logger.info(f"Loaded {len(existing_decisions)} existing decisions for user {user_id}")
622
+
623
+ # Get active goals for context
624
+ goal_ids = None
625
+ active_goals_context = None
626
+ if intent_graph:
627
+ active_goals = intent_graph.get_active_goals()
628
+ goal_ids = [g.id for g in active_goals]
629
+
630
+ # Build rich goals context for decision extraction
631
+ if active_goals:
632
+ goals_lines = []
633
+ for i, goal in enumerate(active_goals, 1):
634
+ goal_type = goal.type.name if hasattr(goal.type, 'name') else str(goal.type)
635
+ goals_lines.append(f"{i}. [{goal_type}] {goal.content}")
636
+ active_goals_context = "\n".join(goals_lines)
637
+ logger.debug(f"Active goals context: {len(active_goals)} goals")
638
+
639
+ # Extract new decisions with full context (goals + prior decisions)
640
+ reconciled_decisions = await self._reasoning_store.extract_decisions(
641
+ turns,
642
+ goal_ids=goal_ids,
643
+ existing_decisions=existing_decisions,
644
+ active_goals_context=active_goals_context
645
+ )
646
+
647
+ return reconciled_decisions
648
+
649
+ async def add(
650
+ self,
651
+ messages: list[dict],
652
+ user_id: str,
653
+ session_id: Optional[str] = None,
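The `@llm_retry` decorator used above is imported from `.core.retry`, which this diff does not include. A minimal compatible sketch, assuming jittered exponential backoff over the two transient error classes the clients raise (`LLMRateLimitError` and `LLMServerError` from the same module):

    import asyncio
    import functools
    import random

    def llm_retry(max_attempts: int = 3, min_wait: float = 1.0, max_wait: float = 10.0):
        """Retry an async LLM call on transient errors with capped exponential backoff."""
        def decorator(fn):
            @functools.wraps(fn)
            async def wrapper(*args, **kwargs):
                for attempt in range(1, max_attempts + 1):
                    try:
                        return await fn(*args, **kwargs)
                    except (LLMRateLimitError, LLMServerError):
                        if attempt == max_attempts:
                            raise  # retries exhausted - surface the original error
                        delay = min(max_wait, min_wait * 2 ** (attempt - 1))
                        await asyncio.sleep(delay * random.uniform(0.8, 1.2))  # jitter
            return wrapper
        return decorator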
654
+ extract_intents: bool = True,
655
+ extract_decisions: bool = True,
656
+ ) -> AddResult:
657
+ """
658
+ Add messages to memory with full MCAL processing.
659
+
660
+ This is the core method. By default it runs the unified single-pass
+ extractor; the legacy pipeline retained below:
+ 1. Extracts facts (in parallel with intent extraction)
+ 2. Extracts intent graph (MCAL Pillar 1)
+ 3. Extracts decision trails (MCAL Pillar 2)
664
+
665
+ Performance optimization (Issue #7):
666
+ - Fact extraction and intent extraction run in PARALLEL
667
+ - Decision extraction runs after intents (needs active goals context)
668
+
669
+ Args:
670
+ messages: List of message dicts [{"role": "user", "content": "..."}]
671
+ user_id: User identifier
672
+ session_id: Optional session identifier
673
+ extract_intents: Whether to extract intent graph
674
+ extract_decisions: Whether to extract decision trails
675
+
676
+ Returns:
677
+ AddResult with facts, intents, decisions, and timing metrics
678
+ """
679
+ total_start = time.time()
680
+ timing = TimingMetrics()
681
+ result = AddResult()
682
+
683
+ # Issue #2: Validate messages before processing
684
+ valid_messages = []
685
+ for i, msg in enumerate(messages):
686
+ if not isinstance(msg, dict):
687
+ logger.warning(f"Skipping message {i}: not a dict")
688
+ continue
689
+ if not msg.get("content"):
690
+ logger.warning(f"Skipping message {i}: missing or empty content")
691
+ continue
692
+ if not msg.get("role"):
693
+ logger.warning(f"Skipping message {i}: missing role")
694
+ continue
695
+ valid_messages.append(msg)
696
+
697
+ if not valid_messages:
698
+ logger.error("No valid messages to process")
699
+ return result
700
+
701
+ # =========================================================================
702
+ # Issue #19: UNIFIED EXTRACTOR PATH (52x token reduction)
703
+ # Single-pass extraction replacing 6-pass system
704
+ # =========================================================================
705
+ if self._use_unified_extractor and self._unified_extractor:
706
+ logger.info("Using unified extractor (single-pass)")
707
+ unified_start = time.time()
708
+
709
+ # Check if user has existing graph (for delta extraction)
710
+ existing_graph = self._user_graphs.get(user_id)
711
+
712
+ # Issue #25: Load from disk if not in memory
713
+ if not existing_graph and self._enable_persistence and self._storage:
714
+ existing_graph = self._storage.load_unified_graph(user_id)
715
+ if existing_graph:
716
+ self._user_graphs[user_id] = existing_graph
717
+ logger.info(f"Loaded unified graph for {user_id} from storage")
718
+
719
+ if existing_graph:
720
+ # Delta extraction - only process new information
721
+ logger.debug("Using delta extraction for existing user")
722
+ unified_graph = await self._unified_extractor.extract_delta(
723
+ valid_messages, existing_graph
724
+ )
725
+ else:
726
+ # Full extraction for new user
727
+ unified_graph = await self._unified_extractor.extract(valid_messages)
728
+
729
+ # Store graph for user (in memory)
730
+ self._user_graphs[user_id] = unified_graph
731
+ result.unified_graph = unified_graph
732
+
733
+ # Issue #25: Persist to disk
734
+ if self._enable_persistence and self._storage:
735
+ self._storage.save_unified_graph(user_id, unified_graph)
736
+ logger.debug(f"Saved unified graph for {user_id} to storage")
737
+
738
+ # Map to compatible structures for backward compatibility
739
+ # Convert graph nodes to memory entries (facts)
740
+ result.facts = [
741
+ MemoryEntry(
742
+ id=node.id,
743
+ content=f"{node.type.name}: {node.label}",
744
+ metadata={"type": node.type.value, **node.attrs}
745
+ )
746
+ for node in unified_graph.nodes.values()
747
+ ]
748
+
749
+ # Extract decisions count from graph
750
+ graph_summary = unified_graph.summary()
751
+
752
+ timing.total_ms = int((time.time() - unified_start) * 1000)
753
+ timing.facts_ms = timing.total_ms # All in one pass
754
+ timing.intents_ms = 0
755
+ timing.decisions_ms = 0
756
+ result.timing = timing
757
+
758
+ logger.info(
759
+ f"Unified extraction complete: {graph_summary['total_nodes']} nodes, "
760
+ f"{graph_summary['total_edges']} edges, {graph_summary['decisions']} decisions "
761
+ f"in {timing.total_ms}ms"
762
+ )
763
+
764
+ return result
765
+
766
+ # =========================================================================
767
+ # Issue #9: CHECK EXTRACTION CACHE
768
+ # For returning users, skip extraction if messages already processed
769
+ # =========================================================================
770
+ cached_state = None
771
+ messages_to_process = valid_messages
772
+ cache_hit_type = "miss"
773
+
774
+ if self._enable_extraction_cache and self._extraction_cache:
775
+ cached_state, messages_to_process = self._extraction_cache.get_state(
776
+ user_id, valid_messages
777
+ )
778
+
779
+ if cached_state and not messages_to_process:
780
+ # FULL CACHE HIT - return cached results immediately
781
+ cache_hit_type = "full"
782
+ logger.info(f"CACHE HIT: Returning cached results for user {user_id}")
783
+
784
+ # Restore from cache
785
+ if cached_state.intent_graph_data:
786
+ result.intent_graph = self._storage._deserialize_intent_graph(
787
+ cached_state.intent_graph_data
788
+ ) if self._storage else None
789
+ # Also restore to memory
790
+ if result.intent_graph:
791
+ self._user_intents[user_id] = result.intent_graph
792
+
793
+ result.decisions = self._deserialize_decisions(cached_state.decisions_data)
794
+
795
+ # Timing reflects cache lookup (near-zero LLM time)
796
+ timing.total_ms = int((time.time() - total_start) * 1000)
797
+ timing.facts_ms = 0
798
+ timing.intents_ms = 0
799
+ timing.decisions_ms = 0
800
+ timing.parallel_savings_ms = cached_state.extraction_time_ms # Saved this much
801
+ result.timing = timing
802
+
803
+ logger.info(f"Cache hit saved ~{cached_state.extraction_time_ms}ms of extraction time")
804
+ return result
805
+
806
+ elif cached_state:
807
+ # PARTIAL CACHE HIT - process only new messages
808
+ cache_hit_type = "partial"
809
+ logger.info(
810
+ f"PARTIAL CACHE HIT: Processing {len(messages_to_process)} new messages "
811
+ f"(cached {cached_state.messages_processed})"
812
+ )
813
+
814
+ # Restore cached intent graph to memory for incremental update
815
+ if cached_state.intent_graph_data and self._storage:
816
+ cached_graph = self._storage._deserialize_intent_graph(
817
+ cached_state.intent_graph_data
818
+ )
819
+ if cached_graph:
820
+ self._user_intents[user_id] = cached_graph
821
+
822
+ # Convert messages to Turn objects for MCAL processing
823
+ turns_to_process = [
824
+ Turn(
825
+ role=msg.get("role", "user"),
826
+ content=msg.get("content", ""),
827
+ session_id=session_id
828
+ )
829
+ for msg in messages_to_process
830
+ ]
831
+
832
+ # All turns (for decision context)
833
+ all_turns = [
834
+ Turn(
835
+ role=msg.get("role", "user"),
836
+ content=msg.get("content", ""),
837
+ session_id=session_id
838
+ )
839
+ for msg in valid_messages
840
+ ]
841
+
842
+ # =========================================================================
843
+ # PHASE 1: PARALLEL EXTRACTION (Facts + Intents)
844
+ # Issue #7: Run independent extractions in parallel
845
+ # =========================================================================
846
+ logger.debug("Phase 1: Parallel extraction (facts + intents)...")
847
+ phase1_start = time.time()
848
+
849
+ # Prepare tasks
850
+ tasks = []
851
+ task_names = []
852
+
853
+ # Always extract facts (for new messages only if partial cache hit)
854
+ facts_start = time.time()
855
+ # _extract_facts_async was removed (Issue #53); guard so this legacy path cannot crash
+ _extract_facts = getattr(self, "_extract_facts_async", None)
+ if _extract_facts:
+ tasks.append(_extract_facts(messages_to_process, user_id))
+ task_names.append("facts")
857
+
858
+ # Optionally extract intents (in parallel with facts)
859
+ # For partial cache hit, this will incrementally update the cached graph
860
+ if extract_intents:
861
+ intents_start = time.time()
862
+ tasks.append(self._extract_intents_async(turns_to_process, user_id, session_id))
863
+ task_names.append("intents")
864
+
865
+ # Run in parallel
866
+ results = await asyncio.gather(*tasks, return_exceptions=True)
867
+
868
+ # Process results
869
+ for i, (name, res) in enumerate(zip(task_names, results)):
870
+ if isinstance(res, Exception):
871
+ logger.error(f"Error in {name} extraction: {res}")
872
+ continue
873
+
874
+ if name == "facts":
875
+ result.facts = res
876
+ timing.facts_ms = int((time.time() - facts_start) * 1000)
877
+ logger.info(f"Mem0 extracted {len(result.facts)} facts in {timing.facts_ms}ms")
878
+ elif name == "intents":
879
+ result.intent_graph = res
880
+ timing.intents_ms = int((time.time() - intents_start) * 1000)
881
+ logger.info(f"Intent graph has {len(result.intent_graph.nodes)} nodes in {timing.intents_ms}ms")
882
+
883
+ # Persist updated graph to storage
884
+ if self._enable_persistence and self._storage:
885
+ self._storage.save_intent_graph(user_id, result.intent_graph)
886
+ logger.debug(f"Saved intent graph for user {user_id} to storage")
887
+
888
+ phase1_duration = int((time.time() - phase1_start) * 1000)
889
+
890
+ # Calculate parallel savings (what sequential would have taken)
891
+ sequential_time = timing.facts_ms + timing.intents_ms
892
+ timing.parallel_savings_ms = sequential_time - phase1_duration
893
+ logger.info(f"Phase 1 complete: {phase1_duration}ms (saved {timing.parallel_savings_ms}ms via parallelization)")
894
+
895
+ # =========================================================================
896
+ # PHASE 2: SEQUENTIAL EXTRACTION (Decisions - needs intent graph)
897
+ # =========================================================================
898
+ if extract_decisions:
899
+ logger.debug("Phase 2: Decision extraction (requires intent graph)...")
900
+ decisions_start = time.time()
901
+
902
+ # For partial cache hit, we still need full context for decision extraction
903
+ # but ReasoningStore handles existing_decisions merging
904
+ reconciled_decisions = await self._extract_decisions_async(
905
+ all_turns, user_id, result.intent_graph
906
+ )
907
+
908
+ result.decisions = reconciled_decisions
909
+ timing.decisions_ms = int((time.time() - decisions_start) * 1000)
910
+
911
+ if self._enable_persistence and self._storage:
912
+ self._storage.save_decisions(user_id, result.decisions)
913
+ logger.debug(f"Saved {len(result.decisions)} total decisions for user {user_id}")
914
+
915
+ logger.info(f"Extracted {len(result.decisions)} decisions in {timing.decisions_ms}ms")
916
+
917
+ # =========================================================================
918
+ # Issue #9: UPDATE EXTRACTION CACHE
919
+ # =========================================================================
920
+ if self._enable_extraction_cache and self._extraction_cache:
921
+ extraction_time = timing.facts_ms + timing.intents_ms + timing.decisions_ms
922
+
923
+ # Serialize intent graph for cache
924
+ intent_graph_data = None
925
+ if result.intent_graph and self._storage:
926
+ intent_graph_data = self._storage._serialize_intent_graph(result.intent_graph)
927
+
928
+ # Serialize decisions for cache
929
+ decisions_data = self._serialize_decisions(result.decisions)
930
+
931
+ self._extraction_cache.update_state(
932
+ user_id=user_id,
933
+ messages=valid_messages,
934
+ intent_graph_data=intent_graph_data,
935
+ decisions_data=decisions_data,
936
+ extraction_time_ms=extraction_time
937
+ )
938
+
939
+ logger.info(f"Cache updated for user {user_id} ({cache_hit_type} -> updated)")
940
+
941
+ # Final timing
942
+ timing.total_ms = int((time.time() - total_start) * 1000)
943
+ result.timing = timing
944
+
945
+ logger.info(f"Total add() time: {timing.total_ms}ms "
946
+ f"(facts={timing.facts_ms}ms, intents={timing.intents_ms}ms, "
947
+ f"decisions={timing.decisions_ms}ms, parallel_savings={timing.parallel_savings_ms}ms)")
948
+
949
+ return result
950
+
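A minimal end-to-end call of `add()` that reads back the metrics (a sketch; assumes OPENAI_API_KEY is set and the messages are illustrative):

    import asyncio
    import os

    async def demo_add() -> None:
        mcal = MCAL(openai_api_key=os.environ["OPENAI_API_KEY"])
        result = await mcal.add(
            messages=[
                {"role": "user", "content": "We picked Postgres over DynamoDB for transactions."},
                {"role": "assistant", "content": "Noted - ACID guarantees were the deciding factor."},
            ],
            user_id="user_123",
        )
        print(result.summary)                  # counts plus unified-graph stats
        print(f"{result.timing.total_ms} ms")  # wall-clock extraction time

    asyncio.run(demo_add())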
951
+ async def add_stream(
952
+ self,
953
+ messages: list[dict],
954
+ user_id: str,
955
+ session_id: Optional[str] = None,
956
+ extract_intents: bool = True,
957
+ extract_decisions: bool = True,
958
+ ) -> AsyncIterator[StreamEvent]:
959
+ """
960
+ Add messages to memory with streaming progress updates.
961
+
962
+ Issue #10: Streaming Response API
963
+
964
+ This is the streaming version of add() that yields partial results
965
+ as they become available, improving perceived latency.
966
+
967
+ Instead of waiting 22s for complete results, clients see:
968
+ - STARTED event immediately
969
+ - Each intent as it's extracted
970
+ - Each decision as it's extracted
971
+ - COMPLETE event with final AddResult
972
+
973
+ Usage:
974
+ async for event in mcal.add_stream(messages, user_id):
975
+ if event.type == StreamEventType.INTENT_EXTRACTED:
976
+ display_intent(event.data)
977
+ elif event.type == StreamEventType.COMPLETE:
978
+ final_result = event.data
979
+
980
+ Args:
981
+ messages: List of message dicts [{"role": "user", "content": "..."}]
982
+ user_id: User identifier
983
+ session_id: Optional session identifier
984
+ extract_intents: Whether to extract intent graph
985
+ extract_decisions: Whether to extract decision trails
986
+
987
+ Yields:
988
+ StreamEvent objects with type, data, and optional progress
989
+ """
990
+ total_start = time.time()
991
+ timing = TimingMetrics()
992
+ result = AddResult()
993
+
994
+ # Signal extraction start
995
+ yield event_started()
996
+
997
+ # Validate messages
998
+ valid_messages = []
999
+ for i, msg in enumerate(messages):
1000
+ if not isinstance(msg, dict):
1001
+ yield event_error(f"Skipping message {i}: not a dict")
1002
+ continue
1003
+ if not msg.get("content"):
1004
+ yield event_error(f"Skipping message {i}: missing or empty content")
1005
+ continue
1006
+ if not msg.get("role"):
1007
+ yield event_error(f"Skipping message {i}: missing role")
1008
+ continue
1009
+ valid_messages.append(msg)
1010
+
1011
+ if not valid_messages:
1012
+ yield event_error("No valid messages to process")
1013
+ yield event_complete(result)
1014
+ return
1015
+
1016
+ # =========================================================================
1017
+ # CHECK EXTRACTION CACHE
1018
+ # =========================================================================
1019
+ cached_state = None
1020
+ messages_to_process = valid_messages
1021
+
1022
+ if self._enable_extraction_cache and self._extraction_cache:
1023
+ cached_state, messages_to_process = self._extraction_cache.get_state(
1024
+ user_id, valid_messages
1025
+ )
1026
+
1027
+ if cached_state and not messages_to_process:
1028
+ # FULL CACHE HIT
1029
+ yield event_cache_hit(
1030
+ hit_type="full",
1031
+ messages_cached=cached_state.messages_processed,
1032
+ messages_to_process=0,
1033
+ saved_time_ms=cached_state.extraction_time_ms
1034
+ )
1035
+
1036
+ # Restore from cache
1037
+ if cached_state.intent_graph_data:
1038
+ result.intent_graph = self._storage._deserialize_intent_graph(
1039
+ cached_state.intent_graph_data
1040
+ ) if self._storage else None
1041
+ if result.intent_graph:
1042
+ self._user_intents[user_id] = result.intent_graph
1043
+
1044
+ result.decisions = self._deserialize_decisions(cached_state.decisions_data)
1045
+
1046
+ timing.total_ms = int((time.time() - total_start) * 1000)
1047
+ timing.parallel_savings_ms = cached_state.extraction_time_ms
1048
+ result.timing = timing
1049
+
1050
+ yield event_complete(result)
1051
+ return
1052
+
1053
+ elif cached_state:
1054
+ # PARTIAL CACHE HIT
1055
+ yield event_cache_hit(
1056
+ hit_type="partial",
1057
+ messages_cached=cached_state.messages_processed,
1058
+ messages_to_process=len(messages_to_process),
1059
+ saved_time_ms=0 # Will calculate after extraction
1060
+ )
1061
+
1062
+ # Restore cached intent graph
1063
+ if cached_state.intent_graph_data and self._storage:
1064
+ cached_graph = self._storage._deserialize_intent_graph(
1065
+ cached_state.intent_graph_data
1066
+ )
1067
+ if cached_graph:
1068
+ self._user_intents[user_id] = cached_graph
1069
+
1070
+ # Convert to Turn objects
1071
+ turns_to_process = [
1072
+ Turn(
1073
+ role=msg.get("role", "user"),
1074
+ content=msg.get("content", ""),
1075
+ session_id=session_id
1076
+ )
1077
+ for msg in messages_to_process
1078
+ ]
1079
+
1080
+ all_turns = [
1081
+ Turn(
1082
+ role=msg.get("role", "user"),
1083
+ content=msg.get("content", ""),
1084
+ session_id=session_id
1085
+ )
1086
+ for msg in valid_messages
1087
+ ]
1088
+
1089
+ # =========================================================================
1090
+ # PHASE 1: FACTS EXTRACTION (with streaming)
1091
+ # =========================================================================
1092
+ yield event_phase_started(ExtractionPhase.FACTS, "Extracting facts...")
1093
+ facts_start = time.time()
1094
+
1095
+ try:
1096
+ # _extract_facts_async was removed (Issue #53); degrade gracefully in streaming mode
+ _extract_facts = getattr(self, "_extract_facts_async", None)
+ result.facts = await _extract_facts(messages_to_process, user_id) if _extract_facts else []
1097
+ timing.facts_ms = int((time.time() - facts_start) * 1000)
1098
+
1099
+ # Yield each fact as it's extracted
1100
+ for i, fact in enumerate(result.facts):
1101
+ yield event_fact_extracted(
1102
+ {"id": fact.id, "content": fact.content, "score": fact.score},
1103
+ progress=StreamProgress(
1104
+ phase=ExtractionPhase.FACTS,
1105
+ current=i + 1,
1106
+ total=len(result.facts),
1107
+ elapsed_ms=int((time.time() - facts_start) * 1000)
1108
+ )
1109
+ )
1110
+
1111
+ yield event_phase_complete(ExtractionPhase.FACTS, len(result.facts), timing.facts_ms)
1112
+ except Exception as e:
1113
+ yield event_error(f"Facts extraction failed: {e}")
1114
+ timing.facts_ms = int((time.time() - facts_start) * 1000)
1115
+
1116
+ # =========================================================================
1117
+ # PHASE 2: INTENTS EXTRACTION (with streaming)
1118
+ # =========================================================================
1119
+ if extract_intents:
1120
+ yield event_phase_started(ExtractionPhase.INTENTS, "Extracting intent graph...")
1121
+ intents_start = time.time()
1122
+
1123
+ try:
1124
+ # Track nodes before extraction for delta detection
1125
+ prev_nodes = set(self._user_intents.get(user_id, IntentGraph()).nodes.keys())
1126
+
1127
+ result.intent_graph = await self._extract_intents_async(
1128
+ turns_to_process, user_id, session_id
1129
+ )
1130
+ timing.intents_ms = int((time.time() - intents_start) * 1000)
1131
+
1132
+ # Yield each intent node (new ones first, then all for completeness)
1133
+ new_nodes = [
1134
+ n for n_id, n in result.intent_graph.nodes.items()
1135
+ if n_id not in prev_nodes
1136
+ ]
1137
+
1138
+ for i, node in enumerate(new_nodes):
1139
+ yield event_intent_extracted(
1140
+ node,
1141
+ progress=StreamProgress(
1142
+ phase=ExtractionPhase.INTENTS,
1143
+ current=i + 1,
1144
+ total=len(new_nodes),
1145
+ elapsed_ms=int((time.time() - intents_start) * 1000),
1146
+ message=f"New intent: {node.type.value}"
1147
+ )
1148
+ )
1149
+
1150
+ # Persist
1151
+ if self._enable_persistence and self._storage:
1152
+ self._storage.save_intent_graph(user_id, result.intent_graph)
1153
+
1154
+ yield event_phase_complete(
1155
+ ExtractionPhase.INTENTS,
1156
+ len(result.intent_graph.nodes),
1157
+ timing.intents_ms
1158
+ )
1159
+ except Exception as e:
1160
+ yield event_error(f"Intent extraction failed: {e}")
1161
+ timing.intents_ms = int((time.time() - intents_start) * 1000)
1162
+
1163
+ # =========================================================================
1164
+ # PHASE 3: DECISIONS EXTRACTION (with streaming)
1165
+ # =========================================================================
1166
+ if extract_decisions:
1167
+ yield event_phase_started(ExtractionPhase.DECISIONS, "Extracting decisions...")
1168
+ decisions_start = time.time()
1169
+
1170
+ try:
1171
+ # Load existing for delta detection
1172
+ existing_ids = set()
1173
+ if self._enable_persistence and self._storage:
1174
+ existing = self._storage.load_decisions(user_id)
1175
+ existing_ids = {d.id for d in existing}
1176
+
1177
+ reconciled_decisions = await self._extract_decisions_async(
1178
+ all_turns, user_id, result.intent_graph
1179
+ )
1180
+ result.decisions = reconciled_decisions
1181
+ timing.decisions_ms = int((time.time() - decisions_start) * 1000)
1182
+
1183
+ # Yield each decision (highlight new ones)
1184
+ for i, decision in enumerate(result.decisions):
1185
+ is_new = decision.id not in existing_ids
1186
+ yield event_decision_extracted(
1187
+ decision,
1188
+ progress=StreamProgress(
1189
+ phase=ExtractionPhase.DECISIONS,
1190
+ current=i + 1,
1191
+ total=len(result.decisions),
1192
+ elapsed_ms=int((time.time() - decisions_start) * 1000),
1193
+ message="New decision" if is_new else "Existing decision"
1194
+ )
1195
+ )
1196
+
1197
+ # Persist
1198
+ if self._enable_persistence and self._storage:
1199
+ self._storage.save_decisions(user_id, result.decisions)
1200
+
1201
+ yield event_phase_complete(
1202
+ ExtractionPhase.DECISIONS,
1203
+ len(result.decisions),
1204
+ timing.decisions_ms
1205
+ )
1206
+ except Exception as e:
1207
+ yield event_error(f"Decision extraction failed: {e}")
1208
+ timing.decisions_ms = int((time.time() - decisions_start) * 1000)
1209
+
1210
+ # =========================================================================
1211
+ # UPDATE CACHE
1212
+ # =========================================================================
1213
+ if self._enable_extraction_cache and self._extraction_cache:
1214
+ extraction_time = timing.facts_ms + timing.intents_ms + timing.decisions_ms
1215
+
1216
+ intent_graph_data = None
1217
+ if result.intent_graph and self._storage:
1218
+ intent_graph_data = self._storage._serialize_intent_graph(result.intent_graph)
1219
+
1220
+ decisions_data = self._serialize_decisions(result.decisions)
1221
+
1222
+ self._extraction_cache.update_state(
1223
+ user_id=user_id,
1224
+ messages=valid_messages,
1225
+ intent_graph_data=intent_graph_data,
1226
+ decisions_data=decisions_data,
1227
+ extraction_time_ms=extraction_time
1228
+ )
1229
+
1230
+ # Final timing
1231
+ timing.total_ms = int((time.time() - total_start) * 1000)
1232
+ timing.parallel_savings_ms = 0 # Sequential in streaming mode
1233
+ result.timing = timing
1234
+
1235
+ # Signal completion with full result
1236
+ yield event_complete(result)
1237
+
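One way to consume the stream end-to-end, mirroring the usage in the docstring above. The `ERROR` and `DECISION_EXTRACTED` member names are assumed from the `event_error` / `event_decision_extracted` helper names; only `INTENT_EXTRACTED` and `COMPLETE` appear verbatim in the docstring:

    async def consume_stream(mcal: MCAL, messages: list[dict], user_id: str) -> AddResult:
        final = AddResult()
        async for event in mcal.add_stream(messages, user_id=user_id):
            if event.type == StreamEventType.INTENT_EXTRACTED:
                print("intent:", event.data)
            elif event.type == StreamEventType.DECISION_EXTRACTED:
                print("decision:", event.data)
            elif event.type == StreamEventType.ERROR:
                print("warning:", event.data)
            elif event.type == StreamEventType.COMPLETE:
                final = event.data  # the final AddResult
        return final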
1238
+ async def search(
1239
+ self,
1240
+ query: str,
1241
+ user_id: str,
1242
+ limit: int = 10,
1243
+ include_goals: bool = True,
1244
+ include_decisions: bool = True,
1245
+ retrieval_config: Optional[RetrievalConfig] = None,
1246
+ ) -> SearchResult:
1247
+ """
1248
+ Search with goal-aware retrieval.
1249
+
1250
+ This enhances plain similarity search with:
1251
+ 1. Goal-aware re-ranking (MCAL Pillar 3)
1252
+ 2. Relevant decision attachment
1253
+ 3. Active goal context
1254
+
1255
+ When using unified extractor mode, searches the unified graph
1256
+ directly; there is no separate memory backend.
1257
+
1258
+ Args:
1259
+ query: Search query
1260
+ user_id: User identifier
1261
+ limit: Maximum results
1262
+ include_goals: Include active goals in result
1263
+ include_decisions: Include relevant decisions in result
1264
+ retrieval_config: Optional retrieval configuration
1265
+
1266
+ Returns:
1267
+ SearchResult with memories, goals, decisions, and assembled context
1268
+ """
1269
+ result = SearchResult()
1270
+
1271
+ # Check if user has a unified graph (Issue #26 fix)
1272
+ unified_graph = self._user_graphs.get(user_id) if self._user_graphs else None
1273
+
1274
+ if unified_graph:
1275
+ # Search unified graph directly
1276
+ logger.debug("Searching unified graph...")
1277
+ graph_results = unified_graph.search(query, limit=limit * 2)
1278
+
1279
+ # Convert graph results to MemoryEntry format
1280
+ result.memories = [
1281
+ MemoryEntry(
1282
+ id=r["id"],
1283
+ content=r["content"],
1284
+ metadata={"type": r["type"], **r.get("attributes", {})},
1285
+ score=r["score"]
1286
+ )
1287
+ for r in graph_results
1288
+ ]
1289
+
1290
+ # Get active goals from unified graph
1291
+ if include_goals:
1292
+ active_goal_dicts = unified_graph.get_active_goals()
1293
+ # Convert to IntentNode format for compatibility
1294
+ result.active_goals = [
1295
+ IntentNode(
1296
+ id=g["id"],
1297
+ type=self._goal_type_to_intent_type(g.get("goal_type", "goal")),
1298
+ content=g["content"],
1299
+ status=self._status_to_intent_status(g.get("status", "active"))
1300
+ )
1301
+ for g in active_goal_dicts
1302
+ ]
1303
+
1304
+ # Get decisions from unified graph
1305
+ if include_decisions:
1306
+ all_decisions = unified_graph.get_all_decisions_with_detail()
1307
+ # Filter to relevant decisions (keyword match - any query word)
1308
+ query_words = set(query.lower().split())
1309
+
1310
+ def decision_matches(d: dict) -> bool:
1311
+ """Check if decision matches any query word."""
1312
+ decision_lower = d["decision"].lower()
1313
+ # Check if any query word appears in decision
1314
+ if any(w in decision_lower for w in query_words):
1315
+ return True
1316
+ # Check rationale
1317
+ rationale = d.get("rationale", "") or ""
1318
+ if any(w in rationale.lower() for w in query_words):
1319
+ return True
1320
+ # Check reasons
1321
+ for r in d.get("reasons", []):
1322
+ claim = (r.get("claim", "") or "").lower()
1323
+ if any(w in claim for w in query_words):
1324
+ return True
1325
+ return False
1326
+
1327
+ result.relevant_decisions = [
1328
+ DecisionTrail(
1329
+ id=d["id"],
1330
+ decision=d["decision"],
1331
+ rationale=d.get("rationale", ""),
1332
+ alternatives=[], # Could expand if needed
1333
+ context=d.get("context", "")
1334
+ )
1335
+ for d in all_decisions
1336
+ if decision_matches(d)
1337
+ ][:5] # Limit to top 5 relevant decisions
1338
+ else:
1339
+ # Issue #53: No fallback needed - MCAL is always standalone
1340
+ # If no unified graph, return empty results
1341
+ logger.debug("No unified graph found for user")
1342
+ result.memories = []
1343
+ result.active_goals = []
1344
+ result.relevant_decisions = []
1345
+
1346
+ # Assemble context
1347
+ result.context = self._context_assembler.assemble(
1348
+ retrieved=[], # We'll pass memories differently
1349
+ active_goals=result.active_goals,
1350
+ decisions=result.relevant_decisions,
1351
+ include_goals=include_goals,
1352
+ include_decisions=include_decisions
1353
+ )
1354
+
1355
+ # Add memories to context
1356
+ if result.memories:
1357
+ memory_lines = ["\n### RELEVANT MEMORIES ###"]
1358
+ for mem in result.memories:
1359
+ score_str = f" (relevance: {mem.score:.2f})" if mem.score else ""
1360
+ memory_lines.append(f"- {mem.content}{score_str}")
1361
+ result.context += "\n".join(memory_lines)
1362
+
1363
+ return result
1364
+
1365
+ async def get_context(
1366
+ self,
1367
+ query: str,
1368
+ user_id: str,
1369
+ max_tokens: int = 4000,
1370
+ include_goals: bool = True,
1371
+ include_decisions: bool = True,
1372
+ include_reasoning: bool = True,
1373
+ ) -> str:
1374
+ """
1375
+ Get assembled context for LLM prompt.
1376
+
1377
+ Convenience method that searches and formats context.
1378
+
1379
+ Args:
1380
+ query: The query/task at hand
1381
+ user_id: User identifier
1382
+ max_tokens: Maximum tokens for context
1383
+ include_goals: Include active goals
1384
+ include_decisions: Include relevant decisions
1385
+ include_reasoning: Include decision rationale
1386
+
1387
+ Returns:
1388
+ Formatted context string ready for LLM prompt
1389
+ """
1390
+ # Search with all enhancements
1391
+ search_result = await self.search(
1392
+ query=query,
1393
+ user_id=user_id,
1394
+ include_goals=include_goals,
1395
+ include_decisions=include_decisions,
1396
+ )
1397
+
1398
+ # Build context sections
1399
+ sections = []
1400
+
1401
+ # Active goals
1402
+ if include_goals and search_result.active_goals:
1403
+ goals_text = "### ACTIVE GOALS ###\n"
1404
+ for goal in search_result.active_goals:
1405
+ status = "🔵" if goal.status.value == "active" else "⏳"
1406
+ goals_text += f"{status} {goal.content}\n"
1407
+ sections.append(goals_text)
1408
+
1409
+ # Relevant decisions with reasoning
1410
+ if include_decisions and search_result.relevant_decisions:
1411
+ decisions_text = "### KEY DECISIONS ###\n"
1412
+ for decision in search_result.relevant_decisions[:5]:
1413
+ decisions_text += f"DECISION: {decision.decision}\n"
1414
+ if include_reasoning:
1415
+ decisions_text += f" Rationale: {decision.rationale}\n"
1416
+ if decision.alternatives:
1417
+ alts = ", ".join(a.option for a in decision.alternatives[:3])
1418
+ decisions_text += f" Alternatives considered: {alts}\n"
1419
+ decisions_text += "\n"
1420
+ sections.append(decisions_text)
1421
+
1422
+ # Memories
1423
+ if search_result.memories:
1424
+ memories_text = "### RELEVANT CONTEXT ###\n"
1425
+ for mem in search_result.memories[:10]:
1426
+ memories_text += f"- {mem.content}\n"
1427
+ sections.append(memories_text)
1428
+
1429
+ return "\n".join(sections)
1430
+
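The assembled string drops straight into a prompt. A sketch (the downstream model call is elided; MCAL only assembles the context):

    async def answer_with_memory(mcal: MCAL, question: str, user_id: str) -> str:
        context = await mcal.get_context(query=question, user_id=user_id, max_tokens=4000)
        prompt = f"{context}\n\n### QUESTION ###\n{question}"
        # hand `prompt` to any chat model
        return prompt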
1431
+ def get_active_goals(self, user_id: str) -> list[IntentNode]:
1432
+ """Get active goals for a user."""
1433
+ if user_id in self._user_intents:
1434
+ return self._user_intents[user_id].get_active_goals()
1435
+ return []
1436
+
1437
+ def get_intent_graph(self, user_id: str) -> Optional[IntentGraph]:
1438
+ """Get full intent graph for a user."""
1439
+ return self._user_intents.get(user_id)
1440
+
1441
+ def get_decisions(
1442
+ self,
1443
+ user_id: str,
1444
+ goal_id: Optional[str] = None,
1445
+ include_invalidated: bool = False
1446
+ ) -> list[DecisionTrail]:
1447
+ """Get decisions, optionally filtered by goal."""
1448
+ if goal_id:
1449
+ return self._reasoning_store.get_decisions_for_goal(
1450
+ goal_id, include_invalidated=include_invalidated
1451
+ )
1452
+ return self._reasoning_store.get_valid_decisions()
1453
+
1454
+ async def _rerank_with_goals(
1455
+ self,
1456
+ memories: list[MemoryEntry],
1457
+ query: str,
1458
+ active_goals: list[IntentNode],
1459
+ ) -> list[MemoryEntry]:
1460
+ """
1461
+ Re-rank memories based on goal alignment.
1462
+
1463
+ This is MCAL Pillar 3: Goal-Aware Retrieval.
1464
+
1465
+ Instead of pure similarity, we boost memories that
1466
+ help achieve the user's active goals.
1467
+ """
1468
+ if not active_goals or not memories:
1469
+ return memories
1470
+
1471
+ # For now, simple heuristic-based re-ranking
1472
+ # TODO: Implement LLM-based goal alignment scoring
1473
+
1474
+ goal_keywords = set()
1475
+ for goal in active_goals:
1476
+ # Extract keywords from goal content
1477
+ words = goal.content.lower().split()
1478
+ goal_keywords.update(w for w in words if len(w) > 3)
1479
+
1480
+ # Score each memory
1481
+ scored = []
1482
+ for mem in memories:
1483
+ base_score = mem.score or 0.5
1484
+
1485
+ # Boost if memory content relates to active goals
1486
+ mem_words = set(mem.content.lower().split())
1487
+ overlap = len(goal_keywords & mem_words)
1488
+ goal_boost = min(0.3, overlap * 0.1) # Max 0.3 boost
1489
+
1490
+ final_score = base_score + goal_boost
1491
+ mem.score = final_score
1492
+ scored.append(mem)
1493
+
1494
+ # Sort by final score
1495
+ scored.sort(key=lambda m: m.score or 0, reverse=True)
1496
+
1497
+ return scored
1498
+
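Concretely, the boost above works out as follows (scores illustrative):

    # base_score=0.62, keyword overlap=2 -> goal_boost=min(0.3, 2*0.1)=0.2 -> final=0.82
    # base_score=0.50, keyword overlap=5 -> goal_boost=min(0.3, 5*0.1)=0.3 -> final=0.80 (capped)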
1499
+ def _get_relevant_decisions(
1500
+ self,
1501
+ query: str,
1502
+ active_goals: list[IntentNode]
1503
+ ) -> list[DecisionTrail]:
1504
+ """Get decisions relevant to the query and active goals."""
1505
+ relevant = []
1506
+
1507
+ # Get decisions for active goals
1508
+ for goal in active_goals:
1509
+ goal_decisions = self._reasoning_store.get_decisions_for_goal(goal.id)
1510
+ relevant.extend(goal_decisions)
1511
+
1512
+ # Also check for keyword matches in query
1513
+ query_lower = query.lower()
1514
+ for decision in self._reasoning_store.get_valid_decisions():
1515
+ if decision not in relevant:
1516
+ if any(word in decision.decision.lower() for word in query_lower.split() if len(word) > 3):
1517
+ relevant.append(decision)
1518
+
1519
+ # Deduplicate and limit
1520
+ seen = set()
1521
+ unique = []
1522
+ for d in relevant:
1523
+ if d.id not in seen:
1524
+ seen.add(d.id)
1525
+ unique.append(d)
1526
+
1527
+ return unique[:10]
1528
+
1529
+ def clear_user_data(self, user_id: str) -> bool:
1530
+ """Clear all data for a user."""
1531
+ # Clear unified graph data
1532
+ if user_id in self._user_graphs:
1533
+ del self._user_graphs[user_id]
1534
+
1535
+ # Clear from persistent storage
1536
+ if self._enable_persistence and self._storage:
1537
+ self._storage.delete_unified_graph(user_id)
1538
+
1539
+ # Clear MCAL data
1540
+ if user_id in self._user_intents:
1541
+ del self._user_intents[user_id]
1542
+
1543
+ if user_id in self._sessions:
1544
+ del self._sessions[user_id]
1545
+
1546
+ # Clear extraction cache (Issue #9)
1547
+ if self._enable_extraction_cache and self._extraction_cache:
1548
+ self._extraction_cache.invalidate(user_id)
1549
+
1550
+ return True
1551
+
1552
+ def get_tiered_model_stats(self) -> Optional[dict]:
1553
+ """
1554
+ Get tiered model usage statistics (Issue #8).
1555
+
1556
+ Returns:
1557
+ Dict with fast_calls, smart_calls, total, fast_ratio
1558
+ or None if tiered models not enabled
1559
+ """
1560
+ if self._enable_tiered_models and isinstance(self._llm, TieredBedrockProvider):
1561
+ return self._llm.get_usage_stats()
1562
+ return None
1563
+
1564
+ def get_cache_stats(self) -> Optional[dict]:
1565
+ """
1566
+ Get extraction cache statistics (Issue #9).
1567
+
1568
+ Returns:
1569
+ Dict with hits, misses, partial_hits, hit_rate, etc.
1570
+ or None if cache not enabled
1571
+ """
1572
+ if self._enable_extraction_cache and self._extraction_cache:
1573
+ return self._extraction_cache.get_stats().to_dict()
1574
+ return None
1575
+
1576
+ def invalidate_cache(self, user_id: Optional[str] = None) -> int:
1577
+ """
1578
+ Invalidate extraction cache.
1579
+
1580
+ Args:
1581
+ user_id: Specific user to invalidate, or None for all
1582
+
1583
+ Returns:
1584
+ Number of entries invalidated
1585
+ """
1586
+ if not self._enable_extraction_cache or not self._extraction_cache:
1587
+ return 0
1588
+
1589
+ if user_id:
1590
+ return 1 if self._extraction_cache.invalidate(user_id) else 0
1591
+ else:
1592
+ return self._extraction_cache.clear()
1593
+
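The two stats getters above combine naturally into a periodic health log. A sketch using only methods defined on this class:

    def log_mcal_health(mcal: MCAL) -> None:
        """Log cache and model-routing stats when the corresponding features are enabled."""
        cache_stats = mcal.get_cache_stats()        # None unless enable_extraction_cache=True
        if cache_stats:
            logger.info("extraction cache: %s", cache_stats)
        tier_stats = mcal.get_tiered_model_stats()  # None unless enable_tiered_models=True
        if tier_stats:
            logger.info("model routing: fast_ratio=%.2f", tier_stats["fast_ratio"])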
1594
+ def _serialize_decisions(self, decisions: list[DecisionTrail]) -> list[dict]:
1595
+ """Serialize decisions for cache storage."""
1596
+ result = []
1597
+ for d in decisions:
1598
+ result.append({
1599
+ "id": d.id,
1600
+ "decision": d.decision,
1601
+ "context": d.context,
1602
+ "rationale": d.rationale,
1603
+ "confidence": d.confidence,
1604
+ "related_goals": d.related_goals,
1605
+ "dependencies": d.dependencies,
1606
+ "invalidated_by": d.invalidated_by,
1607
+ "turn_id": d.turn_id,
1608
+ "alternatives": [
1609
+ {"option": a.option, "pros": a.pros, "cons": a.cons, "rejection_reason": a.rejection_reason}
1610
+ for a in (d.alternatives or [])
1611
+ ],
1612
+ "evidence": [
1613
+ {"claim": e.claim, "source": e.source.value if e.source else "inferred", "turn_id": e.turn_id, "confidence": e.confidence}
1614
+ for e in (d.evidence or [])
1615
+ ],
1616
+ "trade_offs": [
1617
+ {"gained": t.gained, "sacrificed": t.sacrificed, "justification": t.justification}
1618
+ for t in (d.trade_offs or [])
1619
+ ],
1620
+ })
1621
+ return result
1622
+
1623
+ def _deserialize_decisions(self, data: list[dict]) -> list[DecisionTrail]:
1624
+ """Deserialize decisions from cache storage."""
1625
+ from .core.models import Alternative, Evidence, EvidenceSource, TradeOff
1626
+
1627
+ result = []
1628
+ for d in data:
1629
+ alternatives = [
1630
+ Alternative(
1631
+ option=a.get("option", ""),
1632
+ pros=a.get("pros", []),
1633
+ cons=a.get("cons", []),
1634
+ rejection_reason=a.get("rejection_reason")
1635
+ )
1636
+ for a in d.get("alternatives", [])
1637
+ ]
1638
+
1639
+ evidence = []
1640
+ for e in d.get("evidence", []):
1641
+ try:
1642
+ source = EvidenceSource(e.get("source", "inferred"))
1643
+ except ValueError:
1644
+ source = EvidenceSource.INFERRED
1645
+ evidence.append(Evidence(
1646
+ claim=e.get("claim", ""),
1647
+ source=source,
1648
+ turn_id=e.get("turn_id"),
1649
+ confidence=e.get("confidence", 0.8)
1650
+ ))
1651
+
1652
+ trade_offs = [
1653
+ TradeOff(
1654
+ gained=t.get("gained", ""),
1655
+ sacrificed=t.get("sacrificed", ""),
1656
+ justification=t.get("justification")
1657
+ )
1658
+ for t in d.get("trade_offs", [])
1659
+ ]
1660
+
1661
+ result.append(DecisionTrail(
1662
+ id=d.get("id", ""),
1663
+ decision=d.get("decision", ""),
1664
+ context=d.get("context", ""),
1665
+ rationale=d.get("rationale", ""),
1666
+ confidence=d.get("confidence", 0.5),
1667
+ related_goals=d.get("related_goals", []),
1668
+ dependencies=d.get("dependencies", []),
1669
+ invalidated_by=d.get("invalidated_by"),
1670
+ turn_id=d.get("turn_id"),
1671
+ alternatives=alternatives,
1672
+ evidence=evidence,
1673
+ trade_offs=trade_offs,
1674
+ ))
1675
+
1676
+ return result
1677
+
1678
+ def _goal_type_to_intent_type(self, goal_type: str) -> IntentType:
1679
+ """Convert unified graph goal_type string to IntentType enum."""
1680
+ mapping = {
1681
+ "mission": IntentType.MISSION,
1682
+ "goal": IntentType.GOAL,
1683
+ "task": IntentType.TASK,
1684
+ "decision": IntentType.DECISION,
1685
+ }
1686
+ return mapping.get(goal_type.lower(), IntentType.GOAL)
1687
+
1688
+ def _status_to_intent_status(self, status: str) -> IntentStatus:
1689
+ """Convert unified graph status string to IntentStatus enum."""
1690
+ mapping = {
1691
+ "active": IntentStatus.ACTIVE,
1692
+ "completed": IntentStatus.COMPLETED,
1693
+ "abandoned": IntentStatus.ABANDONED,
1694
+ "pending": IntentStatus.PENDING,
1695
+ "blocked": IntentStatus.BLOCKED,
1696
+ }
1697
+ return mapping.get(status.lower(), IntentStatus.ACTIVE)