alma-memory 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. alma/__init__.py +296 -194
  2. alma/compression/__init__.py +33 -0
  3. alma/compression/pipeline.py +980 -0
  4. alma/confidence/__init__.py +47 -47
  5. alma/confidence/engine.py +540 -540
  6. alma/confidence/types.py +351 -351
  7. alma/config/loader.py +157 -157
  8. alma/consolidation/__init__.py +23 -23
  9. alma/consolidation/engine.py +678 -678
  10. alma/consolidation/prompts.py +84 -84
  11. alma/core.py +1189 -322
  12. alma/domains/__init__.py +30 -30
  13. alma/domains/factory.py +359 -359
  14. alma/domains/schemas.py +448 -448
  15. alma/domains/types.py +272 -272
  16. alma/events/__init__.py +75 -75
  17. alma/events/emitter.py +285 -284
  18. alma/events/storage_mixin.py +246 -246
  19. alma/events/types.py +126 -126
  20. alma/events/webhook.py +425 -425
  21. alma/exceptions.py +49 -49
  22. alma/extraction/__init__.py +31 -31
  23. alma/extraction/auto_learner.py +265 -264
  24. alma/extraction/extractor.py +420 -420
  25. alma/graph/__init__.py +106 -81
  26. alma/graph/backends/__init__.py +32 -18
  27. alma/graph/backends/kuzu.py +624 -0
  28. alma/graph/backends/memgraph.py +432 -0
  29. alma/graph/backends/memory.py +236 -236
  30. alma/graph/backends/neo4j.py +417 -417
  31. alma/graph/base.py +159 -159
  32. alma/graph/extraction.py +198 -198
  33. alma/graph/store.py +860 -860
  34. alma/harness/__init__.py +35 -35
  35. alma/harness/base.py +386 -386
  36. alma/harness/domains.py +705 -705
  37. alma/initializer/__init__.py +37 -37
  38. alma/initializer/initializer.py +418 -418
  39. alma/initializer/types.py +250 -250
  40. alma/integration/__init__.py +62 -62
  41. alma/integration/claude_agents.py +444 -432
  42. alma/integration/helena.py +423 -423
  43. alma/integration/victor.py +471 -471
  44. alma/learning/__init__.py +101 -86
  45. alma/learning/decay.py +878 -0
  46. alma/learning/forgetting.py +1446 -1446
  47. alma/learning/heuristic_extractor.py +390 -390
  48. alma/learning/protocols.py +374 -374
  49. alma/learning/validation.py +346 -346
  50. alma/mcp/__init__.py +123 -45
  51. alma/mcp/__main__.py +156 -156
  52. alma/mcp/resources.py +122 -122
  53. alma/mcp/server.py +955 -591
  54. alma/mcp/tools.py +3254 -511
  55. alma/observability/__init__.py +91 -0
  56. alma/observability/config.py +302 -0
  57. alma/observability/guidelines.py +170 -0
  58. alma/observability/logging.py +424 -0
  59. alma/observability/metrics.py +583 -0
  60. alma/observability/tracing.py +440 -0
  61. alma/progress/__init__.py +21 -21
  62. alma/progress/tracker.py +607 -607
  63. alma/progress/types.py +250 -250
  64. alma/retrieval/__init__.py +134 -53
  65. alma/retrieval/budget.py +525 -0
  66. alma/retrieval/cache.py +1304 -1061
  67. alma/retrieval/embeddings.py +202 -202
  68. alma/retrieval/engine.py +850 -366
  69. alma/retrieval/modes.py +365 -0
  70. alma/retrieval/progressive.py +560 -0
  71. alma/retrieval/scoring.py +344 -344
  72. alma/retrieval/trust_scoring.py +637 -0
  73. alma/retrieval/verification.py +797 -0
  74. alma/session/__init__.py +19 -19
  75. alma/session/manager.py +442 -399
  76. alma/session/types.py +288 -288
  77. alma/storage/__init__.py +101 -61
  78. alma/storage/archive.py +233 -0
  79. alma/storage/azure_cosmos.py +1259 -1048
  80. alma/storage/base.py +1083 -525
  81. alma/storage/chroma.py +1443 -1443
  82. alma/storage/constants.py +103 -0
  83. alma/storage/file_based.py +614 -619
  84. alma/storage/migrations/__init__.py +21 -0
  85. alma/storage/migrations/base.py +321 -0
  86. alma/storage/migrations/runner.py +323 -0
  87. alma/storage/migrations/version_stores.py +337 -0
  88. alma/storage/migrations/versions/__init__.py +11 -0
  89. alma/storage/migrations/versions/v1_0_0.py +373 -0
  90. alma/storage/migrations/versions/v1_1_0_workflow_context.py +551 -0
  91. alma/storage/pinecone.py +1080 -1080
  92. alma/storage/postgresql.py +1948 -1452
  93. alma/storage/qdrant.py +1306 -1306
  94. alma/storage/sqlite_local.py +3041 -1358
  95. alma/testing/__init__.py +46 -0
  96. alma/testing/factories.py +301 -0
  97. alma/testing/mocks.py +389 -0
  98. alma/types.py +292 -264
  99. alma/utils/__init__.py +19 -0
  100. alma/utils/tokenizer.py +521 -0
  101. alma/workflow/__init__.py +83 -0
  102. alma/workflow/artifacts.py +170 -0
  103. alma/workflow/checkpoint.py +311 -0
  104. alma/workflow/context.py +228 -0
  105. alma/workflow/outcomes.py +189 -0
  106. alma/workflow/reducers.py +393 -0
  107. {alma_memory-0.5.0.dist-info → alma_memory-0.7.0.dist-info}/METADATA +244 -72
  108. alma_memory-0.7.0.dist-info/RECORD +112 -0
  109. alma_memory-0.5.0.dist-info/RECORD +0 -76
  110. {alma_memory-0.5.0.dist-info → alma_memory-0.7.0.dist-info}/WHEEL +0 -0
  111. {alma_memory-0.5.0.dist-info → alma_memory-0.7.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,797 @@
1
+ """
2
+ ALMA Two-Stage Verified Retrieval.
3
+
4
+ Provides verification of retrieved memories for high-stakes applications.
5
+ Based on Memory Wall principles: "Pair fuzzy retrieval with exact verification."
6
+
7
+ Two-stage process:
8
+ 1. Fuzzy Recall: Semantic search with expanded candidate set
9
+ 2. Verification: Validate against ground truth, cross-verify, or confidence fallback
10
+
11
+ Verification statuses:
12
+ - VERIFIED: Safe to use, confirmed accurate
13
+ - UNCERTAIN: Use with caution, unconfirmed
14
+ - CONTRADICTED: Needs review, conflicts detected
15
+ - UNVERIFIABLE: No verification method available
16
+ """
17
+
18
+ import logging
19
+ import re
20
+ import time
21
+ from dataclasses import dataclass, field
22
+ from enum import Enum
23
+ from typing import Any, Dict, List, Optional, Protocol, Union
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
class VerificationStatus(Enum):
    """Outcome categories a memory can be assigned after verification."""

    # Confirmed accurate, safe to use
    VERIFIED = "verified"
    # Unconfirmed, use with caution
    UNCERTAIN = "uncertain"
    # Conflicts detected, needs review
    CONTRADICTED = "contradicted"
    # No verification method available
    UNVERIFIABLE = "unverifiable"
35
+
36
+
37
class VerificationMethod(Enum):
    """Strategies that can produce a verification result."""

    # Verified against authoritative sources
    GROUND_TRUTH = "ground_truth"
    # Verified against other memories
    CROSS_VERIFY = "cross_verify"
    # Confidence-based fallback (no LLM)
    CONFIDENCE = "confidence"
    # No verification performed
    NONE = "none"
44
+
45
+
46
@dataclass
class Verification:
    """
    Outcome of verifying one memory.

    Attributes:
        status: Verification status (VERIFIED, UNCERTAIN, etc.)
        confidence: Confidence in the result; clamped to 0.0 - 1.0 on creation
        reason: Human-readable explanation
        method: Method that produced the result
        contradicting_source: What contradicted the memory, when CONTRADICTED
        verification_time_ms: Time spent verifying, in milliseconds
    """

    status: VerificationStatus
    confidence: float
    reason: str
    method: VerificationMethod = VerificationMethod.NONE
    contradicting_source: Optional[str] = None
    verification_time_ms: int = 0

    def __post_init__(self):
        """Clamp confidence into the closed interval [0.0, 1.0]."""
        capped = min(1.0, self.confidence)
        self.confidence = max(0.0, capped)

    def is_usable(self) -> bool:
        """Return True when the status is VERIFIED or UNCERTAIN."""
        usable_statuses = (
            VerificationStatus.VERIFIED,
            VerificationStatus.UNCERTAIN,
        )
        return self.status in usable_statuses

    def needs_review(self) -> bool:
        """Return True when the status is CONTRADICTED."""
        return self.status == VerificationStatus.CONTRADICTED

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dictionary (enums become their string values)."""
        payload: Dict[str, Any] = {}
        payload["status"] = self.status.value
        payload["confidence"] = self.confidence
        payload["reason"] = self.reason
        payload["method"] = self.method.value
        payload["contradicting_source"] = self.contradicting_source
        payload["verification_time_ms"] = self.verification_time_ms
        return payload

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Verification":
        """
        Rebuild a Verification from a dictionary.

        "status", "confidence" and "reason" are required keys; the remaining
        keys fall back to their defaults when absent.
        """
        parsed_status = VerificationStatus(data["status"])
        raw_confidence = data["confidence"]
        explanation = data["reason"]
        parsed_method = VerificationMethod(data.get("method", "none"))
        return cls(
            status=parsed_status,
            confidence=raw_confidence,
            reason=explanation,
            method=parsed_method,
            contradicting_source=data.get("contradicting_source"),
            verification_time_ms=data.get("verification_time_ms", 0),
        )
104
+
105
+
106
@dataclass
class VerifiedMemory:
    """
    Memory with verification result attached.

    Attributes:
        memory: The original memory object
        verification: Verification result
        retrieval_score: Original similarity/relevance score from retrieval
    """

    memory: Any
    verification: Verification
    retrieval_score: float = 0.0

    @property
    def status(self) -> VerificationStatus:
        """Shortcut to verification status."""
        return self.verification.status

    @property
    def is_verified(self) -> bool:
        """Check if memory is verified."""
        return self.status == VerificationStatus.VERIFIED

    @property
    def is_usable(self) -> bool:
        """Check if memory is usable (verified or uncertain)."""
        return self.verification.is_usable()

    def combined_score(self, verification_weight: float = 0.5) -> float:
        """
        Compute combined score from retrieval and verification.

        The result is a weighted average of ``retrieval_score`` and the
        verification confidence.

        Args:
            verification_weight: Weight for verification confidence (0-1).
                Values outside that range are clamped so neither component
                can receive a negative weight.

        Returns:
            Weighted average of the two scores; it stays within 0-1 as long
            as ``retrieval_score`` itself is within 0-1.
        """
        # Clamp so that e.g. a weight of 2.0 cannot turn the retrieval
        # contribution negative and push the result outside the score range.
        weight = max(0.0, min(1.0, verification_weight))
        retrieval_weight = 1.0 - weight
        return (
            self.retrieval_score * retrieval_weight
            + self.verification.confidence * weight
        )

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert to dictionary.

        The memory is serialized through its own ``to_dict`` when it has
        one; otherwise its string form is stored under ``"content"``.
        """
        if hasattr(self.memory, "to_dict"):
            memory_dict = self.memory.to_dict()
        else:
            memory_dict = {"content": str(self.memory)}
        return {
            "memory": memory_dict,
            "verification": self.verification.to_dict(),
            "retrieval_score": self.retrieval_score,
        }
164
+
165
+
166
@dataclass
class VerifiedResults:
    """
    Verified memories grouped by verification status.

    Attributes:
        verified: Memories confirmed accurate
        uncertain: Memories with uncertain status
        contradicted: Memories with conflicts detected
        unverifiable: Memories that couldn't be verified
        metadata: Additional information about the verification process
    """

    verified: List[VerifiedMemory] = field(default_factory=list)
    uncertain: List[VerifiedMemory] = field(default_factory=list)
    contradicted: List[VerifiedMemory] = field(default_factory=list)
    unverifiable: List[VerifiedMemory] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)

    @property
    def all_usable(self) -> List[VerifiedMemory]:
        """New list holding the verified memories followed by the uncertain ones."""
        return self.verified + self.uncertain

    @property
    def high_confidence(self) -> List[VerifiedMemory]:
        """The verified bucket itself (not a copy)."""
        return self.verified

    @property
    def needs_review(self) -> List[VerifiedMemory]:
        """The contradicted bucket itself (not a copy)."""
        return self.contradicted

    @property
    def total_count(self) -> int:
        """Number of memories across all four buckets."""
        buckets = (
            self.verified,
            self.uncertain,
            self.contradicted,
            self.unverifiable,
        )
        return sum(len(bucket) for bucket in buckets)

    def summary(self) -> Dict[str, Any]:
        """Per-bucket counts, usable count/ratio and the recorded verification time."""
        total = self.total_count
        usable_count = len(self.all_usable)
        # Guard the ratio against an empty result set.
        usable_ratio = usable_count / total if total > 0 else 0.0
        return {
            "verified": len(self.verified),
            "uncertain": len(self.uncertain),
            "contradicted": len(self.contradicted),
            "unverifiable": len(self.unverifiable),
            "total": total,
            "usable_count": usable_count,
            "usable_ratio": usable_ratio,
            "verification_time_ms": self.metadata.get("total_verification_time_ms", 0),
        }

    def get_by_status(self, status: VerificationStatus) -> List[VerifiedMemory]:
        """Return the bucket for ``status``; anything unrecognized maps to unverifiable."""
        known_buckets = (
            (VerificationStatus.VERIFIED, self.verified),
            (VerificationStatus.UNCERTAIN, self.uncertain),
            (VerificationStatus.CONTRADICTED, self.contradicted),
        )
        for known_status, bucket in known_buckets:
            if status == known_status:
                return bucket
        return self.unverifiable

    def sort_by_confidence(self, descending: bool = True) -> None:
        """Sort every bucket in place by verification confidence."""
        for bucket in (
            self.verified,
            self.uncertain,
            self.contradicted,
            self.unverifiable,
        ):
            bucket.sort(key=lambda vm: vm.verification.confidence, reverse=descending)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize all buckets, the metadata and the summary."""
        serialized: Dict[str, Any] = {}
        for bucket_name in ("verified", "uncertain", "contradicted", "unverifiable"):
            serialized[bucket_name] = [vm.to_dict() for vm in getattr(self, bucket_name)]
        serialized["metadata"] = self.metadata
        serialized["summary"] = self.summary()
        return serialized
258
+
259
+
260
@dataclass
class VerificationConfig:
    """
    Settings that control verification.

    Attributes:
        enabled: Whether verification is enabled
        default_method: Default verification method to use
        confidence_threshold: Threshold for confidence-based verification
        llm_timeout_seconds: Timeout for LLM-based verification
        expand_candidates_factor: Factor to expand candidate set (default 4x)
        max_sources_for_verification: Max sources to use for ground truth
        max_memories_for_cross_verify: Max other memories to cross-verify against
    """

    enabled: bool = False  # Opt-in by default
    default_method: str = "confidence"  # confidence | cross_verify | ground_truth
    confidence_threshold: float = 0.7
    llm_timeout_seconds: float = 5.0
    expand_candidates_factor: int = 4
    max_sources_for_verification: int = 5
    max_memories_for_cross_verify: int = 5

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "VerificationConfig":
        """Build a config from a dictionary; missing keys use defaults, unknown keys are ignored."""
        fallbacks = {
            "enabled": False,
            "default_method": "confidence",
            "confidence_threshold": 0.7,
            "llm_timeout_seconds": 5.0,
            "expand_candidates_factor": 4,
            "max_sources_for_verification": 5,
            "max_memories_for_cross_verify": 5,
        }
        settings = {
            name: data.get(name, fallback) for name, fallback in fallbacks.items()
        }
        return cls(**settings)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize every setting into a plain dictionary."""
        setting_names = (
            "enabled",
            "default_method",
            "confidence_threshold",
            "llm_timeout_seconds",
            "expand_candidates_factor",
            "max_sources_for_verification",
            "max_memories_for_cross_verify",
        )
        return {name: getattr(self, name) for name in setting_names}
307
+
308
+
309
class LLMClient(Protocol):
    """Structural interface expected of LLM clients used for verification."""

    def complete(self, prompt: str, timeout: Optional[float] = None) -> str:
        """Return the model's text response for ``prompt``."""
        ...
315
+
316
+
317
class VerifiedRetriever:
    """
    Two-stage retrieval with verification.

    Stage 1: Fuzzy recall with expanded candidate set
    Stage 2: Verify candidates using one of:
        - Ground truth sources (with LLM)
        - Cross-verification against other memories (with LLM)
        - Confidence-based fallback (no LLM required)

    Example:
        retriever = VerifiedRetriever(
            retrieval_engine=engine,
            llm_client=llm,
            config=VerificationConfig(enabled=True)
        )
        results = retriever.retrieve_verified(
            query="How to handle API rate limits?",
            agent="dev-agent",
            project_id="my-project"
        )
        for memory in results.high_confidence:
            print(f"Verified: {memory.memory}")
    """

    def __init__(
        self,
        retrieval_engine: Any,
        llm_client: Optional[LLMClient] = None,
        config: Optional[VerificationConfig] = None,
    ):
        """
        Initialize verified retriever.

        Args:
            retrieval_engine: RetrievalEngine or compatible retriever
            llm_client: Optional LLM client for verification
            config: Verification configuration (defaults are used when omitted)
        """
        self.retrieval_engine = retrieval_engine
        self.llm = llm_client
        self.config = config or VerificationConfig()

    @staticmethod
    def _elapsed_ms(start: float) -> int:
        """Whole milliseconds elapsed since ``start`` (a ``time.perf_counter()`` reading)."""
        return int((time.perf_counter() - start) * 1000)

    @staticmethod
    def _to_float(value: Any) -> Optional[float]:
        """Convert ``value`` to float; return None for None, non-numeric or NaN input."""
        if value is None:
            return None
        try:
            number = float(value)
        except (TypeError, ValueError):
            return None
        # NaN is the only float that differs from itself; it cannot be
        # compared against thresholds, so treat it as missing.
        if number != number:
            return None
        return number

    def retrieve_verified(
        self,
        query: str,
        agent: str,
        project_id: str,
        ground_truth_sources: Optional[List[str]] = None,
        cross_verify: Optional[bool] = None,
        top_k: int = 5,
        **retrieval_kwargs: Any,
    ) -> VerifiedResults:
        """
        Two-stage retrieval with verification.

        Args:
            query: Query string for retrieval
            agent: Agent requesting memories
            project_id: Project context
            ground_truth_sources: Optional authoritative sources for verification
            cross_verify: Whether to cross-verify (None = use config default)
            top_k: Number of final results to return per status category
            **retrieval_kwargs: Additional arguments for retrieval engine

        Returns:
            VerifiedResults with categorized memories. Its metadata records
            the total elapsed time (recall plus verification), the number of
            candidates, the query and top_k.
        """
        start_time = time.perf_counter()

        # Stage 1: Fuzzy recall with expanded candidates. The factor is
        # floored at 1 and the product coerced to int so a misconfigured
        # factor (0, negative, fractional) can neither shrink recall below
        # top_k nor hand the engine a non-integer top_k.
        recall_k = int(top_k * max(1, self.config.expand_candidates_factor))
        retrieval_kwargs["top_k"] = recall_k
        retrieval_kwargs["agent"] = agent
        retrieval_kwargs["project_id"] = project_id

        memory_slice = self.retrieval_engine.retrieve(query, **retrieval_kwargs)
        candidates = self._extract_candidates(memory_slice)

        # Stage 2: Verification
        results = self._verify_candidates(
            candidates=candidates,
            ground_truth_sources=ground_truth_sources,
            cross_verify=cross_verify,
        )

        # Limit results to top_k per category
        results.verified = results.verified[:top_k]
        results.uncertain = results.uncertain[:top_k]
        results.contradicted = results.contradicted[:top_k]
        results.unverifiable = results.unverifiable[:top_k]

        # Record metadata
        results.metadata["total_verification_time_ms"] = self._elapsed_ms(start_time)
        results.metadata["total_candidates"] = len(candidates)
        results.metadata["query"] = query
        results.metadata["top_k"] = top_k

        return results

    def _extract_candidates(self, memory_slice: Any) -> List[Any]:
        """
        Extract all memory objects from a MemorySlice or similar container.

        Collects the ``heuristics``, ``outcomes``, ``knowledge``,
        ``anti_patterns`` and ``preferences`` attributes when present; if
        none of them yields anything and the object is iterable, its items
        are used instead.
        """
        candidates: List[Any] = []

        for section in (
            "heuristics",
            "outcomes",
            "knowledge",
            "anti_patterns",
            "preferences",
        ):
            if hasattr(memory_slice, section):
                candidates.extend(getattr(memory_slice, section) or [])

        # Fallback for list-like containers
        if not candidates and hasattr(memory_slice, "__iter__"):
            candidates = list(memory_slice)

        return candidates

    def _verify_candidates(
        self,
        candidates: List[Any],
        ground_truth_sources: Optional[List[str]],
        cross_verify: Optional[bool],
    ) -> VerifiedResults:
        """
        Verify each candidate and categorize.

        Method selection per candidate: ground truth when sources are given,
        otherwise cross-verification when requested and an LLM is available,
        otherwise the confidence fallback.

        Args:
            candidates: List of memory candidates
            ground_truth_sources: Optional authoritative sources
            cross_verify: Whether to use cross-verification
                (None = follow ``config.default_method``)

        Returns:
            VerifiedResults with categorized memories, each category sorted
            by verification confidence (highest first)
        """
        results = VerifiedResults()

        if cross_verify is not None:
            use_cross_verify = cross_verify
        else:
            use_cross_verify = self.config.default_method == "cross_verify"

        for candidate in candidates:
            retrieval_score = self._get_retrieval_score(candidate)

            if ground_truth_sources:
                verification = self._verify_against_sources(
                    candidate, ground_truth_sources
                )
            elif use_cross_verify and self.llm:
                verification = self._cross_verify(candidate, candidates)
            else:
                verification = self._confidence_fallback(candidate)

            vm = VerifiedMemory(
                memory=candidate,
                verification=verification,
                retrieval_score=retrieval_score,
            )

            # get_by_status returns the live bucket list for the status
            # (unverifiable for anything unrecognized), so append in place.
            results.get_by_status(verification.status).append(vm)

        # Sort by verification confidence within each category
        results.sort_by_confidence()

        return results

    def _get_retrieval_score(self, memory: Any) -> float:
        """
        Extract retrieval/similarity score from memory.

        Looks for a numeric attribute first, then for a usable value in a
        ``metadata`` dict; non-numeric metadata values are skipped instead
        of raising. Returns 0.5 when nothing usable is found.
        """
        # Try common attribute names
        for attr in ("similarity_score", "score", "relevance", "confidence"):
            val = getattr(memory, attr, None)
            if isinstance(val, (int, float)):
                return float(val)

        # Try metadata
        metadata = getattr(memory, "metadata", None)
        if isinstance(metadata, dict):
            for key in ("similarity_score", "score", "relevance"):
                if key in metadata:
                    score = self._to_float(metadata[key])
                    if score is not None:
                        return score

        return 0.5  # Default middle score

    def _get_memory_content(self, memory: Any) -> str:
        """
        Extract content string from memory for verification.

        The first attribute found among content, fact, strategy,
        task_description, preference and pattern wins; a strategy is
        prefixed with the memory's ``condition`` (empty when absent).
        Falls back to ``str(memory)``.
        """
        if hasattr(memory, "content"):
            return str(memory.content)
        if hasattr(memory, "fact"):
            return str(memory.fact)
        if hasattr(memory, "strategy"):
            return f"{getattr(memory, 'condition', '')}: {memory.strategy}"
        if hasattr(memory, "task_description"):
            return str(memory.task_description)
        if hasattr(memory, "preference"):
            return str(memory.preference)
        if hasattr(memory, "pattern"):
            return str(memory.pattern)

        # Fallback to string representation
        return str(memory)

    def _get_memory_id(self, memory: Any) -> str:
        """Get memory ID for comparison (object identity when there is no ``id``)."""
        if hasattr(memory, "id"):
            return str(memory.id)
        return str(id(memory))

    def _verify_against_sources(
        self,
        memory: Any,
        sources: List[str],
    ) -> Verification:
        """
        Verify memory against authoritative sources using LLM.

        Args:
            memory: Memory to verify
            sources: List of authoritative source strings; only the first
                ``config.max_sources_for_verification`` are sent to the LLM

        Returns:
            Verification result. UNVERIFIABLE when no LLM is configured or
            when the LLM call / response parsing raises.
        """
        start_time = time.perf_counter()

        if not self.llm:
            return Verification(
                status=VerificationStatus.UNVERIFIABLE,
                confidence=0.5,
                reason="No LLM available for ground truth verification",
                method=VerificationMethod.NONE,
            )

        content = self._get_memory_content(memory)
        limited_sources = sources[: self.config.max_sources_for_verification]

        prompt = f"""Verify if this memory is consistent with the authoritative sources.

Memory to verify:
{content}

Authoritative sources:
{chr(10).join(f"- {s}" for s in limited_sources)}

Respond in this exact format (no other text):
STATUS: verified|contradicted|uncertain
CONFIDENCE: 0.0-1.0
REASON: Brief explanation (one sentence)
CONTRADICTION: (only if STATUS is contradicted) What specifically contradicts it"""

        try:
            response = self.llm.complete(
                prompt, timeout=self.config.llm_timeout_seconds
            )
            verification = self._parse_verification_response(response)
            verification.method = VerificationMethod.GROUND_TRUTH
            verification.verification_time_ms = self._elapsed_ms(start_time)
            return verification
        except Exception as e:
            # Best-effort boundary around an external LLM call: report the
            # failure as UNVERIFIABLE instead of aborting the whole retrieval.
            logger.warning("Ground truth verification failed: %s", e)
            return Verification(
                status=VerificationStatus.UNVERIFIABLE,
                confidence=0.5,
                reason=f"Verification failed: {str(e)[:100]}",
                method=VerificationMethod.GROUND_TRUTH,
                verification_time_ms=self._elapsed_ms(start_time),
            )

    def _cross_verify(
        self,
        memory: Any,
        all_candidates: List[Any],
    ) -> Verification:
        """
        Cross-verify memory against other retrieved memories.

        Args:
            memory: Memory to verify
            all_candidates: All candidate memories; the first
                ``config.max_memories_for_cross_verify`` whose ID differs
                from ``memory`` are used for comparison

        Returns:
            Verification result. Falls back to confidence-based verification
            when no LLM is configured or the LLM call fails; UNVERIFIABLE
            when there is nothing to compare against.
        """
        start_time = time.perf_counter()

        if not self.llm:
            return self._confidence_fallback(memory)

        memory_id = self._get_memory_id(memory)
        content = self._get_memory_content(memory)

        # Get other memories for comparison
        others = [m for m in all_candidates if self._get_memory_id(m) != memory_id][
            : self.config.max_memories_for_cross_verify
        ]

        if not others:
            return Verification(
                status=VerificationStatus.UNVERIFIABLE,
                confidence=0.5,
                reason="No other memories to cross-verify against",
                method=VerificationMethod.CROSS_VERIFY,
                verification_time_ms=self._elapsed_ms(start_time),
            )

        other_contents = [self._get_memory_content(m) for m in others]

        prompt = f"""Check if this memory is consistent with related memories.

Memory to verify:
{content}

Related memories:
{chr(10).join(f"- {c}" for c in other_contents)}

Respond in this exact format (no other text):
STATUS: verified|contradicted|uncertain
CONFIDENCE: 0.0-1.0
REASON: Brief explanation (one sentence)"""

        try:
            response = self.llm.complete(
                prompt, timeout=self.config.llm_timeout_seconds
            )
            verification = self._parse_verification_response(response)
            verification.method = VerificationMethod.CROSS_VERIFY
            verification.verification_time_ms = self._elapsed_ms(start_time)
            return verification
        except Exception as e:
            logger.warning("Cross-verification failed: %s", e)
            # Fallback to confidence-based
            fallback = self._confidence_fallback(memory)
            fallback.verification_time_ms = self._elapsed_ms(start_time)
            return fallback

    def _confidence_fallback(self, memory: Any) -> Verification:
        """
        Confidence-based verification fallback (no LLM required).

        Uses the memory's ``confidence`` attribute, or ``metadata["confidence"]``
        when the attribute is missing or unusable. A missing, None,
        non-numeric or NaN confidence is treated as 0.5 instead of raising.

        Args:
            memory: Memory to verify

        Returns:
            VERIFIED when confidence reaches ``config.confidence_threshold``,
            otherwise UNCERTAIN (with a "Moderate" or "Low" reason depending
            on whether it reaches half the threshold)
        """
        confidence = self._to_float(getattr(memory, "confidence", None))
        if confidence is None:
            metadata = getattr(memory, "metadata", None)
            if isinstance(metadata, dict):
                confidence = self._to_float(metadata.get("confidence"))
        if confidence is None:
            confidence = 0.5

        threshold = self.config.confidence_threshold
        if confidence >= threshold:
            return Verification(
                status=VerificationStatus.VERIFIED,
                confidence=confidence,
                reason=f"High confidence score ({confidence:.2f} >= {self.config.confidence_threshold})",
                method=VerificationMethod.CONFIDENCE,
            )

        # Both remaining bands are UNCERTAIN; only the explanation differs.
        if confidence >= threshold * 0.5:
            reason = f"Moderate confidence score ({confidence:.2f})"
        else:
            reason = f"Low confidence score ({confidence:.2f})"
        return Verification(
            status=VerificationStatus.UNCERTAIN,
            confidence=confidence,
            reason=reason,
            method=VerificationMethod.CONFIDENCE,
        )

    def _parse_verification_response(self, response: str) -> Verification:
        """
        Parse LLM verification response into Verification object.

        Expected format:
            STATUS: verified|contradicted|uncertain
            CONFIDENCE: 0.0-1.0
            REASON: Brief explanation
            CONTRADICTION: (optional) What contradicts it

        Parsing is tolerant of common LLM formatting noise: markdown
        emphasis around keys/values, trailing punctuation on the status,
        prose around the confidence number, and percentages ("85%" -> 0.85).
        Anything unrecognized keeps the conservative defaults
        (UNCERTAIN, 0.5).

        Args:
            response: LLM response string

        Returns:
            Verification object (method left at its default; callers set it)
        """
        status_by_name = {
            "verified": VerificationStatus.VERIFIED,
            "contradicted": VerificationStatus.CONTRADICTED,
            "uncertain": VerificationStatus.UNCERTAIN,
        }

        status = VerificationStatus.UNCERTAIN
        confidence = 0.5
        reason = "Unable to parse verification response"
        contradiction: Optional[str] = None

        for raw_line in response.strip().splitlines():
            key, separator, value = raw_line.strip().partition(":")
            if not separator:
                continue
            # Drop markdown decoration such as "**STATUS:** verified".
            key = key.strip().strip("*_`#- ").upper()
            value = value.strip().strip("*").strip()

            if key == "STATUS":
                # Exact match only (after trimming quotes/punctuation) so an
                # echoed template like "verified|contradicted|uncertain" or
                # "not verified" never upgrades the status.
                token = value.strip("*_`'\".!,; ").lower()
                if token in status_by_name:
                    status = status_by_name[token]
            elif key == "CONFIDENCE":
                # Match a well-formed number; a bare "." or a trailing
                # sentence period must not swallow or break the value.
                match = re.search(r"(\d+(?:\.\d+)?|\.\d+)\s*(%?)", value)
                if match:
                    number = float(match.group(1))
                    if match.group(2):
                        number /= 100.0
                    confidence = number
            elif key == "REASON":
                reason = value
            elif key == "CONTRADICTION":
                # Empty or placeholder answers mean "no contradiction".
                if value.lower() not in ("", "none", "n/a"):
                    contradiction = value
                else:
                    contradiction = None

        return Verification(
            status=status,
            confidence=max(0.0, min(1.0, confidence)),
            reason=reason,
            contradicting_source=contradiction,
        )
772
+
773
+
774
def create_verified_retriever(
    retrieval_engine: Any,
    llm_client: Optional[LLMClient] = None,
    config: Optional[Union[VerificationConfig, Dict[str, Any]]] = None,
) -> VerifiedRetriever:
    """
    Build a VerifiedRetriever, accepting the configuration in either form.

    Args:
        retrieval_engine: RetrievalEngine or compatible retriever
        llm_client: Optional LLM client for verification
        config: A VerificationConfig, a plain dict of its settings, or None

    Returns:
        Configured VerifiedRetriever
    """
    # A dict is converted via VerificationConfig.from_dict; a ready-made
    # config object (or None) is handed through untouched.
    resolved_config = (
        VerificationConfig.from_dict(config) if isinstance(config, dict) else config
    )
    return VerifiedRetriever(
        retrieval_engine=retrieval_engine,
        llm_client=llm_client,
        config=resolved_config,
    )