alma-memory 0.5.1__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alma/__init__.py +296 -226
- alma/compression/__init__.py +33 -0
- alma/compression/pipeline.py +980 -0
- alma/confidence/__init__.py +47 -47
- alma/confidence/engine.py +540 -540
- alma/confidence/types.py +351 -351
- alma/config/loader.py +157 -157
- alma/consolidation/__init__.py +23 -23
- alma/consolidation/engine.py +678 -678
- alma/consolidation/prompts.py +84 -84
- alma/core.py +1189 -430
- alma/domains/__init__.py +30 -30
- alma/domains/factory.py +359 -359
- alma/domains/schemas.py +448 -448
- alma/domains/types.py +272 -272
- alma/events/__init__.py +75 -75
- alma/events/emitter.py +285 -284
- alma/events/storage_mixin.py +246 -246
- alma/events/types.py +126 -126
- alma/events/webhook.py +425 -425
- alma/exceptions.py +49 -49
- alma/extraction/__init__.py +31 -31
- alma/extraction/auto_learner.py +265 -265
- alma/extraction/extractor.py +420 -420
- alma/graph/__init__.py +106 -106
- alma/graph/backends/__init__.py +32 -32
- alma/graph/backends/kuzu.py +624 -624
- alma/graph/backends/memgraph.py +432 -432
- alma/graph/backends/memory.py +236 -236
- alma/graph/backends/neo4j.py +417 -417
- alma/graph/base.py +159 -159
- alma/graph/extraction.py +198 -198
- alma/graph/store.py +860 -860
- alma/harness/__init__.py +35 -35
- alma/harness/base.py +386 -386
- alma/harness/domains.py +705 -705
- alma/initializer/__init__.py +37 -37
- alma/initializer/initializer.py +418 -418
- alma/initializer/types.py +250 -250
- alma/integration/__init__.py +62 -62
- alma/integration/claude_agents.py +444 -444
- alma/integration/helena.py +423 -423
- alma/integration/victor.py +471 -471
- alma/learning/__init__.py +101 -86
- alma/learning/decay.py +878 -0
- alma/learning/forgetting.py +1446 -1446
- alma/learning/heuristic_extractor.py +390 -390
- alma/learning/protocols.py +374 -374
- alma/learning/validation.py +346 -346
- alma/mcp/__init__.py +123 -45
- alma/mcp/__main__.py +156 -156
- alma/mcp/resources.py +122 -122
- alma/mcp/server.py +955 -591
- alma/mcp/tools.py +3254 -509
- alma/observability/__init__.py +91 -84
- alma/observability/config.py +302 -302
- alma/observability/guidelines.py +170 -0
- alma/observability/logging.py +424 -424
- alma/observability/metrics.py +583 -583
- alma/observability/tracing.py +440 -440
- alma/progress/__init__.py +21 -21
- alma/progress/tracker.py +607 -607
- alma/progress/types.py +250 -250
- alma/retrieval/__init__.py +134 -53
- alma/retrieval/budget.py +525 -0
- alma/retrieval/cache.py +1304 -1061
- alma/retrieval/embeddings.py +202 -202
- alma/retrieval/engine.py +850 -427
- alma/retrieval/modes.py +365 -0
- alma/retrieval/progressive.py +560 -0
- alma/retrieval/scoring.py +344 -344
- alma/retrieval/trust_scoring.py +637 -0
- alma/retrieval/verification.py +797 -0
- alma/session/__init__.py +19 -19
- alma/session/manager.py +442 -399
- alma/session/types.py +288 -288
- alma/storage/__init__.py +101 -90
- alma/storage/archive.py +233 -0
- alma/storage/azure_cosmos.py +1259 -1259
- alma/storage/base.py +1083 -583
- alma/storage/chroma.py +1443 -1443
- alma/storage/constants.py +103 -103
- alma/storage/file_based.py +614 -614
- alma/storage/migrations/__init__.py +21 -21
- alma/storage/migrations/base.py +321 -321
- alma/storage/migrations/runner.py +323 -323
- alma/storage/migrations/version_stores.py +337 -337
- alma/storage/migrations/versions/__init__.py +11 -11
- alma/storage/migrations/versions/v1_0_0.py +373 -373
- alma/storage/migrations/versions/v1_1_0_workflow_context.py +551 -0
- alma/storage/pinecone.py +1080 -1080
- alma/storage/postgresql.py +1948 -1559
- alma/storage/qdrant.py +1306 -1306
- alma/storage/sqlite_local.py +3041 -1457
- alma/testing/__init__.py +46 -46
- alma/testing/factories.py +301 -301
- alma/testing/mocks.py +389 -389
- alma/types.py +292 -264
- alma/utils/__init__.py +19 -0
- alma/utils/tokenizer.py +521 -0
- alma/workflow/__init__.py +83 -0
- alma/workflow/artifacts.py +170 -0
- alma/workflow/checkpoint.py +311 -0
- alma/workflow/context.py +228 -0
- alma/workflow/outcomes.py +189 -0
- alma/workflow/reducers.py +393 -0
- {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/METADATA +210 -72
- alma_memory-0.7.0.dist-info/RECORD +112 -0
- alma_memory-0.5.1.dist-info/RECORD +0 -93
- {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/WHEEL +0 -0
- {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,797 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ALMA Two-Stage Verified Retrieval.
|
|
3
|
+
|
|
4
|
+
Provides verification of retrieved memories for high-stakes applications.
|
|
5
|
+
Based on Memory Wall principles: "Pair fuzzy retrieval with exact verification."
|
|
6
|
+
|
|
7
|
+
Two-stage process:
|
|
8
|
+
1. Fuzzy Recall: Semantic search with expanded candidate set
|
|
9
|
+
2. Verification: Validate against ground truth, cross-verify, or confidence fallback
|
|
10
|
+
|
|
11
|
+
Verification statuses:
|
|
12
|
+
- VERIFIED: Safe to use, confirmed accurate
|
|
13
|
+
- UNCERTAIN: Use with caution, unconfirmed
|
|
14
|
+
- CONTRADICTED: Needs review, conflicts detected
|
|
15
|
+
- UNVERIFIABLE: No verification method available
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
import logging
|
|
19
|
+
import re
|
|
20
|
+
import time
|
|
21
|
+
from dataclasses import dataclass, field
|
|
22
|
+
from enum import Enum
|
|
23
|
+
from typing import Any, Dict, List, Optional, Protocol, Union
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class VerificationStatus(Enum):
    """Outcome assigned to a memory by the verification stage."""

    # Confirmed accurate; safe to use.
    VERIFIED = "verified"
    # Not confirmed either way; use with caution.
    UNCERTAIN = "uncertain"
    # Conflicts were detected; needs review.
    CONTRADICTED = "contradicted"
    # No verification method was available.
    UNVERIFIABLE = "unverifiable"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class VerificationMethod(Enum):
    """Technique that produced a verification result."""

    # Checked against authoritative sources.
    GROUND_TRUTH = "ground_truth"
    # Checked against other memories.
    CROSS_VERIFY = "cross_verify"
    # Confidence-based fallback (no LLM).
    CONFIDENCE = "confidence"
    # No verification was performed.
    NONE = "none"
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass
class Verification:
    """
    Outcome of verifying one memory.

    Attributes:
        status: Verification status (VERIFIED, UNCERTAIN, etc.)
        confidence: Confidence in the result; clamped to 0.0 - 1.0 on creation
        reason: Human-readable explanation
        method: Method used for verification
        contradicting_source: Source of contradiction if status is CONTRADICTED
        verification_time_ms: Time taken for verification in milliseconds
    """

    status: VerificationStatus
    confidence: float
    reason: str
    method: VerificationMethod = VerificationMethod.NONE
    contradicting_source: Optional[str] = None
    verification_time_ms: int = 0

    def __post_init__(self):
        """Clamp ``confidence`` into the closed range [0.0, 1.0]."""
        capped = min(1.0, self.confidence)
        self.confidence = max(0.0, capped)

    def is_usable(self) -> bool:
        """Return True when the status is VERIFIED or UNCERTAIN."""
        return (
            self.status == VerificationStatus.VERIFIED
            or self.status == VerificationStatus.UNCERTAIN
        )

    def needs_review(self) -> bool:
        """Return True when the status is CONTRADICTED."""
        flagged = self.status == VerificationStatus.CONTRADICTED
        return flagged

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict; enum fields are stored by value."""
        payload: Dict[str, Any] = {}
        payload["status"] = self.status.value
        payload["confidence"] = self.confidence
        payload["reason"] = self.reason
        payload["method"] = self.method.value
        payload["contradicting_source"] = self.contradicting_source
        payload["verification_time_ms"] = self.verification_time_ms
        return payload

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Verification":
        """
        Build an instance from a dict shaped like ``to_dict`` output.

        ``status``, ``confidence`` and ``reason`` are required keys; the
        remaining keys fall back to the field defaults when absent.
        """
        status = VerificationStatus(data["status"])
        confidence = data["confidence"]
        reason = data["reason"]
        method = VerificationMethod(data.get("method", "none"))
        return cls(
            status=status,
            confidence=confidence,
            reason=reason,
            method=method,
            contradicting_source=data.get("contradicting_source"),
            verification_time_ms=data.get("verification_time_ms", 0),
        )
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
@dataclass
class VerifiedMemory:
    """
    A retrieved memory paired with its verification result.

    Attributes:
        memory: The original memory object
        verification: Verification result
        retrieval_score: Original similarity/relevance score from retrieval
    """

    memory: Any
    verification: Verification
    retrieval_score: float = 0.0

    @property
    def status(self) -> VerificationStatus:
        """Status taken from the attached verification."""
        return self.verification.status

    @property
    def is_verified(self) -> bool:
        """True when the status is VERIFIED."""
        return self.status == VerificationStatus.VERIFIED

    @property
    def is_usable(self) -> bool:
        """True when the attached verification reports itself usable."""
        return self.verification.is_usable()

    def combined_score(self, verification_weight: float = 0.5) -> float:
        """
        Blend the retrieval score with the verification confidence.

        Args:
            verification_weight: Weight for verification confidence (0-1);
                the retrieval score receives the complementary weight.

        Returns:
            Weighted sum of the two scores
        """
        retrieval_weight = 1.0 - verification_weight
        retrieval_part = self.retrieval_score * retrieval_weight
        verification_part = self.verification.confidence * verification_weight
        return retrieval_part + verification_part

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a dict, using the memory's own ``to_dict`` if it has one."""
        if hasattr(self.memory, "to_dict"):
            memory_dict = self.memory.to_dict()
        else:
            # No serializer on the memory: store its string form instead.
            memory_dict = {"content": str(self.memory)}
        return dict(
            memory=memory_dict,
            verification=self.verification.to_dict(),
            retrieval_score=self.retrieval_score,
        )
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
@dataclass
class VerifiedResults:
    """
    Verified memories grouped by verification status.

    Attributes:
        verified: Memories confirmed accurate
        uncertain: Memories with uncertain status
        contradicted: Memories with conflicts detected
        unverifiable: Memories that couldn't be verified
        metadata: Additional information about the verification process
    """

    verified: List[VerifiedMemory] = field(default_factory=list)
    uncertain: List[VerifiedMemory] = field(default_factory=list)
    contradicted: List[VerifiedMemory] = field(default_factory=list)
    unverifiable: List[VerifiedMemory] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)

    @property
    def all_usable(self) -> List[VerifiedMemory]:
        """Verified memories followed by uncertain ones (a new list)."""
        usable = self.verified + self.uncertain
        return usable

    @property
    def high_confidence(self) -> List[VerifiedMemory]:
        """The verified memories only."""
        return self.verified

    @property
    def needs_review(self) -> List[VerifiedMemory]:
        """The contradicted memories."""
        return self.contradicted

    @property
    def total_count(self) -> int:
        """Number of memories across all four categories."""
        groups = (self.verified, self.uncertain, self.contradicted, self.unverifiable)
        return sum(len(group) for group in groups)

    def summary(self) -> Dict[str, Any]:
        """Return per-category counts, usable ratio and recorded timing."""
        total = self.total_count
        usable_count = len(self.all_usable)
        # Avoid dividing by zero when nothing was processed.
        usable_ratio = usable_count / total if total > 0 else 0.0
        return {
            "verified": len(self.verified),
            "uncertain": len(self.uncertain),
            "contradicted": len(self.contradicted),
            "unverifiable": len(self.unverifiable),
            "total": total,
            "usable_count": usable_count,
            "usable_ratio": usable_ratio,
            "verification_time_ms": self.metadata.get("total_verification_time_ms", 0),
        }

    def get_by_status(self, status: VerificationStatus) -> List[VerifiedMemory]:
        """Return the list for ``status``; any other value maps to unverifiable."""
        if status == VerificationStatus.VERIFIED:
            return self.verified
        if status == VerificationStatus.UNCERTAIN:
            return self.uncertain
        if status == VerificationStatus.CONTRADICTED:
            return self.contradicted
        return self.unverifiable

    def sort_by_confidence(self, descending: bool = True) -> None:
        """Sort every category in place by verification confidence."""
        for group in (
            self.verified,
            self.uncertain,
            self.contradicted,
            self.unverifiable,
        ):
            group.sort(
                key=lambda vm: vm.verification.confidence,
                reverse=descending,
            )

    def to_dict(self) -> Dict[str, Any]:
        """Serialize all categories, the metadata and the summary."""
        serialized: Dict[str, Any] = {}
        serialized["verified"] = [vm.to_dict() for vm in self.verified]
        serialized["uncertain"] = [vm.to_dict() for vm in self.uncertain]
        serialized["contradicted"] = [vm.to_dict() for vm in self.contradicted]
        serialized["unverifiable"] = [vm.to_dict() for vm in self.unverifiable]
        serialized["metadata"] = self.metadata
        serialized["summary"] = self.summary()
        return serialized
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
@dataclass
class VerificationConfig:
    """
    Settings that control verification behavior.

    Attributes:
        enabled: Whether verification is enabled
        default_method: Default verification method to use
        confidence_threshold: Threshold for confidence-based verification
        llm_timeout_seconds: Timeout for LLM-based verification
        expand_candidates_factor: Factor to expand candidate set (default 4x)
        max_sources_for_verification: Max sources to use for ground truth
        max_memories_for_cross_verify: Max other memories to cross-verify against
    """

    # Opt-in by default.
    enabled: bool = False
    # One of: confidence | cross_verify | ground_truth
    default_method: str = "confidence"
    confidence_threshold: float = 0.7
    llm_timeout_seconds: float = 5.0
    expand_candidates_factor: int = 4
    max_sources_for_verification: int = 5
    max_memories_for_cross_verify: int = 5

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "VerificationConfig":
        """Build a config from a dict; missing keys use the listed fallbacks."""
        fallbacks = (
            ("enabled", False),
            ("default_method", "confidence"),
            ("confidence_threshold", 0.7),
            ("llm_timeout_seconds", 5.0),
            ("expand_candidates_factor", 4),
            ("max_sources_for_verification", 5),
            ("max_memories_for_cross_verify", 5),
        )
        # Keys in ``data`` that are not listed above are ignored.
        return cls(**{key: data.get(key, fallback) for key, fallback in fallbacks})

    def to_dict(self) -> Dict[str, Any]:
        """Serialize every setting to a plain dict, in declaration order."""
        names = (
            "enabled",
            "default_method",
            "confidence_threshold",
            "llm_timeout_seconds",
            "expand_candidates_factor",
            "max_sources_for_verification",
            "max_memories_for_cross_verify",
        )
        return {name: getattr(self, name) for name in names}
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
class LLMClient(Protocol):
    """Structural interface expected of LLM clients used for verification."""

    def complete(self, prompt: str, timeout: Optional[float] = None) -> str:
        """
        Run a completion for ``prompt`` and return the response text.

        Args:
            prompt: Prompt text to complete
            timeout: Optional timeout passed through by the caller

        Returns:
            The response as a string
        """
        ...
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
class VerifiedRetriever:
|
|
318
|
+
"""
|
|
319
|
+
Two-stage retrieval with verification.
|
|
320
|
+
|
|
321
|
+
Stage 1: Fuzzy recall with expanded candidate set
|
|
322
|
+
Stage 2: Verify candidates using one of:
|
|
323
|
+
- Ground truth sources (with LLM)
|
|
324
|
+
- Cross-verification against other memories (with LLM)
|
|
325
|
+
- Confidence-based fallback (no LLM required)
|
|
326
|
+
|
|
327
|
+
Example:
|
|
328
|
+
retriever = VerifiedRetriever(
|
|
329
|
+
retrieval_engine=engine,
|
|
330
|
+
llm_client=llm,
|
|
331
|
+
config=VerificationConfig(enabled=True)
|
|
332
|
+
)
|
|
333
|
+
results = retriever.retrieve_verified(
|
|
334
|
+
query="How to handle API rate limits?",
|
|
335
|
+
agent="dev-agent",
|
|
336
|
+
project_id="my-project"
|
|
337
|
+
)
|
|
338
|
+
for memory in results.high_confidence:
|
|
339
|
+
print(f"Verified: {memory.memory}")
|
|
340
|
+
"""
|
|
341
|
+
|
|
342
|
+
def __init__(
    self,
    retrieval_engine: Any,
    llm_client: Optional[LLMClient] = None,
    config: Optional[VerificationConfig] = None,
):
    """
    Set up the verified retriever.

    Args:
        retrieval_engine: RetrievalEngine or compatible retriever
        llm_client: Optional LLM client for verification
        config: Verification configuration; a default VerificationConfig
            is created when none is given
    """
    if not config:
        config = VerificationConfig()
    self.retrieval_engine = retrieval_engine
    self.llm = llm_client
    self.config = config
|
|
359
|
+
|
|
360
|
+
def retrieve_verified(
    self,
    query: str,
    agent: str,
    project_id: str,
    ground_truth_sources: Optional[List[str]] = None,
    cross_verify: Optional[bool] = None,
    top_k: int = 5,
    **retrieval_kwargs: Any,
) -> VerifiedResults:
    """
    Retrieve an expanded candidate set, then verify and categorize it.

    Args:
        query: Query string for retrieval
        agent: Agent requesting memories
        project_id: Project context
        ground_truth_sources: Optional authoritative sources for verification
        cross_verify: Whether to cross-verify (None = use config default)
        top_k: Maximum number of results kept in each status category
        **retrieval_kwargs: Additional arguments for retrieval engine

    Returns:
        VerifiedResults with categorized memories
    """
    started_at = time.time()

    # Stage 1: over-fetch so verification has more candidates than top_k.
    retrieval_kwargs.update(
        top_k=top_k * self.config.expand_candidates_factor,
        agent=agent,
        project_id=project_id,
    )
    memory_slice = self.retrieval_engine.retrieve(query, **retrieval_kwargs)
    candidates = self._extract_candidates(memory_slice)

    # Stage 2: verify each candidate and group by status.
    results = self._verify_candidates(
        candidates=candidates,
        ground_truth_sources=ground_truth_sources,
        cross_verify=cross_verify,
    )

    # Keep at most top_k entries in every category.
    for category in ("verified", "uncertain", "contradicted", "unverifiable"):
        setattr(results, category, getattr(results, category)[:top_k])

    # Record timing and request details alongside the results.
    elapsed_ms = int((time.time() - started_at) * 1000)
    results.metadata.update(
        {
            "total_verification_time_ms": elapsed_ms,
            "total_candidates": len(candidates),
            "query": query,
            "top_k": top_k,
        }
    )

    return results
|
|
420
|
+
|
|
421
|
+
def _extract_candidates(self, memory_slice: Any) -> List[Any]:
|
|
422
|
+
"""Extract all memory objects from a MemorySlice or similar container."""
|
|
423
|
+
candidates = []
|
|
424
|
+
|
|
425
|
+
# Handle MemorySlice structure
|
|
426
|
+
if hasattr(memory_slice, "heuristics"):
|
|
427
|
+
candidates.extend(memory_slice.heuristics or [])
|
|
428
|
+
if hasattr(memory_slice, "outcomes"):
|
|
429
|
+
candidates.extend(memory_slice.outcomes or [])
|
|
430
|
+
if hasattr(memory_slice, "knowledge"):
|
|
431
|
+
candidates.extend(memory_slice.knowledge or [])
|
|
432
|
+
if hasattr(memory_slice, "anti_patterns"):
|
|
433
|
+
candidates.extend(memory_slice.anti_patterns or [])
|
|
434
|
+
if hasattr(memory_slice, "preferences"):
|
|
435
|
+
candidates.extend(memory_slice.preferences or [])
|
|
436
|
+
|
|
437
|
+
# Fallback for list-like containers
|
|
438
|
+
if not candidates and hasattr(memory_slice, "__iter__"):
|
|
439
|
+
candidates = list(memory_slice)
|
|
440
|
+
|
|
441
|
+
return candidates
|
|
442
|
+
|
|
443
|
+
def _verify_candidates(
    self,
    candidates: List[Any],
    ground_truth_sources: Optional[List[str]],
    cross_verify: Optional[bool],
) -> VerifiedResults:
    """
    Verify every candidate and place it in the matching category.

    Ground truth sources take priority when provided; otherwise
    cross-verification is used if requested and an LLM is configured;
    otherwise the confidence-based fallback is used.

    Args:
        candidates: List of memory candidates
        ground_truth_sources: Optional authoritative sources
        cross_verify: Whether to use cross-verification (None = derive
            from the configured default method)

    Returns:
        VerifiedResults with categorized memories
    """
    results = VerifiedResults()

    # An explicit argument overrides the configured default method.
    if cross_verify is None:
        use_cross_verify = self.config.default_method == "cross_verify"
    else:
        use_cross_verify = cross_verify

    for candidate in candidates:
        retrieval_score = self._get_retrieval_score(candidate)

        if ground_truth_sources:
            verification = self._verify_against_sources(
                candidate, ground_truth_sources
            )
        elif use_cross_verify and self.llm:
            verification = self._cross_verify(candidate, candidates)
        else:
            verification = self._confidence_fallback(candidate)

        vm = VerifiedMemory(
            memory=candidate,
            verification=verification,
            retrieval_score=retrieval_score,
        )

        # Pick the destination list; anything unrecognized is unverifiable.
        outcome = verification.status
        if outcome == VerificationStatus.VERIFIED:
            destination = results.verified
        elif outcome == VerificationStatus.UNCERTAIN:
            destination = results.uncertain
        elif outcome == VerificationStatus.CONTRADICTED:
            destination = results.contradicted
        else:
            destination = results.unverifiable
        destination.append(vm)

    # Order each category by verification confidence, highest first.
    results.sort_by_confidence()

    return results
|
|
504
|
+
|
|
505
|
+
def _get_retrieval_score(self, memory: Any) -> float:
|
|
506
|
+
"""Extract retrieval/similarity score from memory."""
|
|
507
|
+
# Try common attribute names
|
|
508
|
+
for attr in ["similarity_score", "score", "relevance", "confidence"]:
|
|
509
|
+
if hasattr(memory, attr):
|
|
510
|
+
val = getattr(memory, attr)
|
|
511
|
+
if isinstance(val, (int, float)):
|
|
512
|
+
return float(val)
|
|
513
|
+
|
|
514
|
+
# Try metadata
|
|
515
|
+
if hasattr(memory, "metadata") and isinstance(memory.metadata, dict):
|
|
516
|
+
for key in ["similarity_score", "score", "relevance"]:
|
|
517
|
+
if key in memory.metadata:
|
|
518
|
+
return float(memory.metadata[key])
|
|
519
|
+
|
|
520
|
+
return 0.5 # Default middle score
|
|
521
|
+
|
|
522
|
+
def _get_memory_content(self, memory: Any) -> str:
|
|
523
|
+
"""Extract content string from memory for verification."""
|
|
524
|
+
# Try common content attributes
|
|
525
|
+
if hasattr(memory, "content"):
|
|
526
|
+
return str(memory.content)
|
|
527
|
+
if hasattr(memory, "fact"):
|
|
528
|
+
return str(memory.fact)
|
|
529
|
+
if hasattr(memory, "strategy"):
|
|
530
|
+
return f"{getattr(memory, 'condition', '')}: {memory.strategy}"
|
|
531
|
+
if hasattr(memory, "task_description"):
|
|
532
|
+
return str(memory.task_description)
|
|
533
|
+
if hasattr(memory, "preference"):
|
|
534
|
+
return str(memory.preference)
|
|
535
|
+
if hasattr(memory, "pattern"):
|
|
536
|
+
return str(memory.pattern)
|
|
537
|
+
|
|
538
|
+
# Fallback to string representation
|
|
539
|
+
return str(memory)
|
|
540
|
+
|
|
541
|
+
def _get_memory_id(self, memory: Any) -> str:
|
|
542
|
+
"""Get memory ID for comparison."""
|
|
543
|
+
if hasattr(memory, "id"):
|
|
544
|
+
return str(memory.id)
|
|
545
|
+
return str(id(memory))
|
|
546
|
+
|
|
547
|
+
def _verify_against_sources(
    self,
    memory: Any,
    sources: List[str],
) -> Verification:
    """
    Ask the LLM whether a memory agrees with authoritative sources.

    Returns UNVERIFIABLE when no LLM is configured, or when the LLM call
    or the parsing of its response raises.

    Args:
        memory: Memory to verify
        sources: List of authoritative source strings; only the first
            ``max_sources_for_verification`` are sent to the LLM

    Returns:
        Verification result
    """
    started_at = time.time()

    def elapsed_ms() -> int:
        # Milliseconds since this verification started.
        return int((time.time() - started_at) * 1000)

    if not self.llm:
        return Verification(
            status=VerificationStatus.UNVERIFIABLE,
            confidence=0.5,
            reason="No LLM available for ground truth verification",
            method=VerificationMethod.NONE,
        )

    limited_sources = sources[: self.config.max_sources_for_verification]
    content = self._get_memory_content(memory)

    prompt = f"""Verify if this memory is consistent with the authoritative sources.

Memory to verify:
{content}

Authoritative sources:
{chr(10).join(f"- {s}" for s in limited_sources)}

Respond in this exact format (no other text):
STATUS: verified|contradicted|uncertain
CONFIDENCE: 0.0-1.0
REASON: Brief explanation (one sentence)
CONTRADICTION: (only if STATUS is contradicted) What specifically contradicts it"""

    try:
        response = self.llm.complete(
            prompt, timeout=self.config.llm_timeout_seconds
        )
        verification = self._parse_verification_response(response)
        verification.method = VerificationMethod.GROUND_TRUTH
        verification.verification_time_ms = elapsed_ms()
        return verification
    except Exception as e:
        # Best effort: any failure is reported as unverifiable, not raised.
        logger.warning(f"Ground truth verification failed: {e}")
        return Verification(
            status=VerificationStatus.UNVERIFIABLE,
            confidence=0.5,
            reason=f"Verification failed: {str(e)[:100]}",
            method=VerificationMethod.GROUND_TRUTH,
            verification_time_ms=elapsed_ms(),
        )
|
|
606
|
+
|
|
607
|
+
def _cross_verify(
    self,
    memory: Any,
    all_candidates: List[Any],
) -> Verification:
    """
    Ask the LLM whether a memory agrees with the other candidates.

    Falls back to confidence-based verification when no LLM is configured
    or when the LLM call or response parsing raises. Returns UNVERIFIABLE
    when there are no other memories to compare against.

    Args:
        memory: Memory to verify
        all_candidates: All candidate memories; the memory itself is
            excluded by ID and at most ``max_memories_for_cross_verify``
            of the rest are used

    Returns:
        Verification result
    """
    started_at = time.time()

    def elapsed_ms() -> int:
        # Milliseconds since this verification started.
        return int((time.time() - started_at) * 1000)

    if not self.llm:
        return self._confidence_fallback(memory)

    memory_id = self._get_memory_id(memory)
    content = self._get_memory_content(memory)

    # Everything except the memory under test, capped by the config limit.
    others = []
    for candidate in all_candidates:
        if self._get_memory_id(candidate) != memory_id:
            others.append(candidate)
    others = others[: self.config.max_memories_for_cross_verify]

    if not others:
        return Verification(
            status=VerificationStatus.UNVERIFIABLE,
            confidence=0.5,
            reason="No other memories to cross-verify against",
            method=VerificationMethod.CROSS_VERIFY,
            verification_time_ms=elapsed_ms(),
        )

    other_contents = [self._get_memory_content(m) for m in others]

    prompt = f"""Check if this memory is consistent with related memories.

Memory to verify:
{content}

Related memories:
{chr(10).join(f"- {c}" for c in other_contents)}

Respond in this exact format (no other text):
STATUS: verified|contradicted|uncertain
CONFIDENCE: 0.0-1.0
REASON: Brief explanation (one sentence)"""

    try:
        response = self.llm.complete(
            prompt, timeout=self.config.llm_timeout_seconds
        )
        verification = self._parse_verification_response(response)
        verification.method = VerificationMethod.CROSS_VERIFY
        verification.verification_time_ms = elapsed_ms()
        return verification
    except Exception as e:
        logger.warning(f"Cross-verification failed: {e}")
        # Fallback to confidence-based
        fallback = self._confidence_fallback(memory)
        fallback.verification_time_ms = elapsed_ms()
        return fallback
|
|
673
|
+
|
|
674
|
+
def _confidence_fallback(self, memory: Any) -> Verification:
    """
    Verify a memory from its stored confidence score alone (no LLM call).

    The score is read from ``memory.confidence`` when that attribute
    exists; otherwise from the ``"confidence"`` key of ``memory.metadata``
    when that is a dict; otherwise it defaults to 0.5.

    Args:
        memory: Memory to verify

    Returns:
        Verification with method CONFIDENCE. Status is VERIFIED when the
        score reaches ``config.confidence_threshold``; otherwise UNCERTAIN,
        with the reason distinguishing moderate from low scores.
    """
    # Resolve the score: direct attribute wins over metadata, 0.5 otherwise.
    if hasattr(memory, "confidence"):
        confidence = float(memory.confidence)
    else:
        metadata = getattr(memory, "metadata", None)
        if isinstance(metadata, dict):
            confidence = float(metadata.get("confidence", 0.5))
        else:
            confidence = 0.5

    threshold = self.config.confidence_threshold

    # Pick status and reason; both sub-threshold bands stay UNCERTAIN and
    # differ only in the wording of the reason.
    if confidence >= threshold:
        status = VerificationStatus.VERIFIED
        reason = f"High confidence score ({confidence:.2f} >= {self.config.confidence_threshold})"
    elif confidence >= threshold * 0.5:
        status = VerificationStatus.UNCERTAIN
        reason = f"Moderate confidence score ({confidence:.2f})"
    else:
        status = VerificationStatus.UNCERTAIN
        reason = f"Low confidence score ({confidence:.2f})"

    return Verification(
        status=status,
        confidence=confidence,
        reason=reason,
        method=VerificationMethod.CONFIDENCE,
    )
|
|
714
|
+
|
|
715
|
+
def _parse_verification_response(self, response: str) -> Verification:
    """
    Parse LLM verification response into Verification object.

    Expected format:
        STATUS: verified|contradicted|uncertain
        CONFIDENCE: 0.0-1.0
        REASON: Brief explanation
        CONTRADICTION: (optional) What contradicts it

    Labels are matched case-insensitively at the start of a line. Any
    field that is missing or cannot be parsed keeps its default
    (status uncertain, confidence 0.5, a generic "unable to parse" reason,
    no contradiction).

    Parsing is tolerant of common LLM formatting noise:
        - STATUS values may carry surrounding punctuation or markdown
          emphasis (``verified.``, ``**verified**``).
        - CONFIDENCE takes the first well-formed number on the line, so a
          trailing period (``0.8.``) no longer discards the value.
        - A number followed by ``%`` is read as a percentage (``85%`` -> 0.85)
          instead of saturating to 1.0.

    Args:
        response: LLM response string

    Returns:
        Verification object with confidence clamped to [0.0, 1.0]
    """
    status_map = {
        "verified": VerificationStatus.VERIFIED,
        "contradicted": VerificationStatus.CONTRADICTED,
        "uncertain": VerificationStatus.UNCERTAIN,
    }

    status = VerificationStatus.UNCERTAIN
    confidence = 0.5
    reason = "Unable to parse verification response"
    contradiction = None

    for raw_line in response.strip().split("\n"):
        label, sep, value = raw_line.strip().partition(":")
        if not sep:
            # No "LABEL:" prefix on this line; nothing to extract.
            continue

        key = label.upper()
        value = value.strip()

        if key == "STATUS":
            # Drop decoration around the keyword, but do not pick a word out
            # of a longer phrase: an echoed template such as
            # "verified|contradicted|uncertain" must stay unrecognised.
            token = value.lower().strip(" \t.,;!*_`'\"")
            if token in status_map:
                status = status_map[token]
        elif key == "CONFIDENCE":
            # The pattern only matches well-formed decimals, so float()
            # cannot fail here and stray dots are left out of the match.
            match = re.search(r"(\d+(?:\.\d+)?|\.\d+)\s*(%)?", value)
            if match:
                confidence = float(match.group(1))
                if match.group(2):
                    confidence /= 100.0
        elif key == "REASON":
            reason = value
        elif key == "CONTRADICTION":
            contradiction = value

    return Verification(
        status=status,
        confidence=max(0.0, min(1.0, confidence)),
        reason=reason,
        contradicting_source=contradiction,
    )
|
|
772
|
+
|
|
773
|
+
|
|
774
|
+
def create_verified_retriever(
    retrieval_engine: Any,
    llm_client: Optional[LLMClient] = None,
    config: Optional[Union[VerificationConfig, Dict[str, Any]]] = None,
) -> VerifiedRetriever:
    """
    Build a VerifiedRetriever from an engine, an optional LLM client and config.

    Args:
        retrieval_engine: RetrievalEngine or compatible retriever
        llm_client: Optional LLM client for verification
        config: VerificationConfig, a plain dict (converted with
            ``VerificationConfig.from_dict``), or None

    Returns:
        Configured VerifiedRetriever
    """
    # Only dicts need converting; a VerificationConfig or None passes through.
    resolved_config = (
        VerificationConfig.from_dict(config) if isinstance(config, dict) else config
    )
    return VerifiedRetriever(
        retrieval_engine=retrieval_engine,
        llm_client=llm_client,
        config=resolved_config,
    )
|