AbstractMemory 0.0.1-py3-none-any.whl → 0.2.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractmemory/__init__.py +744 -31
- abstractmemory/cognitive/__init__.py +1 -0
- abstractmemory/components/__init__.py +1 -0
- abstractmemory/components/core.py +112 -0
- abstractmemory/components/episodic.py +68 -0
- abstractmemory/components/semantic.py +102 -0
- abstractmemory/components/working.py +50 -0
- abstractmemory/core/__init__.py +1 -0
- abstractmemory/core/interfaces.py +95 -0
- abstractmemory/core/temporal.py +100 -0
- abstractmemory/embeddings/__init__.py +317 -0
- abstractmemory/graph/__init__.py +1 -0
- abstractmemory/graph/knowledge_graph.py +178 -0
- abstractmemory/simple.py +151 -0
- abstractmemory/storage/__init__.py +16 -0
- abstractmemory/storage/dual_manager.py +269 -0
- abstractmemory/storage/lancedb_storage.py +544 -0
- abstractmemory/storage/markdown_storage.py +447 -0
- abstractmemory-0.2.1.dist-info/METADATA +460 -0
- abstractmemory-0.2.1.dist-info/RECORD +23 -0
- {abstractmemory-0.0.1.dist-info → abstractmemory-0.2.1.dist-info}/licenses/LICENSE +4 -1
- abstractmemory-0.0.1.dist-info/METADATA +0 -94
- abstractmemory-0.0.1.dist-info/RECORD +0 -6
- {abstractmemory-0.0.1.dist-info → abstractmemory-0.2.1.dist-info}/WHEEL +0 -0
- {abstractmemory-0.0.1.dist-info → abstractmemory-0.2.1.dist-info}/top_level.txt +0 -0
abstractmemory/embeddings/__init__.py
ADDED
@@ -0,0 +1,317 @@
"""
Real embedding providers for semantic search capabilities.

Provides a unified interface for generating embeddings from real providers:
AbstractCore EmbeddingManager, OpenAI, Ollama with semantic capabilities.

NO FALLBACKS - only real semantic embedding providers are supported.
"""

import logging
import hashlib
from typing import List, Optional, Any, Union
from datetime import datetime

logger = logging.getLogger(__name__)


class EmbeddingAdapter:
    """
    Unified embedding interface for various providers.

    IMPORTANT: The embedding provider/model must remain consistent within a storage space.
    You can change LLM providers freely, but changing embedding models requires recreating
    your vector database as different models produce incompatible vector spaces.
    """

    def __init__(self, provider: Optional[Any] = None):
        """
        Initialize embedding adapter.

        Args:
            provider: Embedding provider (AbstractCore EmbeddingManager, OpenAI client, etc.)
                Note: This is for EMBEDDINGS only, not LLM text generation.

        Raises:
            ValueError: If no embedding provider is available
        """
        if provider is None:
            raise ValueError(
                "AbstractMemory semantic search requires a dedicated embedding provider. "
                "This is separate from your LLM provider for text generation. "
                "Please provide: AbstractCore EmbeddingManager, OpenAI client, Ollama with embeddings, etc."
            )

        self.provider = provider
        self.provider_type = self._detect_provider_type()
        self.embedding_dimension = self._get_embedding_dimension()
        self.model_info = self._get_model_info()

    def _detect_provider_type(self) -> str:
        """Detect the type of provider and its embedding capabilities."""
        # Check for AbstractCore EmbeddingManager (preferred)
        try:
            from abstractllm.embeddings import EmbeddingManager
            if isinstance(self.provider, EmbeddingManager):
                return "abstractcore_embeddings"
        except ImportError:
            pass

        # Check for AbstractCore provider with embedding support (has specific AbstractCore attributes)
        if hasattr(self.provider, 'generate_embedding') and hasattr(self.provider, 'provider_name'):
            return "abstractcore"

        # Check for OpenAI client
        if hasattr(self.provider, 'embeddings'):
            return "openai"

        # Check for provider name attribute (Ollama, MLX, etc.)
        if hasattr(self.provider, 'provider_name'):
            provider_name = getattr(self.provider, 'provider_name', '').lower()
            if 'ollama' in provider_name:
                return "ollama"
            elif 'mlx' in provider_name:
                return "mlx"

        # Check if provider has generate_embedding method (generic embedding provider)
        if hasattr(self.provider, 'generate_embedding') and callable(getattr(self.provider, 'generate_embedding')):
            return "generic_embedding_provider"

        # If we can't identify an embedding provider, raise an error
        raise ValueError(
            f"Unable to identify an embedding provider from: {type(self.provider)}. "
            "Supported providers: AbstractCore EmbeddingManager, OpenAI client, "
            "Ollama with embeddings, or any object with 'generate_embedding()' method."
        )

    def _get_embedding_dimension(self) -> int:
        """Get the embedding dimension based on provider type."""
        if self.provider_type == "abstractcore_embeddings":
            # Get dimension from a test embedding
            try:
                test_embedding = self.provider.embed("dimension_test")
                return len(test_embedding)
            except Exception as e:
                logger.error(f"Failed to get embedding dimension from AbstractCore: {e}")
                raise ValueError("Unable to determine embedding dimension from AbstractCore provider")
        elif self.provider_type == "openai":
            return 1536  # text-embedding-3-small default
        elif self.provider_type == "ollama":
            # Try to get dimension from test embedding
            try:
                test_embedding = self._generate_ollama_embedding("dimension_test")
                return len(test_embedding)
            except Exception:
                return 1024  # Common Ollama embedding dimension
        elif self.provider_type == "generic_embedding_provider":
            # For any provider with generate_embedding method
            try:
                test_embedding = self.provider.generate_embedding("dimension_test")
                return len(test_embedding)
            except Exception as e:
                logger.error(f"Failed to determine embedding dimension from generic provider: {e}")
                raise ValueError(f"Unable to determine embedding dimension: {e}")
        else:
            # For any other provider, attempt to generate a test embedding
            try:
                test_embedding = self.generate_embedding("dimension_test")
                return len(test_embedding)
            except Exception as e:
                logger.error(f"Failed to determine embedding dimension: {e}")
                raise ValueError(f"Unable to determine embedding dimension for provider type: {self.provider_type}")

    def _get_model_info(self) -> dict:
        """Get detailed information about the embedding model for consistency tracking."""
        info = {
            "provider_type": self.provider_type,
            "dimension": self.embedding_dimension,
            "created_at": datetime.now().isoformat()
        }

        if self.provider_type == "abstractcore_embeddings":
            # Try to get model name from AbstractCore - only store serializable strings
            try:
                if hasattr(self.provider, 'model'):
                    model_attr = getattr(self.provider, 'model')
                    if isinstance(model_attr, str):
                        info["model_name"] = model_attr
                    else:
                        # Get string representation of the model
                        info["model_name"] = str(type(model_attr).__name__)
                if hasattr(self.provider, 'backend'):
                    info["backend"] = str(self.provider.backend)
            except Exception as e:
                logger.debug(f"Could not extract model info: {e}")
        elif self.provider_type == "openai":
            info["model_name"] = "text-embedding-3-small"  # Default assumption

        return info

    def generate_embedding(self, text: str) -> List[float]:
        """
        Generate embedding for the given text using the configured provider.

        Args:
            text: Input text to embed

        Returns:
            List[float]: Embedding vector

        Raises:
            EmbeddingError: If embedding generation fails
        """
        try:
            if self.provider_type == "abstractcore_embeddings":
                return self._generate_abstractcore_embeddings(text)
            elif self.provider_type == "abstractcore":
                return self._generate_abstractcore_embedding(text)
            elif self.provider_type == "openai":
                return self._generate_openai_embedding(text)
            elif self.provider_type == "ollama":
                return self._generate_ollama_embedding(text)
            elif self.provider_type == "mlx":
                return self._generate_mlx_embedding(text)
            elif self.provider_type == "generic_embedding_provider":
                return self.provider.generate_embedding(text)
            else:
                raise EmbeddingError(f"Unknown provider type: {self.provider_type}")

        except Exception as e:
            logger.error(f"Embedding generation failed with {self.provider_type}: {e}")
            raise EmbeddingError(f"Failed to generate embedding: {e}") from e

    def _generate_abstractcore_embeddings(self, text: str) -> List[float]:
        """Generate embedding using AbstractCore EmbeddingManager."""
        return self.provider.embed(text)

    def _generate_abstractcore_embedding(self, text: str) -> List[float]:
        """Generate embedding using AbstractCore provider."""
        return self.provider.generate_embedding(text)

    def _generate_openai_embedding(self, text: str) -> List[float]:
        """Generate embedding using OpenAI client."""
        response = self.provider.embeddings.create(
            model="text-embedding-3-small",
            input=text
        )
        return response.data[0].embedding

    def _generate_ollama_embedding(self, text: str) -> List[float]:
        """Generate embedding using Ollama provider."""
        try:
            import requests
            response = requests.post(
                "http://localhost:11434/api/embeddings",
                json={
                    "model": "nomic-embed-text",
                    "prompt": text
                },
                timeout=30
            )
            if response.status_code == 200:
                embedding = response.json()["embedding"]
                if embedding and isinstance(embedding, list):
                    return embedding
            raise EmbeddingError(f"Ollama API returned status {response.status_code}")
        except ImportError:
            raise EmbeddingError("requests library not available for Ollama embedding API")
        except Exception as e:
            raise EmbeddingError(f"Ollama embedding generation failed: {e}") from e

    def _generate_mlx_embedding(self, text: str) -> List[float]:
        """Generate embedding using MLX provider."""
        # MLX provider should implement actual MLX embedding model
        raise EmbeddingError(
            "MLX embedding implementation not yet available. "
            "Please use AbstractCore EmbeddingManager or another provider."
        )

    def is_real_embedding(self) -> bool:
        """Check if this adapter provides real semantic embeddings."""
        return self.provider_type in ["abstractcore_embeddings", "abstractcore", "openai", "ollama", "generic_embedding_provider"]

    def get_embedding_info(self) -> dict:
        """Get comprehensive information about the embedding provider for consistency tracking."""
        info = self.model_info.copy()
        info.update({
            "is_real_embedding": self.is_real_embedding(),
            "provider_available": self.provider is not None
        })
        return info

    def check_consistency_with(self, other_model_info: dict) -> bool:
        """
        Check if this adapter is consistent with previously stored model info.

        Args:
            other_model_info: Previously stored model information

        Returns:
            bool: True if models are compatible for semantic search
        """
        current_info = self.get_embedding_info()

        # Check critical compatibility factors
        if current_info.get("provider_type") != other_model_info.get("provider_type"):
            logger.warning(f"Provider type mismatch: {current_info.get('provider_type')} vs {other_model_info.get('provider_type')}")
            return False

        if current_info.get("dimension") != other_model_info.get("dimension"):
            logger.warning(f"Dimension mismatch: {current_info.get('dimension')} vs {other_model_info.get('dimension')}")
            return False

        if current_info.get("model_name") != other_model_info.get("model_name"):
            logger.warning(f"Model name mismatch: {current_info.get('model_name')} vs {other_model_info.get('model_name')}")
            return False

        return True

    def warn_about_consistency(self, stored_model_info: dict) -> None:
        """
        Issue warnings about embedding model consistency issues.

        Args:
            stored_model_info: Information about previously stored embeddings
        """
        if not self.check_consistency_with(stored_model_info):
            current_info = self.get_embedding_info()
            # Note: explicit "+" around the "="*80 terms is required here;
            # relying on implicit string concatenation next to "*" would
            # repeat the adjacent literal 80 times.
            logger.warning(
                "\n" + "="*80 + "\n"
                "🚨 CRITICAL: EMBEDDING MODEL INCONSISTENCY DETECTED 🚨\n"
                + "="*80 + "\n"
                "You are attempting to use a different embedding model than what was\n"
                "previously used in this storage space. This BREAKS semantic search!\n\n"
                f"CURRENT embedding model:\n"
                f"  • Provider: {current_info.get('provider_type', 'Unknown')}\n"
                f"  • Model: {current_info.get('model_name', 'Unknown')}\n"
                f"  • Dimensions: {current_info.get('dimension', 'Unknown')}\n\n"
                f"STORED embedding model:\n"
                f"  • Provider: {stored_model_info.get('provider_type', 'Unknown')}\n"
                f"  • Model: {stored_model_info.get('model_name', 'Unknown')}\n"
                f"  • Dimensions: {stored_model_info.get('dimension', 'Unknown')}\n\n"
                "IMPORTANT: You can change LLM providers freely (Anthropic ↔ OpenAI ↔ Ollama)\n"
                "but embedding models must remain consistent within a storage space.\n\n"
                "TO FIX THIS ISSUE:\n"
                "1. Use the SAME embedding model as stored, OR\n"
                "2. Delete your vector database and start fresh with the new model\n"
                "   (this will re-embed all interactions with the new model)\n"
                + "="*80
            )


class EmbeddingError(Exception):
    """Base exception for embedding-related errors."""
    pass


def create_embedding_adapter(provider: Optional[Any] = None) -> EmbeddingAdapter:
    """
    Create an embedding adapter for the given provider.

    Args:
        provider: LLM provider instance

    Returns:
        EmbeddingAdapter: Configured adapter
    """
    return EmbeddingAdapter(provider)
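Usage sketch (illustrative, not part of the released diff): the adapter duck-types its provider, so any object exposing a callable generate_embedding() is detected as a "generic_embedding_provider" and its dimension is probed with a test embedding. The ToyHashProvider below is a hypothetical stand-in for wiring or testing only; it is not a real semantic model, which the module otherwise insists on.

from abstractmemory.embeddings import create_embedding_adapter

# Hypothetical provider: any object with generate_embedding() passes
# the generic check in _detect_provider_type().
class ToyHashProvider:
    def generate_embedding(self, text: str) -> list:
        # Deterministic 8-dim pseudo-embedding; stands in for a real model.
        return [float(ord(c) % 7) for c in text[:8].ljust(8)]

adapter = create_embedding_adapter(ToyHashProvider())
print(adapter.provider_type)        # "generic_embedding_provider"
print(adapter.embedding_dimension)  # 8, probed via a test embedding
vector = adapter.generate_embedding("hello world")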
abstractmemory/graph/__init__.py
ADDED
@@ -0,0 +1 @@
# Temporal knowledge graph implementation
abstractmemory/graph/knowledge_graph.py
ADDED
@@ -0,0 +1,178 @@
"""
Temporal knowledge graph implementation.
"""

import networkx as nx
from datetime import datetime
from typing import Dict, List, Optional, Tuple, Any

from abstractmemory.core.temporal import GroundingAnchor, TemporalSpan, RelationalContext


class TemporalKnowledgeGraph:
    """
    Knowledge graph with bi-temporal modeling.
    Based on Zep/Graphiti architecture.
    """

    def __init__(self):
        self.graph = nx.MultiDiGraph()
        self._node_counter = 0
        self._edge_counter = 0
        self.ontology = {}  # Auto-built ontology

    def add_entity(self, value: str, entity_type: str = 'entity') -> str:
        """Add or get entity node"""
        # Check for existing entity (deduplication)
        for node_id, data in self.graph.nodes(data=True):
            if data.get('value') == value:
                # Update access time
                self.graph.nodes[node_id]['last_accessed'] = datetime.now()
                return node_id

        # Create new entity
        node_id = f"entity_{self._node_counter}"
        self._node_counter += 1

        self.graph.add_node(
            node_id,
            value=value,
            type=entity_type,
            created_at=datetime.now(),
            last_accessed=datetime.now(),
            importance=1.0
        )

        # Update ontology
        if entity_type not in self.ontology:
            self.ontology[entity_type] = []
        self.ontology[entity_type].append(node_id)

        return node_id

    def add_fact(self, subject: str, predicate: str, object: str,
                 event_time: datetime, confidence: float = 1.0,
                 source: Optional[str] = None, ingestion_time: Optional[datetime] = None) -> str:
        """Add temporally anchored fact"""

        # Get or create nodes
        subj_id = self.add_entity(subject)
        obj_id = self.add_entity(object)

        # Create grounding anchor
        anchor = GroundingAnchor(
            event_time=event_time,
            ingestion_time=ingestion_time or datetime.now(),
            validity_span=TemporalSpan(start=event_time),
            relational=RelationalContext(user_id="default"),  # Will be updated when used in GroundedMemory
            confidence=confidence,
            source=source
        )

        # Check for contradictions
        self._handle_contradictions(subj_id, predicate, obj_id, anchor)

        # Add edge with temporal data
        edge_id = f"edge_{self._edge_counter}"
        self._edge_counter += 1

        self.graph.add_edge(
            subj_id, obj_id,
            key=edge_id,
            predicate=predicate,
            anchor=anchor,
            confidence=confidence,
            valid=True
        )

        return edge_id

    def _handle_contradictions(self, subj_id: str, predicate: str,
                               obj_id: str, new_anchor: GroundingAnchor):
        """Handle temporal contradictions"""
        # Check existing edges for contradictions
        for _, _, key, data in self.graph.edges(subj_id, keys=True, data=True):
            if data.get('predicate') == predicate and data.get('valid'):
                old_anchor = data.get('anchor')
                if old_anchor:
                    # Check for temporal overlap
                    if self._has_temporal_overlap(old_anchor, new_anchor):
                        # Invalidate older fact (new info takes precedence)
                        if old_anchor.ingestion_time < new_anchor.ingestion_time:
                            data['valid'] = False
                            old_anchor.validity_span.end = new_anchor.event_time
                            old_anchor.validity_span.valid = False

    def _has_temporal_overlap(self, anchor1: GroundingAnchor,
                              anchor2: GroundingAnchor) -> bool:
        """Check if two anchors have temporal overlap"""
        span1 = anchor1.validity_span
        span2 = anchor2.validity_span

        # If either span has no end, check if starts overlap
        if span1.end is None or span2.end is None:
            return True  # Conservative: assume overlap

        # Check for actual overlap
        return not (span1.end < span2.start or span2.end < span1.start)

    def query_at_time(self, query: str, point_in_time: datetime) -> List[Dict[str, Any]]:
        """Query knowledge state at specific time"""
        results = []

        for u, v, key, data in self.graph.edges(keys=True, data=True):
            anchor = data.get('anchor')
            if not anchor:
                continue

            # Check if fact was known and valid at this time
            if (anchor.ingestion_time <= point_in_time and
                    anchor.event_time <= point_in_time and
                    data.get('valid', True)):  # Default to True if not explicitly set

                # Check if still valid at query time
                if (anchor.validity_span.end is None or
                        anchor.validity_span.end > point_in_time):

                    # Check if matches query
                    if query.lower() in data.get('predicate', '').lower():
                        results.append({
                            'subject': self.graph.nodes[u]['value'],
                            'predicate': data['predicate'],
                            'object': self.graph.nodes[v]['value'],
                            'confidence': data.get('confidence', 1.0),
                            'event_time': anchor.event_time,
                            'source': getattr(anchor, 'source', None)
                        })

        return results

    def get_entity_evolution(self, entity: str, start: datetime,
                             end: datetime) -> List[Dict[str, Any]]:
        """Track how entity's relationships evolved over time"""
        # Find entity node
        entity_id = None
        for node_id, data in self.graph.nodes(data=True):
            if data.get('value') == entity:
                entity_id = node_id
                break

        if not entity_id:
            return []

        evolution = []

        # Check all edges involving this entity
        for u, v, key, data in self.graph.edges(keys=True, data=True):
            if u == entity_id or v == entity_id:
                anchor = data.get('anchor')
                if anchor and start <= anchor.event_time <= end:
                    evolution.append({
                        'time': anchor.event_time,
                        'type': 'fact_added' if data.get('valid') else 'fact_invalidated',
                        'subject': self.graph.nodes[u]['value'],
                        'predicate': data['predicate'],
                        'object': self.graph.nodes[v]['value']
                    })

        return sorted(evolution, key=lambda x: x['time'])
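Usage sketch (illustrative, not part of the released diff): a later fact about the same subject and predicate invalidates the earlier one through _handle_contradictions(), so a point-in-time query returns only the surviving fact. This assumes GroundingAnchor and friends behave as imported from abstractmemory.core.temporal.

from datetime import datetime
from abstractmemory.graph.knowledge_graph import TemporalKnowledgeGraph

kg = TemporalKnowledgeGraph()

# Two facts with the same subject/predicate; the later ingestion
# invalidates the earlier edge and closes its validity span.
kg.add_fact("alice", "works_at", "AcmeCorp", event_time=datetime(2023, 1, 1))
kg.add_fact("alice", "works_at", "Initech", event_time=datetime(2024, 6, 1))

# Point-in-time query matches on a substring of the predicate;
# querying "now" returns only the still-valid Initech fact.
facts = kg.query_at_time("works_at", datetime.now())
for f in facts:
    print(f["subject"], f["predicate"], f["object"], f["event_time"])

# Relationship history for an entity within a time window:
# yields both the surviving fact and the invalidated one.
history = kg.get_entity_evolution("alice", datetime(2023, 1, 1), datetime(2025, 1, 1))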
abstractmemory/simple.py
ADDED
@@ -0,0 +1,151 @@
"""
Simple, efficient memory for task-specific agents.
No over-engineering - just what's needed for the job.
"""

from typing import List, Optional, Dict, Any
from collections import deque
from datetime import datetime


class ScratchpadMemory:
    """
    Lightweight memory for ReAct agents and single-task tools.

    Use this for:
    - ReAct agent thought-action-observation cycles
    - Summarizer working memory
    - Extractor temporary context
    - Any agent that doesn't need persistence

    Example:
        # For a ReAct agent
        scratchpad = ScratchpadMemory(max_entries=20)
        scratchpad.add_thought("Need to search for Python tutorials")
        scratchpad.add_action("search", {"query": "Python basics"})
        scratchpad.add_observation("Found 10 relevant tutorials")

        # Get full context for next iteration
        context = scratchpad.get_context()
    """

    def __init__(self, max_entries: int = 100):
        """Initialize scratchpad with bounded size"""
        self.entries: deque = deque(maxlen=max_entries)
        self.thoughts: List[str] = []
        self.actions: List[Dict[str, Any]] = []
        self.observations: List[str] = []

    def add(self, content: str, entry_type: str = "note"):
        """Add any entry to scratchpad"""
        entry = {
            "type": entry_type,
            "content": content,
            "timestamp": datetime.now().isoformat()
        }
        self.entries.append(entry)

    def add_thought(self, thought: str):
        """Add a thought (for ReAct pattern)"""
        self.thoughts.append(thought)
        self.add(thought, "thought")

    def add_action(self, action: str, params: Optional[Dict] = None):
        """Add an action (for ReAct pattern)"""
        action_entry = {"action": action, "params": params or {}}
        self.actions.append(action_entry)
        self.add(f"Action: {action} with {params}", "action")

    def add_observation(self, observation: str):
        """Add an observation (for ReAct pattern)"""
        self.observations.append(observation)
        self.add(observation, "observation")

    def get_context(self, last_n: Optional[int] = None) -> str:
        """Get scratchpad context as string"""
        entries_to_use = list(self.entries)
        if last_n:
            entries_to_use = entries_to_use[-last_n:]

        context_lines = []
        for entry in entries_to_use:
            if entry["type"] == "thought":
                context_lines.append(f"Thought: {entry['content']}")
            elif entry["type"] == "action":
                context_lines.append(f"Action: {entry['content']}")
            elif entry["type"] == "observation":
                context_lines.append(f"Observation: {entry['content']}")
            else:
                context_lines.append(entry['content'])

        return "\n".join(context_lines)

    def get_react_history(self) -> Dict[str, List]:
        """Get structured ReAct history"""
        return {
            "thoughts": self.thoughts,
            "actions": self.actions,
            "observations": self.observations
        }

    def clear(self):
        """Clear the scratchpad"""
        self.entries.clear()
        self.thoughts.clear()
        self.actions.clear()
        self.observations.clear()

    def __len__(self) -> int:
        return len(self.entries)

    def __str__(self) -> str:
        return f"ScratchpadMemory({len(self.entries)} entries)"


class BufferMemory:
    """
    Simple conversation buffer (wrapper around BasicSession).

    Use this when BasicSession from AbstractLLM Core is sufficient.
    This is just a thin adapter for compatibility.

    Example:
        # For a simple chatbot
        memory = BufferMemory(max_messages=50)
        memory.add_message("user", "What's the weather?")
        memory.add_message("assistant", "I don't have weather data")
        context = memory.get_context()
    """

    def __init__(self, max_messages: int = 100):
        """Initialize buffer with size limit"""
        self.messages: deque = deque(maxlen=max_messages)

    def add_message(self, role: str, content: str):
        """Add a message to the buffer"""
        self.messages.append({
            "role": role,
            "content": content,
            "timestamp": datetime.now().isoformat()
        })

    def get_messages(self) -> List[Dict[str, str]]:
        """Get messages for LLM context"""
        return [{"role": m["role"], "content": m["content"]}
                for m in self.messages]

    def get_context(self, last_n: Optional[int] = None) -> str:
        """Get conversation as formatted string"""
        messages = list(self.messages)
        if last_n:
            messages = messages[-last_n:]

        lines = []
        for msg in messages:
            lines.append(f"{msg['role']}: {msg['content']}")

        return "\n".join(lines)

    def clear(self):
        """Clear the buffer"""
        self.messages.clear()
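Usage sketch (illustrative, not part of the released diff): the two classes compose naturally in a ReAct-style loop, with BufferMemory holding the conversation and ScratchpadMemory holding the current task trace. The tool call here is stubbed; a real agent would supply the observation from its tool output.

from abstractmemory.simple import ScratchpadMemory, BufferMemory

pad = ScratchpadMemory(max_entries=20)
chat = BufferMemory(max_messages=50)

chat.add_message("user", "Find me a Python tutorial")

# One thought-action-observation cycle; the observation is stubbed.
pad.add_thought("Search the web for beginner Python tutorials")
pad.add_action("search", {"query": "python tutorial beginner"})
pad.add_observation("Found 10 relevant tutorials")

# Feed both memories back into the next LLM call.
prompt = chat.get_context() + "\n" + pad.get_context(last_n=3)
assert len(pad) == 3  # thought + action + observation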
abstractmemory/storage/__init__.py
ADDED
@@ -0,0 +1,16 @@
# Storage backends for memory persistence

from .dual_manager import DualStorageManager
from .markdown_storage import MarkdownStorage

# LanceDB is optional
try:
    from .lancedb_storage import LanceDBStorage
except ImportError:
    LanceDBStorage = None

__all__ = [
    'DualStorageManager',
    'MarkdownStorage',
    'LanceDBStorage'
]
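Usage sketch (illustrative, not part of the released diff): because LanceDBStorage is bound to None when the optional lancedb dependency is missing, callers presumably guard on it before use. Constructor signatures are not shown in this diff, so instantiation is omitted.

from abstractmemory.storage import LanceDBStorage, MarkdownStorage

if LanceDBStorage is None:
    # lancedb extra not installed; fall back to the always-available backend.
    print("LanceDB support unavailable; using markdown storage only")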