AbstractMemory 0.0.1__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractmemory/__init__.py +744 -31
- abstractmemory/cognitive/__init__.py +1 -0
- abstractmemory/components/__init__.py +1 -0
- abstractmemory/components/core.py +112 -0
- abstractmemory/components/episodic.py +68 -0
- abstractmemory/components/semantic.py +102 -0
- abstractmemory/components/working.py +50 -0
- abstractmemory/core/__init__.py +1 -0
- abstractmemory/core/interfaces.py +95 -0
- abstractmemory/core/temporal.py +100 -0
- abstractmemory/graph/__init__.py +1 -0
- abstractmemory/graph/knowledge_graph.py +178 -0
- abstractmemory/simple.py +151 -0
- abstractmemory/storage/__init__.py +16 -0
- abstractmemory/storage/dual_manager.py +278 -0
- abstractmemory/storage/lancedb_storage.py +425 -0
- abstractmemory/storage/markdown_storage.py +447 -0
- abstractmemory-0.1.0.dist-info/METADATA +331 -0
- abstractmemory-0.1.0.dist-info/RECORD +22 -0
- {abstractmemory-0.0.1.dist-info → abstractmemory-0.1.0.dist-info}/licenses/LICENSE +4 -1
- abstractmemory-0.0.1.dist-info/METADATA +0 -94
- abstractmemory-0.0.1.dist-info/RECORD +0 -6
- {abstractmemory-0.0.1.dist-info → abstractmemory-0.1.0.dist-info}/WHEEL +0 -0
- {abstractmemory-0.0.1.dist-info → abstractmemory-0.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,425 @@
|
|
|
1
|
+
"""
|
|
2
|
+
LanceDB Storage Backend with SQL + Vector Search via AbstractCore embeddings.
|
|
3
|
+
Provides powerful querying capabilities for AI memory.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import uuid
|
|
7
|
+
from typing import Optional, Dict, List, Any
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
import logging
|
|
10
|
+
|
|
11
|
+
from ..core.interfaces import IStorage
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
try:
|
|
16
|
+
import lancedb
|
|
17
|
+
LANCEDB_AVAILABLE = True
|
|
18
|
+
except ImportError:
|
|
19
|
+
LANCEDB_AVAILABLE = False
|
|
20
|
+
logger.warning("LanceDB not available. Install with: pip install lancedb")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class LanceDBStorage(IStorage):
    """
    LanceDB storage with vector embeddings from AbstractCore.

    Tables:
    - interactions: Verbatim user-agent interactions with embeddings
    - experiential_notes: AI reflections and insights with embeddings
    - links: Bidirectional relationships between interactions and notes
    - memory_components: Snapshots of memory components
    """

    def __init__(self, uri: str, embedding_provider: Optional[Any] = None):
        """
        Connect to LanceDB at *uri* and ensure all tables exist.

        Args:
            uri: LanceDB connection URI (e.g., "./lance.db")
            embedding_provider: AbstractCore instance for generating embeddings

        Raises:
            ImportError: when the optional ``lancedb`` package is missing.
        """
        if not LANCEDB_AVAILABLE:
            raise ImportError("LanceDB is required but not installed. Install with: pip install lancedb")

        self.uri = uri
        self.embedding_provider = embedding_provider
        self.db = lancedb.connect(uri)
        self._init_tables()
|
|
51
|
+
|
|
52
|
+
def _init_tables(self):
|
|
53
|
+
"""Initialize LanceDB tables with schemas"""
|
|
54
|
+
|
|
55
|
+
# Interactions table schema
|
|
56
|
+
interactions_schema = [
|
|
57
|
+
{"name": "id", "type": "string"},
|
|
58
|
+
{"name": "user_id", "type": "string"},
|
|
59
|
+
{"name": "timestamp", "type": "timestamp"},
|
|
60
|
+
{"name": "user_input", "type": "string"},
|
|
61
|
+
{"name": "agent_response", "type": "string"},
|
|
62
|
+
{"name": "topic", "type": "string"},
|
|
63
|
+
{"name": "metadata", "type": "string"}, # JSON string
|
|
64
|
+
{"name": "embedding", "type": "vector"} # Vector embedding
|
|
65
|
+
]
|
|
66
|
+
|
|
67
|
+
# Experiential notes table schema
|
|
68
|
+
notes_schema = [
|
|
69
|
+
{"name": "id", "type": "string"},
|
|
70
|
+
{"name": "timestamp", "type": "timestamp"},
|
|
71
|
+
{"name": "reflection", "type": "string"},
|
|
72
|
+
{"name": "interaction_id", "type": "string"},
|
|
73
|
+
{"name": "note_type", "type": "string"},
|
|
74
|
+
{"name": "metadata", "type": "string"}, # JSON string
|
|
75
|
+
{"name": "embedding", "type": "vector"} # Vector embedding
|
|
76
|
+
]
|
|
77
|
+
|
|
78
|
+
# Links table schema
|
|
79
|
+
links_schema = [
|
|
80
|
+
{"name": "interaction_id", "type": "string"},
|
|
81
|
+
{"name": "note_id", "type": "string"},
|
|
82
|
+
{"name": "created", "type": "timestamp"},
|
|
83
|
+
{"name": "link_type", "type": "string"}
|
|
84
|
+
]
|
|
85
|
+
|
|
86
|
+
# Memory components table schema
|
|
87
|
+
components_schema = [
|
|
88
|
+
{"name": "component_name", "type": "string"},
|
|
89
|
+
{"name": "timestamp", "type": "timestamp"},
|
|
90
|
+
{"name": "data", "type": "string"}, # JSON string
|
|
91
|
+
{"name": "version", "type": "int64"}
|
|
92
|
+
]
|
|
93
|
+
|
|
94
|
+
# Create tables if they don't exist
|
|
95
|
+
try:
|
|
96
|
+
self.interactions_table = self.db.open_table("interactions")
|
|
97
|
+
except FileNotFoundError:
|
|
98
|
+
# Create empty table with schema
|
|
99
|
+
import pandas as pd
|
|
100
|
+
empty_df = pd.DataFrame(columns=[col["name"] for col in interactions_schema])
|
|
101
|
+
self.interactions_table = self.db.create_table("interactions", empty_df)
|
|
102
|
+
|
|
103
|
+
try:
|
|
104
|
+
self.notes_table = self.db.open_table("experiential_notes")
|
|
105
|
+
except FileNotFoundError:
|
|
106
|
+
import pandas as pd
|
|
107
|
+
empty_df = pd.DataFrame(columns=[col["name"] for col in notes_schema])
|
|
108
|
+
self.notes_table = self.db.create_table("experiential_notes", empty_df)
|
|
109
|
+
|
|
110
|
+
try:
|
|
111
|
+
self.links_table = self.db.open_table("links")
|
|
112
|
+
except FileNotFoundError:
|
|
113
|
+
import pandas as pd
|
|
114
|
+
empty_df = pd.DataFrame(columns=[col["name"] for col in links_schema])
|
|
115
|
+
self.links_table = self.db.create_table("links", empty_df)
|
|
116
|
+
|
|
117
|
+
try:
|
|
118
|
+
self.components_table = self.db.open_table("memory_components")
|
|
119
|
+
except FileNotFoundError:
|
|
120
|
+
import pandas as pd
|
|
121
|
+
empty_df = pd.DataFrame(columns=[col["name"] for col in components_schema])
|
|
122
|
+
self.components_table = self.db.create_table("memory_components", empty_df)
|
|
123
|
+
|
|
124
|
+
def _generate_embedding(self, text: str) -> Optional[List[float]]:
|
|
125
|
+
"""Generate embedding using AbstractCore provider"""
|
|
126
|
+
if self.embedding_provider and hasattr(self.embedding_provider, 'generate_embedding'):
|
|
127
|
+
try:
|
|
128
|
+
return self.embedding_provider.generate_embedding(text)
|
|
129
|
+
except Exception as e:
|
|
130
|
+
logger.error(f"Failed to generate embedding: {e}")
|
|
131
|
+
return None
|
|
132
|
+
|
|
133
|
+
def save_interaction(self, user_id: str, timestamp: datetime,
|
|
134
|
+
user_input: str, agent_response: str,
|
|
135
|
+
topic: str, metadata: Optional[Dict] = None) -> str:
|
|
136
|
+
"""Save verbatim interaction with vector embedding"""
|
|
137
|
+
|
|
138
|
+
interaction_id = f"int_{uuid.uuid4().hex[:8]}"
|
|
139
|
+
|
|
140
|
+
# Generate embedding for the full interaction
|
|
141
|
+
interaction_text = f"{user_input} {agent_response}"
|
|
142
|
+
embedding = self._generate_embedding(interaction_text)
|
|
143
|
+
|
|
144
|
+
# Prepare data
|
|
145
|
+
import json
|
|
146
|
+
import pandas as pd
|
|
147
|
+
|
|
148
|
+
data = {
|
|
149
|
+
"id": interaction_id,
|
|
150
|
+
"user_id": user_id,
|
|
151
|
+
"timestamp": timestamp,
|
|
152
|
+
"user_input": user_input,
|
|
153
|
+
"agent_response": agent_response,
|
|
154
|
+
"topic": topic,
|
|
155
|
+
"metadata": json.dumps(metadata or {}),
|
|
156
|
+
"embedding": embedding or [0.0] * 384 # Default embedding size
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
# Insert into table
|
|
160
|
+
df = pd.DataFrame([data])
|
|
161
|
+
|
|
162
|
+
try:
|
|
163
|
+
self.interactions_table.add(df)
|
|
164
|
+
logger.debug(f"Saved interaction {interaction_id} to LanceDB")
|
|
165
|
+
except Exception as e:
|
|
166
|
+
logger.error(f"Failed to save interaction to LanceDB: {e}")
|
|
167
|
+
raise
|
|
168
|
+
|
|
169
|
+
return interaction_id
|
|
170
|
+
|
|
171
|
+
def save_experiential_note(self, timestamp: datetime, reflection: str,
|
|
172
|
+
interaction_id: str, note_type: str = "reflection",
|
|
173
|
+
metadata: Optional[Dict] = None) -> str:
|
|
174
|
+
"""Save AI experiential note with vector embedding"""
|
|
175
|
+
|
|
176
|
+
note_id = f"note_{uuid.uuid4().hex[:8]}"
|
|
177
|
+
|
|
178
|
+
# Generate embedding for the reflection
|
|
179
|
+
embedding = self._generate_embedding(reflection)
|
|
180
|
+
|
|
181
|
+
# Prepare data
|
|
182
|
+
import json
|
|
183
|
+
import pandas as pd
|
|
184
|
+
|
|
185
|
+
data = {
|
|
186
|
+
"id": note_id,
|
|
187
|
+
"timestamp": timestamp,
|
|
188
|
+
"reflection": reflection,
|
|
189
|
+
"interaction_id": interaction_id,
|
|
190
|
+
"note_type": note_type,
|
|
191
|
+
"metadata": json.dumps(metadata or {}),
|
|
192
|
+
"embedding": embedding or [0.0] * 384 # Default embedding size
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
# Insert into table
|
|
196
|
+
df = pd.DataFrame([data])
|
|
197
|
+
|
|
198
|
+
try:
|
|
199
|
+
self.notes_table.add(df)
|
|
200
|
+
logger.debug(f"Saved experiential note {note_id} to LanceDB")
|
|
201
|
+
except Exception as e:
|
|
202
|
+
logger.error(f"Failed to save experiential note to LanceDB: {e}")
|
|
203
|
+
raise
|
|
204
|
+
|
|
205
|
+
return note_id
|
|
206
|
+
|
|
207
|
+
def link_interaction_to_note(self, interaction_id: str, note_id: str) -> None:
|
|
208
|
+
"""Create bidirectional link between interaction and note"""
|
|
209
|
+
|
|
210
|
+
import pandas as pd
|
|
211
|
+
|
|
212
|
+
link_data = {
|
|
213
|
+
"interaction_id": interaction_id,
|
|
214
|
+
"note_id": note_id,
|
|
215
|
+
"created": datetime.now(),
|
|
216
|
+
"link_type": "bidirectional"
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
df = pd.DataFrame([link_data])
|
|
220
|
+
|
|
221
|
+
try:
|
|
222
|
+
self.links_table.add(df)
|
|
223
|
+
logger.debug(f"Created link between {interaction_id} and {note_id}")
|
|
224
|
+
except Exception as e:
|
|
225
|
+
logger.error(f"Failed to create link in LanceDB: {e}")
|
|
226
|
+
|
|
227
|
+
def search_interactions(self, query: str, user_id: Optional[str] = None,
|
|
228
|
+
start_date: Optional[datetime] = None,
|
|
229
|
+
end_date: Optional[datetime] = None) -> List[Dict]:
|
|
230
|
+
"""
|
|
231
|
+
Search interactions using SQL filters and vector similarity.
|
|
232
|
+
|
|
233
|
+
Combines:
|
|
234
|
+
1. SQL filters for user_id, date range
|
|
235
|
+
2. Text search in user_input, agent_response, topic
|
|
236
|
+
3. Vector similarity search if embedding provider available
|
|
237
|
+
"""
|
|
238
|
+
|
|
239
|
+
try:
|
|
240
|
+
# Start with base query
|
|
241
|
+
query_parts = []
|
|
242
|
+
|
|
243
|
+
# Filter by user_id
|
|
244
|
+
if user_id:
|
|
245
|
+
query_parts.append(f"user_id = '{user_id}'")
|
|
246
|
+
|
|
247
|
+
# Filter by date range
|
|
248
|
+
if start_date:
|
|
249
|
+
query_parts.append(f"timestamp >= '{start_date.isoformat()}'")
|
|
250
|
+
if end_date:
|
|
251
|
+
query_parts.append(f"timestamp <= '{end_date.isoformat()}'")
|
|
252
|
+
|
|
253
|
+
# Build WHERE clause
|
|
254
|
+
where_clause = " AND ".join(query_parts) if query_parts else None
|
|
255
|
+
|
|
256
|
+
# Try vector search first if embedding provider available
|
|
257
|
+
if self.embedding_provider:
|
|
258
|
+
try:
|
|
259
|
+
query_embedding = self._generate_embedding(query)
|
|
260
|
+
if query_embedding:
|
|
261
|
+
# Vector similarity search
|
|
262
|
+
results = self.interactions_table.search(query_embedding).limit(50)
|
|
263
|
+
|
|
264
|
+
# Apply additional filters
|
|
265
|
+
if where_clause:
|
|
266
|
+
results = results.where(where_clause)
|
|
267
|
+
|
|
268
|
+
df = results.to_pandas()
|
|
269
|
+
|
|
270
|
+
return self._convert_df_to_dicts(df)
|
|
271
|
+
except Exception as e:
|
|
272
|
+
logger.warning(f"Vector search failed, falling back to text search: {e}")
|
|
273
|
+
|
|
274
|
+
# Fallback to text search
|
|
275
|
+
search_conditions = []
|
|
276
|
+
query_lower = query.lower()
|
|
277
|
+
|
|
278
|
+
# Search in multiple text fields
|
|
279
|
+
search_conditions.extend([
|
|
280
|
+
f"LOWER(user_input) LIKE '%{query_lower}%'",
|
|
281
|
+
f"LOWER(agent_response) LIKE '%{query_lower}%'",
|
|
282
|
+
f"LOWER(topic) LIKE '%{query_lower}%'"
|
|
283
|
+
])
|
|
284
|
+
|
|
285
|
+
text_search = "(" + " OR ".join(search_conditions) + ")"
|
|
286
|
+
|
|
287
|
+
# Combine with other filters
|
|
288
|
+
if where_clause:
|
|
289
|
+
final_where = f"({where_clause}) AND {text_search}"
|
|
290
|
+
else:
|
|
291
|
+
final_where = text_search
|
|
292
|
+
|
|
293
|
+
# Execute search
|
|
294
|
+
df = self.interactions_table.search().where(final_where).limit(100).to_pandas()
|
|
295
|
+
|
|
296
|
+
return self._convert_df_to_dicts(df)
|
|
297
|
+
|
|
298
|
+
except Exception as e:
|
|
299
|
+
logger.error(f"Search failed in LanceDB: {e}")
|
|
300
|
+
return []
|
|
301
|
+
|
|
302
|
+
def _convert_df_to_dicts(self, df) -> List[Dict]:
|
|
303
|
+
"""Convert pandas DataFrame to list of dictionaries"""
|
|
304
|
+
import json
|
|
305
|
+
|
|
306
|
+
results = []
|
|
307
|
+
for _, row in df.iterrows():
|
|
308
|
+
try:
|
|
309
|
+
result = {
|
|
310
|
+
"id": row["id"],
|
|
311
|
+
"user_id": row["user_id"],
|
|
312
|
+
"timestamp": row["timestamp"].isoformat() if hasattr(row["timestamp"], 'isoformat') else str(row["timestamp"]),
|
|
313
|
+
"user_input": row["user_input"],
|
|
314
|
+
"agent_response": row["agent_response"],
|
|
315
|
+
"topic": row["topic"],
|
|
316
|
+
"metadata": json.loads(row["metadata"]) if row["metadata"] else {}
|
|
317
|
+
}
|
|
318
|
+
results.append(result)
|
|
319
|
+
except Exception as e:
|
|
320
|
+
logger.warning(f"Failed to convert row to dict: {e}")
|
|
321
|
+
continue
|
|
322
|
+
|
|
323
|
+
return results
|
|
324
|
+
|
|
325
|
+
# IStorage interface implementation
|
|
326
|
+
def save(self, key: str, value: Any) -> None:
|
|
327
|
+
"""Generic save for compatibility"""
|
|
328
|
+
if "/" in key:
|
|
329
|
+
component_name = key.split("/")[-1]
|
|
330
|
+
self.save_memory_component(component_name, value)
|
|
331
|
+
|
|
332
|
+
def load(self, key: str) -> Any:
|
|
333
|
+
"""Generic load for compatibility"""
|
|
334
|
+
if "/" in key:
|
|
335
|
+
component_name = key.split("/")[-1]
|
|
336
|
+
return self.load_memory_component(component_name)
|
|
337
|
+
return None
|
|
338
|
+
|
|
339
|
+
def exists(self, key: str) -> bool:
|
|
340
|
+
"""Check if key exists"""
|
|
341
|
+
if "/" in key:
|
|
342
|
+
component_name = key.split("/")[-1]
|
|
343
|
+
try:
|
|
344
|
+
df = self.components_table.search().where(f"component_name = '{component_name}'").limit(1).to_pandas()
|
|
345
|
+
return len(df) > 0
|
|
346
|
+
except:
|
|
347
|
+
return False
|
|
348
|
+
return False
|
|
349
|
+
|
|
350
|
+
def save_memory_component(self, component_name: str, component_data: Any) -> None:
|
|
351
|
+
"""Save memory component to LanceDB"""
|
|
352
|
+
import json
|
|
353
|
+
import pandas as pd
|
|
354
|
+
|
|
355
|
+
# Convert component to JSON
|
|
356
|
+
if hasattr(component_data, '__dict__'):
|
|
357
|
+
data = component_data.__dict__
|
|
358
|
+
else:
|
|
359
|
+
data = component_data
|
|
360
|
+
|
|
361
|
+
# Get next version number
|
|
362
|
+
try:
|
|
363
|
+
existing = self.components_table.search().where(f"component_name = '{component_name}'").to_pandas()
|
|
364
|
+
version = existing["version"].max() + 1 if len(existing) > 0 else 1
|
|
365
|
+
except:
|
|
366
|
+
version = 1
|
|
367
|
+
|
|
368
|
+
component_record = {
|
|
369
|
+
"component_name": component_name,
|
|
370
|
+
"timestamp": datetime.now(),
|
|
371
|
+
"data": json.dumps(data, default=str),
|
|
372
|
+
"version": version
|
|
373
|
+
}
|
|
374
|
+
|
|
375
|
+
df = pd.DataFrame([component_record])
|
|
376
|
+
|
|
377
|
+
try:
|
|
378
|
+
self.components_table.add(df)
|
|
379
|
+
logger.debug(f"Saved {component_name} component version {version} to LanceDB")
|
|
380
|
+
except Exception as e:
|
|
381
|
+
logger.error(f"Failed to save {component_name} component: {e}")
|
|
382
|
+
|
|
383
|
+
def load_memory_component(self, component_name: str) -> Optional[Any]:
|
|
384
|
+
"""Load latest memory component from LanceDB"""
|
|
385
|
+
try:
|
|
386
|
+
import json
|
|
387
|
+
|
|
388
|
+
# Get latest version
|
|
389
|
+
df = self.components_table.search().where(f"component_name = '{component_name}'").to_pandas()
|
|
390
|
+
|
|
391
|
+
if len(df) == 0:
|
|
392
|
+
return None
|
|
393
|
+
|
|
394
|
+
# Get the latest version
|
|
395
|
+
latest = df.loc[df["version"].idxmax()]
|
|
396
|
+
|
|
397
|
+
return json.loads(latest["data"])
|
|
398
|
+
|
|
399
|
+
except Exception as e:
|
|
400
|
+
logger.error(f"Failed to load {component_name} component: {e}")
|
|
401
|
+
return None
|
|
402
|
+
|
|
403
|
+
def get_stats(self) -> Dict[str, Any]:
|
|
404
|
+
"""Get storage statistics"""
|
|
405
|
+
try:
|
|
406
|
+
interactions_count = len(self.interactions_table.search().limit(10000).to_pandas())
|
|
407
|
+
notes_count = len(self.notes_table.search().limit(10000).to_pandas())
|
|
408
|
+
links_count = len(self.links_table.search().limit(10000).to_pandas())
|
|
409
|
+
components_count = len(self.components_table.search().limit(1000).to_pandas())
|
|
410
|
+
|
|
411
|
+
return {
|
|
412
|
+
"total_interactions": interactions_count,
|
|
413
|
+
"total_notes": notes_count,
|
|
414
|
+
"total_links": links_count,
|
|
415
|
+
"total_components": components_count,
|
|
416
|
+
"uri": self.uri,
|
|
417
|
+
"embedding_provider_available": self.embedding_provider is not None
|
|
418
|
+
}
|
|
419
|
+
except Exception as e:
|
|
420
|
+
logger.error(f"Failed to get stats: {e}")
|
|
421
|
+
return {
|
|
422
|
+
"error": str(e),
|
|
423
|
+
"uri": self.uri,
|
|
424
|
+
"embedding_provider_available": self.embedding_provider is not None
|
|
425
|
+
}
|