AbstractMemory 0.0.1__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,425 @@
1
+ """
2
+ LanceDB Storage Backend with SQL + Vector Search via AbstractCore embeddings.
3
+ Provides powerful querying capabilities for AI memory.
4
+ """
5
+
6
+ import uuid
7
+ from typing import Optional, Dict, List, Any
8
+ from datetime import datetime
9
+ import logging
10
+
11
+ from ..core.interfaces import IStorage
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+ try:
16
+ import lancedb
17
+ LANCEDB_AVAILABLE = True
18
+ except ImportError:
19
+ LANCEDB_AVAILABLE = False
20
+ logger.warning("LanceDB not available. Install with: pip install lancedb")
21
+
22
+
23
class LanceDBStorage(IStorage):
    """
    LanceDB-backed storage that pairs SQL-style filtering with vector
    embeddings supplied by an AbstractCore provider.

    Tables managed:
        interactions       -- verbatim user/agent exchanges (+ embedding)
        experiential_notes -- AI reflections and insights (+ embedding)
        links              -- bidirectional interaction <-> note relations
        memory_components  -- versioned snapshots of memory components
    """

    def __init__(self, uri: str, embedding_provider: Optional[Any] = None):
        """
        Open (or create) a LanceDB database and ensure all tables exist.

        Args:
            uri: LanceDB connection URI (e.g., "./lance.db")
            embedding_provider: AbstractCore instance used to embed text;
                when None, vector features degrade gracefully.
        """
        # Fail fast with an actionable message when the optional
        # dependency was not importable at module load time.
        if not LANCEDB_AVAILABLE:
            raise ImportError("LanceDB is required but not installed. Install with: pip install lancedb")

        self.uri = uri
        self.embedding_provider = embedding_provider
        self.db = lancedb.connect(uri)

        # Open or create the four backing tables up front.
        self._init_tables()
51
+
52
+ def _init_tables(self):
53
+ """Initialize LanceDB tables with schemas"""
54
+
55
+ # Interactions table schema
56
+ interactions_schema = [
57
+ {"name": "id", "type": "string"},
58
+ {"name": "user_id", "type": "string"},
59
+ {"name": "timestamp", "type": "timestamp"},
60
+ {"name": "user_input", "type": "string"},
61
+ {"name": "agent_response", "type": "string"},
62
+ {"name": "topic", "type": "string"},
63
+ {"name": "metadata", "type": "string"}, # JSON string
64
+ {"name": "embedding", "type": "vector"} # Vector embedding
65
+ ]
66
+
67
+ # Experiential notes table schema
68
+ notes_schema = [
69
+ {"name": "id", "type": "string"},
70
+ {"name": "timestamp", "type": "timestamp"},
71
+ {"name": "reflection", "type": "string"},
72
+ {"name": "interaction_id", "type": "string"},
73
+ {"name": "note_type", "type": "string"},
74
+ {"name": "metadata", "type": "string"}, # JSON string
75
+ {"name": "embedding", "type": "vector"} # Vector embedding
76
+ ]
77
+
78
+ # Links table schema
79
+ links_schema = [
80
+ {"name": "interaction_id", "type": "string"},
81
+ {"name": "note_id", "type": "string"},
82
+ {"name": "created", "type": "timestamp"},
83
+ {"name": "link_type", "type": "string"}
84
+ ]
85
+
86
+ # Memory components table schema
87
+ components_schema = [
88
+ {"name": "component_name", "type": "string"},
89
+ {"name": "timestamp", "type": "timestamp"},
90
+ {"name": "data", "type": "string"}, # JSON string
91
+ {"name": "version", "type": "int64"}
92
+ ]
93
+
94
+ # Create tables if they don't exist
95
+ try:
96
+ self.interactions_table = self.db.open_table("interactions")
97
+ except FileNotFoundError:
98
+ # Create empty table with schema
99
+ import pandas as pd
100
+ empty_df = pd.DataFrame(columns=[col["name"] for col in interactions_schema])
101
+ self.interactions_table = self.db.create_table("interactions", empty_df)
102
+
103
+ try:
104
+ self.notes_table = self.db.open_table("experiential_notes")
105
+ except FileNotFoundError:
106
+ import pandas as pd
107
+ empty_df = pd.DataFrame(columns=[col["name"] for col in notes_schema])
108
+ self.notes_table = self.db.create_table("experiential_notes", empty_df)
109
+
110
+ try:
111
+ self.links_table = self.db.open_table("links")
112
+ except FileNotFoundError:
113
+ import pandas as pd
114
+ empty_df = pd.DataFrame(columns=[col["name"] for col in links_schema])
115
+ self.links_table = self.db.create_table("links", empty_df)
116
+
117
+ try:
118
+ self.components_table = self.db.open_table("memory_components")
119
+ except FileNotFoundError:
120
+ import pandas as pd
121
+ empty_df = pd.DataFrame(columns=[col["name"] for col in components_schema])
122
+ self.components_table = self.db.create_table("memory_components", empty_df)
123
+
124
+ def _generate_embedding(self, text: str) -> Optional[List[float]]:
125
+ """Generate embedding using AbstractCore provider"""
126
+ if self.embedding_provider and hasattr(self.embedding_provider, 'generate_embedding'):
127
+ try:
128
+ return self.embedding_provider.generate_embedding(text)
129
+ except Exception as e:
130
+ logger.error(f"Failed to generate embedding: {e}")
131
+ return None
132
+
133
def save_interaction(self, user_id: str, timestamp: datetime,
                     user_input: str, agent_response: str,
                     topic: str, metadata: Optional[Dict] = None) -> str:
    """Persist one verbatim user/agent exchange with its embedding.

    Returns:
        The generated interaction id ("int_" + 8 hex chars).

    Raises:
        Whatever the LanceDB insert raises, after logging it.
    """
    import json
    import pandas as pd

    interaction_id = f"int_{uuid.uuid4().hex[:8]}"

    # Embed the concatenated exchange so both sides are searchable.
    embedding = self._generate_embedding(f"{user_input} {agent_response}")

    row = pd.DataFrame([{
        "id": interaction_id,
        "user_id": user_id,
        "timestamp": timestamp,
        "user_input": user_input,
        "agent_response": agent_response,
        "topic": topic,
        "metadata": json.dumps(metadata or {}),
        # Zero-vector placeholder when no embedding could be produced.
        "embedding": embedding or [0.0] * 384,
    }])

    try:
        self.interactions_table.add(row)
        logger.debug(f"Saved interaction {interaction_id} to LanceDB")
    except Exception as e:
        logger.error(f"Failed to save interaction to LanceDB: {e}")
        raise

    return interaction_id
170
+
171
def save_experiential_note(self, timestamp: datetime, reflection: str,
                           interaction_id: str, note_type: str = "reflection",
                           metadata: Optional[Dict] = None) -> str:
    """Persist an AI reflection tied to an interaction.

    Returns:
        The generated note id ("note_" + 8 hex chars).

    Raises:
        Whatever the LanceDB insert raises, after logging it.
    """
    import json
    import pandas as pd

    note_id = f"note_{uuid.uuid4().hex[:8]}"
    embedding = self._generate_embedding(reflection)

    row = pd.DataFrame([{
        "id": note_id,
        "timestamp": timestamp,
        "reflection": reflection,
        "interaction_id": interaction_id,
        "note_type": note_type,
        "metadata": json.dumps(metadata or {}),
        # Zero-vector placeholder when no embedding could be produced.
        "embedding": embedding or [0.0] * 384,
    }])

    try:
        self.notes_table.add(row)
        logger.debug(f"Saved experiential note {note_id} to LanceDB")
    except Exception as e:
        logger.error(f"Failed to save experiential note to LanceDB: {e}")
        raise

    return note_id
206
+
207
def link_interaction_to_note(self, interaction_id: str, note_id: str) -> None:
    """Record a bidirectional interaction <-> note link.

    Best effort: insert failures are logged, never raised.
    """
    import pandas as pd

    row = pd.DataFrame([{
        "interaction_id": interaction_id,
        "note_id": note_id,
        "created": datetime.now(),
        "link_type": "bidirectional",
    }])

    try:
        self.links_table.add(row)
        logger.debug(f"Created link between {interaction_id} and {note_id}")
    except Exception as e:
        logger.error(f"Failed to create link in LanceDB: {e}")
226
+
227
def search_interactions(self, query: str, user_id: Optional[str] = None,
                        start_date: Optional[datetime] = None,
                        end_date: Optional[datetime] = None) -> List[Dict]:
    """
    Search interactions with SQL filters plus vector or text matching.

    Strategy:
        1. Build SQL filters from user_id and the date range.
        2. If an embedding provider is available, run a vector
           similarity search (limit 50) constrained by those filters.
        3. Otherwise, or when vector search fails, fall back to a
           LIKE-based text search over user_input, agent_response and
           topic (limit 100).

    Returns:
        A list of interaction dicts (see _convert_df_to_dicts);
        [] on any failure.
    """
    def esc(value: str) -> str:
        # Escape single quotes so user-supplied values cannot break out
        # of the string-built WHERE clause (fixes both crashes on
        # quoted input and a filter-injection vector).
        return value.replace("'", "''")

    try:
        filters = []
        if user_id:
            filters.append(f"user_id = '{esc(user_id)}'")
        if start_date:
            filters.append(f"timestamp >= '{start_date.isoformat()}'")
        if end_date:
            filters.append(f"timestamp <= '{end_date.isoformat()}'")
        where_clause = " AND ".join(filters) if filters else None

        # Preferred path: vector similarity when embeddings are available.
        if self.embedding_provider:
            try:
                query_embedding = self._generate_embedding(query)
                if query_embedding:
                    results = self.interactions_table.search(query_embedding).limit(50)
                    if where_clause:
                        results = results.where(where_clause)
                    return self._convert_df_to_dicts(results.to_pandas())
            except Exception as e:
                logger.warning(f"Vector search failed, falling back to text search: {e}")

        # Fallback: case-insensitive substring match across text fields.
        q = esc(query.lower())
        text_search = "(" + " OR ".join([
            f"LOWER(user_input) LIKE '%{q}%'",
            f"LOWER(agent_response) LIKE '%{q}%'",
            f"LOWER(topic) LIKE '%{q}%'",
        ]) + ")"

        final_where = f"({where_clause}) AND {text_search}" if where_clause else text_search

        df = self.interactions_table.search().where(final_where).limit(100).to_pandas()
        return self._convert_df_to_dicts(df)

    except Exception as e:
        logger.error(f"Search failed in LanceDB: {e}")
        return []
301
+
302
+ def _convert_df_to_dicts(self, df) -> List[Dict]:
303
+ """Convert pandas DataFrame to list of dictionaries"""
304
+ import json
305
+
306
+ results = []
307
+ for _, row in df.iterrows():
308
+ try:
309
+ result = {
310
+ "id": row["id"],
311
+ "user_id": row["user_id"],
312
+ "timestamp": row["timestamp"].isoformat() if hasattr(row["timestamp"], 'isoformat') else str(row["timestamp"]),
313
+ "user_input": row["user_input"],
314
+ "agent_response": row["agent_response"],
315
+ "topic": row["topic"],
316
+ "metadata": json.loads(row["metadata"]) if row["metadata"] else {}
317
+ }
318
+ results.append(result)
319
+ except Exception as e:
320
+ logger.warning(f"Failed to convert row to dict: {e}")
321
+ continue
322
+
323
+ return results
324
+
325
+ # IStorage interface implementation
326
def save(self, key: str, value: Any) -> None:
    """IStorage-compatible save.

    Keys shaped "<prefix>/<component>" are routed to
    save_memory_component using the last path segment. Keys without a
    "/" are silently ignored (NOTE(review): presumably intentional for
    interface compatibility -- confirm with callers).
    """
    if "/" not in key:
        return
    self.save_memory_component(key.rsplit("/", 1)[-1], value)
331
+
332
def load(self, key: str) -> Any:
    """IStorage-compatible load.

    Keys shaped "<prefix>/<component>" load the named component via
    load_memory_component; any other key yields None.
    """
    if "/" not in key:
        return None
    return self.load_memory_component(key.rsplit("/", 1)[-1])
338
+
339
def exists(self, key: str) -> bool:
    """Return True when a memory component matching *key* exists.

    Only "<prefix>/<component>" keys can exist; lookup errors are
    treated as "does not exist".
    """
    if "/" not in key:
        return False
    component_name = key.rsplit("/", 1)[-1]
    # Escape single quotes so odd component names cannot break the
    # string-built filter.
    safe_name = component_name.replace("'", "''")
    try:
        df = (self.components_table.search()
              .where(f"component_name = '{safe_name}'")
              .limit(1)
              .to_pandas())
        return len(df) > 0
    except Exception:
        # Was a bare `except:` -- narrowed so SystemExit and
        # KeyboardInterrupt are no longer swallowed.
        return False
349
+
350
def save_memory_component(self, component_name: str, component_data: Any) -> None:
    """Append a new, versioned snapshot of a memory component.

    The component is serialized to JSON (non-JSON values fall back to
    str() via `default=str`) and stored with version = previous max + 1
    (1 when none exists or the lookup fails). Insert failures are
    logged, never raised.
    """
    import json
    import pandas as pd

    # Plain objects are stored via their attribute dict.
    data = component_data.__dict__ if hasattr(component_data, '__dict__') else component_data

    # Determine the next version number for this component.
    try:
        existing = self.components_table.search().where(f"component_name = '{component_name}'").to_pandas()
        version = existing["version"].max() + 1 if len(existing) > 0 else 1
    except Exception:
        # Was a bare `except:` -- narrowed to Exception so process
        # control exceptions still propagate.
        version = 1

    record = pd.DataFrame([{
        "component_name": component_name,
        "timestamp": datetime.now(),
        "data": json.dumps(data, default=str),
        "version": version,
    }])

    try:
        self.components_table.add(record)
        logger.debug(f"Saved {component_name} component version {version} to LanceDB")
    except Exception as e:
        logger.error(f"Failed to save {component_name} component: {e}")
382
+
383
def load_memory_component(self, component_name: str) -> Optional[Any]:
    """Load the highest-versioned snapshot of a component.

    Returns the JSON-decoded component data, or None when the component
    has never been saved or the lookup fails (logged).
    """
    import json

    try:
        df = self.components_table.search().where(f"component_name = '{component_name}'").to_pandas()
        if len(df) == 0:
            return None
        # Pick the row carrying the largest version number.
        latest = df.loc[df["version"].idxmax()]
        return json.loads(latest["data"])
    except Exception as e:
        logger.error(f"Failed to load {component_name} component: {e}")
        return None
402
+
403
def get_stats(self) -> Dict[str, Any]:
    """Return per-table row counts plus connection info.

    NOTE(review): counts come from capped scans (10000 rows for the
    main tables, 1000 for components), so very large tables will
    under-report -- confirm that is acceptable.

    On failure, returns {"error": ..., "uri": ..., ...} instead of
    counts.
    """
    def row_count(table, cap):
        # Materialize up to `cap` rows and count them.
        return len(table.search().limit(cap).to_pandas())

    try:
        return {
            "total_interactions": row_count(self.interactions_table, 10000),
            "total_notes": row_count(self.notes_table, 10000),
            "total_links": row_count(self.links_table, 10000),
            "total_components": row_count(self.components_table, 1000),
            "uri": self.uri,
            "embedding_provider_available": self.embedding_provider is not None,
        }
    except Exception as e:
        logger.error(f"Failed to get stats: {e}")
        return {
            "error": str(e),
            "uri": self.uri,
            "embedding_provider_available": self.embedding_provider is not None,
        }