alma-memory 0.5.0-py3-none-any.whl → 0.7.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. alma/__init__.py +296 -194
  2. alma/compression/__init__.py +33 -0
  3. alma/compression/pipeline.py +980 -0
  4. alma/confidence/__init__.py +47 -47
  5. alma/confidence/engine.py +540 -540
  6. alma/confidence/types.py +351 -351
  7. alma/config/loader.py +157 -157
  8. alma/consolidation/__init__.py +23 -23
  9. alma/consolidation/engine.py +678 -678
  10. alma/consolidation/prompts.py +84 -84
  11. alma/core.py +1189 -322
  12. alma/domains/__init__.py +30 -30
  13. alma/domains/factory.py +359 -359
  14. alma/domains/schemas.py +448 -448
  15. alma/domains/types.py +272 -272
  16. alma/events/__init__.py +75 -75
  17. alma/events/emitter.py +285 -284
  18. alma/events/storage_mixin.py +246 -246
  19. alma/events/types.py +126 -126
  20. alma/events/webhook.py +425 -425
  21. alma/exceptions.py +49 -49
  22. alma/extraction/__init__.py +31 -31
  23. alma/extraction/auto_learner.py +265 -264
  24. alma/extraction/extractor.py +420 -420
  25. alma/graph/__init__.py +106 -81
  26. alma/graph/backends/__init__.py +32 -18
  27. alma/graph/backends/kuzu.py +624 -0
  28. alma/graph/backends/memgraph.py +432 -0
  29. alma/graph/backends/memory.py +236 -236
  30. alma/graph/backends/neo4j.py +417 -417
  31. alma/graph/base.py +159 -159
  32. alma/graph/extraction.py +198 -198
  33. alma/graph/store.py +860 -860
  34. alma/harness/__init__.py +35 -35
  35. alma/harness/base.py +386 -386
  36. alma/harness/domains.py +705 -705
  37. alma/initializer/__init__.py +37 -37
  38. alma/initializer/initializer.py +418 -418
  39. alma/initializer/types.py +250 -250
  40. alma/integration/__init__.py +62 -62
  41. alma/integration/claude_agents.py +444 -432
  42. alma/integration/helena.py +423 -423
  43. alma/integration/victor.py +471 -471
  44. alma/learning/__init__.py +101 -86
  45. alma/learning/decay.py +878 -0
  46. alma/learning/forgetting.py +1446 -1446
  47. alma/learning/heuristic_extractor.py +390 -390
  48. alma/learning/protocols.py +374 -374
  49. alma/learning/validation.py +346 -346
  50. alma/mcp/__init__.py +123 -45
  51. alma/mcp/__main__.py +156 -156
  52. alma/mcp/resources.py +122 -122
  53. alma/mcp/server.py +955 -591
  54. alma/mcp/tools.py +3254 -511
  55. alma/observability/__init__.py +91 -0
  56. alma/observability/config.py +302 -0
  57. alma/observability/guidelines.py +170 -0
  58. alma/observability/logging.py +424 -0
  59. alma/observability/metrics.py +583 -0
  60. alma/observability/tracing.py +440 -0
  61. alma/progress/__init__.py +21 -21
  62. alma/progress/tracker.py +607 -607
  63. alma/progress/types.py +250 -250
  64. alma/retrieval/__init__.py +134 -53
  65. alma/retrieval/budget.py +525 -0
  66. alma/retrieval/cache.py +1304 -1061
  67. alma/retrieval/embeddings.py +202 -202
  68. alma/retrieval/engine.py +850 -366
  69. alma/retrieval/modes.py +365 -0
  70. alma/retrieval/progressive.py +560 -0
  71. alma/retrieval/scoring.py +344 -344
  72. alma/retrieval/trust_scoring.py +637 -0
  73. alma/retrieval/verification.py +797 -0
  74. alma/session/__init__.py +19 -19
  75. alma/session/manager.py +442 -399
  76. alma/session/types.py +288 -288
  77. alma/storage/__init__.py +101 -61
  78. alma/storage/archive.py +233 -0
  79. alma/storage/azure_cosmos.py +1259 -1048
  80. alma/storage/base.py +1083 -525
  81. alma/storage/chroma.py +1443 -1443
  82. alma/storage/constants.py +103 -0
  83. alma/storage/file_based.py +614 -619
  84. alma/storage/migrations/__init__.py +21 -0
  85. alma/storage/migrations/base.py +321 -0
  86. alma/storage/migrations/runner.py +323 -0
  87. alma/storage/migrations/version_stores.py +337 -0
  88. alma/storage/migrations/versions/__init__.py +11 -0
  89. alma/storage/migrations/versions/v1_0_0.py +373 -0
  90. alma/storage/migrations/versions/v1_1_0_workflow_context.py +551 -0
  91. alma/storage/pinecone.py +1080 -1080
  92. alma/storage/postgresql.py +1948 -1452
  93. alma/storage/qdrant.py +1306 -1306
  94. alma/storage/sqlite_local.py +3041 -1358
  95. alma/testing/__init__.py +46 -0
  96. alma/testing/factories.py +301 -0
  97. alma/testing/mocks.py +389 -0
  98. alma/types.py +292 -264
  99. alma/utils/__init__.py +19 -0
  100. alma/utils/tokenizer.py +521 -0
  101. alma/workflow/__init__.py +83 -0
  102. alma/workflow/artifacts.py +170 -0
  103. alma/workflow/checkpoint.py +311 -0
  104. alma/workflow/context.py +228 -0
  105. alma/workflow/outcomes.py +189 -0
  106. alma/workflow/reducers.py +393 -0
  107. {alma_memory-0.5.0.dist-info → alma_memory-0.7.0.dist-info}/METADATA +244 -72
  108. alma_memory-0.7.0.dist-info/RECORD +112 -0
  109. alma_memory-0.5.0.dist-info/RECORD +0 -76
  110. {alma_memory-0.5.0.dist-info → alma_memory-0.7.0.dist-info}/WHEEL +0 -0
  111. {alma_memory-0.5.0.dist-info → alma_memory-0.7.0.dist-info}/top_level.txt +0 -0
--- a/alma/storage/sqlite_local.py
+++ b/alma/storage/sqlite_local.py
@@ -1,1358 +1,3041 @@
1
- """
2
- ALMA SQLite + FAISS Storage Backend.
3
-
4
- Local storage using SQLite for structured data and FAISS for vector search.
5
- This is the recommended backend for local development and testing.
6
- """
7
-
8
- import json
9
- import logging
10
- import sqlite3
11
- from contextlib import contextmanager
12
- from datetime import datetime, timezone
13
- from pathlib import Path
14
- from typing import Any, Dict, List, Optional, Tuple
15
-
16
- import numpy as np
17
-
18
- from alma.storage.base import StorageBackend
19
- from alma.types import (
20
- AntiPattern,
21
- DomainKnowledge,
22
- Heuristic,
23
- Outcome,
24
- UserPreference,
25
- )
26
-
27
- logger = logging.getLogger(__name__)
28
-
29
- # Try to import FAISS, fall back to numpy-based search if not available
30
- try:
31
- import faiss
32
-
33
- FAISS_AVAILABLE = True
34
- except ImportError:
35
- FAISS_AVAILABLE = False
36
- logger.warning("FAISS not available, falling back to numpy-based vector search")
37
-
38
-
39
- class SQLiteStorage(StorageBackend):
40
- """
41
- SQLite + FAISS storage backend.
42
-
43
- Uses SQLite for structured data and FAISS for efficient vector similarity search.
44
- Falls back to numpy cosine similarity if FAISS is not installed.
45
-
46
- Database schema:
47
- - heuristics: id, agent, project_id, condition, strategy, confidence, ...
48
- - outcomes: id, agent, project_id, task_type, task_description, success, ...
49
- - preferences: id, user_id, category, preference, source, ...
50
- - domain_knowledge: id, agent, project_id, domain, fact, ...
51
- - anti_patterns: id, agent, project_id, pattern, why_bad, ...
52
- - embeddings: id, memory_type, memory_id, embedding (blob)
53
- """
54
-
55
- def __init__(
56
- self,
57
- db_path: Path,
58
- embedding_dim: int = 384, # Default for all-MiniLM-L6-v2
59
- ):
60
- """
61
- Initialize SQLite storage.
62
-
63
- Args:
64
- db_path: Path to SQLite database file
65
- embedding_dim: Dimension of embedding vectors
66
- """
67
- self.db_path = Path(db_path)
68
- self.db_path.parent.mkdir(parents=True, exist_ok=True)
69
- self.embedding_dim = embedding_dim
70
-
71
- # Initialize database
72
- self._init_database()
73
-
74
- # Initialize FAISS indices (one per memory type)
75
- self._indices: Dict[str, Any] = {}
76
- self._id_maps: Dict[str, List[str]] = {} # memory_type -> [memory_ids]
77
- self._index_dirty: Dict[str, bool] = {} # Track which indexes need rebuilding
78
- self._load_faiss_indices()
79
-
80
- @classmethod
81
- def from_config(cls, config: Dict[str, Any]) -> "SQLiteStorage":
82
- """Create instance from configuration."""
83
- storage_dir = config.get("storage_dir", ".alma")
84
- db_name = config.get("db_name", "alma.db")
85
- embedding_dim = config.get("embedding_dim", 384)
86
-
87
- db_path = Path(storage_dir) / db_name
88
- return cls(db_path=db_path, embedding_dim=embedding_dim)
89
-
90
- @contextmanager
91
- def _get_connection(self):
92
- """Get database connection with context manager."""
93
- conn = sqlite3.connect(self.db_path)
94
- conn.row_factory = sqlite3.Row
95
- try:
96
- yield conn
97
- conn.commit()
98
- except Exception:
99
- conn.rollback()
100
- raise
101
- finally:
102
- conn.close()
103
-
104
- def _init_database(self):
105
- """Initialize database schema."""
106
- with self._get_connection() as conn:
107
- cursor = conn.cursor()
108
-
109
- # Heuristics table
110
- cursor.execute("""
111
- CREATE TABLE IF NOT EXISTS heuristics (
112
- id TEXT PRIMARY KEY,
113
- agent TEXT NOT NULL,
114
- project_id TEXT NOT NULL,
115
- condition TEXT NOT NULL,
116
- strategy TEXT NOT NULL,
117
- confidence REAL DEFAULT 0.0,
118
- occurrence_count INTEGER DEFAULT 0,
119
- success_count INTEGER DEFAULT 0,
120
- last_validated TEXT,
121
- created_at TEXT,
122
- metadata TEXT
123
- )
124
- """)
125
- cursor.execute(
126
- "CREATE INDEX IF NOT EXISTS idx_heuristics_project_agent "
127
- "ON heuristics(project_id, agent)"
128
- )
129
-
130
- # Outcomes table
131
- cursor.execute("""
132
- CREATE TABLE IF NOT EXISTS outcomes (
133
- id TEXT PRIMARY KEY,
134
- agent TEXT NOT NULL,
135
- project_id TEXT NOT NULL,
136
- task_type TEXT,
137
- task_description TEXT NOT NULL,
138
- success INTEGER DEFAULT 0,
139
- strategy_used TEXT,
140
- duration_ms INTEGER,
141
- error_message TEXT,
142
- user_feedback TEXT,
143
- timestamp TEXT,
144
- metadata TEXT
145
- )
146
- """)
147
- cursor.execute(
148
- "CREATE INDEX IF NOT EXISTS idx_outcomes_project_agent "
149
- "ON outcomes(project_id, agent)"
150
- )
151
- cursor.execute(
152
- "CREATE INDEX IF NOT EXISTS idx_outcomes_task_type "
153
- "ON outcomes(project_id, agent, task_type)"
154
- )
155
- cursor.execute(
156
- "CREATE INDEX IF NOT EXISTS idx_outcomes_timestamp "
157
- "ON outcomes(project_id, timestamp)"
158
- )
159
-
160
- # User preferences table
161
- cursor.execute("""
162
- CREATE TABLE IF NOT EXISTS preferences (
163
- id TEXT PRIMARY KEY,
164
- user_id TEXT NOT NULL,
165
- category TEXT,
166
- preference TEXT NOT NULL,
167
- source TEXT,
168
- confidence REAL DEFAULT 1.0,
169
- timestamp TEXT,
170
- metadata TEXT
171
- )
172
- """)
173
- cursor.execute(
174
- "CREATE INDEX IF NOT EXISTS idx_preferences_user "
175
- "ON preferences(user_id)"
176
- )
177
-
178
- # Domain knowledge table
179
- cursor.execute("""
180
- CREATE TABLE IF NOT EXISTS domain_knowledge (
181
- id TEXT PRIMARY KEY,
182
- agent TEXT NOT NULL,
183
- project_id TEXT NOT NULL,
184
- domain TEXT,
185
- fact TEXT NOT NULL,
186
- source TEXT,
187
- confidence REAL DEFAULT 1.0,
188
- last_verified TEXT,
189
- metadata TEXT
190
- )
191
- """)
192
- cursor.execute(
193
- "CREATE INDEX IF NOT EXISTS idx_domain_knowledge_project_agent "
194
- "ON domain_knowledge(project_id, agent)"
195
- )
196
-
197
- # Anti-patterns table
198
- cursor.execute("""
199
- CREATE TABLE IF NOT EXISTS anti_patterns (
200
- id TEXT PRIMARY KEY,
201
- agent TEXT NOT NULL,
202
- project_id TEXT NOT NULL,
203
- pattern TEXT NOT NULL,
204
- why_bad TEXT,
205
- better_alternative TEXT,
206
- occurrence_count INTEGER DEFAULT 1,
207
- last_seen TEXT,
208
- created_at TEXT,
209
- metadata TEXT
210
- )
211
- """)
212
- cursor.execute(
213
- "CREATE INDEX IF NOT EXISTS idx_anti_patterns_project_agent "
214
- "ON anti_patterns(project_id, agent)"
215
- )
216
-
217
- # Embeddings table (stores vectors as blobs)
218
- cursor.execute("""
219
- CREATE TABLE IF NOT EXISTS embeddings (
220
- id INTEGER PRIMARY KEY AUTOINCREMENT,
221
- memory_type TEXT NOT NULL,
222
- memory_id TEXT NOT NULL,
223
- embedding BLOB NOT NULL,
224
- UNIQUE(memory_type, memory_id)
225
- )
226
- """)
227
- cursor.execute(
228
- "CREATE INDEX IF NOT EXISTS idx_embeddings_type "
229
- "ON embeddings(memory_type)"
230
- )
231
-
232
- def _load_faiss_indices(self, memory_types: Optional[List[str]] = None):
233
- """Load or create FAISS indices for specified memory types.
234
-
235
- Args:
236
- memory_types: List of memory types to load. If None, loads all types.
237
- """
238
- if memory_types is None:
239
- memory_types = [
240
- "heuristics",
241
- "outcomes",
242
- "domain_knowledge",
243
- "anti_patterns",
244
- ]
245
-
246
- for memory_type in memory_types:
247
- if FAISS_AVAILABLE:
248
- # Use FAISS index
249
- self._indices[memory_type] = faiss.IndexFlatIP(self.embedding_dim)
250
- else:
251
- # Use list for numpy fallback
252
- self._indices[memory_type] = []
253
-
254
- self._id_maps[memory_type] = []
255
- self._index_dirty[memory_type] = False # Mark as fresh after rebuild
256
-
257
- # Load existing embeddings
258
- with self._get_connection() as conn:
259
- cursor = conn.cursor()
260
- cursor.execute(
261
- "SELECT memory_id, embedding FROM embeddings WHERE memory_type = ?",
262
- (memory_type,),
263
- )
264
- rows = cursor.fetchall()
265
-
266
- for row in rows:
267
- memory_id = row["memory_id"]
268
- embedding = np.frombuffer(row["embedding"], dtype=np.float32)
269
-
270
- self._id_maps[memory_type].append(memory_id)
271
- if FAISS_AVAILABLE:
272
- self._indices[memory_type].add(
273
- embedding.reshape(1, -1).astype(np.float32)
274
- )
275
- else:
276
- self._indices[memory_type].append(embedding)
277
-
278
- def _ensure_index_fresh(self, memory_type: str) -> None:
279
- """Rebuild index for a memory type if it has been marked dirty.
280
-
281
- This implements lazy rebuilding - indexes are only rebuilt when
282
- actually needed for search, not immediately on every delete.
283
-
284
- Args:
285
- memory_type: The type of memory index to check/rebuild.
286
- """
287
- if self._index_dirty.get(memory_type, False):
288
- logger.debug(f"Rebuilding dirty index for {memory_type}")
289
- self._load_faiss_indices([memory_type])
290
-
291
- def _add_to_index(
292
- self,
293
- memory_type: str,
294
- memory_id: str,
295
- embedding: Optional[List[float]],
296
- ):
297
- """Add embedding to FAISS index."""
298
- if embedding is None:
299
- return
300
-
301
- embedding_array = np.array(embedding, dtype=np.float32)
302
-
303
- # Store in database
304
- with self._get_connection() as conn:
305
- cursor = conn.cursor()
306
- cursor.execute(
307
- """
308
- INSERT OR REPLACE INTO embeddings (memory_type, memory_id, embedding)
309
- VALUES (?, ?, ?)
310
- """,
311
- (memory_type, memory_id, embedding_array.tobytes()),
312
- )
313
-
314
- # Add to index
315
- self._id_maps[memory_type].append(memory_id)
316
- if FAISS_AVAILABLE:
317
- self._indices[memory_type].add(
318
- embedding_array.reshape(1, -1).astype(np.float32)
319
- )
320
- else:
321
- self._indices[memory_type].append(embedding_array)
322
-
323
- def _search_index(
324
- self,
325
- memory_type: str,
326
- query_embedding: List[float],
327
- top_k: int,
328
- ) -> List[Tuple[str, float]]:
329
- """Search FAISS index for similar embeddings."""
330
- # Ensure index is up-to-date before searching (lazy rebuild)
331
- self._ensure_index_fresh(memory_type)
332
-
333
- if not self._id_maps[memory_type]:
334
- return []
335
-
336
- query = np.array(query_embedding, dtype=np.float32).reshape(1, -1)
337
-
338
- if FAISS_AVAILABLE:
339
- # Normalize for cosine similarity (IndexFlatIP)
340
- faiss.normalize_L2(query)
341
- scores, indices = self._indices[memory_type].search(
342
- query, min(top_k, len(self._id_maps[memory_type]))
343
- )
344
-
345
- results = []
346
- for score, idx in zip(scores[0], indices[0], strict=False):
347
- if idx >= 0 and idx < len(self._id_maps[memory_type]):
348
- results.append((self._id_maps[memory_type][idx], float(score)))
349
- return results
350
- else:
351
- # Numpy fallback with cosine similarity
352
- embeddings = np.array(self._indices[memory_type])
353
- if len(embeddings) == 0:
354
- return []
355
-
356
- # Normalize
357
- query_norm = query / np.linalg.norm(query)
358
- emb_norms = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)
359
-
360
- # Cosine similarity
361
- similarities = np.dot(emb_norms, query_norm.T).flatten()
362
-
363
- # Get top k
364
- top_indices = np.argsort(similarities)[::-1][:top_k]
365
-
366
- return [
367
- (self._id_maps[memory_type][i], float(similarities[i]))
368
- for i in top_indices
369
- ]
370
-
371
- # ==================== WRITE OPERATIONS ====================
372
-
373
- def save_heuristic(self, heuristic: Heuristic) -> str:
374
- """Save a heuristic."""
375
- with self._get_connection() as conn:
376
- cursor = conn.cursor()
377
- cursor.execute(
378
- """
379
- INSERT OR REPLACE INTO heuristics
380
- (id, agent, project_id, condition, strategy, confidence,
381
- occurrence_count, success_count, last_validated, created_at, metadata)
382
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
383
- """,
384
- (
385
- heuristic.id,
386
- heuristic.agent,
387
- heuristic.project_id,
388
- heuristic.condition,
389
- heuristic.strategy,
390
- heuristic.confidence,
391
- heuristic.occurrence_count,
392
- heuristic.success_count,
393
- (
394
- heuristic.last_validated.isoformat()
395
- if heuristic.last_validated
396
- else None
397
- ),
398
- heuristic.created_at.isoformat() if heuristic.created_at else None,
399
- json.dumps(heuristic.metadata) if heuristic.metadata else None,
400
- ),
401
- )
402
-
403
- # Add embedding to index
404
- self._add_to_index("heuristics", heuristic.id, heuristic.embedding)
405
- logger.debug(f"Saved heuristic: {heuristic.id}")
406
- return heuristic.id
407
-
408
- def save_outcome(self, outcome: Outcome) -> str:
409
- """Save an outcome."""
410
- with self._get_connection() as conn:
411
- cursor = conn.cursor()
412
- cursor.execute(
413
- """
414
- INSERT OR REPLACE INTO outcomes
415
- (id, agent, project_id, task_type, task_description, success,
416
- strategy_used, duration_ms, error_message, user_feedback, timestamp, metadata)
417
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
418
- """,
419
- (
420
- outcome.id,
421
- outcome.agent,
422
- outcome.project_id,
423
- outcome.task_type,
424
- outcome.task_description,
425
- 1 if outcome.success else 0,
426
- outcome.strategy_used,
427
- outcome.duration_ms,
428
- outcome.error_message,
429
- outcome.user_feedback,
430
- outcome.timestamp.isoformat() if outcome.timestamp else None,
431
- json.dumps(outcome.metadata) if outcome.metadata else None,
432
- ),
433
- )
434
-
435
- # Add embedding to index
436
- self._add_to_index("outcomes", outcome.id, outcome.embedding)
437
- logger.debug(f"Saved outcome: {outcome.id}")
438
- return outcome.id
439
-
440
- def save_user_preference(self, preference: UserPreference) -> str:
441
- """Save a user preference."""
442
- with self._get_connection() as conn:
443
- cursor = conn.cursor()
444
- cursor.execute(
445
- """
446
- INSERT OR REPLACE INTO preferences
447
- (id, user_id, category, preference, source, confidence, timestamp, metadata)
448
- VALUES (?, ?, ?, ?, ?, ?, ?, ?)
449
- """,
450
- (
451
- preference.id,
452
- preference.user_id,
453
- preference.category,
454
- preference.preference,
455
- preference.source,
456
- preference.confidence,
457
- preference.timestamp.isoformat() if preference.timestamp else None,
458
- json.dumps(preference.metadata) if preference.metadata else None,
459
- ),
460
- )
461
- logger.debug(f"Saved preference: {preference.id}")
462
- return preference.id
463
-
464
- def save_domain_knowledge(self, knowledge: DomainKnowledge) -> str:
465
- """Save domain knowledge."""
466
- with self._get_connection() as conn:
467
- cursor = conn.cursor()
468
- cursor.execute(
469
- """
470
- INSERT OR REPLACE INTO domain_knowledge
471
- (id, agent, project_id, domain, fact, source, confidence, last_verified, metadata)
472
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
473
- """,
474
- (
475
- knowledge.id,
476
- knowledge.agent,
477
- knowledge.project_id,
478
- knowledge.domain,
479
- knowledge.fact,
480
- knowledge.source,
481
- knowledge.confidence,
482
- (
483
- knowledge.last_verified.isoformat()
484
- if knowledge.last_verified
485
- else None
486
- ),
487
- json.dumps(knowledge.metadata) if knowledge.metadata else None,
488
- ),
489
- )
490
-
491
- # Add embedding to index
492
- self._add_to_index("domain_knowledge", knowledge.id, knowledge.embedding)
493
- logger.debug(f"Saved domain knowledge: {knowledge.id}")
494
- return knowledge.id
495
-
496
- def save_anti_pattern(self, anti_pattern: AntiPattern) -> str:
497
- """Save an anti-pattern."""
498
- with self._get_connection() as conn:
499
- cursor = conn.cursor()
500
- cursor.execute(
501
- """
502
- INSERT OR REPLACE INTO anti_patterns
503
- (id, agent, project_id, pattern, why_bad, better_alternative,
504
- occurrence_count, last_seen, created_at, metadata)
505
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
506
- """,
507
- (
508
- anti_pattern.id,
509
- anti_pattern.agent,
510
- anti_pattern.project_id,
511
- anti_pattern.pattern,
512
- anti_pattern.why_bad,
513
- anti_pattern.better_alternative,
514
- anti_pattern.occurrence_count,
515
- (
516
- anti_pattern.last_seen.isoformat()
517
- if anti_pattern.last_seen
518
- else None
519
- ),
520
- (
521
- anti_pattern.created_at.isoformat()
522
- if anti_pattern.created_at
523
- else None
524
- ),
525
- (
526
- json.dumps(anti_pattern.metadata)
527
- if anti_pattern.metadata
528
- else None
529
- ),
530
- ),
531
- )
532
-
533
- # Add embedding to index
534
- self._add_to_index("anti_patterns", anti_pattern.id, anti_pattern.embedding)
535
- logger.debug(f"Saved anti-pattern: {anti_pattern.id}")
536
- return anti_pattern.id
537
-
538
- # ==================== BATCH WRITE OPERATIONS ====================
539
-
540
- def save_heuristics(self, heuristics: List[Heuristic]) -> List[str]:
541
- """Save multiple heuristics in a batch using executemany."""
542
- if not heuristics:
543
- return []
544
-
545
- with self._get_connection() as conn:
546
- cursor = conn.cursor()
547
- cursor.executemany(
548
- """
549
- INSERT OR REPLACE INTO heuristics
550
- (id, agent, project_id, condition, strategy, confidence,
551
- occurrence_count, success_count, last_validated, created_at, metadata)
552
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
553
- """,
554
- [
555
- (
556
- h.id,
557
- h.agent,
558
- h.project_id,
559
- h.condition,
560
- h.strategy,
561
- h.confidence,
562
- h.occurrence_count,
563
- h.success_count,
564
- h.last_validated.isoformat() if h.last_validated else None,
565
- h.created_at.isoformat() if h.created_at else None,
566
- json.dumps(h.metadata) if h.metadata else None,
567
- )
568
- for h in heuristics
569
- ],
570
- )
571
-
572
- # Add embeddings to index
573
- for h in heuristics:
574
- self._add_to_index("heuristics", h.id, h.embedding)
575
-
576
- logger.debug(f"Batch saved {len(heuristics)} heuristics")
577
- return [h.id for h in heuristics]
578
-
579
- def save_outcomes(self, outcomes: List[Outcome]) -> List[str]:
580
- """Save multiple outcomes in a batch using executemany."""
581
- if not outcomes:
582
- return []
583
-
584
- with self._get_connection() as conn:
585
- cursor = conn.cursor()
586
- cursor.executemany(
587
- """
588
- INSERT OR REPLACE INTO outcomes
589
- (id, agent, project_id, task_type, task_description, success,
590
- strategy_used, duration_ms, error_message, user_feedback, timestamp, metadata)
591
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
592
- """,
593
- [
594
- (
595
- o.id,
596
- o.agent,
597
- o.project_id,
598
- o.task_type,
599
- o.task_description,
600
- 1 if o.success else 0,
601
- o.strategy_used,
602
- o.duration_ms,
603
- o.error_message,
604
- o.user_feedback,
605
- o.timestamp.isoformat() if o.timestamp else None,
606
- json.dumps(o.metadata) if o.metadata else None,
607
- )
608
- for o in outcomes
609
- ],
610
- )
611
-
612
- # Add embeddings to index
613
- for o in outcomes:
614
- self._add_to_index("outcomes", o.id, o.embedding)
615
-
616
- logger.debug(f"Batch saved {len(outcomes)} outcomes")
617
- return [o.id for o in outcomes]
618
-
619
- def save_domain_knowledge_batch(
620
- self, knowledge_items: List[DomainKnowledge]
621
- ) -> List[str]:
622
- """Save multiple domain knowledge items in a batch using executemany."""
623
- if not knowledge_items:
624
- return []
625
-
626
- with self._get_connection() as conn:
627
- cursor = conn.cursor()
628
- cursor.executemany(
629
- """
630
- INSERT OR REPLACE INTO domain_knowledge
631
- (id, agent, project_id, domain, fact, source, confidence, last_verified, metadata)
632
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
633
- """,
634
- [
635
- (
636
- k.id,
637
- k.agent,
638
- k.project_id,
639
- k.domain,
640
- k.fact,
641
- k.source,
642
- k.confidence,
643
- k.last_verified.isoformat() if k.last_verified else None,
644
- json.dumps(k.metadata) if k.metadata else None,
645
- )
646
- for k in knowledge_items
647
- ],
648
- )
649
-
650
- # Add embeddings to index
651
- for k in knowledge_items:
652
- self._add_to_index("domain_knowledge", k.id, k.embedding)
653
-
654
- logger.debug(f"Batch saved {len(knowledge_items)} domain knowledge items")
655
- return [k.id for k in knowledge_items]
656
-
657
- # ==================== READ OPERATIONS ====================
658
-
659
- def get_heuristics(
660
- self,
661
- project_id: str,
662
- agent: Optional[str] = None,
663
- embedding: Optional[List[float]] = None,
664
- top_k: int = 5,
665
- min_confidence: float = 0.0,
666
- ) -> List[Heuristic]:
667
- """Get heuristics with optional vector search."""
668
- # If embedding provided, use vector search to get candidate IDs
669
- candidate_ids = None
670
- if embedding:
671
- search_results = self._search_index("heuristics", embedding, top_k * 2)
672
- candidate_ids = [id for id, _ in search_results]
673
-
674
- with self._get_connection() as conn:
675
- cursor = conn.cursor()
676
-
677
- query = "SELECT * FROM heuristics WHERE project_id = ? AND confidence >= ?"
678
- params: List[Any] = [project_id, min_confidence]
679
-
680
- if agent:
681
- query += " AND agent = ?"
682
- params.append(agent)
683
-
684
- if candidate_ids is not None:
685
- placeholders = ",".join("?" * len(candidate_ids))
686
- query += f" AND id IN ({placeholders})"
687
- params.extend(candidate_ids)
688
-
689
- query += " ORDER BY confidence DESC LIMIT ?"
690
- params.append(top_k)
691
-
692
- cursor.execute(query, params)
693
- rows = cursor.fetchall()
694
-
695
- return [self._row_to_heuristic(row) for row in rows]
696
-
697
- def get_outcomes(
698
- self,
699
- project_id: str,
700
- agent: Optional[str] = None,
701
- task_type: Optional[str] = None,
702
- embedding: Optional[List[float]] = None,
703
- top_k: int = 5,
704
- success_only: bool = False,
705
- ) -> List[Outcome]:
706
- """Get outcomes with optional vector search."""
707
- candidate_ids = None
708
- if embedding:
709
- search_results = self._search_index("outcomes", embedding, top_k * 2)
710
- candidate_ids = [id for id, _ in search_results]
711
-
712
- with self._get_connection() as conn:
713
- cursor = conn.cursor()
714
-
715
- query = "SELECT * FROM outcomes WHERE project_id = ?"
716
- params: List[Any] = [project_id]
717
-
718
- if agent:
719
- query += " AND agent = ?"
720
- params.append(agent)
721
-
722
- if task_type:
723
- query += " AND task_type = ?"
724
- params.append(task_type)
725
-
726
- if success_only:
727
- query += " AND success = 1"
728
-
729
- if candidate_ids is not None:
730
- placeholders = ",".join("?" * len(candidate_ids))
731
- query += f" AND id IN ({placeholders})"
732
- params.extend(candidate_ids)
733
-
734
- query += " ORDER BY timestamp DESC LIMIT ?"
735
- params.append(top_k)
736
-
737
- cursor.execute(query, params)
738
- rows = cursor.fetchall()
739
-
740
- return [self._row_to_outcome(row) for row in rows]
741
-
742
- def get_user_preferences(
743
- self,
744
- user_id: str,
745
- category: Optional[str] = None,
746
- ) -> List[UserPreference]:
747
- """Get user preferences."""
748
- with self._get_connection() as conn:
749
- cursor = conn.cursor()
750
-
751
- query = "SELECT * FROM preferences WHERE user_id = ?"
752
- params: List[Any] = [user_id]
753
-
754
- if category:
755
- query += " AND category = ?"
756
- params.append(category)
757
-
758
- cursor.execute(query, params)
759
- rows = cursor.fetchall()
760
-
761
- return [self._row_to_preference(row) for row in rows]
762
-
763
- def get_domain_knowledge(
764
- self,
765
- project_id: str,
766
- agent: Optional[str] = None,
767
- domain: Optional[str] = None,
768
- embedding: Optional[List[float]] = None,
769
- top_k: int = 5,
770
- ) -> List[DomainKnowledge]:
771
- """Get domain knowledge with optional vector search."""
772
- candidate_ids = None
773
- if embedding:
774
- search_results = self._search_index(
775
- "domain_knowledge", embedding, top_k * 2
776
- )
777
- candidate_ids = [id for id, _ in search_results]
778
-
779
- with self._get_connection() as conn:
780
- cursor = conn.cursor()
781
-
782
- query = "SELECT * FROM domain_knowledge WHERE project_id = ?"
783
- params: List[Any] = [project_id]
784
-
785
- if agent:
786
- query += " AND agent = ?"
787
- params.append(agent)
788
-
789
- if domain:
790
- query += " AND domain = ?"
791
- params.append(domain)
792
-
793
- if candidate_ids is not None:
794
- placeholders = ",".join("?" * len(candidate_ids))
795
- query += f" AND id IN ({placeholders})"
796
- params.extend(candidate_ids)
797
-
798
- query += " ORDER BY confidence DESC LIMIT ?"
799
- params.append(top_k)
800
-
801
- cursor.execute(query, params)
802
- rows = cursor.fetchall()
803
-
804
- return [self._row_to_domain_knowledge(row) for row in rows]
805
-
806
- def get_anti_patterns(
807
- self,
808
- project_id: str,
809
- agent: Optional[str] = None,
810
- embedding: Optional[List[float]] = None,
811
- top_k: int = 5,
812
- ) -> List[AntiPattern]:
813
- """Get anti-patterns with optional vector search."""
814
- candidate_ids = None
815
- if embedding:
816
- search_results = self._search_index("anti_patterns", embedding, top_k * 2)
817
- candidate_ids = [id for id, _ in search_results]
818
-
819
- with self._get_connection() as conn:
820
- cursor = conn.cursor()
821
-
822
- query = "SELECT * FROM anti_patterns WHERE project_id = ?"
823
- params: List[Any] = [project_id]
824
-
825
- if agent:
826
- query += " AND agent = ?"
827
- params.append(agent)
828
-
829
- if candidate_ids is not None:
830
- placeholders = ",".join("?" * len(candidate_ids))
831
- query += f" AND id IN ({placeholders})"
832
- params.extend(candidate_ids)
833
-
834
- query += " ORDER BY occurrence_count DESC LIMIT ?"
835
- params.append(top_k)
836
-
837
- cursor.execute(query, params)
838
- rows = cursor.fetchall()
839
-
840
- return [self._row_to_anti_pattern(row) for row in rows]
841
-
842
- # ==================== MULTI-AGENT MEMORY SHARING ====================
843
-
844
- def get_heuristics_for_agents(
845
- self,
846
- project_id: str,
847
- agents: List[str],
848
- embedding: Optional[List[float]] = None,
849
- top_k: int = 5,
850
- min_confidence: float = 0.0,
851
- ) -> List[Heuristic]:
852
- """Get heuristics from multiple agents using optimized IN query."""
853
- if not agents:
854
- return []
855
-
856
- candidate_ids = None
857
- if embedding:
858
- search_results = self._search_index(
859
- "heuristics", embedding, top_k * 2 * len(agents)
860
- )
861
- candidate_ids = [id for id, _ in search_results]
862
-
863
- with self._get_connection() as conn:
864
- cursor = conn.cursor()
865
-
866
- placeholders = ",".join("?" * len(agents))
867
- query = f"SELECT * FROM heuristics WHERE project_id = ? AND confidence >= ? AND agent IN ({placeholders})"
868
- params: List[Any] = [project_id, min_confidence] + list(agents)
869
-
870
- if candidate_ids is not None:
871
- id_placeholders = ",".join("?" * len(candidate_ids))
872
- query += f" AND id IN ({id_placeholders})"
873
- params.extend(candidate_ids)
874
-
875
- query += " ORDER BY confidence DESC LIMIT ?"
876
- params.append(top_k * len(agents))
877
-
878
- cursor.execute(query, params)
879
- rows = cursor.fetchall()
880
-
881
- return [self._row_to_heuristic(row) for row in rows]
882
-
883
- def get_outcomes_for_agents(
884
- self,
885
- project_id: str,
886
- agents: List[str],
887
- task_type: Optional[str] = None,
888
- embedding: Optional[List[float]] = None,
889
- top_k: int = 5,
890
- success_only: bool = False,
891
- ) -> List[Outcome]:
892
- """Get outcomes from multiple agents using optimized IN query."""
893
- if not agents:
894
- return []
895
-
896
- candidate_ids = None
897
- if embedding:
898
- search_results = self._search_index(
899
- "outcomes", embedding, top_k * 2 * len(agents)
900
- )
901
- candidate_ids = [id for id, _ in search_results]
902
-
903
- with self._get_connection() as conn:
904
- cursor = conn.cursor()
905
-
906
- placeholders = ",".join("?" * len(agents))
907
- query = f"SELECT * FROM outcomes WHERE project_id = ? AND agent IN ({placeholders})"
908
- params: List[Any] = [project_id] + list(agents)
909
-
910
- if task_type:
911
- query += " AND task_type = ?"
912
- params.append(task_type)
913
-
914
- if success_only:
915
- query += " AND success = 1"
916
-
917
- if candidate_ids is not None:
918
- id_placeholders = ",".join("?" * len(candidate_ids))
919
- query += f" AND id IN ({id_placeholders})"
920
- params.extend(candidate_ids)
921
-
922
- query += " ORDER BY timestamp DESC LIMIT ?"
923
- params.append(top_k * len(agents))
924
-
925
- cursor.execute(query, params)
926
- rows = cursor.fetchall()
927
-
928
- return [self._row_to_outcome(row) for row in rows]
929
-
930
- def get_domain_knowledge_for_agents(
931
- self,
932
- project_id: str,
933
- agents: List[str],
934
- domain: Optional[str] = None,
935
- embedding: Optional[List[float]] = None,
936
- top_k: int = 5,
937
- ) -> List[DomainKnowledge]:
938
- """Get domain knowledge from multiple agents using optimized IN query."""
939
- if not agents:
940
- return []
941
-
942
- candidate_ids = None
943
- if embedding:
944
- search_results = self._search_index(
945
- "domain_knowledge", embedding, top_k * 2 * len(agents)
946
- )
947
- candidate_ids = [id for id, _ in search_results]
948
-
949
- with self._get_connection() as conn:
950
- cursor = conn.cursor()
951
-
952
- placeholders = ",".join("?" * len(agents))
953
- query = f"SELECT * FROM domain_knowledge WHERE project_id = ? AND agent IN ({placeholders})"
954
- params: List[Any] = [project_id] + list(agents)
955
-
956
- if domain:
957
- query += " AND domain = ?"
958
- params.append(domain)
959
-
960
- if candidate_ids is not None:
961
- id_placeholders = ",".join("?" * len(candidate_ids))
962
- query += f" AND id IN ({id_placeholders})"
963
- params.extend(candidate_ids)
964
-
965
- query += " ORDER BY confidence DESC LIMIT ?"
966
- params.append(top_k * len(agents))
967
-
968
- cursor.execute(query, params)
969
- rows = cursor.fetchall()
970
-
971
- return [self._row_to_domain_knowledge(row) for row in rows]
972
-
973
- def get_anti_patterns_for_agents(
974
- self,
975
- project_id: str,
976
- agents: List[str],
977
- embedding: Optional[List[float]] = None,
978
- top_k: int = 5,
979
- ) -> List[AntiPattern]:
980
- """Get anti-patterns from multiple agents using optimized IN query."""
981
- if not agents:
982
- return []
983
-
984
- candidate_ids = None
985
- if embedding:
986
- search_results = self._search_index(
987
- "anti_patterns", embedding, top_k * 2 * len(agents)
988
- )
989
- candidate_ids = [id for id, _ in search_results]
990
-
991
- with self._get_connection() as conn:
992
- cursor = conn.cursor()
993
-
994
- placeholders = ",".join("?" * len(agents))
995
- query = f"SELECT * FROM anti_patterns WHERE project_id = ? AND agent IN ({placeholders})"
996
- params: List[Any] = [project_id] + list(agents)
997
-
998
- if candidate_ids is not None:
999
- id_placeholders = ",".join("?" * len(candidate_ids))
1000
- query += f" AND id IN ({id_placeholders})"
1001
- params.extend(candidate_ids)
1002
-
1003
- query += " ORDER BY occurrence_count DESC LIMIT ?"
1004
- params.append(top_k * len(agents))
1005
-
1006
- cursor.execute(query, params)
1007
- rows = cursor.fetchall()
1008
-
1009
- return [self._row_to_anti_pattern(row) for row in rows]
1010
-
1011
- # ==================== UPDATE OPERATIONS ====================
1012
-
1013
- def update_heuristic(
1014
- self,
1015
- heuristic_id: str,
1016
- updates: Dict[str, Any],
1017
- ) -> bool:
1018
- """Update a heuristic's fields."""
1019
- if not updates:
1020
- return False
1021
-
1022
- set_clauses = []
1023
- params = []
1024
- for key, value in updates.items():
1025
- if key == "metadata" and value:
1026
- value = json.dumps(value)
1027
- elif isinstance(value, datetime):
1028
- value = value.isoformat()
1029
- set_clauses.append(f"{key} = ?")
1030
- params.append(value)
1031
-
1032
- params.append(heuristic_id)
1033
-
1034
- with self._get_connection() as conn:
1035
- cursor = conn.cursor()
1036
- cursor.execute(
1037
- f"UPDATE heuristics SET {', '.join(set_clauses)} WHERE id = ?",
1038
- params,
1039
- )
1040
- return cursor.rowcount > 0
1041
-
1042
- def increment_heuristic_occurrence(
1043
- self,
1044
- heuristic_id: str,
1045
- success: bool,
1046
- ) -> bool:
1047
- """Increment heuristic occurrence count."""
1048
- with self._get_connection() as conn:
1049
- cursor = conn.cursor()
1050
-
1051
- if success:
1052
- cursor.execute(
1053
- """
1054
- UPDATE heuristics
1055
- SET occurrence_count = occurrence_count + 1,
1056
- success_count = success_count + 1,
1057
- last_validated = ?
1058
- WHERE id = ?
1059
- """,
1060
- (datetime.now(timezone.utc).isoformat(), heuristic_id),
1061
- )
1062
- else:
1063
- cursor.execute(
1064
- """
1065
- UPDATE heuristics
1066
- SET occurrence_count = occurrence_count + 1,
1067
- last_validated = ?
1068
- WHERE id = ?
1069
- """,
1070
- (datetime.now(timezone.utc).isoformat(), heuristic_id),
1071
- )
1072
-
1073
- return cursor.rowcount > 0
1074
-
1075
- # ==================== DELETE OPERATIONS ====================
1076
-
1077
- def delete_outcomes_older_than(
1078
- self,
1079
- project_id: str,
1080
- older_than: datetime,
1081
- agent: Optional[str] = None,
1082
- ) -> int:
1083
- """Delete old outcomes."""
1084
- with self._get_connection() as conn:
1085
- cursor = conn.cursor()
1086
-
1087
- query = "DELETE FROM outcomes WHERE project_id = ? AND timestamp < ?"
1088
- params: List[Any] = [project_id, older_than.isoformat()]
1089
-
1090
- if agent:
1091
- query += " AND agent = ?"
1092
- params.append(agent)
1093
-
1094
- cursor.execute(query, params)
1095
- deleted = cursor.rowcount
1096
-
1097
- logger.info(f"Deleted {deleted} old outcomes")
1098
- return deleted
1099
-
1100
- def delete_low_confidence_heuristics(
1101
- self,
1102
- project_id: str,
1103
- below_confidence: float,
1104
- agent: Optional[str] = None,
1105
- ) -> int:
1106
- """Delete low-confidence heuristics."""
1107
- with self._get_connection() as conn:
1108
- cursor = conn.cursor()
1109
-
1110
- query = "DELETE FROM heuristics WHERE project_id = ? AND confidence < ?"
1111
- params: List[Any] = [project_id, below_confidence]
1112
-
1113
- if agent:
1114
- query += " AND agent = ?"
1115
- params.append(agent)
1116
-
1117
- cursor.execute(query, params)
1118
- deleted = cursor.rowcount
1119
-
1120
- logger.info(f"Deleted {deleted} low-confidence heuristics")
1121
- return deleted
1122
-
1123
- # ==================== STATS ====================
1124
-
1125
- def get_stats(
1126
- self,
1127
- project_id: str,
1128
- agent: Optional[str] = None,
1129
- ) -> Dict[str, Any]:
1130
- """Get memory statistics."""
1131
- stats = {
1132
- "project_id": project_id,
1133
- "agent": agent,
1134
- "storage_type": "sqlite",
1135
- "faiss_available": FAISS_AVAILABLE,
1136
- }
1137
-
1138
- with self._get_connection() as conn:
1139
- cursor = conn.cursor()
1140
-
1141
- tables = ["heuristics", "outcomes", "domain_knowledge", "anti_patterns"]
1142
- for table in tables:
1143
- query = f"SELECT COUNT(*) FROM {table} WHERE project_id = ?"
1144
- params: List[Any] = [project_id]
1145
- if agent:
1146
- query += " AND agent = ?"
1147
- params.append(agent)
1148
- cursor.execute(query, params)
1149
- stats[f"{table}_count"] = cursor.fetchone()[0]
1150
-
1151
- # Preferences don't have project_id
1152
- cursor.execute("SELECT COUNT(*) FROM preferences")
1153
- stats["preferences_count"] = cursor.fetchone()[0]
1154
-
1155
- # Embedding counts
1156
- cursor.execute("SELECT COUNT(*) FROM embeddings")
1157
- stats["embeddings_count"] = cursor.fetchone()[0]
1158
-
1159
- stats["total_count"] = sum(
1160
- stats.get(k, 0) for k in stats if k.endswith("_count")
1161
- )
1162
-
1163
- return stats
1164
-
1165
- # ==================== HELPERS ====================
1166
-
1167
- def _parse_datetime(self, value: Any) -> Optional[datetime]:
1168
- """Parse datetime from string."""
1169
- if value is None:
1170
- return None
1171
- if isinstance(value, datetime):
1172
- return value
1173
- try:
1174
- return datetime.fromisoformat(value.replace("Z", "+00:00"))
1175
- except (ValueError, AttributeError):
1176
- return None
1177
-
1178
- def _row_to_heuristic(self, row: sqlite3.Row) -> Heuristic:
1179
- """Convert database row to Heuristic."""
1180
- return Heuristic(
1181
- id=row["id"],
1182
- agent=row["agent"],
1183
- project_id=row["project_id"],
1184
- condition=row["condition"],
1185
- strategy=row["strategy"],
1186
- confidence=row["confidence"] or 0.0,
1187
- occurrence_count=row["occurrence_count"] or 0,
1188
- success_count=row["success_count"] or 0,
1189
- last_validated=self._parse_datetime(row["last_validated"])
1190
- or datetime.now(timezone.utc),
1191
- created_at=self._parse_datetime(row["created_at"])
1192
- or datetime.now(timezone.utc),
1193
- metadata=json.loads(row["metadata"]) if row["metadata"] else {},
1194
- )
1195
-
1196
- def _row_to_outcome(self, row: sqlite3.Row) -> Outcome:
1197
- """Convert database row to Outcome."""
1198
- return Outcome(
1199
- id=row["id"],
1200
- agent=row["agent"],
1201
- project_id=row["project_id"],
1202
- task_type=row["task_type"] or "general",
1203
- task_description=row["task_description"],
1204
- success=bool(row["success"]),
1205
- strategy_used=row["strategy_used"] or "",
1206
- duration_ms=row["duration_ms"],
1207
- error_message=row["error_message"],
1208
- user_feedback=row["user_feedback"],
1209
- timestamp=self._parse_datetime(row["timestamp"])
1210
- or datetime.now(timezone.utc),
1211
- metadata=json.loads(row["metadata"]) if row["metadata"] else {},
1212
- )
1213
-
1214
- def _row_to_preference(self, row: sqlite3.Row) -> UserPreference:
1215
- """Convert database row to UserPreference."""
1216
- return UserPreference(
1217
- id=row["id"],
1218
- user_id=row["user_id"],
1219
- category=row["category"] or "general",
1220
- preference=row["preference"],
1221
- source=row["source"] or "unknown",
1222
- confidence=row["confidence"] or 1.0,
1223
- timestamp=self._parse_datetime(row["timestamp"])
1224
- or datetime.now(timezone.utc),
1225
- metadata=json.loads(row["metadata"]) if row["metadata"] else {},
1226
- )
1227
-
1228
- def _row_to_domain_knowledge(self, row: sqlite3.Row) -> DomainKnowledge:
1229
- """Convert database row to DomainKnowledge."""
1230
- return DomainKnowledge(
1231
- id=row["id"],
1232
- agent=row["agent"],
1233
- project_id=row["project_id"],
1234
- domain=row["domain"] or "general",
1235
- fact=row["fact"],
1236
- source=row["source"] or "unknown",
1237
- confidence=row["confidence"] or 1.0,
1238
- last_verified=self._parse_datetime(row["last_verified"])
1239
- or datetime.now(timezone.utc),
1240
- metadata=json.loads(row["metadata"]) if row["metadata"] else {},
1241
- )
1242
-
1243
- def _row_to_anti_pattern(self, row: sqlite3.Row) -> AntiPattern:
1244
- """Convert database row to AntiPattern."""
1245
- return AntiPattern(
1246
- id=row["id"],
1247
- agent=row["agent"],
1248
- project_id=row["project_id"],
1249
- pattern=row["pattern"],
1250
- why_bad=row["why_bad"] or "",
1251
- better_alternative=row["better_alternative"] or "",
1252
- occurrence_count=row["occurrence_count"] or 1,
1253
- last_seen=self._parse_datetime(row["last_seen"])
1254
- or datetime.now(timezone.utc),
1255
- created_at=self._parse_datetime(row["created_at"])
1256
- or datetime.now(timezone.utc),
1257
- metadata=json.loads(row["metadata"]) if row["metadata"] else {},
1258
- )
1259
-
1260
- # ===== Additional abstract method implementations =====
1261
-
1262
- def update_heuristic_confidence(
1263
- self,
1264
- heuristic_id: str,
1265
- new_confidence: float,
1266
- ) -> bool:
1267
- """Update confidence score for a heuristic."""
1268
- with self._get_connection() as conn:
1269
- cursor = conn.execute(
1270
- "UPDATE heuristics SET confidence = ? WHERE id = ?",
1271
- (new_confidence, heuristic_id),
1272
- )
1273
- return cursor.rowcount > 0
1274
-
1275
- def update_knowledge_confidence(
1276
- self,
1277
- knowledge_id: str,
1278
- new_confidence: float,
1279
- ) -> bool:
1280
- """Update confidence score for domain knowledge."""
1281
- with self._get_connection() as conn:
1282
- cursor = conn.execute(
1283
- "UPDATE domain_knowledge SET confidence = ? WHERE id = ?",
1284
- (new_confidence, knowledge_id),
1285
- )
1286
- return cursor.rowcount > 0
1287
-
1288
- def delete_heuristic(self, heuristic_id: str) -> bool:
1289
- """Delete a heuristic by ID."""
1290
- with self._get_connection() as conn:
1291
- # Also remove from embedding index
1292
- conn.execute(
1293
- "DELETE FROM embeddings WHERE memory_type = 'heuristics' AND memory_id = ?",
1294
- (heuristic_id,),
1295
- )
1296
- cursor = conn.execute(
1297
- "DELETE FROM heuristics WHERE id = ?",
1298
- (heuristic_id,),
1299
- )
1300
- if cursor.rowcount > 0:
1301
- # Mark index as dirty for lazy rebuild on next search
1302
- self._index_dirty["heuristics"] = True
1303
- return True
1304
- return False
1305
-
1306
- def delete_outcome(self, outcome_id: str) -> bool:
1307
- """Delete an outcome by ID."""
1308
- with self._get_connection() as conn:
1309
- # Also remove from embedding index
1310
- conn.execute(
1311
- "DELETE FROM embeddings WHERE memory_type = 'outcomes' AND memory_id = ?",
1312
- (outcome_id,),
1313
- )
1314
- cursor = conn.execute(
1315
- "DELETE FROM outcomes WHERE id = ?",
1316
- (outcome_id,),
1317
- )
1318
- if cursor.rowcount > 0:
1319
- # Mark index as dirty for lazy rebuild on next search
1320
- self._index_dirty["outcomes"] = True
1321
- return True
1322
- return False
1323
-
1324
- def delete_domain_knowledge(self, knowledge_id: str) -> bool:
1325
- """Delete domain knowledge by ID."""
1326
- with self._get_connection() as conn:
1327
- # Also remove from embedding index
1328
- conn.execute(
1329
- "DELETE FROM embeddings WHERE memory_type = 'domain_knowledge' AND memory_id = ?",
1330
- (knowledge_id,),
1331
- )
1332
- cursor = conn.execute(
1333
- "DELETE FROM domain_knowledge WHERE id = ?",
1334
- (knowledge_id,),
1335
- )
1336
- if cursor.rowcount > 0:
1337
- # Mark index as dirty for lazy rebuild on next search
1338
- self._index_dirty["domain_knowledge"] = True
1339
- return True
1340
- return False
1341
-
1342
- def delete_anti_pattern(self, anti_pattern_id: str) -> bool:
1343
- """Delete an anti-pattern by ID."""
1344
- with self._get_connection() as conn:
1345
- # Also remove from embedding index
1346
- conn.execute(
1347
- "DELETE FROM embeddings WHERE memory_type = 'anti_patterns' AND memory_id = ?",
1348
- (anti_pattern_id,),
1349
- )
1350
- cursor = conn.execute(
1351
- "DELETE FROM anti_patterns WHERE id = ?",
1352
- (anti_pattern_id,),
1353
- )
1354
- if cursor.rowcount > 0:
1355
- # Mark index as dirty for lazy rebuild on next search
1356
- self._index_dirty["anti_patterns"] = True
1357
- return True
1358
- return False
1
+ """
2
+ ALMA SQLite + FAISS Storage Backend.
3
+
4
+ Local storage using SQLite for structured data and FAISS for vector search.
5
+ This is the recommended backend for local development and testing.
6
+
7
+ v0.6.0 adds workflow context support:
8
+ - Checkpoint tables for crash recovery
9
+ - WorkflowOutcome tables for learning from workflows
10
+ - ArtifactRef tables for linking external files
11
+ - scope_filter parameter for workflow-scoped queries
12
+ """
13
+
14
+ import json
15
+ import logging
16
+ import sqlite3
17
+ from contextlib import contextmanager
18
+ from datetime import datetime, timezone
19
+ from pathlib import Path
20
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
21
+
22
+ import numpy as np
23
+
24
+ from alma.storage.base import StorageBackend
25
+ from alma.storage.constants import SQLITE_TABLE_NAMES, MemoryType
26
+ from alma.types import (
27
+ AntiPattern,
28
+ DomainKnowledge,
29
+ Heuristic,
30
+ Outcome,
31
+ UserPreference,
32
+ )
33
+
34
+ if TYPE_CHECKING:
35
+ from alma.learning.decay import MemoryStrength
36
+ from alma.session import SessionHandoff
37
+ from alma.storage.archive import ArchivedMemory
38
+ from alma.workflow import ArtifactRef, Checkpoint, WorkflowOutcome
39
+
40
+ logger = logging.getLogger(__name__)
41
+
42
+ # Try to import FAISS, fall back to numpy-based search if not available
43
+ try:
44
+ import faiss
45
+
46
+ FAISS_AVAILABLE = True
47
+ except ImportError:
48
+ FAISS_AVAILABLE = False
49
+ logger.warning("FAISS not available, falling back to numpy-based vector search")
50
+
51
+
52
+ class SQLiteStorage(StorageBackend):
53
+ """
54
+ SQLite + FAISS storage backend.
55
+
56
+ Uses SQLite for structured data and FAISS for efficient vector similarity search.
57
+ Falls back to numpy cosine similarity if FAISS is not installed.
58
+
59
+ Database schema:
60
+ - heuristics: id, agent, project_id, condition, strategy, confidence, ...
61
+ - outcomes: id, agent, project_id, task_type, task_description, success, ...
62
+ - preferences: id, user_id, category, preference, source, ...
63
+ - domain_knowledge: id, agent, project_id, domain, fact, ...
64
+ - anti_patterns: id, agent, project_id, pattern, why_bad, ...
65
+ - embeddings: id, memory_type, memory_id, embedding (blob)
66
+ """
67
+
68
+ def __init__(
69
+ self,
70
+ db_path: Path,
71
+ embedding_dim: int = 384, # Default for all-MiniLM-L6-v2
72
+ auto_migrate: bool = True,
73
+ ):
74
+ """
75
+ Initialize SQLite storage.
76
+
77
+ Args:
78
+ db_path: Path to SQLite database file
79
+ embedding_dim: Dimension of embedding vectors
80
+ auto_migrate: If True, automatically apply pending migrations on startup
81
+ """
82
+ self.db_path = Path(db_path)
83
+ self.db_path.parent.mkdir(parents=True, exist_ok=True)
84
+ self.embedding_dim = embedding_dim
85
+
86
+ # Migration support (lazy-loaded)
87
+ self._migration_runner = None
88
+ self._version_store = None
89
+
90
+ # Initialize database
91
+ self._init_database()
92
+
93
+ # Initialize FAISS indices (one per memory type)
94
+ self._indices: Dict[str, Any] = {}
95
+ self._id_maps: Dict[str, List[str]] = {} # memory_type -> [memory_ids]
96
+ self._index_dirty: Dict[str, bool] = {} # Track which indexes need rebuilding
97
+ self._load_faiss_indices()
98
+
99
+ # Auto-migrate if enabled
100
+ if auto_migrate:
101
+ self._ensure_migrated()
102
+
103
+ @classmethod
104
+ def from_config(cls, config: Dict[str, Any]) -> "SQLiteStorage":
105
+ """Create instance from configuration."""
106
+ storage_dir = config.get("storage_dir", ".alma")
107
+ db_name = config.get("db_name", "alma.db")
108
+ embedding_dim = config.get("embedding_dim", 384)
109
+
110
+ db_path = Path(storage_dir) / db_name
111
+ return cls(db_path=db_path, embedding_dim=embedding_dim)
112
+
113
+ @contextmanager
114
+ def _get_connection(self):
115
+ """Get database connection with context manager."""
116
+ conn = sqlite3.connect(self.db_path)
117
+ conn.row_factory = sqlite3.Row
118
+ try:
119
+ yield conn
120
+ conn.commit()
121
+ except Exception:
122
+ conn.rollback()
123
+ raise
124
+ finally:
125
+ conn.close()
126
+
127
+ def _init_database(self):
128
+ """Initialize database schema."""
129
+ with self._get_connection() as conn:
130
+ cursor = conn.cursor()
131
+
132
+ # Heuristics table
133
+ cursor.execute("""
134
+ CREATE TABLE IF NOT EXISTS heuristics (
135
+ id TEXT PRIMARY KEY,
136
+ agent TEXT NOT NULL,
137
+ project_id TEXT NOT NULL,
138
+ condition TEXT NOT NULL,
139
+ strategy TEXT NOT NULL,
140
+ confidence REAL DEFAULT 0.0,
141
+ occurrence_count INTEGER DEFAULT 0,
142
+ success_count INTEGER DEFAULT 0,
143
+ last_validated TEXT,
144
+ created_at TEXT,
145
+ metadata TEXT
146
+ )
147
+ """)
148
+ cursor.execute(
149
+ "CREATE INDEX IF NOT EXISTS idx_heuristics_project_agent "
150
+ "ON heuristics(project_id, agent)"
151
+ )
152
+
153
+ # Outcomes table
154
+ cursor.execute("""
155
+ CREATE TABLE IF NOT EXISTS outcomes (
156
+ id TEXT PRIMARY KEY,
157
+ agent TEXT NOT NULL,
158
+ project_id TEXT NOT NULL,
159
+ task_type TEXT,
160
+ task_description TEXT NOT NULL,
161
+ success INTEGER DEFAULT 0,
162
+ strategy_used TEXT,
163
+ duration_ms INTEGER,
164
+ error_message TEXT,
165
+ user_feedback TEXT,
166
+ timestamp TEXT,
167
+ metadata TEXT
168
+ )
169
+ """)
170
+ cursor.execute(
171
+ "CREATE INDEX IF NOT EXISTS idx_outcomes_project_agent "
172
+ "ON outcomes(project_id, agent)"
173
+ )
174
+ cursor.execute(
175
+ "CREATE INDEX IF NOT EXISTS idx_outcomes_task_type "
176
+ "ON outcomes(project_id, agent, task_type)"
177
+ )
178
+ cursor.execute(
179
+ "CREATE INDEX IF NOT EXISTS idx_outcomes_timestamp "
180
+ "ON outcomes(project_id, timestamp)"
181
+ )
182
+
183
+ # User preferences table
184
+ cursor.execute("""
185
+ CREATE TABLE IF NOT EXISTS preferences (
186
+ id TEXT PRIMARY KEY,
187
+ user_id TEXT NOT NULL,
188
+ category TEXT,
189
+ preference TEXT NOT NULL,
190
+ source TEXT,
191
+ confidence REAL DEFAULT 1.0,
192
+ timestamp TEXT,
193
+ metadata TEXT
194
+ )
195
+ """)
196
+ cursor.execute(
197
+ "CREATE INDEX IF NOT EXISTS idx_preferences_user "
198
+ "ON preferences(user_id)"
199
+ )
200
+
201
+ # Domain knowledge table
202
+ cursor.execute("""
203
+ CREATE TABLE IF NOT EXISTS domain_knowledge (
204
+ id TEXT PRIMARY KEY,
205
+ agent TEXT NOT NULL,
206
+ project_id TEXT NOT NULL,
207
+ domain TEXT,
208
+ fact TEXT NOT NULL,
209
+ source TEXT,
210
+ confidence REAL DEFAULT 1.0,
211
+ last_verified TEXT,
212
+ metadata TEXT
213
+ )
214
+ """)
215
+ cursor.execute(
216
+ "CREATE INDEX IF NOT EXISTS idx_domain_knowledge_project_agent "
217
+ "ON domain_knowledge(project_id, agent)"
218
+ )
219
+
220
+ # Anti-patterns table
221
+ cursor.execute("""
222
+ CREATE TABLE IF NOT EXISTS anti_patterns (
223
+ id TEXT PRIMARY KEY,
224
+ agent TEXT NOT NULL,
225
+ project_id TEXT NOT NULL,
226
+ pattern TEXT NOT NULL,
227
+ why_bad TEXT,
228
+ better_alternative TEXT,
229
+ occurrence_count INTEGER DEFAULT 1,
230
+ last_seen TEXT,
231
+ created_at TEXT,
232
+ metadata TEXT
233
+ )
234
+ """)
235
+ cursor.execute(
236
+ "CREATE INDEX IF NOT EXISTS idx_anti_patterns_project_agent "
237
+ "ON anti_patterns(project_id, agent)"
238
+ )
239
+
240
+ # Embeddings table (stores vectors as blobs)
241
+ cursor.execute("""
242
+ CREATE TABLE IF NOT EXISTS embeddings (
243
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
244
+ memory_type TEXT NOT NULL,
245
+ memory_id TEXT NOT NULL,
246
+ embedding BLOB NOT NULL,
247
+ UNIQUE(memory_type, memory_id)
248
+ )
249
+ """)
250
+ cursor.execute(
251
+ "CREATE INDEX IF NOT EXISTS idx_embeddings_type "
252
+ "ON embeddings(memory_type)"
253
+ )
254
+
255
+ # ==================== WORKFLOW TABLES (v0.6.0+) ====================
256
+
257
+ # Checkpoints table for crash recovery
258
+ cursor.execute("""
259
+ CREATE TABLE IF NOT EXISTS checkpoints (
260
+ id TEXT PRIMARY KEY,
261
+ run_id TEXT NOT NULL,
262
+ node_id TEXT NOT NULL,
263
+ state TEXT NOT NULL,
264
+ sequence_number INTEGER DEFAULT 0,
265
+ branch_id TEXT,
266
+ parent_checkpoint_id TEXT,
267
+ state_hash TEXT,
268
+ metadata TEXT,
269
+ created_at TEXT NOT NULL
270
+ )
271
+ """)
272
+ cursor.execute(
273
+ "CREATE INDEX IF NOT EXISTS idx_checkpoints_run ON checkpoints(run_id)"
274
+ )
275
+ cursor.execute(
276
+ "CREATE INDEX IF NOT EXISTS idx_checkpoints_run_branch "
277
+ "ON checkpoints(run_id, branch_id)"
278
+ )
279
+ cursor.execute(
280
+ "CREATE INDEX IF NOT EXISTS idx_checkpoints_run_sequence "
281
+ "ON checkpoints(run_id, sequence_number DESC)"
282
+ )
283
+
284
+ # Workflow outcomes table
285
+ cursor.execute("""
286
+ CREATE TABLE IF NOT EXISTS workflow_outcomes (
287
+ id TEXT PRIMARY KEY,
288
+ tenant_id TEXT,
289
+ workflow_id TEXT NOT NULL,
290
+ run_id TEXT NOT NULL,
291
+ agent TEXT NOT NULL,
292
+ project_id TEXT NOT NULL,
293
+ result TEXT NOT NULL,
294
+ summary TEXT,
295
+ strategies_used TEXT,
296
+ successful_patterns TEXT,
297
+ failed_patterns TEXT,
298
+ extracted_heuristics TEXT,
299
+ extracted_anti_patterns TEXT,
300
+ duration_seconds REAL,
301
+ node_count INTEGER,
302
+ error_message TEXT,
303
+ metadata TEXT,
304
+ created_at TEXT NOT NULL
305
+ )
306
+ """)
307
+ cursor.execute(
308
+ "CREATE INDEX IF NOT EXISTS idx_workflow_outcomes_project "
309
+ "ON workflow_outcomes(project_id, agent)"
310
+ )
311
+ cursor.execute(
312
+ "CREATE INDEX IF NOT EXISTS idx_workflow_outcomes_workflow "
313
+ "ON workflow_outcomes(workflow_id)"
314
+ )
315
+ cursor.execute(
316
+ "CREATE INDEX IF NOT EXISTS idx_workflow_outcomes_tenant "
317
+ "ON workflow_outcomes(tenant_id)"
318
+ )
319
+
320
+ # Artifact links table
321
+ cursor.execute("""
322
+ CREATE TABLE IF NOT EXISTS artifact_links (
323
+ id TEXT PRIMARY KEY,
324
+ memory_id TEXT NOT NULL,
325
+ artifact_type TEXT NOT NULL,
326
+ storage_url TEXT NOT NULL,
327
+ filename TEXT,
328
+ mime_type TEXT,
329
+ size_bytes INTEGER,
330
+ checksum TEXT,
331
+ metadata TEXT,
332
+ created_at TEXT NOT NULL
333
+ )
334
+ """)
335
+ cursor.execute(
336
+ "CREATE INDEX IF NOT EXISTS idx_artifact_links_memory "
337
+ "ON artifact_links(memory_id)"
338
+ )
339
+
340
+ # Session handoffs table (for session persistence)
341
+ cursor.execute("""
342
+ CREATE TABLE IF NOT EXISTS session_handoffs (
343
+ id TEXT PRIMARY KEY,
344
+ project_id TEXT NOT NULL,
345
+ agent TEXT NOT NULL,
346
+ session_id TEXT NOT NULL,
347
+ last_action TEXT NOT NULL,
348
+ last_outcome TEXT NOT NULL,
349
+ current_goal TEXT,
350
+ key_decisions TEXT,
351
+ active_files TEXT,
352
+ blockers TEXT,
353
+ next_steps TEXT,
354
+ test_status TEXT,
355
+ confidence_level REAL DEFAULT 0.5,
356
+ risk_flags TEXT,
357
+ session_start TEXT,
358
+ session_end TEXT,
359
+ duration_ms INTEGER DEFAULT 0,
360
+ metadata TEXT,
361
+ created_at TEXT NOT NULL
362
+ )
363
+ """)
364
+ cursor.execute(
365
+ "CREATE INDEX IF NOT EXISTS idx_session_handoffs_project_agent "
366
+ "ON session_handoffs(project_id, agent)"
367
+ )
368
+ cursor.execute(
369
+ "CREATE INDEX IF NOT EXISTS idx_session_handoffs_agent_created "
370
+ "ON session_handoffs(agent, created_at DESC)"
371
+ )
372
+
373
+ # ==================== MEMORY STRENGTH TABLE (v0.7.0+) ====================
374
+
375
+ # Memory strength tracking for decay-based forgetting
376
+ cursor.execute("""
377
+ CREATE TABLE IF NOT EXISTS memory_strength (
378
+ memory_id TEXT PRIMARY KEY,
379
+ memory_type TEXT NOT NULL,
380
+ project_id TEXT,
381
+ agent TEXT,
382
+ initial_strength REAL DEFAULT 1.0,
383
+ decay_half_life_days INTEGER DEFAULT 30,
384
+ created_at TEXT NOT NULL,
385
+ last_accessed TEXT NOT NULL,
386
+ access_count INTEGER DEFAULT 0,
387
+ explicit_importance REAL DEFAULT 0.5,
388
+ reinforcement_events TEXT DEFAULT '[]'
389
+ )
390
+ """)
391
+ cursor.execute(
392
+ "CREATE INDEX IF NOT EXISTS idx_memory_strength_last_accessed "
393
+ "ON memory_strength(last_accessed)"
394
+ )
395
+ cursor.execute(
396
+ "CREATE INDEX IF NOT EXISTS idx_memory_strength_project_agent "
397
+ "ON memory_strength(project_id, agent)"
398
+ )
399
+
400
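The memory_strength row stores only decay parameters (initial strength, half-life, access history); how they combine into a live score is up to the decay engine. One plausible exponential-half-life reading of these columns, offered as an illustrative sketch rather than the package's actual formula:

from datetime import datetime, timezone

def current_strength(
    initial_strength: float,
    decay_half_life_days: int,
    last_accessed: datetime,
) -> float:
    # Hypothetical: strength halves every decay_half_life_days since the
    # last access. last_accessed must be timezone-aware.
    age_days = (datetime.now(timezone.utc) - last_accessed).total_seconds() / 86400.0
    return initial_strength * 0.5 ** (age_days / decay_half_life_days)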
+        # ==================== MEMORY ARCHIVE TABLE (v0.7.0+) ====================
+
+        # Memory archive for soft-deleted memories
+        cursor.execute("""
+            CREATE TABLE IF NOT EXISTS memory_archive (
+                id TEXT PRIMARY KEY,
+                original_id TEXT NOT NULL,
+                memory_type TEXT NOT NULL,
+                content TEXT NOT NULL,
+                embedding BLOB,
+                metadata TEXT,
+                original_created_at TEXT NOT NULL,
+                archived_at TEXT NOT NULL,
+                archive_reason TEXT NOT NULL,
+                final_strength REAL NOT NULL,
+                project_id TEXT NOT NULL,
+                agent TEXT NOT NULL,
+                restored INTEGER DEFAULT 0,
+                restored_at TEXT,
+                restored_as TEXT
+            )
+        """)
+        cursor.execute(
+            "CREATE INDEX IF NOT EXISTS idx_archive_project_agent "
+            "ON memory_archive(project_id, agent)"
+        )
+        cursor.execute(
+            "CREATE INDEX IF NOT EXISTS idx_archive_reason "
+            "ON memory_archive(archive_reason)"
+        )
+        cursor.execute(
+            "CREATE INDEX IF NOT EXISTS idx_archive_date "
+            "ON memory_archive(archived_at)"
+        )
+        cursor.execute(
+            "CREATE INDEX IF NOT EXISTS idx_archive_restored "
+            "ON memory_archive(restored)"
+        )
+
+    def _load_faiss_indices(self, memory_types: Optional[List[str]] = None):
+        """Load or create FAISS indices for specified memory types.
+
+        Args:
+            memory_types: List of memory types to load. If None, loads all types.
+        """
+        if memory_types is None:
+            memory_types = list(MemoryType.VECTOR_ENABLED)
+
+        for memory_type in memory_types:
+            if FAISS_AVAILABLE:
+                # Use FAISS index
+                self._indices[memory_type] = faiss.IndexFlatIP(self.embedding_dim)
+            else:
+                # Use list for numpy fallback
+                self._indices[memory_type] = []
+
+            self._id_maps[memory_type] = []
+            self._index_dirty[memory_type] = False  # Mark as fresh after rebuild
+
+            # Load existing embeddings
+            with self._get_connection() as conn:
+                cursor = conn.cursor()
+                cursor.execute(
+                    "SELECT memory_id, embedding FROM embeddings WHERE memory_type = ?",
+                    (memory_type,),
+                )
+                rows = cursor.fetchall()
+
+            for row in rows:
+                memory_id = row["memory_id"]
+                embedding = np.frombuffer(row["embedding"], dtype=np.float32)
+
+                self._id_maps[memory_type].append(memory_id)
+                if FAISS_AVAILABLE:
+                    self._indices[memory_type].add(
+                        embedding.reshape(1, -1).astype(np.float32)
+                    )
+                else:
+                    self._indices[memory_type].append(embedding)
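A FAISS flat index addresses vectors only by insertion position, which is why each index is paired with an _id_maps list: row i in the index corresponds to _id_maps[memory_type][i]. A minimal standalone sketch of that pairing, assuming faiss is installed (IDs and vectors are placeholders):

import faiss
import numpy as np

dim = 4
index = faiss.IndexFlatIP(dim)
id_map: list = []

for memory_id, vec in [("h-1", [1, 0, 0, 0]), ("h-2", [0, 1, 0, 0])]:
    index.add(np.array(vec, dtype=np.float32).reshape(1, -1))
    id_map.append(memory_id)  # list position i <-> FAISS row i

query = np.array([[1, 0, 0, 0]], dtype=np.float32)
scores, rows = index.search(query, 1)
print(id_map[rows[0][0]])  # "h-1", the best inner-product match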
+
+    def _ensure_index_fresh(self, memory_type: str) -> None:
+        """Rebuild index for a memory type if it has been marked dirty.
+
+        This implements lazy rebuilding - indexes are only rebuilt when
+        actually needed for search, not immediately on every delete.
+
+        Args:
+            memory_type: The type of memory index to check/rebuild.
+        """
+        if self._index_dirty.get(memory_type, False):
+            logger.debug(f"Rebuilding dirty index for {memory_type}")
+            self._load_faiss_indices([memory_type])
+
+    def _add_to_index(
+        self,
+        memory_type: str,
+        memory_id: str,
+        embedding: Optional[List[float]],
+    ):
+        """Add embedding to FAISS index."""
+        if embedding is None:
+            return
+
+        embedding_array = np.array(embedding, dtype=np.float32)
+
+        # Store in database
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                """
+                INSERT OR REPLACE INTO embeddings (memory_type, memory_id, embedding)
+                VALUES (?, ?, ?)
+                """,
+                (memory_type, memory_id, embedding_array.tobytes()),
+            )
+
+        # Add to index
+        self._id_maps[memory_type].append(memory_id)
+        if FAISS_AVAILABLE:
+            self._indices[memory_type].add(
+                embedding_array.reshape(1, -1).astype(np.float32)
+            )
+        else:
+            self._indices[memory_type].append(embedding_array)
+
+    def _search_index(
+        self,
+        memory_type: str,
+        query_embedding: List[float],
+        top_k: int,
+    ) -> List[Tuple[str, float]]:
+        """Search FAISS index for similar embeddings."""
+        # Ensure index is up-to-date before searching (lazy rebuild)
+        self._ensure_index_fresh(memory_type)
+
+        if not self._id_maps[memory_type]:
+            return []
+
+        query = np.array(query_embedding, dtype=np.float32).reshape(1, -1)
+
+        if FAISS_AVAILABLE:
+            # Normalize for cosine similarity (IndexFlatIP)
+            faiss.normalize_L2(query)
+            scores, indices = self._indices[memory_type].search(
+                query, min(top_k, len(self._id_maps[memory_type]))
+            )
+
+            results = []
+            for score, idx in zip(scores[0], indices[0], strict=False):
+                if idx >= 0 and idx < len(self._id_maps[memory_type]):
+                    results.append((self._id_maps[memory_type][idx], float(score)))
+            return results
+        else:
+            # Numpy fallback with cosine similarity
+            embeddings = np.array(self._indices[memory_type])
+            if len(embeddings) == 0:
+                return []
+
+            # Normalize
+            query_norm = query / np.linalg.norm(query)
+            emb_norms = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)
+
+            # Cosine similarity
+            similarities = np.dot(emb_norms, query_norm.T).flatten()
+
+            # Get top k
+            top_indices = np.argsort(similarities)[::-1][:top_k]
+
+            return [
+                (self._id_maps[memory_type][i], float(similarities[i]))
+                for i in top_indices
+            ]
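The inner product of L2-normalized vectors equals their cosine similarity, which is why the query is passed through faiss.normalize_L2 before searching the IndexFlatIP. Note that only the query is normalized here, so the equivalence presumably relies on stored embeddings being normalized upstream. A quick numpy check of the identity itself:

import numpy as np

a = np.array([3.0, 4.0], dtype=np.float32)
b = np.array([4.0, 3.0], dtype=np.float32)

cosine = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
ip_normalized = np.dot(a / np.linalg.norm(a), b / np.linalg.norm(b))

assert np.isclose(cosine, ip_normalized)  # both 0.96 for this pair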
+
+    # ==================== WRITE OPERATIONS ====================
+
+    def save_heuristic(self, heuristic: Heuristic) -> str:
+        """Save a heuristic."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                """
+                INSERT OR REPLACE INTO heuristics
+                (id, agent, project_id, condition, strategy, confidence,
+                 occurrence_count, success_count, last_validated, created_at, metadata)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    heuristic.id,
+                    heuristic.agent,
+                    heuristic.project_id,
+                    heuristic.condition,
+                    heuristic.strategy,
+                    heuristic.confidence,
+                    heuristic.occurrence_count,
+                    heuristic.success_count,
+                    (
+                        heuristic.last_validated.isoformat()
+                        if heuristic.last_validated
+                        else None
+                    ),
+                    heuristic.created_at.isoformat() if heuristic.created_at else None,
+                    json.dumps(heuristic.metadata) if heuristic.metadata else None,
+                ),
+            )
+
+        # Add embedding to index
+        self._add_to_index(MemoryType.HEURISTICS, heuristic.id, heuristic.embedding)
+        logger.debug(f"Saved heuristic: {heuristic.id}")
+        return heuristic.id
+
+    def save_outcome(self, outcome: Outcome) -> str:
+        """Save an outcome."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                """
+                INSERT OR REPLACE INTO outcomes
+                (id, agent, project_id, task_type, task_description, success,
+                 strategy_used, duration_ms, error_message, user_feedback, timestamp, metadata)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    outcome.id,
+                    outcome.agent,
+                    outcome.project_id,
+                    outcome.task_type,
+                    outcome.task_description,
+                    1 if outcome.success else 0,
+                    outcome.strategy_used,
+                    outcome.duration_ms,
+                    outcome.error_message,
+                    outcome.user_feedback,
+                    outcome.timestamp.isoformat() if outcome.timestamp else None,
+                    json.dumps(outcome.metadata) if outcome.metadata else None,
+                ),
+            )
+
+        # Add embedding to index
+        self._add_to_index(MemoryType.OUTCOMES, outcome.id, outcome.embedding)
+        logger.debug(f"Saved outcome: {outcome.id}")
+        return outcome.id
+
+    def save_user_preference(self, preference: UserPreference) -> str:
+        """Save a user preference."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                """
+                INSERT OR REPLACE INTO preferences
+                (id, user_id, category, preference, source, confidence, timestamp, metadata)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    preference.id,
+                    preference.user_id,
+                    preference.category,
+                    preference.preference,
+                    preference.source,
+                    preference.confidence,
+                    preference.timestamp.isoformat() if preference.timestamp else None,
+                    json.dumps(preference.metadata) if preference.metadata else None,
+                ),
+            )
+        logger.debug(f"Saved preference: {preference.id}")
+        return preference.id
+
+    def save_domain_knowledge(self, knowledge: DomainKnowledge) -> str:
+        """Save domain knowledge."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                """
+                INSERT OR REPLACE INTO domain_knowledge
+                (id, agent, project_id, domain, fact, source, confidence, last_verified, metadata)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    knowledge.id,
+                    knowledge.agent,
+                    knowledge.project_id,
+                    knowledge.domain,
+                    knowledge.fact,
+                    knowledge.source,
+                    knowledge.confidence,
+                    (
+                        knowledge.last_verified.isoformat()
+                        if knowledge.last_verified
+                        else None
+                    ),
+                    json.dumps(knowledge.metadata) if knowledge.metadata else None,
+                ),
+            )
+
+        # Add embedding to index
+        self._add_to_index(
+            MemoryType.DOMAIN_KNOWLEDGE, knowledge.id, knowledge.embedding
+        )
+        logger.debug(f"Saved domain knowledge: {knowledge.id}")
+        return knowledge.id
+
+    def save_anti_pattern(self, anti_pattern: AntiPattern) -> str:
+        """Save an anti-pattern."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                """
+                INSERT OR REPLACE INTO anti_patterns
+                (id, agent, project_id, pattern, why_bad, better_alternative,
+                 occurrence_count, last_seen, created_at, metadata)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    anti_pattern.id,
+                    anti_pattern.agent,
+                    anti_pattern.project_id,
+                    anti_pattern.pattern,
+                    anti_pattern.why_bad,
+                    anti_pattern.better_alternative,
+                    anti_pattern.occurrence_count,
+                    (
+                        anti_pattern.last_seen.isoformat()
+                        if anti_pattern.last_seen
+                        else None
+                    ),
+                    (
+                        anti_pattern.created_at.isoformat()
+                        if anti_pattern.created_at
+                        else None
+                    ),
+                    (
+                        json.dumps(anti_pattern.metadata)
+                        if anti_pattern.metadata
+                        else None
+                    ),
+                ),
+            )
+
+        # Add embedding to index
+        self._add_to_index(
+            MemoryType.ANTI_PATTERNS, anti_pattern.id, anti_pattern.embedding
+        )
+        logger.debug(f"Saved anti-pattern: {anti_pattern.id}")
+        return anti_pattern.id
+
+    # ==================== BATCH WRITE OPERATIONS ====================
+
+    def save_heuristics(self, heuristics: List[Heuristic]) -> List[str]:
+        """Save multiple heuristics in a batch using executemany."""
+        if not heuristics:
+            return []
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.executemany(
+                """
+                INSERT OR REPLACE INTO heuristics
+                (id, agent, project_id, condition, strategy, confidence,
+                 occurrence_count, success_count, last_validated, created_at, metadata)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                [
+                    (
+                        h.id,
+                        h.agent,
+                        h.project_id,
+                        h.condition,
+                        h.strategy,
+                        h.confidence,
+                        h.occurrence_count,
+                        h.success_count,
+                        h.last_validated.isoformat() if h.last_validated else None,
+                        h.created_at.isoformat() if h.created_at else None,
+                        json.dumps(h.metadata) if h.metadata else None,
+                    )
+                    for h in heuristics
+                ],
+            )
+
+        # Add embeddings to index
+        for h in heuristics:
+            self._add_to_index(MemoryType.HEURISTICS, h.id, h.embedding)
+
+        logger.debug(f"Batch saved {len(heuristics)} heuristics")
+        return [h.id for h in heuristics]
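executemany pushes every row through one prepared statement inside a single connection context, which is the point of the batch variants over calling save_heuristic in a loop. A hedged usage sketch; the storage variable and the Heuristic keyword arguments shown are illustrative, though the fields match the columns written above:

heuristics = [
    Heuristic(
        id=f"h-{i}",                       # hypothetical IDs
        agent="coder",
        project_id="demo",
        condition="tests fail after refactor",
        strategy="re-run the failing test in isolation first",
        confidence=0.7,
    )
    for i in range(100)
]

ids = storage.save_heuristics(heuristics)  # one executemany call
assert len(ids) == 100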
+
+    def save_outcomes(self, outcomes: List[Outcome]) -> List[str]:
+        """Save multiple outcomes in a batch using executemany."""
+        if not outcomes:
+            return []
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.executemany(
+                """
+                INSERT OR REPLACE INTO outcomes
+                (id, agent, project_id, task_type, task_description, success,
+                 strategy_used, duration_ms, error_message, user_feedback, timestamp, metadata)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                [
+                    (
+                        o.id,
+                        o.agent,
+                        o.project_id,
+                        o.task_type,
+                        o.task_description,
+                        1 if o.success else 0,
+                        o.strategy_used,
+                        o.duration_ms,
+                        o.error_message,
+                        o.user_feedback,
+                        o.timestamp.isoformat() if o.timestamp else None,
+                        json.dumps(o.metadata) if o.metadata else None,
+                    )
+                    for o in outcomes
+                ],
+            )
+
+        # Add embeddings to index
+        for o in outcomes:
+            self._add_to_index(MemoryType.OUTCOMES, o.id, o.embedding)
+
+        logger.debug(f"Batch saved {len(outcomes)} outcomes")
+        return [o.id for o in outcomes]
+
+    def save_domain_knowledge_batch(
+        self, knowledge_items: List[DomainKnowledge]
+    ) -> List[str]:
+        """Save multiple domain knowledge items in a batch using executemany."""
+        if not knowledge_items:
+            return []
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.executemany(
+                """
+                INSERT OR REPLACE INTO domain_knowledge
+                (id, agent, project_id, domain, fact, source, confidence, last_verified, metadata)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                [
+                    (
+                        k.id,
+                        k.agent,
+                        k.project_id,
+                        k.domain,
+                        k.fact,
+                        k.source,
+                        k.confidence,
+                        k.last_verified.isoformat() if k.last_verified else None,
+                        json.dumps(k.metadata) if k.metadata else None,
+                    )
+                    for k in knowledge_items
+                ],
+            )
+
+        # Add embeddings to index
+        for k in knowledge_items:
+            self._add_to_index(MemoryType.DOMAIN_KNOWLEDGE, k.id, k.embedding)
+
+        logger.debug(f"Batch saved {len(knowledge_items)} domain knowledge items")
+        return [k.id for k in knowledge_items]
+
+    # ==================== READ OPERATIONS ====================
+
+    def get_heuristics(
+        self,
+        project_id: str,
+        agent: Optional[str] = None,
+        embedding: Optional[List[float]] = None,
+        top_k: int = 5,
+        min_confidence: float = 0.0,
+        scope_filter: Optional[Dict[str, Any]] = None,
+    ) -> List[Heuristic]:
+        """Get heuristics with optional vector search and scope filtering."""
+        # If embedding provided, use vector search to get candidate IDs
+        candidate_ids = None
+        if embedding:
+            search_results = self._search_index(
+                MemoryType.HEURISTICS, embedding, top_k * 2
+            )
+            candidate_ids = [id for id, _ in search_results]
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            query = "SELECT * FROM heuristics WHERE project_id = ? AND confidence >= ?"
+            params: List[Any] = [project_id, min_confidence]
+
+            if agent:
+                query += " AND agent = ?"
+                params.append(agent)
+
+            if candidate_ids is not None:
+                placeholders = ",".join("?" * len(candidate_ids))
+                query += f" AND id IN ({placeholders})"
+                params.extend(candidate_ids)
+
+            # Apply scope filter (v0.6.0+)
+            if scope_filter:
+                query, params = self._apply_scope_filter(query, params, scope_filter)
+
+            query += " ORDER BY confidence DESC LIMIT ?"
+            params.append(top_k)
+
+            cursor.execute(query, params)
+            rows = cursor.fetchall()
+
+            return [self._row_to_heuristic(row) for row in rows]
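The read path is a two-stage retrieval: the vector index over-fetches top_k * 2 candidate IDs, and SQL then applies the relational filters (project, agent, confidence floor) plus the final LIMIT. A stripped-down sketch of the same pattern, where search_index stands in for _search_index and conn for an open sqlite3 connection:

def two_stage_lookup(search_index, conn, query_vec, project_id, top_k=5):
    # Stage 1: over-fetch nearest-neighbor candidates from the vector index.
    candidates = [mid for mid, _ in search_index("heuristics", query_vec, top_k * 2)]
    if not candidates:
        return []
    # Stage 2: let SQLite apply relational filters and the final cut.
    placeholders = ",".join("?" * len(candidates))
    return conn.execute(
        f"SELECT * FROM heuristics WHERE project_id = ? AND id IN ({placeholders}) "
        "ORDER BY confidence DESC LIMIT ?",
        [project_id, *candidates, top_k],
    ).fetchall()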
+
+    def get_outcomes(
+        self,
+        project_id: str,
+        agent: Optional[str] = None,
+        task_type: Optional[str] = None,
+        embedding: Optional[List[float]] = None,
+        top_k: int = 5,
+        success_only: bool = False,
+        scope_filter: Optional[Dict[str, Any]] = None,
+    ) -> List[Outcome]:
+        """Get outcomes with optional vector search and scope filtering."""
+        candidate_ids = None
+        if embedding:
+            search_results = self._search_index(
+                MemoryType.OUTCOMES, embedding, top_k * 2
+            )
+            candidate_ids = [id for id, _ in search_results]
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            query = "SELECT * FROM outcomes WHERE project_id = ?"
+            params: List[Any] = [project_id]
+
+            if agent:
+                query += " AND agent = ?"
+                params.append(agent)
+
+            if task_type:
+                query += " AND task_type = ?"
+                params.append(task_type)
+
+            if success_only:
+                query += " AND success = 1"
+
+            if candidate_ids is not None:
+                placeholders = ",".join("?" * len(candidate_ids))
+                query += f" AND id IN ({placeholders})"
+                params.extend(candidate_ids)
+
+            # Apply scope filter (v0.6.0+)
+            if scope_filter:
+                query, params = self._apply_scope_filter(query, params, scope_filter)
+
+            query += " ORDER BY timestamp DESC LIMIT ?"
+            params.append(top_k)
+
+            cursor.execute(query, params)
+            rows = cursor.fetchall()
+
+            return [self._row_to_outcome(row) for row in rows]
+
+    def get_user_preferences(
+        self,
+        user_id: str,
+        category: Optional[str] = None,
+    ) -> List[UserPreference]:
+        """Get user preferences."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            query = "SELECT * FROM preferences WHERE user_id = ?"
+            params: List[Any] = [user_id]
+
+            if category:
+                query += " AND category = ?"
+                params.append(category)
+
+            cursor.execute(query, params)
+            rows = cursor.fetchall()
+
+            return [self._row_to_preference(row) for row in rows]
+
+    def get_domain_knowledge(
+        self,
+        project_id: str,
+        agent: Optional[str] = None,
+        domain: Optional[str] = None,
+        embedding: Optional[List[float]] = None,
+        top_k: int = 5,
+        scope_filter: Optional[Dict[str, Any]] = None,
+    ) -> List[DomainKnowledge]:
+        """Get domain knowledge with optional vector search and scope filtering."""
+        candidate_ids = None
+        if embedding:
+            search_results = self._search_index(
+                MemoryType.DOMAIN_KNOWLEDGE, embedding, top_k * 2
+            )
+            candidate_ids = [id for id, _ in search_results]
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            query = "SELECT * FROM domain_knowledge WHERE project_id = ?"
+            params: List[Any] = [project_id]
+
+            if agent:
+                query += " AND agent = ?"
+                params.append(agent)
+
+            if domain:
+                query += " AND domain = ?"
+                params.append(domain)
+
+            if candidate_ids is not None:
+                placeholders = ",".join("?" * len(candidate_ids))
+                query += f" AND id IN ({placeholders})"
+                params.extend(candidate_ids)
+
+            # Apply scope filter (v0.6.0+)
+            if scope_filter:
+                query, params = self._apply_scope_filter(query, params, scope_filter)
+
+            query += " ORDER BY confidence DESC LIMIT ?"
+            params.append(top_k)
+
+            cursor.execute(query, params)
+            rows = cursor.fetchall()
+
+            return [self._row_to_domain_knowledge(row) for row in rows]
+
+    def get_anti_patterns(
+        self,
+        project_id: str,
+        agent: Optional[str] = None,
+        embedding: Optional[List[float]] = None,
+        top_k: int = 5,
+        scope_filter: Optional[Dict[str, Any]] = None,
+    ) -> List[AntiPattern]:
+        """Get anti-patterns with optional vector search and scope filtering."""
+        candidate_ids = None
+        if embedding:
+            search_results = self._search_index(
+                MemoryType.ANTI_PATTERNS, embedding, top_k * 2
+            )
+            candidate_ids = [id for id, _ in search_results]
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            query = "SELECT * FROM anti_patterns WHERE project_id = ?"
+            params: List[Any] = [project_id]
+
+            if agent:
+                query += " AND agent = ?"
+                params.append(agent)
+
+            if candidate_ids is not None:
+                placeholders = ",".join("?" * len(candidate_ids))
+                query += f" AND id IN ({placeholders})"
+                params.extend(candidate_ids)
+
+            # Apply scope filter (v0.6.0+)
+            if scope_filter:
+                query, params = self._apply_scope_filter(query, params, scope_filter)
+
+            query += " ORDER BY occurrence_count DESC LIMIT ?"
+            params.append(top_k)
+
+            cursor.execute(query, params)
+            rows = cursor.fetchall()
+
+            return [self._row_to_anti_pattern(row) for row in rows]
+
+    # ==================== MULTI-AGENT MEMORY SHARING ====================
+
+    def get_heuristics_for_agents(
+        self,
+        project_id: str,
+        agents: List[str],
+        embedding: Optional[List[float]] = None,
+        top_k: int = 5,
+        min_confidence: float = 0.0,
+    ) -> List[Heuristic]:
+        """Get heuristics from multiple agents using an optimized IN query."""
+        if not agents:
+            return []
+
+        candidate_ids = None
+        if embedding:
+            search_results = self._search_index(
+                MemoryType.HEURISTICS, embedding, top_k * 2 * len(agents)
+            )
+            candidate_ids = [id for id, _ in search_results]
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            placeholders = ",".join("?" * len(agents))
+            query = f"SELECT * FROM heuristics WHERE project_id = ? AND confidence >= ? AND agent IN ({placeholders})"
+            params: List[Any] = [project_id, min_confidence] + list(agents)
+
+            if candidate_ids is not None:
+                id_placeholders = ",".join("?" * len(candidate_ids))
+                query += f" AND id IN ({id_placeholders})"
+                params.extend(candidate_ids)
+
+            query += " ORDER BY confidence DESC LIMIT ?"
+            params.append(top_k * len(agents))
+
+            cursor.execute(query, params)
+            rows = cursor.fetchall()
+
+            return [self._row_to_heuristic(row) for row in rows]
+
+    def get_outcomes_for_agents(
+        self,
+        project_id: str,
+        agents: List[str],
+        task_type: Optional[str] = None,
+        embedding: Optional[List[float]] = None,
+        top_k: int = 5,
+        success_only: bool = False,
+    ) -> List[Outcome]:
+        """Get outcomes from multiple agents using an optimized IN query."""
+        if not agents:
+            return []
+
+        candidate_ids = None
+        if embedding:
+            search_results = self._search_index(
+                MemoryType.OUTCOMES, embedding, top_k * 2 * len(agents)
+            )
+            candidate_ids = [id for id, _ in search_results]
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            placeholders = ",".join("?" * len(agents))
+            query = f"SELECT * FROM outcomes WHERE project_id = ? AND agent IN ({placeholders})"
+            params: List[Any] = [project_id] + list(agents)
+
+            if task_type:
+                query += " AND task_type = ?"
+                params.append(task_type)
+
+            if success_only:
+                query += " AND success = 1"
+
+            if candidate_ids is not None:
+                id_placeholders = ",".join("?" * len(candidate_ids))
+                query += f" AND id IN ({id_placeholders})"
+                params.extend(candidate_ids)
+
+            query += " ORDER BY timestamp DESC LIMIT ?"
+            params.append(top_k * len(agents))
+
+            cursor.execute(query, params)
+            rows = cursor.fetchall()
+
+            return [self._row_to_outcome(row) for row in rows]
+
+    def get_domain_knowledge_for_agents(
+        self,
+        project_id: str,
+        agents: List[str],
+        domain: Optional[str] = None,
+        embedding: Optional[List[float]] = None,
+        top_k: int = 5,
+    ) -> List[DomainKnowledge]:
+        """Get domain knowledge from multiple agents using an optimized IN query."""
+        if not agents:
+            return []
+
+        candidate_ids = None
+        if embedding:
+            search_results = self._search_index(
+                MemoryType.DOMAIN_KNOWLEDGE, embedding, top_k * 2 * len(agents)
+            )
+            candidate_ids = [id for id, _ in search_results]
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            placeholders = ",".join("?" * len(agents))
+            query = f"SELECT * FROM domain_knowledge WHERE project_id = ? AND agent IN ({placeholders})"
+            params: List[Any] = [project_id] + list(agents)
+
+            if domain:
+                query += " AND domain = ?"
+                params.append(domain)
+
+            if candidate_ids is not None:
+                id_placeholders = ",".join("?" * len(candidate_ids))
+                query += f" AND id IN ({id_placeholders})"
+                params.extend(candidate_ids)
+
+            query += " ORDER BY confidence DESC LIMIT ?"
+            params.append(top_k * len(agents))
+
+            cursor.execute(query, params)
+            rows = cursor.fetchall()
+
+            return [self._row_to_domain_knowledge(row) for row in rows]
+
+    def get_anti_patterns_for_agents(
+        self,
+        project_id: str,
+        agents: List[str],
+        embedding: Optional[List[float]] = None,
+        top_k: int = 5,
+    ) -> List[AntiPattern]:
+        """Get anti-patterns from multiple agents using an optimized IN query."""
+        if not agents:
+            return []
+
+        candidate_ids = None
+        if embedding:
+            search_results = self._search_index(
+                MemoryType.ANTI_PATTERNS, embedding, top_k * 2 * len(agents)
+            )
+            candidate_ids = [id for id, _ in search_results]
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            placeholders = ",".join("?" * len(agents))
+            query = f"SELECT * FROM anti_patterns WHERE project_id = ? AND agent IN ({placeholders})"
+            params: List[Any] = [project_id] + list(agents)
+
+            if candidate_ids is not None:
+                id_placeholders = ",".join("?" * len(candidate_ids))
+                query += f" AND id IN ({id_placeholders})"
+                params.extend(candidate_ids)
+
+            query += " ORDER BY occurrence_count DESC LIMIT ?"
+            params.append(top_k * len(agents))
+
+            cursor.execute(query, params)
+            rows = cursor.fetchall()
+
+            return [self._row_to_anti_pattern(row) for row in rows]
+
+    # ==================== UPDATE OPERATIONS ====================
+
+    def update_heuristic(
+        self,
+        heuristic_id: str,
+        updates: Dict[str, Any],
+    ) -> bool:
+        """Update a heuristic's fields."""
+        if not updates:
+            return False
+
+        set_clauses = []
+        params = []
+        for key, value in updates.items():
+            if key == "metadata" and value:
+                value = json.dumps(value)
+            elif isinstance(value, datetime):
+                value = value.isoformat()
+            set_clauses.append(f"{key} = ?")
+            params.append(value)
+
+        params.append(heuristic_id)
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                f"UPDATE heuristics SET {', '.join(set_clauses)} WHERE id = ?",
+                params,
+            )
+            return cursor.rowcount > 0
+
+    def increment_heuristic_occurrence(
+        self,
+        heuristic_id: str,
+        success: bool,
+    ) -> bool:
+        """Increment heuristic occurrence count."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            if success:
+                cursor.execute(
+                    """
+                    UPDATE heuristics
+                    SET occurrence_count = occurrence_count + 1,
+                        success_count = success_count + 1,
+                        last_validated = ?
+                    WHERE id = ?
+                    """,
+                    (datetime.now(timezone.utc).isoformat(), heuristic_id),
+                )
+            else:
+                cursor.execute(
+                    """
+                    UPDATE heuristics
+                    SET occurrence_count = occurrence_count + 1,
+                        last_validated = ?
+                    WHERE id = ?
+                    """,
+                    (datetime.now(timezone.utc).isoformat(), heuristic_id),
+                )
+
+            return cursor.rowcount > 0
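occurrence_count and success_count give the raw material for re-scoring a heuristic; this write deliberately leaves confidence untouched so a caller can recompute it. One simple estimator, shown purely as an assumption about how a caller might use these counters, not as ALMA's actual scoring:

def naive_confidence(success_count: int, occurrence_count: int) -> float:
    # Hypothetical Laplace-smoothed success rate.
    return (success_count + 1) / (occurrence_count + 2)

# e.g. 7 successes out of 9 tries -> ~0.73
assert round(naive_confidence(7, 9), 2) == 0.73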
+
+    # ==================== DELETE OPERATIONS ====================
+
+    def delete_outcomes_older_than(
+        self,
+        project_id: str,
+        older_than: datetime,
+        agent: Optional[str] = None,
+    ) -> int:
+        """Delete old outcomes."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            query = "DELETE FROM outcomes WHERE project_id = ? AND timestamp < ?"
+            params: List[Any] = [project_id, older_than.isoformat()]
+
+            if agent:
+                query += " AND agent = ?"
+                params.append(agent)
+
+            cursor.execute(query, params)
+            deleted = cursor.rowcount
+
+        logger.info(f"Deleted {deleted} old outcomes")
+        return deleted
+
+    def delete_low_confidence_heuristics(
+        self,
+        project_id: str,
+        below_confidence: float,
+        agent: Optional[str] = None,
+    ) -> int:
+        """Delete low-confidence heuristics."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            query = "DELETE FROM heuristics WHERE project_id = ? AND confidence < ?"
+            params: List[Any] = [project_id, below_confidence]
+
+            if agent:
+                query += " AND agent = ?"
+                params.append(agent)
+
+            cursor.execute(query, params)
+            deleted = cursor.rowcount
+
+        logger.info(f"Deleted {deleted} low-confidence heuristics")
+        return deleted
+
+    # ==================== STATS ====================
+
+    def get_stats(
+        self,
+        project_id: str,
+        agent: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """Get memory statistics."""
+        stats = {
+            "project_id": project_id,
+            "agent": agent,
+            "storage_type": "sqlite",
+            "faiss_available": FAISS_AVAILABLE,
+        }
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            # Use canonical memory types for stats
+            for memory_type in MemoryType.ALL:
+                if memory_type == MemoryType.PREFERENCES:
+                    # Preferences don't have project_id
+                    cursor.execute(
+                        f"SELECT COUNT(*) FROM {SQLITE_TABLE_NAMES[memory_type]}"
+                    )
+                    stats[f"{memory_type}_count"] = cursor.fetchone()[0]
+                else:
+                    query = f"SELECT COUNT(*) FROM {SQLITE_TABLE_NAMES[memory_type]} WHERE project_id = ?"
+                    params: List[Any] = [project_id]
+                    if agent:
+                        query += " AND agent = ?"
+                        params.append(agent)
+                    cursor.execute(query, params)
+                    stats[f"{memory_type}_count"] = cursor.fetchone()[0]
+
+            # Embedding counts
+            cursor.execute("SELECT COUNT(*) FROM embeddings")
+            stats["embeddings_count"] = cursor.fetchone()[0]
+
+        stats["total_count"] = sum(
+            stats.get(k, 0) for k in stats if k.endswith("_count")
+        )
+
+        return stats
+
+    # ==================== HELPERS ====================
+
+    def _parse_datetime(self, value: Any) -> Optional[datetime]:
+        """Parse datetime from string."""
+        if value is None:
+            return None
+        if isinstance(value, datetime):
+            return value
+        try:
+            return datetime.fromisoformat(value.replace("Z", "+00:00"))
+        except (ValueError, AttributeError):
+            return None
+
+    def _row_to_heuristic(self, row: sqlite3.Row) -> Heuristic:
+        """Convert database row to Heuristic."""
+        return Heuristic(
+            id=row["id"],
+            agent=row["agent"],
+            project_id=row["project_id"],
+            condition=row["condition"],
+            strategy=row["strategy"],
+            confidence=row["confidence"] or 0.0,
+            occurrence_count=row["occurrence_count"] or 0,
+            success_count=row["success_count"] or 0,
+            last_validated=self._parse_datetime(row["last_validated"])
+            or datetime.now(timezone.utc),
+            created_at=self._parse_datetime(row["created_at"])
+            or datetime.now(timezone.utc),
+            metadata=json.loads(row["metadata"]) if row["metadata"] else {},
+        )
+
+    def _row_to_outcome(self, row: sqlite3.Row) -> Outcome:
+        """Convert database row to Outcome."""
+        return Outcome(
+            id=row["id"],
+            agent=row["agent"],
+            project_id=row["project_id"],
+            task_type=row["task_type"] or "general",
+            task_description=row["task_description"],
+            success=bool(row["success"]),
+            strategy_used=row["strategy_used"] or "",
+            duration_ms=row["duration_ms"],
+            error_message=row["error_message"],
+            user_feedback=row["user_feedback"],
+            timestamp=self._parse_datetime(row["timestamp"])
+            or datetime.now(timezone.utc),
+            metadata=json.loads(row["metadata"]) if row["metadata"] else {},
+        )
+
+    def _row_to_preference(self, row: sqlite3.Row) -> UserPreference:
+        """Convert database row to UserPreference."""
+        return UserPreference(
+            id=row["id"],
+            user_id=row["user_id"],
+            category=row["category"] or "general",
+            preference=row["preference"],
+            source=row["source"] or "unknown",
+            confidence=row["confidence"] or 1.0,
+            timestamp=self._parse_datetime(row["timestamp"])
+            or datetime.now(timezone.utc),
+            metadata=json.loads(row["metadata"]) if row["metadata"] else {},
+        )
+
+    def _row_to_domain_knowledge(self, row: sqlite3.Row) -> DomainKnowledge:
+        """Convert database row to DomainKnowledge."""
+        return DomainKnowledge(
+            id=row["id"],
+            agent=row["agent"],
+            project_id=row["project_id"],
+            domain=row["domain"] or "general",
+            fact=row["fact"],
+            source=row["source"] or "unknown",
+            confidence=row["confidence"] or 1.0,
+            last_verified=self._parse_datetime(row["last_verified"])
+            or datetime.now(timezone.utc),
+            metadata=json.loads(row["metadata"]) if row["metadata"] else {},
+        )
+
+    def _row_to_anti_pattern(self, row: sqlite3.Row) -> AntiPattern:
+        """Convert database row to AntiPattern."""
+        return AntiPattern(
+            id=row["id"],
+            agent=row["agent"],
+            project_id=row["project_id"],
+            pattern=row["pattern"],
+            why_bad=row["why_bad"] or "",
+            better_alternative=row["better_alternative"] or "",
+            occurrence_count=row["occurrence_count"] or 1,
+            last_seen=self._parse_datetime(row["last_seen"])
+            or datetime.now(timezone.utc),
+            created_at=self._parse_datetime(row["created_at"])
+            or datetime.now(timezone.utc),
+            metadata=json.loads(row["metadata"]) if row["metadata"] else {},
+        )
+
+    # ===== Additional abstract method implementations =====
+
+    def update_heuristic_confidence(
+        self,
+        heuristic_id: str,
+        new_confidence: float,
+    ) -> bool:
+        """Update confidence score for a heuristic."""
+        with self._get_connection() as conn:
+            cursor = conn.execute(
+                "UPDATE heuristics SET confidence = ? WHERE id = ?",
+                (new_confidence, heuristic_id),
+            )
+            return cursor.rowcount > 0
+
+    def update_knowledge_confidence(
+        self,
+        knowledge_id: str,
+        new_confidence: float,
+    ) -> bool:
+        """Update confidence score for domain knowledge."""
+        with self._get_connection() as conn:
+            cursor = conn.execute(
+                "UPDATE domain_knowledge SET confidence = ? WHERE id = ?",
+                (new_confidence, knowledge_id),
+            )
+            return cursor.rowcount > 0
+
+    def delete_heuristic(self, heuristic_id: str) -> bool:
+        """Delete a heuristic by ID."""
+        with self._get_connection() as conn:
+            # Also remove from embedding index
+            conn.execute(
+                "DELETE FROM embeddings WHERE memory_type = ? AND memory_id = ?",
+                (MemoryType.HEURISTICS, heuristic_id),
+            )
+            cursor = conn.execute(
+                f"DELETE FROM {SQLITE_TABLE_NAMES[MemoryType.HEURISTICS]} WHERE id = ?",
+                (heuristic_id,),
+            )
+            if cursor.rowcount > 0:
+                # Mark index as dirty for lazy rebuild on next search
+                self._index_dirty[MemoryType.HEURISTICS] = True
+                return True
+            return False
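Deletes only flag the in-memory index as dirty; the FAISS structure is rebuilt from the embeddings table the next time that memory type is searched. The resulting flow, as a hedged usage sketch (the storage variable and IDs are placeholders):

storage.delete_heuristic("h-001")   # SQL rows gone, index marked dirty
# ... no rebuild has happened yet, so repeated deletes stay cheap ...
storage.get_heuristics(             # first vector search triggers the rebuild
    project_id="demo",
    embedding=[0.1] * storage.embedding_dim,
)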
+
+    def delete_outcome(self, outcome_id: str) -> bool:
+        """Delete an outcome by ID."""
+        with self._get_connection() as conn:
+            # Also remove from embedding index
+            conn.execute(
+                "DELETE FROM embeddings WHERE memory_type = ? AND memory_id = ?",
+                (MemoryType.OUTCOMES, outcome_id),
+            )
+            cursor = conn.execute(
+                f"DELETE FROM {SQLITE_TABLE_NAMES[MemoryType.OUTCOMES]} WHERE id = ?",
+                (outcome_id,),
+            )
+            if cursor.rowcount > 0:
+                # Mark index as dirty for lazy rebuild on next search
+                self._index_dirty[MemoryType.OUTCOMES] = True
+                return True
+            return False
+
+    def delete_domain_knowledge(self, knowledge_id: str) -> bool:
+        """Delete domain knowledge by ID."""
+        with self._get_connection() as conn:
+            # Also remove from embedding index
+            conn.execute(
+                "DELETE FROM embeddings WHERE memory_type = ? AND memory_id = ?",
+                (MemoryType.DOMAIN_KNOWLEDGE, knowledge_id),
+            )
+            cursor = conn.execute(
+                f"DELETE FROM {SQLITE_TABLE_NAMES[MemoryType.DOMAIN_KNOWLEDGE]} WHERE id = ?",
+                (knowledge_id,),
+            )
+            if cursor.rowcount > 0:
+                # Mark index as dirty for lazy rebuild on next search
+                self._index_dirty[MemoryType.DOMAIN_KNOWLEDGE] = True
+                return True
+            return False
+
+    def delete_anti_pattern(self, anti_pattern_id: str) -> bool:
+        """Delete an anti-pattern by ID."""
+        with self._get_connection() as conn:
+            # Also remove from embedding index
+            conn.execute(
+                "DELETE FROM embeddings WHERE memory_type = ? AND memory_id = ?",
+                (MemoryType.ANTI_PATTERNS, anti_pattern_id),
+            )
+            cursor = conn.execute(
+                f"DELETE FROM {SQLITE_TABLE_NAMES[MemoryType.ANTI_PATTERNS]} WHERE id = ?",
+                (anti_pattern_id,),
+            )
+            if cursor.rowcount > 0:
+                # Mark index as dirty for lazy rebuild on next search
+                self._index_dirty[MemoryType.ANTI_PATTERNS] = True
+                return True
+            return False
+
+    # ==================== MIGRATION SUPPORT ====================
+
+    def _get_version_store(self):
+        """Get or create the version store."""
+        if self._version_store is None:
+            from alma.storage.migrations.version_stores import SQLiteVersionStore
+
+            self._version_store = SQLiteVersionStore(self.db_path)
+        return self._version_store
+
+    def _get_migration_runner(self):
+        """Get or create the migration runner."""
+        if self._migration_runner is None:
+            from alma.storage.migrations.runner import MigrationRunner
+            from alma.storage.migrations.versions import v1_0_0  # noqa: F401
+
+            self._migration_runner = MigrationRunner(
+                version_store=self._get_version_store(),
+                backend="sqlite",
+            )
+        return self._migration_runner
+
+    def _ensure_migrated(self) -> None:
+        """Ensure database is migrated to latest version."""
+        runner = self._get_migration_runner()
+        if runner.needs_migration():
+            with self._get_connection() as conn:
+                applied = runner.migrate(conn)
+                if applied:
+                    logger.info(f"Applied {len(applied)} migrations: {applied}")
+
+    def get_schema_version(self) -> Optional[str]:
+        """Get the current schema version."""
+        return self._get_version_store().get_current_version()
+
+    def get_migration_status(self) -> Dict[str, Any]:
+        """Get migration status information."""
+        runner = self._get_migration_runner()
+        status = runner.get_status()
+        status["migration_supported"] = True
+        return status
+
+    def migrate(
+        self,
+        target_version: Optional[str] = None,
+        dry_run: bool = False,
+    ) -> List[str]:
+        """
+        Apply pending schema migrations.
+
+        Args:
+            target_version: Optional target version (applies all if not specified)
+            dry_run: If True, show what would be done without making changes
+
+        Returns:
+            List of applied migration versions
+        """
+        runner = self._get_migration_runner()
+        with self._get_connection() as conn:
+            return runner.migrate(conn, target_version=target_version, dry_run=dry_run)
+
+    def rollback(
+        self,
+        target_version: str,
+        dry_run: bool = False,
+    ) -> List[str]:
+        """
+        Roll back schema to a previous version.
+
+        Args:
+            target_version: Version to roll back to
+            dry_run: If True, show what would be done without making changes
+
+        Returns:
+            List of rolled back migration versions
+        """
+        runner = self._get_migration_runner()
+        with self._get_connection() as conn:
+            return runner.rollback(conn, target_version=target_version, dry_run=dry_run)
+
+    # ==================== SCOPE FILTER HELPER (v0.6.0+) ====================
+
+    def _apply_scope_filter(
+        self,
+        query: str,
+        params: List[Any],
+        scope_filter: Dict[str, Any],
+    ) -> Tuple[str, List[Any]]:
+        """
+        Apply workflow scope filter to a query.
+
+        Note: For tables that don't have workflow columns (tenant_id, workflow_id,
+        run_id, node_id), scope filtering is a no-op. The filter only applies to
+        the workflow_outcomes table, which has these columns.
+
+        Args:
+            query: The SQL query string
+            params: The query parameters
+            scope_filter: Dict with keys: tenant_id, workflow_id, run_id, node_id
+
+        Returns:
+            Tuple of (modified query, modified params)
+        """
+        # Note: Most ALMA tables don't have workflow columns yet.
+        # This filter primarily applies to workflow_outcomes queries.
+        # For other tables, we return the query unchanged to maintain
+        # backwards compatibility.
+        return query, params
+
+    # ==================== CHECKPOINT OPERATIONS (v0.6.0+) ====================
+
+    def save_checkpoint(self, checkpoint: "Checkpoint") -> str:
+        """Save a workflow checkpoint."""
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                """
+                INSERT OR REPLACE INTO checkpoints
+                (id, run_id, node_id, state, sequence_number, branch_id,
+                 parent_checkpoint_id, state_hash, metadata, created_at)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    checkpoint.id,
+                    checkpoint.run_id,
+                    checkpoint.node_id,
+                    json.dumps(checkpoint.state),
+                    checkpoint.sequence_number,
+                    checkpoint.branch_id,
+                    checkpoint.parent_checkpoint_id,
+                    checkpoint.state_hash,
+                    json.dumps(checkpoint.metadata) if checkpoint.metadata else None,
+                    checkpoint.created_at.isoformat(),
+                ),
+            )
+        logger.debug(f"Saved checkpoint: {checkpoint.id}")
+        return checkpoint.id
+
+    def get_checkpoint(self, checkpoint_id: str) -> Optional["Checkpoint"]:
+        """Get a checkpoint by ID."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                "SELECT * FROM checkpoints WHERE id = ?",
+                (checkpoint_id,),
+            )
+            row = cursor.fetchone()
+
+            if row is None:
+                return None
+            return self._row_to_checkpoint(row)
+
+    def get_latest_checkpoint(
+        self,
+        run_id: str,
+        branch_id: Optional[str] = None,
+    ) -> Optional["Checkpoint"]:
+        """Get the most recent checkpoint for a workflow run."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            query = "SELECT * FROM checkpoints WHERE run_id = ?"
+            params: List[Any] = [run_id]
+
+            if branch_id is not None:
+                query += " AND branch_id = ?"
+                params.append(branch_id)
+
+            query += " ORDER BY sequence_number DESC LIMIT 1"
+
+            cursor.execute(query, params)
+            row = cursor.fetchone()
+
+            if row is None:
+                return None
+            return self._row_to_checkpoint(row)
+
+    def get_checkpoints_for_run(
+        self,
+        run_id: str,
+        branch_id: Optional[str] = None,
+        limit: int = 100,
+    ) -> List["Checkpoint"]:
+        """Get all checkpoints for a workflow run."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            query = "SELECT * FROM checkpoints WHERE run_id = ?"
+            params: List[Any] = [run_id]
+
+            if branch_id is not None:
+                query += " AND branch_id = ?"
+                params.append(branch_id)
+
+            query += " ORDER BY sequence_number ASC LIMIT ?"
+            params.append(limit)
+
+            cursor.execute(query, params)
+            rows = cursor.fetchall()
+
+            return [self._row_to_checkpoint(row) for row in rows]
+
+    def cleanup_checkpoints(
+        self,
+        run_id: str,
+        keep_latest: int = 1,
+    ) -> int:
+        """Clean up old checkpoints for a completed run."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            # Get IDs of checkpoints to keep
+            cursor.execute(
+                """
+                SELECT id FROM checkpoints
+                WHERE run_id = ?
+                ORDER BY sequence_number DESC
+                LIMIT ?
+                """,
+                (run_id, keep_latest),
+            )
+            keep_ids = [row["id"] for row in cursor.fetchall()]
+
+            if not keep_ids:
+                return 0
+
+            # Delete all others
+            placeholders = ",".join("?" * len(keep_ids))
+            cursor.execute(
+                f"""
+                DELETE FROM checkpoints
+                WHERE run_id = ? AND id NOT IN ({placeholders})
+                """,
+                [run_id] + keep_ids,
+            )
+            deleted = cursor.rowcount
+
+        logger.info(f"Cleaned up {deleted} checkpoints for run {run_id}")
+        return deleted
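Together these methods give the crash-recovery loop its shape: resume from the checkpoint with the highest sequence_number, then prune history once the run completes. A usage sketch with placeholder run IDs:

latest = storage.get_latest_checkpoint(run_id="run-42")
if latest is not None:
    state = latest.state          # JSON-decoded dict, ready to resume from
    resume_node = latest.node_id  # node to restart at
# ... run the workflow to completion ...
storage.cleanup_checkpoints(run_id="run-42", keep_latest=1)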
+
+    def _row_to_checkpoint(self, row: sqlite3.Row) -> "Checkpoint":
+        """Convert database row to Checkpoint."""
+        from alma.workflow import Checkpoint
+
+        return Checkpoint(
+            id=row["id"],
+            run_id=row["run_id"],
+            node_id=row["node_id"],
+            state=json.loads(row["state"]) if row["state"] else {},
+            sequence_number=row["sequence_number"] or 0,
+            branch_id=row["branch_id"],
+            parent_checkpoint_id=row["parent_checkpoint_id"],
+            state_hash=row["state_hash"] or "",
+            metadata=json.loads(row["metadata"]) if row["metadata"] else {},
+            created_at=self._parse_datetime(row["created_at"])
+            or datetime.now(timezone.utc),
+        )
+
+    # ==================== WORKFLOW OUTCOME OPERATIONS (v0.6.0+) ====================
+
+    def save_workflow_outcome(self, outcome: "WorkflowOutcome") -> str:
+        """Save a workflow outcome."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                """
+                INSERT OR REPLACE INTO workflow_outcomes
+                (id, tenant_id, workflow_id, run_id, agent, project_id, result,
+                 summary, strategies_used, successful_patterns, failed_patterns,
+                 extracted_heuristics, extracted_anti_patterns, duration_seconds,
+                 node_count, error_message, metadata, created_at)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    outcome.id,
+                    outcome.tenant_id,
+                    outcome.workflow_id,
+                    outcome.run_id,
+                    outcome.agent,
+                    outcome.project_id,
+                    outcome.result.value,
+                    outcome.summary,
+                    json.dumps(outcome.strategies_used),
+                    json.dumps(outcome.successful_patterns),
+                    json.dumps(outcome.failed_patterns),
+                    json.dumps(outcome.extracted_heuristics),
+                    json.dumps(outcome.extracted_anti_patterns),
+                    outcome.duration_seconds,
+                    outcome.node_count,
+                    outcome.error_message,
+                    json.dumps(outcome.metadata) if outcome.metadata else None,
+                    outcome.created_at.isoformat(),
+                ),
+            )
+
+        # Add embedding to index if present
+        if outcome.embedding:
+            self._add_to_index("workflow_outcomes", outcome.id, outcome.embedding)
+
+        logger.debug(f"Saved workflow outcome: {outcome.id}")
+        return outcome.id
+
+    def get_workflow_outcome(self, outcome_id: str) -> Optional["WorkflowOutcome"]:
+        """Get a workflow outcome by ID."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                "SELECT * FROM workflow_outcomes WHERE id = ?",
+                (outcome_id,),
+            )
+            row = cursor.fetchone()
+
+            if row is None:
+                return None
+            return self._row_to_workflow_outcome(row)
+
+    def get_workflow_outcomes(
+        self,
+        project_id: str,
+        agent: Optional[str] = None,
+        workflow_id: Optional[str] = None,
+        embedding: Optional[List[float]] = None,
+        top_k: int = 10,
+        scope_filter: Optional[Dict[str, Any]] = None,
+    ) -> List["WorkflowOutcome"]:
+        """Get workflow outcomes with optional filtering."""
+        candidate_ids = None
+        if embedding:
+            search_results = self._search_index(
+                "workflow_outcomes", embedding, top_k * 2
+            )
+            candidate_ids = [id for id, _ in search_results]
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            query = "SELECT * FROM workflow_outcomes WHERE project_id = ?"
+            params: List[Any] = [project_id]
+
+            if agent:
+                query += " AND agent = ?"
+                params.append(agent)
+
+            if workflow_id:
+                query += " AND workflow_id = ?"
+                params.append(workflow_id)
+
+            if candidate_ids is not None:
+                placeholders = ",".join("?" * len(candidate_ids))
+                query += f" AND id IN ({placeholders})"
+                params.extend(candidate_ids)
+
+            # Apply scope filter for workflow columns
+            if scope_filter:
+                if scope_filter.get("tenant_id"):
+                    query += " AND tenant_id = ?"
+                    params.append(scope_filter["tenant_id"])
+                if scope_filter.get("workflow_id"):
+                    query += " AND workflow_id = ?"
+                    params.append(scope_filter["workflow_id"])
+                if scope_filter.get("run_id"):
+                    query += " AND run_id = ?"
+                    params.append(scope_filter["run_id"])
+
+            query += " ORDER BY created_at DESC LIMIT ?"
+            params.append(top_k)
+
+            cursor.execute(query, params)
+            rows = cursor.fetchall()
+
+            return [self._row_to_workflow_outcome(row) for row in rows]
+
+    def _row_to_workflow_outcome(self, row: sqlite3.Row) -> "WorkflowOutcome":
+        """Convert database row to WorkflowOutcome."""
+        from alma.workflow import WorkflowOutcome, WorkflowResult
+
+        return WorkflowOutcome(
+            id=row["id"],
+            tenant_id=row["tenant_id"],
+            workflow_id=row["workflow_id"],
+            run_id=row["run_id"],
+            agent=row["agent"],
+            project_id=row["project_id"],
+            result=WorkflowResult(row["result"]),
+            summary=row["summary"] or "",
+            strategies_used=json.loads(row["strategies_used"])
+            if row["strategies_used"]
+            else [],
+            successful_patterns=json.loads(row["successful_patterns"])
+            if row["successful_patterns"]
+            else [],
+            failed_patterns=json.loads(row["failed_patterns"])
+            if row["failed_patterns"]
+            else [],
+            extracted_heuristics=json.loads(row["extracted_heuristics"])
+            if row["extracted_heuristics"]
+            else [],
+            extracted_anti_patterns=json.loads(row["extracted_anti_patterns"])
+            if row["extracted_anti_patterns"]
+            else [],
+            duration_seconds=row["duration_seconds"],
+            node_count=row["node_count"],
+            error_message=row["error_message"],
+            metadata=json.loads(row["metadata"]) if row["metadata"] else {},
+            created_at=self._parse_datetime(row["created_at"])
+            or datetime.now(timezone.utc),
+        )
+
2003
+ # ==================== ARTIFACT LINK OPERATIONS (v0.6.0+) ====================
2004
+
2005
+ def save_artifact_link(self, artifact_ref: "ArtifactRef") -> str:
2006
+ """Save an artifact reference linked to a memory."""
2007
+ with self._get_connection() as conn:
2008
+ cursor = conn.cursor()
2009
+ cursor.execute(
2010
+ """
2011
+ INSERT OR REPLACE INTO artifact_links
2012
+ (id, memory_id, artifact_type, storage_url, filename,
2013
+ mime_type, size_bytes, checksum, metadata, created_at)
2014
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
2015
+ """,
2016
+ (
2017
+ artifact_ref.id,
2018
+ artifact_ref.memory_id,
2019
+ artifact_ref.artifact_type.value,
2020
+ artifact_ref.storage_url,
2021
+ artifact_ref.filename,
2022
+ artifact_ref.mime_type,
2023
+ artifact_ref.size_bytes,
2024
+ artifact_ref.checksum,
2025
+ json.dumps(artifact_ref.metadata)
2026
+ if artifact_ref.metadata
2027
+ else None,
2028
+ artifact_ref.created_at.isoformat(),
2029
+ ),
2030
+ )
2031
+ logger.debug(f"Saved artifact link: {artifact_ref.id}")
2032
+ return artifact_ref.id
2033
+
2034
+ def get_artifact_links(self, memory_id: str) -> List["ArtifactRef"]:
2035
+ """Get all artifact references linked to a memory."""
2036
+ with self._get_connection() as conn:
2037
+ cursor = conn.cursor()
2038
+ cursor.execute(
2039
+ "SELECT * FROM artifact_links WHERE memory_id = ?",
2040
+ (memory_id,),
2041
+ )
2042
+ rows = cursor.fetchall()
2043
+
2044
+ return [self._row_to_artifact_ref(row) for row in rows]
2045
+
2046
+ def delete_artifact_link(self, artifact_id: str) -> bool:
2047
+ """Delete an artifact reference."""
2048
+ with self._get_connection() as conn:
2049
+ cursor = conn.cursor()
2050
+ cursor.execute(
2051
+ "DELETE FROM artifact_links WHERE id = ?",
2052
+ (artifact_id,),
2053
+ )
2054
+ return cursor.rowcount > 0
2055
+
2056
+ def _row_to_artifact_ref(self, row: sqlite3.Row) -> "ArtifactRef":
2057
+ """Convert database row to ArtifactRef."""
2058
+ from alma.workflow import ArtifactRef, ArtifactType
2059
+
2060
+ return ArtifactRef(
2061
+ id=row["id"],
2062
+ memory_id=row["memory_id"],
2063
+ artifact_type=ArtifactType(row["artifact_type"]),
2064
+ storage_url=row["storage_url"],
2065
+ filename=row["filename"],
2066
+ mime_type=row["mime_type"],
2067
+ size_bytes=row["size_bytes"],
2068
+ checksum=row["checksum"],
2069
+ metadata=json.loads(row["metadata"]) if row["metadata"] else {},
2070
+ created_at=self._parse_datetime(row["created_at"])
2071
+ or datetime.now(timezone.utc),
2072
+ )
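A hypothetical round trip through the three artifact-link methods above. Here `storage` stands for an instance of this SQLite backend, the IDs are illustrative, the ArtifactRef constructor arguments mirror the columns written by save_artifact_link (created_at is assumed to default), and the ArtifactType enum values are not visible in this diff:

    from alma.workflow import ArtifactRef, ArtifactType

    ref = ArtifactRef(
        id="art-001",                         # illustrative ID
        memory_id="heu-abc123",
        artifact_type=ArtifactType("file"),   # enum value assumed
        storage_url="s3://bucket/report.pdf",
        filename="report.pdf",
        mime_type="application/pdf",
        size_bytes=1024,
        checksum="sha256:deadbeef",
        metadata={"source": "ci"},
    )
    storage.save_artifact_link(ref)
    links = storage.get_artifact_links("heu-abc123")   # -> [ref]
    storage.delete_artifact_link("art-001")            # -> True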
+
+    # ==================== SESSION HANDOFFS ====================
+
+    def save_session_handoff(self, handoff: "SessionHandoff") -> str:
+        """Save a session handoff for persistence."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                """
+                INSERT OR REPLACE INTO session_handoffs (
+                    id, project_id, agent, session_id, last_action, last_outcome,
+                    current_goal, key_decisions, active_files, blockers, next_steps,
+                    test_status, confidence_level, risk_flags, session_start,
+                    session_end, duration_ms, metadata, created_at
+                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    handoff.id,
+                    handoff.project_id,
+                    handoff.agent,
+                    handoff.session_id,
+                    handoff.last_action,
+                    handoff.last_outcome,
+                    handoff.current_goal,
+                    json.dumps(handoff.key_decisions),
+                    json.dumps(handoff.active_files),
+                    json.dumps(handoff.blockers),
+                    json.dumps(handoff.next_steps),
+                    json.dumps(handoff.test_status),
+                    handoff.confidence_level,
+                    json.dumps(handoff.risk_flags),
+                    handoff.session_start.isoformat()
+                    if handoff.session_start
+                    else None,
+                    handoff.session_end.isoformat() if handoff.session_end else None,
+                    handoff.duration_ms,
+                    json.dumps(handoff.metadata),
+                    handoff.created_at.isoformat(),
+                ),
+            )
+            return handoff.id
+
+    def get_session_handoffs(
+        self,
+        project_id: str,
+        agent: str,
+        limit: int = 50,
+    ) -> List["SessionHandoff"]:
+        """Get session handoffs for an agent, most recent first."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                """
+                SELECT * FROM session_handoffs
+                WHERE project_id = ? AND agent = ?
+                ORDER BY created_at DESC
+                LIMIT ?
+                """,
+                (project_id, agent, limit),
+            )
+            rows = cursor.fetchall()
+            return [self._row_to_session_handoff(row) for row in rows]
+
+    def get_latest_session_handoff(
+        self,
+        project_id: str,
+        agent: str,
+    ) -> Optional["SessionHandoff"]:
+        """Get the most recent session handoff for an agent."""
+        handoffs = self.get_session_handoffs(project_id, agent, limit=1)
+        return handoffs[0] if handoffs else None
+
+    def delete_session_handoffs(
+        self,
+        project_id: str,
+        agent: Optional[str] = None,
+    ) -> int:
+        """Delete session handoffs for a project, optionally scoped to one agent."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            if agent:
+                cursor.execute(
+                    "DELETE FROM session_handoffs WHERE project_id = ? AND agent = ?",
+                    (project_id, agent),
+                )
+            else:
+                cursor.execute(
+                    "DELETE FROM session_handoffs WHERE project_id = ?",
+                    (project_id,),
+                )
+            return cursor.rowcount
+
+    def _row_to_session_handoff(self, row: sqlite3.Row) -> "SessionHandoff":
+        """Convert database row to SessionHandoff."""
+        from alma.session import SessionHandoff
+
+        return SessionHandoff(
+            id=row["id"],
+            project_id=row["project_id"],
+            agent=row["agent"],
+            session_id=row["session_id"],
+            last_action=row["last_action"],
+            last_outcome=row["last_outcome"],
+            current_goal=row["current_goal"] or "",
+            key_decisions=json.loads(row["key_decisions"])
+            if row["key_decisions"]
+            else [],
+            active_files=json.loads(row["active_files"]) if row["active_files"] else [],
+            blockers=json.loads(row["blockers"]) if row["blockers"] else [],
+            next_steps=json.loads(row["next_steps"]) if row["next_steps"] else [],
+            test_status=json.loads(row["test_status"]) if row["test_status"] else {},
+            confidence_level=row["confidence_level"] or 0.5,
+            risk_flags=json.loads(row["risk_flags"]) if row["risk_flags"] else [],
+            session_start=self._parse_datetime(row["session_start"])
+            or datetime.now(timezone.utc),
+            session_end=self._parse_datetime(row["session_end"]),
+            duration_ms=row["duration_ms"] or 0,
+            metadata=json.loads(row["metadata"]) if row["metadata"] else {},
+            created_at=self._parse_datetime(row["created_at"])
+            or datetime.now(timezone.utc),
+        )
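A sketch of the read and cleanup paths for handoffs, assuming `storage` is an instance of this backend; the project and agent names are illustrative:

    latest = storage.get_latest_session_handoff("my-project", "coder")
    if latest:
        print(latest.current_goal, latest.next_steps)

    # Recent history, newest first (the default limit is 50):
    history = storage.get_session_handoffs("my-project", "coder", limit=10)

    # Omitting `agent` clears handoffs for every agent in the project:
    removed = storage.delete_session_handoffs("my-project", agent="coder")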
+
+    # ==================== MEMORY STRENGTH OPERATIONS (v0.7.0+) ====================
+
+    def save_memory_strength(self, strength: "MemoryStrength") -> str:
+        """
+        Save or update a memory strength record.
+
+        Args:
+            strength: MemoryStrength instance to save
+
+        Returns:
+            The memory ID
+        """
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            # Check if a record already exists for this memory
+            cursor.execute(
+                "SELECT 1 FROM memory_strength WHERE memory_id = ?",
+                (strength.memory_id,),
+            )
+            exists = cursor.fetchone() is not None
+
+            reinforcement_events_json = json.dumps(
+                [r.isoformat() for r in strength.reinforcement_events]
+            )
+
+            if exists:
+                # Update the existing record
+                cursor.execute(
+                    """
+                    UPDATE memory_strength SET
+                        memory_type = ?,
+                        initial_strength = ?,
+                        decay_half_life_days = ?,
+                        last_accessed = ?,
+                        access_count = ?,
+                        explicit_importance = ?,
+                        reinforcement_events = ?
+                    WHERE memory_id = ?
+                    """,
+                    (
+                        strength.memory_type,
+                        strength.initial_strength,
+                        strength.decay_half_life_days,
+                        strength.last_accessed.isoformat(),
+                        strength.access_count,
+                        strength.explicit_importance,
+                        reinforcement_events_json,
+                        strength.memory_id,
+                    ),
+                )
+            else:
+                # Insert a new record. Look up the owning memory to backfill
+                # project_id and agent, since MemoryStrength does not carry them.
+                project_id = None
+                agent = None
+
+                for table in [
+                    "heuristics",
+                    "outcomes",
+                    "domain_knowledge",
+                    "anti_patterns",
+                ]:
+                    cursor.execute(
+                        f"SELECT project_id, agent FROM {table} WHERE id = ?",
+                        (strength.memory_id,),
+                    )
+                    row = cursor.fetchone()
+                    if row:
+                        project_id = row["project_id"]
+                        agent = row["agent"]
+                        break
+
+                cursor.execute(
+                    """
+                    INSERT INTO memory_strength (
+                        memory_id, memory_type, project_id, agent,
+                        initial_strength, decay_half_life_days,
+                        created_at, last_accessed, access_count,
+                        explicit_importance, reinforcement_events
+                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                    """,
+                    (
+                        strength.memory_id,
+                        strength.memory_type,
+                        project_id,
+                        agent,
+                        strength.initial_strength,
+                        strength.decay_half_life_days,
+                        strength.created_at.isoformat(),
+                        strength.last_accessed.isoformat(),
+                        strength.access_count,
+                        strength.explicit_importance,
+                        reinforcement_events_json,
+                    ),
+                )
+
+            return strength.memory_id
+
+    def get_memory_strength(self, memory_id: str) -> Optional["MemoryStrength"]:
+        """
+        Get a memory strength record by memory ID.
+
+        Args:
+            memory_id: The memory ID to look up
+
+        Returns:
+            MemoryStrength instance, or None if not found
+        """
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                "SELECT * FROM memory_strength WHERE memory_id = ?",
+                (memory_id,),
+            )
+            row = cursor.fetchone()
+
+            if row is None:
+                return None
+
+            return self._row_to_memory_strength(row)
+
+    def get_all_memory_strengths(
+        self,
+        project_id: str,
+        agent: Optional[str] = None,
+    ) -> List["MemoryStrength"]:
+        """
+        Get all memory strength records for a project/agent.
+
+        Args:
+            project_id: Project to query
+            agent: Optional agent filter
+
+        Returns:
+            List of MemoryStrength instances
+        """
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            if agent:
+                cursor.execute(
+                    """
+                    SELECT * FROM memory_strength
+                    WHERE project_id = ? AND agent = ?
+                    """,
+                    (project_id, agent),
+                )
+            else:
+                cursor.execute(
+                    """
+                    SELECT * FROM memory_strength
+                    WHERE project_id = ?
+                    """,
+                    (project_id,),
+                )
+
+            rows = cursor.fetchall()
+            return [self._row_to_memory_strength(row) for row in rows]
+
+    def delete_memory_strength(self, memory_id: str) -> bool:
+        """
+        Delete a memory strength record.
+
+        Args:
+            memory_id: The memory ID
+
+        Returns:
+            True if deleted, False if not found
+        """
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                "DELETE FROM memory_strength WHERE memory_id = ?",
+                (memory_id,),
+            )
+            return cursor.rowcount > 0
+
+    def _row_to_memory_strength(self, row: sqlite3.Row) -> "MemoryStrength":
+        """Convert database row to MemoryStrength."""
+        from alma.learning.decay import MemoryStrength
+
+        reinforcement_events = []
+        if row["reinforcement_events"]:
+            events_json = json.loads(row["reinforcement_events"])
+            for event in events_json:
+                if isinstance(event, str):
+                    dt_str = event.replace("Z", "+00:00")
+                    reinforcement_events.append(datetime.fromisoformat(dt_str))
+
+        return MemoryStrength(
+            memory_id=row["memory_id"],
+            memory_type=row["memory_type"] or "unknown",
+            initial_strength=row["initial_strength"] or 1.0,
+            decay_half_life_days=row["decay_half_life_days"] or 30,
+            created_at=self._parse_datetime(row["created_at"])
+            or datetime.now(timezone.utc),
+            last_accessed=self._parse_datetime(row["last_accessed"])
+            or datetime.now(timezone.utc),
+            access_count=row["access_count"] or 0,
+            reinforcement_events=reinforcement_events,
+            explicit_importance=row["explicit_importance"] or 0.5,
+        )
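This storage layer only persists the decay inputs (initial strength, half-life, access history, reinforcement events); the decay curve itself lives in alma/learning/decay.py, which is outside this hunk. Under a plain half-life model, the effective strength would look roughly like the sketch below; this is an illustrative formula, not necessarily the one decay.py implements (it may, for example, also weight reinforcement events):

    from datetime import datetime, timezone

    def effective_strength(strength) -> float:
        """Half-life decay sketch: strength halves every decay_half_life_days."""
        age_days = (
            datetime.now(timezone.utc) - strength.last_accessed
        ).total_seconds() / 86400
        return strength.initial_strength * 0.5 ** (
            age_days / strength.decay_half_life_days
        )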
+
+    # ==================== ARCHIVE OPERATIONS (v0.7.0+) ====================
+
+    def archive_memory(
+        self,
+        memory_id: str,
+        memory_type: str,
+        reason: str,
+        final_strength: float,
+    ) -> "ArchivedMemory":
+        """
+        Archive a memory before deletion.
+
+        Captures the full memory data, including content, embedding, and
+        metadata, for potential future recovery or compliance auditing.
+
+        Args:
+            memory_id: ID of the memory to archive
+            memory_type: Type of memory (heuristic, outcome, etc.)
+            reason: Why the memory is being archived (decay, manual, consolidation, etc.)
+            final_strength: Memory strength at the time of archival
+
+        Returns:
+            ArchivedMemory instance
+        """
+        from alma.storage.archive import ArchivedMemory
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            # Resolve the source table for this memory type
+            table_map = {
+                "heuristic": "heuristics",
+                "outcome": "outcomes",
+                "domain_knowledge": "domain_knowledge",
+                "anti_pattern": "anti_patterns",
+                "preference": "preferences",
+            }
+
+            table_name = table_map.get(memory_type)
+            if not table_name:
+                raise ValueError(f"Unknown memory type: {memory_type}")
+
+            cursor.execute(f"SELECT * FROM {table_name} WHERE id = ?", (memory_id,))
+            row = cursor.fetchone()
+
+            if row is None:
+                raise ValueError(f"Memory not found: {memory_id}")
+
+            # Extract content and metadata from the memory
+            content = self._extract_memory_content(memory_type, row)
+            metadata = json.loads(row["metadata"]) if row["metadata"] else {}
+
+            # Get the embedding if available
+            cursor.execute(
+                "SELECT embedding FROM embeddings WHERE memory_type = ? AND memory_id = ?",
+                (memory_type, memory_id),
+            )
+            embedding_row = cursor.fetchone()
+            embedding = None
+            if embedding_row and embedding_row["embedding"]:
+                embedding = np.frombuffer(
+                    embedding_row["embedding"], dtype=np.float32
+                ).tolist()
+
+            # Parse the original creation date. `row` is a sqlite3.Row, which
+            # has no .get(), so check its keys before indexing.
+            row_keys = row.keys()
+            created_at_str = (
+                row["created_at"] if "created_at" in row_keys else None
+            ) or (row["timestamp"] if "timestamp" in row_keys else None)
+            original_created_at = (
+                self._parse_datetime(created_at_str)
+                if created_at_str
+                else datetime.now(timezone.utc)
+            )
+
+            # Get project_id and agent
+            project_id = row["project_id"] if "project_id" in row_keys else ""
+            agent = row["agent"] if "agent" in row_keys else ""
+
+            # Preferences are keyed by user rather than project
+            if memory_type == "preference":
+                project_id = row["user_id"] if "user_id" in row_keys else ""
+                agent = "user"
+
+            # Create the archived memory
+            archived = ArchivedMemory.create(
+                original_id=memory_id,
+                memory_type=memory_type,
+                content=content,
+                project_id=project_id,
+                agent=agent,
+                archive_reason=reason,
+                final_strength=final_strength,
+                original_created_at=original_created_at,
+                embedding=embedding,
+                metadata=metadata,
+            )
+
+            # Serialize the embedding for storage
+            embedding_blob = None
+            if archived.embedding:
+                embedding_blob = np.array(
+                    archived.embedding, dtype=np.float32
+                ).tobytes()
+
+            # Insert into the archive table
+            cursor.execute(
+                """
+                INSERT INTO memory_archive (
+                    id, original_id, memory_type, content, embedding,
+                    metadata, original_created_at, archived_at, archive_reason,
+                    final_strength, project_id, agent, restored, restored_at, restored_as
+                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    archived.id,
+                    archived.original_id,
+                    archived.memory_type,
+                    archived.content,
+                    embedding_blob,
+                    json.dumps(archived.metadata),
+                    archived.original_created_at.isoformat(),
+                    archived.archived_at.isoformat(),
+                    archived.archive_reason,
+                    archived.final_strength,
+                    archived.project_id,
+                    archived.agent,
+                    0,  # not restored
+                    None,
+                    None,
+                ),
+            )
+
+            return archived
+
+    def _extract_memory_content(self, memory_type: str, row: sqlite3.Row) -> str:
+        """Extract the main content from a memory row as JSON."""
+        if memory_type == "heuristic":
+            return json.dumps(
+                {
+                    "condition": row["condition"],
+                    "strategy": row["strategy"],
+                    "confidence": row["confidence"],
+                    "occurrence_count": row["occurrence_count"],
+                    "success_count": row["success_count"],
+                }
+            )
+        elif memory_type == "outcome":
+            return json.dumps(
+                {
+                    "task_type": row["task_type"],
+                    "task_description": row["task_description"],
+                    "success": bool(row["success"]),
+                    "strategy_used": row["strategy_used"],
+                    "duration_ms": row["duration_ms"],
+                    "error_message": row["error_message"],
+                    "user_feedback": row["user_feedback"],
+                }
+            )
+        elif memory_type == "domain_knowledge":
+            return json.dumps(
+                {
+                    "domain": row["domain"],
+                    "fact": row["fact"],
+                    "source": row["source"],
+                    "confidence": row["confidence"],
+                }
+            )
+        elif memory_type == "anti_pattern":
+            return json.dumps(
+                {
+                    "pattern": row["pattern"],
+                    "why_bad": row["why_bad"],
+                    "better_alternative": row["better_alternative"],
+                    "occurrence_count": row["occurrence_count"],
+                }
+            )
+        elif memory_type == "preference":
+            return json.dumps(
+                {
+                    "category": row["category"],
+                    "preference": row["preference"],
+                    "source": row["source"],
+                    "confidence": row["confidence"],
+                }
+            )
+        else:
+            return json.dumps(dict(row))
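A minimal archival call, assuming `storage` is an instance of this backend and the heuristic exists; `memory_type` must be one of the five keys in `table_map` above, and the ID and strength value are illustrative:

    archived = storage.archive_memory(
        memory_id="heu-abc123",
        memory_type="heuristic",
        reason="decay",
        final_strength=0.07,
    )
    # The archive gets its own ID, distinct from the original memory ID:
    print(archived.id, archived.original_id, archived.archive_reason)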
+
+    def get_archive(self, archive_id: str) -> Optional["ArchivedMemory"]:
+        """
+        Get an archived memory by its archive ID.
+
+        Args:
+            archive_id: The archive ID
+
+        Returns:
+            ArchivedMemory instance, or None if not found
+        """
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                "SELECT * FROM memory_archive WHERE id = ?",
+                (archive_id,),
+            )
+            row = cursor.fetchone()
+
+            if row is None:
+                return None
+
+            return self._row_to_archived_memory(row)
+
+    def list_archives(
+        self,
+        project_id: str,
+        agent: Optional[str] = None,
+        reason: Optional[str] = None,
+        memory_type: Optional[str] = None,
+        older_than: Optional[datetime] = None,
+        younger_than: Optional[datetime] = None,
+        include_restored: bool = False,
+        limit: int = 100,
+    ) -> List["ArchivedMemory"]:
+        """
+        List archived memories with filtering.
+
+        Args:
+            project_id: Project to query
+            agent: Optional agent filter
+            reason: Optional archive reason filter
+            memory_type: Optional memory type filter
+            older_than: Only include memories archived before this time
+            younger_than: Only include memories archived after this time
+            include_restored: Whether to include archives that have been restored
+            limit: Maximum number of archives to return
+
+        Returns:
+            List of ArchivedMemory instances
+        """
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            conditions = ["project_id = ?"]
+            params: List[Any] = [project_id]
+
+            if agent:
+                conditions.append("agent = ?")
+                params.append(agent)
+
+            if reason:
+                conditions.append("archive_reason = ?")
+                params.append(reason)
+
+            if memory_type:
+                conditions.append("memory_type = ?")
+                params.append(memory_type)
+
+            if older_than:
+                conditions.append("archived_at < ?")
+                params.append(older_than.isoformat())
+
+            if younger_than:
+                conditions.append("archived_at > ?")
+                params.append(younger_than.isoformat())
+
+            if not include_restored:
+                conditions.append("restored = 0")
+
+            where_clause = " AND ".join(conditions)
+            params.append(limit)
+
+            cursor.execute(
+                f"""
+                SELECT * FROM memory_archive
+                WHERE {where_clause}
+                ORDER BY archived_at DESC
+                LIMIT ?
+                """,
+                params,
+            )
+
+            rows = cursor.fetchall()
+            return [self._row_to_archived_memory(row) for row in rows]
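An example filter combination for the listing API above; names are illustrative and `storage` is assumed to be this backend:

    from datetime import datetime, timedelta, timezone

    # Decay-archived heuristics from the last 30 days, skipping
    # already-restored entries (include_restored defaults to False):
    recent = storage.list_archives(
        project_id="my-project",
        reason="decay",
        memory_type="heuristic",
        younger_than=datetime.now(timezone.utc) - timedelta(days=30),
    )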
+
+    def restore_from_archive(self, archive_id: str) -> str:
+        """
+        Restore an archived memory, creating a new memory from the archive data.
+
+        The original archive is marked as restored but retained for audit purposes.
+
+        Args:
+            archive_id: The archive ID to restore
+
+        Returns:
+            New memory ID of the restored memory
+
+        Raises:
+            ValueError: If the archive is not found or was already restored
+        """
+        import uuid
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            # Get the archive
+            cursor.execute(
+                "SELECT * FROM memory_archive WHERE id = ?",
+                (archive_id,),
+            )
+            row = cursor.fetchone()
+
+            if row is None:
+                raise ValueError(f"Archive not found: {archive_id}")
+
+            if row["restored"]:
+                raise ValueError(f"Archive already restored as: {row['restored_as']}")
+
+            archived = self._row_to_archived_memory(row)
+            content = json.loads(archived.content)
+
+            # Generate a new memory ID
+            new_id = f"{archived.memory_type[:3]}-{uuid.uuid4().hex[:12]}"
+
+            # Restore based on memory type
+            if archived.memory_type == "heuristic":
+                cursor.execute(
+                    """
+                    INSERT INTO heuristics (
+                        id, agent, project_id, condition, strategy, confidence,
+                        occurrence_count, success_count, last_validated, created_at, metadata
+                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                    """,
+                    (
+                        new_id,
+                        archived.agent,
+                        archived.project_id,
+                        content.get("condition", ""),
+                        content.get("strategy", ""),
+                        content.get("confidence", 0.5),
+                        content.get("occurrence_count", 1),
+                        content.get("success_count", 0),
+                        datetime.now(timezone.utc).isoformat(),
+                        datetime.now(timezone.utc).isoformat(),
+                        json.dumps(archived.metadata),
+                    ),
+                )
+            elif archived.memory_type == "outcome":
+                cursor.execute(
+                    """
+                    INSERT INTO outcomes (
+                        id, agent, project_id, task_type, task_description, success,
+                        strategy_used, duration_ms, error_message, user_feedback,
+                        timestamp, metadata
+                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                    """,
+                    (
+                        new_id,
+                        archived.agent,
+                        archived.project_id,
+                        content.get("task_type"),
+                        content.get("task_description", ""),
+                        1 if content.get("success") else 0,
+                        content.get("strategy_used"),
+                        content.get("duration_ms"),
+                        content.get("error_message"),
+                        content.get("user_feedback"),
+                        datetime.now(timezone.utc).isoformat(),
+                        json.dumps(archived.metadata),
+                    ),
+                )
+            elif archived.memory_type == "domain_knowledge":
+                cursor.execute(
+                    """
+                    INSERT INTO domain_knowledge (
+                        id, agent, project_id, domain, fact, source, confidence,
+                        last_verified, metadata
+                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+                    """,
+                    (
+                        new_id,
+                        archived.agent,
+                        archived.project_id,
+                        content.get("domain"),
+                        content.get("fact", ""),
+                        content.get("source"),
+                        content.get("confidence", 1.0),
+                        datetime.now(timezone.utc).isoformat(),
+                        json.dumps(archived.metadata),
+                    ),
+                )
+            elif archived.memory_type == "anti_pattern":
+                cursor.execute(
+                    """
+                    INSERT INTO anti_patterns (
+                        id, agent, project_id, pattern, why_bad, better_alternative,
+                        occurrence_count, last_seen, created_at, metadata
+                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                    """,
+                    (
+                        new_id,
+                        archived.agent,
+                        archived.project_id,
+                        content.get("pattern", ""),
+                        content.get("why_bad"),
+                        content.get("better_alternative"),
+                        content.get("occurrence_count", 1),
+                        datetime.now(timezone.utc).isoformat(),
+                        datetime.now(timezone.utc).isoformat(),
+                        json.dumps(archived.metadata),
+                    ),
+                )
+            elif archived.memory_type == "preference":
+                cursor.execute(
+                    """
+                    INSERT INTO preferences (
+                        id, user_id, category, preference, source, confidence,
+                        timestamp, metadata
+                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+                    """,
+                    (
+                        new_id,
+                        archived.project_id,  # for preferences, project_id holds the user_id
+                        content.get("category"),
+                        content.get("preference", ""),
+                        content.get("source"),
+                        content.get("confidence", 1.0),
+                        datetime.now(timezone.utc).isoformat(),
+                        json.dumps(archived.metadata),
+                    ),
+                )
+            else:
+                raise ValueError(f"Cannot restore memory type: {archived.memory_type}")
+
+            # Restore the embedding if available
+            if archived.embedding:
+                embedding_blob = np.array(
+                    archived.embedding, dtype=np.float32
+                ).tobytes()
+                cursor.execute(
+                    """
+                    INSERT INTO embeddings (memory_type, memory_id, embedding)
+                    VALUES (?, ?, ?)
+                    """,
+                    (archived.memory_type, new_id, embedding_blob),
+                )
+
+            # Mark the archive as restored
+            cursor.execute(
+                """
+                UPDATE memory_archive
+                SET restored = 1, restored_at = ?, restored_as = ?
+                WHERE id = ?
+                """,
+                (datetime.now(timezone.utc).isoformat(), new_id, archive_id),
+            )
+
+            return new_id
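Restoration is one-shot: a second attempt on the same archive raises rather than creating a duplicate. A hedged usage sketch, continuing the `storage` and `archived` names from the earlier examples:

    try:
        new_id = storage.restore_from_archive(archived.id)
    except ValueError as exc:
        # Raised when the archive is missing or was already restored.
        print(f"Restore failed: {exc}")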
+
+    def purge_archives(
+        self,
+        older_than: datetime,
+        project_id: Optional[str] = None,
+        reason: Optional[str] = None,
+    ) -> int:
+        """
+        Permanently delete archived memories.
+
+        This is a destructive operation: archives cannot be recovered after purging.
+
+        Args:
+            older_than: Delete archives older than this datetime
+            project_id: Optional project filter
+            reason: Optional reason filter
+
+        Returns:
+            Number of archives permanently deleted
+        """
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            conditions = ["archived_at < ?"]
+            params: List[Any] = [older_than.isoformat()]
+
+            if project_id:
+                conditions.append("project_id = ?")
+                params.append(project_id)
+
+            if reason:
+                conditions.append("archive_reason = ?")
+                params.append(reason)
+
+            where_clause = " AND ".join(conditions)
+
+            cursor.execute(
+                f"DELETE FROM memory_archive WHERE {where_clause}",
+                params,
+            )
+
+            return cursor.rowcount
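A retention-policy sketch built on purge_archives; the one-year window is an arbitrary example, and `storage` again stands for an instance of this backend:

    from datetime import datetime, timedelta, timezone

    cutoff = datetime.now(timezone.utc) - timedelta(days=365)
    purged = storage.purge_archives(older_than=cutoff, reason="decay")
    print(f"Permanently removed {purged} archives")  # irreversible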
+
+    def get_archive_stats(
+        self,
+        project_id: str,
+        agent: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """
+        Get statistics about archived memories.
+
+        Args:
+            project_id: Project to query
+            agent: Optional agent filter
+
+        Returns:
+            Dict with archive statistics (counts by reason, type, and agent, etc.)
+        """
+        from alma.storage.archive import ArchiveStats
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            # Build the base filter
+            base_conditions = ["project_id = ?"]
+            base_params: List[Any] = [project_id]
+
+            if agent:
+                base_conditions.append("agent = ?")
+                base_params.append(agent)
+
+            base_where = " AND ".join(base_conditions)
+
+            # Total count
+            cursor.execute(
+                f"SELECT COUNT(*) as cnt FROM memory_archive WHERE {base_where}",
+                base_params,
+            )
+            total_count = cursor.fetchone()["cnt"]
+
+            # Restored count
+            cursor.execute(
+                f"SELECT COUNT(*) as cnt FROM memory_archive WHERE {base_where} AND restored = 1",
+                base_params,
+            )
+            restored_count = cursor.fetchone()["cnt"]
+
+            # Count by reason
+            cursor.execute(
+                f"""
+                SELECT archive_reason, COUNT(*) as cnt
+                FROM memory_archive
+                WHERE {base_where}
+                GROUP BY archive_reason
+                """,
+                base_params,
+            )
+            by_reason = {row["archive_reason"]: row["cnt"] for row in cursor.fetchall()}
+
+            # Count by type
+            cursor.execute(
+                f"""
+                SELECT memory_type, COUNT(*) as cnt
+                FROM memory_archive
+                WHERE {base_where}
+                GROUP BY memory_type
+                """,
+                base_params,
+            )
+            by_type = {row["memory_type"]: row["cnt"] for row in cursor.fetchall()}
+
+            # Count by agent (always project-wide, ignoring the agent filter)
+            cursor.execute(
+                """
+                SELECT agent, COUNT(*) as cnt
+                FROM memory_archive
+                WHERE project_id = ?
+                GROUP BY agent
+                """,
+                (project_id,),
+            )
+            by_agent = {row["agent"]: row["cnt"] for row in cursor.fetchall()}
+
+            # Date range
+            cursor.execute(
+                f"SELECT MIN(archived_at) as oldest, MAX(archived_at) as newest FROM memory_archive WHERE {base_where}",
+                base_params,
+            )
+            dates = cursor.fetchone()
+            oldest_archive = (
+                self._parse_datetime(dates["oldest"]) if dates["oldest"] else None
+            )
+            newest_archive = (
+                self._parse_datetime(dates["newest"]) if dates["newest"] else None
+            )
+
+            stats = ArchiveStats(
+                total_count=total_count,
+                by_reason=by_reason,
+                by_type=by_type,
+                by_agent=by_agent,
+                restored_count=restored_count,
+                oldest_archive=oldest_archive,
+                newest_archive=newest_archive,
+            )
+
+            return stats.to_dict()
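The stats call returns a plain dict via ArchiveStats.to_dict(), whose body is not in this hunk; based on the fields populated above, it should carry at least the keys listed in the comment below (an assumption about to_dict's key names):

    stats = storage.get_archive_stats("my-project")
    # Expected keys: total_count, by_reason, by_type, by_agent,
    # restored_count, oldest_archive, newest_archive
    print(stats["total_count"], stats["by_reason"])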
+
+    def _row_to_archived_memory(self, row: sqlite3.Row) -> "ArchivedMemory":
+        """Convert database row to ArchivedMemory."""
+        from alma.storage.archive import ArchivedMemory
+
+        # Parse embedding
+        embedding = None
+        if row["embedding"]:
+            embedding = np.frombuffer(row["embedding"], dtype=np.float32).tolist()
+
+        # Parse metadata
+        metadata = json.loads(row["metadata"]) if row["metadata"] else {}
+
+        # Parse restored_at
+        restored_at = None
+        if row["restored_at"]:
+            restored_at = self._parse_datetime(row["restored_at"])
+
+        return ArchivedMemory(
+            id=row["id"],
+            original_id=row["original_id"],
+            memory_type=row["memory_type"],
+            content=row["content"],
+            embedding=embedding,
+            metadata=metadata,
+            original_created_at=self._parse_datetime(row["original_created_at"])
+            or datetime.now(timezone.utc),
+            archived_at=self._parse_datetime(row["archived_at"])
+            or datetime.now(timezone.utc),
+            archive_reason=row["archive_reason"],
+            final_strength=row["final_strength"],
+            project_id=row["project_id"],
+            agent=row["agent"],
+            restored=bool(row["restored"]),
+            restored_at=restored_at,
+            restored_as=row["restored_as"],
+        )