alma-memory 0.5.1-py3-none-any.whl → 0.7.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. alma/__init__.py +296 -226
  2. alma/compression/__init__.py +33 -0
  3. alma/compression/pipeline.py +980 -0
  4. alma/confidence/__init__.py +47 -47
  5. alma/confidence/engine.py +540 -540
  6. alma/confidence/types.py +351 -351
  7. alma/config/loader.py +157 -157
  8. alma/consolidation/__init__.py +23 -23
  9. alma/consolidation/engine.py +678 -678
  10. alma/consolidation/prompts.py +84 -84
  11. alma/core.py +1189 -430
  12. alma/domains/__init__.py +30 -30
  13. alma/domains/factory.py +359 -359
  14. alma/domains/schemas.py +448 -448
  15. alma/domains/types.py +272 -272
  16. alma/events/__init__.py +75 -75
  17. alma/events/emitter.py +285 -284
  18. alma/events/storage_mixin.py +246 -246
  19. alma/events/types.py +126 -126
  20. alma/events/webhook.py +425 -425
  21. alma/exceptions.py +49 -49
  22. alma/extraction/__init__.py +31 -31
  23. alma/extraction/auto_learner.py +265 -265
  24. alma/extraction/extractor.py +420 -420
  25. alma/graph/__init__.py +106 -106
  26. alma/graph/backends/__init__.py +32 -32
  27. alma/graph/backends/kuzu.py +624 -624
  28. alma/graph/backends/memgraph.py +432 -432
  29. alma/graph/backends/memory.py +236 -236
  30. alma/graph/backends/neo4j.py +417 -417
  31. alma/graph/base.py +159 -159
  32. alma/graph/extraction.py +198 -198
  33. alma/graph/store.py +860 -860
  34. alma/harness/__init__.py +35 -35
  35. alma/harness/base.py +386 -386
  36. alma/harness/domains.py +705 -705
  37. alma/initializer/__init__.py +37 -37
  38. alma/initializer/initializer.py +418 -418
  39. alma/initializer/types.py +250 -250
  40. alma/integration/__init__.py +62 -62
  41. alma/integration/claude_agents.py +444 -444
  42. alma/integration/helena.py +423 -423
  43. alma/integration/victor.py +471 -471
  44. alma/learning/__init__.py +101 -86
  45. alma/learning/decay.py +878 -0
  46. alma/learning/forgetting.py +1446 -1446
  47. alma/learning/heuristic_extractor.py +390 -390
  48. alma/learning/protocols.py +374 -374
  49. alma/learning/validation.py +346 -346
  50. alma/mcp/__init__.py +123 -45
  51. alma/mcp/__main__.py +156 -156
  52. alma/mcp/resources.py +122 -122
  53. alma/mcp/server.py +955 -591
  54. alma/mcp/tools.py +3254 -509
  55. alma/observability/__init__.py +91 -84
  56. alma/observability/config.py +302 -302
  57. alma/observability/guidelines.py +170 -0
  58. alma/observability/logging.py +424 -424
  59. alma/observability/metrics.py +583 -583
  60. alma/observability/tracing.py +440 -440
  61. alma/progress/__init__.py +21 -21
  62. alma/progress/tracker.py +607 -607
  63. alma/progress/types.py +250 -250
  64. alma/retrieval/__init__.py +134 -53
  65. alma/retrieval/budget.py +525 -0
  66. alma/retrieval/cache.py +1304 -1061
  67. alma/retrieval/embeddings.py +202 -202
  68. alma/retrieval/engine.py +850 -427
  69. alma/retrieval/modes.py +365 -0
  70. alma/retrieval/progressive.py +560 -0
  71. alma/retrieval/scoring.py +344 -344
  72. alma/retrieval/trust_scoring.py +637 -0
  73. alma/retrieval/verification.py +797 -0
  74. alma/session/__init__.py +19 -19
  75. alma/session/manager.py +442 -399
  76. alma/session/types.py +288 -288
  77. alma/storage/__init__.py +101 -90
  78. alma/storage/archive.py +233 -0
  79. alma/storage/azure_cosmos.py +1259 -1259
  80. alma/storage/base.py +1083 -583
  81. alma/storage/chroma.py +1443 -1443
  82. alma/storage/constants.py +103 -103
  83. alma/storage/file_based.py +614 -614
  84. alma/storage/migrations/__init__.py +21 -21
  85. alma/storage/migrations/base.py +321 -321
  86. alma/storage/migrations/runner.py +323 -323
  87. alma/storage/migrations/version_stores.py +337 -337
  88. alma/storage/migrations/versions/__init__.py +11 -11
  89. alma/storage/migrations/versions/v1_0_0.py +373 -373
  90. alma/storage/migrations/versions/v1_1_0_workflow_context.py +551 -0
  91. alma/storage/pinecone.py +1080 -1080
  92. alma/storage/postgresql.py +1948 -1559
  93. alma/storage/qdrant.py +1306 -1306
  94. alma/storage/sqlite_local.py +3041 -1457
  95. alma/testing/__init__.py +46 -46
  96. alma/testing/factories.py +301 -301
  97. alma/testing/mocks.py +389 -389
  98. alma/types.py +292 -264
  99. alma/utils/__init__.py +19 -0
  100. alma/utils/tokenizer.py +521 -0
  101. alma/workflow/__init__.py +83 -0
  102. alma/workflow/artifacts.py +170 -0
  103. alma/workflow/checkpoint.py +311 -0
  104. alma/workflow/context.py +228 -0
  105. alma/workflow/outcomes.py +189 -0
  106. alma/workflow/reducers.py +393 -0
  107. {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/METADATA +210 -72
  108. alma_memory-0.7.0.dist-info/RECORD +112 -0
  109. alma_memory-0.5.1.dist-info/RECORD +0 -93
  110. {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/WHEEL +0 -0
  111. {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/top_level.txt +0 -0
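
The hunk below reproduces entry 94 above, alma/storage/sqlite_local.py (+3041 -1457), the only file whose contents are shown in this diff. For orientation, here is a minimal usage sketch of that backend, based solely on APIs visible in the hunk (the from_config keys, save_heuristic, and get_heuristics); the Heuristic kwargs are assumed to mirror the columns that _row_to_heuristic reads back, and all values are illustrative:

    from datetime import datetime, timezone

    from alma.storage.sqlite_local import SQLiteStorage
    from alma.types import Heuristic

    # Config keys and defaults as read by SQLiteStorage.from_config below.
    storage = SQLiteStorage.from_config(
        {"storage_dir": ".alma", "db_name": "alma.db", "embedding_dim": 384}
    )

    # Kwargs follow the heuristics table schema; the values are made up.
    heuristic = Heuristic(
        id="h-001",
        agent="planner",
        project_id="demo",
        condition="task mentions flaky tests",
        strategy="rerun with retries before deep debugging",
        confidence=0.6,
        occurrence_count=1,
        success_count=1,
        last_validated=datetime.now(timezone.utc),
        created_at=datetime.now(timezone.utc),
        metadata={},
    )
    storage.save_heuristic(heuristic)

    # Without an embedding this is a pure SQL lookup ordered by confidence.
    matches = storage.get_heuristics(project_id="demo", agent="planner", top_k=5)
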
@@ -1,1457 +1,3041 @@
- """
- ALMA SQLite + FAISS Storage Backend.
-
- Local storage using SQLite for structured data and FAISS for vector search.
- This is the recommended backend for local development and testing.
- """
-
- import json
- import logging
- import sqlite3
- from contextlib import contextmanager
- from datetime import datetime, timezone
- from pathlib import Path
- from typing import Any, Dict, List, Optional, Tuple
-
- import numpy as np
-
- from alma.storage.base import StorageBackend
- from alma.storage.constants import SQLITE_TABLE_NAMES, MemoryType
- from alma.types import (
-     AntiPattern,
-     DomainKnowledge,
-     Heuristic,
-     Outcome,
-     UserPreference,
- )
-
- logger = logging.getLogger(__name__)
-
- # Try to import FAISS, fall back to numpy-based search if not available
- try:
-     import faiss
-
-     FAISS_AVAILABLE = True
- except ImportError:
-     FAISS_AVAILABLE = False
-     logger.warning("FAISS not available, falling back to numpy-based vector search")
-
-
- class SQLiteStorage(StorageBackend):
-     """
-     SQLite + FAISS storage backend.
-
-     Uses SQLite for structured data and FAISS for efficient vector similarity search.
-     Falls back to numpy cosine similarity if FAISS is not installed.
-
-     Database schema:
-     - heuristics: id, agent, project_id, condition, strategy, confidence, ...
-     - outcomes: id, agent, project_id, task_type, task_description, success, ...
-     - preferences: id, user_id, category, preference, source, ...
-     - domain_knowledge: id, agent, project_id, domain, fact, ...
-     - anti_patterns: id, agent, project_id, pattern, why_bad, ...
-     - embeddings: id, memory_type, memory_id, embedding (blob)
-     """
-
-     def __init__(
-         self,
-         db_path: Path,
-         embedding_dim: int = 384,  # Default for all-MiniLM-L6-v2
-         auto_migrate: bool = True,
-     ):
-         """
-         Initialize SQLite storage.
-
-         Args:
-             db_path: Path to SQLite database file
-             embedding_dim: Dimension of embedding vectors
-             auto_migrate: If True, automatically apply pending migrations on startup
-         """
-         self.db_path = Path(db_path)
-         self.db_path.parent.mkdir(parents=True, exist_ok=True)
-         self.embedding_dim = embedding_dim
-
-         # Migration support (lazy-loaded)
-         self._migration_runner = None
-         self._version_store = None
-
-         # Initialize database
-         self._init_database()
-
-         # Initialize FAISS indices (one per memory type)
-         self._indices: Dict[str, Any] = {}
-         self._id_maps: Dict[str, List[str]] = {}  # memory_type -> [memory_ids]
-         self._index_dirty: Dict[str, bool] = {}  # Track which indexes need rebuilding
-         self._load_faiss_indices()
-
-         # Auto-migrate if enabled
-         if auto_migrate:
-             self._ensure_migrated()
-
-     @classmethod
-     def from_config(cls, config: Dict[str, Any]) -> "SQLiteStorage":
-         """Create instance from configuration."""
-         storage_dir = config.get("storage_dir", ".alma")
-         db_name = config.get("db_name", "alma.db")
-         embedding_dim = config.get("embedding_dim", 384)
-
-         db_path = Path(storage_dir) / db_name
-         return cls(db_path=db_path, embedding_dim=embedding_dim)
-
-     @contextmanager
-     def _get_connection(self):
-         """Get database connection with context manager."""
-         conn = sqlite3.connect(self.db_path)
-         conn.row_factory = sqlite3.Row
-         try:
-             yield conn
-             conn.commit()
-         except Exception:
-             conn.rollback()
-             raise
-         finally:
-             conn.close()
-
-     def _init_database(self):
-         """Initialize database schema."""
-         with self._get_connection() as conn:
-             cursor = conn.cursor()
-
-             # Heuristics table
-             cursor.execute("""
-                 CREATE TABLE IF NOT EXISTS heuristics (
-                     id TEXT PRIMARY KEY,
-                     agent TEXT NOT NULL,
-                     project_id TEXT NOT NULL,
-                     condition TEXT NOT NULL,
-                     strategy TEXT NOT NULL,
-                     confidence REAL DEFAULT 0.0,
-                     occurrence_count INTEGER DEFAULT 0,
-                     success_count INTEGER DEFAULT 0,
-                     last_validated TEXT,
-                     created_at TEXT,
-                     metadata TEXT
-                 )
-             """)
-             cursor.execute(
-                 "CREATE INDEX IF NOT EXISTS idx_heuristics_project_agent "
-                 "ON heuristics(project_id, agent)"
-             )
-
-             # Outcomes table
-             cursor.execute("""
-                 CREATE TABLE IF NOT EXISTS outcomes (
-                     id TEXT PRIMARY KEY,
-                     agent TEXT NOT NULL,
-                     project_id TEXT NOT NULL,
-                     task_type TEXT,
-                     task_description TEXT NOT NULL,
-                     success INTEGER DEFAULT 0,
-                     strategy_used TEXT,
-                     duration_ms INTEGER,
-                     error_message TEXT,
-                     user_feedback TEXT,
-                     timestamp TEXT,
-                     metadata TEXT
-                 )
-             """)
-             cursor.execute(
-                 "CREATE INDEX IF NOT EXISTS idx_outcomes_project_agent "
-                 "ON outcomes(project_id, agent)"
-             )
-             cursor.execute(
-                 "CREATE INDEX IF NOT EXISTS idx_outcomes_task_type "
-                 "ON outcomes(project_id, agent, task_type)"
-             )
-             cursor.execute(
-                 "CREATE INDEX IF NOT EXISTS idx_outcomes_timestamp "
-                 "ON outcomes(project_id, timestamp)"
-             )
-
-             # User preferences table
-             cursor.execute("""
-                 CREATE TABLE IF NOT EXISTS preferences (
-                     id TEXT PRIMARY KEY,
-                     user_id TEXT NOT NULL,
-                     category TEXT,
-                     preference TEXT NOT NULL,
-                     source TEXT,
-                     confidence REAL DEFAULT 1.0,
-                     timestamp TEXT,
-                     metadata TEXT
-                 )
-             """)
-             cursor.execute(
-                 "CREATE INDEX IF NOT EXISTS idx_preferences_user "
-                 "ON preferences(user_id)"
-             )
-
-             # Domain knowledge table
-             cursor.execute("""
-                 CREATE TABLE IF NOT EXISTS domain_knowledge (
-                     id TEXT PRIMARY KEY,
-                     agent TEXT NOT NULL,
-                     project_id TEXT NOT NULL,
-                     domain TEXT,
-                     fact TEXT NOT NULL,
-                     source TEXT,
-                     confidence REAL DEFAULT 1.0,
-                     last_verified TEXT,
-                     metadata TEXT
-                 )
-             """)
-             cursor.execute(
-                 "CREATE INDEX IF NOT EXISTS idx_domain_knowledge_project_agent "
-                 "ON domain_knowledge(project_id, agent)"
-             )
-
-             # Anti-patterns table
-             cursor.execute("""
-                 CREATE TABLE IF NOT EXISTS anti_patterns (
-                     id TEXT PRIMARY KEY,
-                     agent TEXT NOT NULL,
-                     project_id TEXT NOT NULL,
-                     pattern TEXT NOT NULL,
-                     why_bad TEXT,
-                     better_alternative TEXT,
-                     occurrence_count INTEGER DEFAULT 1,
-                     last_seen TEXT,
-                     created_at TEXT,
-                     metadata TEXT
-                 )
-             """)
-             cursor.execute(
-                 "CREATE INDEX IF NOT EXISTS idx_anti_patterns_project_agent "
-                 "ON anti_patterns(project_id, agent)"
-             )
-
-             # Embeddings table (stores vectors as blobs)
-             cursor.execute("""
-                 CREATE TABLE IF NOT EXISTS embeddings (
-                     id INTEGER PRIMARY KEY AUTOINCREMENT,
-                     memory_type TEXT NOT NULL,
-                     memory_id TEXT NOT NULL,
-                     embedding BLOB NOT NULL,
-                     UNIQUE(memory_type, memory_id)
-                 )
-             """)
-             cursor.execute(
-                 "CREATE INDEX IF NOT EXISTS idx_embeddings_type "
-                 "ON embeddings(memory_type)"
-             )
-
-     def _load_faiss_indices(self, memory_types: Optional[List[str]] = None):
-         """Load or create FAISS indices for specified memory types.
-
-         Args:
-             memory_types: List of memory types to load. If None, loads all types.
-         """
-         if memory_types is None:
-             memory_types = list(MemoryType.VECTOR_ENABLED)
-
-         for memory_type in memory_types:
-             if FAISS_AVAILABLE:
-                 # Use FAISS index
-                 self._indices[memory_type] = faiss.IndexFlatIP(self.embedding_dim)
-             else:
-                 # Use list for numpy fallback
-                 self._indices[memory_type] = []
-
-             self._id_maps[memory_type] = []
-             self._index_dirty[memory_type] = False  # Mark as fresh after rebuild
-
-             # Load existing embeddings
-             with self._get_connection() as conn:
-                 cursor = conn.cursor()
-                 cursor.execute(
-                     "SELECT memory_id, embedding FROM embeddings WHERE memory_type = ?",
-                     (memory_type,),
-                 )
-                 rows = cursor.fetchall()
-
-             for row in rows:
-                 memory_id = row["memory_id"]
-                 embedding = np.frombuffer(row["embedding"], dtype=np.float32)
-
-                 self._id_maps[memory_type].append(memory_id)
-                 if FAISS_AVAILABLE:
-                     self._indices[memory_type].add(
-                         embedding.reshape(1, -1).astype(np.float32)
-                     )
-                 else:
-                     self._indices[memory_type].append(embedding)
-
-     def _ensure_index_fresh(self, memory_type: str) -> None:
-         """Rebuild index for a memory type if it has been marked dirty.
-
-         This implements lazy rebuilding - indexes are only rebuilt when
-         actually needed for search, not immediately on every delete.
-
-         Args:
-             memory_type: The type of memory index to check/rebuild.
-         """
-         if self._index_dirty.get(memory_type, False):
-             logger.debug(f"Rebuilding dirty index for {memory_type}")
-             self._load_faiss_indices([memory_type])
-
-     def _add_to_index(
-         self,
-         memory_type: str,
-         memory_id: str,
-         embedding: Optional[List[float]],
-     ):
-         """Add embedding to FAISS index."""
-         if embedding is None:
-             return
-
-         embedding_array = np.array(embedding, dtype=np.float32)
-
-         # Store in database
-         with self._get_connection() as conn:
-             cursor = conn.cursor()
-             cursor.execute(
-                 """
-                 INSERT OR REPLACE INTO embeddings (memory_type, memory_id, embedding)
-                 VALUES (?, ?, ?)
-                 """,
-                 (memory_type, memory_id, embedding_array.tobytes()),
-             )
-
-         # Add to index
-         self._id_maps[memory_type].append(memory_id)
-         if FAISS_AVAILABLE:
-             self._indices[memory_type].add(
-                 embedding_array.reshape(1, -1).astype(np.float32)
-             )
-         else:
-             self._indices[memory_type].append(embedding_array)
-
-     def _search_index(
-         self,
-         memory_type: str,
-         query_embedding: List[float],
-         top_k: int,
-     ) -> List[Tuple[str, float]]:
-         """Search FAISS index for similar embeddings."""
-         # Ensure index is up-to-date before searching (lazy rebuild)
-         self._ensure_index_fresh(memory_type)
-
-         if not self._id_maps[memory_type]:
-             return []
-
-         query = np.array(query_embedding, dtype=np.float32).reshape(1, -1)
-
-         if FAISS_AVAILABLE:
-             # Normalize for cosine similarity (IndexFlatIP)
-             faiss.normalize_L2(query)
-             scores, indices = self._indices[memory_type].search(
-                 query, min(top_k, len(self._id_maps[memory_type]))
-             )
-
-             results = []
-             for score, idx in zip(scores[0], indices[0], strict=False):
-                 if idx >= 0 and idx < len(self._id_maps[memory_type]):
-                     results.append((self._id_maps[memory_type][idx], float(score)))
-             return results
-         else:
-             # Numpy fallback with cosine similarity
-             embeddings = np.array(self._indices[memory_type])
-             if len(embeddings) == 0:
-                 return []
-
-             # Normalize
-             query_norm = query / np.linalg.norm(query)
-             emb_norms = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)
-
-             # Cosine similarity
-             similarities = np.dot(emb_norms, query_norm.T).flatten()
-
-             # Get top k
-             top_indices = np.argsort(similarities)[::-1][:top_k]
-
-             return [
-                 (self._id_maps[memory_type][i], float(similarities[i]))
-                 for i in top_indices
-             ]
-
-     # ==================== WRITE OPERATIONS ====================
-
-     def save_heuristic(self, heuristic: Heuristic) -> str:
-         """Save a heuristic."""
-         with self._get_connection() as conn:
-             cursor = conn.cursor()
-             cursor.execute(
-                 """
-                 INSERT OR REPLACE INTO heuristics
-                 (id, agent, project_id, condition, strategy, confidence,
-                  occurrence_count, success_count, last_validated, created_at, metadata)
-                 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
-                 """,
-                 (
-                     heuristic.id,
-                     heuristic.agent,
-                     heuristic.project_id,
-                     heuristic.condition,
-                     heuristic.strategy,
-                     heuristic.confidence,
-                     heuristic.occurrence_count,
-                     heuristic.success_count,
-                     (
-                         heuristic.last_validated.isoformat()
-                         if heuristic.last_validated
-                         else None
-                     ),
-                     heuristic.created_at.isoformat() if heuristic.created_at else None,
-                     json.dumps(heuristic.metadata) if heuristic.metadata else None,
-                 ),
-             )
-
-         # Add embedding to index
-         self._add_to_index(MemoryType.HEURISTICS, heuristic.id, heuristic.embedding)
-         logger.debug(f"Saved heuristic: {heuristic.id}")
-         return heuristic.id
-
-     def save_outcome(self, outcome: Outcome) -> str:
-         """Save an outcome."""
-         with self._get_connection() as conn:
-             cursor = conn.cursor()
-             cursor.execute(
-                 """
-                 INSERT OR REPLACE INTO outcomes
-                 (id, agent, project_id, task_type, task_description, success,
-                  strategy_used, duration_ms, error_message, user_feedback, timestamp, metadata)
-                 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
-                 """,
-                 (
-                     outcome.id,
-                     outcome.agent,
-                     outcome.project_id,
-                     outcome.task_type,
-                     outcome.task_description,
-                     1 if outcome.success else 0,
-                     outcome.strategy_used,
-                     outcome.duration_ms,
-                     outcome.error_message,
-                     outcome.user_feedback,
-                     outcome.timestamp.isoformat() if outcome.timestamp else None,
-                     json.dumps(outcome.metadata) if outcome.metadata else None,
-                 ),
-             )
-
-         # Add embedding to index
-         self._add_to_index(MemoryType.OUTCOMES, outcome.id, outcome.embedding)
-         logger.debug(f"Saved outcome: {outcome.id}")
-         return outcome.id
-
-     def save_user_preference(self, preference: UserPreference) -> str:
-         """Save a user preference."""
-         with self._get_connection() as conn:
-             cursor = conn.cursor()
-             cursor.execute(
-                 """
-                 INSERT OR REPLACE INTO preferences
-                 (id, user_id, category, preference, source, confidence, timestamp, metadata)
-                 VALUES (?, ?, ?, ?, ?, ?, ?, ?)
-                 """,
-                 (
-                     preference.id,
-                     preference.user_id,
-                     preference.category,
-                     preference.preference,
-                     preference.source,
-                     preference.confidence,
-                     preference.timestamp.isoformat() if preference.timestamp else None,
-                     json.dumps(preference.metadata) if preference.metadata else None,
-                 ),
-             )
-         logger.debug(f"Saved preference: {preference.id}")
-         return preference.id
-
-     def save_domain_knowledge(self, knowledge: DomainKnowledge) -> str:
-         """Save domain knowledge."""
-         with self._get_connection() as conn:
-             cursor = conn.cursor()
-             cursor.execute(
-                 """
-                 INSERT OR REPLACE INTO domain_knowledge
-                 (id, agent, project_id, domain, fact, source, confidence, last_verified, metadata)
-                 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
-                 """,
-                 (
-                     knowledge.id,
-                     knowledge.agent,
-                     knowledge.project_id,
-                     knowledge.domain,
-                     knowledge.fact,
-                     knowledge.source,
-                     knowledge.confidence,
-                     (
-                         knowledge.last_verified.isoformat()
-                         if knowledge.last_verified
-                         else None
-                     ),
-                     json.dumps(knowledge.metadata) if knowledge.metadata else None,
-                 ),
-             )
-
-         # Add embedding to index
-         self._add_to_index(
-             MemoryType.DOMAIN_KNOWLEDGE, knowledge.id, knowledge.embedding
-         )
-         logger.debug(f"Saved domain knowledge: {knowledge.id}")
-         return knowledge.id
-
-     def save_anti_pattern(self, anti_pattern: AntiPattern) -> str:
-         """Save an anti-pattern."""
-         with self._get_connection() as conn:
-             cursor = conn.cursor()
-             cursor.execute(
-                 """
-                 INSERT OR REPLACE INTO anti_patterns
-                 (id, agent, project_id, pattern, why_bad, better_alternative,
-                  occurrence_count, last_seen, created_at, metadata)
-                 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
-                 """,
-                 (
-                     anti_pattern.id,
-                     anti_pattern.agent,
-                     anti_pattern.project_id,
-                     anti_pattern.pattern,
-                     anti_pattern.why_bad,
-                     anti_pattern.better_alternative,
-                     anti_pattern.occurrence_count,
-                     (
-                         anti_pattern.last_seen.isoformat()
-                         if anti_pattern.last_seen
-                         else None
-                     ),
-                     (
-                         anti_pattern.created_at.isoformat()
-                         if anti_pattern.created_at
-                         else None
-                     ),
-                     (
-                         json.dumps(anti_pattern.metadata)
-                         if anti_pattern.metadata
-                         else None
-                     ),
-                 ),
-             )
-
-         # Add embedding to index
-         self._add_to_index(
-             MemoryType.ANTI_PATTERNS, anti_pattern.id, anti_pattern.embedding
-         )
-         logger.debug(f"Saved anti-pattern: {anti_pattern.id}")
-         return anti_pattern.id
-
-     # ==================== BATCH WRITE OPERATIONS ====================
-
-     def save_heuristics(self, heuristics: List[Heuristic]) -> List[str]:
-         """Save multiple heuristics in a batch using executemany."""
-         if not heuristics:
-             return []
-
-         with self._get_connection() as conn:
-             cursor = conn.cursor()
-             cursor.executemany(
-                 """
-                 INSERT OR REPLACE INTO heuristics
-                 (id, agent, project_id, condition, strategy, confidence,
-                  occurrence_count, success_count, last_validated, created_at, metadata)
-                 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
-                 """,
-                 [
-                     (
-                         h.id,
-                         h.agent,
-                         h.project_id,
-                         h.condition,
-                         h.strategy,
-                         h.confidence,
-                         h.occurrence_count,
-                         h.success_count,
-                         h.last_validated.isoformat() if h.last_validated else None,
-                         h.created_at.isoformat() if h.created_at else None,
-                         json.dumps(h.metadata) if h.metadata else None,
-                     )
-                     for h in heuristics
-                 ],
-             )
-
-         # Add embeddings to index
-         for h in heuristics:
-             self._add_to_index(MemoryType.HEURISTICS, h.id, h.embedding)
-
-         logger.debug(f"Batch saved {len(heuristics)} heuristics")
-         return [h.id for h in heuristics]
-
-     def save_outcomes(self, outcomes: List[Outcome]) -> List[str]:
-         """Save multiple outcomes in a batch using executemany."""
-         if not outcomes:
-             return []
-
-         with self._get_connection() as conn:
-             cursor = conn.cursor()
-             cursor.executemany(
-                 """
-                 INSERT OR REPLACE INTO outcomes
-                 (id, agent, project_id, task_type, task_description, success,
-                  strategy_used, duration_ms, error_message, user_feedback, timestamp, metadata)
-                 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
-                 """,
-                 [
-                     (
-                         o.id,
-                         o.agent,
-                         o.project_id,
-                         o.task_type,
-                         o.task_description,
-                         1 if o.success else 0,
-                         o.strategy_used,
-                         o.duration_ms,
-                         o.error_message,
-                         o.user_feedback,
-                         o.timestamp.isoformat() if o.timestamp else None,
-                         json.dumps(o.metadata) if o.metadata else None,
-                     )
-                     for o in outcomes
-                 ],
-             )
-
-         # Add embeddings to index
-         for o in outcomes:
-             self._add_to_index(MemoryType.OUTCOMES, o.id, o.embedding)
-
-         logger.debug(f"Batch saved {len(outcomes)} outcomes")
-         return [o.id for o in outcomes]
-
-     def save_domain_knowledge_batch(
-         self, knowledge_items: List[DomainKnowledge]
-     ) -> List[str]:
-         """Save multiple domain knowledge items in a batch using executemany."""
-         if not knowledge_items:
-             return []
-
-         with self._get_connection() as conn:
-             cursor = conn.cursor()
-             cursor.executemany(
-                 """
-                 INSERT OR REPLACE INTO domain_knowledge
-                 (id, agent, project_id, domain, fact, source, confidence, last_verified, metadata)
-                 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
-                 """,
-                 [
-                     (
-                         k.id,
-                         k.agent,
-                         k.project_id,
-                         k.domain,
-                         k.fact,
-                         k.source,
-                         k.confidence,
-                         k.last_verified.isoformat() if k.last_verified else None,
-                         json.dumps(k.metadata) if k.metadata else None,
-                     )
-                     for k in knowledge_items
-                 ],
-             )
-
-         # Add embeddings to index
-         for k in knowledge_items:
-             self._add_to_index(MemoryType.DOMAIN_KNOWLEDGE, k.id, k.embedding)
-
-         logger.debug(f"Batch saved {len(knowledge_items)} domain knowledge items")
-         return [k.id for k in knowledge_items]
-
-     # ==================== READ OPERATIONS ====================
-
-     def get_heuristics(
-         self,
-         project_id: str,
-         agent: Optional[str] = None,
-         embedding: Optional[List[float]] = None,
-         top_k: int = 5,
-         min_confidence: float = 0.0,
-     ) -> List[Heuristic]:
-         """Get heuristics with optional vector search."""
-         # If embedding provided, use vector search to get candidate IDs
-         candidate_ids = None
-         if embedding:
-             search_results = self._search_index(
-                 MemoryType.HEURISTICS, embedding, top_k * 2
-             )
-             candidate_ids = [id for id, _ in search_results]
-
-         with self._get_connection() as conn:
-             cursor = conn.cursor()
-
-             query = "SELECT * FROM heuristics WHERE project_id = ? AND confidence >= ?"
-             params: List[Any] = [project_id, min_confidence]
-
-             if agent:
-                 query += " AND agent = ?"
-                 params.append(agent)
-
-             if candidate_ids is not None:
-                 placeholders = ",".join("?" * len(candidate_ids))
-                 query += f" AND id IN ({placeholders})"
-                 params.extend(candidate_ids)
-
-             query += " ORDER BY confidence DESC LIMIT ?"
-             params.append(top_k)
-
-             cursor.execute(query, params)
-             rows = cursor.fetchall()
-
-             return [self._row_to_heuristic(row) for row in rows]
-
-     def get_outcomes(
-         self,
-         project_id: str,
-         agent: Optional[str] = None,
-         task_type: Optional[str] = None,
-         embedding: Optional[List[float]] = None,
-         top_k: int = 5,
-         success_only: bool = False,
-     ) -> List[Outcome]:
-         """Get outcomes with optional vector search."""
-         candidate_ids = None
-         if embedding:
-             search_results = self._search_index(
-                 MemoryType.OUTCOMES, embedding, top_k * 2
-             )
-             candidate_ids = [id for id, _ in search_results]
-
-         with self._get_connection() as conn:
-             cursor = conn.cursor()
-
-             query = "SELECT * FROM outcomes WHERE project_id = ?"
-             params: List[Any] = [project_id]
-
-             if agent:
-                 query += " AND agent = ?"
-                 params.append(agent)
-
-             if task_type:
-                 query += " AND task_type = ?"
-                 params.append(task_type)
-
-             if success_only:
-                 query += " AND success = 1"
-
-             if candidate_ids is not None:
-                 placeholders = ",".join("?" * len(candidate_ids))
-                 query += f" AND id IN ({placeholders})"
-                 params.extend(candidate_ids)
-
-             query += " ORDER BY timestamp DESC LIMIT ?"
-             params.append(top_k)
-
-             cursor.execute(query, params)
-             rows = cursor.fetchall()
-
-             return [self._row_to_outcome(row) for row in rows]
-
-     def get_user_preferences(
-         self,
-         user_id: str,
-         category: Optional[str] = None,
-     ) -> List[UserPreference]:
-         """Get user preferences."""
-         with self._get_connection() as conn:
-             cursor = conn.cursor()
-
-             query = "SELECT * FROM preferences WHERE user_id = ?"
-             params: List[Any] = [user_id]
-
-             if category:
-                 query += " AND category = ?"
-                 params.append(category)
-
-             cursor.execute(query, params)
-             rows = cursor.fetchall()
-
-             return [self._row_to_preference(row) for row in rows]
-
-     def get_domain_knowledge(
-         self,
-         project_id: str,
-         agent: Optional[str] = None,
-         domain: Optional[str] = None,
-         embedding: Optional[List[float]] = None,
-         top_k: int = 5,
-     ) -> List[DomainKnowledge]:
-         """Get domain knowledge with optional vector search."""
-         candidate_ids = None
-         if embedding:
-             search_results = self._search_index(
-                 MemoryType.DOMAIN_KNOWLEDGE, embedding, top_k * 2
-             )
-             candidate_ids = [id for id, _ in search_results]
-
-         with self._get_connection() as conn:
-             cursor = conn.cursor()
-
-             query = "SELECT * FROM domain_knowledge WHERE project_id = ?"
-             params: List[Any] = [project_id]
-
-             if agent:
-                 query += " AND agent = ?"
-                 params.append(agent)
-
-             if domain:
-                 query += " AND domain = ?"
-                 params.append(domain)
-
-             if candidate_ids is not None:
-                 placeholders = ",".join("?" * len(candidate_ids))
-                 query += f" AND id IN ({placeholders})"
-                 params.extend(candidate_ids)
-
-             query += " ORDER BY confidence DESC LIMIT ?"
-             params.append(top_k)
-
-             cursor.execute(query, params)
-             rows = cursor.fetchall()
-
-             return [self._row_to_domain_knowledge(row) for row in rows]
-
-     def get_anti_patterns(
-         self,
-         project_id: str,
-         agent: Optional[str] = None,
-         embedding: Optional[List[float]] = None,
-         top_k: int = 5,
-     ) -> List[AntiPattern]:
-         """Get anti-patterns with optional vector search."""
-         candidate_ids = None
-         if embedding:
-             search_results = self._search_index(
-                 MemoryType.ANTI_PATTERNS, embedding, top_k * 2
-             )
-             candidate_ids = [id for id, _ in search_results]
-
-         with self._get_connection() as conn:
-             cursor = conn.cursor()
-
-             query = "SELECT * FROM anti_patterns WHERE project_id = ?"
-             params: List[Any] = [project_id]
-
-             if agent:
-                 query += " AND agent = ?"
-                 params.append(agent)
-
-             if candidate_ids is not None:
-                 placeholders = ",".join("?" * len(candidate_ids))
-                 query += f" AND id IN ({placeholders})"
-                 params.extend(candidate_ids)
-
-             query += " ORDER BY occurrence_count DESC LIMIT ?"
-             params.append(top_k)
-
-             cursor.execute(query, params)
-             rows = cursor.fetchall()
-
-             return [self._row_to_anti_pattern(row) for row in rows]
-
-     # ==================== MULTI-AGENT MEMORY SHARING ====================
-
-     def get_heuristics_for_agents(
-         self,
-         project_id: str,
-         agents: List[str],
-         embedding: Optional[List[float]] = None,
-         top_k: int = 5,
-         min_confidence: float = 0.0,
-     ) -> List[Heuristic]:
-         """Get heuristics from multiple agents using optimized IN query."""
-         if not agents:
-             return []
-
-         candidate_ids = None
-         if embedding:
-             search_results = self._search_index(
-                 MemoryType.HEURISTICS, embedding, top_k * 2 * len(agents)
-             )
-             candidate_ids = [id for id, _ in search_results]
-
-         with self._get_connection() as conn:
-             cursor = conn.cursor()
-
-             placeholders = ",".join("?" * len(agents))
-             query = f"SELECT * FROM heuristics WHERE project_id = ? AND confidence >= ? AND agent IN ({placeholders})"
-             params: List[Any] = [project_id, min_confidence] + list(agents)
-
-             if candidate_ids is not None:
-                 id_placeholders = ",".join("?" * len(candidate_ids))
-                 query += f" AND id IN ({id_placeholders})"
-                 params.extend(candidate_ids)
-
-             query += " ORDER BY confidence DESC LIMIT ?"
-             params.append(top_k * len(agents))
-
-             cursor.execute(query, params)
-             rows = cursor.fetchall()
-
-             return [self._row_to_heuristic(row) for row in rows]
-
-     def get_outcomes_for_agents(
-         self,
-         project_id: str,
-         agents: List[str],
-         task_type: Optional[str] = None,
-         embedding: Optional[List[float]] = None,
-         top_k: int = 5,
-         success_only: bool = False,
-     ) -> List[Outcome]:
-         """Get outcomes from multiple agents using optimized IN query."""
-         if not agents:
-             return []
-
-         candidate_ids = None
-         if embedding:
-             search_results = self._search_index(
-                 MemoryType.OUTCOMES, embedding, top_k * 2 * len(agents)
-             )
-             candidate_ids = [id for id, _ in search_results]
-
-         with self._get_connection() as conn:
-             cursor = conn.cursor()
-
-             placeholders = ",".join("?" * len(agents))
-             query = f"SELECT * FROM outcomes WHERE project_id = ? AND agent IN ({placeholders})"
-             params: List[Any] = [project_id] + list(agents)
-
-             if task_type:
-                 query += " AND task_type = ?"
-                 params.append(task_type)
-
-             if success_only:
-                 query += " AND success = 1"
-
-             if candidate_ids is not None:
-                 id_placeholders = ",".join("?" * len(candidate_ids))
-                 query += f" AND id IN ({id_placeholders})"
-                 params.extend(candidate_ids)
-
-             query += " ORDER BY timestamp DESC LIMIT ?"
-             params.append(top_k * len(agents))
-
-             cursor.execute(query, params)
-             rows = cursor.fetchall()
-
-             return [self._row_to_outcome(row) for row in rows]
-
-     def get_domain_knowledge_for_agents(
-         self,
-         project_id: str,
-         agents: List[str],
-         domain: Optional[str] = None,
-         embedding: Optional[List[float]] = None,
-         top_k: int = 5,
-     ) -> List[DomainKnowledge]:
-         """Get domain knowledge from multiple agents using optimized IN query."""
-         if not agents:
-             return []
-
-         candidate_ids = None
-         if embedding:
-             search_results = self._search_index(
-                 MemoryType.DOMAIN_KNOWLEDGE, embedding, top_k * 2 * len(agents)
-             )
-             candidate_ids = [id for id, _ in search_results]
-
-         with self._get_connection() as conn:
-             cursor = conn.cursor()
-
-             placeholders = ",".join("?" * len(agents))
-             query = f"SELECT * FROM domain_knowledge WHERE project_id = ? AND agent IN ({placeholders})"
-             params: List[Any] = [project_id] + list(agents)
-
-             if domain:
-                 query += " AND domain = ?"
-                 params.append(domain)
-
-             if candidate_ids is not None:
-                 id_placeholders = ",".join("?" * len(candidate_ids))
-                 query += f" AND id IN ({id_placeholders})"
-                 params.extend(candidate_ids)
-
-             query += " ORDER BY confidence DESC LIMIT ?"
-             params.append(top_k * len(agents))
-
-             cursor.execute(query, params)
-             rows = cursor.fetchall()
-
-             return [self._row_to_domain_knowledge(row) for row in rows]
-
-     def get_anti_patterns_for_agents(
-         self,
-         project_id: str,
-         agents: List[str],
-         embedding: Optional[List[float]] = None,
-         top_k: int = 5,
-     ) -> List[AntiPattern]:
-         """Get anti-patterns from multiple agents using optimized IN query."""
-         if not agents:
-             return []
-
-         candidate_ids = None
-         if embedding:
-             search_results = self._search_index(
-                 MemoryType.ANTI_PATTERNS, embedding, top_k * 2 * len(agents)
-             )
-             candidate_ids = [id for id, _ in search_results]
-
-         with self._get_connection() as conn:
-             cursor = conn.cursor()
-
-             placeholders = ",".join("?" * len(agents))
-             query = f"SELECT * FROM anti_patterns WHERE project_id = ? AND agent IN ({placeholders})"
-             params: List[Any] = [project_id] + list(agents)
-
-             if candidate_ids is not None:
-                 id_placeholders = ",".join("?" * len(candidate_ids))
-                 query += f" AND id IN ({id_placeholders})"
-                 params.extend(candidate_ids)
-
-             query += " ORDER BY occurrence_count DESC LIMIT ?"
-             params.append(top_k * len(agents))
-
-             cursor.execute(query, params)
-             rows = cursor.fetchall()
-
-             return [self._row_to_anti_pattern(row) for row in rows]
-
-     # ==================== UPDATE OPERATIONS ====================
-
-     def update_heuristic(
-         self,
-         heuristic_id: str,
-         updates: Dict[str, Any],
-     ) -> bool:
-         """Update a heuristic's fields."""
-         if not updates:
-             return False
-
-         set_clauses = []
-         params = []
-         for key, value in updates.items():
-             if key == "metadata" and value:
-                 value = json.dumps(value)
-             elif isinstance(value, datetime):
-                 value = value.isoformat()
-             set_clauses.append(f"{key} = ?")
-             params.append(value)
-
-         params.append(heuristic_id)
-
-         with self._get_connection() as conn:
-             cursor = conn.cursor()
-             cursor.execute(
-                 f"UPDATE heuristics SET {', '.join(set_clauses)} WHERE id = ?",
-                 params,
-             )
-             return cursor.rowcount > 0
-
-     def increment_heuristic_occurrence(
-         self,
-         heuristic_id: str,
-         success: bool,
-     ) -> bool:
-         """Increment heuristic occurrence count."""
-         with self._get_connection() as conn:
-             cursor = conn.cursor()
-
-             if success:
-                 cursor.execute(
-                     """
-                     UPDATE heuristics
-                     SET occurrence_count = occurrence_count + 1,
-                         success_count = success_count + 1,
-                         last_validated = ?
-                     WHERE id = ?
-                     """,
-                     (datetime.now(timezone.utc).isoformat(), heuristic_id),
-                 )
-             else:
-                 cursor.execute(
-                     """
-                     UPDATE heuristics
-                     SET occurrence_count = occurrence_count + 1,
-                         last_validated = ?
-                     WHERE id = ?
-                     """,
-                     (datetime.now(timezone.utc).isoformat(), heuristic_id),
-                 )
-
-             return cursor.rowcount > 0
-
-     # ==================== DELETE OPERATIONS ====================
-
-     def delete_outcomes_older_than(
-         self,
-         project_id: str,
-         older_than: datetime,
-         agent: Optional[str] = None,
-     ) -> int:
-         """Delete old outcomes."""
-         with self._get_connection() as conn:
-             cursor = conn.cursor()
-
-             query = "DELETE FROM outcomes WHERE project_id = ? AND timestamp < ?"
-             params: List[Any] = [project_id, older_than.isoformat()]
-
-             if agent:
-                 query += " AND agent = ?"
-                 params.append(agent)
-
-             cursor.execute(query, params)
-             deleted = cursor.rowcount
-
-         logger.info(f"Deleted {deleted} old outcomes")
-         return deleted
-
-     def delete_low_confidence_heuristics(
-         self,
-         project_id: str,
-         below_confidence: float,
-         agent: Optional[str] = None,
-     ) -> int:
-         """Delete low-confidence heuristics."""
-         with self._get_connection() as conn:
-             cursor = conn.cursor()
-
-             query = "DELETE FROM heuristics WHERE project_id = ? AND confidence < ?"
-             params: List[Any] = [project_id, below_confidence]
-
-             if agent:
-                 query += " AND agent = ?"
-                 params.append(agent)
-
-             cursor.execute(query, params)
-             deleted = cursor.rowcount
-
-         logger.info(f"Deleted {deleted} low-confidence heuristics")
-         return deleted
-
-     # ==================== STATS ====================
-
-     def get_stats(
-         self,
-         project_id: str,
-         agent: Optional[str] = None,
-     ) -> Dict[str, Any]:
-         """Get memory statistics."""
-         stats = {
-             "project_id": project_id,
-             "agent": agent,
-             "storage_type": "sqlite",
-             "faiss_available": FAISS_AVAILABLE,
-         }
-
-         with self._get_connection() as conn:
-             cursor = conn.cursor()
-
-             # Use canonical memory types for stats
-             for memory_type in MemoryType.ALL:
-                 if memory_type == MemoryType.PREFERENCES:
-                     # Preferences don't have project_id
-                     cursor.execute(
-                         f"SELECT COUNT(*) FROM {SQLITE_TABLE_NAMES[memory_type]}"
-                     )
-                     stats[f"{memory_type}_count"] = cursor.fetchone()[0]
-                 else:
-                     query = f"SELECT COUNT(*) FROM {SQLITE_TABLE_NAMES[memory_type]} WHERE project_id = ?"
-                     params: List[Any] = [project_id]
-                     if agent:
-                         query += " AND agent = ?"
-                         params.append(agent)
-                     cursor.execute(query, params)
-                     stats[f"{memory_type}_count"] = cursor.fetchone()[0]
-
-             # Embedding counts
-             cursor.execute("SELECT COUNT(*) FROM embeddings")
-             stats["embeddings_count"] = cursor.fetchone()[0]
-
-         stats["total_count"] = sum(
-             stats.get(k, 0) for k in stats if k.endswith("_count")
-         )
-
-         return stats
-
-     # ==================== HELPERS ====================
-
-     def _parse_datetime(self, value: Any) -> Optional[datetime]:
-         """Parse datetime from string."""
-         if value is None:
-             return None
-         if isinstance(value, datetime):
-             return value
-         try:
-             return datetime.fromisoformat(value.replace("Z", "+00:00"))
-         except (ValueError, AttributeError):
-             return None
-
-     def _row_to_heuristic(self, row: sqlite3.Row) -> Heuristic:
-         """Convert database row to Heuristic."""
-         return Heuristic(
-             id=row["id"],
-             agent=row["agent"],
-             project_id=row["project_id"],
-             condition=row["condition"],
-             strategy=row["strategy"],
-             confidence=row["confidence"] or 0.0,
-             occurrence_count=row["occurrence_count"] or 0,
-             success_count=row["success_count"] or 0,
-             last_validated=self._parse_datetime(row["last_validated"])
-             or datetime.now(timezone.utc),
-             created_at=self._parse_datetime(row["created_at"])
-             or datetime.now(timezone.utc),
-             metadata=json.loads(row["metadata"]) if row["metadata"] else {},
-         )
-
-     def _row_to_outcome(self, row: sqlite3.Row) -> Outcome:
-         """Convert database row to Outcome."""
-         return Outcome(
-             id=row["id"],
-             agent=row["agent"],
-             project_id=row["project_id"],
-             task_type=row["task_type"] or "general",
-             task_description=row["task_description"],
-             success=bool(row["success"]),
-             strategy_used=row["strategy_used"] or "",
-             duration_ms=row["duration_ms"],
-             error_message=row["error_message"],
-             user_feedback=row["user_feedback"],
-             timestamp=self._parse_datetime(row["timestamp"])
-             or datetime.now(timezone.utc),
-             metadata=json.loads(row["metadata"]) if row["metadata"] else {},
-         )
-
-     def _row_to_preference(self, row: sqlite3.Row) -> UserPreference:
-         """Convert database row to UserPreference."""
-         return UserPreference(
-             id=row["id"],
-             user_id=row["user_id"],
-             category=row["category"] or "general",
-             preference=row["preference"],
-             source=row["source"] or "unknown",
-             confidence=row["confidence"] or 1.0,
-             timestamp=self._parse_datetime(row["timestamp"])
-             or datetime.now(timezone.utc),
-             metadata=json.loads(row["metadata"]) if row["metadata"] else {},
-         )
-
-     def _row_to_domain_knowledge(self, row: sqlite3.Row) -> DomainKnowledge:
-         """Convert database row to DomainKnowledge."""
-         return DomainKnowledge(
-             id=row["id"],
-             agent=row["agent"],
-             project_id=row["project_id"],
-             domain=row["domain"] or "general",
-             fact=row["fact"],
-             source=row["source"] or "unknown",
-             confidence=row["confidence"] or 1.0,
-             last_verified=self._parse_datetime(row["last_verified"])
-             or datetime.now(timezone.utc),
-             metadata=json.loads(row["metadata"]) if row["metadata"] else {},
-         )
-
-     def _row_to_anti_pattern(self, row: sqlite3.Row) -> AntiPattern:
-         """Convert database row to AntiPattern."""
-         return AntiPattern(
-             id=row["id"],
-             agent=row["agent"],
-             project_id=row["project_id"],
-             pattern=row["pattern"],
-             why_bad=row["why_bad"] or "",
-             better_alternative=row["better_alternative"] or "",
-             occurrence_count=row["occurrence_count"] or 1,
-             last_seen=self._parse_datetime(row["last_seen"])
-             or datetime.now(timezone.utc),
-             created_at=self._parse_datetime(row["created_at"])
-             or datetime.now(timezone.utc),
-             metadata=json.loads(row["metadata"]) if row["metadata"] else {},
-         )
-
-     # ===== Additional abstract method implementations =====
-
-     def update_heuristic_confidence(
-         self,
-         heuristic_id: str,
-         new_confidence: float,
-     ) -> bool:
-         """Update confidence score for a heuristic."""
-         with self._get_connection() as conn:
-             cursor = conn.execute(
-                 "UPDATE heuristics SET confidence = ? WHERE id = ?",
-                 (new_confidence, heuristic_id),
-             )
-             return cursor.rowcount > 0
-
-     def update_knowledge_confidence(
-         self,
-         knowledge_id: str,
-         new_confidence: float,
-     ) -> bool:
-         """Update confidence score for domain knowledge."""
-         with self._get_connection() as conn:
-             cursor = conn.execute(
-                 "UPDATE domain_knowledge SET confidence = ? WHERE id = ?",
-                 (new_confidence, knowledge_id),
-             )
-             return cursor.rowcount > 0
-
-     def delete_heuristic(self, heuristic_id: str) -> bool:
-         """Delete a heuristic by ID."""
-         with self._get_connection() as conn:
-             # Also remove from embedding index
-             conn.execute(
-                 "DELETE FROM embeddings WHERE memory_type = ? AND memory_id = ?",
-                 (MemoryType.HEURISTICS, heuristic_id),
-             )
-             cursor = conn.execute(
-                 f"DELETE FROM {SQLITE_TABLE_NAMES[MemoryType.HEURISTICS]} WHERE id = ?",
-                 (heuristic_id,),
-             )
-             if cursor.rowcount > 0:
-                 # Mark index as dirty for lazy rebuild on next search
-                 self._index_dirty[MemoryType.HEURISTICS] = True
-                 return True
-             return False
-
-     def delete_outcome(self, outcome_id: str) -> bool:
-         """Delete an outcome by ID."""
-         with self._get_connection() as conn:
-             # Also remove from embedding index
-             conn.execute(
-                 "DELETE FROM embeddings WHERE memory_type = ? AND memory_id = ?",
-                 (MemoryType.OUTCOMES, outcome_id),
-             )
-             cursor = conn.execute(
-                 f"DELETE FROM {SQLITE_TABLE_NAMES[MemoryType.OUTCOMES]} WHERE id = ?",
-                 (outcome_id,),
-             )
-             if cursor.rowcount > 0:
-                 # Mark index as dirty for lazy rebuild on next search
-                 self._index_dirty[MemoryType.OUTCOMES] = True
-                 return True
-             return False
-
-     def delete_domain_knowledge(self, knowledge_id: str) -> bool:
-         """Delete domain knowledge by ID."""
-         with self._get_connection() as conn:
-             # Also remove from embedding index
-             conn.execute(
-                 "DELETE FROM embeddings WHERE memory_type = ? AND memory_id = ?",
-                 (MemoryType.DOMAIN_KNOWLEDGE, knowledge_id),
-             )
-             cursor = conn.execute(
-                 f"DELETE FROM {SQLITE_TABLE_NAMES[MemoryType.DOMAIN_KNOWLEDGE]} WHERE id = ?",
-                 (knowledge_id,),
-             )
-             if cursor.rowcount > 0:
-                 # Mark index as dirty for lazy rebuild on next search
-                 self._index_dirty[MemoryType.DOMAIN_KNOWLEDGE] = True
-                 return True
-             return False
-
-     def delete_anti_pattern(self, anti_pattern_id: str) -> bool:
-         """Delete an anti-pattern by ID."""
-         with self._get_connection() as conn:
-             # Also remove from embedding index
-             conn.execute(
-                 "DELETE FROM embeddings WHERE memory_type = ? AND memory_id = ?",
-                 (MemoryType.ANTI_PATTERNS, anti_pattern_id),
-             )
-             cursor = conn.execute(
-                 f"DELETE FROM {SQLITE_TABLE_NAMES[MemoryType.ANTI_PATTERNS]} WHERE id = ?",
-                 (anti_pattern_id,),
-             )
-             if cursor.rowcount > 0:
-                 # Mark index as dirty for lazy rebuild on next search
-                 self._index_dirty[MemoryType.ANTI_PATTERNS] = True
-                 return True
-             return False
-
-     # ==================== MIGRATION SUPPORT ====================
-
-     def _get_version_store(self):
-         """Get or create the version store."""
-         if self._version_store is None:
-             from alma.storage.migrations.version_stores import SQLiteVersionStore
-
-             self._version_store = SQLiteVersionStore(self.db_path)
-         return self._version_store
-
-     def _get_migration_runner(self):
-         """Get or create the migration runner."""
-         if self._migration_runner is None:
-             from alma.storage.migrations.runner import MigrationRunner
-             from alma.storage.migrations.versions import v1_0_0  # noqa: F401
-
-             self._migration_runner = MigrationRunner(
-                 version_store=self._get_version_store(),
-                 backend="sqlite",
-             )
-         return self._migration_runner
-
-     def _ensure_migrated(self) -> None:
-         """Ensure database is migrated to latest version."""
-         runner = self._get_migration_runner()
-         if runner.needs_migration():
-             with self._get_connection() as conn:
-                 applied = runner.migrate(conn)
-                 if applied:
-                     logger.info(f"Applied {len(applied)} migrations: {applied}")
-
-     def get_schema_version(self) -> Optional[str]:
-         """Get the current schema version."""
-         return self._get_version_store().get_current_version()
-
-     def get_migration_status(self) -> Dict[str, Any]:
-         """Get migration status information."""
-         runner = self._get_migration_runner()
-         status = runner.get_status()
-         status["migration_supported"] = True
-         return status
-
-     def migrate(
-         self,
-         target_version: Optional[str] = None,
-         dry_run: bool = False,
-     ) -> List[str]:
-         """
-         Apply pending schema migrations.
-
-         Args:
-             target_version: Optional target version (applies all if not specified)
-             dry_run: If True, show what would be done without making changes
-
-         Returns:
-             List of applied migration versions
-         """
-         runner = self._get_migration_runner()
-         with self._get_connection() as conn:
-             return runner.migrate(conn, target_version=target_version, dry_run=dry_run)
-
-     def rollback(
-         self,
-         target_version: str,
-         dry_run: bool = False,
-     ) -> List[str]:
-         """
-         Roll back schema to a previous version.
-
-         Args:
-             target_version: Version to roll back to
-             dry_run: If True, show what would be done without making changes
-
-         Returns:
-             List of rolled back migration versions
-         """
-         runner = self._get_migration_runner()
-         with self._get_connection() as conn:
-             return runner.rollback(conn, target_version=target_version, dry_run=dry_run)
1
+ """
2
+ ALMA SQLite + FAISS Storage Backend.
3
+
4
+ Local storage using SQLite for structured data and FAISS for vector search.
5
+ This is the recommended backend for local development and testing.
6
+
7
+ v0.6.0 adds workflow context support:
8
+ - Checkpoint tables for crash recovery
9
+ - WorkflowOutcome tables for learning from workflows
10
+ - ArtifactRef tables for linking external files
11
+ - scope_filter parameter for workflow-scoped queries
12
+ """
13
+
14
+ import json
15
+ import logging
16
+ import sqlite3
17
+ from contextlib import contextmanager
18
+ from datetime import datetime, timezone
19
+ from pathlib import Path
20
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
21
+
22
+ import numpy as np
23
+
24
+ from alma.storage.base import StorageBackend
25
+ from alma.storage.constants import SQLITE_TABLE_NAMES, MemoryType
26
+ from alma.types import (
27
+ AntiPattern,
28
+ DomainKnowledge,
29
+ Heuristic,
30
+ Outcome,
31
+ UserPreference,
32
+ )
33
+
34
+ if TYPE_CHECKING:
35
+ from alma.learning.decay import MemoryStrength
36
+ from alma.session import SessionHandoff
37
+ from alma.storage.archive import ArchivedMemory
38
+ from alma.workflow import ArtifactRef, Checkpoint, WorkflowOutcome
39
+
40
+ logger = logging.getLogger(__name__)
41
+
42
+ # Try to import FAISS, fall back to numpy-based search if not available
43
+ try:
44
+ import faiss
45
+
46
+ FAISS_AVAILABLE = True
47
+ except ImportError:
48
+ FAISS_AVAILABLE = False
49
+ logger.warning("FAISS not available, falling back to numpy-based vector search")
50
+
51
+
52
+ class SQLiteStorage(StorageBackend):
53
+ """
54
+ SQLite + FAISS storage backend.
55
+
56
+ Uses SQLite for structured data and FAISS for efficient vector similarity search.
57
+ Falls back to numpy cosine similarity if FAISS is not installed.
58
+
59
+ Database schema:
60
+ - heuristics: id, agent, project_id, condition, strategy, confidence, ...
61
+ - outcomes: id, agent, project_id, task_type, task_description, success, ...
62
+ - preferences: id, user_id, category, preference, source, ...
63
+ - domain_knowledge: id, agent, project_id, domain, fact, ...
64
+ - anti_patterns: id, agent, project_id, pattern, why_bad, ...
65
+ - embeddings: id, memory_type, memory_id, embedding (blob)
66
+ """
67
+
68
+ def __init__(
69
+ self,
70
+ db_path: Path,
71
+ embedding_dim: int = 384, # Default for all-MiniLM-L6-v2
72
+ auto_migrate: bool = True,
73
+ ):
74
+ """
75
+ Initialize SQLite storage.
76
+
77
+ Args:
78
+ db_path: Path to SQLite database file
79
+ embedding_dim: Dimension of embedding vectors
80
+ auto_migrate: If True, automatically apply pending migrations on startup
81
+ """
82
+ self.db_path = Path(db_path)
83
+ self.db_path.parent.mkdir(parents=True, exist_ok=True)
84
+ self.embedding_dim = embedding_dim
85
+
86
+ # Migration support (lazy-loaded)
87
+ self._migration_runner = None
88
+ self._version_store = None
89
+
90
+ # Initialize database
91
+ self._init_database()
92
+
93
+ # Initialize FAISS indices (one per memory type)
94
+ self._indices: Dict[str, Any] = {}
95
+ self._id_maps: Dict[str, List[str]] = {} # memory_type -> [memory_ids]
96
+ self._index_dirty: Dict[str, bool] = {} # Track which indexes need rebuilding
97
+ self._load_faiss_indices()
98
+
99
+ # Auto-migrate if enabled
100
+ if auto_migrate:
101
+ self._ensure_migrated()
102
+
103
+ @classmethod
104
+ def from_config(cls, config: Dict[str, Any]) -> "SQLiteStorage":
105
+ """Create instance from configuration."""
106
+ storage_dir = config.get("storage_dir", ".alma")
107
+ db_name = config.get("db_name", "alma.db")
108
+ embedding_dim = config.get("embedding_dim", 384)
109
+
110
+ db_path = Path(storage_dir) / db_name
111
+ return cls(db_path=db_path, embedding_dim=embedding_dim)
+
+    @contextmanager
+    def _get_connection(self):
+        """Get database connection with context manager."""
+        conn = sqlite3.connect(self.db_path)
+        conn.row_factory = sqlite3.Row
+        try:
+            yield conn
+            conn.commit()
+        except Exception:
+            conn.rollback()
+            raise
+        finally:
+            conn.close()
+
+    def _init_database(self):
+        """Initialize database schema."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            # Heuristics table
+            cursor.execute("""
+                CREATE TABLE IF NOT EXISTS heuristics (
+                    id TEXT PRIMARY KEY,
+                    agent TEXT NOT NULL,
+                    project_id TEXT NOT NULL,
+                    condition TEXT NOT NULL,
+                    strategy TEXT NOT NULL,
+                    confidence REAL DEFAULT 0.0,
+                    occurrence_count INTEGER DEFAULT 0,
+                    success_count INTEGER DEFAULT 0,
+                    last_validated TEXT,
+                    created_at TEXT,
+                    metadata TEXT
+                )
+            """)
+            cursor.execute(
+                "CREATE INDEX IF NOT EXISTS idx_heuristics_project_agent "
+                "ON heuristics(project_id, agent)"
+            )
+
+            # Outcomes table
+            cursor.execute("""
+                CREATE TABLE IF NOT EXISTS outcomes (
+                    id TEXT PRIMARY KEY,
+                    agent TEXT NOT NULL,
+                    project_id TEXT NOT NULL,
+                    task_type TEXT,
+                    task_description TEXT NOT NULL,
+                    success INTEGER DEFAULT 0,
+                    strategy_used TEXT,
+                    duration_ms INTEGER,
+                    error_message TEXT,
+                    user_feedback TEXT,
+                    timestamp TEXT,
+                    metadata TEXT
+                )
+            """)
+            cursor.execute(
+                "CREATE INDEX IF NOT EXISTS idx_outcomes_project_agent "
+                "ON outcomes(project_id, agent)"
+            )
+            cursor.execute(
+                "CREATE INDEX IF NOT EXISTS idx_outcomes_task_type "
+                "ON outcomes(project_id, agent, task_type)"
+            )
+            cursor.execute(
+                "CREATE INDEX IF NOT EXISTS idx_outcomes_timestamp "
+                "ON outcomes(project_id, timestamp)"
+            )
+
+            # User preferences table
+            cursor.execute("""
+                CREATE TABLE IF NOT EXISTS preferences (
+                    id TEXT PRIMARY KEY,
+                    user_id TEXT NOT NULL,
+                    category TEXT,
+                    preference TEXT NOT NULL,
+                    source TEXT,
+                    confidence REAL DEFAULT 1.0,
+                    timestamp TEXT,
+                    metadata TEXT
+                )
+            """)
+            cursor.execute(
+                "CREATE INDEX IF NOT EXISTS idx_preferences_user "
+                "ON preferences(user_id)"
+            )
+
+            # Domain knowledge table
+            cursor.execute("""
+                CREATE TABLE IF NOT EXISTS domain_knowledge (
+                    id TEXT PRIMARY KEY,
+                    agent TEXT NOT NULL,
+                    project_id TEXT NOT NULL,
+                    domain TEXT,
+                    fact TEXT NOT NULL,
+                    source TEXT,
+                    confidence REAL DEFAULT 1.0,
+                    last_verified TEXT,
+                    metadata TEXT
+                )
+            """)
+            cursor.execute(
+                "CREATE INDEX IF NOT EXISTS idx_domain_knowledge_project_agent "
+                "ON domain_knowledge(project_id, agent)"
+            )
+
+            # Anti-patterns table
+            cursor.execute("""
+                CREATE TABLE IF NOT EXISTS anti_patterns (
+                    id TEXT PRIMARY KEY,
+                    agent TEXT NOT NULL,
+                    project_id TEXT NOT NULL,
+                    pattern TEXT NOT NULL,
+                    why_bad TEXT,
+                    better_alternative TEXT,
+                    occurrence_count INTEGER DEFAULT 1,
+                    last_seen TEXT,
+                    created_at TEXT,
+                    metadata TEXT
+                )
+            """)
+            cursor.execute(
+                "CREATE INDEX IF NOT EXISTS idx_anti_patterns_project_agent "
+                "ON anti_patterns(project_id, agent)"
+            )
+
+            # Embeddings table (stores vectors as blobs)
+            cursor.execute("""
+                CREATE TABLE IF NOT EXISTS embeddings (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    memory_type TEXT NOT NULL,
+                    memory_id TEXT NOT NULL,
+                    embedding BLOB NOT NULL,
+                    UNIQUE(memory_type, memory_id)
+                )
+            """)
+            cursor.execute(
+                "CREATE INDEX IF NOT EXISTS idx_embeddings_type "
+                "ON embeddings(memory_type)"
+            )
+
+            # ==================== WORKFLOW TABLES (v0.6.0+) ====================
+
+            # Checkpoints table for crash recovery
+            cursor.execute("""
+                CREATE TABLE IF NOT EXISTS checkpoints (
+                    id TEXT PRIMARY KEY,
+                    run_id TEXT NOT NULL,
+                    node_id TEXT NOT NULL,
+                    state TEXT NOT NULL,
+                    sequence_number INTEGER DEFAULT 0,
+                    branch_id TEXT,
+                    parent_checkpoint_id TEXT,
+                    state_hash TEXT,
+                    metadata TEXT,
+                    created_at TEXT NOT NULL
+                )
+            """)
+            cursor.execute(
+                "CREATE INDEX IF NOT EXISTS idx_checkpoints_run ON checkpoints(run_id)"
+            )
+            cursor.execute(
+                "CREATE INDEX IF NOT EXISTS idx_checkpoints_run_branch "
+                "ON checkpoints(run_id, branch_id)"
+            )
+            cursor.execute(
+                "CREATE INDEX IF NOT EXISTS idx_checkpoints_run_sequence "
+                "ON checkpoints(run_id, sequence_number DESC)"
+            )
+
+            # Workflow outcomes table
+            cursor.execute("""
+                CREATE TABLE IF NOT EXISTS workflow_outcomes (
+                    id TEXT PRIMARY KEY,
+                    tenant_id TEXT,
+                    workflow_id TEXT NOT NULL,
+                    run_id TEXT NOT NULL,
+                    agent TEXT NOT NULL,
+                    project_id TEXT NOT NULL,
+                    result TEXT NOT NULL,
+                    summary TEXT,
+                    strategies_used TEXT,
+                    successful_patterns TEXT,
+                    failed_patterns TEXT,
+                    extracted_heuristics TEXT,
+                    extracted_anti_patterns TEXT,
+                    duration_seconds REAL,
+                    node_count INTEGER,
+                    error_message TEXT,
+                    metadata TEXT,
+                    created_at TEXT NOT NULL
+                )
+            """)
+            cursor.execute(
+                "CREATE INDEX IF NOT EXISTS idx_workflow_outcomes_project "
+                "ON workflow_outcomes(project_id, agent)"
+            )
+            cursor.execute(
+                "CREATE INDEX IF NOT EXISTS idx_workflow_outcomes_workflow "
+                "ON workflow_outcomes(workflow_id)"
+            )
+            cursor.execute(
+                "CREATE INDEX IF NOT EXISTS idx_workflow_outcomes_tenant "
+                "ON workflow_outcomes(tenant_id)"
+            )
+
+            # Artifact links table
+            cursor.execute("""
+                CREATE TABLE IF NOT EXISTS artifact_links (
+                    id TEXT PRIMARY KEY,
+                    memory_id TEXT NOT NULL,
+                    artifact_type TEXT NOT NULL,
+                    storage_url TEXT NOT NULL,
+                    filename TEXT,
+                    mime_type TEXT,
+                    size_bytes INTEGER,
+                    checksum TEXT,
+                    metadata TEXT,
+                    created_at TEXT NOT NULL
+                )
+            """)
+            cursor.execute(
+                "CREATE INDEX IF NOT EXISTS idx_artifact_links_memory "
+                "ON artifact_links(memory_id)"
+            )
+
+            # Session handoffs table (for session persistence)
+            cursor.execute("""
+                CREATE TABLE IF NOT EXISTS session_handoffs (
+                    id TEXT PRIMARY KEY,
+                    project_id TEXT NOT NULL,
+                    agent TEXT NOT NULL,
+                    session_id TEXT NOT NULL,
+                    last_action TEXT NOT NULL,
+                    last_outcome TEXT NOT NULL,
+                    current_goal TEXT,
+                    key_decisions TEXT,
+                    active_files TEXT,
+                    blockers TEXT,
+                    next_steps TEXT,
+                    test_status TEXT,
+                    confidence_level REAL DEFAULT 0.5,
+                    risk_flags TEXT,
+                    session_start TEXT,
+                    session_end TEXT,
+                    duration_ms INTEGER DEFAULT 0,
+                    metadata TEXT,
+                    created_at TEXT NOT NULL
+                )
+            """)
+            cursor.execute(
+                "CREATE INDEX IF NOT EXISTS idx_session_handoffs_project_agent "
+                "ON session_handoffs(project_id, agent)"
+            )
+            cursor.execute(
+                "CREATE INDEX IF NOT EXISTS idx_session_handoffs_agent_created "
+                "ON session_handoffs(agent, created_at DESC)"
+            )
+
+            # ==================== MEMORY STRENGTH TABLE (v0.7.0+) ====================
+
+            # Memory strength tracking for decay-based forgetting
+            cursor.execute("""
+                CREATE TABLE IF NOT EXISTS memory_strength (
+                    memory_id TEXT PRIMARY KEY,
+                    memory_type TEXT NOT NULL,
+                    project_id TEXT,
+                    agent TEXT,
+                    initial_strength REAL DEFAULT 1.0,
+                    decay_half_life_days INTEGER DEFAULT 30,
+                    created_at TEXT NOT NULL,
+                    last_accessed TEXT NOT NULL,
+                    access_count INTEGER DEFAULT 0,
+                    explicit_importance REAL DEFAULT 0.5,
+                    reinforcement_events TEXT DEFAULT '[]'
+                )
+            """)
+            cursor.execute(
+                "CREATE INDEX IF NOT EXISTS idx_memory_strength_last_accessed "
+                "ON memory_strength(last_accessed)"
+            )
+            cursor.execute(
+                "CREATE INDEX IF NOT EXISTS idx_memory_strength_project_agent "
+                "ON memory_strength(project_id, agent)"
+            )
+
+            # ==================== MEMORY ARCHIVE TABLE (v0.7.0+) ====================
+
+            # Memory archive for soft-deleted memories
+            cursor.execute("""
+                CREATE TABLE IF NOT EXISTS memory_archive (
+                    id TEXT PRIMARY KEY,
+                    original_id TEXT NOT NULL,
+                    memory_type TEXT NOT NULL,
+                    content TEXT NOT NULL,
+                    embedding BLOB,
+                    metadata TEXT,
+                    original_created_at TEXT NOT NULL,
+                    archived_at TEXT NOT NULL,
+                    archive_reason TEXT NOT NULL,
+                    final_strength REAL NOT NULL,
+                    project_id TEXT NOT NULL,
+                    agent TEXT NOT NULL,
+                    restored INTEGER DEFAULT 0,
+                    restored_at TEXT,
+                    restored_as TEXT
+                )
+            """)
+            cursor.execute(
+                "CREATE INDEX IF NOT EXISTS idx_archive_project_agent "
+                "ON memory_archive(project_id, agent)"
+            )
+            cursor.execute(
+                "CREATE INDEX IF NOT EXISTS idx_archive_reason "
+                "ON memory_archive(archive_reason)"
+            )
+            cursor.execute(
+                "CREATE INDEX IF NOT EXISTS idx_archive_date "
+                "ON memory_archive(archived_at)"
+            )
+            cursor.execute(
+                "CREATE INDEX IF NOT EXISTS idx_archive_restored "
+                "ON memory_archive(restored)"
+            )
+
+    def _load_faiss_indices(self, memory_types: Optional[List[str]] = None):
+        """Load or create FAISS indices for specified memory types.
+
+        Args:
+            memory_types: List of memory types to load. If None, loads all types.
+        """
+        if memory_types is None:
+            memory_types = list(MemoryType.VECTOR_ENABLED)
+
+        for memory_type in memory_types:
+            if FAISS_AVAILABLE:
+                # Use FAISS index
+                self._indices[memory_type] = faiss.IndexFlatIP(self.embedding_dim)
+            else:
+                # Use list for numpy fallback
+                self._indices[memory_type] = []
+
+            self._id_maps[memory_type] = []
+            self._index_dirty[memory_type] = False  # Mark as fresh after rebuild
+
+            # Load existing embeddings
+            with self._get_connection() as conn:
+                cursor = conn.cursor()
+                cursor.execute(
+                    "SELECT memory_id, embedding FROM embeddings WHERE memory_type = ?",
+                    (memory_type,),
+                )
+                rows = cursor.fetchall()
+
+            for row in rows:
+                memory_id = row["memory_id"]
+                embedding = np.frombuffer(row["embedding"], dtype=np.float32)
+
+                self._id_maps[memory_type].append(memory_id)
+                if FAISS_AVAILABLE:
+                    self._indices[memory_type].add(
+                        embedding.reshape(1, -1).astype(np.float32)
+                    )
+                else:
+                    self._indices[memory_type].append(embedding)
+
+    def _ensure_index_fresh(self, memory_type: str) -> None:
+        """Rebuild index for a memory type if it has been marked dirty.
+
+        This implements lazy rebuilding - indexes are only rebuilt when
+        actually needed for search, not immediately on every delete.
+
+        Args:
+            memory_type: The type of memory index to check/rebuild.
+        """
+        if self._index_dirty.get(memory_type, False):
+            logger.debug(f"Rebuilding dirty index for {memory_type}")
+            self._load_faiss_indices([memory_type])
+
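
# Sketch of the lazy-rebuild contract (assumes the `storage` instance from
# the earlier sketch): delete_* methods only flip the dirty flag; the index
# is reloaded from the embeddings table on the next search, not eagerly.
storage._index_dirty[MemoryType.HEURISTICS] = True   # what a delete does
storage._ensure_index_fresh(MemoryType.HEURISTICS)   # what _search_index does
assert storage._index_dirty[MemoryType.HEURISTICS] is False  # reset on reload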
+    def _add_to_index(
+        self,
+        memory_type: str,
+        memory_id: str,
+        embedding: Optional[List[float]],
+    ):
+        """Add embedding to FAISS index."""
+        if embedding is None:
+            return
+
+        embedding_array = np.array(embedding, dtype=np.float32)
+
+        # Store in database
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                """
+                INSERT OR REPLACE INTO embeddings (memory_type, memory_id, embedding)
+                VALUES (?, ?, ?)
+                """,
+                (memory_type, memory_id, embedding_array.tobytes()),
+            )
+
+        # Add to index
+        self._id_maps[memory_type].append(memory_id)
+        if FAISS_AVAILABLE:
+            self._indices[memory_type].add(
+                embedding_array.reshape(1, -1).astype(np.float32)
+            )
+        else:
+            self._indices[memory_type].append(embedding_array)
+
+    def _search_index(
+        self,
+        memory_type: str,
+        query_embedding: List[float],
+        top_k: int,
+    ) -> List[Tuple[str, float]]:
+        """Search FAISS index for similar embeddings."""
+        # Ensure index is up-to-date before searching (lazy rebuild)
+        self._ensure_index_fresh(memory_type)
+
+        if not self._id_maps[memory_type]:
+            return []
+
+        query = np.array(query_embedding, dtype=np.float32).reshape(1, -1)
+
+        if FAISS_AVAILABLE:
+            # Normalize for cosine similarity (IndexFlatIP)
+            faiss.normalize_L2(query)
+            scores, indices = self._indices[memory_type].search(
+                query, min(top_k, len(self._id_maps[memory_type]))
+            )
+
+            results = []
+            for score, idx in zip(scores[0], indices[0], strict=False):
+                if idx >= 0 and idx < len(self._id_maps[memory_type]):
+                    results.append((self._id_maps[memory_type][idx], float(score)))
+            return results
+        else:
+            # Numpy fallback with cosine similarity
+            embeddings = np.array(self._indices[memory_type])
+            if len(embeddings) == 0:
+                return []
+
+            # Normalize
+            query_norm = query / np.linalg.norm(query)
+            emb_norms = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)
+
+            # Cosine similarity
+            similarities = np.dot(emb_norms, query_norm.T).flatten()
+
+            # Get top k
+            top_indices = np.argsort(similarities)[::-1][:top_k]
+
+            return [
+                (self._id_maps[memory_type][i], float(similarities[i]))
+                for i in top_indices
+            ]
+
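
# Search sketch (assumes `storage` from above). Scores are inner products
# over an L2-normalized query, so they behave like cosine similarity when
# the stored vectors are unit length; the query vector is a made-up example.
query_vec = [1.0] + [0.0] * (storage.embedding_dim - 1)
for memory_id, score in storage._search_index(MemoryType.HEURISTICS, query_vec, top_k=5):
    print(memory_id, round(score, 3))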
+    # ==================== WRITE OPERATIONS ====================
+
+    def save_heuristic(self, heuristic: Heuristic) -> str:
+        """Save a heuristic."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                """
+                INSERT OR REPLACE INTO heuristics
+                (id, agent, project_id, condition, strategy, confidence,
+                 occurrence_count, success_count, last_validated, created_at, metadata)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    heuristic.id,
+                    heuristic.agent,
+                    heuristic.project_id,
+                    heuristic.condition,
+                    heuristic.strategy,
+                    heuristic.confidence,
+                    heuristic.occurrence_count,
+                    heuristic.success_count,
+                    (
+                        heuristic.last_validated.isoformat()
+                        if heuristic.last_validated
+                        else None
+                    ),
+                    heuristic.created_at.isoformat() if heuristic.created_at else None,
+                    json.dumps(heuristic.metadata) if heuristic.metadata else None,
+                ),
+            )
+
+        # Add embedding to index
+        self._add_to_index(MemoryType.HEURISTICS, heuristic.id, heuristic.embedding)
+        logger.debug(f"Saved heuristic: {heuristic.id}")
+        return heuristic.id
+
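
# Write sketch (hedged: assumes Heuristic accepts the keyword fields that
# save_heuristic reads above, with defaults for the rest; values are made up).
h = Heuristic(
    id="h-1",
    agent="planner",
    project_id="proj-1",
    condition="tests fail after a dependency bump",
    strategy="pin the dependency and re-run the suite",
    confidence=0.7,
)
storage.save_heuristic(h)  # upserts the row and indexes h.embedding, if any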
+    def save_outcome(self, outcome: Outcome) -> str:
+        """Save an outcome."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                """
+                INSERT OR REPLACE INTO outcomes
+                (id, agent, project_id, task_type, task_description, success,
+                 strategy_used, duration_ms, error_message, user_feedback, timestamp, metadata)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    outcome.id,
+                    outcome.agent,
+                    outcome.project_id,
+                    outcome.task_type,
+                    outcome.task_description,
+                    1 if outcome.success else 0,
+                    outcome.strategy_used,
+                    outcome.duration_ms,
+                    outcome.error_message,
+                    outcome.user_feedback,
+                    outcome.timestamp.isoformat() if outcome.timestamp else None,
+                    json.dumps(outcome.metadata) if outcome.metadata else None,
+                ),
+            )
+
+        # Add embedding to index
+        self._add_to_index(MemoryType.OUTCOMES, outcome.id, outcome.embedding)
+        logger.debug(f"Saved outcome: {outcome.id}")
+        return outcome.id
+
+    def save_user_preference(self, preference: UserPreference) -> str:
+        """Save a user preference."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                """
+                INSERT OR REPLACE INTO preferences
+                (id, user_id, category, preference, source, confidence, timestamp, metadata)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    preference.id,
+                    preference.user_id,
+                    preference.category,
+                    preference.preference,
+                    preference.source,
+                    preference.confidence,
+                    preference.timestamp.isoformat() if preference.timestamp else None,
+                    json.dumps(preference.metadata) if preference.metadata else None,
+                ),
+            )
+        logger.debug(f"Saved preference: {preference.id}")
+        return preference.id
+
+    def save_domain_knowledge(self, knowledge: DomainKnowledge) -> str:
+        """Save domain knowledge."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                """
+                INSERT OR REPLACE INTO domain_knowledge
+                (id, agent, project_id, domain, fact, source, confidence, last_verified, metadata)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    knowledge.id,
+                    knowledge.agent,
+                    knowledge.project_id,
+                    knowledge.domain,
+                    knowledge.fact,
+                    knowledge.source,
+                    knowledge.confidence,
+                    (
+                        knowledge.last_verified.isoformat()
+                        if knowledge.last_verified
+                        else None
+                    ),
+                    json.dumps(knowledge.metadata) if knowledge.metadata else None,
+                ),
+            )
+
+        # Add embedding to index
+        self._add_to_index(
+            MemoryType.DOMAIN_KNOWLEDGE, knowledge.id, knowledge.embedding
+        )
+        logger.debug(f"Saved domain knowledge: {knowledge.id}")
+        return knowledge.id
+
+    def save_anti_pattern(self, anti_pattern: AntiPattern) -> str:
+        """Save an anti-pattern."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                """
+                INSERT OR REPLACE INTO anti_patterns
+                (id, agent, project_id, pattern, why_bad, better_alternative,
+                 occurrence_count, last_seen, created_at, metadata)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    anti_pattern.id,
+                    anti_pattern.agent,
+                    anti_pattern.project_id,
+                    anti_pattern.pattern,
+                    anti_pattern.why_bad,
+                    anti_pattern.better_alternative,
+                    anti_pattern.occurrence_count,
+                    (
+                        anti_pattern.last_seen.isoformat()
+                        if anti_pattern.last_seen
+                        else None
+                    ),
+                    (
+                        anti_pattern.created_at.isoformat()
+                        if anti_pattern.created_at
+                        else None
+                    ),
+                    (
+                        json.dumps(anti_pattern.metadata)
+                        if anti_pattern.metadata
+                        else None
+                    ),
+                ),
+            )
+
+        # Add embedding to index
+        self._add_to_index(
+            MemoryType.ANTI_PATTERNS, anti_pattern.id, anti_pattern.embedding
+        )
+        logger.debug(f"Saved anti-pattern: {anti_pattern.id}")
+        return anti_pattern.id
+
+    # ==================== BATCH WRITE OPERATIONS ====================
+
+    def save_heuristics(self, heuristics: List[Heuristic]) -> List[str]:
+        """Save multiple heuristics in a batch using executemany."""
+        if not heuristics:
+            return []
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.executemany(
+                """
+                INSERT OR REPLACE INTO heuristics
+                (id, agent, project_id, condition, strategy, confidence,
+                 occurrence_count, success_count, last_validated, created_at, metadata)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                [
+                    (
+                        h.id,
+                        h.agent,
+                        h.project_id,
+                        h.condition,
+                        h.strategy,
+                        h.confidence,
+                        h.occurrence_count,
+                        h.success_count,
+                        h.last_validated.isoformat() if h.last_validated else None,
+                        h.created_at.isoformat() if h.created_at else None,
+                        json.dumps(h.metadata) if h.metadata else None,
+                    )
+                    for h in heuristics
+                ],
+            )
+
+        # Add embeddings to index
+        for h in heuristics:
+            self._add_to_index(MemoryType.HEURISTICS, h.id, h.embedding)
+
+        logger.debug(f"Batch saved {len(heuristics)} heuristics")
+        return [h.id for h in heuristics]
+
+    def save_outcomes(self, outcomes: List[Outcome]) -> List[str]:
+        """Save multiple outcomes in a batch using executemany."""
+        if not outcomes:
+            return []
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.executemany(
+                """
+                INSERT OR REPLACE INTO outcomes
+                (id, agent, project_id, task_type, task_description, success,
+                 strategy_used, duration_ms, error_message, user_feedback, timestamp, metadata)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                [
+                    (
+                        o.id,
+                        o.agent,
+                        o.project_id,
+                        o.task_type,
+                        o.task_description,
+                        1 if o.success else 0,
+                        o.strategy_used,
+                        o.duration_ms,
+                        o.error_message,
+                        o.user_feedback,
+                        o.timestamp.isoformat() if o.timestamp else None,
+                        json.dumps(o.metadata) if o.metadata else None,
+                    )
+                    for o in outcomes
+                ],
+            )
+
+        # Add embeddings to index
+        for o in outcomes:
+            self._add_to_index(MemoryType.OUTCOMES, o.id, o.embedding)
+
+        logger.debug(f"Batch saved {len(outcomes)} outcomes")
+        return [o.id for o in outcomes]
+
+    def save_domain_knowledge_batch(
+        self, knowledge_items: List[DomainKnowledge]
+    ) -> List[str]:
+        """Save multiple domain knowledge items in a batch using executemany."""
+        if not knowledge_items:
+            return []
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.executemany(
+                """
+                INSERT OR REPLACE INTO domain_knowledge
+                (id, agent, project_id, domain, fact, source, confidence, last_verified, metadata)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                [
+                    (
+                        k.id,
+                        k.agent,
+                        k.project_id,
+                        k.domain,
+                        k.fact,
+                        k.source,
+                        k.confidence,
+                        k.last_verified.isoformat() if k.last_verified else None,
+                        json.dumps(k.metadata) if k.metadata else None,
+                    )
+                    for k in knowledge_items
+                ],
+            )
+
+        # Add embeddings to index
+        for k in knowledge_items:
+            self._add_to_index(MemoryType.DOMAIN_KNOWLEDGE, k.id, k.embedding)
+
+        logger.debug(f"Batch saved {len(knowledge_items)} domain knowledge items")
+        return [k.id for k in knowledge_items]
+
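
# The batch savers above issue a single executemany() per call, so all rows
# share one connection and one commit, unlike a loop over the single-item
# savers. Sketch, reusing the illustrative `h` from the earlier snippet:
ids = storage.save_heuristics([h])
assert ids == [h.id]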
+    # ==================== READ OPERATIONS ====================
+
+    def get_heuristics(
+        self,
+        project_id: str,
+        agent: Optional[str] = None,
+        embedding: Optional[List[float]] = None,
+        top_k: int = 5,
+        min_confidence: float = 0.0,
+        scope_filter: Optional[Dict[str, Any]] = None,
+    ) -> List[Heuristic]:
+        """Get heuristics with optional vector search and scope filtering."""
+        # If embedding provided, use vector search to get candidate IDs
+        candidate_ids = None
+        if embedding:
+            search_results = self._search_index(
+                MemoryType.HEURISTICS, embedding, top_k * 2
+            )
+            candidate_ids = [id for id, _ in search_results]
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            query = "SELECT * FROM heuristics WHERE project_id = ? AND confidence >= ?"
+            params: List[Any] = [project_id, min_confidence]
+
+            if agent:
+                query += " AND agent = ?"
+                params.append(agent)
+
+            if candidate_ids is not None:
+                placeholders = ",".join("?" * len(candidate_ids))
+                query += f" AND id IN ({placeholders})"
+                params.extend(candidate_ids)
+
+            # Apply scope filter (v0.6.0+)
+            if scope_filter:
+                query, params = self._apply_scope_filter(query, params, scope_filter)
+
+            query += " ORDER BY confidence DESC LIMIT ?"
+            params.append(top_k)
+
+            cursor.execute(query, params)
+            rows = cursor.fetchall()
+
+        return [self._row_to_heuristic(row) for row in rows]
+
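
# Retrieval sketch (assumes `storage` from above): when an embedding is
# passed, the vector index pre-selects 2 * top_k candidate IDs, and SQL then
# applies the project/agent/confidence filters plus the final ORDER BY/LIMIT.
results = storage.get_heuristics(
    project_id="proj-1",
    agent="planner",
    top_k=5,
    min_confidence=0.5,
)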
+    def get_outcomes(
+        self,
+        project_id: str,
+        agent: Optional[str] = None,
+        task_type: Optional[str] = None,
+        embedding: Optional[List[float]] = None,
+        top_k: int = 5,
+        success_only: bool = False,
+        scope_filter: Optional[Dict[str, Any]] = None,
+    ) -> List[Outcome]:
+        """Get outcomes with optional vector search and scope filtering."""
+        candidate_ids = None
+        if embedding:
+            search_results = self._search_index(
+                MemoryType.OUTCOMES, embedding, top_k * 2
+            )
+            candidate_ids = [id for id, _ in search_results]
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            query = "SELECT * FROM outcomes WHERE project_id = ?"
+            params: List[Any] = [project_id]
+
+            if agent:
+                query += " AND agent = ?"
+                params.append(agent)
+
+            if task_type:
+                query += " AND task_type = ?"
+                params.append(task_type)
+
+            if success_only:
+                query += " AND success = 1"
+
+            if candidate_ids is not None:
+                placeholders = ",".join("?" * len(candidate_ids))
+                query += f" AND id IN ({placeholders})"
+                params.extend(candidate_ids)
+
+            # Apply scope filter (v0.6.0+)
+            if scope_filter:
+                query, params = self._apply_scope_filter(query, params, scope_filter)
+
+            query += " ORDER BY timestamp DESC LIMIT ?"
+            params.append(top_k)
+
+            cursor.execute(query, params)
+            rows = cursor.fetchall()
+
+        return [self._row_to_outcome(row) for row in rows]
+
+    def get_user_preferences(
+        self,
+        user_id: str,
+        category: Optional[str] = None,
+    ) -> List[UserPreference]:
+        """Get user preferences."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            query = "SELECT * FROM preferences WHERE user_id = ?"
+            params: List[Any] = [user_id]
+
+            if category:
+                query += " AND category = ?"
+                params.append(category)
+
+            cursor.execute(query, params)
+            rows = cursor.fetchall()
+
+        return [self._row_to_preference(row) for row in rows]
+
+    def get_domain_knowledge(
+        self,
+        project_id: str,
+        agent: Optional[str] = None,
+        domain: Optional[str] = None,
+        embedding: Optional[List[float]] = None,
+        top_k: int = 5,
+        scope_filter: Optional[Dict[str, Any]] = None,
+    ) -> List[DomainKnowledge]:
+        """Get domain knowledge with optional vector search and scope filtering."""
+        candidate_ids = None
+        if embedding:
+            search_results = self._search_index(
+                MemoryType.DOMAIN_KNOWLEDGE, embedding, top_k * 2
+            )
+            candidate_ids = [id for id, _ in search_results]
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            query = "SELECT * FROM domain_knowledge WHERE project_id = ?"
+            params: List[Any] = [project_id]
+
+            if agent:
+                query += " AND agent = ?"
+                params.append(agent)
+
+            if domain:
+                query += " AND domain = ?"
+                params.append(domain)
+
+            if candidate_ids is not None:
+                placeholders = ",".join("?" * len(candidate_ids))
+                query += f" AND id IN ({placeholders})"
+                params.extend(candidate_ids)
+
+            # Apply scope filter (v0.6.0+)
+            if scope_filter:
+                query, params = self._apply_scope_filter(query, params, scope_filter)
+
+            query += " ORDER BY confidence DESC LIMIT ?"
+            params.append(top_k)
+
+            cursor.execute(query, params)
+            rows = cursor.fetchall()
+
+        return [self._row_to_domain_knowledge(row) for row in rows]
+
+    def get_anti_patterns(
+        self,
+        project_id: str,
+        agent: Optional[str] = None,
+        embedding: Optional[List[float]] = None,
+        top_k: int = 5,
+        scope_filter: Optional[Dict[str, Any]] = None,
+    ) -> List[AntiPattern]:
+        """Get anti-patterns with optional vector search and scope filtering."""
+        candidate_ids = None
+        if embedding:
+            search_results = self._search_index(
+                MemoryType.ANTI_PATTERNS, embedding, top_k * 2
+            )
+            candidate_ids = [id for id, _ in search_results]
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            query = "SELECT * FROM anti_patterns WHERE project_id = ?"
+            params: List[Any] = [project_id]
+
+            if agent:
+                query += " AND agent = ?"
+                params.append(agent)
+
+            if candidate_ids is not None:
+                placeholders = ",".join("?" * len(candidate_ids))
+                query += f" AND id IN ({placeholders})"
+                params.extend(candidate_ids)
+
+            # Apply scope filter (v0.6.0+)
+            if scope_filter:
+                query, params = self._apply_scope_filter(query, params, scope_filter)
+
+            query += " ORDER BY occurrence_count DESC LIMIT ?"
+            params.append(top_k)
+
+            cursor.execute(query, params)
+            rows = cursor.fetchall()
+
+        return [self._row_to_anti_pattern(row) for row in rows]
+
+    # ==================== MULTI-AGENT MEMORY SHARING ====================
+
+    def get_heuristics_for_agents(
+        self,
+        project_id: str,
+        agents: List[str],
+        embedding: Optional[List[float]] = None,
+        top_k: int = 5,
+        min_confidence: float = 0.0,
+    ) -> List[Heuristic]:
+        """Get heuristics from multiple agents using optimized IN query."""
+        if not agents:
+            return []
+
+        candidate_ids = None
+        if embedding:
+            search_results = self._search_index(
+                MemoryType.HEURISTICS, embedding, top_k * 2 * len(agents)
+            )
+            candidate_ids = [id for id, _ in search_results]
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            placeholders = ",".join("?" * len(agents))
+            query = f"SELECT * FROM heuristics WHERE project_id = ? AND confidence >= ? AND agent IN ({placeholders})"
+            params: List[Any] = [project_id, min_confidence] + list(agents)
+
+            if candidate_ids is not None:
+                id_placeholders = ",".join("?" * len(candidate_ids))
+                query += f" AND id IN ({id_placeholders})"
+                params.extend(candidate_ids)
+
+            query += " ORDER BY confidence DESC LIMIT ?"
+            params.append(top_k * len(agents))
+
+            cursor.execute(query, params)
+            rows = cursor.fetchall()
+
+        return [self._row_to_heuristic(row) for row in rows]
+
+    def get_outcomes_for_agents(
+        self,
+        project_id: str,
+        agents: List[str],
+        task_type: Optional[str] = None,
+        embedding: Optional[List[float]] = None,
+        top_k: int = 5,
+        success_only: bool = False,
+    ) -> List[Outcome]:
+        """Get outcomes from multiple agents using optimized IN query."""
+        if not agents:
+            return []
+
+        candidate_ids = None
+        if embedding:
+            search_results = self._search_index(
+                MemoryType.OUTCOMES, embedding, top_k * 2 * len(agents)
+            )
+            candidate_ids = [id for id, _ in search_results]
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            placeholders = ",".join("?" * len(agents))
+            query = f"SELECT * FROM outcomes WHERE project_id = ? AND agent IN ({placeholders})"
+            params: List[Any] = [project_id] + list(agents)
+
+            if task_type:
+                query += " AND task_type = ?"
+                params.append(task_type)
+
+            if success_only:
+                query += " AND success = 1"
+
+            if candidate_ids is not None:
+                id_placeholders = ",".join("?" * len(candidate_ids))
+                query += f" AND id IN ({id_placeholders})"
+                params.extend(candidate_ids)
+
+            query += " ORDER BY timestamp DESC LIMIT ?"
+            params.append(top_k * len(agents))
+
+            cursor.execute(query, params)
+            rows = cursor.fetchall()
+
+        return [self._row_to_outcome(row) for row in rows]
+
+    def get_domain_knowledge_for_agents(
+        self,
+        project_id: str,
+        agents: List[str],
+        domain: Optional[str] = None,
+        embedding: Optional[List[float]] = None,
+        top_k: int = 5,
+    ) -> List[DomainKnowledge]:
+        """Get domain knowledge from multiple agents using optimized IN query."""
+        if not agents:
+            return []
+
+        candidate_ids = None
+        if embedding:
+            search_results = self._search_index(
+                MemoryType.DOMAIN_KNOWLEDGE, embedding, top_k * 2 * len(agents)
+            )
+            candidate_ids = [id for id, _ in search_results]
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            placeholders = ",".join("?" * len(agents))
+            query = f"SELECT * FROM domain_knowledge WHERE project_id = ? AND agent IN ({placeholders})"
+            params: List[Any] = [project_id] + list(agents)
+
+            if domain:
+                query += " AND domain = ?"
+                params.append(domain)
+
+            if candidate_ids is not None:
+                id_placeholders = ",".join("?" * len(candidate_ids))
+                query += f" AND id IN ({id_placeholders})"
+                params.extend(candidate_ids)
+
+            query += " ORDER BY confidence DESC LIMIT ?"
+            params.append(top_k * len(agents))
+
+            cursor.execute(query, params)
+            rows = cursor.fetchall()
+
+        return [self._row_to_domain_knowledge(row) for row in rows]
+
+    def get_anti_patterns_for_agents(
+        self,
+        project_id: str,
+        agents: List[str],
+        embedding: Optional[List[float]] = None,
+        top_k: int = 5,
+    ) -> List[AntiPattern]:
+        """Get anti-patterns from multiple agents using optimized IN query."""
+        if not agents:
+            return []
+
+        candidate_ids = None
+        if embedding:
+            search_results = self._search_index(
+                MemoryType.ANTI_PATTERNS, embedding, top_k * 2 * len(agents)
+            )
+            candidate_ids = [id for id, _ in search_results]
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            placeholders = ",".join("?" * len(agents))
+            query = f"SELECT * FROM anti_patterns WHERE project_id = ? AND agent IN ({placeholders})"
+            params: List[Any] = [project_id] + list(agents)
+
+            if candidate_ids is not None:
+                id_placeholders = ",".join("?" * len(candidate_ids))
+                query += f" AND id IN ({id_placeholders})"
+                params.extend(candidate_ids)
+
+            query += " ORDER BY occurrence_count DESC LIMIT ?"
+            params.append(top_k * len(agents))
+
+            cursor.execute(query, params)
+            rows = cursor.fetchall()
+
+        return [self._row_to_anti_pattern(row) for row in rows]
+
+    # ==================== UPDATE OPERATIONS ====================
+
+    def update_heuristic(
+        self,
+        heuristic_id: str,
+        updates: Dict[str, Any],
+    ) -> bool:
+        """Update a heuristic's fields."""
+        if not updates:
+            return False
+
+        set_clauses = []
+        params = []
+        for key, value in updates.items():
+            if key == "metadata" and value:
+                value = json.dumps(value)
+            elif isinstance(value, datetime):
+                value = value.isoformat()
+            set_clauses.append(f"{key} = ?")
+            params.append(value)
+
+        params.append(heuristic_id)
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                f"UPDATE heuristics SET {', '.join(set_clauses)} WHERE id = ?",
+                params,
+            )
+            return cursor.rowcount > 0
+
+    def increment_heuristic_occurrence(
+        self,
+        heuristic_id: str,
+        success: bool,
+    ) -> bool:
+        """Increment heuristic occurrence count."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            if success:
+                cursor.execute(
+                    """
+                    UPDATE heuristics
+                    SET occurrence_count = occurrence_count + 1,
+                        success_count = success_count + 1,
+                        last_validated = ?
+                    WHERE id = ?
+                    """,
+                    (datetime.now(timezone.utc).isoformat(), heuristic_id),
+                )
+            else:
+                cursor.execute(
+                    """
+                    UPDATE heuristics
+                    SET occurrence_count = occurrence_count + 1,
+                        last_validated = ?
+                    WHERE id = ?
+                    """,
+                    (datetime.now(timezone.utc).isoformat(), heuristic_id),
+                )
+
+            return cursor.rowcount > 0
+
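
# Validation-loop sketch: occurrence_count increments on every call,
# success_count only on success, and last_validated is refreshed either way;
# confidence itself is adjusted separately via update_heuristic.
storage.increment_heuristic_occurrence("h-1", success=True)
storage.update_heuristic("h-1", {"confidence": 0.8})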
+    # ==================== DELETE OPERATIONS ====================
+
+    def delete_outcomes_older_than(
+        self,
+        project_id: str,
+        older_than: datetime,
+        agent: Optional[str] = None,
+    ) -> int:
+        """Delete old outcomes."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            query = "DELETE FROM outcomes WHERE project_id = ? AND timestamp < ?"
+            params: List[Any] = [project_id, older_than.isoformat()]
+
+            if agent:
+                query += " AND agent = ?"
+                params.append(agent)
+
+            cursor.execute(query, params)
+            deleted = cursor.rowcount
+
+        logger.info(f"Deleted {deleted} old outcomes")
+        return deleted
+
+    def delete_low_confidence_heuristics(
+        self,
+        project_id: str,
+        below_confidence: float,
+        agent: Optional[str] = None,
+    ) -> int:
+        """Delete low-confidence heuristics."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            query = "DELETE FROM heuristics WHERE project_id = ? AND confidence < ?"
+            params: List[Any] = [project_id, below_confidence]
+
+            if agent:
+                query += " AND agent = ?"
+                params.append(agent)
+
+            cursor.execute(query, params)
+            deleted = cursor.rowcount
+
+        logger.info(f"Deleted {deleted} low-confidence heuristics")
+        return deleted
+
+    # ==================== STATS ====================
+
+    def get_stats(
+        self,
+        project_id: str,
+        agent: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """Get memory statistics."""
+        stats = {
+            "project_id": project_id,
+            "agent": agent,
+            "storage_type": "sqlite",
+            "faiss_available": FAISS_AVAILABLE,
+        }
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            # Use canonical memory types for stats
+            for memory_type in MemoryType.ALL:
+                if memory_type == MemoryType.PREFERENCES:
+                    # Preferences don't have project_id
+                    cursor.execute(
+                        f"SELECT COUNT(*) FROM {SQLITE_TABLE_NAMES[memory_type]}"
+                    )
+                    stats[f"{memory_type}_count"] = cursor.fetchone()[0]
+                else:
+                    query = f"SELECT COUNT(*) FROM {SQLITE_TABLE_NAMES[memory_type]} WHERE project_id = ?"
+                    params: List[Any] = [project_id]
+                    if agent:
+                        query += " AND agent = ?"
+                        params.append(agent)
+                    cursor.execute(query, params)
+                    stats[f"{memory_type}_count"] = cursor.fetchone()[0]
+
+            # Embedding counts
+            cursor.execute("SELECT COUNT(*) FROM embeddings")
+            stats["embeddings_count"] = cursor.fetchone()[0]
+
+        stats["total_count"] = sum(
+            stats.get(k, 0) for k in stats if k.endswith("_count")
+        )
+
+        return stats
+
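
# Stats sketch. Note that total_count sums every key ending in "_count",
# which includes embeddings_count, so memories with embeddings contribute
# to the total twice. Illustrative call:
stats = storage.get_stats(project_id="proj-1")
print(stats["storage_type"], stats["faiss_available"], stats["total_count"])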
+    # ==================== HELPERS ====================
+
+    def _parse_datetime(self, value: Any) -> Optional[datetime]:
+        """Parse datetime from string."""
+        if value is None:
+            return None
+        if isinstance(value, datetime):
+            return value
+        try:
+            return datetime.fromisoformat(value.replace("Z", "+00:00"))
+        except (ValueError, AttributeError):
+            return None
+
+    def _row_to_heuristic(self, row: sqlite3.Row) -> Heuristic:
+        """Convert database row to Heuristic."""
+        return Heuristic(
+            id=row["id"],
+            agent=row["agent"],
+            project_id=row["project_id"],
+            condition=row["condition"],
+            strategy=row["strategy"],
+            confidence=row["confidence"] or 0.0,
+            occurrence_count=row["occurrence_count"] or 0,
+            success_count=row["success_count"] or 0,
+            last_validated=self._parse_datetime(row["last_validated"])
+            or datetime.now(timezone.utc),
+            created_at=self._parse_datetime(row["created_at"])
+            or datetime.now(timezone.utc),
+            metadata=json.loads(row["metadata"]) if row["metadata"] else {},
+        )
+
+    def _row_to_outcome(self, row: sqlite3.Row) -> Outcome:
+        """Convert database row to Outcome."""
+        return Outcome(
+            id=row["id"],
+            agent=row["agent"],
+            project_id=row["project_id"],
+            task_type=row["task_type"] or "general",
+            task_description=row["task_description"],
+            success=bool(row["success"]),
+            strategy_used=row["strategy_used"] or "",
+            duration_ms=row["duration_ms"],
+            error_message=row["error_message"],
+            user_feedback=row["user_feedback"],
+            timestamp=self._parse_datetime(row["timestamp"])
+            or datetime.now(timezone.utc),
+            metadata=json.loads(row["metadata"]) if row["metadata"] else {},
+        )
+
+    def _row_to_preference(self, row: sqlite3.Row) -> UserPreference:
+        """Convert database row to UserPreference."""
+        return UserPreference(
+            id=row["id"],
+            user_id=row["user_id"],
+            category=row["category"] or "general",
+            preference=row["preference"],
+            source=row["source"] or "unknown",
+            confidence=row["confidence"] or 1.0,
+            timestamp=self._parse_datetime(row["timestamp"])
+            or datetime.now(timezone.utc),
+            metadata=json.loads(row["metadata"]) if row["metadata"] else {},
+        )
+
+    def _row_to_domain_knowledge(self, row: sqlite3.Row) -> DomainKnowledge:
+        """Convert database row to DomainKnowledge."""
+        return DomainKnowledge(
+            id=row["id"],
+            agent=row["agent"],
+            project_id=row["project_id"],
+            domain=row["domain"] or "general",
+            fact=row["fact"],
+            source=row["source"] or "unknown",
+            confidence=row["confidence"] or 1.0,
+            last_verified=self._parse_datetime(row["last_verified"])
+            or datetime.now(timezone.utc),
+            metadata=json.loads(row["metadata"]) if row["metadata"] else {},
+        )
+
+    def _row_to_anti_pattern(self, row: sqlite3.Row) -> AntiPattern:
+        """Convert database row to AntiPattern."""
+        return AntiPattern(
+            id=row["id"],
+            agent=row["agent"],
+            project_id=row["project_id"],
+            pattern=row["pattern"],
+            why_bad=row["why_bad"] or "",
+            better_alternative=row["better_alternative"] or "",
+            occurrence_count=row["occurrence_count"] or 1,
+            last_seen=self._parse_datetime(row["last_seen"])
+            or datetime.now(timezone.utc),
+            created_at=self._parse_datetime(row["created_at"])
+            or datetime.now(timezone.utc),
+            metadata=json.loads(row["metadata"]) if row["metadata"] else {},
+        )
+
+    # ===== Additional abstract method implementations =====
+
+    def update_heuristic_confidence(
+        self,
+        heuristic_id: str,
+        new_confidence: float,
+    ) -> bool:
+        """Update confidence score for a heuristic."""
+        with self._get_connection() as conn:
+            cursor = conn.execute(
+                "UPDATE heuristics SET confidence = ? WHERE id = ?",
+                (new_confidence, heuristic_id),
+            )
+            return cursor.rowcount > 0
+
+    def update_knowledge_confidence(
+        self,
+        knowledge_id: str,
+        new_confidence: float,
+    ) -> bool:
+        """Update confidence score for domain knowledge."""
+        with self._get_connection() as conn:
+            cursor = conn.execute(
+                "UPDATE domain_knowledge SET confidence = ? WHERE id = ?",
+                (new_confidence, knowledge_id),
+            )
+            return cursor.rowcount > 0
+
+    def delete_heuristic(self, heuristic_id: str) -> bool:
+        """Delete a heuristic by ID."""
+        with self._get_connection() as conn:
+            # Also remove from embedding index
+            conn.execute(
+                "DELETE FROM embeddings WHERE memory_type = ? AND memory_id = ?",
+                (MemoryType.HEURISTICS, heuristic_id),
+            )
+            cursor = conn.execute(
+                f"DELETE FROM {SQLITE_TABLE_NAMES[MemoryType.HEURISTICS]} WHERE id = ?",
+                (heuristic_id,),
+            )
+            if cursor.rowcount > 0:
+                # Mark index as dirty for lazy rebuild on next search
+                self._index_dirty[MemoryType.HEURISTICS] = True
+                return True
+            return False
+
+    def delete_outcome(self, outcome_id: str) -> bool:
+        """Delete an outcome by ID."""
+        with self._get_connection() as conn:
+            # Also remove from embedding index
+            conn.execute(
+                "DELETE FROM embeddings WHERE memory_type = ? AND memory_id = ?",
+                (MemoryType.OUTCOMES, outcome_id),
+            )
+            cursor = conn.execute(
+                f"DELETE FROM {SQLITE_TABLE_NAMES[MemoryType.OUTCOMES]} WHERE id = ?",
+                (outcome_id,),
+            )
+            if cursor.rowcount > 0:
+                # Mark index as dirty for lazy rebuild on next search
+                self._index_dirty[MemoryType.OUTCOMES] = True
+                return True
+            return False
+
+    def delete_domain_knowledge(self, knowledge_id: str) -> bool:
+        """Delete domain knowledge by ID."""
+        with self._get_connection() as conn:
+            # Also remove from embedding index
+            conn.execute(
+                "DELETE FROM embeddings WHERE memory_type = ? AND memory_id = ?",
+                (MemoryType.DOMAIN_KNOWLEDGE, knowledge_id),
+            )
+            cursor = conn.execute(
+                f"DELETE FROM {SQLITE_TABLE_NAMES[MemoryType.DOMAIN_KNOWLEDGE]} WHERE id = ?",
+                (knowledge_id,),
+            )
+            if cursor.rowcount > 0:
+                # Mark index as dirty for lazy rebuild on next search
+                self._index_dirty[MemoryType.DOMAIN_KNOWLEDGE] = True
+                return True
+            return False
+
+    def delete_anti_pattern(self, anti_pattern_id: str) -> bool:
+        """Delete an anti-pattern by ID."""
+        with self._get_connection() as conn:
+            # Also remove from embedding index
+            conn.execute(
+                "DELETE FROM embeddings WHERE memory_type = ? AND memory_id = ?",
+                (MemoryType.ANTI_PATTERNS, anti_pattern_id),
+            )
+            cursor = conn.execute(
+                f"DELETE FROM {SQLITE_TABLE_NAMES[MemoryType.ANTI_PATTERNS]} WHERE id = ?",
+                (anti_pattern_id,),
+            )
+            if cursor.rowcount > 0:
+                # Mark index as dirty for lazy rebuild on next search
+                self._index_dirty[MemoryType.ANTI_PATTERNS] = True
+                return True
+            return False
+
+    # ==================== MIGRATION SUPPORT ====================
+
+    def _get_version_store(self):
+        """Get or create the version store."""
+        if self._version_store is None:
+            from alma.storage.migrations.version_stores import SQLiteVersionStore
+
+            self._version_store = SQLiteVersionStore(self.db_path)
+        return self._version_store
+
+    def _get_migration_runner(self):
+        """Get or create the migration runner."""
+        if self._migration_runner is None:
+            from alma.storage.migrations.runner import MigrationRunner
+            from alma.storage.migrations.versions import v1_0_0  # noqa: F401
+
+            self._migration_runner = MigrationRunner(
+                version_store=self._get_version_store(),
+                backend="sqlite",
+            )
+        return self._migration_runner
+
+    def _ensure_migrated(self) -> None:
+        """Ensure database is migrated to latest version."""
+        runner = self._get_migration_runner()
+        if runner.needs_migration():
+            with self._get_connection() as conn:
+                applied = runner.migrate(conn)
+                if applied:
+                    logger.info(f"Applied {len(applied)} migrations: {applied}")
+
+    def get_schema_version(self) -> Optional[str]:
+        """Get the current schema version."""
+        return self._get_version_store().get_current_version()
+
+    def get_migration_status(self) -> Dict[str, Any]:
+        """Get migration status information."""
+        runner = self._get_migration_runner()
+        status = runner.get_status()
+        status["migration_supported"] = True
+        return status
+
+    def migrate(
+        self,
+        target_version: Optional[str] = None,
+        dry_run: bool = False,
+    ) -> List[str]:
+        """
+        Apply pending schema migrations.
+
+        Args:
+            target_version: Optional target version (applies all if not specified)
+            dry_run: If True, show what would be done without making changes
+
+        Returns:
+            List of applied migration versions
+        """
+        runner = self._get_migration_runner()
+        with self._get_connection() as conn:
+            return runner.migrate(conn, target_version=target_version, dry_run=dry_run)
+
+    def rollback(
+        self,
+        target_version: str,
+        dry_run: bool = False,
+    ) -> List[str]:
+        """
+        Roll back schema to a previous version.
+
+        Args:
+            target_version: Version to roll back to
+            dry_run: If True, show what would be done without making changes
+
+        Returns:
+            List of rolled back migration versions
+        """
+        runner = self._get_migration_runner()
+        with self._get_connection() as conn:
+            return runner.rollback(conn, target_version=target_version, dry_run=dry_run)
+
+    # ==================== SCOPE FILTER HELPER (v0.6.0+) ====================
+
+    def _apply_scope_filter(
+        self,
+        query: str,
+        params: List[Any],
+        scope_filter: Dict[str, Any],
+    ) -> Tuple[str, List[Any]]:
+        """
+        Apply workflow scope filter to a query.
+
+        Note: For tables that don't have workflow columns (tenant_id, workflow_id,
+        run_id, node_id), scope filtering is a no-op. The filter only applies to
+        the workflow_outcomes table, which has these columns.
+
+        Args:
+            query: The SQL query string
+            params: The query parameters
+            scope_filter: Dict with keys: tenant_id, workflow_id, run_id, node_id
+
+        Returns:
+            Tuple of (modified query, modified params)
+        """
+        # Note: Most ALMA tables don't have workflow columns yet.
+        # This filter primarily applies to workflow_outcomes queries.
+        # For other tables, we return the query unchanged to maintain
+        # backwards compatibility.
+        return query, params
+
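
# Behavior check implied by the docstring above: for tables without workflow
# columns the filter is a pass-through, so the query and params come back
# unchanged (scoped filtering happens inline in get_workflow_outcomes below).
q, p = storage._apply_scope_filter("SELECT * FROM heuristics", [], {"run_id": "r-1"})
assert q == "SELECT * FROM heuristics" and p == []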
+    # ==================== CHECKPOINT OPERATIONS (v0.6.0+) ====================
+
+    def save_checkpoint(self, checkpoint: "Checkpoint") -> str:
+        """Save a workflow checkpoint."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                """
+                INSERT OR REPLACE INTO checkpoints
+                (id, run_id, node_id, state, sequence_number, branch_id,
+                 parent_checkpoint_id, state_hash, metadata, created_at)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    checkpoint.id,
+                    checkpoint.run_id,
+                    checkpoint.node_id,
+                    json.dumps(checkpoint.state),
+                    checkpoint.sequence_number,
+                    checkpoint.branch_id,
+                    checkpoint.parent_checkpoint_id,
+                    checkpoint.state_hash,
+                    json.dumps(checkpoint.metadata) if checkpoint.metadata else None,
+                    checkpoint.created_at.isoformat(),
+                ),
+            )
+        logger.debug(f"Saved checkpoint: {checkpoint.id}")
+        return checkpoint.id
+
+    def get_checkpoint(self, checkpoint_id: str) -> Optional["Checkpoint"]:
+        """Get a checkpoint by ID."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                "SELECT * FROM checkpoints WHERE id = ?",
+                (checkpoint_id,),
+            )
+            row = cursor.fetchone()
+
+        if row is None:
+            return None
+        return self._row_to_checkpoint(row)
+
+    def get_latest_checkpoint(
+        self,
+        run_id: str,
+        branch_id: Optional[str] = None,
+    ) -> Optional["Checkpoint"]:
+        """Get the most recent checkpoint for a workflow run."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            query = "SELECT * FROM checkpoints WHERE run_id = ?"
+            params: List[Any] = [run_id]
+
+            if branch_id is not None:
+                query += " AND branch_id = ?"
+                params.append(branch_id)
+
+            query += " ORDER BY sequence_number DESC LIMIT 1"
+
+            cursor.execute(query, params)
+            row = cursor.fetchone()
+
+        if row is None:
+            return None
+        return self._row_to_checkpoint(row)
+
+    def get_checkpoints_for_run(
+        self,
+        run_id: str,
+        branch_id: Optional[str] = None,
+        limit: int = 100,
+    ) -> List["Checkpoint"]:
+        """Get all checkpoints for a workflow run."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            query = "SELECT * FROM checkpoints WHERE run_id = ?"
+            params: List[Any] = [run_id]
+
+            if branch_id is not None:
+                query += " AND branch_id = ?"
+                params.append(branch_id)
+
+            query += " ORDER BY sequence_number ASC LIMIT ?"
+            params.append(limit)
+
+            cursor.execute(query, params)
+            rows = cursor.fetchall()
+
+        return [self._row_to_checkpoint(row) for row in rows]
+
+    def cleanup_checkpoints(
+        self,
+        run_id: str,
+        keep_latest: int = 1,
+    ) -> int:
+        """Clean up old checkpoints for a completed run."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            # Get IDs of checkpoints to keep
+            cursor.execute(
+                """
+                SELECT id FROM checkpoints
+                WHERE run_id = ?
+                ORDER BY sequence_number DESC
+                LIMIT ?
+                """,
+                (run_id, keep_latest),
+            )
+            keep_ids = [row["id"] for row in cursor.fetchall()]
+
+            if not keep_ids:
+                return 0
+
+            # Delete all others
+            placeholders = ",".join("?" * len(keep_ids))
+            cursor.execute(
+                f"""
+                DELETE FROM checkpoints
+                WHERE run_id = ? AND id NOT IN ({placeholders})
+                """,
+                [run_id] + keep_ids,
+            )
+            deleted = cursor.rowcount
+
+        logger.info(f"Cleaned up {deleted} checkpoints for run {run_id}")
+        return deleted
+
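
# Checkpoint-lifecycle sketch (assumes `storage` from above; IDs are made
# up). After a run finishes, cleanup keeps only the newest `keep_latest`
# rows for that run_id, ordered by sequence_number.
latest = storage.get_latest_checkpoint(run_id="run-1")
removed = storage.cleanup_checkpoints(run_id="run-1", keep_latest=1)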
1835
+ def _row_to_checkpoint(self, row: sqlite3.Row) -> "Checkpoint":
1836
+ """Convert database row to Checkpoint."""
1837
+ from alma.workflow import Checkpoint
1838
+
1839
+ return Checkpoint(
1840
+ id=row["id"],
1841
+ run_id=row["run_id"],
1842
+ node_id=row["node_id"],
1843
+ state=json.loads(row["state"]) if row["state"] else {},
1844
+ sequence_number=row["sequence_number"] or 0,
1845
+ branch_id=row["branch_id"],
1846
+ parent_checkpoint_id=row["parent_checkpoint_id"],
1847
+ state_hash=row["state_hash"] or "",
1848
+ metadata=json.loads(row["metadata"]) if row["metadata"] else {},
1849
+ created_at=self._parse_datetime(row["created_at"])
1850
+ or datetime.now(timezone.utc),
1851
+ )
1852
+
1853
+ # ==================== WORKFLOW OUTCOME OPERATIONS (v0.6.0+) ====================
1854
+
1855
+ def save_workflow_outcome(self, outcome: "WorkflowOutcome") -> str:
1856
+ """Save a workflow outcome."""
1857
+ with self._get_connection() as conn:
1858
+ cursor = conn.cursor()
1859
+ cursor.execute(
1860
+ """
1861
+ INSERT OR REPLACE INTO workflow_outcomes
1862
+ (id, tenant_id, workflow_id, run_id, agent, project_id, result,
1863
+ summary, strategies_used, successful_patterns, failed_patterns,
1864
+ extracted_heuristics, extracted_anti_patterns, duration_seconds,
1865
+ node_count, error_message, metadata, created_at)
1866
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
1867
+ """,
1868
+ (
1869
+ outcome.id,
1870
+ outcome.tenant_id,
1871
+ outcome.workflow_id,
1872
+ outcome.run_id,
1873
+ outcome.agent,
1874
+ outcome.project_id,
1875
+ outcome.result.value,
1876
+ outcome.summary,
1877
+ json.dumps(outcome.strategies_used),
1878
+ json.dumps(outcome.successful_patterns),
1879
+ json.dumps(outcome.failed_patterns),
1880
+ json.dumps(outcome.extracted_heuristics),
1881
+ json.dumps(outcome.extracted_anti_patterns),
1882
+ outcome.duration_seconds,
1883
+ outcome.node_count,
1884
+ outcome.error_message,
1885
+ json.dumps(outcome.metadata) if outcome.metadata else None,
1886
+ outcome.created_at.isoformat(),
1887
+ ),
1888
+ )
1889
+
1890
+ # Add embedding to index if present
1891
+ if outcome.embedding:
1892
+ self._add_to_index("workflow_outcomes", outcome.id, outcome.embedding)
1893
+
1894
+ logger.debug(f"Saved workflow outcome: {outcome.id}")
1895
+ return outcome.id
1896
+
1897
+ def get_workflow_outcome(self, outcome_id: str) -> Optional["WorkflowOutcome"]:
1898
+ """Get a workflow outcome by ID."""
1899
+ with self._get_connection() as conn:
1900
+ cursor = conn.cursor()
1901
+ cursor.execute(
1902
+ "SELECT * FROM workflow_outcomes WHERE id = ?",
1903
+ (outcome_id,),
1904
+ )
1905
+ row = cursor.fetchone()
1906
+
1907
+ if row is None:
1908
+ return None
1909
+ return self._row_to_workflow_outcome(row)
1910
+
1911
+    def get_workflow_outcomes(
+        self,
+        project_id: str,
+        agent: Optional[str] = None,
+        workflow_id: Optional[str] = None,
+        embedding: Optional[List[float]] = None,
+        top_k: int = 10,
+        scope_filter: Optional[Dict[str, Any]] = None,
+    ) -> List["WorkflowOutcome"]:
+        """Get workflow outcomes with optional filtering."""
+        candidate_ids = None
+        if embedding:
+            search_results = self._search_index(
+                "workflow_outcomes", embedding, top_k * 2
+            )
+            candidate_ids = [id for id, _ in search_results]
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            query = "SELECT * FROM workflow_outcomes WHERE project_id = ?"
+            params: List[Any] = [project_id]
+
+            if agent:
+                query += " AND agent = ?"
+                params.append(agent)
+
+            if workflow_id:
+                query += " AND workflow_id = ?"
+                params.append(workflow_id)
+
+            if candidate_ids is not None:
+                if not candidate_ids:
+                    # Vector search returned no candidates; an empty IN ()
+                    # clause would be a SQL syntax error, so short-circuit.
+                    return []
+                placeholders = ",".join("?" * len(candidate_ids))
+                query += f" AND id IN ({placeholders})"
+                params.extend(candidate_ids)
+
+            # Apply scope filter for workflow columns
+            if scope_filter:
+                if scope_filter.get("tenant_id"):
+                    query += " AND tenant_id = ?"
+                    params.append(scope_filter["tenant_id"])
+                if scope_filter.get("workflow_id"):
+                    query += " AND workflow_id = ?"
+                    params.append(scope_filter["workflow_id"])
+                if scope_filter.get("run_id"):
+                    query += " AND run_id = ?"
+                    params.append(scope_filter["run_id"])
+
+            query += " ORDER BY created_at DESC LIMIT ?"
+            params.append(top_k)
+
+            cursor.execute(query, params)
+            rows = cursor.fetchall()
+
+            return [self._row_to_workflow_outcome(row) for row in rows]
+
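A query sketch against the filter parameters above; the `scope_filter` keys match the columns the SQL checks (`tenant_id`, `workflow_id`, `run_id`), and `storage` is again the hypothetical backend handle:

```python
# Most recent outcomes for one agent, narrowed to a single tenant.
recent = storage.get_workflow_outcomes(
    project_id="proj-1",
    agent="builder",
    scope_filter={"tenant_id": "tenant-a"},
    top_k=5,
)
for o in recent:
    print(o.workflow_id, o.result, o.summary)
```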
+    def _row_to_workflow_outcome(self, row: sqlite3.Row) -> "WorkflowOutcome":
+        """Convert database row to WorkflowOutcome."""
+        from alma.workflow import WorkflowOutcome, WorkflowResult
+
+        return WorkflowOutcome(
+            id=row["id"],
+            tenant_id=row["tenant_id"],
+            workflow_id=row["workflow_id"],
+            run_id=row["run_id"],
+            agent=row["agent"],
+            project_id=row["project_id"],
+            result=WorkflowResult(row["result"]),
+            summary=row["summary"] or "",
+            strategies_used=json.loads(row["strategies_used"])
+            if row["strategies_used"]
+            else [],
+            successful_patterns=json.loads(row["successful_patterns"])
+            if row["successful_patterns"]
+            else [],
+            failed_patterns=json.loads(row["failed_patterns"])
+            if row["failed_patterns"]
+            else [],
+            extracted_heuristics=json.loads(row["extracted_heuristics"])
+            if row["extracted_heuristics"]
+            else [],
+            extracted_anti_patterns=json.loads(row["extracted_anti_patterns"])
+            if row["extracted_anti_patterns"]
+            else [],
+            duration_seconds=row["duration_seconds"],
+            node_count=row["node_count"],
+            error_message=row["error_message"],
+            metadata=json.loads(row["metadata"]) if row["metadata"] else {},
+            created_at=self._parse_datetime(row["created_at"])
+            or datetime.now(timezone.utc),
+        )
+
+    # ==================== ARTIFACT LINK OPERATIONS (v0.6.0+) ====================
+
+    def save_artifact_link(self, artifact_ref: "ArtifactRef") -> str:
+        """Save an artifact reference linked to a memory."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                """
+                INSERT OR REPLACE INTO artifact_links
+                (id, memory_id, artifact_type, storage_url, filename,
+                 mime_type, size_bytes, checksum, metadata, created_at)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    artifact_ref.id,
+                    artifact_ref.memory_id,
+                    artifact_ref.artifact_type.value,
+                    artifact_ref.storage_url,
+                    artifact_ref.filename,
+                    artifact_ref.mime_type,
+                    artifact_ref.size_bytes,
+                    artifact_ref.checksum,
+                    json.dumps(artifact_ref.metadata)
+                    if artifact_ref.metadata
+                    else None,
+                    artifact_ref.created_at.isoformat(),
+                ),
+            )
+        logger.debug(f"Saved artifact link: {artifact_ref.id}")
+        return artifact_ref.id
+
+    def get_artifact_links(self, memory_id: str) -> List["ArtifactRef"]:
+        """Get all artifact references linked to a memory."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                "SELECT * FROM artifact_links WHERE memory_id = ?",
+                (memory_id,),
+            )
+            rows = cursor.fetchall()
+
+            return [self._row_to_artifact_ref(row) for row in rows]
+
+    def delete_artifact_link(self, artifact_id: str) -> bool:
+        """Delete an artifact reference."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                "DELETE FROM artifact_links WHERE id = ?",
+                (artifact_id,),
+            )
+            return cursor.rowcount > 0
+
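A hypothetical round trip for artifact links; `ArtifactRef` and `ArtifactType` come from `alma.workflow` as in the row converter below, but the enum value `"file"` and all field values are illustrative assumptions:

```python
# Hypothetical sketch; `storage` is an instance of this backend.
from datetime import datetime, timezone

from alma.workflow import ArtifactRef, ArtifactType

ref = ArtifactRef(
    id="art-001",
    memory_id="out-123",
    artifact_type=ArtifactType("file"),  # enum value is an assumption
    storage_url="s3://bucket/report.pdf",
    filename="report.pdf",
    mime_type="application/pdf",
    size_bytes=48213,
    checksum="sha256:deadbeef",
    metadata={},
    created_at=datetime.now(timezone.utc),
)
storage.save_artifact_link(ref)
links = storage.get_artifact_links("out-123")  # -> [ref]
storage.delete_artifact_link("art-001")        # -> True
```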
+    def _row_to_artifact_ref(self, row: sqlite3.Row) -> "ArtifactRef":
+        """Convert database row to ArtifactRef."""
+        from alma.workflow import ArtifactRef, ArtifactType
+
+        return ArtifactRef(
+            id=row["id"],
+            memory_id=row["memory_id"],
+            artifact_type=ArtifactType(row["artifact_type"]),
+            storage_url=row["storage_url"],
+            filename=row["filename"],
+            mime_type=row["mime_type"],
+            size_bytes=row["size_bytes"],
+            checksum=row["checksum"],
+            metadata=json.loads(row["metadata"]) if row["metadata"] else {},
+            created_at=self._parse_datetime(row["created_at"])
+            or datetime.now(timezone.utc),
+        )
+
+    # ==================== SESSION HANDOFFS ====================
+
+    def save_session_handoff(self, handoff: "SessionHandoff") -> str:
+        """Save a session handoff for persistence."""
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                """
+                INSERT OR REPLACE INTO session_handoffs (
+                    id, project_id, agent, session_id, last_action, last_outcome,
+                    current_goal, key_decisions, active_files, blockers, next_steps,
+                    test_status, confidence_level, risk_flags, session_start,
+                    session_end, duration_ms, metadata, created_at
+                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    handoff.id,
+                    handoff.project_id,
+                    handoff.agent,
+                    handoff.session_id,
+                    handoff.last_action,
+                    handoff.last_outcome,
+                    handoff.current_goal,
+                    json.dumps(handoff.key_decisions),
+                    json.dumps(handoff.active_files),
+                    json.dumps(handoff.blockers),
+                    json.dumps(handoff.next_steps),
+                    json.dumps(handoff.test_status),
+                    handoff.confidence_level,
+                    json.dumps(handoff.risk_flags),
+                    handoff.session_start.isoformat()
+                    if handoff.session_start
+                    else None,
+                    handoff.session_end.isoformat() if handoff.session_end else None,
+                    handoff.duration_ms,
+                    json.dumps(handoff.metadata),
+                    handoff.created_at.isoformat(),
+                ),
+            )
+        return handoff.id
+
+    def get_session_handoffs(
+        self,
+        project_id: str,
+        agent: str,
+        limit: int = 50,
+    ) -> List["SessionHandoff"]:
+        """Get session handoffs for an agent, most recent first."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                """
+                SELECT * FROM session_handoffs
+                WHERE project_id = ? AND agent = ?
+                ORDER BY created_at DESC
+                LIMIT ?
+                """,
+                (project_id, agent, limit),
+            )
+            rows = cursor.fetchall()
+            return [self._row_to_session_handoff(row) for row in rows]
+
+    def get_latest_session_handoff(
+        self,
+        project_id: str,
+        agent: str,
+    ) -> Optional["SessionHandoff"]:
+        """Get the most recent session handoff for an agent."""
+        handoffs = self.get_session_handoffs(project_id, agent, limit=1)
+        return handoffs[0] if handoffs else None
+
+    def delete_session_handoffs(
+        self,
+        project_id: str,
+        agent: Optional[str] = None,
+    ) -> int:
+        """Delete session handoffs."""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            if agent:
+                cursor.execute(
+                    "DELETE FROM session_handoffs WHERE project_id = ? AND agent = ?",
+                    (project_id, agent),
+                )
+            else:
+                cursor.execute(
+                    "DELETE FROM session_handoffs WHERE project_id = ?",
+                    (project_id,),
+                )
+            return cursor.rowcount
+
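A hypothetical handoff round trip; `SessionHandoff` ships in `alma.session` per the row converter below, the field names mirror the columns above, and all values are illustrative:

```python
# Hypothetical sketch; `storage` is an instance of this backend.
from datetime import datetime, timezone

from alma.session import SessionHandoff

handoff = SessionHandoff(
    id="ho-001",
    project_id="proj-1",
    agent="builder",
    session_id="sess-9",
    last_action="ran integration tests",
    last_outcome="2 failures in auth module",
    current_goal="green test suite before release",
    key_decisions=["pin httpx to 0.27"],
    active_files=["src/auth/session.py"],
    blockers=["flaky token refresh test"],
    next_steps=["bisect failing test"],
    test_status={"passed": 118, "failed": 2},
    confidence_level=0.6,
    risk_flags=["auth regression"],
    session_start=datetime.now(timezone.utc),
    session_end=None,
    duration_ms=0,
    metadata={},
    created_at=datetime.now(timezone.utc),
)
storage.save_session_handoff(handoff)
latest = storage.get_latest_session_handoff("proj-1", "builder")  # -> handoff
```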
+    def _row_to_session_handoff(self, row: sqlite3.Row) -> "SessionHandoff":
+        """Convert database row to SessionHandoff."""
+        from alma.session import SessionHandoff
+
+        return SessionHandoff(
+            id=row["id"],
+            project_id=row["project_id"],
+            agent=row["agent"],
+            session_id=row["session_id"],
+            last_action=row["last_action"],
+            last_outcome=row["last_outcome"],
+            current_goal=row["current_goal"] or "",
+            key_decisions=json.loads(row["key_decisions"])
+            if row["key_decisions"]
+            else [],
+            active_files=json.loads(row["active_files"]) if row["active_files"] else [],
+            blockers=json.loads(row["blockers"]) if row["blockers"] else [],
+            next_steps=json.loads(row["next_steps"]) if row["next_steps"] else [],
+            test_status=json.loads(row["test_status"]) if row["test_status"] else {},
+            confidence_level=row["confidence_level"] or 0.5,
+            risk_flags=json.loads(row["risk_flags"]) if row["risk_flags"] else [],
+            session_start=self._parse_datetime(row["session_start"])
+            or datetime.now(timezone.utc),
+            session_end=self._parse_datetime(row["session_end"]),
+            duration_ms=row["duration_ms"] or 0,
+            metadata=json.loads(row["metadata"]) if row["metadata"] else {},
+            created_at=self._parse_datetime(row["created_at"])
+            or datetime.now(timezone.utc),
+        )
+
+    # ==================== MEMORY STRENGTH OPERATIONS (v0.7.0+) ====================
+
+    def save_memory_strength(self, strength: "MemoryStrength") -> str:
+        """
+        Save or update a memory strength record.
+
+        Args:
+            strength: MemoryStrength instance to save
+
+        Returns:
+            The memory ID
+        """
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            # Check if record exists
+            cursor.execute(
+                "SELECT 1 FROM memory_strength WHERE memory_id = ?",
+                (strength.memory_id,),
+            )
+            exists = cursor.fetchone() is not None
+
+            reinforcement_events_json = json.dumps(
+                [r.isoformat() for r in strength.reinforcement_events]
+            )
+
+            if exists:
+                # Update existing record
+                cursor.execute(
+                    """
+                    UPDATE memory_strength SET
+                        memory_type = ?,
+                        initial_strength = ?,
+                        decay_half_life_days = ?,
+                        last_accessed = ?,
+                        access_count = ?,
+                        explicit_importance = ?,
+                        reinforcement_events = ?
+                    WHERE memory_id = ?
+                    """,
+                    (
+                        strength.memory_type,
+                        strength.initial_strength,
+                        strength.decay_half_life_days,
+                        strength.last_accessed.isoformat(),
+                        strength.access_count,
+                        strength.explicit_importance,
+                        reinforcement_events_json,
+                        strength.memory_id,
+                    ),
+                )
+            else:
+                # Insert new record
+                # Try to extract project_id and agent from the memory
+                project_id = None
+                agent = None
+
+                # Look up the memory to get project_id and agent
+                for table in [
+                    "heuristics",
+                    "outcomes",
+                    "domain_knowledge",
+                    "anti_patterns",
+                ]:
+                    cursor.execute(
+                        f"SELECT project_id, agent FROM {table} WHERE id = ?",
+                        (strength.memory_id,),
+                    )
+                    row = cursor.fetchone()
+                    if row:
+                        project_id = row["project_id"]
+                        agent = row["agent"]
+                        break
+
+                cursor.execute(
+                    """
+                    INSERT INTO memory_strength (
+                        memory_id, memory_type, project_id, agent,
+                        initial_strength, decay_half_life_days,
+                        created_at, last_accessed, access_count,
+                        explicit_importance, reinforcement_events
+                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                    """,
+                    (
+                        strength.memory_id,
+                        strength.memory_type,
+                        project_id,
+                        agent,
+                        strength.initial_strength,
+                        strength.decay_half_life_days,
+                        strength.created_at.isoformat(),
+                        strength.last_accessed.isoformat(),
+                        strength.access_count,
+                        strength.explicit_importance,
+                        reinforcement_events_json,
+                    ),
+                )
+
+        return strength.memory_id
+
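A hypothetical sketch of recording strength for an existing heuristic; the `MemoryStrength` fields match the columns read back in `_row_to_memory_strength` below, and the ID is illustrative:

```python
# Hypothetical sketch; `storage` is an instance of this backend.
from datetime import datetime, timezone

from alma.learning.decay import MemoryStrength

now = datetime.now(timezone.utc)
strength = MemoryStrength(
    memory_id="heu-abc123",
    memory_type="heuristic",
    initial_strength=1.0,
    decay_half_life_days=30,
    created_at=now,
    last_accessed=now,
    access_count=1,
    reinforcement_events=[now],
    explicit_importance=0.5,
)
storage.save_memory_strength(strength)  # INSERT the first time, UPDATE after
```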
+    def get_memory_strength(self, memory_id: str) -> Optional["MemoryStrength"]:
+        """
+        Get a memory strength record by memory ID.
+
+        Args:
+            memory_id: The memory ID to look up
+
+        Returns:
+            MemoryStrength instance, or None if not found
+        """
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                "SELECT * FROM memory_strength WHERE memory_id = ?",
+                (memory_id,),
+            )
+            row = cursor.fetchone()
+
+            if row is None:
+                return None
+
+            return self._row_to_memory_strength(row)
+
+    def get_all_memory_strengths(
+        self,
+        project_id: str,
+        agent: Optional[str] = None,
+    ) -> List["MemoryStrength"]:
+        """
+        Get all memory strength records for a project/agent.
+
+        Args:
+            project_id: Project to query
+            agent: Optional agent filter
+
+        Returns:
+            List of MemoryStrength instances
+        """
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            if agent:
+                cursor.execute(
+                    """
+                    SELECT * FROM memory_strength
+                    WHERE project_id = ? AND agent = ?
+                    """,
+                    (project_id, agent),
+                )
+            else:
+                cursor.execute(
+                    """
+                    SELECT * FROM memory_strength
+                    WHERE project_id = ?
+                    """,
+                    (project_id,),
+                )
+
+            rows = cursor.fetchall()
+            return [self._row_to_memory_strength(row) for row in rows]
+
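The `decay_half_life_days` column suggests exponential decay, but the actual curve lives in alma/learning/decay.py, which is outside this hunk. The sketch below is only the textbook half-life formula, offered as one plausible reading of the stored fields, not as ALMA's implementation:

```python
# Illustrative only: textbook exponential half-life decay. The real formula
# in alma.learning.decay may differ (e.g., reinforcement or importance boosts).
from datetime import datetime, timezone


def effective_strength(strength, now=None) -> float:
    now = now or datetime.now(timezone.utc)
    days_idle = (now - strength.last_accessed).total_seconds() / 86400.0
    return strength.initial_strength * 0.5 ** (
        days_idle / strength.decay_half_life_days
    )
```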
+
2357
+ def delete_memory_strength(self, memory_id: str) -> bool:
2358
+ """
2359
+ Delete a memory strength record.
2360
+
2361
+ Args:
2362
+ memory_id: The memory ID
2363
+
2364
+ Returns:
2365
+ True if deleted, False if not found
2366
+ """
2367
+ with self._get_connection() as conn:
2368
+ cursor = conn.cursor()
2369
+ cursor.execute(
2370
+ "DELETE FROM memory_strength WHERE memory_id = ?",
2371
+ (memory_id,),
2372
+ )
2373
+ return cursor.rowcount > 0
2374
+
2375
+    def _row_to_memory_strength(self, row: sqlite3.Row) -> "MemoryStrength":
+        """Convert database row to MemoryStrength."""
+        from alma.learning.decay import MemoryStrength
+
+        reinforcement_events = []
+        if row["reinforcement_events"]:
+            events_json = json.loads(row["reinforcement_events"])
+            for event in events_json:
+                if isinstance(event, str):
+                    dt_str = event.replace("Z", "+00:00")
+                    reinforcement_events.append(datetime.fromisoformat(dt_str))
+
+        return MemoryStrength(
+            memory_id=row["memory_id"],
+            memory_type=row["memory_type"] or "unknown",
+            initial_strength=row["initial_strength"] or 1.0,
+            decay_half_life_days=row["decay_half_life_days"] or 30,
+            created_at=self._parse_datetime(row["created_at"])
+            or datetime.now(timezone.utc),
+            last_accessed=self._parse_datetime(row["last_accessed"])
+            or datetime.now(timezone.utc),
+            access_count=row["access_count"] or 0,
+            reinforcement_events=reinforcement_events,
+            explicit_importance=row["explicit_importance"] or 0.5,
+        )
+
+    # ==================== ARCHIVE OPERATIONS (v0.7.0+) ====================
+
+    def archive_memory(
+        self,
+        memory_id: str,
+        memory_type: str,
+        reason: str,
+        final_strength: float,
+    ) -> "ArchivedMemory":
+        """
+        Archive a memory before deletion.
+
+        Captures full memory data including content, embedding, and metadata
+        for potential future recovery or compliance auditing.
+
+        Args:
+            memory_id: ID of the memory to archive
+            memory_type: Type of memory (heuristic, outcome, etc.)
+            reason: Why the memory is being archived (decay, manual, consolidation, etc.)
+            final_strength: Memory strength at time of archival
+
+        Returns:
+            ArchivedMemory instance
+        """
+        from alma.storage.archive import ArchivedMemory
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            # Get the memory data based on type
+            table_map = {
+                "heuristic": "heuristics",
+                "outcome": "outcomes",
+                "domain_knowledge": "domain_knowledge",
+                "anti_pattern": "anti_patterns",
+                "preference": "preferences",
+            }
+
+            table_name = table_map.get(memory_type)
+            if not table_name:
+                raise ValueError(f"Unknown memory type: {memory_type}")
+
+            cursor.execute(f"SELECT * FROM {table_name} WHERE id = ?", (memory_id,))
+            row = cursor.fetchone()
+
+            if row is None:
+                raise ValueError(f"Memory not found: {memory_id}")
+
+            # Extract content and metadata from the memory
+            content = self._extract_memory_content(memory_type, row)
+            metadata = json.loads(row["metadata"]) if row["metadata"] else {}
+
+            # Get the embedding if available
+            cursor.execute(
+                "SELECT embedding FROM embeddings WHERE memory_type = ? AND memory_id = ?",
+                (memory_type, memory_id),
+            )
+            embedding_row = cursor.fetchone()
+            embedding = None
+            if embedding_row and embedding_row["embedding"]:
+                embedding = np.frombuffer(
+                    embedding_row["embedding"], dtype=np.float32
+                ).tolist()
+
+            # Parse original creation date - row is sqlite3.Row, need safe access
+            row_keys = row.keys()
+            created_at_str = (
+                row["created_at"] if "created_at" in row_keys else None
+            ) or (row["timestamp"] if "timestamp" in row_keys else None)
+            original_created_at = (
+                self._parse_datetime(created_at_str)
+                if created_at_str
+                else datetime.now(timezone.utc)
+            )
+
+            # Get project_id and agent
+            project_id = row["project_id"] if "project_id" in row_keys else ""
+            agent = row["agent"] if "agent" in row_keys else ""
+
+            # If it's a preference, use user_id as project_id
+            if memory_type == "preference":
+                project_id = row["user_id"] if "user_id" in row_keys else ""
+                agent = "user"
+
+            # Create the archived memory
+            archived = ArchivedMemory.create(
+                original_id=memory_id,
+                memory_type=memory_type,
+                content=content,
+                project_id=project_id,
+                agent=agent,
+                archive_reason=reason,
+                final_strength=final_strength,
+                original_created_at=original_created_at,
+                embedding=embedding,
+                metadata=metadata,
+            )
+
+            # Serialize embedding for storage
+            embedding_blob = None
+            if archived.embedding:
+                embedding_blob = np.array(
+                    archived.embedding, dtype=np.float32
+                ).tobytes()
+
+            # Insert into archive table
+            cursor.execute(
+                """
+                INSERT INTO memory_archive (
+                    id, original_id, memory_type, content, embedding,
+                    metadata, original_created_at, archived_at, archive_reason,
+                    final_strength, project_id, agent, restored, restored_at, restored_as
+                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    archived.id,
+                    archived.original_id,
+                    archived.memory_type,
+                    archived.content,
+                    embedding_blob,
+                    json.dumps(archived.metadata),
+                    archived.original_created_at.isoformat(),
+                    archived.archived_at.isoformat(),
+                    archived.archive_reason,
+                    archived.final_strength,
+                    archived.project_id,
+                    archived.agent,
+                    0,  # not restored
+                    None,
+                    None,
+                ),
+            )
+
+        return archived
+
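A minimal usage sketch of the archive path, assuming `storage` is the backend instance and `"heu-abc123"` is an existing heuristic ID:

```python
# Hypothetical sketch: archive a decayed heuristic before deleting it.
archived = storage.archive_memory(
    memory_id="heu-abc123",
    memory_type="heuristic",
    reason="decay",
    final_strength=0.04,
)
print(archived.id, archived.archive_reason)  # new archive row, reason "decay"
```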
+    def _extract_memory_content(self, memory_type: str, row: sqlite3.Row) -> str:
+        """Extract the main content from a memory row as JSON."""
+        if memory_type == "heuristic":
+            return json.dumps(
+                {
+                    "condition": row["condition"],
+                    "strategy": row["strategy"],
+                    "confidence": row["confidence"],
+                    "occurrence_count": row["occurrence_count"],
+                    "success_count": row["success_count"],
+                }
+            )
+        elif memory_type == "outcome":
+            return json.dumps(
+                {
+                    "task_type": row["task_type"],
+                    "task_description": row["task_description"],
+                    "success": bool(row["success"]),
+                    "strategy_used": row["strategy_used"],
+                    "duration_ms": row["duration_ms"],
+                    "error_message": row["error_message"],
+                    "user_feedback": row["user_feedback"],
+                }
+            )
+        elif memory_type == "domain_knowledge":
+            return json.dumps(
+                {
+                    "domain": row["domain"],
+                    "fact": row["fact"],
+                    "source": row["source"],
+                    "confidence": row["confidence"],
+                }
+            )
+        elif memory_type == "anti_pattern":
+            return json.dumps(
+                {
+                    "pattern": row["pattern"],
+                    "why_bad": row["why_bad"],
+                    "better_alternative": row["better_alternative"],
+                    "occurrence_count": row["occurrence_count"],
+                }
+            )
+        elif memory_type == "preference":
+            return json.dumps(
+                {
+                    "category": row["category"],
+                    "preference": row["preference"],
+                    "source": row["source"],
+                    "confidence": row["confidence"],
+                }
+            )
+        else:
+            return json.dumps(dict(row))
+
+    def get_archive(self, archive_id: str) -> Optional["ArchivedMemory"]:
+        """
+        Get an archived memory by its archive ID.
+
+        Args:
+            archive_id: The archive ID
+
+        Returns:
+            ArchivedMemory instance, or None if not found
+        """
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                "SELECT * FROM memory_archive WHERE id = ?",
+                (archive_id,),
+            )
+            row = cursor.fetchone()
+
+            if row is None:
+                return None
+
+            return self._row_to_archived_memory(row)
+
+    def list_archives(
+        self,
+        project_id: str,
+        agent: Optional[str] = None,
+        reason: Optional[str] = None,
+        memory_type: Optional[str] = None,
+        older_than: Optional[datetime] = None,
+        younger_than: Optional[datetime] = None,
+        include_restored: bool = False,
+        limit: int = 100,
+    ) -> List["ArchivedMemory"]:
+        """
+        List archived memories with filtering.
+
+        Args:
+            project_id: Project to query
+            agent: Optional agent filter
+            reason: Optional archive reason filter
+            memory_type: Optional memory type filter
+            older_than: Optional filter for archives older than this time
+            younger_than: Optional filter for archives younger than this time
+            include_restored: Whether to include archives that have been restored
+            limit: Maximum number of archives to return
+
+        Returns:
+            List of ArchivedMemory instances
+        """
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            conditions = ["project_id = ?"]
+            params: List[Any] = [project_id]
+
+            if agent:
+                conditions.append("agent = ?")
+                params.append(agent)
+
+            if reason:
+                conditions.append("archive_reason = ?")
+                params.append(reason)
+
+            if memory_type:
+                conditions.append("memory_type = ?")
+                params.append(memory_type)
+
+            if older_than:
+                conditions.append("archived_at < ?")
+                params.append(older_than.isoformat())
+
+            if younger_than:
+                conditions.append("archived_at > ?")
+                params.append(younger_than.isoformat())
+
+            if not include_restored:
+                conditions.append("restored = 0")
+
+            where_clause = " AND ".join(conditions)
+            params.append(limit)
+
+            cursor.execute(
+                f"""
+                SELECT * FROM memory_archive
+                WHERE {where_clause}
+                ORDER BY archived_at DESC
+                LIMIT ?
+                """,
+                params,
+            )
+
+            rows = cursor.fetchall()
+            return [self._row_to_archived_memory(row) for row in rows]
+
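A filtering sketch against the parameters above, with `storage` again standing in for the backend instance:

```python
# Hypothetical sketch: decay-archived heuristics from the last 30 days,
# excluding anything already restored (the default).
from datetime import datetime, timedelta, timezone

cutoff = datetime.now(timezone.utc) - timedelta(days=30)
recent_decay = storage.list_archives(
    project_id="proj-1",
    reason="decay",
    memory_type="heuristic",
    younger_than=cutoff,
)
```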
+    def restore_from_archive(self, archive_id: str) -> str:
+        """
+        Restore an archived memory, creating a new memory from archive data.
+
+        The original archive is marked as restored but retained for audit purposes.
+
+        Args:
+            archive_id: The archive ID to restore
+
+        Returns:
+            New memory ID of the restored memory
+
+        Raises:
+            ValueError: If archive not found or already restored
+        """
+        import uuid
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            # Get the archive
+            cursor.execute(
+                "SELECT * FROM memory_archive WHERE id = ?",
+                (archive_id,),
+            )
+            row = cursor.fetchone()
+
+            if row is None:
+                raise ValueError(f"Archive not found: {archive_id}")
+
+            if row["restored"]:
+                raise ValueError(f"Archive already restored as: {row['restored_as']}")
+
+            archived = self._row_to_archived_memory(row)
+            content = json.loads(archived.content)
+
+            # Generate new memory ID
+            new_id = f"{archived.memory_type[:3]}-{uuid.uuid4().hex[:12]}"
+
+            # Restore based on memory type
+            if archived.memory_type == "heuristic":
+                cursor.execute(
+                    """
+                    INSERT INTO heuristics (
+                        id, agent, project_id, condition, strategy, confidence,
+                        occurrence_count, success_count, last_validated, created_at, metadata
+                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                    """,
+                    (
+                        new_id,
+                        archived.agent,
+                        archived.project_id,
+                        content.get("condition", ""),
+                        content.get("strategy", ""),
+                        content.get("confidence", 0.5),
+                        content.get("occurrence_count", 1),
+                        content.get("success_count", 0),
+                        datetime.now(timezone.utc).isoformat(),
+                        datetime.now(timezone.utc).isoformat(),
+                        json.dumps(archived.metadata),
+                    ),
+                )
+            elif archived.memory_type == "outcome":
+                cursor.execute(
+                    """
+                    INSERT INTO outcomes (
+                        id, agent, project_id, task_type, task_description, success,
+                        strategy_used, duration_ms, error_message, user_feedback,
+                        timestamp, metadata
+                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                    """,
+                    (
+                        new_id,
+                        archived.agent,
+                        archived.project_id,
+                        content.get("task_type"),
+                        content.get("task_description", ""),
+                        1 if content.get("success") else 0,
+                        content.get("strategy_used"),
+                        content.get("duration_ms"),
+                        content.get("error_message"),
+                        content.get("user_feedback"),
+                        datetime.now(timezone.utc).isoformat(),
+                        json.dumps(archived.metadata),
+                    ),
+                )
+            elif archived.memory_type == "domain_knowledge":
+                cursor.execute(
+                    """
+                    INSERT INTO domain_knowledge (
+                        id, agent, project_id, domain, fact, source, confidence,
+                        last_verified, metadata
+                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+                    """,
+                    (
+                        new_id,
+                        archived.agent,
+                        archived.project_id,
+                        content.get("domain"),
+                        content.get("fact", ""),
+                        content.get("source"),
+                        content.get("confidence", 1.0),
+                        datetime.now(timezone.utc).isoformat(),
+                        json.dumps(archived.metadata),
+                    ),
+                )
+            elif archived.memory_type == "anti_pattern":
+                cursor.execute(
+                    """
+                    INSERT INTO anti_patterns (
+                        id, agent, project_id, pattern, why_bad, better_alternative,
+                        occurrence_count, last_seen, created_at, metadata
+                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                    """,
+                    (
+                        new_id,
+                        archived.agent,
+                        archived.project_id,
+                        content.get("pattern", ""),
+                        content.get("why_bad"),
+                        content.get("better_alternative"),
+                        content.get("occurrence_count", 1),
+                        datetime.now(timezone.utc).isoformat(),
+                        datetime.now(timezone.utc).isoformat(),
+                        json.dumps(archived.metadata),
+                    ),
+                )
+            elif archived.memory_type == "preference":
+                cursor.execute(
+                    """
+                    INSERT INTO preferences (
+                        id, user_id, category, preference, source, confidence,
+                        timestamp, metadata
+                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+                    """,
+                    (
+                        new_id,
+                        archived.project_id,  # For preferences, project_id is user_id
+                        content.get("category"),
+                        content.get("preference", ""),
+                        content.get("source"),
+                        content.get("confidence", 1.0),
+                        datetime.now(timezone.utc).isoformat(),
+                        json.dumps(archived.metadata),
+                    ),
+                )
+            else:
+                raise ValueError(f"Cannot restore memory type: {archived.memory_type}")
+
+            # Restore embedding if available
+            if archived.embedding:
+                embedding_blob = np.array(
+                    archived.embedding, dtype=np.float32
+                ).tobytes()
+                cursor.execute(
+                    """
+                    INSERT INTO embeddings (memory_type, memory_id, embedding)
+                    VALUES (?, ?, ?)
+                    """,
+                    (archived.memory_type, new_id, embedding_blob),
+                )
+
+            # Mark archive as restored
+            cursor.execute(
+                """
+                UPDATE memory_archive
+                SET restored = 1, restored_at = ?, restored_as = ?
+                WHERE id = ?
+                """,
+                (datetime.now(timezone.utc).isoformat(), new_id, archive_id),
+            )
+
+            return new_id
+
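A restore sketch, with a hypothetical archive ID; per the code above, restoring yields a fresh memory ID while the archive row stays behind flagged as restored, so a second restore raises:

```python
# Hypothetical sketch; `storage` is an instance of this backend.
new_id = storage.restore_from_archive("arc-001")
print(new_id)  # e.g. "heu-3f2a9c1b0d4e" (prefix = first 3 chars of the type)

try:
    storage.restore_from_archive("arc-001")
except ValueError as exc:
    print(exc)  # "Archive already restored as: <new_id>"
```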
+    def purge_archives(
+        self,
+        older_than: datetime,
+        project_id: Optional[str] = None,
+        reason: Optional[str] = None,
+    ) -> int:
+        """
+        Permanently delete archived memories.
+
+        This is a destructive operation - archives cannot be recovered after purging.
+
+        Args:
+            older_than: Delete archives older than this datetime
+            project_id: Optional project filter
+            reason: Optional reason filter
+
+        Returns:
+            Number of archives permanently deleted
+        """
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            conditions = ["archived_at < ?"]
+            params: List[Any] = [older_than.isoformat()]
+
+            if project_id:
+                conditions.append("project_id = ?")
+                params.append(project_id)
+
+            if reason:
+                conditions.append("archive_reason = ?")
+                params.append(reason)
+
+            where_clause = " AND ".join(conditions)
+
+            cursor.execute(
+                f"DELETE FROM memory_archive WHERE {where_clause}",
+                params,
+            )
+
+            return cursor.rowcount
+
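A purge sketch; the cutoff value is illustrative and, as the docstring warns, the deletion is irreversible:

```python
# Hypothetical sketch: permanently drop decay archives older than a year.
from datetime import datetime, timedelta, timezone

one_year_ago = datetime.now(timezone.utc) - timedelta(days=365)
purged = storage.purge_archives(older_than=one_year_ago, reason="decay")
print(f"purged {purged} archives")
```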
+    def get_archive_stats(
+        self,
+        project_id: str,
+        agent: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """
+        Get statistics about archived memories.
+
+        Args:
+            project_id: Project to query
+            agent: Optional agent filter
+
+        Returns:
+            Dict with archive statistics (counts, by reason, by type, etc.)
+        """
+        from alma.storage.archive import ArchiveStats
+
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+
+            # Build base filter
+            base_conditions = ["project_id = ?"]
+            base_params: List[Any] = [project_id]
+
+            if agent:
+                base_conditions.append("agent = ?")
+                base_params.append(agent)
+
+            base_where = " AND ".join(base_conditions)
+
+            # Total count
+            cursor.execute(
+                f"SELECT COUNT(*) as cnt FROM memory_archive WHERE {base_where}",
+                base_params,
+            )
+            total_count = cursor.fetchone()["cnt"]
+
+            # Restored count
+            cursor.execute(
+                f"SELECT COUNT(*) as cnt FROM memory_archive WHERE {base_where} AND restored = 1",
+                base_params,
+            )
+            restored_count = cursor.fetchone()["cnt"]
+
+            # Count by reason
+            cursor.execute(
+                f"""
+                SELECT archive_reason, COUNT(*) as cnt
+                FROM memory_archive
+                WHERE {base_where}
+                GROUP BY archive_reason
+                """,
+                base_params,
+            )
+            by_reason = {row["archive_reason"]: row["cnt"] for row in cursor.fetchall()}
+
+            # Count by type
+            cursor.execute(
+                f"""
+                SELECT memory_type, COUNT(*) as cnt
+                FROM memory_archive
+                WHERE {base_where}
+                GROUP BY memory_type
+                """,
+                base_params,
+            )
+            by_type = {row["memory_type"]: row["cnt"] for row in cursor.fetchall()}
+
+            # Count by agent
+            cursor.execute(
+                """
+                SELECT agent, COUNT(*) as cnt
+                FROM memory_archive
+                WHERE project_id = ?
+                GROUP BY agent
+                """,
+                (project_id,),
+            )
+            by_agent = {row["agent"]: row["cnt"] for row in cursor.fetchall()}
+
+            # Date range
+            cursor.execute(
+                f"SELECT MIN(archived_at) as oldest, MAX(archived_at) as newest FROM memory_archive WHERE {base_where}",
+                base_params,
+            )
+            dates = cursor.fetchone()
+            oldest_archive = (
+                self._parse_datetime(dates["oldest"]) if dates["oldest"] else None
+            )
+            newest_archive = (
+                self._parse_datetime(dates["newest"]) if dates["newest"] else None
+            )
+
+            stats = ArchiveStats(
+                total_count=total_count,
+                by_reason=by_reason,
+                by_type=by_type,
+                by_agent=by_agent,
+                restored_count=restored_count,
+                oldest_archive=oldest_archive,
+                newest_archive=newest_archive,
+            )
+
+            return stats.to_dict()
+
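Since `get_archive_stats` returns `ArchiveStats.to_dict()`, a caller presumably gets a plain dict back. The keys below assume `to_dict()` mirrors the dataclass field names assembled above, which is an assumption, not confirmed by this hunk:

```python
# Hypothetical sketch; dict keys assumed to match the ArchiveStats fields.
stats = storage.get_archive_stats("proj-1")
print(stats["total_count"], stats["restored_count"])
print(stats["by_reason"])  # e.g. {"decay": 12, "manual": 3}
```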
+    def _row_to_archived_memory(self, row: sqlite3.Row) -> "ArchivedMemory":
+        """Convert database row to ArchivedMemory."""
+        from alma.storage.archive import ArchivedMemory
+
+        # Parse embedding
+        embedding = None
+        if row["embedding"]:
+            embedding = np.frombuffer(row["embedding"], dtype=np.float32).tolist()
+
+        # Parse metadata
+        metadata = json.loads(row["metadata"]) if row["metadata"] else {}
+
+        # Parse restored_at
+        restored_at = None
+        if row["restored_at"]:
+            restored_at = self._parse_datetime(row["restored_at"])
+
+        return ArchivedMemory(
+            id=row["id"],
+            original_id=row["original_id"],
+            memory_type=row["memory_type"],
+            content=row["content"],
+            embedding=embedding,
+            metadata=metadata,
+            original_created_at=self._parse_datetime(row["original_created_at"])
+            or datetime.now(timezone.utc),
+            archived_at=self._parse_datetime(row["archived_at"])
+            or datetime.now(timezone.utc),
+            archive_reason=row["archive_reason"],
+            final_strength=row["final_strength"],
+            project_id=row["project_id"],
+            agent=row["agent"],
+            restored=bool(row["restored"]),
+            restored_at=restored_at,
+            restored_as=row["restored_as"],
+        )