claude-memory-agent 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. package/.env.example +107 -0
  2. package/README.md +200 -0
  3. package/agent_card.py +512 -0
  4. package/bin/cli.js +181 -0
  5. package/bin/postinstall.js +216 -0
  6. package/config.py +104 -0
  7. package/dashboard.html +2689 -0
  8. package/hooks/README.md +196 -0
  9. package/hooks/__pycache__/auto-detect-response.cpython-312.pyc +0 -0
  10. package/hooks/__pycache__/auto_capture.cpython-312.pyc +0 -0
  11. package/hooks/__pycache__/session_end.cpython-312.pyc +0 -0
  12. package/hooks/__pycache__/session_start.cpython-312.pyc +0 -0
  13. package/hooks/auto-detect-response.py +348 -0
  14. package/hooks/auto_capture.py +255 -0
  15. package/hooks/detect-correction.py +173 -0
  16. package/hooks/grounding-hook.py +348 -0
  17. package/hooks/log-tool-use.py +234 -0
  18. package/hooks/log-user-request.py +208 -0
  19. package/hooks/pre-tool-decision.py +218 -0
  20. package/hooks/problem-detector.py +343 -0
  21. package/hooks/session_end.py +192 -0
  22. package/hooks/session_start.py +227 -0
  23. package/install.py +887 -0
  24. package/main.py +2859 -0
  25. package/manager.py +997 -0
  26. package/package.json +55 -0
  27. package/requirements.txt +8 -0
  28. package/run_server.py +136 -0
  29. package/services/__init__.py +50 -0
  30. package/services/__pycache__/__init__.cpython-312.pyc +0 -0
  31. package/services/__pycache__/agent_registry.cpython-312.pyc +0 -0
  32. package/services/__pycache__/auth.cpython-312.pyc +0 -0
  33. package/services/__pycache__/auto_inject.cpython-312.pyc +0 -0
  34. package/services/__pycache__/claude_md_sync.cpython-312.pyc +0 -0
  35. package/services/__pycache__/cleanup.cpython-312.pyc +0 -0
  36. package/services/__pycache__/compaction_flush.cpython-312.pyc +0 -0
  37. package/services/__pycache__/confidence.cpython-312.pyc +0 -0
  38. package/services/__pycache__/daily_log.cpython-312.pyc +0 -0
  39. package/services/__pycache__/database.cpython-312.pyc +0 -0
  40. package/services/__pycache__/embeddings.cpython-312.pyc +0 -0
  41. package/services/__pycache__/insights.cpython-312.pyc +0 -0
  42. package/services/__pycache__/llm_analyzer.cpython-312.pyc +0 -0
  43. package/services/__pycache__/memory_md_sync.cpython-312.pyc +0 -0
  44. package/services/__pycache__/retry_queue.cpython-312.pyc +0 -0
  45. package/services/__pycache__/timeline.cpython-312.pyc +0 -0
  46. package/services/__pycache__/vector_index.cpython-312.pyc +0 -0
  47. package/services/__pycache__/websocket.cpython-312.pyc +0 -0
  48. package/services/agent_registry.py +753 -0
  49. package/services/auth.py +331 -0
  50. package/services/auto_inject.py +250 -0
  51. package/services/claude_md_sync.py +275 -0
  52. package/services/cleanup.py +667 -0
  53. package/services/compaction_flush.py +447 -0
  54. package/services/confidence.py +301 -0
  55. package/services/daily_log.py +333 -0
  56. package/services/database.py +2485 -0
  57. package/services/embeddings.py +358 -0
  58. package/services/insights.py +632 -0
  59. package/services/llm_analyzer.py +595 -0
  60. package/services/memory_md_sync.py +409 -0
  61. package/services/retry_queue.py +453 -0
  62. package/services/timeline.py +579 -0
  63. package/services/vector_index.py +398 -0
  64. package/services/websocket.py +257 -0
  65. package/skills/__init__.py +6 -0
  66. package/skills/__pycache__/__init__.cpython-312.pyc +0 -0
  67. package/skills/__pycache__/admin.cpython-312.pyc +0 -0
  68. package/skills/__pycache__/checkpoint.cpython-312.pyc +0 -0
  69. package/skills/__pycache__/claude_md.cpython-312.pyc +0 -0
  70. package/skills/__pycache__/cleanup.cpython-312.pyc +0 -0
  71. package/skills/__pycache__/grounding.cpython-312.pyc +0 -0
  72. package/skills/__pycache__/insights.cpython-312.pyc +0 -0
  73. package/skills/__pycache__/natural_language.cpython-312.pyc +0 -0
  74. package/skills/__pycache__/retrieve.cpython-312.pyc +0 -0
  75. package/skills/__pycache__/search.cpython-312.pyc +0 -0
  76. package/skills/__pycache__/state.cpython-312.pyc +0 -0
  77. package/skills/__pycache__/store.cpython-312.pyc +0 -0
  78. package/skills/__pycache__/summarize.cpython-312.pyc +0 -0
  79. package/skills/__pycache__/timeline.cpython-312.pyc +0 -0
  80. package/skills/__pycache__/verification.cpython-312.pyc +0 -0
  81. package/skills/admin.py +469 -0
  82. package/skills/checkpoint.py +198 -0
  83. package/skills/claude_md.py +363 -0
  84. package/skills/cleanup.py +241 -0
  85. package/skills/grounding.py +801 -0
  86. package/skills/insights.py +231 -0
  87. package/skills/natural_language.py +277 -0
  88. package/skills/retrieve.py +67 -0
  89. package/skills/search.py +213 -0
  90. package/skills/state.py +182 -0
  91. package/skills/store.py +179 -0
  92. package/skills/summarize.py +588 -0
  93. package/skills/timeline.py +387 -0
  94. package/skills/verification.py +391 -0
  95. package/start_daemon.py +155 -0
  96. package/test_automation.py +221 -0
  97. package/test_complete.py +338 -0
  98. package/test_full.py +322 -0
  99. package/update_system.py +817 -0
  100. package/verify_db.py +134 -0
@@ -0,0 +1,2485 @@
1
"""Database service using SQLite with FAISS vector indexing.

Uses FAISS for O(log n) similarity search when available,
falls back to numpy-based O(n) search otherwise.

Features:
- Connection pooling for SQLite (thread-safe connections)
- Retry logic with exponential backoff for transient failures
- Query timeout handling
- Comprehensive error handling with logging
"""
import os
import json
import sqlite3
import numpy as np
import logging
import time
import threading
from queue import Queue, Empty
from functools import wraps
from datetime import datetime
from typing import List, Optional, Dict, Any, Tuple, Callable
from pathlib import Path
from contextlib import contextmanager
from dotenv import load_dotenv

# Pull settings from a .env file (if present) before reading os.environ below.
load_dotenv()

# Configure logging once; skip if a handler was already attached elsewhere.
logger = logging.getLogger(__name__)
if not logger.handlers:
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    ))
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)

# Database location: env override, else a sibling "memories.db" next to the package.
DB_PATH = os.getenv("DATABASE_PATH", str(Path(__file__).parent.parent / "memories.db"))
USE_VECTOR_INDEX = os.getenv("USE_VECTOR_INDEX", "true").lower() == "true"

# Connection pool settings (all overridable via environment variables)
DB_POOL_SIZE = int(os.getenv("DB_POOL_SIZE", "5"))
DB_TIMEOUT = float(os.getenv("DB_TIMEOUT", "30.0"))  # Query timeout in seconds
DB_MAX_RETRIES = int(os.getenv("DB_MAX_RETRIES", "3"))
DB_RETRY_BASE_DELAY = float(os.getenv("DB_RETRY_BASE_DELAY", "0.1"))  # Base delay for exponential backoff
47
+
48
+
49
+ # Custom exceptions for structured error handling
50
class DatabaseError(Exception):
    """Root of the database exception hierarchy.

    Carries a machine-readable ``error_code`` alongside the human-readable
    message, plus the ``original_error`` that triggered it (if any).
    """

    def __init__(self, message: str, error_code: str, original_error: Optional[Exception] = None):
        # The base Exception keeps the message for str() / .args.
        super().__init__(message)
        self.original_error = original_error
        self.error_code = error_code
56
+
57
+
58
class ConnectionPoolError(DatabaseError):
    """Raised for connection-pool failures (exhaustion, failed connects)."""

    def __init__(self, message: str, original_error: Optional[Exception] = None):
        # Pool problems always map to the fixed "DB_POOL_ERROR" code.
        super().__init__(message, "DB_POOL_ERROR", original_error)
62
+
63
+
64
class QueryTimeoutError(DatabaseError):
    """Raised when a query exceeds its allotted execution time."""

    def __init__(self, message: str, original_error: Optional[Exception] = None):
        # Timeouts always map to the fixed "DB_TIMEOUT" code.
        super().__init__(message, "DB_TIMEOUT", original_error)
68
+
69
+
70
class RetryExhaustedError(DatabaseError):
    """Raised once every retry attempt of an operation has failed."""

    def __init__(self, message: str, original_error: Optional[Exception] = None):
        # Exhausted retries always map to the fixed "DB_RETRY_EXHAUSTED" code.
        super().__init__(message, "DB_RETRY_EXHAUSTED", original_error)
74
+
75
+
76
class MigrationError(DatabaseError):
    """Raised when a schema migration step cannot be applied."""

    def __init__(self, message: str, original_error: Optional[Exception] = None):
        # Migration failures always map to the fixed "DB_MIGRATION_ERROR" code.
        super().__init__(message, "DB_MIGRATION_ERROR", original_error)
80
+
81
+
82
class SQLiteConnectionPool:
    """Thread-safe connection pool for SQLite.

    SQLite has limited connection pooling needs compared to client-server DBs,
    but this provides:
    - Thread-safe connection management (counters guarded by a lock)
    - Connection reuse to avoid repeated file opens
    - Graceful connection lifecycle management

    Fixes over the previous revision:
    - ``_created_connections`` / ``_active_connections`` updates are now
      lock-protected (they were racy bare ``+=`` / ``-=``).
    - New connections are opened OUTSIDE the lock (only the slot reservation
      is locked), so a slow filesystem open no longer blocks other threads.
    - ``put_nowait`` on return is guarded against ``queue.Full`` (a
      double-return previously raised an uncaught exception).
    """

    def __init__(self, db_path: str, pool_size: int = 5, timeout: float = 30.0):
        self.db_path = db_path          # Path (or ":memory:") handed to sqlite3.connect
        self.pool_size = pool_size      # Upper bound on concurrently open connections
        self.timeout = timeout          # Seconds to wait for sqlite locks AND pool checkout
        self._pool: Queue = Queue(maxsize=pool_size)  # Idle, ready-to-use connections
        self._lock = threading.Lock()   # Guards the two counters below
        self._created_connections = 0   # Connections opened and not yet closed
        self._active_connections = 0    # Connections currently checked out

    def _create_connection(self) -> sqlite3.Connection:
        """Create a new SQLite connection with optimal settings."""
        conn = sqlite3.connect(
            self.db_path,
            timeout=self.timeout,
            check_same_thread=False,
            isolation_level=None  # Autocommit mode for better concurrency
        )
        conn.row_factory = sqlite3.Row
        # Enable WAL mode for better concurrent read/write performance
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA synchronous=NORMAL")
        conn.execute("PRAGMA cache_size=-64000")  # 64MB cache
        conn.execute("PRAGMA busy_timeout=30000")  # 30 second busy timeout
        return conn

    def get_connection(self) -> sqlite3.Connection:
        """Get a connection from the pool or create a new one.

        Order of attempts: (1) non-blocking reuse of an idle connection,
        (2) create a fresh one if under the cap, (3) block up to
        ``self.timeout`` seconds for a returned connection.

        Raises:
            ConnectionPoolError: pool exhausted and nothing returned in time.
        """
        try:
            conn = self._pool.get_nowait()  # Fast path: reuse without blocking
        except Empty:
            conn = None

        if conn is None:
            # Reserve a creation slot under the lock, but do the (potentially
            # slow) sqlite3.connect outside it.
            with self._lock:
                below_cap = self._created_connections < self.pool_size
                if below_cap:
                    self._created_connections += 1
            if below_cap:
                try:
                    conn = self._create_connection()
                except Exception:
                    with self._lock:
                        self._created_connections -= 1  # Roll back the reservation
                    raise
                logging.getLogger(__name__).debug(
                    f"Created new connection (total: {self._created_connections})"
                )

        if conn is None:
            # Pool is full, wait for available connection
            try:
                conn = self._pool.get(timeout=self.timeout)
            except Empty:
                raise ConnectionPoolError(
                    f"Connection pool exhausted (size={self.pool_size}, timeout={self.timeout}s)"
                )

        with self._lock:
            self._active_connections += 1
        return conn

    def return_connection(self, conn: sqlite3.Connection):
        """Return a connection to the pool, closing it if no longer usable."""
        from queue import Full  # Local import: module header only pulls Queue/Empty

        if conn is None:
            return

        with self._lock:
            self._active_connections -= 1

        try:
            # Cheap liveness probe before putting the connection back.
            conn.execute("SELECT 1")
        except (sqlite3.Error, sqlite3.ProgrammingError):
            self._discard(conn)
            return

        try:
            self._pool.put_nowait(conn)
        except Full:
            # Defensive: a double-return would overflow the queue; drop the extra.
            self._discard(conn)

    def _discard(self, conn: sqlite3.Connection) -> None:
        """Close a dead/extra connection and release its slot in the cap."""
        try:
            conn.close()
        except Exception:
            pass
        with self._lock:
            self._created_connections -= 1
        logging.getLogger(__name__).warning("Closed invalid connection from pool")

    def close_all(self):
        """Close all idle connections in the pool and reset the counters."""
        log = logging.getLogger(__name__)
        with self._lock:
            while not self._pool.empty():
                try:
                    conn = self._pool.get_nowait()
                    conn.close()
                except Empty:
                    break
                except Exception as e:
                    log.warning(f"Error closing connection: {e}")
            self._created_connections = 0
            self._active_connections = 0
        log.info("Connection pool closed")

    def get_stats(self) -> Dict[str, Any]:
        """Get pool statistics (counters snapshotted under the lock)."""
        with self._lock:
            created = self._created_connections
            active = self._active_connections
        return {
            "pool_size": self.pool_size,
            "created_connections": created,
            "active_connections": active,
            "available_connections": self._pool.qsize(),
            "timeout": self.timeout
        }
191
+
192
+
193
def with_retry(
    max_retries: Optional[int] = None,
    base_delay: Optional[float] = None,
    retryable_errors: tuple = (sqlite3.OperationalError, sqlite3.DatabaseError)
):
    """Decorator adding retry with exponential backoff to sync OR async callables.

    Fixes over the previous revision:
    - The async wrapper used blocking ``time.sleep``, freezing the event loop
      during backoff; it now uses ``await asyncio.sleep``.
    - ``None`` defaults resolve to DB_MAX_RETRIES / DB_RETRY_BASE_DELAY at
      decoration time (backward compatible; explicit values behave as before).
    - The duplicated backoff/exhaustion logic is factored into one helper.

    Args:
        max_retries: Maximum number of retry attempts (default DB_MAX_RETRIES).
        base_delay: Base delay in seconds, doubled per attempt
            (default DB_RETRY_BASE_DELAY).
        retryable_errors: Tuple of exception types that should trigger retry.

    Raises:
        RetryExhaustedError: from the wrapped call once all retries fail;
            ``original_error`` carries the last underlying exception.
    """
    import asyncio

    def decorator(func: Callable):
        # Lazy default resolution: only touch the module constants when the
        # caller did not supply explicit values.
        retries = DB_MAX_RETRIES if max_retries is None else max_retries
        delay0 = DB_RETRY_BASE_DELAY if base_delay is None else base_delay

        def _next_delay(attempt: int, exc: Exception) -> float:
            """Log the failure; return the backoff delay, or raise when exhausted."""
            log = logging.getLogger(__name__)
            if attempt < retries:
                pause = delay0 * (2 ** attempt)  # Exponential backoff
                log.warning(
                    f"Retry {attempt + 1}/{retries} for {func.__name__} "
                    f"after {pause:.2f}s due to: {str(exc)}"
                )
                return pause
            log.error(
                f"All {retries} retries exhausted for {func.__name__}: {str(exc)}"
            )
            raise RetryExhaustedError(
                f"Operation {func.__name__} failed after {retries} retries",
                original_error=exc
            )

        @wraps(func)
        async def async_wrapper(*args, **kwargs):
            for attempt in range(retries + 1):
                try:
                    return await func(*args, **kwargs)
                except retryable_errors as e:
                    # FIX: non-blocking sleep so the event loop keeps running.
                    await asyncio.sleep(_next_delay(attempt, e))

        @wraps(func)
        def sync_wrapper(*args, **kwargs):
            for attempt in range(retries + 1):
                try:
                    return func(*args, **kwargs)
                except retryable_errors as e:
                    time.sleep(_next_delay(attempt, e))

        # Return appropriate wrapper based on function type
        if asyncio.iscoroutinefunction(func):
            return async_wrapper
        return sync_wrapper

    return decorator
261
+
262
+
263
def normalize_path(path: str) -> str:
    """Canonicalize a filesystem path string for de-duplication.

    All separators become forward slashes, trailing slashes are dropped, and a
    Windows drive letter is upper-cased — so 'c:\\foo\\' and 'C:/foo' compare
    equal and are not treated as different projects.
    """
    if not path:
        return path
    # Unify separators, then strip any run of trailing slashes.
    unified = "/".join(path.split("\\")).rstrip("/")
    # Upper-case a leading drive letter ("c:" -> "C:") for case-insensitive matching.
    if len(unified) > 1 and unified[1] == ":":
        unified = unified[0].upper() + unified[1:]
    return unified
278
+
279
+
280
+ class DatabaseService:
281
+ """Service for vector storage and retrieval using SQLite + FAISS.
282
+
283
+ Features:
284
+ - FAISS vector indexing for O(log n) similarity search
285
+ - Automatic index building on startup
286
+ - Incremental index updates on insert
287
+ - Fallback to numpy-based search if FAISS unavailable
288
+ - Connection pooling for thread-safe access
289
+ - Retry logic with exponential backoff
290
+ - Query timeout handling
291
+ """
292
+
293
+ def __init__(self):
294
+ self.db_path = DB_PATH
295
+ self.conn: Optional[sqlite3.Connection] = None
296
+ self._connection_pool: Optional[SQLiteConnectionPool] = None
297
+
298
+ # Vector indexes (lazy loaded)
299
+ self._memories_index = None
300
+ self._patterns_index = None
301
+ self._timeline_index = None
302
+ self._use_vector_index = USE_VECTOR_INDEX
303
+ self._index_initialized = False
304
+
305
+ @contextmanager
306
+ def get_connection(self):
307
+ """Context manager for getting a connection from the pool.
308
+
309
+ Usage:
310
+ with self.get_connection() as conn:
311
+ cursor = conn.cursor()
312
+ cursor.execute(...)
313
+
314
+ Falls back to self.conn if pool not initialized.
315
+ """
316
+ if self._connection_pool:
317
+ conn = self._connection_pool.get_connection()
318
+ try:
319
+ yield conn
320
+ finally:
321
+ self._connection_pool.return_connection(conn)
322
+ else:
323
+ # Fallback for backward compatibility
324
+ yield self.conn
325
+
326
+ async def connect(self):
327
+ """Establish database connection and initialize connection pool."""
328
+ try:
329
+ # Initialize connection pool
330
+ self._connection_pool = SQLiteConnectionPool(
331
+ db_path=self.db_path,
332
+ pool_size=DB_POOL_SIZE,
333
+ timeout=DB_TIMEOUT
334
+ )
335
+ # Keep a primary connection for backward compatibility
336
+ self.conn = sqlite3.connect(self.db_path, check_same_thread=False)
337
+ self.conn.row_factory = sqlite3.Row
338
+ # Enable WAL mode on primary connection too
339
+ self.conn.execute("PRAGMA journal_mode=WAL")
340
+ self.conn.execute("PRAGMA busy_timeout=30000")
341
+ logger.info(f"Database connected with pool size {DB_POOL_SIZE}")
342
+ except sqlite3.Error as e:
343
+ logger.error(f"Failed to connect to database: {e}")
344
+ raise ConnectionPoolError(f"Failed to connect to database: {e}", original_error=e)
345
+
346
+ async def disconnect(self):
347
+ """Close database connection, connection pool, and save indexes."""
348
+ # Save indexes
349
+ if self._memories_index:
350
+ try:
351
+ self._memories_index.save()
352
+ except Exception as e:
353
+ logger.warning(f"Failed to save memories index: {e}")
354
+ if self._patterns_index:
355
+ try:
356
+ self._patterns_index.save()
357
+ except Exception as e:
358
+ logger.warning(f"Failed to save patterns index: {e}")
359
+ if self._timeline_index:
360
+ try:
361
+ self._timeline_index.save()
362
+ except Exception as e:
363
+ logger.warning(f"Failed to save timeline index: {e}")
364
+
365
+ # Close connection pool
366
+ if self._connection_pool:
367
+ self._connection_pool.close_all()
368
+ self._connection_pool = None
369
+
370
+ # Close primary connection
371
+ if self.conn:
372
+ try:
373
+ self.conn.close()
374
+ except Exception as e:
375
+ logger.warning(f"Error closing primary connection: {e}")
376
+ self.conn = None
377
+
378
+ logger.info("Database disconnected")
379
+
380
+ def get_pool_stats(self) -> Dict[str, Any]:
381
+ """Get connection pool statistics."""
382
+ if self._connection_pool:
383
+ return self._connection_pool.get_stats()
384
+ return {"pool_initialized": False}
385
+
386
+ async def _init_vector_indexes(self):
387
+ """Initialize vector indexes from database."""
388
+ if self._index_initialized or not self._use_vector_index:
389
+ return
390
+
391
+ try:
392
+ from services.vector_index import get_index
393
+
394
+ # Initialize memories index
395
+ self._memories_index = get_index("memories")
396
+ if self._memories_index.size() == 0:
397
+ await self._rebuild_memories_index()
398
+
399
+ # Initialize patterns index
400
+ self._patterns_index = get_index("patterns")
401
+ if self._patterns_index.size() == 0:
402
+ await self._rebuild_patterns_index()
403
+
404
+ # Initialize timeline index
405
+ self._timeline_index = get_index("timeline")
406
+ if self._timeline_index.size() == 0:
407
+ await self._rebuild_timeline_index()
408
+
409
+ self._index_initialized = True
410
+ except ImportError:
411
+ # FAISS not available, will use numpy fallback
412
+ self._use_vector_index = False
413
+
414
+ async def _rebuild_memories_index(self):
415
+ """Rebuild the memories vector index from database."""
416
+ if not self._memories_index:
417
+ return
418
+
419
+ cursor = self.conn.cursor()
420
+ cursor.execute("SELECT id, embedding FROM memories WHERE embedding IS NOT NULL")
421
+ rows = cursor.fetchall()
422
+
423
+ items = []
424
+ for row in rows:
425
+ embedding = self._deserialize_embedding(row["embedding"])
426
+ if embedding:
427
+ items.append((row["id"], embedding))
428
+
429
+ if items:
430
+ self._memories_index.rebuild(items)
431
+ self._memories_index.save()
432
+
433
+ async def _rebuild_patterns_index(self):
434
+ """Rebuild the patterns vector index from database."""
435
+ if not self._patterns_index:
436
+ return
437
+
438
+ cursor = self.conn.cursor()
439
+ cursor.execute("SELECT id, embedding FROM patterns WHERE embedding IS NOT NULL")
440
+ rows = cursor.fetchall()
441
+
442
+ items = []
443
+ for row in rows:
444
+ embedding = self._deserialize_embedding(row["embedding"])
445
+ if embedding:
446
+ items.append((row["id"], embedding))
447
+
448
+ if items:
449
+ self._patterns_index.rebuild(items)
450
+ self._patterns_index.save()
451
+
452
+ async def _rebuild_timeline_index(self):
453
+ """Rebuild the timeline vector index from database."""
454
+ if not self._timeline_index:
455
+ return
456
+
457
+ cursor = self.conn.cursor()
458
+ cursor.execute("SELECT id, embedding FROM timeline_events WHERE embedding IS NOT NULL")
459
+ rows = cursor.fetchall()
460
+
461
+ items = []
462
+ for row in rows:
463
+ embedding = self._deserialize_embedding(row["embedding"])
464
+ if embedding:
465
+ items.append((row["id"], embedding))
466
+
467
+ if items:
468
+ self._timeline_index.rebuild(items)
469
+ self._timeline_index.save()
470
+
471
+ def get_index_stats(self) -> Dict[str, Any]:
472
+ """Get statistics about vector indexes."""
473
+ stats = {
474
+ "use_vector_index": self._use_vector_index,
475
+ "index_initialized": self._index_initialized
476
+ }
477
+ if self._memories_index:
478
+ stats["memories"] = self._memories_index.get_stats()
479
+ if self._patterns_index:
480
+ stats["patterns"] = self._patterns_index.get_stats()
481
+ if self._timeline_index:
482
+ stats["timeline"] = self._timeline_index.get_stats()
483
+ return stats
484
+
485
+ async def initialize_schema(self):
486
+ """Create necessary tables if they don't exist."""
487
+ cursor = self.conn.cursor()
488
+
489
+ # Main memories table with rich context
490
+ cursor.execute("""
491
+ CREATE TABLE IF NOT EXISTS memories (
492
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
493
+
494
+ -- Content
495
+ type TEXT NOT NULL,
496
+ content TEXT NOT NULL,
497
+ embedding TEXT,
498
+
499
+ -- Project Context
500
+ project_path TEXT,
501
+ project_name TEXT,
502
+ project_type TEXT,
503
+ tech_stack TEXT,
504
+
505
+ -- Session Context
506
+ session_id TEXT,
507
+ chat_id TEXT,
508
+
509
+ -- Agent/Skill Context
510
+ agent_type TEXT,
511
+ skill_used TEXT,
512
+ tools_used TEXT,
513
+
514
+ -- Outcome
515
+ outcome TEXT,
516
+ success INTEGER,
517
+ user_feedback TEXT,
518
+
519
+ -- Metadata
520
+ tags TEXT,
521
+ metadata TEXT DEFAULT '{}',
522
+ importance INTEGER DEFAULT 5,
523
+
524
+ -- Timestamps
525
+ created_at TEXT DEFAULT (datetime('now')),
526
+ updated_at TEXT DEFAULT (datetime('now')),
527
+ last_accessed TEXT
528
+ )
529
+ """)
530
+
531
+ # Projects table - store project-level knowledge
532
+ cursor.execute("""
533
+ CREATE TABLE IF NOT EXISTS projects (
534
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
535
+ path TEXT UNIQUE NOT NULL,
536
+ name TEXT,
537
+ type TEXT,
538
+ tech_stack TEXT,
539
+ conventions TEXT,
540
+ preferences TEXT,
541
+ created_at TEXT DEFAULT (datetime('now')),
542
+ updated_at TEXT DEFAULT (datetime('now'))
543
+ )
544
+ """)
545
+
546
+ # Patterns table - reusable solutions
547
+ cursor.execute("""
548
+ CREATE TABLE IF NOT EXISTS patterns (
549
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
550
+ name TEXT NOT NULL,
551
+ problem_type TEXT,
552
+ solution TEXT NOT NULL,
553
+ embedding TEXT,
554
+ tech_context TEXT,
555
+ success_count INTEGER DEFAULT 1,
556
+ failure_count INTEGER DEFAULT 0,
557
+ metadata TEXT DEFAULT '{}',
558
+ created_at TEXT DEFAULT (datetime('now')),
559
+ updated_at TEXT DEFAULT (datetime('now'))
560
+ )
561
+ """)
562
+
563
+ # Create indexes for memories/patterns
564
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_memories_type ON memories(type)")
565
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_memories_project ON memories(project_path)")
566
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_memories_session ON memories(session_id)")
567
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_memories_agent ON memories(agent_type)")
568
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_memories_success ON memories(success)")
569
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_memories_importance ON memories(importance)")
570
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_patterns_problem ON patterns(problem_type)")
571
+
572
+ # Migration helper function
573
+ def safe_add_column(table: str, column: str, column_def: str):
574
+ """Safely add a column if it doesn't exist, with proper error handling."""
575
+ try:
576
+ cursor.execute(f"SELECT {column} FROM {table} LIMIT 1")
577
+ logger.debug(f"Column {table}.{column} already exists")
578
+ except sqlite3.OperationalError as e:
579
+ if "no such column" in str(e).lower():
580
+ try:
581
+ cursor.execute(f"ALTER TABLE {table} ADD COLUMN {column} {column_def}")
582
+ logger.info(f"Migration: Added column {table}.{column}")
583
+ except sqlite3.OperationalError as alter_error:
584
+ if "duplicate column" not in str(alter_error).lower():
585
+ logger.error(f"Failed to add column {table}.{column}: {alter_error}")
586
+ raise MigrationError(
587
+ f"Failed to add column {table}.{column}",
588
+ original_error=alter_error
589
+ )
590
+ else:
591
+ logger.error(f"Unexpected error checking column {table}.{column}: {e}")
592
+ raise MigrationError(
593
+ f"Unexpected error during migration check for {table}.{column}",
594
+ original_error=e
595
+ )
596
+ except Exception as e:
597
+ logger.error(f"Unexpected error in migration for {table}.{column}: {e}")
598
+ raise MigrationError(
599
+ f"Migration failed for {table}.{column}",
600
+ original_error=e
601
+ )
602
+
603
+ # Migration: Add access_count column if it doesn't exist
604
+ safe_add_column("memories", "access_count", "INTEGER DEFAULT 0")
605
+
606
+ # Migration: Add decay_factor column if it doesn't exist
607
+ safe_add_column("memories", "decay_factor", "REAL DEFAULT 1.0")
608
+
609
+ # Migration: Add embedding_model column if it doesn't exist
610
+ safe_add_column("memories", "embedding_model", "TEXT DEFAULT 'nomic-embed-text'")
611
+
612
+ # ============================================================
613
+ # SESSION TIMELINE TABLES (Anti-Hallucination Layer)
614
+ # ============================================================
615
+
616
+ # Timeline events - chronological log of all session activity
617
+ cursor.execute("""
618
+ CREATE TABLE IF NOT EXISTS timeline_events (
619
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
620
+
621
+ -- Session Context
622
+ session_id TEXT NOT NULL,
623
+ project_path TEXT,
624
+
625
+ -- Event Identity
626
+ event_type TEXT NOT NULL,
627
+ sequence_num INTEGER NOT NULL,
628
+
629
+ -- Content
630
+ summary TEXT NOT NULL,
631
+ details TEXT,
632
+ embedding TEXT,
633
+
634
+ -- Causal Chain
635
+ parent_event_id INTEGER,
636
+ root_event_id INTEGER,
637
+
638
+ -- Entity References
639
+ entities TEXT,
640
+
641
+ -- Outcome
642
+ status TEXT DEFAULT 'completed',
643
+ outcome TEXT,
644
+ confidence REAL,
645
+
646
+ -- Flags
647
+ is_anchor INTEGER DEFAULT 0,
648
+ is_reversible INTEGER DEFAULT 1,
649
+ needs_verification INTEGER DEFAULT 0,
650
+
651
+ -- Timestamps
652
+ created_at TEXT DEFAULT (datetime('now')),
653
+
654
+ FOREIGN KEY (parent_event_id) REFERENCES timeline_events(id),
655
+ FOREIGN KEY (root_event_id) REFERENCES timeline_events(id)
656
+ )
657
+ """)
658
+
659
+ # Session state - current context for active session
660
+ cursor.execute("""
661
+ CREATE TABLE IF NOT EXISTS session_state (
662
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
663
+ session_id TEXT UNIQUE NOT NULL,
664
+ project_path TEXT,
665
+
666
+ -- Current State
667
+ current_goal TEXT,
668
+ pending_questions TEXT,
669
+ entity_registry TEXT,
670
+ decisions_summary TEXT,
671
+
672
+ -- Checkpoint tracking
673
+ last_checkpoint_id INTEGER,
674
+ events_since_checkpoint INTEGER DEFAULT 0,
675
+
676
+ -- Timestamps
677
+ created_at TEXT DEFAULT (datetime('now')),
678
+ updated_at TEXT DEFAULT (datetime('now')),
679
+ last_activity_at TEXT DEFAULT (datetime('now')),
680
+
681
+ FOREIGN KEY (last_checkpoint_id) REFERENCES checkpoints(id)
682
+ )
683
+ """)
684
+
685
+ # Checkpoints - session snapshots for resumption
686
+ cursor.execute("""
687
+ CREATE TABLE IF NOT EXISTS checkpoints (
688
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
689
+ session_id TEXT NOT NULL,
690
+ event_id INTEGER,
691
+
692
+ -- Checkpoint Content
693
+ summary TEXT NOT NULL,
694
+ key_facts TEXT,
695
+ decisions TEXT,
696
+ entities TEXT,
697
+
698
+ -- State at Checkpoint
699
+ current_goal TEXT,
700
+ pending_items TEXT,
701
+
702
+ -- For retrieval
703
+ embedding TEXT,
704
+ event_count INTEGER,
705
+
706
+ -- Timestamps
707
+ created_at TEXT DEFAULT (datetime('now')),
708
+
709
+ FOREIGN KEY (event_id) REFERENCES timeline_events(id)
710
+ )
711
+ """)
712
+
713
+ # Timeline indexes
714
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_timeline_session ON timeline_events(session_id, sequence_num)")
715
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_timeline_type ON timeline_events(event_type)")
716
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_timeline_parent ON timeline_events(parent_event_id)")
717
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_timeline_root ON timeline_events(root_event_id)")
718
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_timeline_created ON timeline_events(created_at)")
719
+
720
+ # Session state indexes
721
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_session_project ON session_state(project_path)")
722
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_session_activity ON session_state(last_activity_at)")
723
+
724
+ # Checkpoint indexes
725
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_checkpoint_session ON checkpoints(session_id, created_at DESC)")
726
+
727
+ # ============================================================
728
+ # AGENT CONFIGURATION TABLES
729
+ # ============================================================
730
+
731
+ # Project agent configurations
732
+ cursor.execute("""
733
+ CREATE TABLE IF NOT EXISTS project_agent_config (
734
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
735
+ project_path TEXT NOT NULL,
736
+ agent_id TEXT NOT NULL,
737
+ enabled INTEGER DEFAULT 1,
738
+ priority INTEGER DEFAULT 5,
739
+ settings TEXT DEFAULT '{}',
740
+ created_at TEXT DEFAULT (datetime('now')),
741
+ updated_at TEXT DEFAULT (datetime('now')),
742
+ UNIQUE(project_path, agent_id)
743
+ )
744
+ """)
745
+
746
+ # MCP server configurations per project
747
+ cursor.execute("""
748
+ CREATE TABLE IF NOT EXISTS project_mcp_config (
749
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
750
+ project_path TEXT NOT NULL,
751
+ mcp_id TEXT NOT NULL,
752
+ enabled INTEGER DEFAULT 1,
753
+ settings TEXT DEFAULT '{}',
754
+ created_at TEXT DEFAULT (datetime('now')),
755
+ updated_at TEXT DEFAULT (datetime('now')),
756
+ UNIQUE(project_path, mcp_id)
757
+ )
758
+ """)
759
+
760
+ # Hook configurations per project
761
+ cursor.execute("""
762
+ CREATE TABLE IF NOT EXISTS project_hook_config (
763
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
764
+ project_path TEXT NOT NULL,
765
+ hook_id TEXT NOT NULL,
766
+ enabled INTEGER DEFAULT 1,
767
+ settings TEXT DEFAULT '{}',
768
+ created_at TEXT DEFAULT (datetime('now')),
769
+ updated_at TEXT DEFAULT (datetime('now')),
770
+ UNIQUE(project_path, hook_id)
771
+ )
772
+ """)
773
+
774
+ # Project preferences
775
+ cursor.execute("""
776
+ CREATE TABLE IF NOT EXISTS project_preferences (
777
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
778
+ project_path TEXT UNIQUE NOT NULL,
779
+ name TEXT,
780
+ description TEXT,
781
+ color TEXT DEFAULT '#58a6ff',
782
+ icon TEXT DEFAULT 'folder',
783
+ default_model TEXT DEFAULT 'sonnet',
784
+ auto_memory INTEGER DEFAULT 1,
785
+ auto_checkpoint INTEGER DEFAULT 1,
786
+ settings TEXT DEFAULT '{}',
787
+ created_at TEXT DEFAULT (datetime('now')),
788
+ updated_at TEXT DEFAULT (datetime('now'))
789
+ )
790
+ """)
791
+
792
+ # Agent config indexes
793
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_agent_config_project ON project_agent_config(project_path)")
794
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_mcp_config_project ON project_mcp_config(project_path)")
795
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_hook_config_project ON project_hook_config(project_path)")
796
+
797
+ # ============================================================
798
+ # INSIGHTS TABLE (Cross-Session Learning)
799
+ # ============================================================
800
+
801
+ # Aggregated insights from cross-session analysis
802
+ cursor.execute("""
803
+ CREATE TABLE IF NOT EXISTS insights (
804
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
805
+
806
+ -- Insight Identity
807
+ insight_type TEXT NOT NULL,
808
+ title TEXT NOT NULL,
809
+ description TEXT NOT NULL,
810
+
811
+ -- Evidence
812
+ evidence_ids TEXT,
813
+ evidence_count INTEGER DEFAULT 1,
814
+ source_sessions TEXT,
815
+
816
+ -- Scoring
817
+ confidence REAL DEFAULT 0.5,
818
+ impact_score REAL DEFAULT 5.0,
819
+ validation_count INTEGER DEFAULT 0,
820
+ invalidation_count INTEGER DEFAULT 0,
821
+
822
+ -- Categorization
823
+ category TEXT,
824
+ tags TEXT,
825
+ project_path TEXT,
826
+ tech_context TEXT,
827
+
828
+ -- For similarity search
829
+ embedding TEXT,
830
+
831
+ -- Status
832
+ status TEXT DEFAULT 'active',
833
+ applied_to_claude_md INTEGER DEFAULT 0,
834
+
835
+ -- Timestamps
836
+ created_at TEXT DEFAULT (datetime('now')),
837
+ updated_at TEXT DEFAULT (datetime('now')),
838
+ last_validated_at TEXT
839
+ )
840
+ """)
841
+
842
+ # Insight feedback for accuracy tracking
843
+ cursor.execute("""
844
+ CREATE TABLE IF NOT EXISTS insight_feedback (
845
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
846
+ insight_id INTEGER NOT NULL,
847
+ session_id TEXT,
848
+ feedback_type TEXT NOT NULL,
849
+ helpful INTEGER,
850
+ comment TEXT,
851
+ created_at TEXT DEFAULT (datetime('now')),
852
+ FOREIGN KEY (insight_id) REFERENCES insights(id)
853
+ )
854
+ """)
855
+
856
+ # Insight indexes
857
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_insights_type ON insights(insight_type)")
858
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_insights_status ON insights(status)")
859
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_insights_project ON insights(project_path)")
860
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_insights_confidence ON insights(confidence DESC)")
861
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_insight_feedback ON insight_feedback(insight_id)")
862
+
863
+ # ============================================================
864
+ # MEMORY CLEANUP AND ARCHIVAL TABLES
865
+ # ============================================================
866
+
867
+ # Archived memories (soft-deleted for recovery)
868
+ cursor.execute("""
869
+ CREATE TABLE IF NOT EXISTS memory_archive (
870
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
871
+ original_id INTEGER NOT NULL,
872
+
873
+ -- Original memory data
874
+ type TEXT NOT NULL,
875
+ content TEXT NOT NULL,
876
+ embedding TEXT,
877
+ project_path TEXT,
878
+ session_id TEXT,
879
+ importance INTEGER,
880
+ access_count INTEGER,
881
+ decay_factor REAL,
882
+ metadata TEXT,
883
+
884
+ -- Archive metadata
885
+ archive_reason TEXT NOT NULL,
886
+ archived_at TEXT DEFAULT (datetime('now')),
887
+ archived_by TEXT,
888
+ relevance_score_at_archive REAL,
889
+ expires_at TEXT
890
+ )
891
+ """)
892
+
893
+ # Cleanup configuration per project
894
+ cursor.execute("""
895
+ CREATE TABLE IF NOT EXISTS cleanup_config (
896
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
897
+ project_path TEXT UNIQUE,
898
+
899
+ -- Retention settings
900
+ retention_days INTEGER DEFAULT 90,
901
+ min_relevance_score REAL DEFAULT 0.1,
902
+ keep_high_importance INTEGER DEFAULT 1,
903
+ importance_threshold INTEGER DEFAULT 7,
904
+
905
+ -- Deduplication settings
906
+ dedup_enabled INTEGER DEFAULT 1,
907
+ dedup_threshold REAL DEFAULT 0.95,
908
+
909
+ -- Archive settings
910
+ archive_before_delete INTEGER DEFAULT 1,
911
+ archive_retention_days INTEGER DEFAULT 365,
912
+
913
+ -- Schedule
914
+ auto_cleanup_enabled INTEGER DEFAULT 0,
915
+ last_cleanup_at TEXT,
916
+
917
+ created_at TEXT DEFAULT (datetime('now')),
918
+ updated_at TEXT DEFAULT (datetime('now'))
919
+ )
920
+ """)
921
+
922
+ # Cleanup audit log
923
+ cursor.execute("""
924
+ CREATE TABLE IF NOT EXISTS cleanup_log (
925
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
926
+ cleanup_type TEXT NOT NULL,
927
+ project_path TEXT,
928
+ memories_archived INTEGER DEFAULT 0,
929
+ memories_deleted INTEGER DEFAULT 0,
930
+ memories_merged INTEGER DEFAULT 0,
931
+ details TEXT,
932
+ created_at TEXT DEFAULT (datetime('now'))
933
+ )
934
+ """)
935
+
936
+ # Archive indexes
937
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_archive_original ON memory_archive(original_id)")
938
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_archive_project ON memory_archive(project_path)")
939
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_archive_reason ON memory_archive(archive_reason)")
940
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_cleanup_project ON cleanup_config(project_path)")
941
+
942
+ # ============================================================
943
+ # ANCHOR CONFLICT RESOLUTION TABLES
944
+ # ============================================================
945
+
946
+ # Anchor conflicts for manual resolution
947
+ cursor.execute("""
948
+ CREATE TABLE IF NOT EXISTS anchor_conflicts (
949
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
950
+ session_id TEXT,
951
+ project_path TEXT,
952
+
953
+ -- The conflicting anchors
954
+ anchor1_id INTEGER NOT NULL,
955
+ anchor2_id INTEGER NOT NULL,
956
+ anchor1_summary TEXT,
957
+ anchor2_summary TEXT,
958
+
959
+ -- Conflict details
960
+ conflict_type TEXT NOT NULL,
961
+ similarity_score REAL,
962
+ auto_resolution_attempted INTEGER DEFAULT 0,
963
+
964
+ -- Resolution
965
+ status TEXT DEFAULT 'unresolved',
966
+ resolution TEXT,
967
+ resolved_anchor_id INTEGER,
968
+ resolved_at TEXT,
969
+ resolved_by TEXT,
970
+
971
+ created_at TEXT DEFAULT (datetime('now')),
972
+
973
+ FOREIGN KEY (anchor1_id) REFERENCES timeline_events(id),
974
+ FOREIGN KEY (anchor2_id) REFERENCES timeline_events(id)
975
+ )
976
+ """)
977
+
978
+ # Anchor history to track fact evolution
979
+ cursor.execute("""
980
+ CREATE TABLE IF NOT EXISTS anchor_history (
981
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
982
+ anchor_id INTEGER NOT NULL,
983
+ session_id TEXT,
984
+ project_path TEXT,
985
+
986
+ -- State tracking
987
+ action TEXT NOT NULL,
988
+ previous_summary TEXT,
989
+ new_summary TEXT,
990
+ superseded_by INTEGER,
991
+
992
+ -- Context
993
+ reason TEXT,
994
+ confidence REAL,
995
+
996
+ created_at TEXT DEFAULT (datetime('now')),
997
+
998
+ FOREIGN KEY (anchor_id) REFERENCES timeline_events(id),
999
+ FOREIGN KEY (superseded_by) REFERENCES timeline_events(id)
1000
+ )
1001
+ """)
1002
+
1003
+ # Conflict indexes
1004
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_conflicts_status ON anchor_conflicts(status)")
1005
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_conflicts_session ON anchor_conflicts(session_id)")
1006
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_anchor_history ON anchor_history(anchor_id)")
1007
+
1008
+ # ============================================================
1009
+ # MARKDOWN SYNC TABLES (Moltbot-inspired transparency)
1010
+ # ============================================================
1011
+
1012
+ # Markdown sync tracking - tracks which memories are synced to markdown files
1013
+ cursor.execute("""
1014
+ CREATE TABLE IF NOT EXISTS markdown_syncs (
1015
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
1016
+ file_type TEXT NOT NULL, -- 'memory_md', 'daily_log', 'flush'
1017
+ file_path TEXT NOT NULL,
1018
+ memory_id INTEGER,
1019
+ project_path TEXT,
1020
+ synced_at TEXT DEFAULT (datetime('now')),
1021
+ content_hash TEXT,
1022
+
1023
+ FOREIGN KEY (memory_id) REFERENCES memories(id)
1024
+ )
1025
+ """)
1026
+
1027
+ # Indexes for markdown_syncs
1028
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_markdown_syncs_type ON markdown_syncs(file_type)")
1029
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_markdown_syncs_project ON markdown_syncs(project_path)")
1030
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_markdown_syncs_memory ON markdown_syncs(memory_id)")
1031
+
1032
+ # Migration: Add last_flush_at column to session_state if it doesn't exist
1033
+ safe_add_column("session_state", "last_flush_at", "TEXT")
1034
+
1035
+ self.conn.commit()
1036
+
1037
+ def _serialize_embedding(self, embedding: List[float]) -> str:
1038
+ return json.dumps(embedding)
1039
+
1040
+ def _deserialize_embedding(self, embedding_str: str) -> List[float]:
1041
+ return json.loads(embedding_str) if embedding_str else []
1042
+
1043
+ def _cosine_similarity(self, vec1: List[float], vec2: List[float]) -> float:
1044
+ a = np.array(vec1)
1045
+ b = np.array(vec2)
1046
+ norm_a = np.linalg.norm(a)
1047
+ norm_b = np.linalg.norm(b)
1048
+ if norm_a == 0 or norm_b == 0:
1049
+ return 0.0
1050
+ return float(np.dot(a, b) / (norm_a * norm_b))
1051
+
1052
+ def calculate_relevance_score(
1053
+ self,
1054
+ importance: int,
1055
+ created_at: str,
1056
+ last_accessed: Optional[str],
1057
+ access_count: int,
1058
+ decay_factor: float = 1.0,
1059
+ recency_half_life_days: float = 30.0
1060
+ ) -> float:
1061
+ """Calculate relevance score based on importance, recency, and access patterns.
1062
+
1063
+ Formula: base_importance * recency_factor * access_factor * decay_factor
1064
+
1065
+ Args:
1066
+ importance: Base importance (1-10)
1067
+ created_at: Creation timestamp
1068
+ last_accessed: Last access timestamp (None if never accessed)
1069
+ access_count: Number of times accessed
1070
+ decay_factor: Manual decay/boost multiplier
1071
+ recency_half_life_days: Days until score halves
1072
+
1073
+ Returns:
1074
+ Relevance score (0.0 to ~10.0)
1075
+ """
1076
+ import math
1077
+
1078
+ # Base importance (normalized to 0-1)
1079
+ base = importance / 10.0
1080
+
1081
+ # Recency factor: exponential decay based on age
1082
+ now = datetime.now()
1083
+ try:
1084
+ # Use last_accessed if available, otherwise created_at
1085
+ reference_time = last_accessed or created_at
1086
+ if reference_time:
1087
+ # Parse timestamp (SQLite format: YYYY-MM-DD HH:MM:SS)
1088
+ ref_dt = datetime.fromisoformat(reference_time.replace('Z', '+00:00'))
1089
+ age_days = (now - ref_dt.replace(tzinfo=None)).days
1090
+ # Exponential decay: score halves every half_life_days
1091
+ recency_factor = math.pow(0.5, age_days / recency_half_life_days)
1092
+ else:
1093
+ recency_factor = 1.0
1094
+ except (ValueError, TypeError):
1095
+ recency_factor = 1.0
1096
+
1097
+ # Access factor: boost frequently accessed memories (log scale)
1098
+ # +1 to avoid log(0), cap at reasonable value
1099
+ access_factor = 1.0 + 0.1 * math.log(1 + min(access_count, 100))
1100
+
1101
+ # Combine factors
1102
+ score = base * recency_factor * access_factor * decay_factor
1103
+
1104
+ return round(score, 4)
1105
+
1106
+ async def update_access_stats(self, memory_id: int):
1107
+ """Update access statistics for a memory."""
1108
+ cursor = self.conn.cursor()
1109
+ cursor.execute(
1110
+ """
1111
+ UPDATE memories
1112
+ SET last_accessed = datetime('now'),
1113
+ access_count = COALESCE(access_count, 0) + 1
1114
+ WHERE id = ?
1115
+ """,
1116
+ (memory_id,)
1117
+ )
1118
+ self.conn.commit()
1119
+
1120
+ async def boost_memory(self, memory_id: int, factor: float = 1.5) -> bool:
1121
+ """Boost a memory's relevance by increasing its decay_factor.
1122
+
1123
+ Args:
1124
+ memory_id: ID of the memory to boost
1125
+ factor: Multiplier to apply to current decay_factor
1126
+
1127
+ Returns:
1128
+ True if successful
1129
+ """
1130
+ cursor = self.conn.cursor()
1131
+ cursor.execute(
1132
+ """
1133
+ UPDATE memories
1134
+ SET decay_factor = COALESCE(decay_factor, 1.0) * ?,
1135
+ updated_at = datetime('now')
1136
+ WHERE id = ?
1137
+ """,
1138
+ (factor, memory_id)
1139
+ )
1140
+ self.conn.commit()
1141
+ return cursor.rowcount > 0
1142
+
1143
+ async def decay_memory(self, memory_id: int, factor: float = 0.5) -> bool:
1144
+ """Reduce a memory's relevance by decreasing its decay_factor.
1145
+
1146
+ Args:
1147
+ memory_id: ID of the memory to decay
1148
+ factor: Multiplier to apply to current decay_factor
1149
+
1150
+ Returns:
1151
+ True if successful
1152
+ """
1153
+ cursor = self.conn.cursor()
1154
+ cursor.execute(
1155
+ """
1156
+ UPDATE memories
1157
+ SET decay_factor = COALESCE(decay_factor, 1.0) * ?,
1158
+ updated_at = datetime('now')
1159
+ WHERE id = ?
1160
+ """,
1161
+ (factor, memory_id)
1162
+ )
1163
+ self.conn.commit()
1164
+ return cursor.rowcount > 0
1165
+
1166
+ async def get_memories_by_relevance(
1167
+ self,
1168
+ limit: int = 20,
1169
+ memory_type: Optional[str] = None,
1170
+ project_path: Optional[str] = None,
1171
+ min_relevance: float = 0.1
1172
+ ) -> List[Dict[str, Any]]:
1173
+ """Get memories sorted by relevance score.
1174
+
1175
+ Args:
1176
+ limit: Maximum number of results
1177
+ memory_type: Filter by type
1178
+ project_path: Filter by project
1179
+ min_relevance: Minimum relevance score threshold
1180
+
1181
+ Returns:
1182
+ List of memories with relevance scores
1183
+ """
1184
+ project_path = normalize_path(project_path)
1185
+ cursor = self.conn.cursor()
1186
+
1187
+ query = """
1188
+ SELECT id, type, content, importance, created_at, last_accessed,
1189
+ COALESCE(access_count, 0) as access_count,
1190
+ COALESCE(decay_factor, 1.0) as decay_factor,
1191
+ project_path, project_name, outcome, success
1192
+ FROM memories WHERE 1=1
1193
+ """
1194
+ params = []
1195
+
1196
+ if memory_type:
1197
+ query += " AND type = ?"
1198
+ params.append(memory_type)
1199
+ if project_path:
1200
+ query += " AND project_path = ?"
1201
+ params.append(project_path)
1202
+
1203
+ cursor.execute(query, params)
1204
+ rows = cursor.fetchall()
1205
+
1206
+ results = []
1207
+ for row in rows:
1208
+ relevance = self.calculate_relevance_score(
1209
+ importance=row["importance"],
1210
+ created_at=row["created_at"],
1211
+ last_accessed=row["last_accessed"],
1212
+ access_count=row["access_count"],
1213
+ decay_factor=row["decay_factor"]
1214
+ )
1215
+
1216
+ if relevance >= min_relevance:
1217
+ results.append({
1218
+ "id": row["id"],
1219
+ "type": row["type"],
1220
+ "content": row["content"],
1221
+ "relevance_score": relevance,
1222
+ "importance": row["importance"],
1223
+ "access_count": row["access_count"],
1224
+ "decay_factor": row["decay_factor"],
1225
+ "project_path": row["project_path"],
1226
+ "outcome": row["outcome"],
1227
+ "success": bool(row["success"]) if row["success"] is not None else None,
1228
+ "created_at": row["created_at"],
1229
+ "last_accessed": row["last_accessed"]
1230
+ })
1231
+
1232
+ # Sort by relevance
1233
+ results.sort(key=lambda x: x["relevance_score"], reverse=True)
1234
+ return results[:limit]
1235
+
1236
+ async def store_memory(
1237
+ self,
1238
+ memory_type: str,
1239
+ content: str,
1240
+ embedding: List[float],
1241
+ metadata: Optional[Dict[str, Any]] = None,
1242
+ session_id: Optional[str] = None,
1243
+ # New context fields
1244
+ project_path: Optional[str] = None,
1245
+ project_name: Optional[str] = None,
1246
+ project_type: Optional[str] = None,
1247
+ tech_stack: Optional[List[str]] = None,
1248
+ chat_id: Optional[str] = None,
1249
+ agent_type: Optional[str] = None,
1250
+ skill_used: Optional[str] = None,
1251
+ tools_used: Optional[List[str]] = None,
1252
+ outcome: Optional[str] = None,
1253
+ success: Optional[bool] = None,
1254
+ tags: Optional[List[str]] = None,
1255
+ importance: int = 5
1256
+ ) -> int:
1257
+ """Store a memory with full context.
1258
+
1259
+ Also adds the embedding to the FAISS index for fast search.
1260
+ """
1261
+ # Normalize project path to prevent duplicates
1262
+ project_path = normalize_path(project_path)
1263
+
1264
+ cursor = self.conn.cursor()
1265
+ cursor.execute(
1266
+ """
1267
+ INSERT INTO memories (
1268
+ type, content, embedding, metadata,
1269
+ project_path, project_name, project_type, tech_stack,
1270
+ session_id, chat_id,
1271
+ agent_type, skill_used, tools_used,
1272
+ outcome, success,
1273
+ tags, importance
1274
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
1275
+ """,
1276
+ (
1277
+ memory_type,
1278
+ content,
1279
+ self._serialize_embedding(embedding),
1280
+ json.dumps(metadata or {}),
1281
+ project_path,
1282
+ project_name,
1283
+ project_type,
1284
+ json.dumps(tech_stack) if tech_stack else None,
1285
+ session_id,
1286
+ chat_id,
1287
+ agent_type,
1288
+ skill_used,
1289
+ json.dumps(tools_used) if tools_used else None,
1290
+ outcome,
1291
+ 1 if success else (0 if success is False else None),
1292
+ json.dumps(tags) if tags else None,
1293
+ importance
1294
+ )
1295
+ )
1296
+ self.conn.commit()
1297
+ memory_id = cursor.lastrowid
1298
+
1299
+ # Add to FAISS index if available
1300
+ if self._memories_index and embedding:
1301
+ self._memories_index.add(memory_id, embedding)
1302
+
1303
+ return memory_id
1304
+
1305
+ async def search_similar(
1306
+ self,
1307
+ embedding: List[float],
1308
+ limit: int = 10,
1309
+ memory_type: Optional[str] = None,
1310
+ session_id: Optional[str] = None,
1311
+ project_path: Optional[str] = None,
1312
+ agent_type: Optional[str] = None,
1313
+ success_only: bool = False,
1314
+ threshold: float = 0.5
1315
+ ) -> List[Dict[str, Any]]:
1316
+ """Search for similar memories with optional filters.
1317
+
1318
+ Uses FAISS index for fast similarity search when available,
1319
+ falls back to numpy-based linear search otherwise.
1320
+ """
1321
+ # Normalize project path for consistent matching
1322
+ project_path = normalize_path(project_path)
1323
+
1324
+ # Ensure indexes are initialized
1325
+ await self._init_vector_indexes()
1326
+
1327
+ cursor = self.conn.cursor()
1328
+ has_filters = memory_type or session_id or project_path or agent_type or success_only
1329
+
1330
+ # Try FAISS index first (if no filters or willing to post-filter)
1331
+ if self._memories_index and self._memories_index.size() > 0:
1332
+ # Get more candidates than needed to allow for filtering
1333
+ candidate_limit = limit * 5 if has_filters else limit * 2
1334
+
1335
+ # FAISS search
1336
+ candidates = self._memories_index.search(
1337
+ query_embedding=embedding,
1338
+ k=candidate_limit,
1339
+ threshold=threshold
1340
+ )
1341
+
1342
+ if candidates:
1343
+ # Get full records for candidates
1344
+ candidate_ids = [c[0] for c in candidates]
1345
+ similarity_map = {c[0]: c[1] for c in candidates}
1346
+
1347
+ # Build query with filters
1348
+ placeholders = ",".join("?" * len(candidate_ids))
1349
+ query = f"""
1350
+ SELECT id, type, content, metadata,
1351
+ project_path, project_name, project_type, tech_stack,
1352
+ session_id, chat_id, agent_type, skill_used, tools_used,
1353
+ outcome, success, tags, importance, created_at
1354
+ FROM memories WHERE id IN ({placeholders})
1355
+ """
1356
+ params = list(candidate_ids)
1357
+
1358
+ if memory_type:
1359
+ query += " AND type = ?"
1360
+ params.append(memory_type)
1361
+ if session_id:
1362
+ query += " AND session_id = ?"
1363
+ params.append(session_id)
1364
+ if project_path:
1365
+ query += " AND project_path = ?"
1366
+ params.append(project_path)
1367
+ if agent_type:
1368
+ query += " AND agent_type = ?"
1369
+ params.append(agent_type)
1370
+ if success_only:
1371
+ query += " AND success = 1"
1372
+
1373
+ cursor.execute(query, params)
1374
+ rows = cursor.fetchall()
1375
+
1376
+ results = []
1377
+ for row in rows:
1378
+ similarity = similarity_map.get(row["id"], 0)
1379
+ results.append({
1380
+ "id": row["id"],
1381
+ "type": row["type"],
1382
+ "content": row["content"],
1383
+ "similarity": similarity,
1384
+ "search_method": "faiss",
1385
+ "project": {
1386
+ "path": row["project_path"],
1387
+ "name": row["project_name"],
1388
+ "type": row["project_type"],
1389
+ "tech_stack": json.loads(row["tech_stack"]) if row["tech_stack"] else None
1390
+ },
1391
+ "session_id": row["session_id"],
1392
+ "agent": {
1393
+ "type": row["agent_type"],
1394
+ "skill": row["skill_used"],
1395
+ "tools": json.loads(row["tools_used"]) if row["tools_used"] else None
1396
+ },
1397
+ "outcome": row["outcome"],
1398
+ "success": bool(row["success"]) if row["success"] is not None else None,
1399
+ "tags": json.loads(row["tags"]) if row["tags"] else None,
1400
+ "importance": row["importance"],
1401
+ "created_at": row["created_at"],
1402
+ "metadata": json.loads(row["metadata"]) if row["metadata"] else {}
1403
+ })
1404
+
1405
+ # Sort by similarity * importance for better ranking
1406
+ results.sort(key=lambda x: x["similarity"] * (x["importance"] / 10), reverse=True)
1407
+
1408
+ # Update last_accessed for returned results
1409
+ if results:
1410
+ ids = [r["id"] for r in results[:limit]]
1411
+ cursor.execute(
1412
+ f"UPDATE memories SET last_accessed = datetime('now') WHERE id IN ({','.join('?' * len(ids))})",
1413
+ ids
1414
+ )
1415
+ self.conn.commit()
1416
+
1417
+ return results[:limit]
1418
+
1419
+ # Fallback to numpy-based search (original implementation)
1420
+ query = """
1421
+ SELECT id, type, content, embedding, metadata,
1422
+ project_path, project_name, project_type, tech_stack,
1423
+ session_id, chat_id, agent_type, skill_used, tools_used,
1424
+ outcome, success, tags, importance, created_at
1425
+ FROM memories WHERE 1=1
1426
+ """
1427
+ params = []
1428
+
1429
+ if memory_type:
1430
+ query += " AND type = ?"
1431
+ params.append(memory_type)
1432
+ if session_id:
1433
+ query += " AND session_id = ?"
1434
+ params.append(session_id)
1435
+ if project_path:
1436
+ query += " AND project_path = ?"
1437
+ params.append(project_path)
1438
+ if agent_type:
1439
+ query += " AND agent_type = ?"
1440
+ params.append(agent_type)
1441
+ if success_only:
1442
+ query += " AND success = 1"
1443
+
1444
+ cursor.execute(query, params)
1445
+ rows = cursor.fetchall()
1446
+
1447
+ results = []
1448
+ for row in rows:
1449
+ stored_embedding = self._deserialize_embedding(row["embedding"])
1450
+ if stored_embedding:
1451
+ similarity = self._cosine_similarity(embedding, stored_embedding)
1452
+ if similarity >= threshold:
1453
+ results.append({
1454
+ "id": row["id"],
1455
+ "type": row["type"],
1456
+ "content": row["content"],
1457
+ "similarity": similarity,
1458
+ "search_method": "numpy",
1459
+ "project": {
1460
+ "path": row["project_path"],
1461
+ "name": row["project_name"],
1462
+ "type": row["project_type"],
1463
+ "tech_stack": json.loads(row["tech_stack"]) if row["tech_stack"] else None
1464
+ },
1465
+ "session_id": row["session_id"],
1466
+ "agent": {
1467
+ "type": row["agent_type"],
1468
+ "skill": row["skill_used"],
1469
+ "tools": json.loads(row["tools_used"]) if row["tools_used"] else None
1470
+ },
1471
+ "outcome": row["outcome"],
1472
+ "success": bool(row["success"]) if row["success"] is not None else None,
1473
+ "tags": json.loads(row["tags"]) if row["tags"] else None,
1474
+ "importance": row["importance"],
1475
+ "created_at": row["created_at"],
1476
+ "metadata": json.loads(row["metadata"]) if row["metadata"] else {}
1477
+ })
1478
+
1479
+ # Sort by similarity * importance for better ranking
1480
+ results.sort(key=lambda x: x["similarity"] * (x["importance"] / 10), reverse=True)
1481
+
1482
+ # Update last_accessed for returned results
1483
+ if results:
1484
+ ids = [r["id"] for r in results[:limit]]
1485
+ cursor.execute(
1486
+ f"UPDATE memories SET last_accessed = datetime('now') WHERE id IN ({','.join('?' * len(ids))})",
1487
+ ids
1488
+ )
1489
+ self.conn.commit()
1490
+
1491
+ return results[:limit]
1492
+
1493
+ async def keyword_search(
1494
+ self,
1495
+ query: str,
1496
+ limit: int = 10,
1497
+ memory_type: Optional[str] = None,
1498
+ session_id: Optional[str] = None,
1499
+ project_path: Optional[str] = None,
1500
+ agent_type: Optional[str] = None,
1501
+ success_only: bool = False
1502
+ ) -> List[Dict[str, Any]]:
1503
+ """Fallback keyword search when embeddings are unavailable.
1504
+
1505
+ Uses SQLite FTS-like matching with LIKE queries on content.
1506
+ Results are ranked by keyword match count and importance.
1507
+ """
1508
+ # Normalize project path for consistent matching
1509
+ project_path = normalize_path(project_path)
1510
+
1511
+ cursor = self.conn.cursor()
1512
+
1513
+ # Extract keywords from query (simple tokenization)
1514
+ keywords = [k.strip().lower() for k in query.split() if len(k.strip()) >= 3]
1515
+ if not keywords:
1516
+ keywords = [query.lower()]
1517
+
1518
+ # Build query with keyword matching
1519
+ sql = """
1520
+ SELECT id, type, content, metadata,
1521
+ project_path, project_name, project_type, tech_stack,
1522
+ session_id, chat_id, agent_type, skill_used, tools_used,
1523
+ outcome, success, tags, importance, created_at
1524
+ FROM memories WHERE 1=1
1525
+ """
1526
+ params = []
1527
+
1528
+ if memory_type:
1529
+ sql += " AND type = ?"
1530
+ params.append(memory_type)
1531
+ if session_id:
1532
+ sql += " AND session_id = ?"
1533
+ params.append(session_id)
1534
+ if project_path:
1535
+ sql += " AND project_path = ?"
1536
+ params.append(project_path)
1537
+ if agent_type:
1538
+ sql += " AND agent_type = ?"
1539
+ params.append(agent_type)
1540
+ if success_only:
1541
+ sql += " AND success = 1"
1542
+
1543
+ # Add keyword conditions (OR matching)
1544
+ keyword_conditions = []
1545
+ for kw in keywords:
1546
+ keyword_conditions.append("LOWER(content) LIKE ?")
1547
+ params.append(f"%{kw}%")
1548
+
1549
+ if keyword_conditions:
1550
+ sql += f" AND ({' OR '.join(keyword_conditions)})"
1551
+
1552
+ sql += " ORDER BY importance DESC, created_at DESC"
1553
+ sql += f" LIMIT {limit * 3}" # Get more for ranking
1554
+
1555
+ cursor.execute(sql, params)
1556
+ rows = cursor.fetchall()
1557
+
1558
+ results = []
1559
+ for row in rows:
1560
+ content_lower = row["content"].lower()
1561
+ # Calculate keyword match score
1562
+ match_count = sum(1 for kw in keywords if kw in content_lower)
1563
+ keyword_score = match_count / len(keywords) if keywords else 0
1564
+
1565
+ results.append({
1566
+ "id": row["id"],
1567
+ "type": row["type"],
1568
+ "content": row["content"],
1569
+ "similarity": keyword_score, # Use keyword score as pseudo-similarity
1570
+ "match_type": "keyword",
1571
+ "keywords_matched": match_count,
1572
+ "project": {
1573
+ "path": row["project_path"],
1574
+ "name": row["project_name"],
1575
+ "type": row["project_type"],
1576
+ "tech_stack": json.loads(row["tech_stack"]) if row["tech_stack"] else None
1577
+ },
1578
+ "session_id": row["session_id"],
1579
+ "agent": {
1580
+ "type": row["agent_type"],
1581
+ "skill": row["skill_used"],
1582
+ "tools": json.loads(row["tools_used"]) if row["tools_used"] else None
1583
+ },
1584
+ "outcome": row["outcome"],
1585
+ "success": bool(row["success"]) if row["success"] is not None else None,
1586
+ "tags": json.loads(row["tags"]) if row["tags"] else None,
1587
+ "importance": row["importance"],
1588
+ "created_at": row["created_at"],
1589
+ "metadata": json.loads(row["metadata"]) if row["metadata"] else {}
1590
+ })
1591
+
1592
+ # Sort by keyword score * importance
1593
+ results.sort(key=lambda x: x["similarity"] * (x["importance"] / 10), reverse=True)
1594
+
1595
+ # Update last_accessed for returned results
1596
+ if results:
1597
+ ids = [r["id"] for r in results[:limit]]
1598
+ cursor.execute(
1599
+ f"UPDATE memories SET last_accessed = datetime('now') WHERE id IN ({','.join('?' * len(ids))})",
1600
+ ids
1601
+ )
1602
+ self.conn.commit()
1603
+
1604
+ return results[:limit]
1605
+
1606
+ async def store_project(
1607
+ self,
1608
+ path: str,
1609
+ name: Optional[str] = None,
1610
+ project_type: Optional[str] = None,
1611
+ tech_stack: Optional[List[str]] = None,
1612
+ conventions: Optional[Dict[str, Any]] = None,
1613
+ preferences: Optional[Dict[str, Any]] = None
1614
+ ) -> int:
1615
+ """Store or update project information."""
1616
+ # Normalize path to prevent duplicates
1617
+ path = normalize_path(path)
1618
+
1619
+ cursor = self.conn.cursor()
1620
+ cursor.execute(
1621
+ """
1622
+ INSERT INTO projects (path, name, type, tech_stack, conventions, preferences)
1623
+ VALUES (?, ?, ?, ?, ?, ?)
1624
+ ON CONFLICT(path) DO UPDATE SET
1625
+ name = excluded.name,
1626
+ type = excluded.type,
1627
+ tech_stack = excluded.tech_stack,
1628
+ conventions = excluded.conventions,
1629
+ preferences = excluded.preferences,
1630
+ updated_at = datetime('now')
1631
+ """,
1632
+ (
1633
+ path,
1634
+ name,
1635
+ project_type,
1636
+ json.dumps(tech_stack) if tech_stack else None,
1637
+ json.dumps(conventions) if conventions else None,
1638
+ json.dumps(preferences) if preferences else None
1639
+ )
1640
+ )
1641
+ self.conn.commit()
1642
+ return cursor.lastrowid
1643
+
1644
+ async def get_project(self, path: str) -> Optional[Dict[str, Any]]:
1645
+ """Get project information."""
1646
+ # Normalize path for consistent matching
1647
+ path = normalize_path(path)
1648
+
1649
+ cursor = self.conn.cursor()
1650
+ cursor.execute("SELECT * FROM projects WHERE path = ?", (path,))
1651
+ row = cursor.fetchone()
1652
+ if row:
1653
+ return {
1654
+ "id": row["id"],
1655
+ "path": row["path"],
1656
+ "name": row["name"],
1657
+ "type": row["type"],
1658
+ "tech_stack": json.loads(row["tech_stack"]) if row["tech_stack"] else None,
1659
+ "conventions": json.loads(row["conventions"]) if row["conventions"] else None,
1660
+ "preferences": json.loads(row["preferences"]) if row["preferences"] else None
1661
+ }
1662
+ return None
1663
+
1664
+ async def store_pattern(
1665
+ self,
1666
+ name: str,
1667
+ solution: str,
1668
+ embedding: List[float],
1669
+ problem_type: Optional[str] = None,
1670
+ tech_context: Optional[List[str]] = None,
1671
+ metadata: Optional[Dict[str, Any]] = None
1672
+ ) -> int:
1673
+ """Store a reusable pattern/solution."""
1674
+ cursor = self.conn.cursor()
1675
+ cursor.execute(
1676
+ """
1677
+ INSERT INTO patterns (name, problem_type, solution, embedding, tech_context, metadata)
1678
+ VALUES (?, ?, ?, ?, ?, ?)
1679
+ """,
1680
+ (
1681
+ name,
1682
+ problem_type,
1683
+ solution,
1684
+ self._serialize_embedding(embedding),
1685
+ json.dumps(tech_context) if tech_context else None,
1686
+ json.dumps(metadata or {})
1687
+ )
1688
+ )
1689
+ self.conn.commit()
1690
+ return cursor.lastrowid
1691
+
1692
async def search_patterns(
    self,
    embedding: List[float],
    limit: int = 5,
    problem_type: Optional[str] = None,
    threshold: float = 0.5
) -> List[Dict[str, Any]]:
    """Rank stored patterns by cosine similarity, weighted by success rate."""
    sql = "SELECT * FROM patterns WHERE 1=1"
    args: list = []
    if problem_type:
        sql += " AND problem_type = ?"
        args.append(problem_type)

    cur = self.conn.cursor()
    cur.execute(sql, args)

    matches = []
    for record in cur.fetchall():
        candidate = self._deserialize_embedding(record["embedding"])
        if not candidate:
            continue
        sim = self._cosine_similarity(embedding, candidate)
        if sim < threshold:
            continue
        # Unproven patterns get a neutral 0.5 prior rather than zero.
        attempts = record["success_count"] + record["failure_count"]
        rate = record["success_count"] / attempts if attempts > 0 else 0.5
        matches.append({
            "id": record["id"],
            "name": record["name"],
            "problem_type": record["problem_type"],
            "solution": record["solution"],
            "tech_context": json.loads(record["tech_context"]) if record["tech_context"] else None,
            "similarity": sim,
            "success_rate": rate,
            "score": sim * rate,
        })

    matches.sort(key=lambda item: item["score"], reverse=True)
    return matches[:limit]
1734
+
1735
async def keyword_search_patterns(
    self,
    query: str,
    limit: int = 5,
    problem_type: Optional[str] = None
) -> List[Dict[str, Any]]:
    """Fallback keyword search for patterns when embeddings unavailable.

    Splits `query` into lowercase keywords (>= 3 chars), matches them
    against pattern name/solution with LIKE, then re-scores by keyword
    hit ratio weighted by the pattern's historical success rate.

    Fixes over the previous version:
    - LIMIT is a bound parameter instead of an f-string interpolation.
    - LIKE metacharacters (%, _, \\) in user keywords are escaped so
      they match literally instead of acting as wildcards.
    """
    def _like(term: str) -> str:
        # Escape LIKE metacharacters so user text matches literally.
        escaped = term.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        return f"%{escaped}%"

    # Extract keywords from query (short tokens are too noisy).
    keywords = [k.strip().lower() for k in query.split() if len(k.strip()) >= 3]
    if not keywords:
        keywords = [query.lower()]

    sql = "SELECT * FROM patterns WHERE 1=1"
    params: list = []

    if problem_type:
        sql += " AND problem_type = ?"
        params.append(problem_type)

    # Any keyword hitting name or solution qualifies the row.
    keyword_conditions = []
    for kw in keywords:
        keyword_conditions.append("(LOWER(name) LIKE ? ESCAPE '\\' OR LOWER(solution) LIKE ? ESCAPE '\\')")
        pattern = _like(kw)
        params.append(pattern)
        params.append(pattern)

    if keyword_conditions:
        sql += f" AND ({' OR '.join(keyword_conditions)})"

    # Over-fetch (2x) so post-scoring can reorder; LIMIT is bound, not interpolated.
    sql += " ORDER BY success_count DESC, created_at DESC LIMIT ?"
    params.append(limit * 2)

    cursor = self.conn.cursor()
    cursor.execute(sql, params)
    rows = cursor.fetchall()

    results = []
    for row in rows:
        # Score = fraction of keywords found in name+solution.
        combined_text = f"{row['name']} {row['solution']}".lower()
        match_count = sum(1 for kw in keywords if kw in combined_text)
        keyword_score = match_count / len(keywords) if keywords else 0

        total = row["success_count"] + row["failure_count"]
        success_rate = row["success_count"] / total if total > 0 else 0.5

        results.append({
            "id": row["id"],
            "name": row["name"],
            "problem_type": row["problem_type"],
            "solution": row["solution"],
            "tech_context": json.loads(row["tech_context"]) if row["tech_context"] else None,
            "similarity": keyword_score,
            "match_type": "keyword",
            "keywords_matched": match_count,
            "success_rate": success_rate,
            "score": keyword_score * success_rate
        })

    results.sort(key=lambda x: x["score"], reverse=True)
    return results[:limit]
1797
+
1798
async def update_pattern_outcome(self, pattern_id: int, success: bool):
    """Record one success or failure observation for a pattern."""
    # Column name comes from a fixed two-way choice, never user input.
    column = "success_count" if success else "failure_count"
    cur = self.conn.cursor()
    cur.execute(
        f"UPDATE patterns SET {column} = {column} + 1 WHERE id = ?",
        (pattern_id,),
    )
    self.conn.commit()
1806
+
1807
async def get_memory(self, memory_id: int) -> Optional[Dict[str, Any]]:
    """Fetch one memory row by primary key; None when no such row."""
    cur = self.conn.cursor()
    cur.execute("SELECT * FROM memories WHERE id = ?", (memory_id,))
    rec = cur.fetchone()
    if rec is None:
        return None

    # NULL success means "no recorded outcome", distinct from False.
    outcome_known = rec["success"] is not None
    return {
        "id": rec["id"],
        "type": rec["type"],
        "content": rec["content"],
        "project": {
            "path": rec["project_path"],
            "name": rec["project_name"],
            "type": rec["project_type"],
        },
        "session_id": rec["session_id"],
        "agent_type": rec["agent_type"],
        "skill_used": rec["skill_used"],
        "outcome": rec["outcome"],
        "success": bool(rec["success"]) if outcome_known else None,
        "importance": rec["importance"],
        "created_at": rec["created_at"],
        "metadata": json.loads(rec["metadata"]) if rec["metadata"] else {},
    }
1832
+
1833
async def get_memories_by_type(
    self,
    memory_type: str,
    limit: int = 50,
    session_id: Optional[str] = None,
    project_path: Optional[str] = None
) -> List[Dict[str, Any]]:
    """List memories of one type, most important and newest first."""
    # Canonicalize the path so filtering matches rows stored elsewhere.
    project_path = normalize_path(project_path)

    sql = "SELECT * FROM memories WHERE type = ?"
    args: list = [memory_type]
    if session_id:
        sql += " AND session_id = ?"
        args.append(session_id)
    if project_path:
        sql += " AND project_path = ?"
        args.append(project_path)
    sql += " ORDER BY importance DESC, created_at DESC LIMIT ?"
    args.append(limit)

    cur = self.conn.cursor()
    cur.execute(sql, args)
    return [
        {
            "id": rec["id"],
            "type": rec["type"],
            "content": rec["content"],
            "project_path": rec["project_path"],
            "session_id": rec["session_id"],
            "importance": rec["importance"],
            "created_at": rec["created_at"],
        }
        for rec in cur.fetchall()
    ]
1874
+
1875
async def delete_memory(self, memory_id: int) -> bool:
    """Delete a memory row; True only when a row was actually removed."""
    cur = self.conn.execute("DELETE FROM memories WHERE id = ?", (memory_id,))
    self.conn.commit()
    return cur.rowcount > 0
1880
+
1881
async def get_stats(self) -> Dict[str, Any]:
    """Aggregate counts across memories, patterns and projects."""
    cur = self.conn.cursor()

    def _scalar(sql: str, key: str) -> int:
        # Single-row COUNT(*) helper.
        cur.execute(sql)
        return cur.fetchone()[key]

    def _grouped(sql: str, key: str) -> Dict[str, int]:
        # GROUP BY helper -> {group value: count}.
        cur.execute(sql)
        return {r[key]: r["count"] for r in cur.fetchall()}

    return {
        "total_memories": _scalar("SELECT COUNT(*) as total FROM memories", "total"),
        "by_type": _grouped("SELECT type, COUNT(*) as count FROM memories GROUP BY type", "type"),
        "by_project": _grouped(
            "SELECT project_path, COUNT(*) as count FROM memories WHERE project_path IS NOT NULL GROUP BY project_path",
            "project_path",
        ),
        "by_agent": _grouped(
            "SELECT agent_type, COUNT(*) as count FROM memories WHERE agent_type IS NOT NULL GROUP BY agent_type",
            "agent_type",
        ),
        "patterns_count": _scalar("SELECT COUNT(*) as count FROM patterns", "count"),
        "projects_count": _scalar("SELECT COUNT(*) as count FROM projects", "count"),
        "database": self.db_path,
    }
1912
+
1913
+ # ============================================================
1914
+ # TIMELINE METHODS
1915
+ # ============================================================
1916
+
1917
async def get_next_sequence_num(self, session_id: str) -> int:
    """Return one past the highest sequence number recorded for a session."""
    cur = self.conn.cursor()
    cur.execute(
        "SELECT MAX(sequence_num) as max_seq FROM timeline_events WHERE session_id = ?",
        (session_id,),
    )
    highest = cur.fetchone()["max_seq"]
    # MAX() is NULL when the session has no events yet -> start at 1.
    return 1 if highest is None else highest + 1
1926
+
1927
async def store_timeline_event(
    self,
    session_id: str,
    event_type: str,
    summary: str,
    details: Optional[str] = None,
    embedding: Optional[List[float]] = None,
    project_path: Optional[str] = None,
    parent_event_id: Optional[int] = None,
    root_event_id: Optional[int] = None,
    entities: Optional[Dict[str, List[str]]] = None,
    status: str = "completed",
    outcome: Optional[str] = None,
    confidence: Optional[float] = None,
    is_anchor: bool = False
) -> int:
    """Append an event to a session's timeline and return its row id."""
    # Canonical path keeps one project from appearing under several spellings.
    project_path = normalize_path(project_path)

    cur = self.conn.cursor()

    # Events are ordered per-session by a monotonically increasing counter.
    seq = await self.get_next_sequence_num(session_id)

    cur.execute(
        """
        INSERT INTO timeline_events (
            session_id, project_path, event_type, sequence_num,
            summary, details, embedding,
            parent_event_id, root_event_id, entities,
            status, outcome, confidence, is_anchor
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """,
        (
            session_id,
            project_path,
            event_type,
            seq,
            summary[:200] if summary else "",  # cap stored summary length
            details,
            self._serialize_embedding(embedding) if embedding else None,
            parent_event_id,
            root_event_id,
            json.dumps(entities) if entities else None,
            status,
            outcome,
            confidence,
            1 if is_anchor else 0,
        )
    )
    self.conn.commit()

    # Keep the checkpoint-staleness counter in sync with activity.
    await self._increment_events_since_checkpoint(session_id)

    return cur.lastrowid
1984
+
1985
async def get_timeline_events(
    self,
    session_id: str,
    limit: int = 20,
    event_type: Optional[str] = None,
    since_event_id: Optional[int] = None,
    anchors_only: bool = False
) -> List[Dict[str, Any]]:
    """Return a session's timeline events, newest (highest sequence) first.

    Args:
        session_id: Session whose events to fetch.
        limit: Maximum number of events returned.
        event_type: Optional filter on the event_type column.
        since_event_id: Only events with id strictly greater than this.
        anchors_only: Restrict to events flagged is_anchor.
    """
    cursor = self.conn.cursor()

    query = "SELECT * FROM timeline_events WHERE session_id = ?"
    params: list = [session_id]

    if event_type:
        query += " AND event_type = ?"
        params.append(event_type)

    # Fix: explicit None check — a truthiness test silently dropped the
    # filter when since_event_id was 0.
    if since_event_id is not None:
        query += " AND id > ?"
        params.append(since_event_id)

    if anchors_only:
        query += " AND is_anchor = 1"

    query += " ORDER BY sequence_num DESC LIMIT ?"
    params.append(limit)

    cursor.execute(query, params)
    rows = cursor.fetchall()

    return [
        {
            "id": row["id"],
            "session_id": row["session_id"],
            "event_type": row["event_type"],
            "sequence_num": row["sequence_num"],
            "summary": row["summary"],
            "details": row["details"],
            "parent_event_id": row["parent_event_id"],
            "root_event_id": row["root_event_id"],
            "entities": json.loads(row["entities"]) if row["entities"] else None,
            "status": row["status"],
            "outcome": row["outcome"],
            "confidence": row["confidence"],
            "is_anchor": bool(row["is_anchor"]),
            "created_at": row["created_at"]
        }
        for row in rows
    ]
2035
+
2036
async def search_timeline_events(
    self,
    embedding: List[float],
    session_id: Optional[str] = None,
    limit: int = 10,
    threshold: float = 0.5
) -> List[Dict[str, Any]]:
    """Rank timeline events by cosine similarity to a query embedding."""
    sql = "SELECT * FROM timeline_events WHERE embedding IS NOT NULL"
    args: list = []
    if session_id:
        sql += " AND session_id = ?"
        args.append(session_id)

    cur = self.conn.cursor()
    cur.execute(sql, args)

    scored = []
    for rec in cur.fetchall():
        vec = self._deserialize_embedding(rec["embedding"])
        if not vec:
            continue
        sim = self._cosine_similarity(embedding, vec)
        if sim < threshold:
            continue
        scored.append({
            "id": rec["id"],
            "session_id": rec["session_id"],
            "event_type": rec["event_type"],
            "sequence_num": rec["sequence_num"],
            "summary": rec["summary"],
            "details": rec["details"],
            "similarity": sim,
            "is_anchor": bool(rec["is_anchor"]),
            "created_at": rec["created_at"],
        })

    scored.sort(key=lambda item: item["similarity"], reverse=True)
    return scored[:limit]
2076
+
2077
+ # ============================================================
2078
+ # SESSION STATE METHODS
2079
+ # ============================================================
2080
+
2081
async def get_or_create_session_state(
    self,
    session_id: str,
    project_path: Optional[str] = None
) -> Dict[str, Any]:
    """Fetch session state, creating a fresh row if none exists yet."""
    # Canonicalize the path so one project maps to one state row.
    project_path = normalize_path(project_path)

    cur = self.conn.cursor()
    cur.execute("SELECT * FROM session_state WHERE session_id = ?", (session_id,))
    rec = cur.fetchone()

    if rec is not None:
        return {
            "id": rec["id"],
            "session_id": rec["session_id"],
            "project_path": rec["project_path"],
            "current_goal": rec["current_goal"],
            "pending_questions": json.loads(rec["pending_questions"]) if rec["pending_questions"] else [],
            "entity_registry": json.loads(rec["entity_registry"]) if rec["entity_registry"] else {},
            "decisions_summary": rec["decisions_summary"],
            "last_checkpoint_id": rec["last_checkpoint_id"],
            "events_since_checkpoint": rec["events_since_checkpoint"],
            "created_at": rec["created_at"],
            "updated_at": rec["updated_at"],
            "last_activity_at": rec["last_activity_at"],
        }

    # No state yet: insert a bare row and synthesize its default view.
    cur.execute(
        """
        INSERT INTO session_state (session_id, project_path)
        VALUES (?, ?)
        """,
        (session_id, project_path)
    )
    self.conn.commit()

    now = datetime.now().isoformat()
    return {
        "id": cur.lastrowid,
        "session_id": session_id,
        "project_path": project_path,
        "current_goal": None,
        "pending_questions": [],
        "entity_registry": {},
        "decisions_summary": None,
        "last_checkpoint_id": None,
        "events_since_checkpoint": 0,
        "created_at": now,
        "updated_at": now,
        "last_activity_at": now,
    }
2135
+
2136
async def update_session_state(
    self,
    session_id: str,
    current_goal: Optional[str] = None,
    pending_questions: Optional[List[str]] = None,
    entity_registry: Optional[Dict[str, str]] = None,
    decisions_summary: Optional[str] = None,
    last_checkpoint_id: Optional[int] = None,
    reset_events_counter: bool = False
) -> bool:
    """Partially update a session_state row; True when a row matched.

    None arguments are skipped (not written as NULL); timestamps always
    refresh on any update.
    """
    assignments = ["updated_at = datetime('now')", "last_activity_at = datetime('now')"]
    bound: list = []

    # (SQL fragment, value-to-bind) pairs; None means "leave untouched".
    optional_columns = [
        ("current_goal = ?", current_goal),
        ("pending_questions = ?", None if pending_questions is None else json.dumps(pending_questions)),
        ("entity_registry = ?", None if entity_registry is None else json.dumps(entity_registry)),
        ("decisions_summary = ?", decisions_summary),
        ("last_checkpoint_id = ?", last_checkpoint_id),
    ]
    for fragment, value in optional_columns:
        if value is not None:
            assignments.append(fragment)
            bound.append(value)

    if reset_events_counter:
        assignments.append("events_since_checkpoint = 0")

    bound.append(session_id)

    cur = self.conn.cursor()
    cur.execute(
        f"UPDATE session_state SET {', '.join(assignments)} WHERE session_id = ?",
        bound
    )
    self.conn.commit()
    return cur.rowcount > 0
2184
+
2185
+ async def _increment_events_since_checkpoint(self, session_id: str):
2186
+ """Increment the events counter for a session."""
2187
+ cursor = self.conn.cursor()
2188
+ cursor.execute(
2189
+ """
2190
+ UPDATE session_state
2191
+ SET events_since_checkpoint = events_since_checkpoint + 1,
2192
+ last_activity_at = datetime('now')
2193
+ WHERE session_id = ?
2194
+ """,
2195
+ (session_id,)
2196
+ )
2197
+ self.conn.commit()
2198
+
2199
async def get_latest_session_for_project(
    self,
    project_path: str
) -> Optional[Dict[str, Any]]:
    """Return the most recently active session state for a project."""
    # Canonicalize the path so lookups match rows stored elsewhere.
    project_path = normalize_path(project_path)

    cur = self.conn.cursor()
    cur.execute(
        """
        SELECT * FROM session_state
        WHERE project_path = ?
        ORDER BY last_activity_at DESC
        LIMIT 1
        """,
        (project_path,)
    )
    rec = cur.fetchone()
    if rec is None:
        return None

    return {
        "id": rec["id"],
        "session_id": rec["session_id"],
        "project_path": rec["project_path"],
        "current_goal": rec["current_goal"],
        "pending_questions": json.loads(rec["pending_questions"]) if rec["pending_questions"] else [],
        "entity_registry": json.loads(rec["entity_registry"]) if rec["entity_registry"] else {},
        "decisions_summary": rec["decisions_summary"],
        "last_checkpoint_id": rec["last_checkpoint_id"],
        "events_since_checkpoint": rec["events_since_checkpoint"],
        "last_activity_at": rec["last_activity_at"],
    }
2233
+
2234
+ # ============================================================
2235
+ # CHECKPOINT METHODS
2236
+ # ============================================================
2237
+
2238
async def store_checkpoint(
    self,
    session_id: str,
    summary: str,
    event_id: Optional[int] = None,
    key_facts: Optional[List[str]] = None,
    decisions: Optional[List[str]] = None,
    entities: Optional[Dict[str, str]] = None,
    current_goal: Optional[str] = None,
    pending_items: Optional[List[str]] = None,
    embedding: Optional[List[float]] = None,
    event_count: Optional[int] = None
) -> int:
    """Persist a conversation checkpoint and point the session at it."""
    def _dump(value):
        # Empty/missing collections are stored as NULL, not "[]"/"{}".
        return json.dumps(value) if value else None

    cur = self.conn.cursor()
    cur.execute(
        """
        INSERT INTO checkpoints (
            session_id, event_id, summary, key_facts, decisions,
            entities, current_goal, pending_items, embedding, event_count
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """,
        (
            session_id,
            event_id,
            summary,
            _dump(key_facts),
            _dump(decisions),
            _dump(entities),
            current_goal,
            _dump(pending_items),
            self._serialize_embedding(embedding) if embedding else None,
            event_count,
        )
    )
    self.conn.commit()
    new_id = cur.lastrowid

    # Point the session at this checkpoint and restart the staleness counter.
    await self.update_session_state(
        session_id,
        last_checkpoint_id=new_id,
        reset_events_counter=True,
    )
    return new_id
2286
+
2287
async def get_latest_checkpoint(
    self,
    session_id: str
) -> Optional[Dict[str, Any]]:
    """Return the newest checkpoint for a session, or None when absent."""
    cur = self.conn.cursor()
    cur.execute(
        """
        SELECT * FROM checkpoints
        WHERE session_id = ?
        ORDER BY created_at DESC
        LIMIT 1
        """,
        (session_id,)
    )
    rec = cur.fetchone()
    if rec is None:
        return None

    def _json_list(column: str):
        # NULL columns decode to an empty list for callers' convenience.
        raw = rec[column]
        return json.loads(raw) if raw else []

    return {
        "id": rec["id"],
        "session_id": rec["session_id"],
        "event_id": rec["event_id"],
        "summary": rec["summary"],
        "key_facts": _json_list("key_facts"),
        "decisions": _json_list("decisions"),
        "entities": json.loads(rec["entities"]) if rec["entities"] else {},
        "current_goal": rec["current_goal"],
        "pending_items": _json_list("pending_items"),
        "event_count": rec["event_count"],
        "created_at": rec["created_at"],
    }
2319
+
2320
async def get_checkpoints_for_session(
    self,
    session_id: str,
    limit: int = 10
) -> List[Dict[str, Any]]:
    """List a session's checkpoints, newest first (summary view only)."""
    cur = self.conn.cursor()
    cur.execute(
        """
        SELECT * FROM checkpoints
        WHERE session_id = ?
        ORDER BY created_at DESC
        LIMIT ?
        """,
        (session_id, limit)
    )

    summaries = []
    for rec in cur.fetchall():
        summaries.append({
            "id": rec["id"],
            "session_id": rec["session_id"],
            "summary": rec["summary"],
            "current_goal": rec["current_goal"],
            "event_count": rec["event_count"],
            "created_at": rec["created_at"],
        })
    return summaries
2348
+
2349
+ # ============================================================
2350
+ # GENERIC QUERY METHOD
2351
+ # ============================================================
2352
+
2353
@with_retry(max_retries=DB_MAX_RETRIES, base_delay=DB_RETRY_BASE_DELAY)
async def execute_query(
    self,
    query: str,
    params: tuple = (),
    timeout: Optional[float] = None
) -> List[Dict[str, Any]]:
    """Run a read-only SQL query and return its rows as plain dicts.

    Args:
        query: SQL text to execute.
        params: Bound query parameters.
        timeout: Soft per-query budget in seconds (DB_TIMEOUT by default);
            only used to log slow queries, not to abort them.

    Returns:
        List of dicts, one per result row (empty list when no rows).

    Raises:
        QueryTimeoutError: If query exceeds timeout.
        RetryExhaustedError: If all retry attempts fail.
        DatabaseError: For other database errors.
    """
    budget = timeout or DB_TIMEOUT

    try:
        with self.get_connection() as conn:
            started = time.time()
            cur = conn.cursor()
            cur.execute(query, params)
            fetched = cur.fetchall()

            # Flag queries approaching the budget for monitoring.
            elapsed = time.time() - started
            if elapsed > budget * 0.8:
                logger.warning(
                    f"Slow query detected ({elapsed:.2f}s): {query[:100]}..."
                )

            if not fetched:
                return []
            return [dict(record) for record in fetched]

    except sqlite3.OperationalError as e:
        message = str(e).lower()
        if "database is locked" in message or "busy" in message:
            # Transient contention: re-raise so @with_retry retries it.
            logger.warning(f"Database busy/locked, will retry: {e}")
            raise
        if "unable to open database" in message:
            raise ConnectionPoolError(f"Cannot open database: {e}", original_error=e)
        raise DatabaseError(
            f"Query execution failed: {e}",
            error_code="DB_QUERY_ERROR",
            original_error=e
        )
    except sqlite3.IntegrityError as e:
        raise DatabaseError(
            f"Integrity constraint violation: {e}",
            error_code="DB_INTEGRITY_ERROR",
            original_error=e
        )
    except Exception as e:
        logger.error(f"Unexpected error executing query: {e}")
        raise DatabaseError(
            f"Unexpected database error: {e}",
            error_code="DB_UNKNOWN_ERROR",
            original_error=e
        )
2426
+
2427
async def execute_write(
    self,
    query: str,
    params: tuple = (),
    commit: bool = True
) -> int:
    """Run a mutating statement (INSERT/UPDATE/DELETE) with retry logic.

    Thin public wrapper: all the work (and the retry decorator) lives in
    _execute_write_with_retry.

    Args:
        query: SQL statement to execute.
        params: Bound statement parameters.
        commit: Whether to commit the transaction.

    Returns:
        lastrowid for INSERT statements, affected row count otherwise.

    Raises:
        RetryExhaustedError: If all retry attempts fail.
        DatabaseError: For other database errors.
    """
    return await self._execute_write_with_retry(query, params, commit)
2448
+
2449
+ @with_retry(max_retries=DB_MAX_RETRIES, base_delay=DB_RETRY_BASE_DELAY)
2450
+ async def _execute_write_with_retry(
2451
+ self,
2452
+ query: str,
2453
+ params: tuple,
2454
+ commit: bool
2455
+ ) -> int:
2456
+ """Internal write execution with retry decorator."""
2457
+ try:
2458
+ with self.get_connection() as conn:
2459
+ cursor = conn.cursor()
2460
+ cursor.execute(query, params)
2461
+
2462
+ if commit:
2463
+ conn.commit()
2464
+
2465
+ # Return lastrowid for INSERT, rowcount for UPDATE/DELETE
2466
+ if query.strip().upper().startswith("INSERT"):
2467
+ return cursor.lastrowid
2468
+ return cursor.rowcount
2469
+
2470
+ except sqlite3.OperationalError as e:
2471
+ error_str = str(e).lower()
2472
+ if "database is locked" in error_str or "busy" in error_str:
2473
+ logger.warning(f"Database busy/locked during write, will retry: {e}")
2474
+ raise # Let retry decorator handle it
2475
+ raise DatabaseError(
2476
+ f"Write operation failed: {e}",
2477
+ error_code="DB_WRITE_ERROR",
2478
+ original_error=e
2479
+ )
2480
+ except sqlite3.IntegrityError as e:
2481
+ raise DatabaseError(
2482
+ f"Integrity constraint violation: {e}",
2483
+ error_code="DB_INTEGRITY_ERROR",
2484
+ original_error=e
2485
+ )