claude-memory-agent 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +107 -0
- package/README.md +200 -0
- package/agent_card.py +512 -0
- package/bin/cli.js +181 -0
- package/bin/postinstall.js +216 -0
- package/config.py +104 -0
- package/dashboard.html +2689 -0
- package/hooks/README.md +196 -0
- package/hooks/__pycache__/auto-detect-response.cpython-312.pyc +0 -0
- package/hooks/__pycache__/auto_capture.cpython-312.pyc +0 -0
- package/hooks/__pycache__/session_end.cpython-312.pyc +0 -0
- package/hooks/__pycache__/session_start.cpython-312.pyc +0 -0
- package/hooks/auto-detect-response.py +348 -0
- package/hooks/auto_capture.py +255 -0
- package/hooks/detect-correction.py +173 -0
- package/hooks/grounding-hook.py +348 -0
- package/hooks/log-tool-use.py +234 -0
- package/hooks/log-user-request.py +208 -0
- package/hooks/pre-tool-decision.py +218 -0
- package/hooks/problem-detector.py +343 -0
- package/hooks/session_end.py +192 -0
- package/hooks/session_start.py +227 -0
- package/install.py +887 -0
- package/main.py +2859 -0
- package/manager.py +997 -0
- package/package.json +55 -0
- package/requirements.txt +8 -0
- package/run_server.py +136 -0
- package/services/__init__.py +50 -0
- package/services/__pycache__/__init__.cpython-312.pyc +0 -0
- package/services/__pycache__/agent_registry.cpython-312.pyc +0 -0
- package/services/__pycache__/auth.cpython-312.pyc +0 -0
- package/services/__pycache__/auto_inject.cpython-312.pyc +0 -0
- package/services/__pycache__/claude_md_sync.cpython-312.pyc +0 -0
- package/services/__pycache__/cleanup.cpython-312.pyc +0 -0
- package/services/__pycache__/compaction_flush.cpython-312.pyc +0 -0
- package/services/__pycache__/confidence.cpython-312.pyc +0 -0
- package/services/__pycache__/daily_log.cpython-312.pyc +0 -0
- package/services/__pycache__/database.cpython-312.pyc +0 -0
- package/services/__pycache__/embeddings.cpython-312.pyc +0 -0
- package/services/__pycache__/insights.cpython-312.pyc +0 -0
- package/services/__pycache__/llm_analyzer.cpython-312.pyc +0 -0
- package/services/__pycache__/memory_md_sync.cpython-312.pyc +0 -0
- package/services/__pycache__/retry_queue.cpython-312.pyc +0 -0
- package/services/__pycache__/timeline.cpython-312.pyc +0 -0
- package/services/__pycache__/vector_index.cpython-312.pyc +0 -0
- package/services/__pycache__/websocket.cpython-312.pyc +0 -0
- package/services/agent_registry.py +753 -0
- package/services/auth.py +331 -0
- package/services/auto_inject.py +250 -0
- package/services/claude_md_sync.py +275 -0
- package/services/cleanup.py +667 -0
- package/services/compaction_flush.py +447 -0
- package/services/confidence.py +301 -0
- package/services/daily_log.py +333 -0
- package/services/database.py +2485 -0
- package/services/embeddings.py +358 -0
- package/services/insights.py +632 -0
- package/services/llm_analyzer.py +595 -0
- package/services/memory_md_sync.py +409 -0
- package/services/retry_queue.py +453 -0
- package/services/timeline.py +579 -0
- package/services/vector_index.py +398 -0
- package/services/websocket.py +257 -0
- package/skills/__init__.py +6 -0
- package/skills/__pycache__/__init__.cpython-312.pyc +0 -0
- package/skills/__pycache__/admin.cpython-312.pyc +0 -0
- package/skills/__pycache__/checkpoint.cpython-312.pyc +0 -0
- package/skills/__pycache__/claude_md.cpython-312.pyc +0 -0
- package/skills/__pycache__/cleanup.cpython-312.pyc +0 -0
- package/skills/__pycache__/grounding.cpython-312.pyc +0 -0
- package/skills/__pycache__/insights.cpython-312.pyc +0 -0
- package/skills/__pycache__/natural_language.cpython-312.pyc +0 -0
- package/skills/__pycache__/retrieve.cpython-312.pyc +0 -0
- package/skills/__pycache__/search.cpython-312.pyc +0 -0
- package/skills/__pycache__/state.cpython-312.pyc +0 -0
- package/skills/__pycache__/store.cpython-312.pyc +0 -0
- package/skills/__pycache__/summarize.cpython-312.pyc +0 -0
- package/skills/__pycache__/timeline.cpython-312.pyc +0 -0
- package/skills/__pycache__/verification.cpython-312.pyc +0 -0
- package/skills/admin.py +469 -0
- package/skills/checkpoint.py +198 -0
- package/skills/claude_md.py +363 -0
- package/skills/cleanup.py +241 -0
- package/skills/grounding.py +801 -0
- package/skills/insights.py +231 -0
- package/skills/natural_language.py +277 -0
- package/skills/retrieve.py +67 -0
- package/skills/search.py +213 -0
- package/skills/state.py +182 -0
- package/skills/store.py +179 -0
- package/skills/summarize.py +588 -0
- package/skills/timeline.py +387 -0
- package/skills/verification.py +391 -0
- package/start_daemon.py +155 -0
- package/test_automation.py +221 -0
- package/test_complete.py +338 -0
- package/test_full.py +322 -0
- package/update_system.py +817 -0
- package/verify_db.py +134 -0
|
@@ -0,0 +1,2485 @@
|
|
|
1
|
+
"""Database service using SQLite with FAISS vector indexing.
|
|
2
|
+
|
|
3
|
+
Uses FAISS for O(log n) similarity search when available,
|
|
4
|
+
falls back to numpy-based O(n) search otherwise.
|
|
5
|
+
|
|
6
|
+
Features:
|
|
7
|
+
- Connection pooling for SQLite (thread-safe connections)
|
|
8
|
+
- Retry logic with exponential backoff for transient failures
|
|
9
|
+
- Query timeout handling
|
|
10
|
+
- Comprehensive error handling with logging
|
|
11
|
+
"""
|
|
12
|
+
import os
|
|
13
|
+
import json
|
|
14
|
+
import sqlite3
|
|
15
|
+
import numpy as np
|
|
16
|
+
import logging
|
|
17
|
+
import time
|
|
18
|
+
import threading
|
|
19
|
+
from queue import Queue, Empty
|
|
20
|
+
from functools import wraps
|
|
21
|
+
from datetime import datetime
|
|
22
|
+
from typing import List, Optional, Dict, Any, Tuple, Callable
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
from contextlib import contextmanager
|
|
25
|
+
from dotenv import load_dotenv
|
|
26
|
+
|
|
27
|
+
load_dotenv()
|
|
28
|
+
|
|
29
|
+
# Configure logging
|
|
30
|
+
logger = logging.getLogger(__name__)
|
|
31
|
+
if not logger.handlers:
|
|
32
|
+
handler = logging.StreamHandler()
|
|
33
|
+
handler.setFormatter(logging.Formatter(
|
|
34
|
+
'%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
|
35
|
+
))
|
|
36
|
+
logger.addHandler(handler)
|
|
37
|
+
logger.setLevel(logging.INFO)
|
|
38
|
+
|
|
39
|
+
DB_PATH = os.getenv("DATABASE_PATH", str(Path(__file__).parent.parent / "memories.db"))
|
|
40
|
+
USE_VECTOR_INDEX = os.getenv("USE_VECTOR_INDEX", "true").lower() == "true"
|
|
41
|
+
|
|
42
|
+
# Connection pool settings
|
|
43
|
+
DB_POOL_SIZE = int(os.getenv("DB_POOL_SIZE", "5"))
|
|
44
|
+
DB_TIMEOUT = float(os.getenv("DB_TIMEOUT", "30.0")) # Query timeout in seconds
|
|
45
|
+
DB_MAX_RETRIES = int(os.getenv("DB_MAX_RETRIES", "3"))
|
|
46
|
+
DB_RETRY_BASE_DELAY = float(os.getenv("DB_RETRY_BASE_DELAY", "0.1")) # Base delay for exponential backoff
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# Custom exceptions for structured error handling
class DatabaseError(Exception):
    """Root of the database exception hierarchy.

    Pairs the human-readable message with a machine-readable ``error_code``
    and, when available, the underlying exception for diagnostics.
    """
    def __init__(self, message: str, error_code: str, original_error: Optional[Exception] = None):
        self.error_code = error_code
        self.original_error = original_error
        super().__init__(message)
class ConnectionPoolError(DatabaseError):
    """Raised when the pool cannot supply or open a connection."""
    def __init__(self, message: str, original_error: Optional[Exception] = None):
        super().__init__(message, error_code="DB_POOL_ERROR", original_error=original_error)
class QueryTimeoutError(DatabaseError):
    """Raised when a query exceeds its allotted execution time."""
    def __init__(self, message: str, original_error: Optional[Exception] = None):
        super().__init__(message, error_code="DB_TIMEOUT", original_error=original_error)
class RetryExhaustedError(DatabaseError):
    """Raised once every retry attempt for an operation has failed."""
    def __init__(self, message: str, original_error: Optional[Exception] = None):
        super().__init__(message, error_code="DB_RETRY_EXHAUSTED", original_error=original_error)
class MigrationError(DatabaseError):
    """Raised when a schema migration step cannot be applied."""
    def __init__(self, message: str, original_error: Optional[Exception] = None):
        super().__init__(message, error_code="DB_MIGRATION_ERROR", original_error=original_error)
class SQLiteConnectionPool:
    """Thread-safe connection pool for SQLite.

    SQLite has limited connection pooling needs compared to client-server DBs,
    but this provides:
    - Thread-safe connection management
    - Connection reuse to avoid repeated file opens
    - Graceful connection lifecycle management

    Fix over the previous revision: the ``_created_connections`` /
    ``_active_connections`` counters are now only ever mutated while holding
    ``self._lock`` (they were previously updated lock-free on several paths,
    a data race under concurrent use).
    """

    def __init__(self, db_path: str, pool_size: int = 5, timeout: float = 30.0):
        self.db_path = db_path
        self.pool_size = pool_size
        self.timeout = timeout  # seconds to wait for a free connection when exhausted
        self._pool: Queue = Queue(maxsize=pool_size)
        self._lock = threading.Lock()  # guards the two counters below
        self._created_connections = 0  # connections created and not yet closed
        self._active_connections = 0   # connections currently checked out

    def _create_connection(self) -> sqlite3.Connection:
        """Create a new SQLite connection with optimal settings."""
        conn = sqlite3.connect(
            self.db_path,
            timeout=self.timeout,
            check_same_thread=False,
            isolation_level=None  # Autocommit mode for better concurrency
        )
        conn.row_factory = sqlite3.Row
        # Enable WAL mode for better concurrent read/write performance
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA synchronous=NORMAL")
        conn.execute("PRAGMA cache_size=-64000")  # 64MB cache
        conn.execute("PRAGMA busy_timeout=30000")  # 30 second busy timeout
        return conn

    def get_connection(self) -> sqlite3.Connection:
        """Get a connection from the pool or create a new one.

        Raises:
            ConnectionPoolError: if the pool is at capacity and no connection
                is returned within ``self.timeout`` seconds.
        """
        try:
            # Fast path: reuse an idle pooled connection without blocking.
            conn = self._pool.get_nowait()
            with self._lock:  # counter is shared across threads
                self._active_connections += 1
            return conn
        except Empty:
            pass

        # Create new connection if pool not full
        with self._lock:
            if self._created_connections < self.pool_size:
                conn = self._create_connection()
                self._created_connections += 1
                self._active_connections += 1
                logger.debug(f"Created new connection (total: {self._created_connections})")
                return conn

        # Pool is full, wait for available connection
        try:
            conn = self._pool.get(timeout=self.timeout)
            with self._lock:
                self._active_connections += 1
            return conn
        except Empty:
            raise ConnectionPoolError(
                f"Connection pool exhausted (size={self.pool_size}, timeout={self.timeout}s)"
            )

    def return_connection(self, conn: sqlite3.Connection):
        """Return a connection to the pool, discarding it if it is unusable."""
        if conn is None:
            return

        with self._lock:  # was previously decremented without the lock
            self._active_connections -= 1

        try:
            # Cheap liveness probe; a closed/broken connection raises here.
            conn.execute("SELECT 1")
            self._pool.put_nowait(conn)
        except sqlite3.Error:
            # sqlite3.ProgrammingError (closed handle) is a subclass of
            # sqlite3.Error, so one catch covers both cases the old tuple did.
            try:
                conn.close()
            except Exception:
                pass
            with self._lock:
                self._created_connections -= 1
            logger.warning("Closed invalid connection from pool")

    def close_all(self):
        """Close all connections in the pool."""
        with self._lock:
            while not self._pool.empty():
                try:
                    conn = self._pool.get_nowait()
                    conn.close()
                except Empty:
                    break
                except Exception as e:
                    logger.warning(f"Error closing connection: {e}")
            self._created_connections = 0
            self._active_connections = 0
        logger.info("Connection pool closed")

    def get_stats(self) -> Dict[str, Any]:
        """Get pool statistics (point-in-time snapshot, not atomic)."""
        return {
            "pool_size": self.pool_size,
            "created_connections": self._created_connections,
            "active_connections": self._active_connections,
            "available_connections": self._pool.qsize(),
            "timeout": self.timeout
        }
def with_retry(
    max_retries: int = DB_MAX_RETRIES,
    base_delay: float = DB_RETRY_BASE_DELAY,
    retryable_errors: tuple = (sqlite3.OperationalError, sqlite3.DatabaseError)
):
    """Decorator for retry logic with exponential backoff.

    Works on both sync and async callables; the correct wrapper is chosen
    automatically. BUGFIX: the async wrapper previously called the blocking
    ``time.sleep`` between attempts, freezing the entire event loop for the
    backoff duration — it now awaits ``asyncio.sleep`` instead.

    Args:
        max_retries: Maximum number of retry attempts
        base_delay: Base delay in seconds (will be multiplied exponentially)
        retryable_errors: Tuple of exception types that should trigger retry

    Raises:
        RetryExhaustedError: (from the wrapped call) once all attempts fail
            with a retryable error; carries the last error as ``original_error``.
    """
    import asyncio  # local import, as in the original, to keep module import light

    def decorator(func: Callable):
        @wraps(func)
        async def async_wrapper(*args, **kwargs):
            last_error = None
            for attempt in range(max_retries + 1):
                try:
                    return await func(*args, **kwargs)
                except retryable_errors as e:
                    last_error = e
                    if attempt < max_retries:
                        delay = base_delay * (2 ** attempt)  # Exponential backoff
                        logger.warning(
                            f"Retry {attempt + 1}/{max_retries} for {func.__name__} "
                            f"after {delay:.2f}s due to: {str(e)}"
                        )
                        # Non-blocking sleep: lets other tasks run during backoff.
                        await asyncio.sleep(delay)
                    else:
                        logger.error(
                            f"All {max_retries} retries exhausted for {func.__name__}: {str(e)}"
                        )
                        raise RetryExhaustedError(
                            f"Operation {func.__name__} failed after {max_retries} retries",
                            original_error=last_error
                        )

        @wraps(func)
        def sync_wrapper(*args, **kwargs):
            last_error = None
            for attempt in range(max_retries + 1):
                try:
                    return func(*args, **kwargs)
                except retryable_errors as e:
                    last_error = e
                    if attempt < max_retries:
                        delay = base_delay * (2 ** attempt)
                        logger.warning(
                            f"Retry {attempt + 1}/{max_retries} for {func.__name__} "
                            f"after {delay:.2f}s due to: {str(e)}"
                        )
                        time.sleep(delay)
                    else:
                        logger.error(
                            f"All {max_retries} retries exhausted for {func.__name__}: {str(e)}"
                        )
                        raise RetryExhaustedError(
                            f"Operation {func.__name__} failed after {max_retries} retries",
                            original_error=last_error
                        )

        # Return appropriate wrapper based on function type
        if asyncio.iscoroutinefunction(func):
            return async_wrapper
        return sync_wrapper

    return decorator
def normalize_path(path: str) -> str:
    """Normalize file paths to prevent duplicates from different separators.

    Every backslash becomes a forward slash and trailing slashes are removed,
    so 'C:/foo' and 'C:\\foo\\' collapse to the same key. A leading Windows
    drive letter is upper-cased for case-insensitive matching. Empty input
    is returned unchanged.
    """
    if not path:
        return path
    # Unify separators to Unix style, then strip any trailing slash(es).
    result = "/".join(path.split("\\")).rstrip("/")
    # Upper-case a Windows drive letter prefix (e.g. 'c:' -> 'C:').
    if len(result) > 1 and result[1] == ":":
        result = result[0].upper() + result[1:]
    return result
class DatabaseService:
|
|
281
|
+
"""Service for vector storage and retrieval using SQLite + FAISS.
|
|
282
|
+
|
|
283
|
+
Features:
|
|
284
|
+
- FAISS vector indexing for O(log n) similarity search
|
|
285
|
+
- Automatic index building on startup
|
|
286
|
+
- Incremental index updates on insert
|
|
287
|
+
- Fallback to numpy-based search if FAISS unavailable
|
|
288
|
+
- Connection pooling for thread-safe access
|
|
289
|
+
- Retry logic with exponential backoff
|
|
290
|
+
- Query timeout handling
|
|
291
|
+
"""
|
|
292
|
+
|
|
293
|
+
    def __init__(self):
        """Initialize configuration and lazy placeholders; no I/O happens here."""
        # Database file location from module-level config (env-overridable).
        self.db_path = DB_PATH
        # Long-lived primary connection for legacy direct access; set in connect().
        self.conn: Optional[sqlite3.Connection] = None
        self._connection_pool: Optional[SQLiteConnectionPool] = None

        # Vector indexes (lazy loaded)
        self._memories_index = None
        self._patterns_index = None
        self._timeline_index = None
        # Feature flag; flipped to False later if the FAISS-backed index import fails.
        self._use_vector_index = USE_VECTOR_INDEX
        self._index_initialized = False
@contextmanager
|
|
306
|
+
def get_connection(self):
|
|
307
|
+
"""Context manager for getting a connection from the pool.
|
|
308
|
+
|
|
309
|
+
Usage:
|
|
310
|
+
with self.get_connection() as conn:
|
|
311
|
+
cursor = conn.cursor()
|
|
312
|
+
cursor.execute(...)
|
|
313
|
+
|
|
314
|
+
Falls back to self.conn if pool not initialized.
|
|
315
|
+
"""
|
|
316
|
+
if self._connection_pool:
|
|
317
|
+
conn = self._connection_pool.get_connection()
|
|
318
|
+
try:
|
|
319
|
+
yield conn
|
|
320
|
+
finally:
|
|
321
|
+
self._connection_pool.return_connection(conn)
|
|
322
|
+
else:
|
|
323
|
+
# Fallback for backward compatibility
|
|
324
|
+
yield self.conn
|
|
325
|
+
|
|
326
|
+
    async def connect(self):
        """Establish database connection and initialize connection pool.

        Builds the thread-safe connection pool and additionally opens one
        long-lived primary connection (``self.conn``) for legacy call sites
        that bypass the pool.

        Raises:
            ConnectionPoolError: wrapping any ``sqlite3.Error`` raised while opening.
        """
        try:
            # Initialize connection pool
            self._connection_pool = SQLiteConnectionPool(
                db_path=self.db_path,
                pool_size=DB_POOL_SIZE,
                timeout=DB_TIMEOUT
            )
            # Keep a primary connection for backward compatibility
            self.conn = sqlite3.connect(self.db_path, check_same_thread=False)
            self.conn.row_factory = sqlite3.Row
            # Enable WAL mode on primary connection too
            self.conn.execute("PRAGMA journal_mode=WAL")
            self.conn.execute("PRAGMA busy_timeout=30000")
            logger.info(f"Database connected with pool size {DB_POOL_SIZE}")
        except sqlite3.Error as e:
            logger.error(f"Failed to connect to database: {e}")
            raise ConnectionPoolError(f"Failed to connect to database: {e}", original_error=e)
async def disconnect(self):
|
|
347
|
+
"""Close database connection, connection pool, and save indexes."""
|
|
348
|
+
# Save indexes
|
|
349
|
+
if self._memories_index:
|
|
350
|
+
try:
|
|
351
|
+
self._memories_index.save()
|
|
352
|
+
except Exception as e:
|
|
353
|
+
logger.warning(f"Failed to save memories index: {e}")
|
|
354
|
+
if self._patterns_index:
|
|
355
|
+
try:
|
|
356
|
+
self._patterns_index.save()
|
|
357
|
+
except Exception as e:
|
|
358
|
+
logger.warning(f"Failed to save patterns index: {e}")
|
|
359
|
+
if self._timeline_index:
|
|
360
|
+
try:
|
|
361
|
+
self._timeline_index.save()
|
|
362
|
+
except Exception as e:
|
|
363
|
+
logger.warning(f"Failed to save timeline index: {e}")
|
|
364
|
+
|
|
365
|
+
# Close connection pool
|
|
366
|
+
if self._connection_pool:
|
|
367
|
+
self._connection_pool.close_all()
|
|
368
|
+
self._connection_pool = None
|
|
369
|
+
|
|
370
|
+
# Close primary connection
|
|
371
|
+
if self.conn:
|
|
372
|
+
try:
|
|
373
|
+
self.conn.close()
|
|
374
|
+
except Exception as e:
|
|
375
|
+
logger.warning(f"Error closing primary connection: {e}")
|
|
376
|
+
self.conn = None
|
|
377
|
+
|
|
378
|
+
logger.info("Database disconnected")
|
|
379
|
+
|
|
380
|
+
def get_pool_stats(self) -> Dict[str, Any]:
|
|
381
|
+
"""Get connection pool statistics."""
|
|
382
|
+
if self._connection_pool:
|
|
383
|
+
return self._connection_pool.get_stats()
|
|
384
|
+
return {"pool_initialized": False}
|
|
385
|
+
|
|
386
|
+
async def _init_vector_indexes(self):
|
|
387
|
+
"""Initialize vector indexes from database."""
|
|
388
|
+
if self._index_initialized or not self._use_vector_index:
|
|
389
|
+
return
|
|
390
|
+
|
|
391
|
+
try:
|
|
392
|
+
from services.vector_index import get_index
|
|
393
|
+
|
|
394
|
+
# Initialize memories index
|
|
395
|
+
self._memories_index = get_index("memories")
|
|
396
|
+
if self._memories_index.size() == 0:
|
|
397
|
+
await self._rebuild_memories_index()
|
|
398
|
+
|
|
399
|
+
# Initialize patterns index
|
|
400
|
+
self._patterns_index = get_index("patterns")
|
|
401
|
+
if self._patterns_index.size() == 0:
|
|
402
|
+
await self._rebuild_patterns_index()
|
|
403
|
+
|
|
404
|
+
# Initialize timeline index
|
|
405
|
+
self._timeline_index = get_index("timeline")
|
|
406
|
+
if self._timeline_index.size() == 0:
|
|
407
|
+
await self._rebuild_timeline_index()
|
|
408
|
+
|
|
409
|
+
self._index_initialized = True
|
|
410
|
+
except ImportError:
|
|
411
|
+
# FAISS not available, will use numpy fallback
|
|
412
|
+
self._use_vector_index = False
|
|
413
|
+
|
|
414
|
+
async def _rebuild_memories_index(self):
|
|
415
|
+
"""Rebuild the memories vector index from database."""
|
|
416
|
+
if not self._memories_index:
|
|
417
|
+
return
|
|
418
|
+
|
|
419
|
+
cursor = self.conn.cursor()
|
|
420
|
+
cursor.execute("SELECT id, embedding FROM memories WHERE embedding IS NOT NULL")
|
|
421
|
+
rows = cursor.fetchall()
|
|
422
|
+
|
|
423
|
+
items = []
|
|
424
|
+
for row in rows:
|
|
425
|
+
embedding = self._deserialize_embedding(row["embedding"])
|
|
426
|
+
if embedding:
|
|
427
|
+
items.append((row["id"], embedding))
|
|
428
|
+
|
|
429
|
+
if items:
|
|
430
|
+
self._memories_index.rebuild(items)
|
|
431
|
+
self._memories_index.save()
|
|
432
|
+
|
|
433
|
+
async def _rebuild_patterns_index(self):
|
|
434
|
+
"""Rebuild the patterns vector index from database."""
|
|
435
|
+
if not self._patterns_index:
|
|
436
|
+
return
|
|
437
|
+
|
|
438
|
+
cursor = self.conn.cursor()
|
|
439
|
+
cursor.execute("SELECT id, embedding FROM patterns WHERE embedding IS NOT NULL")
|
|
440
|
+
rows = cursor.fetchall()
|
|
441
|
+
|
|
442
|
+
items = []
|
|
443
|
+
for row in rows:
|
|
444
|
+
embedding = self._deserialize_embedding(row["embedding"])
|
|
445
|
+
if embedding:
|
|
446
|
+
items.append((row["id"], embedding))
|
|
447
|
+
|
|
448
|
+
if items:
|
|
449
|
+
self._patterns_index.rebuild(items)
|
|
450
|
+
self._patterns_index.save()
|
|
451
|
+
|
|
452
|
+
async def _rebuild_timeline_index(self):
|
|
453
|
+
"""Rebuild the timeline vector index from database."""
|
|
454
|
+
if not self._timeline_index:
|
|
455
|
+
return
|
|
456
|
+
|
|
457
|
+
cursor = self.conn.cursor()
|
|
458
|
+
cursor.execute("SELECT id, embedding FROM timeline_events WHERE embedding IS NOT NULL")
|
|
459
|
+
rows = cursor.fetchall()
|
|
460
|
+
|
|
461
|
+
items = []
|
|
462
|
+
for row in rows:
|
|
463
|
+
embedding = self._deserialize_embedding(row["embedding"])
|
|
464
|
+
if embedding:
|
|
465
|
+
items.append((row["id"], embedding))
|
|
466
|
+
|
|
467
|
+
if items:
|
|
468
|
+
self._timeline_index.rebuild(items)
|
|
469
|
+
self._timeline_index.save()
|
|
470
|
+
|
|
471
|
+
def get_index_stats(self) -> Dict[str, Any]:
|
|
472
|
+
"""Get statistics about vector indexes."""
|
|
473
|
+
stats = {
|
|
474
|
+
"use_vector_index": self._use_vector_index,
|
|
475
|
+
"index_initialized": self._index_initialized
|
|
476
|
+
}
|
|
477
|
+
if self._memories_index:
|
|
478
|
+
stats["memories"] = self._memories_index.get_stats()
|
|
479
|
+
if self._patterns_index:
|
|
480
|
+
stats["patterns"] = self._patterns_index.get_stats()
|
|
481
|
+
if self._timeline_index:
|
|
482
|
+
stats["timeline"] = self._timeline_index.get_stats()
|
|
483
|
+
return stats
|
|
484
|
+
|
|
485
|
+
async def initialize_schema(self):
|
|
486
|
+
"""Create necessary tables if they don't exist."""
|
|
487
|
+
cursor = self.conn.cursor()
|
|
488
|
+
|
|
489
|
+
# Main memories table with rich context
|
|
490
|
+
cursor.execute("""
|
|
491
|
+
CREATE TABLE IF NOT EXISTS memories (
|
|
492
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
493
|
+
|
|
494
|
+
-- Content
|
|
495
|
+
type TEXT NOT NULL,
|
|
496
|
+
content TEXT NOT NULL,
|
|
497
|
+
embedding TEXT,
|
|
498
|
+
|
|
499
|
+
-- Project Context
|
|
500
|
+
project_path TEXT,
|
|
501
|
+
project_name TEXT,
|
|
502
|
+
project_type TEXT,
|
|
503
|
+
tech_stack TEXT,
|
|
504
|
+
|
|
505
|
+
-- Session Context
|
|
506
|
+
session_id TEXT,
|
|
507
|
+
chat_id TEXT,
|
|
508
|
+
|
|
509
|
+
-- Agent/Skill Context
|
|
510
|
+
agent_type TEXT,
|
|
511
|
+
skill_used TEXT,
|
|
512
|
+
tools_used TEXT,
|
|
513
|
+
|
|
514
|
+
-- Outcome
|
|
515
|
+
outcome TEXT,
|
|
516
|
+
success INTEGER,
|
|
517
|
+
user_feedback TEXT,
|
|
518
|
+
|
|
519
|
+
-- Metadata
|
|
520
|
+
tags TEXT,
|
|
521
|
+
metadata TEXT DEFAULT '{}',
|
|
522
|
+
importance INTEGER DEFAULT 5,
|
|
523
|
+
|
|
524
|
+
-- Timestamps
|
|
525
|
+
created_at TEXT DEFAULT (datetime('now')),
|
|
526
|
+
updated_at TEXT DEFAULT (datetime('now')),
|
|
527
|
+
last_accessed TEXT
|
|
528
|
+
)
|
|
529
|
+
""")
|
|
530
|
+
|
|
531
|
+
# Projects table - store project-level knowledge
|
|
532
|
+
cursor.execute("""
|
|
533
|
+
CREATE TABLE IF NOT EXISTS projects (
|
|
534
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
535
|
+
path TEXT UNIQUE NOT NULL,
|
|
536
|
+
name TEXT,
|
|
537
|
+
type TEXT,
|
|
538
|
+
tech_stack TEXT,
|
|
539
|
+
conventions TEXT,
|
|
540
|
+
preferences TEXT,
|
|
541
|
+
created_at TEXT DEFAULT (datetime('now')),
|
|
542
|
+
updated_at TEXT DEFAULT (datetime('now'))
|
|
543
|
+
)
|
|
544
|
+
""")
|
|
545
|
+
|
|
546
|
+
# Patterns table - reusable solutions
|
|
547
|
+
cursor.execute("""
|
|
548
|
+
CREATE TABLE IF NOT EXISTS patterns (
|
|
549
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
550
|
+
name TEXT NOT NULL,
|
|
551
|
+
problem_type TEXT,
|
|
552
|
+
solution TEXT NOT NULL,
|
|
553
|
+
embedding TEXT,
|
|
554
|
+
tech_context TEXT,
|
|
555
|
+
success_count INTEGER DEFAULT 1,
|
|
556
|
+
failure_count INTEGER DEFAULT 0,
|
|
557
|
+
metadata TEXT DEFAULT '{}',
|
|
558
|
+
created_at TEXT DEFAULT (datetime('now')),
|
|
559
|
+
updated_at TEXT DEFAULT (datetime('now'))
|
|
560
|
+
)
|
|
561
|
+
""")
|
|
562
|
+
|
|
563
|
+
# Create indexes for memories/patterns
|
|
564
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_memories_type ON memories(type)")
|
|
565
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_memories_project ON memories(project_path)")
|
|
566
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_memories_session ON memories(session_id)")
|
|
567
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_memories_agent ON memories(agent_type)")
|
|
568
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_memories_success ON memories(success)")
|
|
569
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_memories_importance ON memories(importance)")
|
|
570
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_patterns_problem ON patterns(problem_type)")
|
|
571
|
+
|
|
572
|
+
# Migration helper function
|
|
573
|
+
def safe_add_column(table: str, column: str, column_def: str) -> None:
    """Safely add a column if it doesn't exist, with proper error handling.

    Uses an EAFP probe: SELECT the column, and only ALTER the table when
    SQLite reports "no such column".  Raises MigrationError (chaining the
    original sqlite3 error via its original_error kwarg) on any failure
    other than the column already existing.
    """
    try:
        # Probe: this SELECT raises OperationalError when the column is missing.
        cursor.execute(f"SELECT {column} FROM {table} LIMIT 1")
        logger.debug(f"Column {table}.{column} already exists")
    except sqlite3.OperationalError as e:
        if "no such column" in str(e).lower():
            try:
                cursor.execute(f"ALTER TABLE {table} ADD COLUMN {column} {column_def}")
                logger.info(f"Migration: Added column {table}.{column}")
            except sqlite3.OperationalError as alter_error:
                # "duplicate column" is tolerated — the column is already
                # present (e.g. added between probe and ALTER); anything
                # else is a genuine migration failure.
                if "duplicate column" not in str(alter_error).lower():
                    logger.error(f"Failed to add column {table}.{column}: {alter_error}")
                    raise MigrationError(
                        f"Failed to add column {table}.{column}",
                        original_error=alter_error
                    )
        else:
            # OperationalError for some other reason (locked DB, bad table, ...).
            logger.error(f"Unexpected error checking column {table}.{column}: {e}")
            raise MigrationError(
                f"Unexpected error during migration check for {table}.{column}",
                original_error=e
            )
    except Exception as e:
        # Catch-all so a migration problem surfaces as MigrationError, not raw.
        logger.error(f"Unexpected error in migration for {table}.{column}: {e}")
        raise MigrationError(
            f"Migration failed for {table}.{column}",
            original_error=e
        )
|
|
602
|
+
|
|
603
|
+
# Migration: Add access_count column if it doesn't exist
|
|
604
|
+
safe_add_column("memories", "access_count", "INTEGER DEFAULT 0")
|
|
605
|
+
|
|
606
|
+
# Migration: Add decay_factor column if it doesn't exist
|
|
607
|
+
safe_add_column("memories", "decay_factor", "REAL DEFAULT 1.0")
|
|
608
|
+
|
|
609
|
+
# Migration: Add embedding_model column if it doesn't exist
|
|
610
|
+
safe_add_column("memories", "embedding_model", "TEXT DEFAULT 'nomic-embed-text'")
|
|
611
|
+
|
|
612
|
+
# ============================================================
|
|
613
|
+
# SESSION TIMELINE TABLES (Anti-Hallucination Layer)
|
|
614
|
+
# ============================================================
|
|
615
|
+
|
|
616
|
+
# Timeline events - chronological log of all session activity
|
|
617
|
+
cursor.execute("""
|
|
618
|
+
CREATE TABLE IF NOT EXISTS timeline_events (
|
|
619
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
620
|
+
|
|
621
|
+
-- Session Context
|
|
622
|
+
session_id TEXT NOT NULL,
|
|
623
|
+
project_path TEXT,
|
|
624
|
+
|
|
625
|
+
-- Event Identity
|
|
626
|
+
event_type TEXT NOT NULL,
|
|
627
|
+
sequence_num INTEGER NOT NULL,
|
|
628
|
+
|
|
629
|
+
-- Content
|
|
630
|
+
summary TEXT NOT NULL,
|
|
631
|
+
details TEXT,
|
|
632
|
+
embedding TEXT,
|
|
633
|
+
|
|
634
|
+
-- Causal Chain
|
|
635
|
+
parent_event_id INTEGER,
|
|
636
|
+
root_event_id INTEGER,
|
|
637
|
+
|
|
638
|
+
-- Entity References
|
|
639
|
+
entities TEXT,
|
|
640
|
+
|
|
641
|
+
-- Outcome
|
|
642
|
+
status TEXT DEFAULT 'completed',
|
|
643
|
+
outcome TEXT,
|
|
644
|
+
confidence REAL,
|
|
645
|
+
|
|
646
|
+
-- Flags
|
|
647
|
+
is_anchor INTEGER DEFAULT 0,
|
|
648
|
+
is_reversible INTEGER DEFAULT 1,
|
|
649
|
+
needs_verification INTEGER DEFAULT 0,
|
|
650
|
+
|
|
651
|
+
-- Timestamps
|
|
652
|
+
created_at TEXT DEFAULT (datetime('now')),
|
|
653
|
+
|
|
654
|
+
FOREIGN KEY (parent_event_id) REFERENCES timeline_events(id),
|
|
655
|
+
FOREIGN KEY (root_event_id) REFERENCES timeline_events(id)
|
|
656
|
+
)
|
|
657
|
+
""")
|
|
658
|
+
|
|
659
|
+
# Session state - current context for active session
|
|
660
|
+
cursor.execute("""
|
|
661
|
+
CREATE TABLE IF NOT EXISTS session_state (
|
|
662
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
663
|
+
session_id TEXT UNIQUE NOT NULL,
|
|
664
|
+
project_path TEXT,
|
|
665
|
+
|
|
666
|
+
-- Current State
|
|
667
|
+
current_goal TEXT,
|
|
668
|
+
pending_questions TEXT,
|
|
669
|
+
entity_registry TEXT,
|
|
670
|
+
decisions_summary TEXT,
|
|
671
|
+
|
|
672
|
+
-- Checkpoint tracking
|
|
673
|
+
last_checkpoint_id INTEGER,
|
|
674
|
+
events_since_checkpoint INTEGER DEFAULT 0,
|
|
675
|
+
|
|
676
|
+
-- Timestamps
|
|
677
|
+
created_at TEXT DEFAULT (datetime('now')),
|
|
678
|
+
updated_at TEXT DEFAULT (datetime('now')),
|
|
679
|
+
last_activity_at TEXT DEFAULT (datetime('now')),
|
|
680
|
+
|
|
681
|
+
FOREIGN KEY (last_checkpoint_id) REFERENCES checkpoints(id)
|
|
682
|
+
)
|
|
683
|
+
""")
|
|
684
|
+
|
|
685
|
+
# Checkpoints - session snapshots for resumption
|
|
686
|
+
cursor.execute("""
|
|
687
|
+
CREATE TABLE IF NOT EXISTS checkpoints (
|
|
688
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
689
|
+
session_id TEXT NOT NULL,
|
|
690
|
+
event_id INTEGER,
|
|
691
|
+
|
|
692
|
+
-- Checkpoint Content
|
|
693
|
+
summary TEXT NOT NULL,
|
|
694
|
+
key_facts TEXT,
|
|
695
|
+
decisions TEXT,
|
|
696
|
+
entities TEXT,
|
|
697
|
+
|
|
698
|
+
-- State at Checkpoint
|
|
699
|
+
current_goal TEXT,
|
|
700
|
+
pending_items TEXT,
|
|
701
|
+
|
|
702
|
+
-- For retrieval
|
|
703
|
+
embedding TEXT,
|
|
704
|
+
event_count INTEGER,
|
|
705
|
+
|
|
706
|
+
-- Timestamps
|
|
707
|
+
created_at TEXT DEFAULT (datetime('now')),
|
|
708
|
+
|
|
709
|
+
FOREIGN KEY (event_id) REFERENCES timeline_events(id)
|
|
710
|
+
)
|
|
711
|
+
""")
|
|
712
|
+
|
|
713
|
+
# Timeline indexes
|
|
714
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_timeline_session ON timeline_events(session_id, sequence_num)")
|
|
715
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_timeline_type ON timeline_events(event_type)")
|
|
716
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_timeline_parent ON timeline_events(parent_event_id)")
|
|
717
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_timeline_root ON timeline_events(root_event_id)")
|
|
718
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_timeline_created ON timeline_events(created_at)")
|
|
719
|
+
|
|
720
|
+
# Session state indexes
|
|
721
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_session_project ON session_state(project_path)")
|
|
722
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_session_activity ON session_state(last_activity_at)")
|
|
723
|
+
|
|
724
|
+
# Checkpoint indexes
|
|
725
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_checkpoint_session ON checkpoints(session_id, created_at DESC)")
|
|
726
|
+
|
|
727
|
+
# ============================================================
|
|
728
|
+
# AGENT CONFIGURATION TABLES
|
|
729
|
+
# ============================================================
|
|
730
|
+
|
|
731
|
+
# Project agent configurations
|
|
732
|
+
cursor.execute("""
|
|
733
|
+
CREATE TABLE IF NOT EXISTS project_agent_config (
|
|
734
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
735
|
+
project_path TEXT NOT NULL,
|
|
736
|
+
agent_id TEXT NOT NULL,
|
|
737
|
+
enabled INTEGER DEFAULT 1,
|
|
738
|
+
priority INTEGER DEFAULT 5,
|
|
739
|
+
settings TEXT DEFAULT '{}',
|
|
740
|
+
created_at TEXT DEFAULT (datetime('now')),
|
|
741
|
+
updated_at TEXT DEFAULT (datetime('now')),
|
|
742
|
+
UNIQUE(project_path, agent_id)
|
|
743
|
+
)
|
|
744
|
+
""")
|
|
745
|
+
|
|
746
|
+
# MCP server configurations per project
|
|
747
|
+
cursor.execute("""
|
|
748
|
+
CREATE TABLE IF NOT EXISTS project_mcp_config (
|
|
749
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
750
|
+
project_path TEXT NOT NULL,
|
|
751
|
+
mcp_id TEXT NOT NULL,
|
|
752
|
+
enabled INTEGER DEFAULT 1,
|
|
753
|
+
settings TEXT DEFAULT '{}',
|
|
754
|
+
created_at TEXT DEFAULT (datetime('now')),
|
|
755
|
+
updated_at TEXT DEFAULT (datetime('now')),
|
|
756
|
+
UNIQUE(project_path, mcp_id)
|
|
757
|
+
)
|
|
758
|
+
""")
|
|
759
|
+
|
|
760
|
+
# Hook configurations per project
|
|
761
|
+
cursor.execute("""
|
|
762
|
+
CREATE TABLE IF NOT EXISTS project_hook_config (
|
|
763
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
764
|
+
project_path TEXT NOT NULL,
|
|
765
|
+
hook_id TEXT NOT NULL,
|
|
766
|
+
enabled INTEGER DEFAULT 1,
|
|
767
|
+
settings TEXT DEFAULT '{}',
|
|
768
|
+
created_at TEXT DEFAULT (datetime('now')),
|
|
769
|
+
updated_at TEXT DEFAULT (datetime('now')),
|
|
770
|
+
UNIQUE(project_path, hook_id)
|
|
771
|
+
)
|
|
772
|
+
""")
|
|
773
|
+
|
|
774
|
+
# Project preferences
|
|
775
|
+
cursor.execute("""
|
|
776
|
+
CREATE TABLE IF NOT EXISTS project_preferences (
|
|
777
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
778
|
+
project_path TEXT UNIQUE NOT NULL,
|
|
779
|
+
name TEXT,
|
|
780
|
+
description TEXT,
|
|
781
|
+
color TEXT DEFAULT '#58a6ff',
|
|
782
|
+
icon TEXT DEFAULT 'folder',
|
|
783
|
+
default_model TEXT DEFAULT 'sonnet',
|
|
784
|
+
auto_memory INTEGER DEFAULT 1,
|
|
785
|
+
auto_checkpoint INTEGER DEFAULT 1,
|
|
786
|
+
settings TEXT DEFAULT '{}',
|
|
787
|
+
created_at TEXT DEFAULT (datetime('now')),
|
|
788
|
+
updated_at TEXT DEFAULT (datetime('now'))
|
|
789
|
+
)
|
|
790
|
+
""")
|
|
791
|
+
|
|
792
|
+
# Agent config indexes
|
|
793
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_agent_config_project ON project_agent_config(project_path)")
|
|
794
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_mcp_config_project ON project_mcp_config(project_path)")
|
|
795
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_hook_config_project ON project_hook_config(project_path)")
|
|
796
|
+
|
|
797
|
+
# ============================================================
|
|
798
|
+
# INSIGHTS TABLE (Cross-Session Learning)
|
|
799
|
+
# ============================================================
|
|
800
|
+
|
|
801
|
+
# Aggregated insights from cross-session analysis
|
|
802
|
+
cursor.execute("""
|
|
803
|
+
CREATE TABLE IF NOT EXISTS insights (
|
|
804
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
805
|
+
|
|
806
|
+
-- Insight Identity
|
|
807
|
+
insight_type TEXT NOT NULL,
|
|
808
|
+
title TEXT NOT NULL,
|
|
809
|
+
description TEXT NOT NULL,
|
|
810
|
+
|
|
811
|
+
-- Evidence
|
|
812
|
+
evidence_ids TEXT,
|
|
813
|
+
evidence_count INTEGER DEFAULT 1,
|
|
814
|
+
source_sessions TEXT,
|
|
815
|
+
|
|
816
|
+
-- Scoring
|
|
817
|
+
confidence REAL DEFAULT 0.5,
|
|
818
|
+
impact_score REAL DEFAULT 5.0,
|
|
819
|
+
validation_count INTEGER DEFAULT 0,
|
|
820
|
+
invalidation_count INTEGER DEFAULT 0,
|
|
821
|
+
|
|
822
|
+
-- Categorization
|
|
823
|
+
category TEXT,
|
|
824
|
+
tags TEXT,
|
|
825
|
+
project_path TEXT,
|
|
826
|
+
tech_context TEXT,
|
|
827
|
+
|
|
828
|
+
-- For similarity search
|
|
829
|
+
embedding TEXT,
|
|
830
|
+
|
|
831
|
+
-- Status
|
|
832
|
+
status TEXT DEFAULT 'active',
|
|
833
|
+
applied_to_claude_md INTEGER DEFAULT 0,
|
|
834
|
+
|
|
835
|
+
-- Timestamps
|
|
836
|
+
created_at TEXT DEFAULT (datetime('now')),
|
|
837
|
+
updated_at TEXT DEFAULT (datetime('now')),
|
|
838
|
+
last_validated_at TEXT
|
|
839
|
+
)
|
|
840
|
+
""")
|
|
841
|
+
|
|
842
|
+
# Insight feedback for accuracy tracking
|
|
843
|
+
cursor.execute("""
|
|
844
|
+
CREATE TABLE IF NOT EXISTS insight_feedback (
|
|
845
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
846
|
+
insight_id INTEGER NOT NULL,
|
|
847
|
+
session_id TEXT,
|
|
848
|
+
feedback_type TEXT NOT NULL,
|
|
849
|
+
helpful INTEGER,
|
|
850
|
+
comment TEXT,
|
|
851
|
+
created_at TEXT DEFAULT (datetime('now')),
|
|
852
|
+
FOREIGN KEY (insight_id) REFERENCES insights(id)
|
|
853
|
+
)
|
|
854
|
+
""")
|
|
855
|
+
|
|
856
|
+
# Insight indexes
|
|
857
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_insights_type ON insights(insight_type)")
|
|
858
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_insights_status ON insights(status)")
|
|
859
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_insights_project ON insights(project_path)")
|
|
860
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_insights_confidence ON insights(confidence DESC)")
|
|
861
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_insight_feedback ON insight_feedback(insight_id)")
|
|
862
|
+
|
|
863
|
+
# ============================================================
|
|
864
|
+
# MEMORY CLEANUP AND ARCHIVAL TABLES
|
|
865
|
+
# ============================================================
|
|
866
|
+
|
|
867
|
+
# Archived memories (soft-deleted for recovery)
|
|
868
|
+
cursor.execute("""
|
|
869
|
+
CREATE TABLE IF NOT EXISTS memory_archive (
|
|
870
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
871
|
+
original_id INTEGER NOT NULL,
|
|
872
|
+
|
|
873
|
+
-- Original memory data
|
|
874
|
+
type TEXT NOT NULL,
|
|
875
|
+
content TEXT NOT NULL,
|
|
876
|
+
embedding TEXT,
|
|
877
|
+
project_path TEXT,
|
|
878
|
+
session_id TEXT,
|
|
879
|
+
importance INTEGER,
|
|
880
|
+
access_count INTEGER,
|
|
881
|
+
decay_factor REAL,
|
|
882
|
+
metadata TEXT,
|
|
883
|
+
|
|
884
|
+
-- Archive metadata
|
|
885
|
+
archive_reason TEXT NOT NULL,
|
|
886
|
+
archived_at TEXT DEFAULT (datetime('now')),
|
|
887
|
+
archived_by TEXT,
|
|
888
|
+
relevance_score_at_archive REAL,
|
|
889
|
+
expires_at TEXT
|
|
890
|
+
)
|
|
891
|
+
""")
|
|
892
|
+
|
|
893
|
+
# Cleanup configuration per project
|
|
894
|
+
cursor.execute("""
|
|
895
|
+
CREATE TABLE IF NOT EXISTS cleanup_config (
|
|
896
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
897
|
+
project_path TEXT UNIQUE,
|
|
898
|
+
|
|
899
|
+
-- Retention settings
|
|
900
|
+
retention_days INTEGER DEFAULT 90,
|
|
901
|
+
min_relevance_score REAL DEFAULT 0.1,
|
|
902
|
+
keep_high_importance INTEGER DEFAULT 1,
|
|
903
|
+
importance_threshold INTEGER DEFAULT 7,
|
|
904
|
+
|
|
905
|
+
-- Deduplication settings
|
|
906
|
+
dedup_enabled INTEGER DEFAULT 1,
|
|
907
|
+
dedup_threshold REAL DEFAULT 0.95,
|
|
908
|
+
|
|
909
|
+
-- Archive settings
|
|
910
|
+
archive_before_delete INTEGER DEFAULT 1,
|
|
911
|
+
archive_retention_days INTEGER DEFAULT 365,
|
|
912
|
+
|
|
913
|
+
-- Schedule
|
|
914
|
+
auto_cleanup_enabled INTEGER DEFAULT 0,
|
|
915
|
+
last_cleanup_at TEXT,
|
|
916
|
+
|
|
917
|
+
created_at TEXT DEFAULT (datetime('now')),
|
|
918
|
+
updated_at TEXT DEFAULT (datetime('now'))
|
|
919
|
+
)
|
|
920
|
+
""")
|
|
921
|
+
|
|
922
|
+
# Cleanup audit log
|
|
923
|
+
cursor.execute("""
|
|
924
|
+
CREATE TABLE IF NOT EXISTS cleanup_log (
|
|
925
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
926
|
+
cleanup_type TEXT NOT NULL,
|
|
927
|
+
project_path TEXT,
|
|
928
|
+
memories_archived INTEGER DEFAULT 0,
|
|
929
|
+
memories_deleted INTEGER DEFAULT 0,
|
|
930
|
+
memories_merged INTEGER DEFAULT 0,
|
|
931
|
+
details TEXT,
|
|
932
|
+
created_at TEXT DEFAULT (datetime('now'))
|
|
933
|
+
)
|
|
934
|
+
""")
|
|
935
|
+
|
|
936
|
+
# Archive indexes
|
|
937
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_archive_original ON memory_archive(original_id)")
|
|
938
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_archive_project ON memory_archive(project_path)")
|
|
939
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_archive_reason ON memory_archive(archive_reason)")
|
|
940
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_cleanup_project ON cleanup_config(project_path)")
|
|
941
|
+
|
|
942
|
+
# ============================================================
|
|
943
|
+
# ANCHOR CONFLICT RESOLUTION TABLES
|
|
944
|
+
# ============================================================
|
|
945
|
+
|
|
946
|
+
# Anchor conflicts for manual resolution
|
|
947
|
+
cursor.execute("""
|
|
948
|
+
CREATE TABLE IF NOT EXISTS anchor_conflicts (
|
|
949
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
950
|
+
session_id TEXT,
|
|
951
|
+
project_path TEXT,
|
|
952
|
+
|
|
953
|
+
-- The conflicting anchors
|
|
954
|
+
anchor1_id INTEGER NOT NULL,
|
|
955
|
+
anchor2_id INTEGER NOT NULL,
|
|
956
|
+
anchor1_summary TEXT,
|
|
957
|
+
anchor2_summary TEXT,
|
|
958
|
+
|
|
959
|
+
-- Conflict details
|
|
960
|
+
conflict_type TEXT NOT NULL,
|
|
961
|
+
similarity_score REAL,
|
|
962
|
+
auto_resolution_attempted INTEGER DEFAULT 0,
|
|
963
|
+
|
|
964
|
+
-- Resolution
|
|
965
|
+
status TEXT DEFAULT 'unresolved',
|
|
966
|
+
resolution TEXT,
|
|
967
|
+
resolved_anchor_id INTEGER,
|
|
968
|
+
resolved_at TEXT,
|
|
969
|
+
resolved_by TEXT,
|
|
970
|
+
|
|
971
|
+
created_at TEXT DEFAULT (datetime('now')),
|
|
972
|
+
|
|
973
|
+
FOREIGN KEY (anchor1_id) REFERENCES timeline_events(id),
|
|
974
|
+
FOREIGN KEY (anchor2_id) REFERENCES timeline_events(id)
|
|
975
|
+
)
|
|
976
|
+
""")
|
|
977
|
+
|
|
978
|
+
# Anchor history to track fact evolution
|
|
979
|
+
cursor.execute("""
|
|
980
|
+
CREATE TABLE IF NOT EXISTS anchor_history (
|
|
981
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
982
|
+
anchor_id INTEGER NOT NULL,
|
|
983
|
+
session_id TEXT,
|
|
984
|
+
project_path TEXT,
|
|
985
|
+
|
|
986
|
+
-- State tracking
|
|
987
|
+
action TEXT NOT NULL,
|
|
988
|
+
previous_summary TEXT,
|
|
989
|
+
new_summary TEXT,
|
|
990
|
+
superseded_by INTEGER,
|
|
991
|
+
|
|
992
|
+
-- Context
|
|
993
|
+
reason TEXT,
|
|
994
|
+
confidence REAL,
|
|
995
|
+
|
|
996
|
+
created_at TEXT DEFAULT (datetime('now')),
|
|
997
|
+
|
|
998
|
+
FOREIGN KEY (anchor_id) REFERENCES timeline_events(id),
|
|
999
|
+
FOREIGN KEY (superseded_by) REFERENCES timeline_events(id)
|
|
1000
|
+
)
|
|
1001
|
+
""")
|
|
1002
|
+
|
|
1003
|
+
# Conflict indexes
|
|
1004
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_conflicts_status ON anchor_conflicts(status)")
|
|
1005
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_conflicts_session ON anchor_conflicts(session_id)")
|
|
1006
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_anchor_history ON anchor_history(anchor_id)")
|
|
1007
|
+
|
|
1008
|
+
# ============================================================
|
|
1009
|
+
# MARKDOWN SYNC TABLES (Moltbot-inspired transparency)
|
|
1010
|
+
# ============================================================
|
|
1011
|
+
|
|
1012
|
+
# Markdown sync tracking - tracks which memories are synced to markdown files
|
|
1013
|
+
cursor.execute("""
|
|
1014
|
+
CREATE TABLE IF NOT EXISTS markdown_syncs (
|
|
1015
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
1016
|
+
file_type TEXT NOT NULL, -- 'memory_md', 'daily_log', 'flush'
|
|
1017
|
+
file_path TEXT NOT NULL,
|
|
1018
|
+
memory_id INTEGER,
|
|
1019
|
+
project_path TEXT,
|
|
1020
|
+
synced_at TEXT DEFAULT (datetime('now')),
|
|
1021
|
+
content_hash TEXT,
|
|
1022
|
+
|
|
1023
|
+
FOREIGN KEY (memory_id) REFERENCES memories(id)
|
|
1024
|
+
)
|
|
1025
|
+
""")
|
|
1026
|
+
|
|
1027
|
+
# Indexes for markdown_syncs
|
|
1028
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_markdown_syncs_type ON markdown_syncs(file_type)")
|
|
1029
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_markdown_syncs_project ON markdown_syncs(project_path)")
|
|
1030
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_markdown_syncs_memory ON markdown_syncs(memory_id)")
|
|
1031
|
+
|
|
1032
|
+
# Migration: Add last_flush_at column to session_state if it doesn't exist
|
|
1033
|
+
safe_add_column("session_state", "last_flush_at", "TEXT")
|
|
1034
|
+
|
|
1035
|
+
self.conn.commit()
|
|
1036
|
+
|
|
1037
|
+
def _serialize_embedding(self, embedding: List[float]) -> str:
|
|
1038
|
+
return json.dumps(embedding)
|
|
1039
|
+
|
|
1040
|
+
def _deserialize_embedding(self, embedding_str: str) -> List[float]:
|
|
1041
|
+
return json.loads(embedding_str) if embedding_str else []
|
|
1042
|
+
|
|
1043
|
+
def _cosine_similarity(self, vec1: List[float], vec2: List[float]) -> float:
|
|
1044
|
+
a = np.array(vec1)
|
|
1045
|
+
b = np.array(vec2)
|
|
1046
|
+
norm_a = np.linalg.norm(a)
|
|
1047
|
+
norm_b = np.linalg.norm(b)
|
|
1048
|
+
if norm_a == 0 or norm_b == 0:
|
|
1049
|
+
return 0.0
|
|
1050
|
+
return float(np.dot(a, b) / (norm_a * norm_b))
|
|
1051
|
+
|
|
1052
|
+
def calculate_relevance_score(
    self,
    importance: int,
    created_at: str,
    last_accessed: Optional[str],
    access_count: int,
    decay_factor: float = 1.0,
    recency_half_life_days: float = 30.0
) -> float:
    """Calculate relevance score based on importance, recency, and access patterns.

    Formula: (importance / 10) * recency_factor * access_factor * decay_factor

    Args:
        importance: Base importance (1-10).
        created_at: Creation timestamp.
        last_accessed: Last access timestamp (None if never accessed).
        access_count: Number of times accessed.
        decay_factor: Manual decay/boost multiplier.
        recency_half_life_days: Days until score halves.

    Returns:
        Relevance score (0.0 to ~10.0), rounded to 4 decimal places.
    """
    import math

    # Normalize importance to 0-1.
    base = importance / 10.0

    # Recency: halve the contribution every `recency_half_life_days`,
    # measured from the most recent touch (falls back to creation time).
    recency_factor = 1.0
    try:
        reference = last_accessed or created_at
        if reference:
            # SQLite timestamps look like 'YYYY-MM-DD HH:MM:SS'; a trailing
            # 'Z' is rewritten to an explicit UTC offset for fromisoformat.
            parsed = datetime.fromisoformat(reference.replace('Z', '+00:00'))
            age_days = (datetime.now() - parsed.replace(tzinfo=None)).days
            recency_factor = math.pow(0.5, age_days / recency_half_life_days)
    except (ValueError, TypeError):
        # Unparseable timestamp: treat the memory as fresh.
        recency_factor = 1.0

    # Frequently-read memories get a logarithmic boost; the count is capped
    # at 100 and offset by 1 to avoid log(0).
    access_factor = 1.0 + 0.1 * math.log(1 + min(access_count, 100))

    return round(base * recency_factor * access_factor * decay_factor, 4)
|
|
1105
|
+
|
|
1106
|
+
async def update_access_stats(self, memory_id: int):
    """Touch a memory: stamp last_accessed and increment access_count."""
    sql = """
        UPDATE memories
        SET last_accessed = datetime('now'),
            access_count = COALESCE(access_count, 0) + 1
        WHERE id = ?
    """
    cur = self.conn.cursor()
    cur.execute(sql, (memory_id,))
    self.conn.commit()
|
|
1119
|
+
|
|
1120
|
+
async def boost_memory(self, memory_id: int, factor: float = 1.5) -> bool:
    """Raise a memory's relevance by scaling up its decay_factor.

    Args:
        memory_id: ID of the memory to boost.
        factor: Multiplier applied to the current decay_factor
            (a missing/NULL decay_factor is treated as 1.0).

    Returns:
        True if a matching row was updated.
    """
    cur = self.conn.cursor()
    cur.execute(
        """
        UPDATE memories
        SET decay_factor = COALESCE(decay_factor, 1.0) * ?,
            updated_at = datetime('now')
        WHERE id = ?
        """,
        (factor, memory_id)
    )
    self.conn.commit()
    updated = cur.rowcount > 0
    return updated
|
|
1142
|
+
|
|
1143
|
+
async def decay_memory(self, memory_id: int, factor: float = 0.5) -> bool:
    """Lower a memory's relevance by scaling down its decay_factor.

    Args:
        memory_id: ID of the memory to decay.
        factor: Multiplier applied to the current decay_factor
            (a missing/NULL decay_factor is treated as 1.0).

    Returns:
        True if a matching row was updated.
    """
    sql = """
        UPDATE memories
        SET decay_factor = COALESCE(decay_factor, 1.0) * ?,
            updated_at = datetime('now')
        WHERE id = ?
        """
    cur = self.conn.cursor()
    cur.execute(sql, (factor, memory_id))
    self.conn.commit()
    return cur.rowcount > 0
|
|
1165
|
+
|
|
1166
|
+
async def get_memories_by_relevance(
    self,
    limit: int = 20,
    memory_type: Optional[str] = None,
    project_path: Optional[str] = None,
    min_relevance: float = 0.1
) -> List[Dict[str, Any]]:
    """Fetch memories ordered by their computed relevance score.

    Loads candidate rows (optionally filtered by type and project),
    scores each one with calculate_relevance_score, drops anything
    below `min_relevance`, and returns the top `limit` by score.

    Args:
        limit: Maximum number of results.
        memory_type: Filter by type.
        project_path: Filter by project.
        min_relevance: Minimum relevance score threshold.

    Returns:
        List of memory dicts, each including a "relevance_score" key.
    """
    project_path = normalize_path(project_path)
    cursor = self.conn.cursor()

    sql = """
        SELECT id, type, content, importance, created_at, last_accessed,
               COALESCE(access_count, 0) as access_count,
               COALESCE(decay_factor, 1.0) as decay_factor,
               project_path, project_name, outcome, success
        FROM memories WHERE 1=1
    """
    args = []
    if memory_type:
        sql += " AND type = ?"
        args.append(memory_type)
    if project_path:
        sql += " AND project_path = ?"
        args.append(project_path)

    scored: List[Dict[str, Any]] = []
    for row in cursor.execute(sql, args).fetchall():
        score = self.calculate_relevance_score(
            importance=row["importance"],
            created_at=row["created_at"],
            last_accessed=row["last_accessed"],
            access_count=row["access_count"],
            decay_factor=row["decay_factor"]
        )
        if score < min_relevance:
            continue
        scored.append({
            "id": row["id"],
            "type": row["type"],
            "content": row["content"],
            "relevance_score": score,
            "importance": row["importance"],
            "access_count": row["access_count"],
            "decay_factor": row["decay_factor"],
            "project_path": row["project_path"],
            "outcome": row["outcome"],
            # SQLite stores success as 0/1/NULL; surface it as bool or None.
            "success": None if row["success"] is None else bool(row["success"]),
            "created_at": row["created_at"],
            "last_accessed": row["last_accessed"]
        })

    scored.sort(key=lambda m: m["relevance_score"], reverse=True)
    return scored[:limit]
|
|
1235
|
+
|
|
1236
|
+
async def store_memory(
    self,
    memory_type: str,
    content: str,
    embedding: List[float],
    metadata: Optional[Dict[str, Any]] = None,
    session_id: Optional[str] = None,
    # New context fields
    project_path: Optional[str] = None,
    project_name: Optional[str] = None,
    project_type: Optional[str] = None,
    tech_stack: Optional[List[str]] = None,
    chat_id: Optional[str] = None,
    agent_type: Optional[str] = None,
    skill_used: Optional[str] = None,
    tools_used: Optional[List[str]] = None,
    outcome: Optional[str] = None,
    success: Optional[bool] = None,
    tags: Optional[List[str]] = None,
    importance: int = 5
) -> int:
    """Store a memory with full context.

    Also adds the embedding to the FAISS index for fast search.

    List-valued context fields (tech_stack, tools_used, tags) and the
    metadata dict are JSON-encoded before insertion; the embedding is
    serialized via _serialize_embedding.

    Returns:
        The rowid of the newly inserted memory.
    """
    # Normalize project path to prevent duplicates
    project_path = normalize_path(project_path)

    cursor = self.conn.cursor()
    # NOTE: the 17 placeholders below are positional — the tuple order must
    # match the column list in the INSERT exactly.
    cursor.execute(
        """
        INSERT INTO memories (
            type, content, embedding, metadata,
            project_path, project_name, project_type, tech_stack,
            session_id, chat_id,
            agent_type, skill_used, tools_used,
            outcome, success,
            tags, importance
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """,
        (
            memory_type,
            content,
            self._serialize_embedding(embedding),
            json.dumps(metadata or {}),
            project_path,
            project_name,
            project_type,
            json.dumps(tech_stack) if tech_stack else None,
            session_id,
            chat_id,
            agent_type,
            skill_used,
            json.dumps(tools_used) if tools_used else None,
            outcome,
            # Tri-state bool -> 1 / 0 / NULL so "unknown" stays distinct.
            1 if success else (0 if success is False else None),
            json.dumps(tags) if tags else None,
            importance
        )
    )
    self.conn.commit()
    memory_id = cursor.lastrowid

    # Add to FAISS index if available
    if self._memories_index and embedding:
        self._memories_index.add(memory_id, embedding)

    return memory_id
|
|
1304
|
+
|
|
1305
|
+
async def search_similar(
    self,
    embedding: List[float],
    limit: int = 10,
    memory_type: Optional[str] = None,
    session_id: Optional[str] = None,
    project_path: Optional[str] = None,
    agent_type: Optional[str] = None,
    success_only: bool = False,
    threshold: float = 0.5
) -> List[Dict[str, Any]]:
    """Search for similar memories with optional filters.

    Uses the FAISS index for fast similarity search when it is available
    and non-empty, falling back to a numpy-based linear scan over all
    stored embeddings otherwise.

    Args:
        embedding: Query embedding vector.
        limit: Maximum number of results to return.
        memory_type: Optional filter on the memory ``type`` column.
        session_id: Optional filter on the originating session.
        project_path: Optional filter on the (normalized) project path.
        agent_type: Optional filter on the agent that created the memory.
        success_only: When True, only return memories marked successful.
        threshold: Minimum similarity for a match.

    Returns:
        Up to ``limit`` result dicts, ranked by similarity weighted by
        importance (importance / 10). Each dict carries a
        ``search_method`` key of ``"faiss"`` or ``"numpy"`` indicating
        which path produced it. ``last_accessed`` is refreshed for every
        returned row.
    """
    # Normalize project path for consistent matching
    project_path = normalize_path(project_path)

    # Ensure indexes are initialized
    await self._init_vector_indexes()

    cursor = self.conn.cursor()
    has_filters = bool(memory_type or session_id or project_path or agent_type or success_only)

    def apply_filters(sql: str, params: List[Any]) -> str:
        # Append the optional WHERE-clause filters shared by both paths.
        if memory_type:
            sql += " AND type = ?"
            params.append(memory_type)
        if session_id:
            sql += " AND session_id = ?"
            params.append(session_id)
        if project_path:
            sql += " AND project_path = ?"
            params.append(project_path)
        if agent_type:
            sql += " AND agent_type = ?"
            params.append(agent_type)
        if success_only:
            sql += " AND success = 1"
        return sql

    def row_to_result(row, similarity: float, method: str) -> Dict[str, Any]:
        # Convert a DB row into the public result dict shape (shared by
        # the FAISS and numpy paths; previously duplicated inline).
        return {
            "id": row["id"],
            "type": row["type"],
            "content": row["content"],
            "similarity": similarity,
            "search_method": method,
            "project": {
                "path": row["project_path"],
                "name": row["project_name"],
                "type": row["project_type"],
                "tech_stack": json.loads(row["tech_stack"]) if row["tech_stack"] else None
            },
            "session_id": row["session_id"],
            "agent": {
                "type": row["agent_type"],
                "skill": row["skill_used"],
                "tools": json.loads(row["tools_used"]) if row["tools_used"] else None
            },
            "outcome": row["outcome"],
            "success": bool(row["success"]) if row["success"] is not None else None,
            "tags": json.loads(row["tags"]) if row["tags"] else None,
            "importance": row["importance"],
            "created_at": row["created_at"],
            "metadata": json.loads(row["metadata"]) if row["metadata"] else {}
        }

    def finalize(results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        # Rank by similarity weighted by importance, refresh
        # last_accessed for the rows actually returned, and truncate.
        results.sort(key=lambda x: x["similarity"] * (x["importance"] / 10), reverse=True)
        if results:
            ids = [r["id"] for r in results[:limit]]
            cursor.execute(
                f"UPDATE memories SET last_accessed = datetime('now') WHERE id IN ({','.join('?' * len(ids))})",
                ids
            )
            self.conn.commit()
        return results[:limit]

    # Try FAISS index first (if no filters or willing to post-filter)
    if self._memories_index and self._memories_index.size() > 0:
        # Get more candidates than needed to allow for filtering
        candidate_limit = limit * 5 if has_filters else limit * 2

        # FAISS search
        candidates = self._memories_index.search(
            query_embedding=embedding,
            k=candidate_limit,
            threshold=threshold
        )

        if candidates:
            # Get full records for candidates
            candidate_ids = [c[0] for c in candidates]
            similarity_map = {c[0]: c[1] for c in candidates}

            placeholders = ",".join("?" * len(candidate_ids))
            query = f"""
            SELECT id, type, content, metadata,
                   project_path, project_name, project_type, tech_stack,
                   session_id, chat_id, agent_type, skill_used, tools_used,
                   outcome, success, tags, importance, created_at
            FROM memories WHERE id IN ({placeholders})
            """
            params: List[Any] = list(candidate_ids)
            query = apply_filters(query, params)

            cursor.execute(query, params)
            results = [
                row_to_result(row, similarity_map.get(row["id"], 0), "faiss")
                for row in cursor.fetchall()
            ]
            return finalize(results)

    # Fallback to numpy-based linear scan over all stored embeddings
    query = """
    SELECT id, type, content, embedding, metadata,
           project_path, project_name, project_type, tech_stack,
           session_id, chat_id, agent_type, skill_used, tools_used,
           outcome, success, tags, importance, created_at
    FROM memories WHERE 1=1
    """
    params = []
    query = apply_filters(query, params)

    cursor.execute(query, params)

    results = []
    for row in cursor.fetchall():
        stored_embedding = self._deserialize_embedding(row["embedding"])
        if stored_embedding:
            similarity = self._cosine_similarity(embedding, stored_embedding)
            if similarity >= threshold:
                results.append(row_to_result(row, similarity, "numpy"))

    return finalize(results)
|
|
1492
|
+
|
|
1493
|
+
async def keyword_search(
    self,
    query: str,
    limit: int = 10,
    memory_type: Optional[str] = None,
    session_id: Optional[str] = None,
    project_path: Optional[str] = None,
    agent_type: Optional[str] = None,
    success_only: bool = False
) -> List[Dict[str, Any]]:
    """Fallback keyword search when embeddings are unavailable.

    Tokenizes ``query`` into lowercase keywords (three or more
    characters; the whole query is used when none qualify) and matches
    them against memory content with SQL LIKE. Results are ranked by the
    fraction of keywords matched, weighted by importance, and
    ``last_accessed`` is refreshed for returned rows.

    Args:
        query: Free-text search string.
        limit: Maximum number of results to return.
        memory_type, session_id, project_path, agent_type, success_only:
            Optional filters, mirroring search_similar.

    Returns:
        Up to ``limit`` result dicts with a pseudo-"similarity" score in
        [0, 1] and ``"match_type": "keyword"``.
    """
    # Normalize project path for consistent matching
    project_path = normalize_path(project_path)

    cursor = self.conn.cursor()

    # Extract keywords from query (simple tokenization)
    keywords = [k.strip().lower() for k in query.split() if len(k.strip()) >= 3]
    if not keywords:
        keywords = [query.lower()]

    # Build query with keyword matching
    sql = """
    SELECT id, type, content, metadata,
           project_path, project_name, project_type, tech_stack,
           session_id, chat_id, agent_type, skill_used, tools_used,
           outcome, success, tags, importance, created_at
    FROM memories WHERE 1=1
    """
    params: List[Any] = []

    if memory_type:
        sql += " AND type = ?"
        params.append(memory_type)
    if session_id:
        sql += " AND session_id = ?"
        params.append(session_id)
    if project_path:
        sql += " AND project_path = ?"
        params.append(project_path)
    if agent_type:
        sql += " AND agent_type = ?"
        params.append(agent_type)
    if success_only:
        sql += " AND success = 1"

    # Add keyword conditions (OR matching). LIKE wildcards inside the
    # user's keywords are escaped so '%' and '_' match literally — this
    # keeps the SQL match consistent with the literal substring scoring
    # performed below.
    keyword_conditions = []
    for kw in keywords:
        escaped = kw.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        keyword_conditions.append("LOWER(content) LIKE ? ESCAPE '\\'")
        params.append(f"%{escaped}%")

    if keyword_conditions:
        sql += f" AND ({' OR '.join(keyword_conditions)})"

    sql += " ORDER BY importance DESC, created_at DESC"
    # Over-fetch so the re-ranking below has candidates to choose from;
    # bind the limit as a parameter rather than interpolating it.
    sql += " LIMIT ?"
    params.append(limit * 3)

    cursor.execute(sql, params)
    rows = cursor.fetchall()

    results = []
    for row in rows:
        content_lower = row["content"].lower()
        # Calculate keyword match score: fraction of keywords present.
        match_count = sum(1 for kw in keywords if kw in content_lower)
        keyword_score = match_count / len(keywords) if keywords else 0

        results.append({
            "id": row["id"],
            "type": row["type"],
            "content": row["content"],
            "similarity": keyword_score,  # Use keyword score as pseudo-similarity
            "match_type": "keyword",
            "keywords_matched": match_count,
            "project": {
                "path": row["project_path"],
                "name": row["project_name"],
                "type": row["project_type"],
                "tech_stack": json.loads(row["tech_stack"]) if row["tech_stack"] else None
            },
            "session_id": row["session_id"],
            "agent": {
                "type": row["agent_type"],
                "skill": row["skill_used"],
                "tools": json.loads(row["tools_used"]) if row["tools_used"] else None
            },
            "outcome": row["outcome"],
            "success": bool(row["success"]) if row["success"] is not None else None,
            "tags": json.loads(row["tags"]) if row["tags"] else None,
            "importance": row["importance"],
            "created_at": row["created_at"],
            "metadata": json.loads(row["metadata"]) if row["metadata"] else {}
        })

    # Sort by keyword score * importance
    results.sort(key=lambda x: x["similarity"] * (x["importance"] / 10), reverse=True)

    # Update last_accessed for returned results
    if results:
        ids = [r["id"] for r in results[:limit]]
        cursor.execute(
            f"UPDATE memories SET last_accessed = datetime('now') WHERE id IN ({','.join('?' * len(ids))})",
            ids
        )
        self.conn.commit()

    return results[:limit]
|
|
1605
|
+
|
|
1606
|
+
async def store_project(
    self,
    path: str,
    name: Optional[str] = None,
    project_type: Optional[str] = None,
    tech_stack: Optional[List[str]] = None,
    conventions: Optional[Dict[str, Any]] = None,
    preferences: Optional[Dict[str, Any]] = None
) -> int:
    """Store or update project information.

    Upserts on the unique ``path`` column (after normalization): a new
    row is inserted, or an existing row's fields and ``updated_at`` are
    refreshed.

    Returns:
        The row id of the stored (inserted or updated) project.
    """
    # Normalize path to prevent duplicates
    path = normalize_path(path)

    cursor = self.conn.cursor()
    cursor.execute(
        """
        INSERT INTO projects (path, name, type, tech_stack, conventions, preferences)
        VALUES (?, ?, ?, ?, ?, ?)
        ON CONFLICT(path) DO UPDATE SET
            name = excluded.name,
            type = excluded.type,
            tech_stack = excluded.tech_stack,
            conventions = excluded.conventions,
            preferences = excluded.preferences,
            updated_at = datetime('now')
        """,
        (
            path,
            name,
            project_type,
            json.dumps(tech_stack) if tech_stack else None,
            json.dumps(conventions) if conventions else None,
            json.dumps(preferences) if preferences else None
        )
    )
    self.conn.commit()

    # cursor.lastrowid is only meaningful after an actual INSERT; when the
    # upsert takes the UPDATE branch it reports a stale rowid (the last
    # successful INSERT on this connection). Look the id up explicitly
    # via the unique path instead.
    cursor.execute("SELECT id FROM projects WHERE path = ?", (path,))
    row = cursor.fetchone()
    return row["id"] if row is not None else cursor.lastrowid
|
|
1643
|
+
|
|
1644
|
+
async def get_project(self, path: str) -> Optional[Dict[str, Any]]:
    """Look up stored project information by (normalized) path.

    Returns a dict describing the project, or None when no project with
    that path exists.
    """
    # Normalize so lookups match the form used when storing.
    lookup_path = normalize_path(path)

    cur = self.conn.cursor()
    cur.execute("SELECT * FROM projects WHERE path = ?", (lookup_path,))
    record = cur.fetchone()
    if record is None:
        return None

    def decode(column: str):
        # JSON columns are stored as text and may be NULL.
        raw = record[column]
        return json.loads(raw) if raw else None

    return {
        "id": record["id"],
        "path": record["path"],
        "name": record["name"],
        "type": record["type"],
        "tech_stack": decode("tech_stack"),
        "conventions": decode("conventions"),
        "preferences": decode("preferences")
    }
|
|
1663
|
+
|
|
1664
|
+
async def store_pattern(
    self,
    name: str,
    solution: str,
    embedding: List[float],
    problem_type: Optional[str] = None,
    tech_context: Optional[List[str]] = None,
    metadata: Optional[Dict[str, Any]] = None
) -> int:
    """Persist a reusable pattern/solution and return its new row id."""
    # Build the column values up front: embedding is serialized, the
    # optional JSON columns are stored as text (NULL when absent).
    values = (
        name,
        problem_type,
        solution,
        self._serialize_embedding(embedding),
        json.dumps(tech_context) if tech_context else None,
        json.dumps(metadata or {})
    )

    cur = self.conn.cursor()
    cur.execute(
        """
        INSERT INTO patterns (name, problem_type, solution, embedding, tech_context, metadata)
        VALUES (?, ?, ?, ?, ?, ?)
        """,
        values
    )
    self.conn.commit()
    return cur.lastrowid
|
|
1691
|
+
|
|
1692
|
+
async def search_patterns(
    self,
    embedding: List[float],
    limit: int = 5,
    problem_type: Optional[str] = None,
    threshold: float = 0.5
) -> List[Dict[str, Any]]:
    """Find stored patterns semantically similar to ``embedding``.

    Performs a linear cosine-similarity scan over all patterns
    (optionally restricted to one problem_type) and ranks matches by
    similarity multiplied by the pattern's historical success rate.
    """
    cur = self.conn.cursor()

    sql = "SELECT * FROM patterns WHERE 1=1"
    args: List[Any] = []
    if problem_type:
        sql += " AND problem_type = ?"
        args.append(problem_type)

    cur.execute(sql, args)

    matches: List[Dict[str, Any]] = []
    for record in cur.fetchall():
        candidate = self._deserialize_embedding(record["embedding"])
        if not candidate:
            continue
        similarity = self._cosine_similarity(embedding, candidate)
        if similarity < threshold:
            continue

        # Weight by success rate; unproven patterns get a neutral 0.5.
        attempts = record["success_count"] + record["failure_count"]
        success_rate = record["success_count"] / attempts if attempts > 0 else 0.5

        matches.append({
            "id": record["id"],
            "name": record["name"],
            "problem_type": record["problem_type"],
            "solution": record["solution"],
            "tech_context": json.loads(record["tech_context"]) if record["tech_context"] else None,
            "similarity": similarity,
            "success_rate": success_rate,
            "score": similarity * success_rate
        })

    matches.sort(key=lambda m: m["score"], reverse=True)
    return matches[:limit]
|
|
1734
|
+
|
|
1735
|
+
async def keyword_search_patterns(
    self,
    query: str,
    limit: int = 5,
    problem_type: Optional[str] = None
) -> List[Dict[str, Any]]:
    """Fallback keyword search for patterns when embeddings unavailable.

    Tokenizes ``query`` into lowercase keywords (three or more
    characters; the whole query is used when none qualify) and matches
    them against pattern names and solutions with SQL LIKE. Results are
    ranked by the fraction of keywords matched multiplied by the
    pattern's historical success rate.

    Returns:
        Up to ``limit`` result dicts with a pseudo-"similarity" score
        and ``"match_type": "keyword"``.
    """
    cursor = self.conn.cursor()

    # Extract keywords from query
    keywords = [k.strip().lower() for k in query.split() if len(k.strip()) >= 3]
    if not keywords:
        keywords = [query.lower()]

    sql = "SELECT * FROM patterns WHERE 1=1"
    params: List[Any] = []

    if problem_type:
        sql += " AND problem_type = ?"
        params.append(problem_type)

    # Add keyword conditions (OR matching). LIKE wildcards inside the
    # user's keywords are escaped so '%' and '_' match literally — this
    # keeps the SQL match consistent with the literal substring scoring
    # performed below.
    keyword_conditions = []
    for kw in keywords:
        escaped = kw.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        keyword_conditions.append(
            "(LOWER(name) LIKE ? ESCAPE '\\' OR LOWER(solution) LIKE ? ESCAPE '\\')"
        )
        params.append(f"%{escaped}%")
        params.append(f"%{escaped}%")

    if keyword_conditions:
        sql += f" AND ({' OR '.join(keyword_conditions)})"

    sql += " ORDER BY success_count DESC, created_at DESC"
    # Over-fetch so the re-ranking below has candidates to choose from;
    # bind the limit as a parameter rather than interpolating it.
    sql += " LIMIT ?"
    params.append(limit * 2)

    cursor.execute(sql, params)
    rows = cursor.fetchall()

    results = []
    for row in rows:
        # Calculate keyword match score over the combined name+solution.
        combined_text = f"{row['name']} {row['solution']}".lower()
        match_count = sum(1 for kw in keywords if kw in combined_text)
        keyword_score = match_count / len(keywords) if keywords else 0

        # Weight by success rate; unproven patterns get a neutral 0.5.
        total = row["success_count"] + row["failure_count"]
        success_rate = row["success_count"] / total if total > 0 else 0.5

        results.append({
            "id": row["id"],
            "name": row["name"],
            "problem_type": row["problem_type"],
            "solution": row["solution"],
            "tech_context": json.loads(row["tech_context"]) if row["tech_context"] else None,
            "similarity": keyword_score,
            "match_type": "keyword",
            "keywords_matched": match_count,
            "success_rate": success_rate,
            "score": keyword_score * success_rate
        })

    results.sort(key=lambda x: x["score"], reverse=True)
    return results[:limit]
|
|
1797
|
+
|
|
1798
|
+
async def update_pattern_outcome(self, pattern_id: int, success: bool):
    """Record one success or failure outcome against a stored pattern."""
    # Increment whichever counter corresponds to the outcome.
    column = "success_count" if success else "failure_count"
    cur = self.conn.cursor()
    cur.execute(
        f"UPDATE patterns SET {column} = {column} + 1 WHERE id = ?",
        (pattern_id,)
    )
    self.conn.commit()
|
|
1806
|
+
|
|
1807
|
+
async def get_memory(self, memory_id: int) -> Optional[Dict[str, Any]]:
    """Fetch a single memory by row id, or None when it does not exist."""
    cur = self.conn.cursor()
    cur.execute("SELECT * FROM memories WHERE id = ?", (memory_id,))
    record = cur.fetchone()
    if record is None:
        return None

    # success is stored as 0/1/NULL; surface it as bool/None.
    raw_success = record["success"]
    return {
        "id": record["id"],
        "type": record["type"],
        "content": record["content"],
        "project": {
            "path": record["project_path"],
            "name": record["project_name"],
            "type": record["project_type"]
        },
        "session_id": record["session_id"],
        "agent_type": record["agent_type"],
        "skill_used": record["skill_used"],
        "outcome": record["outcome"],
        "success": bool(raw_success) if raw_success is not None else None,
        "importance": record["importance"],
        "created_at": record["created_at"],
        "metadata": json.loads(record["metadata"]) if record["metadata"] else {}
    }
|
|
1832
|
+
|
|
1833
|
+
async def get_memories_by_type(
    self,
    memory_type: str,
    limit: int = 50,
    session_id: Optional[str] = None,
    project_path: Optional[str] = None
) -> List[Dict[str, Any]]:
    """List memories of one type, most important and newest first."""
    # Normalize so the filter matches stored (normalized) paths.
    project_path = normalize_path(project_path)

    conditions = ["type = ?"]
    params: List[Any] = [memory_type]

    if session_id:
        conditions.append("session_id = ?")
        params.append(session_id)
    if project_path:
        conditions.append("project_path = ?")
        params.append(project_path)

    sql = (
        "SELECT * FROM memories WHERE "
        + " AND ".join(conditions)
        + " ORDER BY importance DESC, created_at DESC LIMIT ?"
    )
    params.append(limit)

    cur = self.conn.cursor()
    cur.execute(sql, params)

    return [
        {
            "id": record["id"],
            "type": record["type"],
            "content": record["content"],
            "project_path": record["project_path"],
            "session_id": record["session_id"],
            "importance": record["importance"],
            "created_at": record["created_at"]
        }
        for record in cur.fetchall()
    ]
|
|
1874
|
+
|
|
1875
|
+
async def delete_memory(self, memory_id: int) -> bool:
    """Delete a memory row by id; returns True if a row was removed.

    NOTE(review): this does not evict the entry from the in-memory FAISS
    index (_memories_index), so a stale vector may linger until the
    index is rebuilt — confirm whether the index supports removal.
    """
    cur = self.conn.cursor()
    cur.execute("DELETE FROM memories WHERE id = ?", (memory_id,))
    self.conn.commit()
    deleted = cur.rowcount > 0
    return deleted
|
|
1880
|
+
|
|
1881
|
+
async def get_stats(self) -> Dict[str, Any]:
    """Summarize store contents: totals plus per-type/project/agent counts."""
    cur = self.conn.cursor()

    def scalar(sql: str) -> int:
        # Single-value aggregate query.
        cur.execute(sql)
        return cur.fetchone()[0]

    def grouped(sql: str) -> Dict[str, int]:
        # Two-column (key, count) GROUP BY query -> dict.
        cur.execute(sql)
        return {record[0]: record[1] for record in cur.fetchall()}

    return {
        "total_memories": scalar("SELECT COUNT(*) as total FROM memories"),
        "by_type": grouped("SELECT type, COUNT(*) as count FROM memories GROUP BY type"),
        "by_project": grouped("SELECT project_path, COUNT(*) as count FROM memories WHERE project_path IS NOT NULL GROUP BY project_path"),
        "by_agent": grouped("SELECT agent_type, COUNT(*) as count FROM memories WHERE agent_type IS NOT NULL GROUP BY agent_type"),
        "patterns_count": scalar("SELECT COUNT(*) as count FROM patterns"),
        "projects_count": scalar("SELECT COUNT(*) as count FROM projects"),
        "database": self.db_path
    }
|
|
1912
|
+
|
|
1913
|
+
# ============================================================
|
|
1914
|
+
# TIMELINE METHODS
|
|
1915
|
+
# ============================================================
|
|
1916
|
+
|
|
1917
|
+
async def get_next_sequence_num(self, session_id: str) -> int:
    """Return the next (1-based) sequence number for a session's timeline."""
    cur = self.conn.cursor()
    cur.execute(
        "SELECT MAX(sequence_num) as max_seq FROM timeline_events WHERE session_id = ?",
        (session_id,)
    )
    highest = cur.fetchone()["max_seq"]
    # MAX() yields NULL when the session has no events yet; start at 1.
    return 1 if not highest else highest + 1
|
|
1926
|
+
|
|
1927
|
+
async def store_timeline_event(
    self,
    session_id: str,
    event_type: str,
    summary: str,
    details: Optional[str] = None,
    embedding: Optional[List[float]] = None,
    project_path: Optional[str] = None,
    parent_event_id: Optional[int] = None,
    root_event_id: Optional[int] = None,
    entities: Optional[Dict[str, List[str]]] = None,
    status: str = "completed",
    outcome: Optional[str] = None,
    confidence: Optional[float] = None,
    is_anchor: bool = False
) -> int:
    """Store a timeline event and return its new row id.

    The event is appended to the session's timeline with the next
    per-session sequence number. The summary is truncated to 200
    characters; the embedding (when given) is serialized for storage.
    Also bumps the session's events-since-checkpoint counter.

    Args:
        session_id: Session the event belongs to.
        event_type: Category label for the event.
        summary: Short description (stored truncated to 200 chars;
            empty string when falsy).
        details: Optional longer description.
        embedding: Optional embedding vector for semantic search.
        project_path: Optional project path (normalized before storage).
        parent_event_id: Optional id of the event this one follows from.
        root_event_id: Optional id of the chain's root event.
        entities: Optional mapping stored as JSON text.
        status: Event status; defaults to "completed".
        outcome: Optional outcome description.
        confidence: Optional confidence score.
        is_anchor: Whether this event is a timeline anchor (stored 0/1).

    Returns:
        The rowid of the inserted timeline event.
    """
    # Normalize project path to prevent duplicates
    project_path = normalize_path(project_path)

    cursor = self.conn.cursor()

    # Get next sequence number (per-session ordering key)
    sequence_num = await self.get_next_sequence_num(session_id)

    # NOTE: the tuple below is strictly positional and must stay in the
    # same order as the column list in the INSERT.
    cursor.execute(
        """
        INSERT INTO timeline_events (
            session_id, project_path, event_type, sequence_num,
            summary, details, embedding,
            parent_event_id, root_event_id, entities,
            status, outcome, confidence, is_anchor
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """,
        (
            session_id,
            project_path,
            event_type,
            sequence_num,
            summary[:200] if summary else "",  # Limit summary length
            details,
            self._serialize_embedding(embedding) if embedding else None,
            parent_event_id,
            root_event_id,
            json.dumps(entities) if entities else None,
            status,
            outcome,
            confidence,
            1 if is_anchor else 0  # stored as an integer flag
        )
    )
    self.conn.commit()

    # Update session state events counter
    await self._increment_events_since_checkpoint(session_id)

    return cursor.lastrowid
|
|
1984
|
+
|
|
1985
|
+
async def get_timeline_events(
    self,
    session_id: str,
    limit: int = 20,
    event_type: Optional[str] = None,
    since_event_id: Optional[int] = None,
    anchors_only: bool = False
) -> List[Dict[str, Any]]:
    """Return a session's timeline events, newest (highest sequence) first.

    Optional filters: a single event_type, events strictly after a given
    row id, and anchor events only.
    """
    clauses = ["session_id = ?"]
    params: List[Any] = [session_id]

    if event_type:
        clauses.append("event_type = ?")
        params.append(event_type)
    if since_event_id:
        clauses.append("id > ?")
        params.append(since_event_id)
    if anchors_only:
        clauses.append("is_anchor = 1")

    sql = (
        "SELECT * FROM timeline_events WHERE "
        + " AND ".join(clauses)
        + " ORDER BY sequence_num DESC LIMIT ?"
    )
    params.append(limit)

    cur = self.conn.cursor()
    cur.execute(sql, params)

    def as_dict(record) -> Dict[str, Any]:
        # Convert a DB row into the public event dict shape.
        return {
            "id": record["id"],
            "session_id": record["session_id"],
            "event_type": record["event_type"],
            "sequence_num": record["sequence_num"],
            "summary": record["summary"],
            "details": record["details"],
            "parent_event_id": record["parent_event_id"],
            "root_event_id": record["root_event_id"],
            "entities": json.loads(record["entities"]) if record["entities"] else None,
            "status": record["status"],
            "outcome": record["outcome"],
            "confidence": record["confidence"],
            "is_anchor": bool(record["is_anchor"]),
            "created_at": record["created_at"]
        }

    return [as_dict(record) for record in cur.fetchall()]
|
|
2035
|
+
|
|
2036
|
+
async def search_timeline_events(
    self,
    embedding: List[float],
    session_id: Optional[str] = None,
    limit: int = 10,
    threshold: float = 0.5
) -> List[Dict[str, Any]]:
    """Semantic search across timeline events.

    Linear cosine-similarity scan over all events that have a stored
    embedding, optionally restricted to one session; results are ranked
    by similarity.
    """
    sql = "SELECT * FROM timeline_events WHERE embedding IS NOT NULL"
    args: List[Any] = []
    if session_id:
        sql += " AND session_id = ?"
        args.append(session_id)

    cur = self.conn.cursor()
    cur.execute(sql, args)

    hits: List[Dict[str, Any]] = []
    for record in cur.fetchall():
        candidate = self._deserialize_embedding(record["embedding"])
        if not candidate:
            continue
        score = self._cosine_similarity(embedding, candidate)
        if score < threshold:
            continue
        hits.append({
            "id": record["id"],
            "session_id": record["session_id"],
            "event_type": record["event_type"],
            "sequence_num": record["sequence_num"],
            "summary": record["summary"],
            "details": record["details"],
            "similarity": score,
            "is_anchor": bool(record["is_anchor"]),
            "created_at": record["created_at"]
        })

    hits.sort(key=lambda h: h["similarity"], reverse=True)
    return hits[:limit]
|
|
2076
|
+
|
|
2077
|
+
# ============================================================
|
|
2078
|
+
# SESSION STATE METHODS
|
|
2079
|
+
# ============================================================
|
|
2080
|
+
|
|
2081
|
+
async def get_or_create_session_state(
    self,
    session_id: str,
    project_path: Optional[str] = None
) -> Dict[str, Any]:
    """Get or create session state.

    Looks up the row for *session_id*; if none exists, inserts a fresh
    one (DB defaults fill the remaining columns) and returns an
    equivalent in-memory dict.
    """
    # Normalize project path to prevent duplicates
    project_path = normalize_path(project_path)

    cur = self.conn.cursor()
    cur.execute("SELECT * FROM session_state WHERE session_id = ?", (session_id,))
    existing = cur.fetchone()

    if existing is not None:
        return {
            "id": existing["id"],
            "session_id": existing["session_id"],
            "project_path": existing["project_path"],
            "current_goal": existing["current_goal"],
            "pending_questions": json.loads(existing["pending_questions"]) if existing["pending_questions"] else [],
            "entity_registry": json.loads(existing["entity_registry"]) if existing["entity_registry"] else {},
            "decisions_summary": existing["decisions_summary"],
            "last_checkpoint_id": existing["last_checkpoint_id"],
            "events_since_checkpoint": existing["events_since_checkpoint"],
            "created_at": existing["created_at"],
            "updated_at": existing["updated_at"],
            "last_activity_at": existing["last_activity_at"]
        }

    # No state for this session yet -- create it.
    cur.execute(
        """
        INSERT INTO session_state (session_id, project_path)
        VALUES (?, ?)
        """,
        (session_id, project_path)
    )
    self.conn.commit()

    # NOTE(review): timestamps below are local-time isoformat, while SQL
    # updates elsewhere use datetime('now') (UTC) -- confirm intended.
    return {
        "id": cur.lastrowid,
        "session_id": session_id,
        "project_path": project_path,
        "current_goal": None,
        "pending_questions": [],
        "entity_registry": {},
        "decisions_summary": None,
        "last_checkpoint_id": None,
        "events_since_checkpoint": 0,
        "created_at": datetime.now().isoformat(),
        "updated_at": datetime.now().isoformat(),
        "last_activity_at": datetime.now().isoformat()
    }
|
|
2135
|
+
|
|
2136
|
+
async def update_session_state(
    self,
    session_id: str,
    current_goal: Optional[str] = None,
    pending_questions: Optional[List[str]] = None,
    entity_registry: Optional[Dict[str, str]] = None,
    decisions_summary: Optional[str] = None,
    last_checkpoint_id: Optional[int] = None,
    reset_events_counter: bool = False
) -> bool:
    """Update session state fields.

    Only non-None arguments are written; timestamps are always refreshed.
    Returns True when a row was actually updated.
    """
    # Timestamps are refreshed on every call.
    clauses = ["updated_at = datetime('now')", "last_activity_at = datetime('now')"]
    args: List[Any] = []

    # Table-driven build of the optional SET clauses. JSON-encoding is done
    # here; json.dumps never yields None, so the None sentinel stays reliable.
    optional = (
        ("current_goal", current_goal),
        ("pending_questions", None if pending_questions is None else json.dumps(pending_questions)),
        ("entity_registry", None if entity_registry is None else json.dumps(entity_registry)),
        ("decisions_summary", decisions_summary),
        ("last_checkpoint_id", last_checkpoint_id),
    )
    for column, value in optional:
        if value is not None:
            clauses.append(f"{column} = ?")
            args.append(value)

    if reset_events_counter:
        clauses.append("events_since_checkpoint = 0")

    args.append(session_id)

    cur = self.conn.cursor()
    cur.execute(
        f"UPDATE session_state SET {', '.join(clauses)} WHERE session_id = ?",
        args
    )
    self.conn.commit()
    return cur.rowcount > 0
|
|
2184
|
+
|
|
2185
|
+
async def _increment_events_since_checkpoint(self, session_id: str):
|
|
2186
|
+
"""Increment the events counter for a session."""
|
|
2187
|
+
cursor = self.conn.cursor()
|
|
2188
|
+
cursor.execute(
|
|
2189
|
+
"""
|
|
2190
|
+
UPDATE session_state
|
|
2191
|
+
SET events_since_checkpoint = events_since_checkpoint + 1,
|
|
2192
|
+
last_activity_at = datetime('now')
|
|
2193
|
+
WHERE session_id = ?
|
|
2194
|
+
""",
|
|
2195
|
+
(session_id,)
|
|
2196
|
+
)
|
|
2197
|
+
self.conn.commit()
|
|
2198
|
+
|
|
2199
|
+
async def get_latest_session_for_project(
    self,
    project_path: str
) -> Optional[Dict[str, Any]]:
    """Get the most recent session state for a project.

    "Most recent" is by last_activity_at. Returns None when the project
    has no recorded sessions.
    """
    # Normalize project path for consistent matching
    project_path = normalize_path(project_path)

    cur = self.conn.cursor()
    cur.execute(
        """
        SELECT * FROM session_state
        WHERE project_path = ?
        ORDER BY last_activity_at DESC
        LIMIT 1
        """,
        (project_path,)
    )
    row = cur.fetchone()
    if row is None:
        return None

    # Note: created_at / updated_at are intentionally not included here.
    return {
        "id": row["id"],
        "session_id": row["session_id"],
        "project_path": row["project_path"],
        "current_goal": row["current_goal"],
        "pending_questions": json.loads(row["pending_questions"]) if row["pending_questions"] else [],
        "entity_registry": json.loads(row["entity_registry"]) if row["entity_registry"] else {},
        "decisions_summary": row["decisions_summary"],
        "last_checkpoint_id": row["last_checkpoint_id"],
        "events_since_checkpoint": row["events_since_checkpoint"],
        "last_activity_at": row["last_activity_at"]
    }
|
|
2233
|
+
|
|
2234
|
+
# ============================================================
|
|
2235
|
+
# CHECKPOINT METHODS
|
|
2236
|
+
# ============================================================
|
|
2237
|
+
|
|
2238
|
+
async def store_checkpoint(
    self,
    session_id: str,
    summary: str,
    event_id: Optional[int] = None,
    key_facts: Optional[List[str]] = None,
    decisions: Optional[List[str]] = None,
    entities: Optional[Dict[str, str]] = None,
    current_goal: Optional[str] = None,
    pending_items: Optional[List[str]] = None,
    embedding: Optional[List[float]] = None,
    event_count: Optional[int] = None
) -> int:
    """Store a checkpoint.

    Inserts the checkpoint row, then points the session's state at it and
    resets its events-since-checkpoint counter. Returns the new row id.
    """
    # Serialize the optional payloads up front; empty/None values stay NULL.
    row = (
        session_id,
        event_id,
        summary,
        json.dumps(key_facts) if key_facts else None,
        json.dumps(decisions) if decisions else None,
        json.dumps(entities) if entities else None,
        current_goal,
        json.dumps(pending_items) if pending_items else None,
        self._serialize_embedding(embedding) if embedding else None,
        event_count
    )

    cur = self.conn.cursor()
    cur.execute(
        """
        INSERT INTO checkpoints (
            session_id, event_id, summary, key_facts, decisions,
            entities, current_goal, pending_items, embedding, event_count
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """,
        row
    )
    self.conn.commit()

    new_id = cur.lastrowid

    # Record this as the session's latest checkpoint and restart the counter.
    await self.update_session_state(
        session_id,
        last_checkpoint_id=new_id,
        reset_events_counter=True
    )

    return new_id
|
|
2286
|
+
|
|
2287
|
+
async def get_latest_checkpoint(
    self,
    session_id: str
) -> Optional[Dict[str, Any]]:
    """Get the latest checkpoint for a session.

    Returns the newest checkpoint (by created_at) as a dict with JSON
    columns decoded, or None when the session has no checkpoints.
    """
    cur = self.conn.cursor()
    cur.execute(
        """
        SELECT * FROM checkpoints
        WHERE session_id = ?
        ORDER BY created_at DESC
        LIMIT 1
        """,
        (session_id,)
    )
    row = cur.fetchone()
    if row is None:
        return None

    def decode(raw, fallback):
        # NULL columns come back as None; decode everything else.
        return json.loads(raw) if raw else fallback

    return {
        "id": row["id"],
        "session_id": row["session_id"],
        "event_id": row["event_id"],
        "summary": row["summary"],
        "key_facts": decode(row["key_facts"], []),
        "decisions": decode(row["decisions"], []),
        "entities": decode(row["entities"], {}),
        "current_goal": row["current_goal"],
        "pending_items": decode(row["pending_items"], []),
        "event_count": row["event_count"],
        "created_at": row["created_at"]
    }
|
|
2319
|
+
|
|
2320
|
+
async def get_checkpoints_for_session(
    self,
    session_id: str,
    limit: int = 10
) -> List[Dict[str, Any]]:
    """Get all checkpoints for a session.

    Returns up to *limit* checkpoint summaries, newest first. Only a
    compact subset of columns is exposed (no key_facts/decisions payload).
    """
    cur = self.conn.cursor()
    cur.execute(
        """
        SELECT * FROM checkpoints
        WHERE session_id = ?
        ORDER BY created_at DESC
        LIMIT ?
        """,
        (session_id, limit)
    )

    summaries = []
    for row in cur.fetchall():
        summaries.append({
            "id": row["id"],
            "session_id": row["session_id"],
            "summary": row["summary"],
            "current_goal": row["current_goal"],
            "event_count": row["event_count"],
            "created_at": row["created_at"]
        })
    return summaries
|
|
2348
|
+
|
|
2349
|
+
# ============================================================
|
|
2350
|
+
# GENERIC QUERY METHOD
|
|
2351
|
+
# ============================================================
|
|
2352
|
+
|
|
2353
|
+
@with_retry(max_retries=DB_MAX_RETRIES, base_delay=DB_RETRY_BASE_DELAY)
async def execute_query(
    self,
    query: str,
    params: tuple = (),
    timeout: Optional[float] = None
) -> List[Dict[str, Any]]:
    """Execute a raw SQL query and return results as list of dicts.

    Args:
        query: SQL query to execute
        params: Query parameters
        timeout: Optional slow-query threshold in seconds (uses DB_TIMEOUT
            if not specified). Note: the query is NOT cancelled when it
            exceeds this value; a warning is logged instead.

    Returns:
        List of dictionaries representing rows

    Raises:
        RetryExhaustedError: If all retry attempts fail
        ConnectionPoolError: If the database file cannot be opened
        DatabaseError: For other database errors
    """
    # Fix: `timeout or DB_TIMEOUT` treated an explicit timeout of 0/0.0 as
    # "not specified"; compare against None so a caller-supplied zero holds.
    effective_timeout = DB_TIMEOUT if timeout is None else timeout

    try:
        with self.get_connection() as conn:
            cursor = conn.cursor()

            # Track wall-clock time so slow queries can be flagged below.
            start_time = time.time()

            cursor.execute(query, params)
            rows = cursor.fetchall()

            # Check if query took too long (for logging/monitoring)
            elapsed = time.time() - start_time
            if elapsed > effective_timeout * 0.8:
                logger.warning(
                    f"Slow query detected ({elapsed:.2f}s): {query[:100]}..."
                )

            if not rows:
                return []

            # Convert Row objects to dicts
            return [dict(row) for row in rows]

    except sqlite3.OperationalError as e:
        error_str = str(e).lower()
        if "database is locked" in error_str or "busy" in error_str:
            # Transient contention: surface to the retry decorator.
            logger.warning(f"Database busy/locked, will retry: {e}")
            raise  # Let retry decorator handle it
        elif "unable to open database" in error_str:
            raise ConnectionPoolError(f"Cannot open database: {e}", original_error=e)
        else:
            raise DatabaseError(
                f"Query execution failed: {e}",
                error_code="DB_QUERY_ERROR",
                original_error=e
            )
    except sqlite3.IntegrityError as e:
        raise DatabaseError(
            f"Integrity constraint violation: {e}",
            error_code="DB_INTEGRITY_ERROR",
            original_error=e
        )
    except Exception as e:
        logger.error(f"Unexpected error executing query: {e}")
        raise DatabaseError(
            f"Unexpected database error: {e}",
            error_code="DB_UNKNOWN_ERROR",
            original_error=e
        )
|
|
2426
|
+
|
|
2427
|
+
async def execute_write(
    self,
    query: str,
    params: tuple = (),
    commit: bool = True
) -> int:
    """Execute a write query (INSERT, UPDATE, DELETE) with retry logic.

    Thin public wrapper: the actual execution (and the retry decorator)
    lives in _execute_write_with_retry.

    Args:
        query: SQL statement to execute
        params: Statement parameters
        commit: Whether to commit the transaction

    Returns:
        lastrowid for INSERT statements, otherwise the affected row count

    Raises:
        RetryExhaustedError: If all retry attempts fail
        DatabaseError: For other database errors
    """
    return await self._execute_write_with_retry(query, params, commit)
|
|
2448
|
+
|
|
2449
|
+
@with_retry(max_retries=DB_MAX_RETRIES, base_delay=DB_RETRY_BASE_DELAY)
|
|
2450
|
+
async def _execute_write_with_retry(
|
|
2451
|
+
self,
|
|
2452
|
+
query: str,
|
|
2453
|
+
params: tuple,
|
|
2454
|
+
commit: bool
|
|
2455
|
+
) -> int:
|
|
2456
|
+
"""Internal write execution with retry decorator."""
|
|
2457
|
+
try:
|
|
2458
|
+
with self.get_connection() as conn:
|
|
2459
|
+
cursor = conn.cursor()
|
|
2460
|
+
cursor.execute(query, params)
|
|
2461
|
+
|
|
2462
|
+
if commit:
|
|
2463
|
+
conn.commit()
|
|
2464
|
+
|
|
2465
|
+
# Return lastrowid for INSERT, rowcount for UPDATE/DELETE
|
|
2466
|
+
if query.strip().upper().startswith("INSERT"):
|
|
2467
|
+
return cursor.lastrowid
|
|
2468
|
+
return cursor.rowcount
|
|
2469
|
+
|
|
2470
|
+
except sqlite3.OperationalError as e:
|
|
2471
|
+
error_str = str(e).lower()
|
|
2472
|
+
if "database is locked" in error_str or "busy" in error_str:
|
|
2473
|
+
logger.warning(f"Database busy/locked during write, will retry: {e}")
|
|
2474
|
+
raise # Let retry decorator handle it
|
|
2475
|
+
raise DatabaseError(
|
|
2476
|
+
f"Write operation failed: {e}",
|
|
2477
|
+
error_code="DB_WRITE_ERROR",
|
|
2478
|
+
original_error=e
|
|
2479
|
+
)
|
|
2480
|
+
except sqlite3.IntegrityError as e:
|
|
2481
|
+
raise DatabaseError(
|
|
2482
|
+
f"Integrity constraint violation: {e}",
|
|
2483
|
+
error_code="DB_INTEGRITY_ERROR",
|
|
2484
|
+
original_error=e
|
|
2485
|
+
)
|