alma-memory 0.5.1-py3-none-any.whl → 0.7.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. alma/__init__.py +296 -226
  2. alma/compression/__init__.py +33 -0
  3. alma/compression/pipeline.py +980 -0
  4. alma/confidence/__init__.py +47 -47
  5. alma/confidence/engine.py +540 -540
  6. alma/confidence/types.py +351 -351
  7. alma/config/loader.py +157 -157
  8. alma/consolidation/__init__.py +23 -23
  9. alma/consolidation/engine.py +678 -678
  10. alma/consolidation/prompts.py +84 -84
  11. alma/core.py +1189 -430
  12. alma/domains/__init__.py +30 -30
  13. alma/domains/factory.py +359 -359
  14. alma/domains/schemas.py +448 -448
  15. alma/domains/types.py +272 -272
  16. alma/events/__init__.py +75 -75
  17. alma/events/emitter.py +285 -284
  18. alma/events/storage_mixin.py +246 -246
  19. alma/events/types.py +126 -126
  20. alma/events/webhook.py +425 -425
  21. alma/exceptions.py +49 -49
  22. alma/extraction/__init__.py +31 -31
  23. alma/extraction/auto_learner.py +265 -265
  24. alma/extraction/extractor.py +420 -420
  25. alma/graph/__init__.py +106 -106
  26. alma/graph/backends/__init__.py +32 -32
  27. alma/graph/backends/kuzu.py +624 -624
  28. alma/graph/backends/memgraph.py +432 -432
  29. alma/graph/backends/memory.py +236 -236
  30. alma/graph/backends/neo4j.py +417 -417
  31. alma/graph/base.py +159 -159
  32. alma/graph/extraction.py +198 -198
  33. alma/graph/store.py +860 -860
  34. alma/harness/__init__.py +35 -35
  35. alma/harness/base.py +386 -386
  36. alma/harness/domains.py +705 -705
  37. alma/initializer/__init__.py +37 -37
  38. alma/initializer/initializer.py +418 -418
  39. alma/initializer/types.py +250 -250
  40. alma/integration/__init__.py +62 -62
  41. alma/integration/claude_agents.py +444 -444
  42. alma/integration/helena.py +423 -423
  43. alma/integration/victor.py +471 -471
  44. alma/learning/__init__.py +101 -86
  45. alma/learning/decay.py +878 -0
  46. alma/learning/forgetting.py +1446 -1446
  47. alma/learning/heuristic_extractor.py +390 -390
  48. alma/learning/protocols.py +374 -374
  49. alma/learning/validation.py +346 -346
  50. alma/mcp/__init__.py +123 -45
  51. alma/mcp/__main__.py +156 -156
  52. alma/mcp/resources.py +122 -122
  53. alma/mcp/server.py +955 -591
  54. alma/mcp/tools.py +3254 -509
  55. alma/observability/__init__.py +91 -84
  56. alma/observability/config.py +302 -302
  57. alma/observability/guidelines.py +170 -0
  58. alma/observability/logging.py +424 -424
  59. alma/observability/metrics.py +583 -583
  60. alma/observability/tracing.py +440 -440
  61. alma/progress/__init__.py +21 -21
  62. alma/progress/tracker.py +607 -607
  63. alma/progress/types.py +250 -250
  64. alma/retrieval/__init__.py +134 -53
  65. alma/retrieval/budget.py +525 -0
  66. alma/retrieval/cache.py +1304 -1061
  67. alma/retrieval/embeddings.py +202 -202
  68. alma/retrieval/engine.py +850 -427
  69. alma/retrieval/modes.py +365 -0
  70. alma/retrieval/progressive.py +560 -0
  71. alma/retrieval/scoring.py +344 -344
  72. alma/retrieval/trust_scoring.py +637 -0
  73. alma/retrieval/verification.py +797 -0
  74. alma/session/__init__.py +19 -19
  75. alma/session/manager.py +442 -399
  76. alma/session/types.py +288 -288
  77. alma/storage/__init__.py +101 -90
  78. alma/storage/archive.py +233 -0
  79. alma/storage/azure_cosmos.py +1259 -1259
  80. alma/storage/base.py +1083 -583
  81. alma/storage/chroma.py +1443 -1443
  82. alma/storage/constants.py +103 -103
  83. alma/storage/file_based.py +614 -614
  84. alma/storage/migrations/__init__.py +21 -21
  85. alma/storage/migrations/base.py +321 -321
  86. alma/storage/migrations/runner.py +323 -323
  87. alma/storage/migrations/version_stores.py +337 -337
  88. alma/storage/migrations/versions/__init__.py +11 -11
  89. alma/storage/migrations/versions/v1_0_0.py +373 -373
  90. alma/storage/migrations/versions/v1_1_0_workflow_context.py +551 -0
  91. alma/storage/pinecone.py +1080 -1080
  92. alma/storage/postgresql.py +1948 -1559
  93. alma/storage/qdrant.py +1306 -1306
  94. alma/storage/sqlite_local.py +3041 -1457
  95. alma/testing/__init__.py +46 -46
  96. alma/testing/factories.py +301 -301
  97. alma/testing/mocks.py +389 -389
  98. alma/types.py +292 -264
  99. alma/utils/__init__.py +19 -0
  100. alma/utils/tokenizer.py +521 -0
  101. alma/workflow/__init__.py +83 -0
  102. alma/workflow/artifacts.py +170 -0
  103. alma/workflow/checkpoint.py +311 -0
  104. alma/workflow/context.py +228 -0
  105. alma/workflow/outcomes.py +189 -0
  106. alma/workflow/reducers.py +393 -0
  107. {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/METADATA +210 -72
  108. alma_memory-0.7.0.dist-info/RECORD +112 -0
  109. alma_memory-0.5.1.dist-info/RECORD +0 -93
  110. {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/WHEEL +0 -0
  111. {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/top_level.txt +0 -0
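The largest single change in this release is `alma/storage/postgresql.py`, diffed below. As a quick orientation, here is a minimal sketch of how the backend is constructed through the `from_config()` classmethod visible in the diff; the config keys mirror exactly what that method reads, while the concrete values and the `ALMA_PG_PASSWORD` environment variable are illustrative assumptions, not part of the package:

```python
# Sketch only: keys follow from_config() in the diff below; values are made up.
from alma.storage.postgresql import PostgreSQLStorage

config = {
    "embedding_dim": 384,  # top-level key, read via config.get("embedding_dim", 384)
    "postgres": {
        "host": "localhost",
        "port": 5432,
        "database": "alma_memory",
        "user": "postgres",
        # "${VAR}" strings are expanded from the environment by get_value()
        "password": "${ALMA_PG_PASSWORD}",
        "pool_size": 10,
        "schema": "public",
        "ssl_mode": "prefer",
    },
}

storage = PostgreSQLStorage.from_config(config)
try:
    # get_stats() reports per-type counts plus pgvector availability
    print(storage.get_stats(project_id="demo"))
finally:
    storage.close()  # releases the psycopg connection pool
```

As the diff shows, `__init__` tries to enable the pgvector extension and, if that fails, falls back to BYTEA embedding columns with application-level cosine similarity, so the same construction works on databases without the extension.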
@@ -1,1559 +1,1948 @@
- """
- ALMA PostgreSQL Storage Backend.
-
- Production-ready storage using PostgreSQL with pgvector extension for
- native vector similarity search. Supports connection pooling.
-
- Recommended for:
- - Customer deployments (Azure PostgreSQL, AWS RDS, etc.)
- - Self-hosted production environments
- - High-availability requirements
- """
-
- import json
- import logging
- import os
- from contextlib import contextmanager
- from datetime import datetime, timezone
- from typing import Any, Dict, List, Optional
-
- # numpy is optional - only needed for fallback similarity when pgvector unavailable
- try:
-     import numpy as np
-
-     NUMPY_AVAILABLE = True
- except ImportError:
-     np = None  # type: ignore
-     NUMPY_AVAILABLE = False
-
- from alma.storage.base import StorageBackend
- from alma.storage.constants import POSTGRESQL_TABLE_NAMES, MemoryType
- from alma.types import (
-     AntiPattern,
-     DomainKnowledge,
-     Heuristic,
-     Outcome,
-     UserPreference,
- )
-
- logger = logging.getLogger(__name__)
-
- # Try to import psycopg (v3) with connection pooling
- try:
-     from psycopg.rows import dict_row
-     from psycopg_pool import ConnectionPool
-
-     PSYCOPG_AVAILABLE = True
- except ImportError:
-     PSYCOPG_AVAILABLE = False
-     logger.warning(
-         "psycopg not installed. Install with: pip install 'alma-memory[postgres]'"
-     )
-
-
- class PostgreSQLStorage(StorageBackend):
-     """
-     PostgreSQL storage backend with pgvector support.
-
-     Uses native PostgreSQL vector operations for efficient similarity search.
-     Falls back to application-level cosine similarity if pgvector is not installed.
-
-     Database schema (uses canonical memory type names with alma_ prefix):
-     - alma_heuristics: id, agent, project_id, condition, strategy, ...
-     - alma_outcomes: id, agent, project_id, task_type, ...
-     - alma_preferences: id, user_id, category, preference, ...
-     - alma_domain_knowledge: id, agent, project_id, domain, fact, ...
-     - alma_anti_patterns: id, agent, project_id, pattern, ...
-
-     Vector search:
-     - Uses pgvector extension if available
-     - Embeddings stored as VECTOR type with cosine distance operator (<=>)
-
-     Table names are derived from alma.storage.constants.POSTGRESQL_TABLE_NAMES
-     for consistency across all storage backends.
-     """
-
-     # Table names from constants for consistent naming
-     TABLE_NAMES = POSTGRESQL_TABLE_NAMES
-
-     def __init__(
-         self,
-         host: str,
-         port: int,
-         database: str,
-         user: str,
-         password: str,
-         embedding_dim: int = 384,
-         pool_size: int = 10,
-         schema: str = "public",
-         ssl_mode: str = "prefer",
-         auto_migrate: bool = True,
-     ):
-         """
-         Initialize PostgreSQL storage.
-
-         Args:
-             host: Database host
-             port: Database port
-             database: Database name
-             user: Database user
-             password: Database password
-             embedding_dim: Dimension of embedding vectors
-             pool_size: Connection pool size
-             schema: Database schema (default: public)
-             ssl_mode: SSL mode (disable, allow, prefer, require, verify-ca, verify-full)
-             auto_migrate: If True, automatically apply pending migrations on startup
-         """
-         if not PSYCOPG_AVAILABLE:
-             raise ImportError(
-                 "psycopg not installed. Install with: pip install 'alma-memory[postgres]'"
-             )
-
-         self.embedding_dim = embedding_dim
-         self.schema = schema
-         self._pgvector_available = False
-
-         # Migration support (lazy-loaded)
-         self._migration_runner = None
-         self._version_store = None
-
-         # Build connection string
-         conninfo = (
-             f"host={host} port={port} dbname={database} "
-             f"user={user} password={password} sslmode={ssl_mode}"
-         )
-
-         # Create connection pool
-         self._pool = ConnectionPool(
-             conninfo=conninfo,
-             min_size=1,
-             max_size=pool_size,
-             kwargs={"row_factory": dict_row},
-         )
-
-         # Initialize database
-         self._init_database()
-
-         # Auto-migrate if enabled
-         if auto_migrate:
-             self._ensure_migrated()
-
-     @classmethod
-     def from_config(cls, config: Dict[str, Any]) -> "PostgreSQLStorage":
-         """Create instance from configuration."""
-         pg_config = config.get("postgres", {})
-
-         # Support environment variable expansion
-         def get_value(key: str, default: Any = None) -> Any:
-             value = pg_config.get(key, default)
-             if (
-                 isinstance(value, str)
-                 and value.startswith("${")
-                 and value.endswith("}")
-             ):
-                 env_var = value[2:-1]
-                 return os.environ.get(env_var, default)
-             return value
-
-         return cls(
-             host=get_value("host", "localhost"),
-             port=int(get_value("port", 5432)),
-             database=get_value("database", "alma_memory"),
-             user=get_value("user", "postgres"),
-             password=get_value("password", ""),
-             embedding_dim=int(config.get("embedding_dim", 384)),
-             pool_size=int(get_value("pool_size", 10)),
-             schema=get_value("schema", "public"),
-             ssl_mode=get_value("ssl_mode", "prefer"),
-         )
-
-     @contextmanager
-     def _get_connection(self):
-         """Get database connection from pool."""
-         with self._pool.connection() as conn:
-             yield conn
-
-     def _init_database(self):
-         """Initialize database schema and pgvector extension."""
-         with self._get_connection() as conn:
-             # Try to enable pgvector extension
-             try:
-                 conn.execute("CREATE EXTENSION IF NOT EXISTS vector")
-                 conn.commit()
-                 self._pgvector_available = True
-                 logger.info("pgvector extension enabled")
-             except Exception as e:
-                 conn.rollback()  # Important: rollback to clear aborted transaction
-                 logger.warning(f"pgvector not available: {e}. Using fallback search.")
-                 self._pgvector_available = False
-
-             # Create tables
-             vector_type = (
-                 f"VECTOR({self.embedding_dim})" if self._pgvector_available else "BYTEA"
-             )
-
-             # Heuristics table
-             heuristics_table = self.TABLE_NAMES[MemoryType.HEURISTICS]
-             conn.execute(f"""
-                 CREATE TABLE IF NOT EXISTS {self.schema}.{heuristics_table} (
-                     id TEXT PRIMARY KEY,
-                     agent TEXT NOT NULL,
-                     project_id TEXT NOT NULL,
-                     condition TEXT NOT NULL,
-                     strategy TEXT NOT NULL,
-                     confidence REAL DEFAULT 0.0,
-                     occurrence_count INTEGER DEFAULT 0,
-                     success_count INTEGER DEFAULT 0,
-                     last_validated TIMESTAMPTZ,
-                     created_at TIMESTAMPTZ DEFAULT NOW(),
-                     metadata JSONB,
-                     embedding {vector_type}
-                 )
-             """)
-             conn.execute(f"""
-                 CREATE INDEX IF NOT EXISTS idx_heuristics_project_agent
-                 ON {self.schema}.{heuristics_table}(project_id, agent)
-             """)
-             # Confidence index for efficient filtering by confidence score
-             conn.execute(f"""
-                 CREATE INDEX IF NOT EXISTS idx_heuristics_confidence
-                 ON {self.schema}.{heuristics_table}(project_id, confidence DESC)
-             """)
-
-             # Outcomes table
-             outcomes_table = self.TABLE_NAMES[MemoryType.OUTCOMES]
-             conn.execute(f"""
-                 CREATE TABLE IF NOT EXISTS {self.schema}.{outcomes_table} (
-                     id TEXT PRIMARY KEY,
-                     agent TEXT NOT NULL,
-                     project_id TEXT NOT NULL,
-                     task_type TEXT,
-                     task_description TEXT NOT NULL,
-                     success BOOLEAN DEFAULT FALSE,
-                     strategy_used TEXT,
-                     duration_ms INTEGER,
-                     error_message TEXT,
-                     user_feedback TEXT,
-                     timestamp TIMESTAMPTZ DEFAULT NOW(),
-                     metadata JSONB,
-                     embedding {vector_type}
-                 )
-             """)
-             conn.execute(f"""
-                 CREATE INDEX IF NOT EXISTS idx_outcomes_project_agent
-                 ON {self.schema}.{outcomes_table}(project_id, agent)
-             """)
-             conn.execute(f"""
-                 CREATE INDEX IF NOT EXISTS idx_outcomes_task_type
-                 ON {self.schema}.{outcomes_table}(project_id, agent, task_type)
-             """)
-             conn.execute(f"""
-                 CREATE INDEX IF NOT EXISTS idx_outcomes_timestamp
-                 ON {self.schema}.{outcomes_table}(project_id, timestamp DESC)
-             """)
-
-             # User preferences table
-             preferences_table = self.TABLE_NAMES[MemoryType.PREFERENCES]
-             conn.execute(f"""
-                 CREATE TABLE IF NOT EXISTS {self.schema}.{preferences_table} (
-                     id TEXT PRIMARY KEY,
-                     user_id TEXT NOT NULL,
-                     category TEXT,
-                     preference TEXT NOT NULL,
-                     source TEXT,
-                     confidence REAL DEFAULT 1.0,
-                     timestamp TIMESTAMPTZ DEFAULT NOW(),
-                     metadata JSONB
-                 )
-             """)
-             conn.execute(f"""
-                 CREATE INDEX IF NOT EXISTS idx_preferences_user
-                 ON {self.schema}.{preferences_table}(user_id)
-             """)
-
-             # Domain knowledge table
-             domain_knowledge_table = self.TABLE_NAMES[MemoryType.DOMAIN_KNOWLEDGE]
-             conn.execute(f"""
-                 CREATE TABLE IF NOT EXISTS {self.schema}.{domain_knowledge_table} (
-                     id TEXT PRIMARY KEY,
-                     agent TEXT NOT NULL,
-                     project_id TEXT NOT NULL,
-                     domain TEXT,
-                     fact TEXT NOT NULL,
-                     source TEXT,
-                     confidence REAL DEFAULT 1.0,
-                     last_verified TIMESTAMPTZ DEFAULT NOW(),
-                     metadata JSONB,
-                     embedding {vector_type}
-                 )
-             """)
-             conn.execute(f"""
-                 CREATE INDEX IF NOT EXISTS idx_domain_knowledge_project_agent
-                 ON {self.schema}.{domain_knowledge_table}(project_id, agent)
-             """)
-             # Confidence index for efficient filtering by confidence score
-             conn.execute(f"""
-                 CREATE INDEX IF NOT EXISTS idx_domain_knowledge_confidence
-                 ON {self.schema}.{domain_knowledge_table}(project_id, confidence DESC)
-             """)
-
-             # Anti-patterns table
-             anti_patterns_table = self.TABLE_NAMES[MemoryType.ANTI_PATTERNS]
-             conn.execute(f"""
-                 CREATE TABLE IF NOT EXISTS {self.schema}.{anti_patterns_table} (
-                     id TEXT PRIMARY KEY,
-                     agent TEXT NOT NULL,
-                     project_id TEXT NOT NULL,
-                     pattern TEXT NOT NULL,
-                     why_bad TEXT,
-                     better_alternative TEXT,
-                     occurrence_count INTEGER DEFAULT 1,
-                     last_seen TIMESTAMPTZ DEFAULT NOW(),
-                     created_at TIMESTAMPTZ DEFAULT NOW(),
-                     metadata JSONB,
-                     embedding {vector_type}
-                 )
-             """)
-             conn.execute(f"""
-                 CREATE INDEX IF NOT EXISTS idx_anti_patterns_project_agent
-                 ON {self.schema}.{anti_patterns_table}(project_id, agent)
-             """)
-
-             # Create vector indexes if pgvector available
-             # Using HNSW instead of IVFFlat because HNSW can be built on empty tables
-             # IVFFlat requires existing data to build, which causes silent failures on fresh databases
-             if self._pgvector_available:
-                 # Vector-enabled tables use canonical memory type names
-                 vector_tables = [
-                     self.TABLE_NAMES[mt] for mt in MemoryType.VECTOR_ENABLED
-                 ]
-                 for table in vector_tables:
-                     try:
-                         conn.execute(f"""
-                             CREATE INDEX IF NOT EXISTS idx_{table}_embedding
-                             ON {self.schema}.{table}
-                             USING hnsw (embedding vector_cosine_ops)
-                             WITH (m = 16, ef_construction = 64)
-                         """)
-                     except Exception as e:
-                         logger.warning(f"Failed to create HNSW index for {table}: {e}")
-
-             conn.commit()
-
-     def _embedding_to_db(self, embedding: Optional[List[float]]) -> Any:
-         """Convert embedding to database format."""
-         if embedding is None:
-             return None
-         if self._pgvector_available:
-             # pgvector expects string format: '[1.0, 2.0, 3.0]'
-             return f"[{','.join(str(x) for x in embedding)}]"
-         else:
-             # Store as bytes (requires numpy)
-             if not NUMPY_AVAILABLE:
-                 raise ImportError("numpy required for non-pgvector embedding storage")
-             return np.array(embedding, dtype=np.float32).tobytes()
-
-     def _embedding_from_db(self, value: Any) -> Optional[List[float]]:
-         """Convert embedding from database format."""
-         if value is None:
-             return None
-         if self._pgvector_available:
-             # pgvector returns as string or list
-             if isinstance(value, str):
-                 value = value.strip("[]")
-                 return [float(x) for x in value.split(",")]
-             return list(value)
-         else:
-             # Stored as bytes (requires numpy)
-             if not NUMPY_AVAILABLE or np is None:
-                 return None
-             return np.frombuffer(value, dtype=np.float32).tolist()
-
-     def _cosine_similarity(self, a: List[float], b: List[float]) -> float:
-         """Compute cosine similarity between two vectors."""
-         if not NUMPY_AVAILABLE or np is None:
-             # Fallback to pure Python
-             dot = sum(x * y for x, y in zip(a, b, strict=False))
-             norm_a = sum(x * x for x in a) ** 0.5
-             norm_b = sum(x * x for x in b) ** 0.5
-             return dot / (norm_a * norm_b) if norm_a and norm_b else 0.0
-         a_arr = np.array(a)
-         b_arr = np.array(b)
-         return float(
-             np.dot(a_arr, b_arr) / (np.linalg.norm(a_arr) * np.linalg.norm(b_arr))
-         )
-
-     # ==================== WRITE OPERATIONS ====================
-
-     def save_heuristic(self, heuristic: Heuristic) -> str:
-         """Save a heuristic."""
-         with self._get_connection() as conn:
-             conn.execute(
-                 f"""
-                 INSERT INTO {self.schema}.{self.TABLE_NAMES[MemoryType.HEURISTICS]}
-                 (id, agent, project_id, condition, strategy, confidence,
-                  occurrence_count, success_count, last_validated, created_at, metadata, embedding)
-                 VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
-                 ON CONFLICT (id) DO UPDATE SET
-                     condition = EXCLUDED.condition,
-                     strategy = EXCLUDED.strategy,
-                     confidence = EXCLUDED.confidence,
-                     occurrence_count = EXCLUDED.occurrence_count,
-                     success_count = EXCLUDED.success_count,
-                     last_validated = EXCLUDED.last_validated,
-                     metadata = EXCLUDED.metadata,
-                     embedding = EXCLUDED.embedding
-                 """,
-                 (
-                     heuristic.id,
-                     heuristic.agent,
-                     heuristic.project_id,
-                     heuristic.condition,
-                     heuristic.strategy,
-                     heuristic.confidence,
-                     heuristic.occurrence_count,
-                     heuristic.success_count,
-                     heuristic.last_validated,
-                     heuristic.created_at,
-                     json.dumps(heuristic.metadata) if heuristic.metadata else None,
-                     self._embedding_to_db(heuristic.embedding),
-                 ),
-             )
-             conn.commit()
-
-         logger.debug(f"Saved heuristic: {heuristic.id}")
-         return heuristic.id
-
-     def save_outcome(self, outcome: Outcome) -> str:
-         """Save an outcome."""
-         with self._get_connection() as conn:
-             conn.execute(
-                 f"""
-                 INSERT INTO {self.schema}.{self.TABLE_NAMES[MemoryType.OUTCOMES]}
-                 (id, agent, project_id, task_type, task_description, success,
-                  strategy_used, duration_ms, error_message, user_feedback, timestamp, metadata, embedding)
-                 VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
-                 ON CONFLICT (id) DO UPDATE SET
-                     task_description = EXCLUDED.task_description,
-                     success = EXCLUDED.success,
-                     strategy_used = EXCLUDED.strategy_used,
-                     duration_ms = EXCLUDED.duration_ms,
-                     error_message = EXCLUDED.error_message,
-                     user_feedback = EXCLUDED.user_feedback,
-                     metadata = EXCLUDED.metadata,
-                     embedding = EXCLUDED.embedding
-                 """,
-                 (
-                     outcome.id,
-                     outcome.agent,
-                     outcome.project_id,
-                     outcome.task_type,
-                     outcome.task_description,
-                     outcome.success,
-                     outcome.strategy_used,
-                     outcome.duration_ms,
-                     outcome.error_message,
-                     outcome.user_feedback,
-                     outcome.timestamp,
-                     json.dumps(outcome.metadata) if outcome.metadata else None,
-                     self._embedding_to_db(outcome.embedding),
-                 ),
-             )
-             conn.commit()
-
-         logger.debug(f"Saved outcome: {outcome.id}")
-         return outcome.id
-
-     def save_user_preference(self, preference: UserPreference) -> str:
-         """Save a user preference."""
-         with self._get_connection() as conn:
-             conn.execute(
-                 f"""
-                 INSERT INTO {self.schema}.{self.TABLE_NAMES[MemoryType.PREFERENCES]}
-                 (id, user_id, category, preference, source, confidence, timestamp, metadata)
-                 VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
-                 ON CONFLICT (id) DO UPDATE SET
-                     preference = EXCLUDED.preference,
-                     source = EXCLUDED.source,
-                     confidence = EXCLUDED.confidence,
-                     metadata = EXCLUDED.metadata
-                 """,
-                 (
-                     preference.id,
-                     preference.user_id,
-                     preference.category,
-                     preference.preference,
-                     preference.source,
-                     preference.confidence,
-                     preference.timestamp,
-                     json.dumps(preference.metadata) if preference.metadata else None,
-                 ),
-             )
-             conn.commit()
-
-         logger.debug(f"Saved preference: {preference.id}")
-         return preference.id
-
-     def save_domain_knowledge(self, knowledge: DomainKnowledge) -> str:
-         """Save domain knowledge."""
-         with self._get_connection() as conn:
-             conn.execute(
-                 f"""
-                 INSERT INTO {self.schema}.{self.TABLE_NAMES[MemoryType.DOMAIN_KNOWLEDGE]}
-                 (id, agent, project_id, domain, fact, source, confidence, last_verified, metadata, embedding)
-                 VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
-                 ON CONFLICT (id) DO UPDATE SET
-                     fact = EXCLUDED.fact,
-                     source = EXCLUDED.source,
-                     confidence = EXCLUDED.confidence,
-                     last_verified = EXCLUDED.last_verified,
-                     metadata = EXCLUDED.metadata,
-                     embedding = EXCLUDED.embedding
-                 """,
-                 (
-                     knowledge.id,
-                     knowledge.agent,
-                     knowledge.project_id,
-                     knowledge.domain,
-                     knowledge.fact,
-                     knowledge.source,
-                     knowledge.confidence,
-                     knowledge.last_verified,
-                     json.dumps(knowledge.metadata) if knowledge.metadata else None,
-                     self._embedding_to_db(knowledge.embedding),
-                 ),
-             )
-             conn.commit()
-
-         logger.debug(f"Saved domain knowledge: {knowledge.id}")
-         return knowledge.id
-
-     def save_anti_pattern(self, anti_pattern: AntiPattern) -> str:
-         """Save an anti-pattern."""
-         with self._get_connection() as conn:
-             conn.execute(
-                 f"""
-                 INSERT INTO {self.schema}.{self.TABLE_NAMES[MemoryType.ANTI_PATTERNS]}
-                 (id, agent, project_id, pattern, why_bad, better_alternative,
-                  occurrence_count, last_seen, created_at, metadata, embedding)
-                 VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
-                 ON CONFLICT (id) DO UPDATE SET
-                     pattern = EXCLUDED.pattern,
-                     why_bad = EXCLUDED.why_bad,
-                     better_alternative = EXCLUDED.better_alternative,
-                     occurrence_count = EXCLUDED.occurrence_count,
-                     last_seen = EXCLUDED.last_seen,
-                     metadata = EXCLUDED.metadata,
-                     embedding = EXCLUDED.embedding
-                 """,
-                 (
-                     anti_pattern.id,
-                     anti_pattern.agent,
-                     anti_pattern.project_id,
-                     anti_pattern.pattern,
-                     anti_pattern.why_bad,
-                     anti_pattern.better_alternative,
-                     anti_pattern.occurrence_count,
-                     anti_pattern.last_seen,
-                     anti_pattern.created_at,
-                     (
-                         json.dumps(anti_pattern.metadata)
-                         if anti_pattern.metadata
-                         else None
-                     ),
-                     self._embedding_to_db(anti_pattern.embedding),
-                 ),
-             )
-             conn.commit()
-
-         logger.debug(f"Saved anti-pattern: {anti_pattern.id}")
-         return anti_pattern.id
-
-     # ==================== BATCH WRITE OPERATIONS ====================
-
-     def save_heuristics(self, heuristics: List[Heuristic]) -> List[str]:
-         """Save multiple heuristics in a batch using executemany."""
-         if not heuristics:
-             return []
-
-         with self._get_connection() as conn:
-             conn.executemany(
-                 f"""
-                 INSERT INTO {self.schema}.{self.TABLE_NAMES[MemoryType.HEURISTICS]}
-                 (id, agent, project_id, condition, strategy, confidence,
-                  occurrence_count, success_count, last_validated, created_at, metadata, embedding)
-                 VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
-                 ON CONFLICT (id) DO UPDATE SET
-                     condition = EXCLUDED.condition,
-                     strategy = EXCLUDED.strategy,
-                     confidence = EXCLUDED.confidence,
-                     occurrence_count = EXCLUDED.occurrence_count,
-                     success_count = EXCLUDED.success_count,
-                     last_validated = EXCLUDED.last_validated,
-                     metadata = EXCLUDED.metadata,
-                     embedding = EXCLUDED.embedding
-                 """,
-                 [
-                     (
-                         h.id,
-                         h.agent,
-                         h.project_id,
-                         h.condition,
-                         h.strategy,
-                         h.confidence,
-                         h.occurrence_count,
-                         h.success_count,
-                         h.last_validated,
-                         h.created_at,
-                         json.dumps(h.metadata) if h.metadata else None,
-                         self._embedding_to_db(h.embedding),
-                     )
-                     for h in heuristics
-                 ],
-             )
-             conn.commit()
-
-         logger.debug(f"Batch saved {len(heuristics)} heuristics")
-         return [h.id for h in heuristics]
-
-     def save_outcomes(self, outcomes: List[Outcome]) -> List[str]:
-         """Save multiple outcomes in a batch using executemany."""
-         if not outcomes:
-             return []
-
-         with self._get_connection() as conn:
-             conn.executemany(
-                 f"""
-                 INSERT INTO {self.schema}.{self.TABLE_NAMES[MemoryType.OUTCOMES]}
-                 (id, agent, project_id, task_type, task_description, success,
-                  strategy_used, duration_ms, error_message, user_feedback, timestamp, metadata, embedding)
-                 VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
-                 ON CONFLICT (id) DO UPDATE SET
-                     task_description = EXCLUDED.task_description,
-                     success = EXCLUDED.success,
-                     strategy_used = EXCLUDED.strategy_used,
-                     duration_ms = EXCLUDED.duration_ms,
-                     error_message = EXCLUDED.error_message,
-                     user_feedback = EXCLUDED.user_feedback,
-                     metadata = EXCLUDED.metadata,
-                     embedding = EXCLUDED.embedding
-                 """,
-                 [
-                     (
-                         o.id,
-                         o.agent,
-                         o.project_id,
-                         o.task_type,
-                         o.task_description,
-                         o.success,
-                         o.strategy_used,
-                         o.duration_ms,
-                         o.error_message,
-                         o.user_feedback,
-                         o.timestamp,
-                         json.dumps(o.metadata) if o.metadata else None,
-                         self._embedding_to_db(o.embedding),
-                     )
-                     for o in outcomes
-                 ],
-             )
-             conn.commit()
-
-         logger.debug(f"Batch saved {len(outcomes)} outcomes")
-         return [o.id for o in outcomes]
-
-     def save_domain_knowledge_batch(
-         self, knowledge_items: List[DomainKnowledge]
-     ) -> List[str]:
-         """Save multiple domain knowledge items in a batch using executemany."""
-         if not knowledge_items:
-             return []
-
-         with self._get_connection() as conn:
-             conn.executemany(
-                 f"""
-                 INSERT INTO {self.schema}.{self.TABLE_NAMES[MemoryType.DOMAIN_KNOWLEDGE]}
-                 (id, agent, project_id, domain, fact, source, confidence, last_verified, metadata, embedding)
-                 VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
-                 ON CONFLICT (id) DO UPDATE SET
-                     fact = EXCLUDED.fact,
-                     source = EXCLUDED.source,
-                     confidence = EXCLUDED.confidence,
-                     last_verified = EXCLUDED.last_verified,
-                     metadata = EXCLUDED.metadata,
-                     embedding = EXCLUDED.embedding
-                 """,
-                 [
-                     (
-                         k.id,
-                         k.agent,
-                         k.project_id,
-                         k.domain,
-                         k.fact,
-                         k.source,
-                         k.confidence,
-                         k.last_verified,
-                         json.dumps(k.metadata) if k.metadata else None,
-                         self._embedding_to_db(k.embedding),
-                     )
-                     for k in knowledge_items
-                 ],
-             )
-             conn.commit()
-
-         logger.debug(f"Batch saved {len(knowledge_items)} domain knowledge items")
-         return [k.id for k in knowledge_items]
-
-     # ==================== READ OPERATIONS ====================
-
-     def get_heuristics(
-         self,
-         project_id: str,
-         agent: Optional[str] = None,
-         embedding: Optional[List[float]] = None,
-         top_k: int = 5,
-         min_confidence: float = 0.0,
-     ) -> List[Heuristic]:
-         """Get heuristics with optional vector search."""
-         with self._get_connection() as conn:
-             if embedding and self._pgvector_available:
-                 # Use pgvector similarity search
-                 query = f"""
-                     SELECT *, 1 - (embedding <=> %s::vector) as similarity
-                     FROM {self.schema}.{self.TABLE_NAMES[MemoryType.HEURISTICS]}
-                     WHERE project_id = %s AND confidence >= %s
-                 """
-                 params: List[Any] = [
-                     self._embedding_to_db(embedding),
-                     project_id,
-                     min_confidence,
-                 ]
-
-                 if agent:
-                     query += " AND agent = %s"
-                     params.append(agent)
-
-                 query += " ORDER BY similarity DESC LIMIT %s"
-                 params.append(top_k)
-             else:
-                 # Standard query
-                 query = f"""
-                     SELECT *
-                     FROM {self.schema}.{self.TABLE_NAMES[MemoryType.HEURISTICS]}
-                     WHERE project_id = %s AND confidence >= %s
-                 """
-                 params = [project_id, min_confidence]
-
-                 if agent:
-                     query += " AND agent = %s"
-                     params.append(agent)
-
-                 query += " ORDER BY confidence DESC LIMIT %s"
-                 params.append(top_k)
-
-             cursor = conn.execute(query, params)
-             rows = cursor.fetchall()
-
-             results = [self._row_to_heuristic(row) for row in rows]
-
-             # If embedding provided but pgvector not available, do app-level filtering
-             if embedding and not self._pgvector_available and results:
-                 results = self._filter_by_similarity(results, embedding, top_k, "embedding")
-
-             return results
-
-     def get_outcomes(
-         self,
-         project_id: str,
-         agent: Optional[str] = None,
-         task_type: Optional[str] = None,
-         embedding: Optional[List[float]] = None,
-         top_k: int = 5,
-         success_only: bool = False,
-     ) -> List[Outcome]:
-         """Get outcomes with optional vector search."""
-         with self._get_connection() as conn:
-             if embedding and self._pgvector_available:
-                 query = f"""
-                     SELECT *, 1 - (embedding <=> %s::vector) as similarity
-                     FROM {self.schema}.{self.TABLE_NAMES[MemoryType.OUTCOMES]}
-                     WHERE project_id = %s
-                 """
-                 params: List[Any] = [self._embedding_to_db(embedding), project_id]
-             else:
-                 query = f"""
-                     SELECT *
-                     FROM {self.schema}.{self.TABLE_NAMES[MemoryType.OUTCOMES]}
-                     WHERE project_id = %s
-                 """
-                 params = [project_id]
-
-             if agent:
-                 query += " AND agent = %s"
-                 params.append(agent)
-
-             if task_type:
-                 query += " AND task_type = %s"
-                 params.append(task_type)
-
-             if success_only:
-                 query += " AND success = TRUE"
-
-             if embedding and self._pgvector_available:
-                 query += " ORDER BY similarity DESC LIMIT %s"
-             else:
-                 query += " ORDER BY timestamp DESC LIMIT %s"
-             params.append(top_k)
-
-             cursor = conn.execute(query, params)
-             rows = cursor.fetchall()
-
-             results = [self._row_to_outcome(row) for row in rows]
-
-             if embedding and not self._pgvector_available and results:
-                 results = self._filter_by_similarity(results, embedding, top_k, "embedding")
-
-             return results
-
-     def get_user_preferences(
-         self,
-         user_id: str,
-         category: Optional[str] = None,
-     ) -> List[UserPreference]:
-         """Get user preferences."""
-         with self._get_connection() as conn:
-             query = f"SELECT * FROM {self.schema}.{self.TABLE_NAMES[MemoryType.PREFERENCES]} WHERE user_id = %s"
-             params: List[Any] = [user_id]
-
-             if category:
-                 query += " AND category = %s"
-                 params.append(category)
-
-             cursor = conn.execute(query, params)
-             rows = cursor.fetchall()
-
-             return [self._row_to_preference(row) for row in rows]
-
-     def get_domain_knowledge(
-         self,
-         project_id: str,
-         agent: Optional[str] = None,
-         domain: Optional[str] = None,
-         embedding: Optional[List[float]] = None,
-         top_k: int = 5,
-     ) -> List[DomainKnowledge]:
-         """Get domain knowledge with optional vector search."""
-         with self._get_connection() as conn:
-             if embedding and self._pgvector_available:
-                 query = f"""
-                     SELECT *, 1 - (embedding <=> %s::vector) as similarity
-                     FROM {self.schema}.{self.TABLE_NAMES[MemoryType.DOMAIN_KNOWLEDGE]}
-                     WHERE project_id = %s
-                 """
-                 params: List[Any] = [self._embedding_to_db(embedding), project_id]
-             else:
-                 query = f"""
-                     SELECT *
-                     FROM {self.schema}.{self.TABLE_NAMES[MemoryType.DOMAIN_KNOWLEDGE]}
-                     WHERE project_id = %s
-                 """
-                 params = [project_id]
-
-             if agent:
-                 query += " AND agent = %s"
-                 params.append(agent)
-
-             if domain:
-                 query += " AND domain = %s"
-                 params.append(domain)
-
-             if embedding and self._pgvector_available:
-                 query += " ORDER BY similarity DESC LIMIT %s"
-             else:
-                 query += " ORDER BY confidence DESC LIMIT %s"
-             params.append(top_k)
-
-             cursor = conn.execute(query, params)
-             rows = cursor.fetchall()
-
-             results = [self._row_to_domain_knowledge(row) for row in rows]
-
-             if embedding and not self._pgvector_available and results:
-                 results = self._filter_by_similarity(results, embedding, top_k, "embedding")
-
-             return results
-
-     def get_anti_patterns(
-         self,
-         project_id: str,
-         agent: Optional[str] = None,
-         embedding: Optional[List[float]] = None,
-         top_k: int = 5,
-     ) -> List[AntiPattern]:
-         """Get anti-patterns with optional vector search."""
-         with self._get_connection() as conn:
-             if embedding and self._pgvector_available:
-                 query = f"""
-                     SELECT *, 1 - (embedding <=> %s::vector) as similarity
-                     FROM {self.schema}.{self.TABLE_NAMES[MemoryType.ANTI_PATTERNS]}
-                     WHERE project_id = %s
-                 """
-                 params: List[Any] = [self._embedding_to_db(embedding), project_id]
-             else:
-                 query = f"""
-                     SELECT *
-                     FROM {self.schema}.{self.TABLE_NAMES[MemoryType.ANTI_PATTERNS]}
-                     WHERE project_id = %s
-                 """
-                 params = [project_id]
-
-             if agent:
-                 query += " AND agent = %s"
-                 params.append(agent)
-
-             if embedding and self._pgvector_available:
-                 query += " ORDER BY similarity DESC LIMIT %s"
-             else:
-                 query += " ORDER BY occurrence_count DESC LIMIT %s"
-             params.append(top_k)
-
-             cursor = conn.execute(query, params)
-             rows = cursor.fetchall()
-
-             results = [self._row_to_anti_pattern(row) for row in rows]
-
-             if embedding and not self._pgvector_available and results:
-                 results = self._filter_by_similarity(results, embedding, top_k, "embedding")
-
-             return results
-
-     def _filter_by_similarity(
-         self,
-         items: List[Any],
-         query_embedding: List[float],
-         top_k: int,
-         embedding_attr: str,
-     ) -> List[Any]:
-         """Filter items by cosine similarity (fallback when pgvector unavailable)."""
-         scored = []
-         for item in items:
-             item_embedding = getattr(item, embedding_attr, None)
-             if item_embedding:
-                 similarity = self._cosine_similarity(query_embedding, item_embedding)
-                 scored.append((item, similarity))
-             else:
-                 scored.append((item, 0.0))
-
-         scored.sort(key=lambda x: x[1], reverse=True)
-         return [item for item, _ in scored[:top_k]]
-
-     # ==================== MULTI-AGENT MEMORY SHARING ====================
-
-     def get_heuristics_for_agents(
-         self,
-         project_id: str,
-         agents: List[str],
-         embedding: Optional[List[float]] = None,
-         top_k: int = 5,
-         min_confidence: float = 0.0,
-     ) -> List[Heuristic]:
-         """Get heuristics from multiple agents using optimized ANY query."""
-         if not agents:
-             return []
-
-         with self._get_connection() as conn:
-             if embedding and self._pgvector_available:
-                 query = f"""
-                     SELECT *, 1 - (embedding <=> %s::vector) as similarity
-                     FROM {self.schema}.{self.TABLE_NAMES[MemoryType.HEURISTICS]}
-                     WHERE project_id = %s AND confidence >= %s AND agent = ANY(%s)
-                     ORDER BY similarity DESC LIMIT %s
-                 """
-                 params: List[Any] = [
-                     self._embedding_to_db(embedding),
-                     project_id,
-                     min_confidence,
-                     agents,
-                     top_k * len(agents),
-                 ]
-             else:
-                 query = f"""
-                     SELECT *
-                     FROM {self.schema}.{self.TABLE_NAMES[MemoryType.HEURISTICS]}
-                     WHERE project_id = %s AND confidence >= %s AND agent = ANY(%s)
-                     ORDER BY confidence DESC LIMIT %s
-                 """
-                 params = [project_id, min_confidence, agents, top_k * len(agents)]
-
-             cursor = conn.execute(query, params)
-             rows = cursor.fetchall()
-
-             results = [self._row_to_heuristic(row) for row in rows]
-
-             if embedding and not self._pgvector_available and results:
-                 results = self._filter_by_similarity(
-                     results, embedding, top_k * len(agents), "embedding"
-                 )
-
-             return results
-
-     def get_outcomes_for_agents(
-         self,
-         project_id: str,
-         agents: List[str],
-         task_type: Optional[str] = None,
-         embedding: Optional[List[float]] = None,
-         top_k: int = 5,
-         success_only: bool = False,
-     ) -> List[Outcome]:
-         """Get outcomes from multiple agents using optimized ANY query."""
-         if not agents:
-             return []
-
-         with self._get_connection() as conn:
-             if embedding and self._pgvector_available:
-                 query = f"""
-                     SELECT *, 1 - (embedding <=> %s::vector) as similarity
-                     FROM {self.schema}.{self.TABLE_NAMES[MemoryType.OUTCOMES]}
-                     WHERE project_id = %s AND agent = ANY(%s)
-                 """
-                 params: List[Any] = [
-                     self._embedding_to_db(embedding),
-                     project_id,
-                     agents,
-                 ]
-             else:
-                 query = f"""
-                     SELECT *
-                     FROM {self.schema}.{self.TABLE_NAMES[MemoryType.OUTCOMES]}
-                     WHERE project_id = %s AND agent = ANY(%s)
-                 """
-                 params = [project_id, agents]
-
-             if task_type:
-                 query += " AND task_type = %s"
-                 params.append(task_type)
-
-             if success_only:
-                 query += " AND success = TRUE"
-
-             if embedding and self._pgvector_available:
-                 query += " ORDER BY similarity DESC LIMIT %s"
-             else:
-                 query += " ORDER BY timestamp DESC LIMIT %s"
-             params.append(top_k * len(agents))
-
-             cursor = conn.execute(query, params)
-             rows = cursor.fetchall()
-
-             results = [self._row_to_outcome(row) for row in rows]
-
-             if embedding and not self._pgvector_available and results:
-                 results = self._filter_by_similarity(
-                     results, embedding, top_k * len(agents), "embedding"
-                 )
-
-             return results
-
-     def get_domain_knowledge_for_agents(
-         self,
-         project_id: str,
-         agents: List[str],
-         domain: Optional[str] = None,
-         embedding: Optional[List[float]] = None,
-         top_k: int = 5,
-     ) -> List[DomainKnowledge]:
-         """Get domain knowledge from multiple agents using optimized ANY query."""
-         if not agents:
-             return []
-
-         with self._get_connection() as conn:
-             if embedding and self._pgvector_available:
-                 query = f"""
-                     SELECT *, 1 - (embedding <=> %s::vector) as similarity
-                     FROM {self.schema}.{self.TABLE_NAMES[MemoryType.DOMAIN_KNOWLEDGE]}
-                     WHERE project_id = %s AND agent = ANY(%s)
-                 """
-                 params: List[Any] = [
-                     self._embedding_to_db(embedding),
-                     project_id,
-                     agents,
-                 ]
-             else:
-                 query = f"""
-                     SELECT *
-                     FROM {self.schema}.{self.TABLE_NAMES[MemoryType.DOMAIN_KNOWLEDGE]}
-                     WHERE project_id = %s AND agent = ANY(%s)
-                 """
-                 params = [project_id, agents]
-
-             if domain:
-                 query += " AND domain = %s"
-                 params.append(domain)
-
-             if embedding and self._pgvector_available:
-                 query += " ORDER BY similarity DESC LIMIT %s"
-             else:
-                 query += " ORDER BY confidence DESC LIMIT %s"
-             params.append(top_k * len(agents))
-
-             cursor = conn.execute(query, params)
-             rows = cursor.fetchall()
-
-             results = [self._row_to_domain_knowledge(row) for row in rows]
-
-             if embedding and not self._pgvector_available and results:
-                 results = self._filter_by_similarity(
-                     results, embedding, top_k * len(agents), "embedding"
-                 )
-
-             return results
-
-     def get_anti_patterns_for_agents(
-         self,
-         project_id: str,
-         agents: List[str],
-         embedding: Optional[List[float]] = None,
-         top_k: int = 5,
-     ) -> List[AntiPattern]:
-         """Get anti-patterns from multiple agents using optimized ANY query."""
-         if not agents:
-             return []
-
-         with self._get_connection() as conn:
-             if embedding and self._pgvector_available:
-                 query = f"""
-                     SELECT *, 1 - (embedding <=> %s::vector) as similarity
-                     FROM {self.schema}.{self.TABLE_NAMES[MemoryType.ANTI_PATTERNS]}
-                     WHERE project_id = %s AND agent = ANY(%s)
-                 """
-                 params: List[Any] = [
-                     self._embedding_to_db(embedding),
-                     project_id,
-                     agents,
-                 ]
-             else:
-                 query = f"""
-                     SELECT *
-                     FROM {self.schema}.{self.TABLE_NAMES[MemoryType.ANTI_PATTERNS]}
-                     WHERE project_id = %s AND agent = ANY(%s)
-                 """
-                 params = [project_id, agents]
-
-             if embedding and self._pgvector_available:
-                 query += " ORDER BY similarity DESC LIMIT %s"
-             else:
-                 query += " ORDER BY occurrence_count DESC LIMIT %s"
-             params.append(top_k * len(agents))
-
-             cursor = conn.execute(query, params)
-             rows = cursor.fetchall()
-
-             results = [self._row_to_anti_pattern(row) for row in rows]
-
-             if embedding and not self._pgvector_available and results:
-                 results = self._filter_by_similarity(
-                     results, embedding, top_k * len(agents), "embedding"
-                 )
-
-             return results
-
-     # ==================== UPDATE OPERATIONS ====================
-
-     def update_heuristic(
-         self,
-         heuristic_id: str,
-         updates: Dict[str, Any],
-     ) -> bool:
-         """Update a heuristic's fields."""
-         if not updates:
-             return False
-
-         set_clauses = []
-         params = []
-         for key, value in updates.items():
-             if key == "metadata" and value:
-                 value = json.dumps(value)
-             set_clauses.append(f"{key} = %s")
-             params.append(value)
-
-         params.append(heuristic_id)
-
-         with self._get_connection() as conn:
-             cursor = conn.execute(
-                 f"UPDATE {self.schema}.{self.TABLE_NAMES[MemoryType.HEURISTICS]} SET {', '.join(set_clauses)} WHERE id = %s",
-                 params,
-             )
-             conn.commit()
-             return cursor.rowcount > 0
-
-     def increment_heuristic_occurrence(
-         self,
-         heuristic_id: str,
-         success: bool,
-     ) -> bool:
-         """Increment heuristic occurrence count."""
-         with self._get_connection() as conn:
-             if success:
-                 cursor = conn.execute(
-                     f"""
-                     UPDATE {self.schema}.{self.TABLE_NAMES[MemoryType.HEURISTICS]}
-                     SET occurrence_count = occurrence_count + 1,
-                         success_count = success_count + 1,
-                         last_validated = %s
-                     WHERE id = %s
-                     """,
-                     (datetime.now(timezone.utc), heuristic_id),
-                 )
-             else:
-                 cursor = conn.execute(
-                     f"""
-                     UPDATE {self.schema}.{self.TABLE_NAMES[MemoryType.HEURISTICS]}
-                     SET occurrence_count = occurrence_count + 1,
-                         last_validated = %s
-                     WHERE id = %s
-                     """,
-                     (datetime.now(timezone.utc), heuristic_id),
-                 )
-             conn.commit()
-             return cursor.rowcount > 0
-
-     def update_heuristic_confidence(
-         self,
-         heuristic_id: str,
-         new_confidence: float,
-     ) -> bool:
-         """Update confidence score for a heuristic."""
-         with self._get_connection() as conn:
-             cursor = conn.execute(
-                 f"UPDATE {self.schema}.{self.TABLE_NAMES[MemoryType.HEURISTICS]} SET confidence = %s WHERE id = %s",
-                 (new_confidence, heuristic_id),
-             )
-             conn.commit()
-             return cursor.rowcount > 0
-
-     def update_knowledge_confidence(
-         self,
-         knowledge_id: str,
-         new_confidence: float,
-     ) -> bool:
-         """Update confidence score for domain knowledge."""
-         with self._get_connection() as conn:
-             cursor = conn.execute(
-                 f"UPDATE {self.schema}.{self.TABLE_NAMES[MemoryType.DOMAIN_KNOWLEDGE]} SET confidence = %s WHERE id = %s",
-                 (new_confidence, knowledge_id),
-             )
-             conn.commit()
-             return cursor.rowcount > 0
-
-     # ==================== DELETE OPERATIONS ====================
-
-     def delete_heuristic(self, heuristic_id: str) -> bool:
-         """Delete a heuristic by ID."""
-         with self._get_connection() as conn:
-             cursor = conn.execute(
-                 f"DELETE FROM {self.schema}.{self.TABLE_NAMES[MemoryType.HEURISTICS]} WHERE id = %s",
-                 (heuristic_id,),
-             )
-             conn.commit()
-             return cursor.rowcount > 0
-
-     def delete_outcome(self, outcome_id: str) -> bool:
-         """Delete an outcome by ID."""
-         with self._get_connection() as conn:
-             cursor = conn.execute(
-                 f"DELETE FROM {self.schema}.{self.TABLE_NAMES[MemoryType.OUTCOMES]} WHERE id = %s",
-                 (outcome_id,),
-             )
-             conn.commit()
-             return cursor.rowcount > 0
-
-     def delete_domain_knowledge(self, knowledge_id: str) -> bool:
-         """Delete domain knowledge by ID."""
-         with self._get_connection() as conn:
-             cursor = conn.execute(
-                 f"DELETE FROM {self.schema}.{self.TABLE_NAMES[MemoryType.DOMAIN_KNOWLEDGE]} WHERE id = %s",
-                 (knowledge_id,),
-             )
-             conn.commit()
-             return cursor.rowcount > 0
-
-     def delete_anti_pattern(self, anti_pattern_id: str) -> bool:
-         """Delete an anti-pattern by ID."""
-         with self._get_connection() as conn:
-             cursor = conn.execute(
-                 f"DELETE FROM {self.schema}.{self.TABLE_NAMES[MemoryType.ANTI_PATTERNS]} WHERE id = %s",
-                 (anti_pattern_id,),
-             )
-             conn.commit()
-             return cursor.rowcount > 0
-
-     def delete_outcomes_older_than(
-         self,
-         project_id: str,
-         older_than: datetime,
-         agent: Optional[str] = None,
-     ) -> int:
-         """Delete old outcomes."""
-         with self._get_connection() as conn:
-             query = f"DELETE FROM {self.schema}.{self.TABLE_NAMES[MemoryType.OUTCOMES]} WHERE project_id = %s AND timestamp < %s"
-             params: List[Any] = [project_id, older_than]
-
-             if agent:
-                 query += " AND agent = %s"
-                 params.append(agent)
-
-             cursor = conn.execute(query, params)
-             conn.commit()
-             deleted = cursor.rowcount
-
-         logger.info(f"Deleted {deleted} old outcomes")
-         return deleted
-
-     def delete_low_confidence_heuristics(
-         self,
-         project_id: str,
-         below_confidence: float,
-         agent: Optional[str] = None,
-     ) -> int:
-         """Delete low-confidence heuristics."""
-         with self._get_connection() as conn:
-             query = f"DELETE FROM {self.schema}.{self.TABLE_NAMES[MemoryType.HEURISTICS]} WHERE project_id = %s AND confidence < %s"
-             params: List[Any] = [project_id, below_confidence]
-
-             if agent:
-                 query += " AND agent = %s"
-                 params.append(agent)
-
-             cursor = conn.execute(query, params)
-             conn.commit()
-             deleted = cursor.rowcount
-
-         logger.info(f"Deleted {deleted} low-confidence heuristics")
-         return deleted
-
-     # ==================== STATS ====================
-
-     def get_stats(
-         self,
-         project_id: str,
-         agent: Optional[str] = None,
-     ) -> Dict[str, Any]:
-         """Get memory statistics."""
-         stats = {
-             "project_id": project_id,
-             "agent": agent,
-             "storage_type": "postgresql",
-             "pgvector_available": self._pgvector_available,
-         }
-
-         with self._get_connection() as conn:
-             # Use canonical memory types for stats
-             for memory_type in MemoryType.ALL:
-                 table = self.TABLE_NAMES[memory_type]
-                 if memory_type == MemoryType.PREFERENCES:
-                     # Preferences don't have project_id
-                     cursor = conn.execute(
-                         f"SELECT COUNT(*) as count FROM {self.schema}.{table}"
-                     )
-                     row = cursor.fetchone()
-                     stats[f"{memory_type}_count"] = row["count"] if row else 0
-                 else:
-                     query = f"SELECT COUNT(*) as count FROM {self.schema}.{table} WHERE project_id = %s"
-                     params: List[Any] = [project_id]
-                     if agent:
-                         query += " AND agent = %s"
-                         params.append(agent)
-                     cursor = conn.execute(query, params)
-                     row = cursor.fetchone()
-                     stats[f"{memory_type}_count"] = row["count"] if row else 0
-
-         stats["total_count"] = sum(
-             stats.get(k, 0) for k in stats if k.endswith("_count")
-         )
-
-         return stats
-
-     # ==================== HELPERS ====================
-
-     def _parse_datetime(self, value: Any) -> Optional[datetime]:
-         """Parse datetime from database value."""
-         if value is None:
-             return None
-         if isinstance(value, datetime):
-             return value
-         try:
-             return datetime.fromisoformat(str(value).replace("Z", "+00:00"))
-         except (ValueError, AttributeError):
-             return None
-
-     def _row_to_heuristic(self, row: Dict[str, Any]) -> Heuristic:
-         """Convert database row to Heuristic."""
-         return Heuristic(
-             id=row["id"],
-             agent=row["agent"],
-             project_id=row["project_id"],
-             condition=row["condition"],
-             strategy=row["strategy"],
-             confidence=row["confidence"] or 0.0,
-             occurrence_count=row["occurrence_count"] or 0,
-             success_count=row["success_count"] or 0,
-             last_validated=self._parse_datetime(row["last_validated"])
-             or datetime.now(timezone.utc),
-             created_at=self._parse_datetime(row["created_at"])
-             or datetime.now(timezone.utc),
-             embedding=self._embedding_from_db(row.get("embedding")),
-             metadata=row["metadata"] if row["metadata"] else {},
-         )
-
-     def _row_to_outcome(self, row: Dict[str, Any]) -> Outcome:
-         """Convert database row to Outcome."""
-         return Outcome(
-             id=row["id"],
-             agent=row["agent"],
-             project_id=row["project_id"],
-             task_type=row["task_type"] or "general",
-             task_description=row["task_description"],
-             success=bool(row["success"]),
-             strategy_used=row["strategy_used"] or "",
-             duration_ms=row["duration_ms"],
-             error_message=row["error_message"],
-             user_feedback=row["user_feedback"],
-             timestamp=self._parse_datetime(row["timestamp"])
-             or datetime.now(timezone.utc),
-             embedding=self._embedding_from_db(row.get("embedding")),
-             metadata=row["metadata"] if row["metadata"] else {},
-         )
-
-     def _row_to_preference(self, row: Dict[str, Any]) -> UserPreference:
-         """Convert database row to UserPreference."""
-         return UserPreference(
-             id=row["id"],
-             user_id=row["user_id"],
-             category=row["category"] or "general",
-             preference=row["preference"],
-             source=row["source"] or "unknown",
-             confidence=row["confidence"] or 1.0,
-             timestamp=self._parse_datetime(row["timestamp"])
-             or datetime.now(timezone.utc),
-             metadata=row["metadata"] if row["metadata"] else {},
-         )
-
-     def _row_to_domain_knowledge(self, row: Dict[str, Any]) -> DomainKnowledge:
-         """Convert database row to DomainKnowledge."""
-         return DomainKnowledge(
-             id=row["id"],
-             agent=row["agent"],
-             project_id=row["project_id"],
-             domain=row["domain"] or "general",
-             fact=row["fact"],
-             source=row["source"] or "unknown",
-             confidence=row["confidence"] or 1.0,
-             last_verified=self._parse_datetime(row["last_verified"])
-             or datetime.now(timezone.utc),
-             embedding=self._embedding_from_db(row.get("embedding")),
-             metadata=row["metadata"] if row["metadata"] else {},
-         )
-
-     def _row_to_anti_pattern(self, row: Dict[str, Any]) -> AntiPattern:
-         """Convert database row to AntiPattern."""
-         return AntiPattern(
-             id=row["id"],
-             agent=row["agent"],
-             project_id=row["project_id"],
-             pattern=row["pattern"],
-             why_bad=row["why_bad"] or "",
-             better_alternative=row["better_alternative"] or "",
-             occurrence_count=row["occurrence_count"] or 1,
-             last_seen=self._parse_datetime(row["last_seen"])
-             or datetime.now(timezone.utc),
-             created_at=self._parse_datetime(row["created_at"])
-             or datetime.now(timezone.utc),
-             embedding=self._embedding_from_db(row.get("embedding")),
-             metadata=row["metadata"] if row["metadata"] else {},
-         )
-
-     def close(self):
-         """Close connection pool."""
-         if self._pool:
-             self._pool.close()
-
-     # ==================== MIGRATION SUPPORT ====================
-
-     def _get_version_store(self):
-         """Get or create the version store."""
-         if self._version_store is None:
-             from alma.storage.migrations.version_stores import PostgreSQLVersionStore
-
-             self._version_store = PostgreSQLVersionStore(self._pool, self.schema)
-         return self._version_store
-
-     def _get_migration_runner(self):
-         """Get or create the migration runner."""
-         if self._migration_runner is None:
-             from alma.storage.migrations.runner import MigrationRunner
-             from alma.storage.migrations.versions import v1_0_0  # noqa: F401
-
-             self._migration_runner = MigrationRunner(
-                 version_store=self._get_version_store(),
-                 backend="postgresql",
-             )
-         return self._migration_runner
-
-     def _ensure_migrated(self) -> None:
-         """Ensure database is migrated to latest version."""
-         runner = self._get_migration_runner()
-         if runner.needs_migration():
-             with self._get_connection() as conn:
-                 applied = runner.migrate(conn)
-                 if applied:
-                     logger.info(f"Applied {len(applied)} migrations: {applied}")
-
-     def get_schema_version(self) -> Optional[str]:
-         """Get the current schema version."""
-         return self._get_version_store().get_current_version()
-
-     def get_migration_status(self) -> Dict[str, Any]:
-         """Get migration status information."""
-         runner = self._get_migration_runner()
-         status = runner.get_status()
-         status["migration_supported"] = True
-         return status
-
-     def migrate(
-         self,
-         target_version: Optional[str] = None,
-         dry_run: bool = False,
-     ) -> List[str]:
-         """
-         Apply pending schema migrations.
-
-         Args:
-             target_version: Optional target version (applies all if not specified)
-             dry_run: If True, show what would be done without making changes
-
-         Returns:
-             List of applied migration versions
-         """
-         runner = self._get_migration_runner()
-         with self._get_connection() as conn:
-             return runner.migrate(conn, target_version=target_version, dry_run=dry_run)
-
-     def rollback(
-         self,
-         target_version: str,
-         dry_run: bool = False,
-     ) -> List[str]:
-         """
-         Roll back schema to a previous version.
-
-         Args:
-             target_version: Version to roll back to
-             dry_run: If True, show what would be done without making changes
-
-         Returns:
-             List of rolled back migration versions
-         """
-         runner = self._get_migration_runner()
-         with self._get_connection() as conn:
-             return runner.rollback(conn, target_version=target_version, dry_run=dry_run)
1
+ """
2
+ ALMA PostgreSQL Storage Backend.
3
+
4
+ Production-ready storage using PostgreSQL with pgvector extension for
5
+ native vector similarity search. Supports connection pooling.
6
+
7
+ Recommended for:
8
+ - Customer deployments (Azure PostgreSQL, AWS RDS, etc.)
9
+ - Self-hosted production environments
10
+ - High-availability requirements
11
+
12
+ v0.6.0 adds workflow context support:
13
+ - Checkpoint tables for crash recovery
14
+ - WorkflowOutcome tables for learning from workflows
15
+ - ArtifactRef tables for linking external files
16
+ - scope_filter parameter for workflow-scoped queries
17
+ """
18
+
19
+ import json
20
+ import logging
21
+ import os
22
+ from contextlib import contextmanager
23
+ from datetime import datetime, timezone
24
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional
25
+
26
+ # numpy is optional - needed only when pgvector is unavailable (BYTEA embedding storage and app-level cosine fallback)
27
+ try:
28
+ import numpy as np
29
+
30
+ NUMPY_AVAILABLE = True
31
+ except ImportError:
32
+ np = None # type: ignore
33
+ NUMPY_AVAILABLE = False
34
+
35
+ from alma.storage.base import StorageBackend
36
+ from alma.storage.constants import POSTGRESQL_TABLE_NAMES, MemoryType
37
+ from alma.types import (
38
+ AntiPattern,
39
+ DomainKnowledge,
40
+ Heuristic,
41
+ Outcome,
42
+ UserPreference,
43
+ )
44
+
45
+ if TYPE_CHECKING:
46
+ from alma.workflow import ArtifactRef, Checkpoint, WorkflowOutcome
47
+
48
+ logger = logging.getLogger(__name__)
49
+
50
+ # Try to import psycopg (v3) with connection pooling
51
+ try:
52
+ from psycopg.rows import dict_row
53
+ from psycopg_pool import ConnectionPool
54
+
55
+ PSYCOPG_AVAILABLE = True
56
+ except ImportError:
57
+ PSYCOPG_AVAILABLE = False
58
+ logger.warning(
59
+ "psycopg not installed. Install with: pip install 'alma-memory[postgres]'"
60
+ )
61
+
62
+
63
+ class PostgreSQLStorage(StorageBackend):
64
+ """
65
+ PostgreSQL storage backend with pgvector support.
66
+
67
+ Uses native PostgreSQL vector operations for efficient similarity search.
68
+ Falls back to application-level cosine similarity if pgvector is not installed.
69
+
70
+ Database schema (uses canonical memory type names with alma_ prefix):
71
+ - alma_heuristics: id, agent, project_id, condition, strategy, ...
72
+ - alma_outcomes: id, agent, project_id, task_type, ...
73
+ - alma_preferences: id, user_id, category, preference, ...
74
+ - alma_domain_knowledge: id, agent, project_id, domain, fact, ...
75
+ - alma_anti_patterns: id, agent, project_id, pattern, ...
76
+
77
+ Vector search:
78
+ - Uses pgvector extension if available
79
+ - Embeddings stored as VECTOR type with cosine distance operator (<=>)
80
+
81
+ Table names are derived from alma.storage.constants.POSTGRESQL_TABLE_NAMES
82
+ for consistency across all storage backends.
83
+ """
84
+
85
+ # Table names from constants for consistent naming
86
+ TABLE_NAMES = POSTGRESQL_TABLE_NAMES
87
+
88
+ def __init__(
89
+ self,
90
+ host: str,
91
+ port: int,
92
+ database: str,
93
+ user: str,
94
+ password: str,
95
+ embedding_dim: int = 384,
96
+ pool_size: int = 10,
97
+ schema: str = "public",
98
+ ssl_mode: str = "prefer",
99
+ auto_migrate: bool = True,
100
+ ):
101
+ """
102
+ Initialize PostgreSQL storage.
103
+
104
+ Args:
105
+ host: Database host
106
+ port: Database port
107
+ database: Database name
108
+ user: Database user
109
+ password: Database password
110
+ embedding_dim: Dimension of embedding vectors
111
+ pool_size: Connection pool size
112
+ schema: Database schema (default: public)
113
+ ssl_mode: SSL mode (disable, allow, prefer, require, verify-ca, verify-full)
114
+ auto_migrate: If True, automatically apply pending migrations on startup
115
+ """
116
+ if not PSYCOPG_AVAILABLE:
117
+ raise ImportError(
118
+ "psycopg not installed. Install with: pip install 'alma-memory[postgres]'"
119
+ )
120
+
121
+ self.embedding_dim = embedding_dim
122
+ self.schema = schema
123
+ self._pgvector_available = False
124
+
125
+ # Migration support (lazy-loaded)
126
+ self._migration_runner = None
127
+ self._version_store = None
128
+
129
+ # Build connection string
130
+ conninfo = (
131
+ f"host={host} port={port} dbname={database} "
132
+ f"user={user} password={password} sslmode={ssl_mode}"
133
+ )
134
+
135
+ # Create connection pool
136
+ self._pool = ConnectionPool(
137
+ conninfo=conninfo,
138
+ min_size=1,
139
+ max_size=pool_size,
140
+ kwargs={"row_factory": dict_row},
141
+ )
142
+
143
+ # Initialize database
144
+ self._init_database()
145
+
146
+ # Auto-migrate if enabled
147
+ if auto_migrate:
148
+ self._ensure_migrated()
149
+
150
+ @classmethod
151
+ def from_config(cls, config: Dict[str, Any]) -> "PostgreSQLStorage":
152
+ """Create instance from configuration."""
153
+ pg_config = config.get("postgres", {})
154
+
155
+ # Support environment variable expansion
156
+ def get_value(key: str, default: Any = None) -> Any:
157
+ value = pg_config.get(key, default)
158
+ if (
159
+ isinstance(value, str)
160
+ and value.startswith("${")
161
+ and value.endswith("}")
162
+ ):
163
+ env_var = value[2:-1]
164
+ return os.environ.get(env_var, default)
165
+ return value
166
+
167
+ return cls(
168
+ host=get_value("host", "localhost"),
169
+ port=int(get_value("port", 5432)),
170
+ database=get_value("database", "alma_memory"),
171
+ user=get_value("user", "postgres"),
172
+ password=get_value("password", ""),
173
+ embedding_dim=int(config.get("embedding_dim", 384)),
174
+ pool_size=int(get_value("pool_size", 10)),
175
+ schema=get_value("schema", "public"),
176
+ ssl_mode=get_value("ssl_mode", "prefer"),
177
+ )
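
A minimal sketch of driving from_config; the keys mirror exactly what it reads above, and ALMA_PG_PASSWORD is an illustrative environment variable name:

    config = {
        "embedding_dim": 384,
        "postgres": {
            "host": "localhost",
            "port": 5432,
            "database": "alma_memory",
            "user": "postgres",
            "password": "${ALMA_PG_PASSWORD}",  # expanded from the environment by get_value()
            "ssl_mode": "require",
        },
    }
    storage = PostgreSQLStorage.from_config(config)
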
178
+
179
+ @contextmanager
180
+ def _get_connection(self):
181
+ """Get database connection from pool."""
182
+ with self._pool.connection() as conn:
183
+ yield conn
184
+
185
+ def _init_database(self):
186
+ """Initialize database schema and pgvector extension."""
187
+ with self._get_connection() as conn:
188
+ # Try to enable pgvector extension
189
+ try:
190
+ conn.execute("CREATE EXTENSION IF NOT EXISTS vector")
191
+ conn.commit()
192
+ self._pgvector_available = True
193
+ logger.info("pgvector extension enabled")
194
+ except Exception as e:
195
+ conn.rollback() # Important: rollback to clear aborted transaction
196
+ logger.warning(f"pgvector not available: {e}. Using fallback search.")
197
+ self._pgvector_available = False
198
+
199
+ # Create tables
200
+ vector_type = (
201
+ f"VECTOR({self.embedding_dim})" if self._pgvector_available else "BYTEA"
202
+ )
203
+
204
+ # Heuristics table
205
+ heuristics_table = self.TABLE_NAMES[MemoryType.HEURISTICS]
206
+ conn.execute(f"""
207
+ CREATE TABLE IF NOT EXISTS {self.schema}.{heuristics_table} (
208
+ id TEXT PRIMARY KEY,
209
+ agent TEXT NOT NULL,
210
+ project_id TEXT NOT NULL,
211
+ condition TEXT NOT NULL,
212
+ strategy TEXT NOT NULL,
213
+ confidence REAL DEFAULT 0.0,
214
+ occurrence_count INTEGER DEFAULT 0,
215
+ success_count INTEGER DEFAULT 0,
216
+ last_validated TIMESTAMPTZ,
217
+ created_at TIMESTAMPTZ DEFAULT NOW(),
218
+ metadata JSONB,
219
+ embedding {vector_type}
220
+ )
221
+ """)
222
+ conn.execute(f"""
223
+ CREATE INDEX IF NOT EXISTS idx_heuristics_project_agent
224
+ ON {self.schema}.{heuristics_table}(project_id, agent)
225
+ """)
226
+ # Confidence index for efficient filtering by confidence score
227
+ conn.execute(f"""
228
+ CREATE INDEX IF NOT EXISTS idx_heuristics_confidence
229
+ ON {self.schema}.{heuristics_table}(project_id, confidence DESC)
230
+ """)
231
+
232
+ # Outcomes table
233
+ outcomes_table = self.TABLE_NAMES[MemoryType.OUTCOMES]
234
+ conn.execute(f"""
235
+ CREATE TABLE IF NOT EXISTS {self.schema}.{outcomes_table} (
236
+ id TEXT PRIMARY KEY,
237
+ agent TEXT NOT NULL,
238
+ project_id TEXT NOT NULL,
239
+ task_type TEXT,
240
+ task_description TEXT NOT NULL,
241
+ success BOOLEAN DEFAULT FALSE,
242
+ strategy_used TEXT,
243
+ duration_ms INTEGER,
244
+ error_message TEXT,
245
+ user_feedback TEXT,
246
+ timestamp TIMESTAMPTZ DEFAULT NOW(),
247
+ metadata JSONB,
248
+ embedding {vector_type}
249
+ )
250
+ """)
251
+ conn.execute(f"""
252
+ CREATE INDEX IF NOT EXISTS idx_outcomes_project_agent
253
+ ON {self.schema}.{outcomes_table}(project_id, agent)
254
+ """)
255
+ conn.execute(f"""
256
+ CREATE INDEX IF NOT EXISTS idx_outcomes_task_type
257
+ ON {self.schema}.{outcomes_table}(project_id, agent, task_type)
258
+ """)
259
+ conn.execute(f"""
260
+ CREATE INDEX IF NOT EXISTS idx_outcomes_timestamp
261
+ ON {self.schema}.{outcomes_table}(project_id, timestamp DESC)
262
+ """)
263
+
264
+ # User preferences table
265
+ preferences_table = self.TABLE_NAMES[MemoryType.PREFERENCES]
266
+ conn.execute(f"""
267
+ CREATE TABLE IF NOT EXISTS {self.schema}.{preferences_table} (
268
+ id TEXT PRIMARY KEY,
269
+ user_id TEXT NOT NULL,
270
+ category TEXT,
271
+ preference TEXT NOT NULL,
272
+ source TEXT,
273
+ confidence REAL DEFAULT 1.0,
274
+ timestamp TIMESTAMPTZ DEFAULT NOW(),
275
+ metadata JSONB
276
+ )
277
+ """)
278
+ conn.execute(f"""
279
+ CREATE INDEX IF NOT EXISTS idx_preferences_user
280
+ ON {self.schema}.{preferences_table}(user_id)
281
+ """)
282
+
283
+ # Domain knowledge table
284
+ domain_knowledge_table = self.TABLE_NAMES[MemoryType.DOMAIN_KNOWLEDGE]
285
+ conn.execute(f"""
286
+ CREATE TABLE IF NOT EXISTS {self.schema}.{domain_knowledge_table} (
287
+ id TEXT PRIMARY KEY,
288
+ agent TEXT NOT NULL,
289
+ project_id TEXT NOT NULL,
290
+ domain TEXT,
291
+ fact TEXT NOT NULL,
292
+ source TEXT,
293
+ confidence REAL DEFAULT 1.0,
294
+ last_verified TIMESTAMPTZ DEFAULT NOW(),
295
+ metadata JSONB,
296
+ embedding {vector_type}
297
+ )
298
+ """)
299
+ conn.execute(f"""
300
+ CREATE INDEX IF NOT EXISTS idx_domain_knowledge_project_agent
301
+ ON {self.schema}.{domain_knowledge_table}(project_id, agent)
302
+ """)
303
+ # Confidence index for efficient filtering by confidence score
304
+ conn.execute(f"""
305
+ CREATE INDEX IF NOT EXISTS idx_domain_knowledge_confidence
306
+ ON {self.schema}.{domain_knowledge_table}(project_id, confidence DESC)
307
+ """)
308
+
309
+ # Anti-patterns table
310
+ anti_patterns_table = self.TABLE_NAMES[MemoryType.ANTI_PATTERNS]
311
+ conn.execute(f"""
312
+ CREATE TABLE IF NOT EXISTS {self.schema}.{anti_patterns_table} (
313
+ id TEXT PRIMARY KEY,
314
+ agent TEXT NOT NULL,
315
+ project_id TEXT NOT NULL,
316
+ pattern TEXT NOT NULL,
317
+ why_bad TEXT,
318
+ better_alternative TEXT,
319
+ occurrence_count INTEGER DEFAULT 1,
320
+ last_seen TIMESTAMPTZ DEFAULT NOW(),
321
+ created_at TIMESTAMPTZ DEFAULT NOW(),
322
+ metadata JSONB,
323
+ embedding {vector_type}
324
+ )
325
+ """)
326
+ conn.execute(f"""
327
+ CREATE INDEX IF NOT EXISTS idx_anti_patterns_project_agent
328
+ ON {self.schema}.{anti_patterns_table}(project_id, agent)
329
+ """)
330
+
331
+ # Create vector indexes if pgvector available
332
+ # Using HNSW instead of IVFFlat because HNSW can be built on empty tables
333
+ # IVFFlat requires existing data to build, which causes silent failures on fresh databases
334
+ if self._pgvector_available:
335
+ # Vector-enabled tables use canonical memory type names
336
+ vector_tables = [
337
+ self.TABLE_NAMES[mt] for mt in MemoryType.VECTOR_ENABLED
338
+ ]
339
+ for table in vector_tables:
340
+ try:
341
+ conn.execute(f"""
342
+ CREATE INDEX IF NOT EXISTS idx_{table}_embedding
343
+ ON {self.schema}.{table}
344
+ USING hnsw (embedding vector_cosine_ops)
345
+ WITH (m = 16, ef_construction = 64)
346
+ """)
347
+ except Exception as e:
348
+ logger.warning(f"Failed to create HNSW index for {table}: {e}")
349
+
350
+ conn.commit()
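
A quick way to confirm what _init_database negotiated, as a sketch against the same pool (pg_extension is the standard PostgreSQL catalog):

    with storage._get_connection() as conn:
        row = conn.execute(
            "SELECT extversion FROM pg_extension WHERE extname = 'vector'"
        ).fetchone()
        # None means the extension is absent and the BYTEA fallback is in effect
        print("pgvector:", row["extversion"] if row else "not installed")
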
351
+
352
+ def _embedding_to_db(self, embedding: Optional[List[float]]) -> Any:
353
+ """Convert embedding to database format."""
354
+ if embedding is None:
355
+ return None
356
+ if self._pgvector_available:
357
+ # pgvector expects string format: '[1.0, 2.0, 3.0]'
358
+ return f"[{','.join(str(x) for x in embedding)}]"
359
+ else:
360
+ # Store as bytes (requires numpy)
361
+ if not NUMPY_AVAILABLE:
362
+ raise ImportError("numpy required for non-pgvector embedding storage")
363
+ return np.array(embedding, dtype=np.float32).tobytes()
364
+
365
+ def _embedding_from_db(self, value: Any) -> Optional[List[float]]:
366
+ """Convert embedding from database format."""
367
+ if value is None:
368
+ return None
369
+ if self._pgvector_available:
370
+ # pgvector returns as string or list
371
+ if isinstance(value, str):
372
+ value = value.strip("[]")
373
+ return [float(x) for x in value.split(",")]
374
+ return list(value)
375
+ else:
376
+ # Stored as bytes (requires numpy)
377
+ if not NUMPY_AVAILABLE or np is None:
378
+ return None
379
+ return np.frombuffer(value, dtype=np.float32).tolist()
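
The two codecs above are inverses; a round-trip sketch (float32 packing makes the BYTEA path lossy, hence the tolerance):

    vec = [0.1, 0.2, 0.3]
    stored = storage._embedding_to_db(vec)        # "[0.1,0.2,0.3]" under pgvector, bytes otherwise
    restored = storage._embedding_from_db(stored)
    assert all(abs(a - b) < 1e-6 for a, b in zip(vec, restored))
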
380
+
381
+ def _cosine_similarity(self, a: List[float], b: List[float]) -> float:
382
+ """Compute cosine similarity between two vectors."""
383
+ if not NUMPY_AVAILABLE or np is None:
384
+ # Fallback to pure Python
385
+ dot = sum(x * y for x, y in zip(a, b, strict=False))
386
+ norm_a = sum(x * x for x in a) ** 0.5
387
+ norm_b = sum(x * x for x in b) ** 0.5
388
+ return dot / (norm_a * norm_b) if norm_a and norm_b else 0.0
389
+ a_arr = np.array(a)
390
+ b_arr = np.array(b)
391
+ return float(
392
+ np.dot(a_arr, b_arr) / (np.linalg.norm(a_arr) * np.linalg.norm(b_arr))
393
+ )
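
For orientation, a worked value: parallel vectors score 1.0, orthogonal vectors 0.0, and anything between falls in that range.

    s = storage._cosine_similarity([1.0, 0.0], [1.0, 1.0])
    # dot = 1.0, norm_a = 1.0, norm_b = sqrt(2), so s = 1 / sqrt(2) ≈ 0.7071
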
394
+
395
+ # ==================== WRITE OPERATIONS ====================
396
+
397
+ def save_heuristic(self, heuristic: Heuristic) -> str:
398
+ """Save a heuristic."""
399
+ with self._get_connection() as conn:
400
+ conn.execute(
401
+ f"""
402
+ INSERT INTO {self.schema}.{self.TABLE_NAMES[MemoryType.HEURISTICS]}
403
+ (id, agent, project_id, condition, strategy, confidence,
404
+ occurrence_count, success_count, last_validated, created_at, metadata, embedding)
405
+ VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
406
+ ON CONFLICT (id) DO UPDATE SET
407
+ condition = EXCLUDED.condition,
408
+ strategy = EXCLUDED.strategy,
409
+ confidence = EXCLUDED.confidence,
410
+ occurrence_count = EXCLUDED.occurrence_count,
411
+ success_count = EXCLUDED.success_count,
412
+ last_validated = EXCLUDED.last_validated,
413
+ metadata = EXCLUDED.metadata,
414
+ embedding = EXCLUDED.embedding
415
+ """,
416
+ (
417
+ heuristic.id,
418
+ heuristic.agent,
419
+ heuristic.project_id,
420
+ heuristic.condition,
421
+ heuristic.strategy,
422
+ heuristic.confidence,
423
+ heuristic.occurrence_count,
424
+ heuristic.success_count,
425
+ heuristic.last_validated,
426
+ heuristic.created_at,
427
+ json.dumps(heuristic.metadata) if heuristic.metadata else None,
428
+ self._embedding_to_db(heuristic.embedding),
429
+ ),
430
+ )
431
+ conn.commit()
432
+
433
+ logger.debug(f"Saved heuristic: {heuristic.id}")
434
+ return heuristic.id
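
Because of ON CONFLICT (id) DO UPDATE, saving the same id twice updates in place rather than duplicating; a sketch, with the keyword form of the Heuristic constructor assumed from the column mapping above:

    h = Heuristic(
        id="h-1", agent="coder", project_id="demo",
        condition="integration tests are flaky",
        strategy="rerun the failing test in isolation first",
        confidence=0.6, occurrence_count=3, success_count=2,
        last_validated=datetime.now(timezone.utc),
        created_at=datetime.now(timezone.utc),
    )
    storage.save_heuristic(h)
    h.confidence = 0.8
    storage.save_heuristic(h)   # same row; confidence is now 0.8
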
435
+
436
+ def save_outcome(self, outcome: Outcome) -> str:
437
+ """Save an outcome."""
438
+ with self._get_connection() as conn:
439
+ conn.execute(
440
+ f"""
441
+ INSERT INTO {self.schema}.{self.TABLE_NAMES[MemoryType.OUTCOMES]}
442
+ (id, agent, project_id, task_type, task_description, success,
443
+ strategy_used, duration_ms, error_message, user_feedback, timestamp, metadata, embedding)
444
+ VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
445
+ ON CONFLICT (id) DO UPDATE SET
446
+ task_description = EXCLUDED.task_description,
447
+ success = EXCLUDED.success,
448
+ strategy_used = EXCLUDED.strategy_used,
449
+ duration_ms = EXCLUDED.duration_ms,
450
+ error_message = EXCLUDED.error_message,
451
+ user_feedback = EXCLUDED.user_feedback,
452
+ metadata = EXCLUDED.metadata,
453
+ embedding = EXCLUDED.embedding
454
+ """,
455
+ (
456
+ outcome.id,
457
+ outcome.agent,
458
+ outcome.project_id,
459
+ outcome.task_type,
460
+ outcome.task_description,
461
+ outcome.success,
462
+ outcome.strategy_used,
463
+ outcome.duration_ms,
464
+ outcome.error_message,
465
+ outcome.user_feedback,
466
+ outcome.timestamp,
467
+ json.dumps(outcome.metadata) if outcome.metadata else None,
468
+ self._embedding_to_db(outcome.embedding),
469
+ ),
470
+ )
471
+ conn.commit()
472
+
473
+ logger.debug(f"Saved outcome: {outcome.id}")
474
+ return outcome.id
475
+
476
+ def save_user_preference(self, preference: UserPreference) -> str:
477
+ """Save a user preference."""
478
+ with self._get_connection() as conn:
479
+ conn.execute(
480
+ f"""
481
+ INSERT INTO {self.schema}.{self.TABLE_NAMES[MemoryType.PREFERENCES]}
482
+ (id, user_id, category, preference, source, confidence, timestamp, metadata)
483
+ VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
484
+ ON CONFLICT (id) DO UPDATE SET
485
+ preference = EXCLUDED.preference,
486
+ source = EXCLUDED.source,
487
+ confidence = EXCLUDED.confidence,
488
+ metadata = EXCLUDED.metadata
489
+ """,
490
+ (
491
+ preference.id,
492
+ preference.user_id,
493
+ preference.category,
494
+ preference.preference,
495
+ preference.source,
496
+ preference.confidence,
497
+ preference.timestamp,
498
+ json.dumps(preference.metadata) if preference.metadata else None,
499
+ ),
500
+ )
501
+ conn.commit()
502
+
503
+ logger.debug(f"Saved preference: {preference.id}")
504
+ return preference.id
505
+
506
+ def save_domain_knowledge(self, knowledge: DomainKnowledge) -> str:
507
+ """Save domain knowledge."""
508
+ with self._get_connection() as conn:
509
+ conn.execute(
510
+ f"""
511
+ INSERT INTO {self.schema}.{self.TABLE_NAMES[MemoryType.DOMAIN_KNOWLEDGE]}
512
+ (id, agent, project_id, domain, fact, source, confidence, last_verified, metadata, embedding)
513
+ VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
514
+ ON CONFLICT (id) DO UPDATE SET
515
+ fact = EXCLUDED.fact,
516
+ source = EXCLUDED.source,
517
+ confidence = EXCLUDED.confidence,
518
+ last_verified = EXCLUDED.last_verified,
519
+ metadata = EXCLUDED.metadata,
520
+ embedding = EXCLUDED.embedding
521
+ """,
522
+ (
523
+ knowledge.id,
524
+ knowledge.agent,
525
+ knowledge.project_id,
526
+ knowledge.domain,
527
+ knowledge.fact,
528
+ knowledge.source,
529
+ knowledge.confidence,
530
+ knowledge.last_verified,
531
+ json.dumps(knowledge.metadata) if knowledge.metadata else None,
532
+ self._embedding_to_db(knowledge.embedding),
533
+ ),
534
+ )
535
+ conn.commit()
536
+
537
+ logger.debug(f"Saved domain knowledge: {knowledge.id}")
538
+ return knowledge.id
539
+
540
+ def save_anti_pattern(self, anti_pattern: AntiPattern) -> str:
541
+ """Save an anti-pattern."""
542
+ with self._get_connection() as conn:
543
+ conn.execute(
544
+ f"""
545
+ INSERT INTO {self.schema}.{self.TABLE_NAMES[MemoryType.ANTI_PATTERNS]}
546
+ (id, agent, project_id, pattern, why_bad, better_alternative,
547
+ occurrence_count, last_seen, created_at, metadata, embedding)
548
+ VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
549
+ ON CONFLICT (id) DO UPDATE SET
550
+ pattern = EXCLUDED.pattern,
551
+ why_bad = EXCLUDED.why_bad,
552
+ better_alternative = EXCLUDED.better_alternative,
553
+ occurrence_count = EXCLUDED.occurrence_count,
554
+ last_seen = EXCLUDED.last_seen,
555
+ metadata = EXCLUDED.metadata,
556
+ embedding = EXCLUDED.embedding
557
+ """,
558
+ (
559
+ anti_pattern.id,
560
+ anti_pattern.agent,
561
+ anti_pattern.project_id,
562
+ anti_pattern.pattern,
563
+ anti_pattern.why_bad,
564
+ anti_pattern.better_alternative,
565
+ anti_pattern.occurrence_count,
566
+ anti_pattern.last_seen,
567
+ anti_pattern.created_at,
568
+ (
569
+ json.dumps(anti_pattern.metadata)
570
+ if anti_pattern.metadata
571
+ else None
572
+ ),
573
+ self._embedding_to_db(anti_pattern.embedding),
574
+ ),
575
+ )
576
+ conn.commit()
577
+
578
+ logger.debug(f"Saved anti-pattern: {anti_pattern.id}")
579
+ return anti_pattern.id
580
+
581
+ # ==================== BATCH WRITE OPERATIONS ====================
582
+
583
+ def save_heuristics(self, heuristics: List[Heuristic]) -> List[str]:
584
+ """Save multiple heuristics in a batch using executemany."""
585
+ if not heuristics:
586
+ return []
587
+
588
+ with self._get_connection() as conn:
589
+ conn.executemany(
590
+ f"""
591
+ INSERT INTO {self.schema}.{self.TABLE_NAMES[MemoryType.HEURISTICS]}
592
+ (id, agent, project_id, condition, strategy, confidence,
593
+ occurrence_count, success_count, last_validated, created_at, metadata, embedding)
594
+ VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
595
+ ON CONFLICT (id) DO UPDATE SET
596
+ condition = EXCLUDED.condition,
597
+ strategy = EXCLUDED.strategy,
598
+ confidence = EXCLUDED.confidence,
599
+ occurrence_count = EXCLUDED.occurrence_count,
600
+ success_count = EXCLUDED.success_count,
601
+ last_validated = EXCLUDED.last_validated,
602
+ metadata = EXCLUDED.metadata,
603
+ embedding = EXCLUDED.embedding
604
+ """,
605
+ [
606
+ (
607
+ h.id,
608
+ h.agent,
609
+ h.project_id,
610
+ h.condition,
611
+ h.strategy,
612
+ h.confidence,
613
+ h.occurrence_count,
614
+ h.success_count,
615
+ h.last_validated,
616
+ h.created_at,
617
+ json.dumps(h.metadata) if h.metadata else None,
618
+ self._embedding_to_db(h.embedding),
619
+ )
620
+ for h in heuristics
621
+ ],
622
+ )
623
+ conn.commit()
624
+
625
+ logger.debug(f"Batch saved {len(heuristics)} heuristics")
626
+ return [h.id for h in heuristics]
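
The batch writers collapse N round-trips into a single executemany; usage is otherwise identical to the single-row form:

    ids = storage.save_heuristics([h1, h2, h3])   # one upsert statement, three rows
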
627
+
628
+ def save_outcomes(self, outcomes: List[Outcome]) -> List[str]:
629
+ """Save multiple outcomes in a batch using executemany."""
630
+ if not outcomes:
631
+ return []
632
+
633
+ with self._get_connection() as conn:
634
+ conn.executemany(
635
+ f"""
636
+ INSERT INTO {self.schema}.{self.TABLE_NAMES[MemoryType.OUTCOMES]}
637
+ (id, agent, project_id, task_type, task_description, success,
638
+ strategy_used, duration_ms, error_message, user_feedback, timestamp, metadata, embedding)
639
+ VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
640
+ ON CONFLICT (id) DO UPDATE SET
641
+ task_description = EXCLUDED.task_description,
642
+ success = EXCLUDED.success,
643
+ strategy_used = EXCLUDED.strategy_used,
644
+ duration_ms = EXCLUDED.duration_ms,
645
+ error_message = EXCLUDED.error_message,
646
+ user_feedback = EXCLUDED.user_feedback,
647
+ metadata = EXCLUDED.metadata,
648
+ embedding = EXCLUDED.embedding
649
+ """,
650
+ [
651
+ (
652
+ o.id,
653
+ o.agent,
654
+ o.project_id,
655
+ o.task_type,
656
+ o.task_description,
657
+ o.success,
658
+ o.strategy_used,
659
+ o.duration_ms,
660
+ o.error_message,
661
+ o.user_feedback,
662
+ o.timestamp,
663
+ json.dumps(o.metadata) if o.metadata else None,
664
+ self._embedding_to_db(o.embedding),
665
+ )
666
+ for o in outcomes
667
+ ],
668
+ )
669
+ conn.commit()
670
+
671
+ logger.debug(f"Batch saved {len(outcomes)} outcomes")
672
+ return [o.id for o in outcomes]
673
+
674
+ def save_domain_knowledge_batch(
675
+ self, knowledge_items: List[DomainKnowledge]
676
+ ) -> List[str]:
677
+ """Save multiple domain knowledge items in a batch using executemany."""
678
+ if not knowledge_items:
679
+ return []
680
+
681
+ with self._get_connection() as conn:
682
+ conn.executemany(
683
+ f"""
684
+ INSERT INTO {self.schema}.{self.TABLE_NAMES[MemoryType.DOMAIN_KNOWLEDGE]}
685
+ (id, agent, project_id, domain, fact, source, confidence, last_verified, metadata, embedding)
686
+ VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
687
+ ON CONFLICT (id) DO UPDATE SET
688
+ fact = EXCLUDED.fact,
689
+ source = EXCLUDED.source,
690
+ confidence = EXCLUDED.confidence,
691
+ last_verified = EXCLUDED.last_verified,
692
+ metadata = EXCLUDED.metadata,
693
+ embedding = EXCLUDED.embedding
694
+ """,
695
+ [
696
+ (
697
+ k.id,
698
+ k.agent,
699
+ k.project_id,
700
+ k.domain,
701
+ k.fact,
702
+ k.source,
703
+ k.confidence,
704
+ k.last_verified,
705
+ json.dumps(k.metadata) if k.metadata else None,
706
+ self._embedding_to_db(k.embedding),
707
+ )
708
+ for k in knowledge_items
709
+ ],
710
+ )
711
+ conn.commit()
712
+
713
+ logger.debug(f"Batch saved {len(knowledge_items)} domain knowledge items")
714
+ return [k.id for k in knowledge_items]
715
+
716
+ # ==================== READ OPERATIONS ====================
717
+
718
+ def get_heuristics(
719
+ self,
720
+ project_id: str,
721
+ agent: Optional[str] = None,
722
+ embedding: Optional[List[float]] = None,
723
+ top_k: int = 5,
724
+ min_confidence: float = 0.0,
725
+ ) -> List[Heuristic]:
726
+ """Get heuristics with optional vector search."""
727
+ with self._get_connection() as conn:
728
+ if embedding and self._pgvector_available:
729
+ # Use pgvector similarity search
730
+ query = f"""
731
+ SELECT *, 1 - (embedding <=> %s::vector) as similarity
732
+ FROM {self.schema}.{self.TABLE_NAMES[MemoryType.HEURISTICS]}
733
+ WHERE project_id = %s AND confidence >= %s
734
+ """
735
+ params: List[Any] = [
736
+ self._embedding_to_db(embedding),
737
+ project_id,
738
+ min_confidence,
739
+ ]
740
+
741
+ if agent:
742
+ query += " AND agent = %s"
743
+ params.append(agent)
744
+
745
+ query += " ORDER BY similarity DESC LIMIT %s"
746
+ params.append(top_k)
747
+ else:
748
+ # Standard query
749
+ query = f"""
750
+ SELECT *
751
+ FROM {self.schema}.{self.TABLE_NAMES[MemoryType.HEURISTICS]}
752
+ WHERE project_id = %s AND confidence >= %s
753
+ """
754
+ params = [project_id, min_confidence]
755
+
756
+ if agent:
757
+ query += " AND agent = %s"
758
+ params.append(agent)
759
+
760
+ query += " ORDER BY confidence DESC LIMIT %s"
761
+ params.append(top_k)
762
+
763
+ cursor = conn.execute(query, params)
764
+ rows = cursor.fetchall()
765
+
766
+ results = [self._row_to_heuristic(row) for row in rows]
767
+
768
+ # If embedding provided but pgvector not available, re-rank the confidence-limited rows by cosine similarity in the app layer
769
+ if embedding and not self._pgvector_available and results:
770
+ results = self._filter_by_similarity(results, embedding, top_k, "embedding")
771
+
772
+ return results
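
A retrieval sketch: with an embedding the ranking is cosine similarity (the <=> operator when pgvector is available), otherwise confidence; embed() stands in for whichever encoder produced the stored vectors and must match embedding_dim:

    query_vec = embed("flaky integration tests")   # hypothetical encoder
    top = storage.get_heuristics(
        project_id="demo",
        agent="coder",
        embedding=query_vec,
        top_k=5,
        min_confidence=0.3,
    )
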
773
+
774
+ def get_outcomes(
775
+ self,
776
+ project_id: str,
777
+ agent: Optional[str] = None,
778
+ task_type: Optional[str] = None,
779
+ embedding: Optional[List[float]] = None,
780
+ top_k: int = 5,
781
+ success_only: bool = False,
782
+ ) -> List[Outcome]:
783
+ """Get outcomes with optional vector search."""
784
+ with self._get_connection() as conn:
785
+ if embedding and self._pgvector_available:
786
+ query = f"""
787
+ SELECT *, 1 - (embedding <=> %s::vector) as similarity
788
+ FROM {self.schema}.{self.TABLE_NAMES[MemoryType.OUTCOMES]}
789
+ WHERE project_id = %s
790
+ """
791
+ params: List[Any] = [self._embedding_to_db(embedding), project_id]
792
+ else:
793
+ query = f"""
794
+ SELECT *
795
+ FROM {self.schema}.{self.TABLE_NAMES[MemoryType.OUTCOMES]}
796
+ WHERE project_id = %s
797
+ """
798
+ params = [project_id]
799
+
800
+ if agent:
801
+ query += " AND agent = %s"
802
+ params.append(agent)
803
+
804
+ if task_type:
805
+ query += " AND task_type = %s"
806
+ params.append(task_type)
807
+
808
+ if success_only:
809
+ query += " AND success = TRUE"
810
+
811
+ if embedding and self._pgvector_available:
812
+ query += " ORDER BY similarity DESC LIMIT %s"
813
+ else:
814
+ query += " ORDER BY timestamp DESC LIMIT %s"
815
+ params.append(top_k)
816
+
817
+ cursor = conn.execute(query, params)
818
+ rows = cursor.fetchall()
819
+
820
+ results = [self._row_to_outcome(row) for row in rows]
821
+
822
+ if embedding and not self._pgvector_available and results:
823
+ results = self._filter_by_similarity(results, embedding, top_k, "embedding")
824
+
825
+ return results
826
+
827
+ def get_user_preferences(
828
+ self,
829
+ user_id: str,
830
+ category: Optional[str] = None,
831
+ ) -> List[UserPreference]:
832
+ """Get user preferences."""
833
+ with self._get_connection() as conn:
834
+ query = f"SELECT * FROM {self.schema}.{self.TABLE_NAMES[MemoryType.PREFERENCES]} WHERE user_id = %s"
835
+ params: List[Any] = [user_id]
836
+
837
+ if category:
838
+ query += " AND category = %s"
839
+ params.append(category)
840
+
841
+ cursor = conn.execute(query, params)
842
+ rows = cursor.fetchall()
843
+
844
+ return [self._row_to_preference(row) for row in rows]
845
+
846
+ def get_domain_knowledge(
847
+ self,
848
+ project_id: str,
849
+ agent: Optional[str] = None,
850
+ domain: Optional[str] = None,
851
+ embedding: Optional[List[float]] = None,
852
+ top_k: int = 5,
853
+ ) -> List[DomainKnowledge]:
854
+ """Get domain knowledge with optional vector search."""
855
+ with self._get_connection() as conn:
856
+ if embedding and self._pgvector_available:
857
+ query = f"""
858
+ SELECT *, 1 - (embedding <=> %s::vector) as similarity
859
+ FROM {self.schema}.{self.TABLE_NAMES[MemoryType.DOMAIN_KNOWLEDGE]}
860
+ WHERE project_id = %s
861
+ """
862
+ params: List[Any] = [self._embedding_to_db(embedding), project_id]
863
+ else:
864
+ query = f"""
865
+ SELECT *
866
+ FROM {self.schema}.{self.TABLE_NAMES[MemoryType.DOMAIN_KNOWLEDGE]}
867
+ WHERE project_id = %s
868
+ """
869
+ params = [project_id]
870
+
871
+ if agent:
872
+ query += " AND agent = %s"
873
+ params.append(agent)
874
+
875
+ if domain:
876
+ query += " AND domain = %s"
877
+ params.append(domain)
878
+
879
+ if embedding and self._pgvector_available:
880
+ query += " ORDER BY similarity DESC LIMIT %s"
881
+ else:
882
+ query += " ORDER BY confidence DESC LIMIT %s"
883
+ params.append(top_k)
884
+
885
+ cursor = conn.execute(query, params)
886
+ rows = cursor.fetchall()
887
+
888
+ results = [self._row_to_domain_knowledge(row) for row in rows]
889
+
890
+ if embedding and not self._pgvector_available and results:
891
+ results = self._filter_by_similarity(results, embedding, top_k, "embedding")
892
+
893
+ return results
894
+
895
+ def get_anti_patterns(
896
+ self,
897
+ project_id: str,
898
+ agent: Optional[str] = None,
899
+ embedding: Optional[List[float]] = None,
900
+ top_k: int = 5,
901
+ ) -> List[AntiPattern]:
902
+ """Get anti-patterns with optional vector search."""
903
+ with self._get_connection() as conn:
904
+ if embedding and self._pgvector_available:
905
+ query = f"""
906
+ SELECT *, 1 - (embedding <=> %s::vector) as similarity
907
+ FROM {self.schema}.{self.TABLE_NAMES[MemoryType.ANTI_PATTERNS]}
908
+ WHERE project_id = %s
909
+ """
910
+ params: List[Any] = [self._embedding_to_db(embedding), project_id]
911
+ else:
912
+ query = f"""
913
+ SELECT *
914
+ FROM {self.schema}.{self.TABLE_NAMES[MemoryType.ANTI_PATTERNS]}
915
+ WHERE project_id = %s
916
+ """
917
+ params = [project_id]
918
+
919
+ if agent:
920
+ query += " AND agent = %s"
921
+ params.append(agent)
922
+
923
+ if embedding and self._pgvector_available:
924
+ query += " ORDER BY similarity DESC LIMIT %s"
925
+ else:
926
+ query += " ORDER BY occurrence_count DESC LIMIT %s"
927
+ params.append(top_k)
928
+
929
+ cursor = conn.execute(query, params)
930
+ rows = cursor.fetchall()
931
+
932
+ results = [self._row_to_anti_pattern(row) for row in rows]
933
+
934
+ if embedding and not self._pgvector_available and results:
935
+ results = self._filter_by_similarity(results, embedding, top_k, "embedding")
936
+
937
+ return results
938
+
939
+ def _filter_by_similarity(
940
+ self,
941
+ items: List[Any],
942
+ query_embedding: List[float],
943
+ top_k: int,
944
+ embedding_attr: str,
945
+ ) -> List[Any]:
946
+ """Filter items by cosine similarity (fallback when pgvector unavailable)."""
947
+ scored = []
948
+ for item in items:
949
+ item_embedding = getattr(item, embedding_attr, None)
950
+ if item_embedding:
951
+ similarity = self._cosine_similarity(query_embedding, item_embedding)
952
+ scored.append((item, similarity))
953
+ else:
954
+ scored.append((item, 0.0))
955
+
956
+ scored.sort(key=lambda x: x[1], reverse=True)
957
+ return [item for item, _ in scored[:top_k]]
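
Note that this fallback only re-orders rows the SQL has already fetched and capped, so it is a re-rank of candidates rather than a full similarity scan; rows without embeddings sink with score 0.0:

    # candidates fetched: c1 (sim 0.9), c2 (no embedding -> 0.0), c3 (sim 0.4)
    # _filter_by_similarity([c1, c2, c3], query_vec, 2, "embedding") -> [c1, c3]
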
958
+
959
+ # ==================== MULTI-AGENT MEMORY SHARING ====================
960
+
961
+ def get_heuristics_for_agents(
962
+ self,
963
+ project_id: str,
964
+ agents: List[str],
965
+ embedding: Optional[List[float]] = None,
966
+ top_k: int = 5,
967
+ min_confidence: float = 0.0,
968
+ ) -> List[Heuristic]:
969
+ """Get heuristics from multiple agents using optimized ANY query."""
970
+ if not agents:
971
+ return []
972
+
973
+ with self._get_connection() as conn:
974
+ if embedding and self._pgvector_available:
975
+ query = f"""
976
+ SELECT *, 1 - (embedding <=> %s::vector) as similarity
977
+ FROM {self.schema}.{self.TABLE_NAMES[MemoryType.HEURISTICS]}
978
+ WHERE project_id = %s AND confidence >= %s AND agent = ANY(%s)
979
+ ORDER BY similarity DESC LIMIT %s
980
+ """
981
+ params: List[Any] = [
982
+ self._embedding_to_db(embedding),
983
+ project_id,
984
+ min_confidence,
985
+ agents,
986
+ top_k * len(agents),
987
+ ]
988
+ else:
989
+ query = f"""
990
+ SELECT *
991
+ FROM {self.schema}.{self.TABLE_NAMES[MemoryType.HEURISTICS]}
992
+ WHERE project_id = %s AND confidence >= %s AND agent = ANY(%s)
993
+ ORDER BY confidence DESC LIMIT %s
994
+ """
995
+ params = [project_id, min_confidence, agents, top_k * len(agents)]
996
+
997
+ cursor = conn.execute(query, params)
998
+ rows = cursor.fetchall()
999
+
1000
+ results = [self._row_to_heuristic(row) for row in rows]
1001
+
1002
+ if embedding and not self._pgvector_available and results:
1003
+ results = self._filter_by_similarity(
1004
+ results, embedding, top_k * len(agents), "embedding"
1005
+ )
1006
+
1007
+ return results
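
The multi-agent readers fetch up to top_k * len(agents) rows in one query, letting psycopg bind the Python list to ANY(%s), rather than looping one query per agent:

    shared = storage.get_heuristics_for_agents(
        project_id="demo",
        agents=["coder", "reviewer"],
        top_k=5,
    )   # up to 10 rows, by confidence (or similarity when an embedding is passed)
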
1008
+
1009
+ def get_outcomes_for_agents(
1010
+ self,
1011
+ project_id: str,
1012
+ agents: List[str],
1013
+ task_type: Optional[str] = None,
1014
+ embedding: Optional[List[float]] = None,
1015
+ top_k: int = 5,
1016
+ success_only: bool = False,
1017
+ ) -> List[Outcome]:
1018
+ """Get outcomes from multiple agents using optimized ANY query."""
1019
+ if not agents:
1020
+ return []
1021
+
1022
+ with self._get_connection() as conn:
1023
+ if embedding and self._pgvector_available:
1024
+ query = f"""
1025
+ SELECT *, 1 - (embedding <=> %s::vector) as similarity
1026
+ FROM {self.schema}.{self.TABLE_NAMES[MemoryType.OUTCOMES]}
1027
+ WHERE project_id = %s AND agent = ANY(%s)
1028
+ """
1029
+ params: List[Any] = [
1030
+ self._embedding_to_db(embedding),
1031
+ project_id,
1032
+ agents,
1033
+ ]
1034
+ else:
1035
+ query = f"""
1036
+ SELECT *
1037
+ FROM {self.schema}.{self.TABLE_NAMES[MemoryType.OUTCOMES]}
1038
+ WHERE project_id = %s AND agent = ANY(%s)
1039
+ """
1040
+ params = [project_id, agents]
1041
+
1042
+ if task_type:
1043
+ query += " AND task_type = %s"
1044
+ params.append(task_type)
1045
+
1046
+ if success_only:
1047
+ query += " AND success = TRUE"
1048
+
1049
+ if embedding and self._pgvector_available:
1050
+ query += " ORDER BY similarity DESC LIMIT %s"
1051
+ else:
1052
+ query += " ORDER BY timestamp DESC LIMIT %s"
1053
+ params.append(top_k * len(agents))
1054
+
1055
+ cursor = conn.execute(query, params)
1056
+ rows = cursor.fetchall()
1057
+
1058
+ results = [self._row_to_outcome(row) for row in rows]
1059
+
1060
+ if embedding and not self._pgvector_available and results:
1061
+ results = self._filter_by_similarity(
1062
+ results, embedding, top_k * len(agents), "embedding"
1063
+ )
1064
+
1065
+ return results
1066
+
1067
+ def get_domain_knowledge_for_agents(
1068
+ self,
1069
+ project_id: str,
1070
+ agents: List[str],
1071
+ domain: Optional[str] = None,
1072
+ embedding: Optional[List[float]] = None,
1073
+ top_k: int = 5,
1074
+ ) -> List[DomainKnowledge]:
1075
+ """Get domain knowledge from multiple agents using optimized ANY query."""
1076
+ if not agents:
1077
+ return []
1078
+
1079
+ with self._get_connection() as conn:
1080
+ if embedding and self._pgvector_available:
1081
+ query = f"""
1082
+ SELECT *, 1 - (embedding <=> %s::vector) as similarity
1083
+ FROM {self.schema}.{self.TABLE_NAMES[MemoryType.DOMAIN_KNOWLEDGE]}
1084
+ WHERE project_id = %s AND agent = ANY(%s)
1085
+ """
1086
+ params: List[Any] = [
1087
+ self._embedding_to_db(embedding),
1088
+ project_id,
1089
+ agents,
1090
+ ]
1091
+ else:
1092
+ query = f"""
1093
+ SELECT *
1094
+ FROM {self.schema}.{self.TABLE_NAMES[MemoryType.DOMAIN_KNOWLEDGE]}
1095
+ WHERE project_id = %s AND agent = ANY(%s)
1096
+ """
1097
+ params = [project_id, agents]
1098
+
1099
+ if domain:
1100
+ query += " AND domain = %s"
1101
+ params.append(domain)
1102
+
1103
+ if embedding and self._pgvector_available:
1104
+ query += " ORDER BY similarity DESC LIMIT %s"
1105
+ else:
1106
+ query += " ORDER BY confidence DESC LIMIT %s"
1107
+ params.append(top_k * len(agents))
1108
+
1109
+ cursor = conn.execute(query, params)
1110
+ rows = cursor.fetchall()
1111
+
1112
+ results = [self._row_to_domain_knowledge(row) for row in rows]
1113
+
1114
+ if embedding and not self._pgvector_available and results:
1115
+ results = self._filter_by_similarity(
1116
+ results, embedding, top_k * len(agents), "embedding"
1117
+ )
1118
+
1119
+ return results
1120
+
1121
+ def get_anti_patterns_for_agents(
1122
+ self,
1123
+ project_id: str,
1124
+ agents: List[str],
1125
+ embedding: Optional[List[float]] = None,
1126
+ top_k: int = 5,
1127
+ ) -> List[AntiPattern]:
1128
+ """Get anti-patterns from multiple agents using optimized ANY query."""
1129
+ if not agents:
1130
+ return []
1131
+
1132
+ with self._get_connection() as conn:
1133
+ if embedding and self._pgvector_available:
1134
+ query = f"""
1135
+ SELECT *, 1 - (embedding <=> %s::vector) as similarity
1136
+ FROM {self.schema}.{self.TABLE_NAMES[MemoryType.ANTI_PATTERNS]}
1137
+ WHERE project_id = %s AND agent = ANY(%s)
1138
+ """
1139
+ params: List[Any] = [
1140
+ self._embedding_to_db(embedding),
1141
+ project_id,
1142
+ agents,
1143
+ ]
1144
+ else:
1145
+ query = f"""
1146
+ SELECT *
1147
+ FROM {self.schema}.{self.TABLE_NAMES[MemoryType.ANTI_PATTERNS]}
1148
+ WHERE project_id = %s AND agent = ANY(%s)
1149
+ """
1150
+ params = [project_id, agents]
1151
+
1152
+ if embedding and self._pgvector_available:
1153
+ query += " ORDER BY similarity DESC LIMIT %s"
1154
+ else:
1155
+ query += " ORDER BY occurrence_count DESC LIMIT %s"
1156
+ params.append(top_k * len(agents))
1157
+
1158
+ cursor = conn.execute(query, params)
1159
+ rows = cursor.fetchall()
1160
+
1161
+ results = [self._row_to_anti_pattern(row) for row in rows]
1162
+
1163
+ if embedding and not self._pgvector_available and results:
1164
+ results = self._filter_by_similarity(
1165
+ results, embedding, top_k * len(agents), "embedding"
1166
+ )
1167
+
1168
+ return results
1169
+
1170
+ # ==================== UPDATE OPERATIONS ====================
1171
+
1172
+ def update_heuristic(
1173
+ self,
1174
+ heuristic_id: str,
1175
+ updates: Dict[str, Any],
1176
+ ) -> bool:
1177
+ """Update a heuristic's fields."""
1178
+ if not updates:
1179
+ return False
1180
+
1181
+ set_clauses = []
1182
+ params = []
1183
+ for key, value in updates.items():
1184
+ if key == "metadata" and value:
1185
+ value = json.dumps(value)
1186
+ set_clauses.append(f"{key} = %s")
1187
+ params.append(value)
1188
+
1189
+ params.append(heuristic_id)
1190
+
1191
+ with self._get_connection() as conn:
1192
+ cursor = conn.execute(
1193
+ f"UPDATE {self.schema}.{self.TABLE_NAMES[MemoryType.HEURISTICS]} SET {', '.join(set_clauses)} WHERE id = %s",
1194
+ params,
1195
+ )
1196
+ conn.commit()
1197
+ return cursor.rowcount > 0
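
A partial-update sketch; since the dict keys are interpolated into the SQL as column names, callers should pass only known columns, never user-supplied strings:

    storage.update_heuristic("h-1", {
        "confidence": 0.9,
        "metadata": {"source": "manual-review"},   # serialized by the json.dumps branch above
    })
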
1198
+
1199
+ def increment_heuristic_occurrence(
1200
+ self,
1201
+ heuristic_id: str,
1202
+ success: bool,
1203
+ ) -> bool:
1204
+ """Increment heuristic occurrence count."""
1205
+ with self._get_connection() as conn:
1206
+ if success:
1207
+ cursor = conn.execute(
1208
+ f"""
1209
+ UPDATE {self.schema}.{self.TABLE_NAMES[MemoryType.HEURISTICS]}
1210
+ SET occurrence_count = occurrence_count + 1,
1211
+ success_count = success_count + 1,
1212
+ last_validated = %s
1213
+ WHERE id = %s
1214
+ """,
1215
+ (datetime.now(timezone.utc), heuristic_id),
1216
+ )
1217
+ else:
1218
+ cursor = conn.execute(
1219
+ f"""
1220
+ UPDATE {self.schema}.{self.TABLE_NAMES[MemoryType.HEURISTICS]}
1221
+ SET occurrence_count = occurrence_count + 1,
1222
+ last_validated = %s
1223
+ WHERE id = %s
1224
+ """,
1225
+ (datetime.now(timezone.utc), heuristic_id),
1226
+ )
1227
+ conn.commit()
1228
+ return cursor.rowcount > 0
1229
+
1230
+ def update_heuristic_confidence(
1231
+ self,
1232
+ heuristic_id: str,
1233
+ new_confidence: float,
1234
+ ) -> bool:
1235
+ """Update confidence score for a heuristic."""
1236
+ with self._get_connection() as conn:
1237
+ cursor = conn.execute(
1238
+ f"UPDATE {self.schema}.{self.TABLE_NAMES[MemoryType.HEURISTICS]} SET confidence = %s WHERE id = %s",
1239
+ (new_confidence, heuristic_id),
1240
+ )
1241
+ conn.commit()
1242
+ return cursor.rowcount > 0
1243
+
1244
+ def update_knowledge_confidence(
1245
+ self,
1246
+ knowledge_id: str,
1247
+ new_confidence: float,
1248
+ ) -> bool:
1249
+ """Update confidence score for domain knowledge."""
1250
+ with self._get_connection() as conn:
1251
+ cursor = conn.execute(
1252
+ f"UPDATE {self.schema}.{self.TABLE_NAMES[MemoryType.DOMAIN_KNOWLEDGE]} SET confidence = %s WHERE id = %s",
1253
+ (new_confidence, knowledge_id),
1254
+ )
1255
+ conn.commit()
1256
+ return cursor.rowcount > 0
1257
+
1258
+ # ==================== DELETE OPERATIONS ====================
1259
+
1260
+ def delete_heuristic(self, heuristic_id: str) -> bool:
1261
+ """Delete a heuristic by ID."""
1262
+ with self._get_connection() as conn:
1263
+ cursor = conn.execute(
1264
+ f"DELETE FROM {self.schema}.{self.TABLE_NAMES[MemoryType.HEURISTICS]} WHERE id = %s",
1265
+ (heuristic_id,),
1266
+ )
1267
+ conn.commit()
1268
+ return cursor.rowcount > 0
1269
+
1270
+ def delete_outcome(self, outcome_id: str) -> bool:
1271
+ """Delete an outcome by ID."""
1272
+ with self._get_connection() as conn:
1273
+ cursor = conn.execute(
1274
+ f"DELETE FROM {self.schema}.{self.TABLE_NAMES[MemoryType.OUTCOMES]} WHERE id = %s",
1275
+ (outcome_id,),
1276
+ )
1277
+ conn.commit()
1278
+ return cursor.rowcount > 0
1279
+
1280
+ def delete_domain_knowledge(self, knowledge_id: str) -> bool:
1281
+ """Delete domain knowledge by ID."""
1282
+ with self._get_connection() as conn:
1283
+ cursor = conn.execute(
1284
+ f"DELETE FROM {self.schema}.{self.TABLE_NAMES[MemoryType.DOMAIN_KNOWLEDGE]} WHERE id = %s",
1285
+ (knowledge_id,),
1286
+ )
1287
+ conn.commit()
1288
+ return cursor.rowcount > 0
1289
+
1290
+ def delete_anti_pattern(self, anti_pattern_id: str) -> bool:
1291
+ """Delete an anti-pattern by ID."""
1292
+ with self._get_connection() as conn:
1293
+ cursor = conn.execute(
1294
+ f"DELETE FROM {self.schema}.{self.TABLE_NAMES[MemoryType.ANTI_PATTERNS]} WHERE id = %s",
1295
+ (anti_pattern_id,),
1296
+ )
1297
+ conn.commit()
1298
+ return cursor.rowcount > 0
1299
+
1300
+ def delete_outcomes_older_than(
1301
+ self,
1302
+ project_id: str,
1303
+ older_than: datetime,
1304
+ agent: Optional[str] = None,
1305
+ ) -> int:
1306
+ """Delete old outcomes."""
1307
+ with self._get_connection() as conn:
1308
+ query = f"DELETE FROM {self.schema}.{self.TABLE_NAMES[MemoryType.OUTCOMES]} WHERE project_id = %s AND timestamp < %s"
1309
+ params: List[Any] = [project_id, older_than]
1310
+
1311
+ if agent:
1312
+ query += " AND agent = %s"
1313
+ params.append(agent)
1314
+
1315
+ cursor = conn.execute(query, params)
1316
+ conn.commit()
1317
+ deleted = cursor.rowcount
1318
+
1319
+ logger.info(f"Deleted {deleted} old outcomes")
1320
+ return deleted
1321
+
1322
+ def delete_low_confidence_heuristics(
1323
+ self,
1324
+ project_id: str,
1325
+ below_confidence: float,
1326
+ agent: Optional[str] = None,
1327
+ ) -> int:
1328
+ """Delete low-confidence heuristics."""
1329
+ with self._get_connection() as conn:
1330
+ query = f"DELETE FROM {self.schema}.{self.TABLE_NAMES[MemoryType.HEURISTICS]} WHERE project_id = %s AND confidence < %s"
1331
+ params: List[Any] = [project_id, below_confidence]
1332
+
1333
+ if agent:
1334
+ query += " AND agent = %s"
1335
+ params.append(agent)
1336
+
1337
+ cursor = conn.execute(query, params)
1338
+ conn.commit()
1339
+ deleted = cursor.rowcount
1340
+
1341
+ logger.info(f"Deleted {deleted} low-confidence heuristics")
1342
+ return deleted
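
The two pruning helpers combine into a simple retention pass; the thresholds here are illustrative:

    from datetime import timedelta

    cutoff = datetime.now(timezone.utc) - timedelta(days=90)
    n_old = storage.delete_outcomes_older_than("demo", older_than=cutoff)
    n_weak = storage.delete_low_confidence_heuristics("demo", below_confidence=0.2)
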
1343
+
1344
+ # ==================== STATS ====================
1345
+
1346
+ def get_stats(
1347
+ self,
1348
+ project_id: str,
1349
+ agent: Optional[str] = None,
1350
+ ) -> Dict[str, Any]:
1351
+ """Get memory statistics."""
1352
+ stats = {
1353
+ "project_id": project_id,
1354
+ "agent": agent,
1355
+ "storage_type": "postgresql",
1356
+ "pgvector_available": self._pgvector_available,
1357
+ }
1358
+
1359
+ with self._get_connection() as conn:
1360
+ # Use canonical memory types for stats
1361
+ for memory_type in MemoryType.ALL:
1362
+ table = self.TABLE_NAMES[memory_type]
1363
+ if memory_type == MemoryType.PREFERENCES:
1364
+ # Preferences don't have project_id
1365
+ cursor = conn.execute(
1366
+ f"SELECT COUNT(*) as count FROM {self.schema}.{table}"
1367
+ )
1368
+ row = cursor.fetchone()
1369
+ stats[f"{memory_type}_count"] = row["count"] if row else 0
1370
+ else:
1371
+ query = f"SELECT COUNT(*) as count FROM {self.schema}.{table} WHERE project_id = %s"
1372
+ params: List[Any] = [project_id]
1373
+ if agent:
1374
+ query += " AND agent = %s"
1375
+ params.append(agent)
1376
+ cursor = conn.execute(query, params)
1377
+ row = cursor.fetchone()
1378
+ stats[f"{memory_type}_count"] = row["count"] if row else 0
1379
+
1380
+ stats["total_count"] = sum(
1381
+ stats.get(k, 0) for k in stats if k.endswith("_count")
1382
+ )
1383
+
1384
+ return stats
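
The returned dict carries one <memory_type>_count entry per table plus a total, roughly of this shape (counts invented for illustration, key prefixes assuming the canonical memory type names):

    {
        "project_id": "demo", "agent": None,
        "storage_type": "postgresql", "pgvector_available": True,
        "heuristics_count": 12, "outcomes_count": 240, "preferences_count": 3,
        "domain_knowledge_count": 57, "anti_patterns_count": 4,
        "total_count": 316,
    }
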
1385
+
1386
+ # ==================== HELPERS ====================
1387
+
1388
+ def _parse_datetime(self, value: Any) -> Optional[datetime]:
1389
+ """Parse datetime from database value."""
1390
+ if value is None:
1391
+ return None
1392
+ if isinstance(value, datetime):
1393
+ return value
1394
+ try:
1395
+ return datetime.fromisoformat(str(value).replace("Z", "+00:00"))
1396
+ except (ValueError, AttributeError):
1397
+ return None
1398
+
1399
+ def _row_to_heuristic(self, row: Dict[str, Any]) -> Heuristic:
1400
+ """Convert database row to Heuristic."""
1401
+ return Heuristic(
1402
+ id=row["id"],
1403
+ agent=row["agent"],
1404
+ project_id=row["project_id"],
1405
+ condition=row["condition"],
1406
+ strategy=row["strategy"],
1407
+ confidence=row["confidence"] or 0.0,
1408
+ occurrence_count=row["occurrence_count"] or 0,
1409
+ success_count=row["success_count"] or 0,
1410
+ last_validated=self._parse_datetime(row["last_validated"])
1411
+ or datetime.now(timezone.utc),
1412
+ created_at=self._parse_datetime(row["created_at"])
1413
+ or datetime.now(timezone.utc),
1414
+ embedding=self._embedding_from_db(row.get("embedding")),
1415
+ metadata=row["metadata"] if row["metadata"] else {},
1416
+ )
1417
+
1418
+ def _row_to_outcome(self, row: Dict[str, Any]) -> Outcome:
1419
+ """Convert database row to Outcome."""
1420
+ return Outcome(
1421
+ id=row["id"],
1422
+ agent=row["agent"],
1423
+ project_id=row["project_id"],
1424
+ task_type=row["task_type"] or "general",
1425
+ task_description=row["task_description"],
1426
+ success=bool(row["success"]),
1427
+ strategy_used=row["strategy_used"] or "",
1428
+ duration_ms=row["duration_ms"],
1429
+ error_message=row["error_message"],
1430
+ user_feedback=row["user_feedback"],
1431
+ timestamp=self._parse_datetime(row["timestamp"])
1432
+ or datetime.now(timezone.utc),
1433
+ embedding=self._embedding_from_db(row.get("embedding")),
1434
+ metadata=row["metadata"] if row["metadata"] else {},
1435
+ )
1436
+
1437
+ def _row_to_preference(self, row: Dict[str, Any]) -> UserPreference:
1438
+ """Convert database row to UserPreference."""
1439
+ return UserPreference(
1440
+ id=row["id"],
1441
+ user_id=row["user_id"],
1442
+ category=row["category"] or "general",
1443
+ preference=row["preference"],
1444
+ source=row["source"] or "unknown",
1445
+ confidence=row["confidence"] or 1.0,
1446
+ timestamp=self._parse_datetime(row["timestamp"])
1447
+ or datetime.now(timezone.utc),
1448
+ metadata=row["metadata"] if row["metadata"] else {},
1449
+ )
1450
+
1451
+ def _row_to_domain_knowledge(self, row: Dict[str, Any]) -> DomainKnowledge:
1452
+ """Convert database row to DomainKnowledge."""
1453
+ return DomainKnowledge(
1454
+ id=row["id"],
1455
+ agent=row["agent"],
1456
+ project_id=row["project_id"],
1457
+ domain=row["domain"] or "general",
1458
+ fact=row["fact"],
1459
+ source=row["source"] or "unknown",
1460
+ confidence=row["confidence"] or 1.0,
1461
+ last_verified=self._parse_datetime(row["last_verified"])
1462
+ or datetime.now(timezone.utc),
1463
+ embedding=self._embedding_from_db(row.get("embedding")),
1464
+ metadata=row["metadata"] if row["metadata"] else {},
1465
+ )
1466
+
1467
+ def _row_to_anti_pattern(self, row: Dict[str, Any]) -> AntiPattern:
1468
+ """Convert database row to AntiPattern."""
1469
+ return AntiPattern(
1470
+ id=row["id"],
1471
+ agent=row["agent"],
1472
+ project_id=row["project_id"],
1473
+ pattern=row["pattern"],
1474
+ why_bad=row["why_bad"] or "",
1475
+ better_alternative=row["better_alternative"] or "",
1476
+ occurrence_count=row["occurrence_count"] or 1,
1477
+ last_seen=self._parse_datetime(row["last_seen"])
1478
+ or datetime.now(timezone.utc),
1479
+ created_at=self._parse_datetime(row["created_at"])
1480
+ or datetime.now(timezone.utc),
1481
+ embedding=self._embedding_from_db(row.get("embedding")),
1482
+ metadata=row["metadata"] if row["metadata"] else {},
1483
+ )
1484
+
1485
+ def close(self):
1486
+ """Close connection pool."""
1487
+ if self._pool:
1488
+ self._pool.close()
1489
+
1490
+ # ==================== MIGRATION SUPPORT ====================
1491
+
1492
+ def _get_version_store(self):
1493
+ """Get or create the version store."""
1494
+ if self._version_store is None:
1495
+ from alma.storage.migrations.version_stores import PostgreSQLVersionStore
1496
+
1497
+ self._version_store = PostgreSQLVersionStore(self._pool, self.schema)
1498
+ return self._version_store
1499
+
1500
+ def _get_migration_runner(self):
1501
+ """Get or create the migration runner."""
1502
+ if self._migration_runner is None:
1503
+ from alma.storage.migrations.runner import MigrationRunner
1504
+ from alma.storage.migrations.versions import v1_0_0 # noqa: F401
1505
+
1506
+ self._migration_runner = MigrationRunner(
1507
+ version_store=self._get_version_store(),
1508
+ backend="postgresql",
1509
+ )
1510
+ return self._migration_runner
1511
+
1512
+ def _ensure_migrated(self) -> None:
1513
+ """Ensure database is migrated to latest version."""
1514
+ runner = self._get_migration_runner()
1515
+ if runner.needs_migration():
1516
+ with self._get_connection() as conn:
1517
+ applied = runner.migrate(conn)
1518
+ if applied:
1519
+ logger.info(f"Applied {len(applied)} migrations: {applied}")
1520
+
1521
+ def get_schema_version(self) -> Optional[str]:
1522
+ """Get the current schema version."""
1523
+ return self._get_version_store().get_current_version()
1524
+
1525
+ def get_migration_status(self) -> Dict[str, Any]:
1526
+ """Get migration status information."""
1527
+ runner = self._get_migration_runner()
1528
+ status = runner.get_status()
1529
+ status["migration_supported"] = True
1530
+ return status
1531
+
1532
+ def migrate(
1533
+ self,
1534
+ target_version: Optional[str] = None,
1535
+ dry_run: bool = False,
1536
+ ) -> List[str]:
1537
+ """
1538
+ Apply pending schema migrations.
1539
+
1540
+ Args:
1541
+ target_version: Optional target version (applies all if not specified)
1542
+ dry_run: If True, show what would be done without making changes
1543
+
1544
+ Returns:
1545
+ List of applied migration versions
1546
+ """
1547
+ runner = self._get_migration_runner()
1548
+ with self._get_connection() as conn:
1549
+ return runner.migrate(conn, target_version=target_version, dry_run=dry_run)
1550
+
1551
+ def rollback(
1552
+ self,
1553
+ target_version: str,
1554
+ dry_run: bool = False,
1555
+ ) -> List[str]:
1556
+ """
1557
+ Roll back schema to a previous version.
1558
+
1559
+ Args:
1560
+ target_version: Version to roll back to
1561
+ dry_run: If True, show what would be done without making changes
1562
+
1563
+ Returns:
1564
+ List of rolled back migration versions
1565
+ """
1566
+ runner = self._get_migration_runner()
1567
+ with self._get_connection() as conn:
1568
+ return runner.rollback(conn, target_version=target_version, dry_run=dry_run)
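
Operationally the migration API composes like this; the version strings are illustrative:

    print(storage.get_schema_version())        # e.g. "1.0.0", or None before any migration
    storage.migrate(dry_run=True)              # list what would run without applying it
    storage.migrate()                          # apply everything pending
    storage.rollback(target_version="1.0.0", dry_run=True)   # preview a downgrade
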
1569
+
1570
+ # ==================== CHECKPOINT OPERATIONS (v0.6.0+) ====================
1571
+
1572
+ def save_checkpoint(self, checkpoint: "Checkpoint") -> str:
1573
+ """Save a workflow checkpoint."""
1574
+ with self._get_connection() as conn:
1575
+ conn.execute(
1576
+ f"""
1577
+ INSERT INTO {self.schema}.alma_checkpoints
1578
+ (id, run_id, node_id, state_json, state_hash, sequence_number,
1579
+ branch_id, parent_checkpoint_id, metadata, created_at)
1580
+ VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
1581
+ ON CONFLICT (id) DO UPDATE SET
1582
+ state_json = EXCLUDED.state_json,
1583
+ state_hash = EXCLUDED.state_hash,
1584
+ sequence_number = EXCLUDED.sequence_number,
1585
+ metadata = EXCLUDED.metadata
1586
+ """,
1587
+ (
1588
+ checkpoint.id,
1589
+ checkpoint.run_id,
1590
+ checkpoint.node_id,
1591
+ json.dumps(checkpoint.state),
1592
+ checkpoint.state_hash,
1593
+ checkpoint.sequence_number,
1594
+ checkpoint.branch_id,
1595
+ checkpoint.parent_checkpoint_id,
1596
+ json.dumps(checkpoint.metadata) if checkpoint.metadata else None,
1597
+ checkpoint.created_at,
1598
+ ),
1599
+ )
1600
+ conn.commit()
1601
+
1602
+ logger.debug(f"Saved checkpoint: {checkpoint.id}")
1603
+ return checkpoint.id
1604
+
1605
+ def get_checkpoint(self, checkpoint_id: str) -> Optional["Checkpoint"]:
1606
+ """Get a checkpoint by ID."""
1607
+ with self._get_connection() as conn:
1608
+ cursor = conn.execute(
1609
+ f"SELECT * FROM {self.schema}.alma_checkpoints WHERE id = %s",
1610
+ (checkpoint_id,),
1611
+ )
1612
+ row = cursor.fetchone()
1613
+
1614
+ if row is None:
1615
+ return None
1616
+ return self._row_to_checkpoint(row)
1617
+
1618
+ def get_latest_checkpoint(
1619
+ self,
1620
+ run_id: str,
1621
+ branch_id: Optional[str] = None,
1622
+ ) -> Optional["Checkpoint"]:
1623
+ """Get the most recent checkpoint for a workflow run."""
1624
+ with self._get_connection() as conn:
1625
+ if branch_id is not None:
1626
+ cursor = conn.execute(
1627
+ f"""
1628
+ SELECT * FROM {self.schema}.alma_checkpoints
1629
+ WHERE run_id = %s AND branch_id = %s
1630
+ ORDER BY sequence_number DESC LIMIT 1
1631
+ """,
1632
+ (run_id, branch_id),
1633
+ )
1634
+ else:
1635
+ cursor = conn.execute(
1636
+ f"""
1637
+ SELECT * FROM {self.schema}.alma_checkpoints
1638
+ WHERE run_id = %s
1639
+ ORDER BY sequence_number DESC LIMIT 1
1640
+ """,
1641
+ (run_id,),
1642
+ )
1643
+ row = cursor.fetchone()
1644
+
1645
+ if row is None:
1646
+ return None
1647
+ return self._row_to_checkpoint(row)
1648
+
1649
+ def get_checkpoints_for_run(
1650
+ self,
1651
+ run_id: str,
1652
+ branch_id: Optional[str] = None,
1653
+ limit: int = 100,
1654
+ ) -> List["Checkpoint"]:
1655
+ """Get all checkpoints for a workflow run."""
1656
+ with self._get_connection() as conn:
1657
+ if branch_id is not None:
1658
+ cursor = conn.execute(
1659
+ f"""
1660
+ SELECT * FROM {self.schema}.alma_checkpoints
1661
+ WHERE run_id = %s AND branch_id = %s
1662
+ ORDER BY sequence_number ASC LIMIT %s
1663
+ """,
1664
+ (run_id, branch_id, limit),
1665
+ )
1666
+ else:
1667
+ cursor = conn.execute(
1668
+ f"""
1669
+ SELECT * FROM {self.schema}.alma_checkpoints
1670
+ WHERE run_id = %s
1671
+ ORDER BY sequence_number ASC LIMIT %s
1672
+ """,
1673
+ (run_id, limit),
1674
+ )
1675
+ rows = cursor.fetchall()
1676
+
1677
+ return [self._row_to_checkpoint(row) for row in rows]
1678
+
1679
+ def cleanup_checkpoints(
1680
+ self,
1681
+ run_id: str,
1682
+ keep_latest: int = 1,
1683
+ ) -> int:
1684
+ """Clean up old checkpoints for a completed run."""
1685
+ with self._get_connection() as conn:
1686
+ # Delete all but the latest N checkpoints
1687
+ cursor = conn.execute(
1688
+ f"""
1689
+ DELETE FROM {self.schema}.alma_checkpoints
1690
+ WHERE run_id = %s AND id NOT IN (
1691
+ SELECT id FROM {self.schema}.alma_checkpoints
1692
+ WHERE run_id = %s
1693
+ ORDER BY sequence_number DESC
1694
+ LIMIT %s
1695
+ )
1696
+ """,
1697
+ (run_id, run_id, keep_latest),
1698
+ )
1699
+ conn.commit()
1700
+ deleted = cursor.rowcount
1701
+
1702
+ logger.info(f"Cleaned up {deleted} checkpoints for run {run_id}")
1703
+ return deleted
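
A crash-recovery sketch over the checkpoint API; the Checkpoint attributes mirror the columns written by save_checkpoint:

    latest = storage.get_latest_checkpoint(run_id="run-42")
    if latest is not None:
        state = latest.state            # dict decoded from state_json
        # ... resume the workflow from latest.node_id with this state ...
    storage.cleanup_checkpoints(run_id="run-42", keep_latest=1)
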
1704
+
1705
+    def _row_to_checkpoint(self, row: Dict[str, Any]) -> "Checkpoint":
+        """Convert database row to Checkpoint."""
+        from alma.workflow import Checkpoint
+
+        return Checkpoint(
+            id=row["id"],
+            run_id=row["run_id"],
+            node_id=row["node_id"],
+            state=json.loads(row["state_json"]) if row["state_json"] else {},
+            sequence_number=row["sequence_number"] or 0,
+            branch_id=row["branch_id"],
+            parent_checkpoint_id=row["parent_checkpoint_id"],
+            state_hash=row["state_hash"] or "",
+            metadata=row["metadata"] if row["metadata"] else {},
+            created_at=self._parse_datetime(row["created_at"])
+            or datetime.now(timezone.utc),
+        )
+
+    # ==================== WORKFLOW OUTCOME OPERATIONS (v0.6.0+) ====================
+
+    def save_workflow_outcome(self, outcome: "WorkflowOutcome") -> str:
+        """Save a workflow outcome."""
+        with self._get_connection() as conn:
+            conn.execute(
+                f"""
+                INSERT INTO {self.schema}.alma_workflow_outcomes
+                (id, tenant_id, workflow_id, run_id, agent, project_id, result,
+                 summary, strategies_used, successful_patterns, failed_patterns,
+                 extracted_heuristics, extracted_anti_patterns, duration_seconds,
+                 node_count, error_message, metadata, embedding, created_at)
+                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
+                ON CONFLICT (id) DO UPDATE SET
+                    result = EXCLUDED.result,
+                    summary = EXCLUDED.summary,
+                    strategies_used = EXCLUDED.strategies_used,
+                    successful_patterns = EXCLUDED.successful_patterns,
+                    failed_patterns = EXCLUDED.failed_patterns,
+                    extracted_heuristics = EXCLUDED.extracted_heuristics,
+                    extracted_anti_patterns = EXCLUDED.extracted_anti_patterns,
+                    duration_seconds = EXCLUDED.duration_seconds,
+                    node_count = EXCLUDED.node_count,
+                    error_message = EXCLUDED.error_message,
+                    metadata = EXCLUDED.metadata,
+                    embedding = EXCLUDED.embedding
+                """,
+                (
+                    outcome.id,
+                    outcome.tenant_id,
+                    outcome.workflow_id,
+                    outcome.run_id,
+                    outcome.agent,
+                    outcome.project_id,
+                    outcome.result.value,
+                    outcome.summary,
+                    outcome.strategies_used,
+                    outcome.successful_patterns,
+                    outcome.failed_patterns,
+                    outcome.extracted_heuristics,
+                    outcome.extracted_anti_patterns,
+                    outcome.duration_seconds,
+                    outcome.node_count,
+                    outcome.error_message,
+                    outcome.metadata,
+                    self._embedding_to_db(outcome.embedding),
+                    outcome.created_at,
+                ),
+            )
+            conn.commit()
+
+        logger.debug(f"Saved workflow outcome: {outcome.id}")
+        return outcome.id
+
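Because of the ON CONFLICT (id) DO UPDATE clause, saving is idempotent: a second save with the same id updates the mutable columns from EXCLUDED rather than raising a duplicate-key error. A sketch (illustrative; `outcome` stands for an existing WorkflowOutcome instance, construction omitted since its fields are defined in alma.workflow):

    store.save_workflow_outcome(outcome)   # first call: plain INSERT
    outcome.summary = "refined after review"
    store.save_workflow_outcome(outcome)   # same id: takes the UPDATE path
    assert store.get_workflow_outcome(outcome.id).summary == "refined after review"
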
+    def get_workflow_outcome(self, outcome_id: str) -> Optional["WorkflowOutcome"]:
+        """Get a workflow outcome by ID."""
+        with self._get_connection() as conn:
+            cursor = conn.execute(
+                f"SELECT * FROM {self.schema}.alma_workflow_outcomes WHERE id = %s",
+                (outcome_id,),
+            )
+            row = cursor.fetchone()
+
+        if row is None:
+            return None
+        return self._row_to_workflow_outcome(row)
+
+    def get_workflow_outcomes(
+        self,
+        project_id: str,
+        agent: Optional[str] = None,
+        workflow_id: Optional[str] = None,
+        embedding: Optional[List[float]] = None,
+        top_k: int = 10,
+        scope_filter: Optional[Dict[str, Any]] = None,
+    ) -> List["WorkflowOutcome"]:
+        """Get workflow outcomes with optional filtering."""
+        with self._get_connection() as conn:
+            if embedding and self._pgvector_available:
+                query = f"""
+                    SELECT *, 1 - (embedding <=> %s::vector) as similarity
+                    FROM {self.schema}.alma_workflow_outcomes
+                    WHERE project_id = %s
+                """
+                params: List[Any] = [self._embedding_to_db(embedding), project_id]
+            else:
+                query = f"""
+                    SELECT *
+                    FROM {self.schema}.alma_workflow_outcomes
+                    WHERE project_id = %s
+                """
+                params = [project_id]
+
+            if agent:
+                query += " AND agent = %s"
+                params.append(agent)
+
+            if workflow_id:
+                query += " AND workflow_id = %s"
+                params.append(workflow_id)
+
+            # Apply scope filter
+            if scope_filter:
+                if scope_filter.get("tenant_id"):
+                    query += " AND tenant_id = %s"
+                    params.append(scope_filter["tenant_id"])
+                if scope_filter.get("workflow_id"):
+                    query += " AND workflow_id = %s"
+                    params.append(scope_filter["workflow_id"])
+                if scope_filter.get("run_id"):
+                    query += " AND run_id = %s"
+                    params.append(scope_filter["run_id"])
+
+            if embedding and self._pgvector_available:
+                query += " ORDER BY similarity DESC LIMIT %s"
+            else:
+                query += " ORDER BY created_at DESC LIMIT %s"
+            params.append(top_k)
+
+            cursor = conn.execute(query, params)
+            rows = cursor.fetchall()
+
+        return [self._row_to_workflow_outcome(row) for row in rows]
+
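When an embedding is passed and pgvector is available, `1 - (embedding <=> %s::vector)` converts pgvector's cosine distance operator into a similarity score, and results are ranked by it; otherwise the query falls back to recency ordering. A call sketch (illustrative; `embed()` is an assumed helper returning a List[float]):

    # Rank past outcomes for a project by semantic closeness to a query.
    outcomes = store.get_workflow_outcomes(
        project_id="proj-1",
        embedding=embed("flaky integration tests"),
        top_k=5,
        scope_filter={"tenant_id": "acme"},   # optional narrowing
    )
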
+    def _row_to_workflow_outcome(self, row: Dict[str, Any]) -> "WorkflowOutcome":
+        """Convert database row to WorkflowOutcome."""
+        from alma.workflow import WorkflowOutcome, WorkflowResult
+
+        return WorkflowOutcome(
+            id=row["id"],
+            tenant_id=row["tenant_id"],
+            workflow_id=row["workflow_id"],
+            run_id=row["run_id"],
+            agent=row["agent"],
+            project_id=row["project_id"],
+            result=WorkflowResult(row["result"]),
+            summary=row["summary"] or "",
+            strategies_used=row["strategies_used"] or [],
+            successful_patterns=row["successful_patterns"] or [],
+            failed_patterns=row["failed_patterns"] or [],
+            extracted_heuristics=row["extracted_heuristics"] or [],
+            extracted_anti_patterns=row["extracted_anti_patterns"] or [],
+            duration_seconds=row["duration_seconds"],
+            node_count=row["node_count"],
+            error_message=row["error_message"],
+            embedding=self._embedding_from_db(row.get("embedding")),
+            metadata=row["metadata"] if row["metadata"] else {},
+            created_at=self._parse_datetime(row["created_at"])
+            or datetime.now(timezone.utc),
+        )
+
+    # ==================== ARTIFACT LINK OPERATIONS (v0.6.0+) ====================
+
+    def save_artifact_link(self, artifact_ref: "ArtifactRef") -> str:
+        """Save an artifact reference linked to a memory."""
+        with self._get_connection() as conn:
+            conn.execute(
+                f"""
+                INSERT INTO {self.schema}.alma_artifact_links
+                (id, memory_id, artifact_type, storage_url, filename,
+                 mime_type, size_bytes, checksum, metadata, created_at)
+                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
+                ON CONFLICT (id) DO UPDATE SET
+                    storage_url = EXCLUDED.storage_url,
+                    filename = EXCLUDED.filename,
+                    mime_type = EXCLUDED.mime_type,
+                    size_bytes = EXCLUDED.size_bytes,
+                    checksum = EXCLUDED.checksum,
+                    metadata = EXCLUDED.metadata
+                """,
+                (
+                    artifact_ref.id,
+                    artifact_ref.memory_id,
+                    artifact_ref.artifact_type.value,
+                    artifact_ref.storage_url,
+                    artifact_ref.filename,
+                    artifact_ref.mime_type,
+                    artifact_ref.size_bytes,
+                    artifact_ref.checksum,
+                    artifact_ref.metadata,
+                    artifact_ref.created_at,
+                ),
+            )
+            conn.commit()
+
+        logger.debug(f"Saved artifact link: {artifact_ref.id}")
+        return artifact_ref.id
+
+    def get_artifact_links(self, memory_id: str) -> List["ArtifactRef"]:
+        """Get all artifact references linked to a memory."""
+        with self._get_connection() as conn:
+            cursor = conn.execute(
+                f"SELECT * FROM {self.schema}.alma_artifact_links WHERE memory_id = %s",
+                (memory_id,),
+            )
+            rows = cursor.fetchall()
+
+        return [self._row_to_artifact_ref(row) for row in rows]
+
+    def delete_artifact_link(self, artifact_id: str) -> bool:
+        """Delete an artifact reference."""
+        with self._get_connection() as conn:
+            cursor = conn.execute(
+                f"DELETE FROM {self.schema}.alma_artifact_links WHERE id = %s",
+                (artifact_id,),
+            )
+            conn.commit()
+            return cursor.rowcount > 0
+
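Artifact links hold references only (storage URL, checksum, MIME type), never blob contents, so attaching and detaching pointers is cheap. A lifecycle sketch (illustrative; `ref` stands for an existing ArtifactRef instance):

    store.save_artifact_link(ref)                 # upsert the reference
    for link in store.get_artifact_links(ref.memory_id):
        print(link.filename, link.storage_url)
    removed = store.delete_artifact_link(ref.id)  # True if a row was deleted
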
+    def _row_to_artifact_ref(self, row: Dict[str, Any]) -> "ArtifactRef":
+        """Convert database row to ArtifactRef."""
+        from alma.workflow import ArtifactRef, ArtifactType
+
+        return ArtifactRef(
+            id=row["id"],
+            memory_id=row["memory_id"],
+            artifact_type=ArtifactType(row["artifact_type"]),
+            storage_url=row["storage_url"],
+            filename=row["filename"],
+            mime_type=row["mime_type"],
+            size_bytes=row["size_bytes"],
+            checksum=row["checksum"],
+            metadata=row["metadata"] if row["metadata"] else {},
+            created_at=self._parse_datetime(row["created_at"])
+            or datetime.now(timezone.utc),
+        )