alma-memory 0.5.1__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. alma/__init__.py +296 -226
  2. alma/compression/__init__.py +33 -0
  3. alma/compression/pipeline.py +980 -0
  4. alma/confidence/__init__.py +47 -47
  5. alma/confidence/engine.py +540 -540
  6. alma/confidence/types.py +351 -351
  7. alma/config/loader.py +157 -157
  8. alma/consolidation/__init__.py +23 -23
  9. alma/consolidation/engine.py +678 -678
  10. alma/consolidation/prompts.py +84 -84
  11. alma/core.py +1189 -430
  12. alma/domains/__init__.py +30 -30
  13. alma/domains/factory.py +359 -359
  14. alma/domains/schemas.py +448 -448
  15. alma/domains/types.py +272 -272
  16. alma/events/__init__.py +75 -75
  17. alma/events/emitter.py +285 -284
  18. alma/events/storage_mixin.py +246 -246
  19. alma/events/types.py +126 -126
  20. alma/events/webhook.py +425 -425
  21. alma/exceptions.py +49 -49
  22. alma/extraction/__init__.py +31 -31
  23. alma/extraction/auto_learner.py +265 -265
  24. alma/extraction/extractor.py +420 -420
  25. alma/graph/__init__.py +106 -106
  26. alma/graph/backends/__init__.py +32 -32
  27. alma/graph/backends/kuzu.py +624 -624
  28. alma/graph/backends/memgraph.py +432 -432
  29. alma/graph/backends/memory.py +236 -236
  30. alma/graph/backends/neo4j.py +417 -417
  31. alma/graph/base.py +159 -159
  32. alma/graph/extraction.py +198 -198
  33. alma/graph/store.py +860 -860
  34. alma/harness/__init__.py +35 -35
  35. alma/harness/base.py +386 -386
  36. alma/harness/domains.py +705 -705
  37. alma/initializer/__init__.py +37 -37
  38. alma/initializer/initializer.py +418 -418
  39. alma/initializer/types.py +250 -250
  40. alma/integration/__init__.py +62 -62
  41. alma/integration/claude_agents.py +444 -444
  42. alma/integration/helena.py +423 -423
  43. alma/integration/victor.py +471 -471
  44. alma/learning/__init__.py +101 -86
  45. alma/learning/decay.py +878 -0
  46. alma/learning/forgetting.py +1446 -1446
  47. alma/learning/heuristic_extractor.py +390 -390
  48. alma/learning/protocols.py +374 -374
  49. alma/learning/validation.py +346 -346
  50. alma/mcp/__init__.py +123 -45
  51. alma/mcp/__main__.py +156 -156
  52. alma/mcp/resources.py +122 -122
  53. alma/mcp/server.py +955 -591
  54. alma/mcp/tools.py +3254 -509
  55. alma/observability/__init__.py +91 -84
  56. alma/observability/config.py +302 -302
  57. alma/observability/guidelines.py +170 -0
  58. alma/observability/logging.py +424 -424
  59. alma/observability/metrics.py +583 -583
  60. alma/observability/tracing.py +440 -440
  61. alma/progress/__init__.py +21 -21
  62. alma/progress/tracker.py +607 -607
  63. alma/progress/types.py +250 -250
  64. alma/retrieval/__init__.py +134 -53
  65. alma/retrieval/budget.py +525 -0
  66. alma/retrieval/cache.py +1304 -1061
  67. alma/retrieval/embeddings.py +202 -202
  68. alma/retrieval/engine.py +850 -427
  69. alma/retrieval/modes.py +365 -0
  70. alma/retrieval/progressive.py +560 -0
  71. alma/retrieval/scoring.py +344 -344
  72. alma/retrieval/trust_scoring.py +637 -0
  73. alma/retrieval/verification.py +797 -0
  74. alma/session/__init__.py +19 -19
  75. alma/session/manager.py +442 -399
  76. alma/session/types.py +288 -288
  77. alma/storage/__init__.py +101 -90
  78. alma/storage/archive.py +233 -0
  79. alma/storage/azure_cosmos.py +1259 -1259
  80. alma/storage/base.py +1083 -583
  81. alma/storage/chroma.py +1443 -1443
  82. alma/storage/constants.py +103 -103
  83. alma/storage/file_based.py +614 -614
  84. alma/storage/migrations/__init__.py +21 -21
  85. alma/storage/migrations/base.py +321 -321
  86. alma/storage/migrations/runner.py +323 -323
  87. alma/storage/migrations/version_stores.py +337 -337
  88. alma/storage/migrations/versions/__init__.py +11 -11
  89. alma/storage/migrations/versions/v1_0_0.py +373 -373
  90. alma/storage/migrations/versions/v1_1_0_workflow_context.py +551 -0
  91. alma/storage/pinecone.py +1080 -1080
  92. alma/storage/postgresql.py +1948 -1559
  93. alma/storage/qdrant.py +1306 -1306
  94. alma/storage/sqlite_local.py +3041 -1457
  95. alma/testing/__init__.py +46 -46
  96. alma/testing/factories.py +301 -301
  97. alma/testing/mocks.py +389 -389
  98. alma/types.py +292 -264
  99. alma/utils/__init__.py +19 -0
  100. alma/utils/tokenizer.py +521 -0
  101. alma/workflow/__init__.py +83 -0
  102. alma/workflow/artifacts.py +170 -0
  103. alma/workflow/checkpoint.py +311 -0
  104. alma/workflow/context.py +228 -0
  105. alma/workflow/outcomes.py +189 -0
  106. alma/workflow/reducers.py +393 -0
  107. {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/METADATA +210 -72
  108. alma_memory-0.7.0.dist-info/RECORD +112 -0
  109. alma_memory-0.5.1.dist-info/RECORD +0 -93
  110. {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/WHEEL +0 -0
  111. {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/top_level.txt +0 -0
alma/storage/pinecone.py CHANGED
@@ -1,1080 +1,1080 @@
1
- """
2
- ALMA Pinecone Storage Backend.
3
-
4
- Production-ready storage using Pinecone vector database for
5
- native vector similarity search with serverless infrastructure.
6
-
7
- Recommended for:
8
- - Cloud-native deployments
9
- - Large-scale vector search workloads
10
- - Serverless architecture preferences
11
- """
12
-
13
- import json
14
- import logging
15
- import os
16
- from datetime import datetime, timezone
17
- from typing import Any, Dict, List, Optional
18
-
19
- from alma.storage.base import StorageBackend
20
- from alma.types import (
21
- AntiPattern,
22
- DomainKnowledge,
23
- Heuristic,
24
- Outcome,
25
- UserPreference,
26
- )
27
-
28
- logger = logging.getLogger(__name__)
29
-
30
- # Try to import pinecone
31
- try:
32
- from pinecone import Pinecone, ServerlessSpec
33
-
34
- PINECONE_AVAILABLE = True
35
- except ImportError:
36
- PINECONE_AVAILABLE = False
37
- Pinecone = None # type: ignore
38
- ServerlessSpec = None # type: ignore
39
- logger.warning(
40
- "pinecone not installed. Install with: pip install 'alma-memory[pinecone]'"
41
- )
42
-
43
- # Namespace constants for memory types
44
- NAMESPACE_HEURISTICS = "heuristics"
45
- NAMESPACE_OUTCOMES = "outcomes"
46
- NAMESPACE_DOMAIN_KNOWLEDGE = "domain_knowledge"
47
- NAMESPACE_ANTI_PATTERNS = "anti_patterns"
48
- NAMESPACE_PREFERENCES = "preferences"
49
-
50
-
51
- class PineconeStorage(StorageBackend):
52
- """
53
- Pinecone storage backend for ALMA.
54
-
55
- Uses Pinecone's vector database with namespaces for different memory types.
56
- Supports serverless deployment for automatic scaling.
57
-
58
- Features:
59
- - One namespace per memory type (heuristics, outcomes, etc.)
60
- - Embeddings stored as vectors with metadata
61
- - Efficient vector similarity search
62
- - Automatic index creation with serverless spec
63
-
64
- Usage:
65
- storage = PineconeStorage(
66
- api_key="your-api-key",
67
- index_name="alma-memory",
68
- embedding_dim=384,
69
- )
70
- """
71
-
72
- def __init__(
73
- self,
74
- api_key: str,
75
- index_name: str = "alma-memory",
76
- embedding_dim: int = 384,
77
- cloud: str = "aws",
78
- region: str = "us-east-1",
79
- metric: str = "cosine",
80
- ):
81
- """
82
- Initialize Pinecone storage.
83
-
84
- Args:
85
- api_key: Pinecone API key (required)
86
- index_name: Name of the Pinecone index (default: alma-memory)
87
- embedding_dim: Dimension of embedding vectors (default: 384)
88
- cloud: Cloud provider for serverless (default: aws)
89
- region: Cloud region for serverless (default: us-east-1)
90
- metric: Distance metric (default: cosine)
91
- """
92
- if not PINECONE_AVAILABLE:
93
- raise ImportError(
94
- "pinecone not installed. Install with: pip install 'alma-memory[pinecone]'"
95
- )
96
-
97
- self.index_name = index_name
98
- self.embedding_dim = embedding_dim
99
- self.cloud = cloud
100
- self.region = region
101
- self.metric = metric
102
-
103
- # Initialize Pinecone client
104
- self._pc = Pinecone(api_key=api_key)
105
-
106
- # Create or get index
107
- self._init_index()
108
-
109
- @classmethod
110
- def from_config(cls, config: Dict[str, Any]) -> "PineconeStorage":
111
- """Create instance from configuration."""
112
- pinecone_config = config.get("pinecone", {})
113
-
114
- # Support environment variable expansion
115
- def get_value(key: str, default: Any = None) -> Any:
116
- value = pinecone_config.get(key, default)
117
- if (
118
- isinstance(value, str)
119
- and value.startswith("${")
120
- and value.endswith("}")
121
- ):
122
- env_var = value[2:-1]
123
- return os.environ.get(env_var, default)
124
- return value
125
-
126
- return cls(
127
- api_key=get_value("api_key", os.environ.get("PINECONE_API_KEY", "")),
128
- index_name=get_value("index_name", "alma-memory"),
129
- embedding_dim=int(config.get("embedding_dim", 384)),
130
- cloud=get_value("cloud", "aws"),
131
- region=get_value("region", "us-east-1"),
132
- metric=get_value("metric", "cosine"),
133
- )
134
-
135
- def _init_index(self):
136
- """Initialize or get the Pinecone index."""
137
- existing_indexes = [idx.name for idx in self._pc.list_indexes()]
138
-
139
- if self.index_name not in existing_indexes:
140
- logger.info(f"Creating Pinecone index: {self.index_name}")
141
- self._pc.create_index(
142
- name=self.index_name,
143
- dimension=self.embedding_dim,
144
- metric=self.metric,
145
- spec=ServerlessSpec(cloud=self.cloud, region=self.region),
146
- )
147
- logger.info(f"Created index: {self.index_name}")
148
-
149
- self._index = self._pc.Index(self.index_name)
150
-
151
- def _get_zero_vector(self) -> List[float]:
152
- """Get a zero vector for records without embeddings."""
153
- return [0.0] * self.embedding_dim
154
-
155
- def _metadata_to_pinecone(self, obj: Any, memory_type: str) -> Dict[str, Any]:
156
- """Convert a memory object to Pinecone metadata format."""
157
- # Pinecone metadata must be flat (no nested dicts/lists of dicts)
158
- # and values must be strings, numbers, booleans, or lists of strings
159
- if memory_type == NAMESPACE_HEURISTICS:
160
- return {
161
- "agent": obj.agent,
162
- "project_id": obj.project_id,
163
- "condition": obj.condition,
164
- "strategy": obj.strategy,
165
- "confidence": float(obj.confidence),
166
- "occurrence_count": int(obj.occurrence_count),
167
- "success_count": int(obj.success_count),
168
- "last_validated": obj.last_validated.isoformat()
169
- if obj.last_validated
170
- else "",
171
- "created_at": obj.created_at.isoformat() if obj.created_at else "",
172
- "metadata_json": json.dumps(obj.metadata) if obj.metadata else "{}",
173
- }
174
- elif memory_type == NAMESPACE_OUTCOMES:
175
- return {
176
- "agent": obj.agent,
177
- "project_id": obj.project_id,
178
- "task_type": obj.task_type or "general",
179
- "task_description": obj.task_description,
180
- "success": obj.success,
181
- "strategy_used": obj.strategy_used or "",
182
- "duration_ms": int(obj.duration_ms) if obj.duration_ms else 0,
183
- "error_message": obj.error_message or "",
184
- "user_feedback": obj.user_feedback or "",
185
- "timestamp": obj.timestamp.isoformat() if obj.timestamp else "",
186
- "metadata_json": json.dumps(obj.metadata) if obj.metadata else "{}",
187
- }
188
- elif memory_type == NAMESPACE_PREFERENCES:
189
- return {
190
- "user_id": obj.user_id,
191
- "category": obj.category or "general",
192
- "preference": obj.preference,
193
- "source": obj.source or "unknown",
194
- "confidence": float(obj.confidence),
195
- "timestamp": obj.timestamp.isoformat() if obj.timestamp else "",
196
- "metadata_json": json.dumps(obj.metadata) if obj.metadata else "{}",
197
- }
198
- elif memory_type == NAMESPACE_DOMAIN_KNOWLEDGE:
199
- return {
200
- "agent": obj.agent,
201
- "project_id": obj.project_id,
202
- "domain": obj.domain or "general",
203
- "fact": obj.fact,
204
- "source": obj.source or "unknown",
205
- "confidence": float(obj.confidence),
206
- "last_verified": obj.last_verified.isoformat()
207
- if obj.last_verified
208
- else "",
209
- "metadata_json": json.dumps(obj.metadata) if obj.metadata else "{}",
210
- }
211
- elif memory_type == NAMESPACE_ANTI_PATTERNS:
212
- return {
213
- "agent": obj.agent,
214
- "project_id": obj.project_id,
215
- "pattern": obj.pattern,
216
- "why_bad": obj.why_bad or "",
217
- "better_alternative": obj.better_alternative or "",
218
- "occurrence_count": int(obj.occurrence_count),
219
- "last_seen": obj.last_seen.isoformat() if obj.last_seen else "",
220
- "created_at": obj.created_at.isoformat() if obj.created_at else "",
221
- "metadata_json": json.dumps(obj.metadata) if obj.metadata else "{}",
222
- }
223
- return {}
224
-
225
- def _parse_datetime(self, value: Any) -> Optional[datetime]:
226
- """Parse datetime from string."""
227
- if value is None or value == "":
228
- return None
229
- if isinstance(value, datetime):
230
- return value
231
- try:
232
- return datetime.fromisoformat(value.replace("Z", "+00:00"))
233
- except (ValueError, AttributeError):
234
- return None
235
-
236
- def _metadata_to_heuristic(self, id: str, metadata: Dict[str, Any]) -> Heuristic:
237
- """Convert Pinecone metadata to Heuristic."""
238
- return Heuristic(
239
- id=id,
240
- agent=metadata.get("agent", ""),
241
- project_id=metadata.get("project_id", ""),
242
- condition=metadata.get("condition", ""),
243
- strategy=metadata.get("strategy", ""),
244
- confidence=float(metadata.get("confidence", 0.0)),
245
- occurrence_count=int(metadata.get("occurrence_count", 0)),
246
- success_count=int(metadata.get("success_count", 0)),
247
- last_validated=self._parse_datetime(metadata.get("last_validated"))
248
- or datetime.now(timezone.utc),
249
- created_at=self._parse_datetime(metadata.get("created_at"))
250
- or datetime.now(timezone.utc),
251
- metadata=json.loads(metadata.get("metadata_json", "{}")),
252
- )
253
-
254
- def _metadata_to_outcome(self, id: str, metadata: Dict[str, Any]) -> Outcome:
255
- """Convert Pinecone metadata to Outcome."""
256
- return Outcome(
257
- id=id,
258
- agent=metadata.get("agent", ""),
259
- project_id=metadata.get("project_id", ""),
260
- task_type=metadata.get("task_type", "general"),
261
- task_description=metadata.get("task_description", ""),
262
- success=bool(metadata.get("success", False)),
263
- strategy_used=metadata.get("strategy_used", ""),
264
- duration_ms=int(metadata.get("duration_ms", 0)) or None,
265
- error_message=metadata.get("error_message") or None,
266
- user_feedback=metadata.get("user_feedback") or None,
267
- timestamp=self._parse_datetime(metadata.get("timestamp"))
268
- or datetime.now(timezone.utc),
269
- metadata=json.loads(metadata.get("metadata_json", "{}")),
270
- )
271
-
272
- def _metadata_to_preference(
273
- self, id: str, metadata: Dict[str, Any]
274
- ) -> UserPreference:
275
- """Convert Pinecone metadata to UserPreference."""
276
- return UserPreference(
277
- id=id,
278
- user_id=metadata.get("user_id", ""),
279
- category=metadata.get("category", "general"),
280
- preference=metadata.get("preference", ""),
281
- source=metadata.get("source", "unknown"),
282
- confidence=float(metadata.get("confidence", 1.0)),
283
- timestamp=self._parse_datetime(metadata.get("timestamp"))
284
- or datetime.now(timezone.utc),
285
- metadata=json.loads(metadata.get("metadata_json", "{}")),
286
- )
287
-
288
- def _metadata_to_domain_knowledge(
289
- self, id: str, metadata: Dict[str, Any]
290
- ) -> DomainKnowledge:
291
- """Convert Pinecone metadata to DomainKnowledge."""
292
- return DomainKnowledge(
293
- id=id,
294
- agent=metadata.get("agent", ""),
295
- project_id=metadata.get("project_id", ""),
296
- domain=metadata.get("domain", "general"),
297
- fact=metadata.get("fact", ""),
298
- source=metadata.get("source", "unknown"),
299
- confidence=float(metadata.get("confidence", 1.0)),
300
- last_verified=self._parse_datetime(metadata.get("last_verified"))
301
- or datetime.now(timezone.utc),
302
- metadata=json.loads(metadata.get("metadata_json", "{}")),
303
- )
304
-
305
- def _metadata_to_anti_pattern(
306
- self, id: str, metadata: Dict[str, Any]
307
- ) -> AntiPattern:
308
- """Convert Pinecone metadata to AntiPattern."""
309
- return AntiPattern(
310
- id=id,
311
- agent=metadata.get("agent", ""),
312
- project_id=metadata.get("project_id", ""),
313
- pattern=metadata.get("pattern", ""),
314
- why_bad=metadata.get("why_bad", ""),
315
- better_alternative=metadata.get("better_alternative", ""),
316
- occurrence_count=int(metadata.get("occurrence_count", 1)),
317
- last_seen=self._parse_datetime(metadata.get("last_seen"))
318
- or datetime.now(timezone.utc),
319
- created_at=self._parse_datetime(metadata.get("created_at"))
320
- or datetime.now(timezone.utc),
321
- metadata=json.loads(metadata.get("metadata_json", "{}")),
322
- )
323
-
324
- # ==================== WRITE OPERATIONS ====================
325
-
326
- def save_heuristic(self, heuristic: Heuristic) -> str:
327
- """Save a heuristic."""
328
- vector = heuristic.embedding or self._get_zero_vector()
329
- metadata = self._metadata_to_pinecone(heuristic, NAMESPACE_HEURISTICS)
330
-
331
- self._index.upsert(
332
- vectors=[{"id": heuristic.id, "values": vector, "metadata": metadata}],
333
- namespace=NAMESPACE_HEURISTICS,
334
- )
335
-
336
- logger.debug(f"Saved heuristic: {heuristic.id}")
337
- return heuristic.id
338
-
339
- def save_outcome(self, outcome: Outcome) -> str:
340
- """Save an outcome."""
341
- vector = outcome.embedding or self._get_zero_vector()
342
- metadata = self._metadata_to_pinecone(outcome, NAMESPACE_OUTCOMES)
343
-
344
- self._index.upsert(
345
- vectors=[{"id": outcome.id, "values": vector, "metadata": metadata}],
346
- namespace=NAMESPACE_OUTCOMES,
347
- )
348
-
349
- logger.debug(f"Saved outcome: {outcome.id}")
350
- return outcome.id
351
-
352
- def save_user_preference(self, preference: UserPreference) -> str:
353
- """Save a user preference."""
354
- # User preferences don't typically have embeddings
355
- vector = self._get_zero_vector()
356
- metadata = self._metadata_to_pinecone(preference, NAMESPACE_PREFERENCES)
357
-
358
- self._index.upsert(
359
- vectors=[{"id": preference.id, "values": vector, "metadata": metadata}],
360
- namespace=NAMESPACE_PREFERENCES,
361
- )
362
-
363
- logger.debug(f"Saved preference: {preference.id}")
364
- return preference.id
365
-
366
- def save_domain_knowledge(self, knowledge: DomainKnowledge) -> str:
367
- """Save domain knowledge."""
368
- vector = knowledge.embedding or self._get_zero_vector()
369
- metadata = self._metadata_to_pinecone(knowledge, NAMESPACE_DOMAIN_KNOWLEDGE)
370
-
371
- self._index.upsert(
372
- vectors=[{"id": knowledge.id, "values": vector, "metadata": metadata}],
373
- namespace=NAMESPACE_DOMAIN_KNOWLEDGE,
374
- )
375
-
376
- logger.debug(f"Saved domain knowledge: {knowledge.id}")
377
- return knowledge.id
378
-
379
- def save_anti_pattern(self, anti_pattern: AntiPattern) -> str:
380
- """Save an anti-pattern."""
381
- vector = anti_pattern.embedding or self._get_zero_vector()
382
- metadata = self._metadata_to_pinecone(anti_pattern, NAMESPACE_ANTI_PATTERNS)
383
-
384
- self._index.upsert(
385
- vectors=[{"id": anti_pattern.id, "values": vector, "metadata": metadata}],
386
- namespace=NAMESPACE_ANTI_PATTERNS,
387
- )
388
-
389
- logger.debug(f"Saved anti-pattern: {anti_pattern.id}")
390
- return anti_pattern.id
391
-
392
- # ==================== BATCH WRITE OPERATIONS ====================
393
-
394
- def save_heuristics(self, heuristics: List[Heuristic]) -> List[str]:
395
- """Save multiple heuristics in a batch."""
396
- if not heuristics:
397
- return []
398
-
399
- vectors = []
400
- for h in heuristics:
401
- vector = h.embedding or self._get_zero_vector()
402
- metadata = self._metadata_to_pinecone(h, NAMESPACE_HEURISTICS)
403
- vectors.append({"id": h.id, "values": vector, "metadata": metadata})
404
-
405
- # Pinecone supports batches of up to 100 vectors
406
- batch_size = 100
407
- for i in range(0, len(vectors), batch_size):
408
- batch = vectors[i : i + batch_size]
409
- self._index.upsert(vectors=batch, namespace=NAMESPACE_HEURISTICS)
410
-
411
- logger.debug(f"Batch saved {len(heuristics)} heuristics")
412
- return [h.id for h in heuristics]
413
-
414
- def save_outcomes(self, outcomes: List[Outcome]) -> List[str]:
415
- """Save multiple outcomes in a batch."""
416
- if not outcomes:
417
- return []
418
-
419
- vectors = []
420
- for o in outcomes:
421
- vector = o.embedding or self._get_zero_vector()
422
- metadata = self._metadata_to_pinecone(o, NAMESPACE_OUTCOMES)
423
- vectors.append({"id": o.id, "values": vector, "metadata": metadata})
424
-
425
- batch_size = 100
426
- for i in range(0, len(vectors), batch_size):
427
- batch = vectors[i : i + batch_size]
428
- self._index.upsert(vectors=batch, namespace=NAMESPACE_OUTCOMES)
429
-
430
- logger.debug(f"Batch saved {len(outcomes)} outcomes")
431
- return [o.id for o in outcomes]
432
-
433
- def save_domain_knowledge_batch(
434
- self, knowledge_items: List[DomainKnowledge]
435
- ) -> List[str]:
436
- """Save multiple domain knowledge items in a batch."""
437
- if not knowledge_items:
438
- return []
439
-
440
- vectors = []
441
- for k in knowledge_items:
442
- vector = k.embedding or self._get_zero_vector()
443
- metadata = self._metadata_to_pinecone(k, NAMESPACE_DOMAIN_KNOWLEDGE)
444
- vectors.append({"id": k.id, "values": vector, "metadata": metadata})
445
-
446
- batch_size = 100
447
- for i in range(0, len(vectors), batch_size):
448
- batch = vectors[i : i + batch_size]
449
- self._index.upsert(vectors=batch, namespace=NAMESPACE_DOMAIN_KNOWLEDGE)
450
-
451
- logger.debug(f"Batch saved {len(knowledge_items)} domain knowledge items")
452
- return [k.id for k in knowledge_items]
453
-
454
- # ==================== READ OPERATIONS ====================
455
-
456
- def _build_filter(
457
- self,
458
- project_id: Optional[str] = None,
459
- agent: Optional[str] = None,
460
- user_id: Optional[str] = None,
461
- task_type: Optional[str] = None,
462
- domain: Optional[str] = None,
463
- category: Optional[str] = None,
464
- success_only: bool = False,
465
- min_confidence: float = 0.0,
466
- ) -> Dict[str, Any]:
467
- """Build Pinecone metadata filter."""
468
- conditions = []
469
-
470
- if project_id:
471
- conditions.append({"project_id": {"$eq": project_id}})
472
-
473
- if agent:
474
- conditions.append({"agent": {"$eq": agent}})
475
-
476
- if user_id:
477
- conditions.append({"user_id": {"$eq": user_id}})
478
-
479
- if task_type:
480
- conditions.append({"task_type": {"$eq": task_type}})
481
-
482
- if domain:
483
- conditions.append({"domain": {"$eq": domain}})
484
-
485
- if category:
486
- conditions.append({"category": {"$eq": category}})
487
-
488
- if success_only:
489
- conditions.append({"success": {"$eq": True}})
490
-
491
- if min_confidence > 0.0:
492
- conditions.append({"confidence": {"$gte": min_confidence}})
493
-
494
- if not conditions:
495
- return {}
496
-
497
- if len(conditions) == 1:
498
- return conditions[0]
499
-
500
- return {"$and": conditions}
501
-
502
- def get_heuristics(
503
- self,
504
- project_id: str,
505
- agent: Optional[str] = None,
506
- embedding: Optional[List[float]] = None,
507
- top_k: int = 5,
508
- min_confidence: float = 0.0,
509
- ) -> List[Heuristic]:
510
- """Get heuristics with optional vector search."""
511
- filter_dict = self._build_filter(
512
- project_id=project_id,
513
- agent=agent,
514
- min_confidence=min_confidence,
515
- )
516
-
517
- query_vector = embedding or self._get_zero_vector()
518
-
519
- results = self._index.query(
520
- vector=query_vector,
521
- top_k=top_k,
522
- namespace=NAMESPACE_HEURISTICS,
523
- filter=filter_dict if filter_dict else None,
524
- include_metadata=True,
525
- )
526
-
527
- return [
528
- self._metadata_to_heuristic(match["id"], match.get("metadata", {}))
529
- for match in results.get("matches", [])
530
- ]
531
-
532
- def get_outcomes(
533
- self,
534
- project_id: str,
535
- agent: Optional[str] = None,
536
- task_type: Optional[str] = None,
537
- embedding: Optional[List[float]] = None,
538
- top_k: int = 5,
539
- success_only: bool = False,
540
- ) -> List[Outcome]:
541
- """Get outcomes with optional vector search."""
542
- filter_dict = self._build_filter(
543
- project_id=project_id,
544
- agent=agent,
545
- task_type=task_type,
546
- success_only=success_only,
547
- )
548
-
549
- query_vector = embedding or self._get_zero_vector()
550
-
551
- results = self._index.query(
552
- vector=query_vector,
553
- top_k=top_k,
554
- namespace=NAMESPACE_OUTCOMES,
555
- filter=filter_dict if filter_dict else None,
556
- include_metadata=True,
557
- )
558
-
559
- return [
560
- self._metadata_to_outcome(match["id"], match.get("metadata", {}))
561
- for match in results.get("matches", [])
562
- ]
563
-
564
- def get_user_preferences(
565
- self,
566
- user_id: str,
567
- category: Optional[str] = None,
568
- ) -> List[UserPreference]:
569
- """Get user preferences."""
570
- filter_dict = self._build_filter(
571
- user_id=user_id,
572
- category=category,
573
- )
574
-
575
- # For preferences, we use a zero vector query since we filter by metadata
576
- query_vector = self._get_zero_vector()
577
-
578
- results = self._index.query(
579
- vector=query_vector,
580
- top_k=100, # Get all preferences for user
581
- namespace=NAMESPACE_PREFERENCES,
582
- filter=filter_dict if filter_dict else None,
583
- include_metadata=True,
584
- )
585
-
586
- return [
587
- self._metadata_to_preference(match["id"], match.get("metadata", {}))
588
- for match in results.get("matches", [])
589
- ]
590
-
591
- def get_domain_knowledge(
592
- self,
593
- project_id: str,
594
- agent: Optional[str] = None,
595
- domain: Optional[str] = None,
596
- embedding: Optional[List[float]] = None,
597
- top_k: int = 5,
598
- ) -> List[DomainKnowledge]:
599
- """Get domain knowledge with optional vector search."""
600
- filter_dict = self._build_filter(
601
- project_id=project_id,
602
- agent=agent,
603
- domain=domain,
604
- )
605
-
606
- query_vector = embedding or self._get_zero_vector()
607
-
608
- results = self._index.query(
609
- vector=query_vector,
610
- top_k=top_k,
611
- namespace=NAMESPACE_DOMAIN_KNOWLEDGE,
612
- filter=filter_dict if filter_dict else None,
613
- include_metadata=True,
614
- )
615
-
616
- return [
617
- self._metadata_to_domain_knowledge(match["id"], match.get("metadata", {}))
618
- for match in results.get("matches", [])
619
- ]
620
-
621
- def get_anti_patterns(
622
- self,
623
- project_id: str,
624
- agent: Optional[str] = None,
625
- embedding: Optional[List[float]] = None,
626
- top_k: int = 5,
627
- ) -> List[AntiPattern]:
628
- """Get anti-patterns with optional vector search."""
629
- filter_dict = self._build_filter(
630
- project_id=project_id,
631
- agent=agent,
632
- )
633
-
634
- query_vector = embedding or self._get_zero_vector()
635
-
636
- results = self._index.query(
637
- vector=query_vector,
638
- top_k=top_k,
639
- namespace=NAMESPACE_ANTI_PATTERNS,
640
- filter=filter_dict if filter_dict else None,
641
- include_metadata=True,
642
- )
643
-
644
- return [
645
- self._metadata_to_anti_pattern(match["id"], match.get("metadata", {}))
646
- for match in results.get("matches", [])
647
- ]
648
-
649
- # ==================== MULTI-AGENT MEMORY SHARING ====================
650
-
651
- def get_heuristics_for_agents(
652
- self,
653
- project_id: str,
654
- agents: List[str],
655
- embedding: Optional[List[float]] = None,
656
- top_k: int = 5,
657
- min_confidence: float = 0.0,
658
- ) -> List[Heuristic]:
659
- """Get heuristics from multiple agents using $in filter."""
660
- if not agents:
661
- return []
662
-
663
- conditions = [
664
- {"project_id": {"$eq": project_id}},
665
- {"agent": {"$in": agents}},
666
- ]
667
-
668
- if min_confidence > 0.0:
669
- conditions.append({"confidence": {"$gte": min_confidence}})
670
-
671
- filter_dict = {"$and": conditions}
672
- query_vector = embedding or self._get_zero_vector()
673
-
674
- results = self._index.query(
675
- vector=query_vector,
676
- top_k=top_k * len(agents),
677
- namespace=NAMESPACE_HEURISTICS,
678
- filter=filter_dict,
679
- include_metadata=True,
680
- )
681
-
682
- return [
683
- self._metadata_to_heuristic(match["id"], match.get("metadata", {}))
684
- for match in results.get("matches", [])
685
- ]
686
-
687
- def get_outcomes_for_agents(
688
- self,
689
- project_id: str,
690
- agents: List[str],
691
- task_type: Optional[str] = None,
692
- embedding: Optional[List[float]] = None,
693
- top_k: int = 5,
694
- success_only: bool = False,
695
- ) -> List[Outcome]:
696
- """Get outcomes from multiple agents using $in filter."""
697
- if not agents:
698
- return []
699
-
700
- conditions = [
701
- {"project_id": {"$eq": project_id}},
702
- {"agent": {"$in": agents}},
703
- ]
704
-
705
- if task_type:
706
- conditions.append({"task_type": {"$eq": task_type}})
707
-
708
- if success_only:
709
- conditions.append({"success": {"$eq": True}})
710
-
711
- filter_dict = {"$and": conditions}
712
- query_vector = embedding or self._get_zero_vector()
713
-
714
- results = self._index.query(
715
- vector=query_vector,
716
- top_k=top_k * len(agents),
717
- namespace=NAMESPACE_OUTCOMES,
718
- filter=filter_dict,
719
- include_metadata=True,
720
- )
721
-
722
- return [
723
- self._metadata_to_outcome(match["id"], match.get("metadata", {}))
724
- for match in results.get("matches", [])
725
- ]
726
-
727
- def get_domain_knowledge_for_agents(
728
- self,
729
- project_id: str,
730
- agents: List[str],
731
- domain: Optional[str] = None,
732
- embedding: Optional[List[float]] = None,
733
- top_k: int = 5,
734
- ) -> List[DomainKnowledge]:
735
- """Get domain knowledge from multiple agents using $in filter."""
736
- if not agents:
737
- return []
738
-
739
- conditions = [
740
- {"project_id": {"$eq": project_id}},
741
- {"agent": {"$in": agents}},
742
- ]
743
-
744
- if domain:
745
- conditions.append({"domain": {"$eq": domain}})
746
-
747
- filter_dict = {"$and": conditions}
748
- query_vector = embedding or self._get_zero_vector()
749
-
750
- results = self._index.query(
751
- vector=query_vector,
752
- top_k=top_k * len(agents),
753
- namespace=NAMESPACE_DOMAIN_KNOWLEDGE,
754
- filter=filter_dict,
755
- include_metadata=True,
756
- )
757
-
758
- return [
759
- self._metadata_to_domain_knowledge(match["id"], match.get("metadata", {}))
760
- for match in results.get("matches", [])
761
- ]
762
-
763
- def get_anti_patterns_for_agents(
764
- self,
765
- project_id: str,
766
- agents: List[str],
767
- embedding: Optional[List[float]] = None,
768
- top_k: int = 5,
769
- ) -> List[AntiPattern]:
770
- """Get anti-patterns from multiple agents using $in filter."""
771
- if not agents:
772
- return []
773
-
774
- conditions = [
775
- {"project_id": {"$eq": project_id}},
776
- {"agent": {"$in": agents}},
777
- ]
778
-
779
- filter_dict = {"$and": conditions}
780
- query_vector = embedding or self._get_zero_vector()
781
-
782
- results = self._index.query(
783
- vector=query_vector,
784
- top_k=top_k * len(agents),
785
- namespace=NAMESPACE_ANTI_PATTERNS,
786
- filter=filter_dict,
787
- include_metadata=True,
788
- )
789
-
790
- return [
791
- self._metadata_to_anti_pattern(match["id"], match.get("metadata", {}))
792
- for match in results.get("matches", [])
793
- ]
794
-
795
- # ==================== UPDATE OPERATIONS ====================
796
-
797
- def update_heuristic(
798
- self,
799
- heuristic_id: str,
800
- updates: Dict[str, Any],
801
- ) -> bool:
802
- """Update a heuristic's fields."""
803
- if not updates:
804
- return False
805
-
806
- # Fetch existing record
807
- results = self._index.fetch(ids=[heuristic_id], namespace=NAMESPACE_HEURISTICS)
808
-
809
- if heuristic_id not in results.get("vectors", {}):
810
- return False
811
-
812
- existing = results["vectors"][heuristic_id]
813
- metadata = existing.get("metadata", {})
814
-
815
- # Apply updates to metadata
816
- for key, value in updates.items():
817
- if key == "metadata":
818
- metadata["metadata_json"] = json.dumps(value) if value else "{}"
819
- elif isinstance(value, datetime):
820
- metadata[key] = value.isoformat()
821
- else:
822
- metadata[key] = value
823
-
824
- # Upsert with updated metadata
825
- self._index.upsert(
826
- vectors=[
827
- {
828
- "id": heuristic_id,
829
- "values": existing.get("values", self._get_zero_vector()),
830
- "metadata": metadata,
831
- }
832
- ],
833
- namespace=NAMESPACE_HEURISTICS,
834
- )
835
-
836
- return True
837
-
838
- def increment_heuristic_occurrence(
839
- self,
840
- heuristic_id: str,
841
- success: bool,
842
- ) -> bool:
843
- """Increment heuristic occurrence count."""
844
- # Fetch existing record
845
- results = self._index.fetch(ids=[heuristic_id], namespace=NAMESPACE_HEURISTICS)
846
-
847
- if heuristic_id not in results.get("vectors", {}):
848
- return False
849
-
850
- existing = results["vectors"][heuristic_id]
851
- metadata = existing.get("metadata", {})
852
-
853
- # Increment counts
854
- metadata["occurrence_count"] = int(metadata.get("occurrence_count", 0)) + 1
855
- if success:
856
- metadata["success_count"] = int(metadata.get("success_count", 0)) + 1
857
- metadata["last_validated"] = datetime.now(timezone.utc).isoformat()
858
-
859
- # Upsert with updated metadata
860
- self._index.upsert(
861
- vectors=[
862
- {
863
- "id": heuristic_id,
864
- "values": existing.get("values", self._get_zero_vector()),
865
- "metadata": metadata,
866
- }
867
- ],
868
- namespace=NAMESPACE_HEURISTICS,
869
- )
870
-
871
- return True
872
-
873
- def update_heuristic_confidence(
874
- self,
875
- heuristic_id: str,
876
- new_confidence: float,
877
- ) -> bool:
878
- """Update a heuristic's confidence value."""
879
- return self.update_heuristic(heuristic_id, {"confidence": new_confidence})
880
-
881
- def update_knowledge_confidence(
882
- self,
883
- knowledge_id: str,
884
- new_confidence: float,
885
- ) -> bool:
886
- """Update domain knowledge confidence value."""
887
- # Fetch existing record
888
- results = self._index.fetch(
889
- ids=[knowledge_id], namespace=NAMESPACE_DOMAIN_KNOWLEDGE
890
- )
891
-
892
- if knowledge_id not in results.get("vectors", {}):
893
- return False
894
-
895
- existing = results["vectors"][knowledge_id]
896
- metadata = existing.get("metadata", {})
897
- metadata["confidence"] = new_confidence
898
-
899
- # Upsert with updated metadata
900
- self._index.upsert(
901
- vectors=[
902
- {
903
- "id": knowledge_id,
904
- "values": existing.get("values", self._get_zero_vector()),
905
- "metadata": metadata,
906
- }
907
- ],
908
- namespace=NAMESPACE_DOMAIN_KNOWLEDGE,
909
- )
910
-
911
- return True
912
-
913
- # ==================== DELETE OPERATIONS ====================
914
-
915
- def delete_heuristic(self, heuristic_id: str) -> bool:
916
- """Delete a heuristic by ID."""
917
- try:
918
- self._index.delete(ids=[heuristic_id], namespace=NAMESPACE_HEURISTICS)
919
- logger.debug(f"Deleted heuristic: {heuristic_id}")
920
- return True
921
- except Exception as e:
922
- logger.error(f"Failed to delete heuristic {heuristic_id}: {e}")
923
- return False
924
-
925
- def delete_outcome(self, outcome_id: str) -> bool:
926
- """Delete an outcome by ID."""
927
- try:
928
- self._index.delete(ids=[outcome_id], namespace=NAMESPACE_OUTCOMES)
929
- logger.debug(f"Deleted outcome: {outcome_id}")
930
- return True
931
- except Exception as e:
932
- logger.error(f"Failed to delete outcome {outcome_id}: {e}")
933
- return False
934
-
935
- def delete_domain_knowledge(self, knowledge_id: str) -> bool:
936
- """Delete domain knowledge by ID."""
937
- try:
938
- self._index.delete(ids=[knowledge_id], namespace=NAMESPACE_DOMAIN_KNOWLEDGE)
939
- logger.debug(f"Deleted domain knowledge: {knowledge_id}")
940
- return True
941
- except Exception as e:
942
- logger.error(f"Failed to delete domain knowledge {knowledge_id}: {e}")
943
- return False
944
-
945
- def delete_anti_pattern(self, anti_pattern_id: str) -> bool:
946
- """Delete an anti-pattern by ID."""
947
- try:
948
- self._index.delete(ids=[anti_pattern_id], namespace=NAMESPACE_ANTI_PATTERNS)
949
- logger.debug(f"Deleted anti-pattern: {anti_pattern_id}")
950
- return True
951
- except Exception as e:
952
- logger.error(f"Failed to delete anti-pattern {anti_pattern_id}: {e}")
953
- return False
954
-
955
- def delete_outcomes_older_than(
956
- self,
957
- project_id: str,
958
- older_than: datetime,
959
- agent: Optional[str] = None,
960
- ) -> int:
961
- """Delete old outcomes.
962
-
963
- Note: Pinecone doesn't support bulk delete by filter directly,
964
- so we query first then delete by IDs.
965
- """
966
- filter_dict = self._build_filter(project_id=project_id, agent=agent)
967
- query_vector = self._get_zero_vector()
968
-
969
- # Query to get all matching IDs
970
- results = self._index.query(
971
- vector=query_vector,
972
- top_k=10000, # Large number to get all
973
- namespace=NAMESPACE_OUTCOMES,
974
- filter=filter_dict if filter_dict else None,
975
- include_metadata=True,
976
- )
977
-
978
- older_than_iso = older_than.isoformat()
979
- ids_to_delete = []
980
-
981
- for match in results.get("matches", []):
982
- timestamp = match.get("metadata", {}).get("timestamp", "")
983
- if timestamp and timestamp < older_than_iso:
984
- ids_to_delete.append(match["id"])
985
-
986
- if ids_to_delete:
987
- # Delete in batches of 1000
988
- batch_size = 1000
989
- for i in range(0, len(ids_to_delete), batch_size):
990
- batch = ids_to_delete[i : i + batch_size]
991
- self._index.delete(ids=batch, namespace=NAMESPACE_OUTCOMES)
992
-
993
- deleted = len(ids_to_delete)
994
- logger.info(f"Deleted {deleted} old outcomes")
995
- return deleted
996
-
997
- def delete_low_confidence_heuristics(
998
- self,
999
- project_id: str,
1000
- below_confidence: float,
1001
- agent: Optional[str] = None,
1002
- ) -> int:
1003
- """Delete low-confidence heuristics."""
1004
- filter_dict = self._build_filter(project_id=project_id, agent=agent)
1005
- query_vector = self._get_zero_vector()
1006
-
1007
- # Query to get all matching IDs
1008
- results = self._index.query(
1009
- vector=query_vector,
1010
- top_k=10000,
1011
- namespace=NAMESPACE_HEURISTICS,
1012
- filter=filter_dict if filter_dict else None,
1013
- include_metadata=True,
1014
- )
1015
-
1016
- ids_to_delete = []
1017
- for match in results.get("matches", []):
1018
- confidence = float(match.get("metadata", {}).get("confidence", 0.0))
1019
- if confidence < below_confidence:
1020
- ids_to_delete.append(match["id"])
1021
-
1022
- if ids_to_delete:
1023
- batch_size = 1000
1024
- for i in range(0, len(ids_to_delete), batch_size):
1025
- batch = ids_to_delete[i : i + batch_size]
1026
- self._index.delete(ids=batch, namespace=NAMESPACE_HEURISTICS)
1027
-
1028
- deleted = len(ids_to_delete)
1029
- logger.info(f"Deleted {deleted} low-confidence heuristics")
1030
- return deleted
1031
-
1032
- # ==================== STATS ====================
1033
-
1034
- def get_stats(
1035
- self,
1036
- project_id: str,
1037
- agent: Optional[str] = None,
1038
- ) -> Dict[str, Any]:
1039
- """Get memory statistics."""
1040
- stats = {
1041
- "project_id": project_id,
1042
- "agent": agent,
1043
- "storage_type": "pinecone",
1044
- "index_name": self.index_name,
1045
- }
1046
-
1047
- # Get index stats
1048
- try:
1049
- index_stats = self._index.describe_index_stats()
1050
-
1051
- # Count by namespace
1052
- namespaces = index_stats.get("namespaces", {})
1053
-
1054
- stats["heuristics_count"] = namespaces.get(NAMESPACE_HEURISTICS, {}).get(
1055
- "vector_count", 0
1056
- )
1057
- stats["outcomes_count"] = namespaces.get(NAMESPACE_OUTCOMES, {}).get(
1058
- "vector_count", 0
1059
- )
1060
- stats["domain_knowledge_count"] = namespaces.get(
1061
- NAMESPACE_DOMAIN_KNOWLEDGE, {}
1062
- ).get("vector_count", 0)
1063
- stats["anti_patterns_count"] = namespaces.get(
1064
- NAMESPACE_ANTI_PATTERNS, {}
1065
- ).get("vector_count", 0)
1066
- stats["preferences_count"] = namespaces.get(NAMESPACE_PREFERENCES, {}).get(
1067
- "vector_count", 0
1068
- )
1069
- stats["total_count"] = index_stats.get("total_vector_count", 0)
1070
-
1071
- except Exception as e:
1072
- logger.warning(f"Failed to get index stats: {e}")
1073
- stats["error"] = str(e)
1074
-
1075
- return stats
1076
-
1077
- def close(self):
1078
- """Close the Pinecone connection (no-op for Pinecone client)."""
1079
- # Pinecone client doesn't require explicit cleanup
1080
- pass
1
+ """
2
+ ALMA Pinecone Storage Backend.
3
+
4
+ Production-ready storage using Pinecone vector database for
5
+ native vector similarity search with serverless infrastructure.
6
+
7
+ Recommended for:
8
+ - Cloud-native deployments
9
+ - Large-scale vector search workloads
10
+ - Serverless architecture preferences
11
+ """
12
+
13
+ import json
14
+ import logging
15
+ import os
16
+ from datetime import datetime, timezone
17
+ from typing import Any, Dict, List, Optional
18
+
19
+ from alma.storage.base import StorageBackend
20
+ from alma.types import (
21
+ AntiPattern,
22
+ DomainKnowledge,
23
+ Heuristic,
24
+ Outcome,
25
+ UserPreference,
26
+ )
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+ # Try to import pinecone
31
+ try:
32
+ from pinecone import Pinecone, ServerlessSpec
33
+
34
+ PINECONE_AVAILABLE = True
35
+ except ImportError:
36
+ PINECONE_AVAILABLE = False
37
+ Pinecone = None # type: ignore
38
+ ServerlessSpec = None # type: ignore
39
+ logger.warning(
40
+ "pinecone not installed. Install with: pip install 'alma-memory[pinecone]'"
41
+ )
42
+
43
+ # Namespace constants for memory types
44
+ NAMESPACE_HEURISTICS = "heuristics"
45
+ NAMESPACE_OUTCOMES = "outcomes"
46
+ NAMESPACE_DOMAIN_KNOWLEDGE = "domain_knowledge"
47
+ NAMESPACE_ANTI_PATTERNS = "anti_patterns"
48
+ NAMESPACE_PREFERENCES = "preferences"
49
+
50
+
51
+ class PineconeStorage(StorageBackend):
52
+ """
53
+ Pinecone storage backend for ALMA.
54
+
55
+ Uses Pinecone's vector database with namespaces for different memory types.
56
+ Supports serverless deployment for automatic scaling.
57
+
58
+ Features:
59
+ - One namespace per memory type (heuristics, outcomes, etc.)
60
+ - Embeddings stored as vectors with metadata
61
+ - Efficient vector similarity search
62
+ - Automatic index creation with serverless spec
63
+
64
+ Usage:
65
+ storage = PineconeStorage(
66
+ api_key="your-api-key",
67
+ index_name="alma-memory",
68
+ embedding_dim=384,
69
+ )
70
+ """
71
+
72
+ def __init__(
73
+ self,
74
+ api_key: str,
75
+ index_name: str = "alma-memory",
76
+ embedding_dim: int = 384,
77
+ cloud: str = "aws",
78
+ region: str = "us-east-1",
79
+ metric: str = "cosine",
80
+ ):
81
+ """
82
+ Initialize Pinecone storage.
83
+
84
+ Args:
85
+ api_key: Pinecone API key (required)
86
+ index_name: Name of the Pinecone index (default: alma-memory)
87
+ embedding_dim: Dimension of embedding vectors (default: 384)
88
+ cloud: Cloud provider for serverless (default: aws)
89
+ region: Cloud region for serverless (default: us-east-1)
90
+ metric: Distance metric (default: cosine)
91
+ """
92
+ if not PINECONE_AVAILABLE:
93
+ raise ImportError(
94
+ "pinecone not installed. Install with: pip install 'alma-memory[pinecone]'"
95
+ )
96
+
97
+ self.index_name = index_name
98
+ self.embedding_dim = embedding_dim
99
+ self.cloud = cloud
100
+ self.region = region
101
+ self.metric = metric
102
+
103
+ # Initialize Pinecone client
104
+ self._pc = Pinecone(api_key=api_key)
105
+
106
+ # Create or get index
107
+ self._init_index()
108
+
109
+ @classmethod
110
+ def from_config(cls, config: Dict[str, Any]) -> "PineconeStorage":
111
+ """Create instance from configuration."""
112
+ pinecone_config = config.get("pinecone", {})
113
+
114
+ # Support environment variable expansion
115
+ def get_value(key: str, default: Any = None) -> Any:
116
+ value = pinecone_config.get(key, default)
117
+ if (
118
+ isinstance(value, str)
119
+ and value.startswith("${")
120
+ and value.endswith("}")
121
+ ):
122
+ env_var = value[2:-1]
123
+ return os.environ.get(env_var, default)
124
+ return value
125
+
126
+ return cls(
127
+ api_key=get_value("api_key", os.environ.get("PINECONE_API_KEY", "")),
128
+ index_name=get_value("index_name", "alma-memory"),
129
+ embedding_dim=int(config.get("embedding_dim", 384)),
130
+ cloud=get_value("cloud", "aws"),
131
+ region=get_value("region", "us-east-1"),
132
+ metric=get_value("metric", "cosine"),
133
+ )
134
+
135
+ def _init_index(self):
136
+ """Initialize or get the Pinecone index."""
137
+ existing_indexes = [idx.name for idx in self._pc.list_indexes()]
138
+
139
+ if self.index_name not in existing_indexes:
140
+ logger.info(f"Creating Pinecone index: {self.index_name}")
141
+ self._pc.create_index(
142
+ name=self.index_name,
143
+ dimension=self.embedding_dim,
144
+ metric=self.metric,
145
+ spec=ServerlessSpec(cloud=self.cloud, region=self.region),
146
+ )
147
+ logger.info(f"Created index: {self.index_name}")
148
+
149
+ self._index = self._pc.Index(self.index_name)
150
+
151
+ def _get_zero_vector(self) -> List[float]:
152
+ """Get a zero vector for records without embeddings."""
153
+ return [0.0] * self.embedding_dim
154
+
155
+ def _metadata_to_pinecone(self, obj: Any, memory_type: str) -> Dict[str, Any]:
156
+ """Convert a memory object to Pinecone metadata format."""
157
+ # Pinecone metadata must be flat (no nested dicts/lists of dicts)
158
+ # and values must be strings, numbers, booleans, or lists of strings
159
+ if memory_type == NAMESPACE_HEURISTICS:
160
+ return {
161
+ "agent": obj.agent,
162
+ "project_id": obj.project_id,
163
+ "condition": obj.condition,
164
+ "strategy": obj.strategy,
165
+ "confidence": float(obj.confidence),
166
+ "occurrence_count": int(obj.occurrence_count),
167
+ "success_count": int(obj.success_count),
168
+ "last_validated": obj.last_validated.isoformat()
169
+ if obj.last_validated
170
+ else "",
171
+ "created_at": obj.created_at.isoformat() if obj.created_at else "",
172
+ "metadata_json": json.dumps(obj.metadata) if obj.metadata else "{}",
173
+ }
174
+ elif memory_type == NAMESPACE_OUTCOMES:
175
+ return {
176
+ "agent": obj.agent,
177
+ "project_id": obj.project_id,
178
+ "task_type": obj.task_type or "general",
179
+ "task_description": obj.task_description,
180
+ "success": obj.success,
181
+ "strategy_used": obj.strategy_used or "",
182
+ "duration_ms": int(obj.duration_ms) if obj.duration_ms else 0,
183
+ "error_message": obj.error_message or "",
184
+ "user_feedback": obj.user_feedback or "",
185
+ "timestamp": obj.timestamp.isoformat() if obj.timestamp else "",
186
+ "metadata_json": json.dumps(obj.metadata) if obj.metadata else "{}",
187
+ }
188
+ elif memory_type == NAMESPACE_PREFERENCES:
189
+ return {
190
+ "user_id": obj.user_id,
191
+ "category": obj.category or "general",
192
+ "preference": obj.preference,
193
+ "source": obj.source or "unknown",
194
+ "confidence": float(obj.confidence),
195
+ "timestamp": obj.timestamp.isoformat() if obj.timestamp else "",
196
+ "metadata_json": json.dumps(obj.metadata) if obj.metadata else "{}",
197
+ }
198
+ elif memory_type == NAMESPACE_DOMAIN_KNOWLEDGE:
199
+ return {
200
+ "agent": obj.agent,
201
+ "project_id": obj.project_id,
202
+ "domain": obj.domain or "general",
203
+ "fact": obj.fact,
204
+ "source": obj.source or "unknown",
205
+ "confidence": float(obj.confidence),
206
+ "last_verified": obj.last_verified.isoformat()
207
+ if obj.last_verified
208
+ else "",
209
+ "metadata_json": json.dumps(obj.metadata) if obj.metadata else "{}",
210
+ }
211
+ elif memory_type == NAMESPACE_ANTI_PATTERNS:
212
+ return {
213
+ "agent": obj.agent,
214
+ "project_id": obj.project_id,
215
+ "pattern": obj.pattern,
216
+ "why_bad": obj.why_bad or "",
217
+ "better_alternative": obj.better_alternative or "",
218
+ "occurrence_count": int(obj.occurrence_count),
219
+ "last_seen": obj.last_seen.isoformat() if obj.last_seen else "",
220
+ "created_at": obj.created_at.isoformat() if obj.created_at else "",
221
+ "metadata_json": json.dumps(obj.metadata) if obj.metadata else "{}",
222
+ }
223
+ return {}
224
+
225
+ def _parse_datetime(self, value: Any) -> Optional[datetime]:
226
+ """Parse datetime from string."""
227
+ if value is None or value == "":
228
+ return None
229
+ if isinstance(value, datetime):
230
+ return value
231
+ try:
232
+ return datetime.fromisoformat(value.replace("Z", "+00:00"))
233
+ except (ValueError, AttributeError):
234
+ return None
235
+
236
+ def _metadata_to_heuristic(self, id: str, metadata: Dict[str, Any]) -> Heuristic:
237
+ """Convert Pinecone metadata to Heuristic."""
238
+ return Heuristic(
239
+ id=id,
240
+ agent=metadata.get("agent", ""),
241
+ project_id=metadata.get("project_id", ""),
242
+ condition=metadata.get("condition", ""),
243
+ strategy=metadata.get("strategy", ""),
244
+ confidence=float(metadata.get("confidence", 0.0)),
245
+ occurrence_count=int(metadata.get("occurrence_count", 0)),
246
+ success_count=int(metadata.get("success_count", 0)),
247
+ last_validated=self._parse_datetime(metadata.get("last_validated"))
248
+ or datetime.now(timezone.utc),
249
+ created_at=self._parse_datetime(metadata.get("created_at"))
250
+ or datetime.now(timezone.utc),
251
+ metadata=json.loads(metadata.get("metadata_json", "{}")),
252
+ )
253
+
254
+ def _metadata_to_outcome(self, id: str, metadata: Dict[str, Any]) -> Outcome:
255
+ """Convert Pinecone metadata to Outcome."""
256
+ return Outcome(
257
+ id=id,
258
+ agent=metadata.get("agent", ""),
259
+ project_id=metadata.get("project_id", ""),
260
+ task_type=metadata.get("task_type", "general"),
261
+ task_description=metadata.get("task_description", ""),
262
+ success=bool(metadata.get("success", False)),
263
+ strategy_used=metadata.get("strategy_used", ""),
264
+ duration_ms=int(metadata.get("duration_ms", 0)) or None,
265
+ error_message=metadata.get("error_message") or None,
266
+ user_feedback=metadata.get("user_feedback") or None,
267
+ timestamp=self._parse_datetime(metadata.get("timestamp"))
268
+ or datetime.now(timezone.utc),
269
+ metadata=json.loads(metadata.get("metadata_json", "{}")),
270
+ )
271
+
272
+ def _metadata_to_preference(
273
+ self, id: str, metadata: Dict[str, Any]
274
+ ) -> UserPreference:
275
+ """Convert Pinecone metadata to UserPreference."""
276
+ return UserPreference(
277
+ id=id,
278
+ user_id=metadata.get("user_id", ""),
279
+ category=metadata.get("category", "general"),
280
+ preference=metadata.get("preference", ""),
281
+ source=metadata.get("source", "unknown"),
282
+ confidence=float(metadata.get("confidence", 1.0)),
283
+ timestamp=self._parse_datetime(metadata.get("timestamp"))
284
+ or datetime.now(timezone.utc),
285
+ metadata=json.loads(metadata.get("metadata_json", "{}")),
286
+ )
287
+
288
+ def _metadata_to_domain_knowledge(
289
+ self, id: str, metadata: Dict[str, Any]
290
+ ) -> DomainKnowledge:
291
+ """Convert Pinecone metadata to DomainKnowledge."""
292
+ return DomainKnowledge(
293
+ id=id,
294
+ agent=metadata.get("agent", ""),
295
+ project_id=metadata.get("project_id", ""),
296
+ domain=metadata.get("domain", "general"),
297
+ fact=metadata.get("fact", ""),
298
+ source=metadata.get("source", "unknown"),
299
+ confidence=float(metadata.get("confidence", 1.0)),
300
+ last_verified=self._parse_datetime(metadata.get("last_verified"))
301
+ or datetime.now(timezone.utc),
302
+ metadata=json.loads(metadata.get("metadata_json", "{}")),
303
+ )
304
+
305
+ def _metadata_to_anti_pattern(
306
+ self, id: str, metadata: Dict[str, Any]
307
+ ) -> AntiPattern:
308
+ """Convert Pinecone metadata to AntiPattern."""
309
+ return AntiPattern(
310
+ id=id,
311
+ agent=metadata.get("agent", ""),
312
+ project_id=metadata.get("project_id", ""),
313
+ pattern=metadata.get("pattern", ""),
314
+ why_bad=metadata.get("why_bad", ""),
315
+ better_alternative=metadata.get("better_alternative", ""),
316
+ occurrence_count=int(metadata.get("occurrence_count", 1)),
317
+ last_seen=self._parse_datetime(metadata.get("last_seen"))
318
+ or datetime.now(timezone.utc),
319
+ created_at=self._parse_datetime(metadata.get("created_at"))
320
+ or datetime.now(timezone.utc),
321
+ metadata=json.loads(metadata.get("metadata_json", "{}")),
322
+ )
323
+
324
+ # ==================== WRITE OPERATIONS ====================
325
+
326
+ def save_heuristic(self, heuristic: Heuristic) -> str:
327
+ """Save a heuristic."""
328
+ vector = heuristic.embedding or self._get_zero_vector()
329
+ metadata = self._metadata_to_pinecone(heuristic, NAMESPACE_HEURISTICS)
330
+
331
+ self._index.upsert(
332
+ vectors=[{"id": heuristic.id, "values": vector, "metadata": metadata}],
333
+ namespace=NAMESPACE_HEURISTICS,
334
+ )
335
+
336
+ logger.debug(f"Saved heuristic: {heuristic.id}")
337
+ return heuristic.id
338
+
339
+ def save_outcome(self, outcome: Outcome) -> str:
340
+ """Save an outcome."""
341
+ vector = outcome.embedding or self._get_zero_vector()
342
+ metadata = self._metadata_to_pinecone(outcome, NAMESPACE_OUTCOMES)
343
+
344
+ self._index.upsert(
345
+ vectors=[{"id": outcome.id, "values": vector, "metadata": metadata}],
346
+ namespace=NAMESPACE_OUTCOMES,
347
+ )
348
+
349
+ logger.debug(f"Saved outcome: {outcome.id}")
350
+ return outcome.id
351
+
352
+ def save_user_preference(self, preference: UserPreference) -> str:
353
+ """Save a user preference."""
354
+ # User preferences don't typically have embeddings
355
+ vector = self._get_zero_vector()
356
+ metadata = self._metadata_to_pinecone(preference, NAMESPACE_PREFERENCES)
357
+
358
+ self._index.upsert(
359
+ vectors=[{"id": preference.id, "values": vector, "metadata": metadata}],
360
+ namespace=NAMESPACE_PREFERENCES,
361
+ )
362
+
363
+ logger.debug(f"Saved preference: {preference.id}")
364
+ return preference.id
365
+
366
+ def save_domain_knowledge(self, knowledge: DomainKnowledge) -> str:
367
+ """Save domain knowledge."""
368
+ vector = knowledge.embedding or self._get_zero_vector()
369
+ metadata = self._metadata_to_pinecone(knowledge, NAMESPACE_DOMAIN_KNOWLEDGE)
370
+
371
+ self._index.upsert(
372
+ vectors=[{"id": knowledge.id, "values": vector, "metadata": metadata}],
373
+ namespace=NAMESPACE_DOMAIN_KNOWLEDGE,
374
+ )
375
+
376
+ logger.debug(f"Saved domain knowledge: {knowledge.id}")
377
+ return knowledge.id
378
+
379
+ def save_anti_pattern(self, anti_pattern: AntiPattern) -> str:
380
+ """Save an anti-pattern."""
381
+ vector = anti_pattern.embedding or self._get_zero_vector()
382
+ metadata = self._metadata_to_pinecone(anti_pattern, NAMESPACE_ANTI_PATTERNS)
383
+
384
+ self._index.upsert(
385
+ vectors=[{"id": anti_pattern.id, "values": vector, "metadata": metadata}],
386
+ namespace=NAMESPACE_ANTI_PATTERNS,
387
+ )
388
+
389
+ logger.debug(f"Saved anti-pattern: {anti_pattern.id}")
390
+ return anti_pattern.id
391
+
392
+ # ==================== BATCH WRITE OPERATIONS ====================
393
+
394
+ def save_heuristics(self, heuristics: List[Heuristic]) -> List[str]:
395
+ """Save multiple heuristics in a batch."""
396
+ if not heuristics:
397
+ return []
398
+
399
+ vectors = []
400
+ for h in heuristics:
401
+ vector = h.embedding or self._get_zero_vector()
402
+ metadata = self._metadata_to_pinecone(h, NAMESPACE_HEURISTICS)
403
+ vectors.append({"id": h.id, "values": vector, "metadata": metadata})
404
+
405
+ # Pinecone supports batches of up to 100 vectors
406
+ batch_size = 100
407
+ for i in range(0, len(vectors), batch_size):
408
+ batch = vectors[i : i + batch_size]
409
+ self._index.upsert(vectors=batch, namespace=NAMESPACE_HEURISTICS)
410
+
411
+ logger.debug(f"Batch saved {len(heuristics)} heuristics")
412
+ return [h.id for h in heuristics]
413
+
414
+ def save_outcomes(self, outcomes: List[Outcome]) -> List[str]:
415
+ """Save multiple outcomes in a batch."""
416
+ if not outcomes:
417
+ return []
418
+
419
+ vectors = []
420
+ for o in outcomes:
421
+ vector = o.embedding or self._get_zero_vector()
422
+ metadata = self._metadata_to_pinecone(o, NAMESPACE_OUTCOMES)
423
+ vectors.append({"id": o.id, "values": vector, "metadata": metadata})
424
+
425
+ batch_size = 100
426
+ for i in range(0, len(vectors), batch_size):
427
+ batch = vectors[i : i + batch_size]
428
+ self._index.upsert(vectors=batch, namespace=NAMESPACE_OUTCOMES)
429
+
430
+ logger.debug(f"Batch saved {len(outcomes)} outcomes")
431
+ return [o.id for o in outcomes]
432
+
433
+ def save_domain_knowledge_batch(
434
+ self, knowledge_items: List[DomainKnowledge]
435
+ ) -> List[str]:
436
+ """Save multiple domain knowledge items in a batch."""
437
+ if not knowledge_items:
438
+ return []
439
+
440
+ vectors = []
441
+ for k in knowledge_items:
442
+ vector = k.embedding or self._get_zero_vector()
443
+ metadata = self._metadata_to_pinecone(k, NAMESPACE_DOMAIN_KNOWLEDGE)
444
+ vectors.append({"id": k.id, "values": vector, "metadata": metadata})
445
+
446
+ batch_size = 100
447
+ for i in range(0, len(vectors), batch_size):
448
+ batch = vectors[i : i + batch_size]
449
+ self._index.upsert(vectors=batch, namespace=NAMESPACE_DOMAIN_KNOWLEDGE)
450
+
451
+ logger.debug(f"Batch saved {len(knowledge_items)} domain knowledge items")
452
+ return [k.id for k in knowledge_items]
453
+
454
+ # ==================== READ OPERATIONS ====================
455
+
456
+ def _build_filter(
457
+ self,
458
+ project_id: Optional[str] = None,
459
+ agent: Optional[str] = None,
460
+ user_id: Optional[str] = None,
461
+ task_type: Optional[str] = None,
462
+ domain: Optional[str] = None,
463
+ category: Optional[str] = None,
464
+ success_only: bool = False,
465
+ min_confidence: float = 0.0,
466
+ ) -> Dict[str, Any]:
467
+ """Build Pinecone metadata filter."""
468
+ conditions = []
469
+
470
+ if project_id:
471
+ conditions.append({"project_id": {"$eq": project_id}})
472
+
473
+ if agent:
474
+ conditions.append({"agent": {"$eq": agent}})
475
+
476
+ if user_id:
477
+ conditions.append({"user_id": {"$eq": user_id}})
478
+
479
+ if task_type:
480
+ conditions.append({"task_type": {"$eq": task_type}})
481
+
482
+ if domain:
483
+ conditions.append({"domain": {"$eq": domain}})
484
+
485
+ if category:
486
+ conditions.append({"category": {"$eq": category}})
487
+
488
+ if success_only:
489
+ conditions.append({"success": {"$eq": True}})
490
+
491
+ if min_confidence > 0.0:
492
+ conditions.append({"confidence": {"$gte": min_confidence}})
493
+
494
+ if not conditions:
495
+ return {}
496
+
497
+ if len(conditions) == 1:
498
+ return conditions[0]
499
+
500
+ return {"$and": conditions}
501
+
502
+ def get_heuristics(
503
+ self,
504
+ project_id: str,
505
+ agent: Optional[str] = None,
506
+ embedding: Optional[List[float]] = None,
507
+ top_k: int = 5,
508
+ min_confidence: float = 0.0,
509
+ ) -> List[Heuristic]:
510
+ """Get heuristics with optional vector search."""
511
+ filter_dict = self._build_filter(
512
+ project_id=project_id,
513
+ agent=agent,
514
+ min_confidence=min_confidence,
515
+ )
516
+
517
+ query_vector = embedding or self._get_zero_vector()
518
+
519
+ results = self._index.query(
520
+ vector=query_vector,
521
+ top_k=top_k,
522
+ namespace=NAMESPACE_HEURISTICS,
523
+ filter=filter_dict if filter_dict else None,
524
+ include_metadata=True,
525
+ )
526
+
527
+ return [
528
+ self._metadata_to_heuristic(match["id"], match.get("metadata", {}))
529
+ for match in results.get("matches", [])
530
+ ]
531
+
532
+ def get_outcomes(
533
+ self,
534
+ project_id: str,
535
+ agent: Optional[str] = None,
536
+ task_type: Optional[str] = None,
537
+ embedding: Optional[List[float]] = None,
538
+ top_k: int = 5,
539
+ success_only: bool = False,
540
+ ) -> List[Outcome]:
541
+ """Get outcomes with optional vector search."""
542
+ filter_dict = self._build_filter(
543
+ project_id=project_id,
544
+ agent=agent,
545
+ task_type=task_type,
546
+ success_only=success_only,
547
+ )
548
+
549
+ query_vector = embedding or self._get_zero_vector()
550
+
551
+ results = self._index.query(
552
+ vector=query_vector,
553
+ top_k=top_k,
554
+ namespace=NAMESPACE_OUTCOMES,
555
+ filter=filter_dict if filter_dict else None,
556
+ include_metadata=True,
557
+ )
558
+
559
+ return [
560
+ self._metadata_to_outcome(match["id"], match.get("metadata", {}))
561
+ for match in results.get("matches", [])
562
+ ]
563
+
564
+ def get_user_preferences(
565
+ self,
566
+ user_id: str,
567
+ category: Optional[str] = None,
568
+ ) -> List[UserPreference]:
569
+ """Get user preferences."""
570
+ filter_dict = self._build_filter(
571
+ user_id=user_id,
572
+ category=category,
573
+ )
574
+
575
+ # For preferences, we use a zero vector query since we filter by metadata
576
+ query_vector = self._get_zero_vector()
577
+
578
+ results = self._index.query(
579
+ vector=query_vector,
580
+ top_k=100, # Get all preferences for user
581
+ namespace=NAMESPACE_PREFERENCES,
582
+ filter=filter_dict if filter_dict else None,
583
+ include_metadata=True,
584
+ )
585
+
586
+ return [
587
+ self._metadata_to_preference(match["id"], match.get("metadata", {}))
588
+ for match in results.get("matches", [])
589
+ ]
590
+
591
+ def get_domain_knowledge(
592
+ self,
593
+ project_id: str,
594
+ agent: Optional[str] = None,
595
+ domain: Optional[str] = None,
596
+ embedding: Optional[List[float]] = None,
597
+ top_k: int = 5,
598
+ ) -> List[DomainKnowledge]:
599
+ """Get domain knowledge with optional vector search."""
600
+ filter_dict = self._build_filter(
601
+ project_id=project_id,
602
+ agent=agent,
603
+ domain=domain,
604
+ )
605
+
606
+ query_vector = embedding or self._get_zero_vector()
607
+
608
+ results = self._index.query(
609
+ vector=query_vector,
610
+ top_k=top_k,
611
+ namespace=NAMESPACE_DOMAIN_KNOWLEDGE,
612
+ filter=filter_dict if filter_dict else None,
613
+ include_metadata=True,
614
+ )
615
+
616
+ return [
617
+ self._metadata_to_domain_knowledge(match["id"], match.get("metadata", {}))
618
+ for match in results.get("matches", [])
619
+ ]
620
+
621
+ def get_anti_patterns(
622
+ self,
623
+ project_id: str,
624
+ agent: Optional[str] = None,
625
+ embedding: Optional[List[float]] = None,
626
+ top_k: int = 5,
627
+ ) -> List[AntiPattern]:
628
+ """Get anti-patterns with optional vector search."""
629
+ filter_dict = self._build_filter(
630
+ project_id=project_id,
631
+ agent=agent,
632
+ )
633
+
634
+ query_vector = embedding or self._get_zero_vector()
635
+
636
+ results = self._index.query(
637
+ vector=query_vector,
638
+ top_k=top_k,
639
+ namespace=NAMESPACE_ANTI_PATTERNS,
640
+ filter=filter_dict if filter_dict else None,
641
+ include_metadata=True,
642
+ )
643
+
644
+ return [
645
+ self._metadata_to_anti_pattern(match["id"], match.get("metadata", {}))
646
+ for match in results.get("matches", [])
647
+ ]
648
+
649
+ # ==================== MULTI-AGENT MEMORY SHARING ====================
650
+
651
+ def get_heuristics_for_agents(
652
+ self,
653
+ project_id: str,
654
+ agents: List[str],
655
+ embedding: Optional[List[float]] = None,
656
+ top_k: int = 5,
657
+ min_confidence: float = 0.0,
658
+ ) -> List[Heuristic]:
659
+ """Get heuristics from multiple agents using $in filter."""
660
+ if not agents:
661
+ return []
662
+
663
+ conditions = [
664
+ {"project_id": {"$eq": project_id}},
665
+ {"agent": {"$in": agents}},
666
+ ]
667
+
668
+ if min_confidence > 0.0:
669
+ conditions.append({"confidence": {"$gte": min_confidence}})
670
+
671
+ filter_dict = {"$and": conditions}
672
+ query_vector = embedding or self._get_zero_vector()
673
+
674
+ results = self._index.query(
675
+ vector=query_vector,
676
+ top_k=top_k * len(agents),
677
+ namespace=NAMESPACE_HEURISTICS,
678
+ filter=filter_dict,
679
+ include_metadata=True,
680
+ )
681
+
682
+ return [
683
+ self._metadata_to_heuristic(match["id"], match.get("metadata", {}))
684
+ for match in results.get("matches", [])
685
+ ]
686
+
687
+ def get_outcomes_for_agents(
688
+ self,
689
+ project_id: str,
690
+ agents: List[str],
691
+ task_type: Optional[str] = None,
692
+ embedding: Optional[List[float]] = None,
693
+ top_k: int = 5,
694
+ success_only: bool = False,
695
+ ) -> List[Outcome]:
696
+ """Get outcomes from multiple agents using $in filter."""
697
+ if not agents:
698
+ return []
699
+
700
+ conditions = [
701
+ {"project_id": {"$eq": project_id}},
702
+ {"agent": {"$in": agents}},
703
+ ]
704
+
705
+ if task_type:
706
+ conditions.append({"task_type": {"$eq": task_type}})
707
+
708
+ if success_only:
709
+ conditions.append({"success": {"$eq": True}})
710
+
711
+ filter_dict = {"$and": conditions}
712
+ query_vector = embedding or self._get_zero_vector()
713
+
714
+ results = self._index.query(
715
+ vector=query_vector,
716
+ top_k=top_k * len(agents),
717
+ namespace=NAMESPACE_OUTCOMES,
718
+ filter=filter_dict,
719
+ include_metadata=True,
720
+ )
721
+
722
+ return [
723
+ self._metadata_to_outcome(match["id"], match.get("metadata", {}))
724
+ for match in results.get("matches", [])
725
+ ]
726
+
727
+ def get_domain_knowledge_for_agents(
728
+ self,
729
+ project_id: str,
730
+ agents: List[str],
731
+ domain: Optional[str] = None,
732
+ embedding: Optional[List[float]] = None,
733
+ top_k: int = 5,
734
+ ) -> List[DomainKnowledge]:
735
+ """Get domain knowledge from multiple agents using $in filter."""
736
+ if not agents:
737
+ return []
738
+
739
+ conditions = [
740
+ {"project_id": {"$eq": project_id}},
741
+ {"agent": {"$in": agents}},
742
+ ]
743
+
744
+ if domain:
745
+ conditions.append({"domain": {"$eq": domain}})
746
+
747
+ filter_dict = {"$and": conditions}
748
+ query_vector = embedding or self._get_zero_vector()
749
+
750
+ results = self._index.query(
751
+ vector=query_vector,
752
+ top_k=top_k * len(agents),
753
+ namespace=NAMESPACE_DOMAIN_KNOWLEDGE,
754
+ filter=filter_dict,
755
+ include_metadata=True,
756
+ )
757
+
758
+ return [
759
+ self._metadata_to_domain_knowledge(match["id"], match.get("metadata", {}))
760
+ for match in results.get("matches", [])
761
+ ]
762
+
763
+ def get_anti_patterns_for_agents(
764
+ self,
765
+ project_id: str,
766
+ agents: List[str],
767
+ embedding: Optional[List[float]] = None,
768
+ top_k: int = 5,
769
+ ) -> List[AntiPattern]:
770
+ """Get anti-patterns from multiple agents using $in filter."""
771
+ if not agents:
772
+ return []
773
+
774
+ conditions = [
775
+ {"project_id": {"$eq": project_id}},
776
+ {"agent": {"$in": agents}},
777
+ ]
778
+
779
+ filter_dict = {"$and": conditions}
780
+ query_vector = embedding or self._get_zero_vector()
781
+
782
+ results = self._index.query(
783
+ vector=query_vector,
784
+ top_k=top_k * len(agents),
785
+ namespace=NAMESPACE_ANTI_PATTERNS,
786
+ filter=filter_dict,
787
+ include_metadata=True,
788
+ )
789
+
790
+ return [
791
+ self._metadata_to_anti_pattern(match["id"], match.get("metadata", {}))
792
+ for match in results.get("matches", [])
793
+ ]
794
+
795
+ # ==================== UPDATE OPERATIONS ====================
796
+
797
+ def update_heuristic(
798
+ self,
799
+ heuristic_id: str,
800
+ updates: Dict[str, Any],
801
+ ) -> bool:
802
+ """Update a heuristic's fields."""
803
+ if not updates:
804
+ return False
805
+
806
+ # Fetch existing record
807
+ results = self._index.fetch(ids=[heuristic_id], namespace=NAMESPACE_HEURISTICS)
808
+
809
+ if heuristic_id not in results.get("vectors", {}):
810
+ return False
811
+
812
+ existing = results["vectors"][heuristic_id]
813
+ metadata = existing.get("metadata", {})
814
+
815
+ # Apply updates to metadata
816
+ for key, value in updates.items():
817
+ if key == "metadata":
818
+ metadata["metadata_json"] = json.dumps(value) if value else "{}"
819
+ elif isinstance(value, datetime):
820
+ metadata[key] = value.isoformat()
821
+ else:
822
+ metadata[key] = value
823
+
824
+ # Upsert with updated metadata
825
+ self._index.upsert(
826
+ vectors=[
827
+ {
828
+ "id": heuristic_id,
829
+ "values": existing.get("values", self._get_zero_vector()),
830
+ "metadata": metadata,
831
+ }
832
+ ],
833
+ namespace=NAMESPACE_HEURISTICS,
834
+ )
835
+
836
+ return True
837
+
838
+ def increment_heuristic_occurrence(
839
+ self,
840
+ heuristic_id: str,
841
+ success: bool,
842
+ ) -> bool:
843
+ """Increment heuristic occurrence count."""
844
+ # Fetch existing record
845
+ results = self._index.fetch(ids=[heuristic_id], namespace=NAMESPACE_HEURISTICS)
846
+
847
+ if heuristic_id not in results.get("vectors", {}):
848
+ return False
849
+
850
+ existing = results["vectors"][heuristic_id]
851
+ metadata = existing.get("metadata", {})
852
+
853
+ # Increment counts
854
+ metadata["occurrence_count"] = int(metadata.get("occurrence_count", 0)) + 1
855
+ if success:
856
+ metadata["success_count"] = int(metadata.get("success_count", 0)) + 1
857
+ metadata["last_validated"] = datetime.now(timezone.utc).isoformat()
858
+
859
+ # Upsert with updated metadata
860
+ self._index.upsert(
861
+ vectors=[
862
+ {
863
+ "id": heuristic_id,
864
+ "values": existing.get("values", self._get_zero_vector()),
865
+ "metadata": metadata,
866
+ }
867
+ ],
868
+ namespace=NAMESPACE_HEURISTICS,
869
+ )
870
+
871
+ return True
872
+
873
+ def update_heuristic_confidence(
874
+ self,
875
+ heuristic_id: str,
876
+ new_confidence: float,
877
+ ) -> bool:
878
+ """Update a heuristic's confidence value."""
879
+ return self.update_heuristic(heuristic_id, {"confidence": new_confidence})
880
+
881
+ def update_knowledge_confidence(
882
+ self,
883
+ knowledge_id: str,
884
+ new_confidence: float,
885
+ ) -> bool:
886
+ """Update domain knowledge confidence value."""
887
+ # Fetch existing record
888
+ results = self._index.fetch(
889
+ ids=[knowledge_id], namespace=NAMESPACE_DOMAIN_KNOWLEDGE
890
+ )
891
+
892
+ if knowledge_id not in results.get("vectors", {}):
893
+ return False
894
+
895
+ existing = results["vectors"][knowledge_id]
896
+ metadata = existing.get("metadata", {})
897
+ metadata["confidence"] = new_confidence
898
+
899
+ # Upsert with updated metadata
900
+ self._index.upsert(
901
+ vectors=[
902
+ {
903
+ "id": knowledge_id,
904
+ "values": existing.get("values", self._get_zero_vector()),
905
+ "metadata": metadata,
906
+ }
907
+ ],
908
+ namespace=NAMESPACE_DOMAIN_KNOWLEDGE,
909
+ )
910
+
911
+ return True
912
+
913
+ # ==================== DELETE OPERATIONS ====================
914
+
915
+ def delete_heuristic(self, heuristic_id: str) -> bool:
916
+ """Delete a heuristic by ID."""
917
+ try:
918
+ self._index.delete(ids=[heuristic_id], namespace=NAMESPACE_HEURISTICS)
919
+ logger.debug(f"Deleted heuristic: {heuristic_id}")
920
+ return True
921
+ except Exception as e:
922
+ logger.warning(f"Failed to delete heuristic {heuristic_id}: {e}")
923
+ return False
924
+
925
+ def delete_outcome(self, outcome_id: str) -> bool:
926
+ """Delete an outcome by ID."""
927
+ try:
928
+ self._index.delete(ids=[outcome_id], namespace=NAMESPACE_OUTCOMES)
929
+ logger.debug(f"Deleted outcome: {outcome_id}")
930
+ return True
931
+ except Exception as e:
932
+ logger.warning(f"Failed to delete outcome {outcome_id}: {e}")
933
+ return False
934
+
935
+ def delete_domain_knowledge(self, knowledge_id: str) -> bool:
936
+ """Delete domain knowledge by ID."""
937
+ try:
938
+ self._index.delete(ids=[knowledge_id], namespace=NAMESPACE_DOMAIN_KNOWLEDGE)
939
+ logger.debug(f"Deleted domain knowledge: {knowledge_id}")
940
+ return True
941
+ except Exception as e:
942
+ logger.warning(f"Failed to delete domain knowledge {knowledge_id}: {e}")
943
+ return False
944
+
945
+ def delete_anti_pattern(self, anti_pattern_id: str) -> bool:
946
+ """Delete an anti-pattern by ID."""
947
+ try:
948
+ self._index.delete(ids=[anti_pattern_id], namespace=NAMESPACE_ANTI_PATTERNS)
949
+ logger.debug(f"Deleted anti-pattern: {anti_pattern_id}")
950
+ return True
951
+ except Exception as e:
952
+ logger.warning(f"Failed to delete anti-pattern {anti_pattern_id}: {e}")
953
+ return False
954
+
955
+ def delete_outcomes_older_than(
956
+ self,
957
+ project_id: str,
958
+ older_than: datetime,
959
+ agent: Optional[str] = None,
960
+ ) -> int:
961
+ """Delete old outcomes.
962
+
963
+ Note: Pinecone doesn't support bulk delete by filter directly,
964
+ so we query first then delete by IDs.
965
+ """
966
+ filter_dict = self._build_filter(project_id=project_id, agent=agent)
967
+ query_vector = self._get_zero_vector()
968
+
969
+ # Query to get all matching IDs
970
+ results = self._index.query(
971
+ vector=query_vector,
972
+ top_k=10000, # Large number to get all
973
+ namespace=NAMESPACE_OUTCOMES,
974
+ filter=filter_dict if filter_dict else None,
975
+ include_metadata=True,
976
+ )
977
+
978
+ older_than_iso = older_than.isoformat()
979
+ ids_to_delete = []
980
+
981
+ for match in results.get("matches", []):
982
+ timestamp = match.get("metadata", {}).get("timestamp", "")
983
+ if timestamp and timestamp < older_than_iso:
984
+ ids_to_delete.append(match["id"])
985
+
986
+ if ids_to_delete:
987
+ # Delete in batches of 1000
988
+ batch_size = 1000
989
+ for i in range(0, len(ids_to_delete), batch_size):
990
+ batch = ids_to_delete[i : i + batch_size]
991
+ self._index.delete(ids=batch, namespace=NAMESPACE_OUTCOMES)
992
+
993
+ deleted = len(ids_to_delete)
994
+ logger.info(f"Deleted {deleted} old outcomes")
995
+ return deleted
996
+
997
+ def delete_low_confidence_heuristics(
998
+ self,
999
+ project_id: str,
1000
+ below_confidence: float,
1001
+ agent: Optional[str] = None,
1002
+ ) -> int:
1003
+ """Delete low-confidence heuristics."""
1004
+ filter_dict = self._build_filter(project_id=project_id, agent=agent)
1005
+ query_vector = self._get_zero_vector()
1006
+
1007
+ # Query to get all matching IDs
1008
+ results = self._index.query(
1009
+ vector=query_vector,
1010
+ top_k=10000,
1011
+ namespace=NAMESPACE_HEURISTICS,
1012
+ filter=filter_dict if filter_dict else None,
1013
+ include_metadata=True,
1014
+ )
1015
+
1016
+ ids_to_delete = []
1017
+ for match in results.get("matches", []):
1018
+ confidence = float(match.get("metadata", {}).get("confidence", 0.0))
1019
+ if confidence < below_confidence:
1020
+ ids_to_delete.append(match["id"])
1021
+
1022
+ if ids_to_delete:
1023
+ batch_size = 1000
1024
+ for i in range(0, len(ids_to_delete), batch_size):
1025
+ batch = ids_to_delete[i : i + batch_size]
1026
+ self._index.delete(ids=batch, namespace=NAMESPACE_HEURISTICS)
1027
+
1028
+ deleted = len(ids_to_delete)
1029
+ logger.info(f"Deleted {deleted} low-confidence heuristics")
1030
+ return deleted
1031
+
1032
+ # ==================== STATS ====================
1033
+
1034
+ def get_stats(
1035
+ self,
1036
+ project_id: str,
1037
+ agent: Optional[str] = None,
1038
+ ) -> Dict[str, Any]:
1039
+ """Get memory statistics."""
1040
+ stats = {
1041
+ "project_id": project_id,
1042
+ "agent": agent,
1043
+ "storage_type": "pinecone",
1044
+ "index_name": self.index_name,
1045
+ }
1046
+
1047
+ # Get index stats
1048
+ try:
1049
+ index_stats = self._index.describe_index_stats()
1050
+
1051
+ # Count by namespace
1052
+ namespaces = index_stats.get("namespaces", {})
1053
+
1054
+ stats["heuristics_count"] = namespaces.get(NAMESPACE_HEURISTICS, {}).get(
1055
+ "vector_count", 0
1056
+ )
1057
+ stats["outcomes_count"] = namespaces.get(NAMESPACE_OUTCOMES, {}).get(
1058
+ "vector_count", 0
1059
+ )
1060
+ stats["domain_knowledge_count"] = namespaces.get(
1061
+ NAMESPACE_DOMAIN_KNOWLEDGE, {}
1062
+ ).get("vector_count", 0)
1063
+ stats["anti_patterns_count"] = namespaces.get(
1064
+ NAMESPACE_ANTI_PATTERNS, {}
1065
+ ).get("vector_count", 0)
1066
+ stats["preferences_count"] = namespaces.get(NAMESPACE_PREFERENCES, {}).get(
1067
+ "vector_count", 0
1068
+ )
1069
+ stats["total_count"] = index_stats.get("total_vector_count", 0)
1070
+
1071
+ except Exception as e:
1072
+ logger.warning(f"Failed to get index stats: {e}")
1073
+ stats["error"] = str(e)
1074
+
1075
+ return stats
1076
+
1077
+ def close(self):
1078
+ """Close the Pinecone connection (no-op for Pinecone client)."""
1079
+ # Pinecone client doesn't require explicit cleanup
1080
+ pass