alma-memory 0.3.0-py3-none-any.whl → 0.5.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alma/__init__.py +99 -29
- alma/confidence/__init__.py +47 -0
- alma/confidence/engine.py +540 -0
- alma/confidence/types.py +351 -0
- alma/config/loader.py +3 -2
- alma/consolidation/__init__.py +23 -0
- alma/consolidation/engine.py +678 -0
- alma/consolidation/prompts.py +84 -0
- alma/core.py +15 -15
- alma/domains/__init__.py +6 -6
- alma/domains/factory.py +12 -9
- alma/domains/schemas.py +17 -3
- alma/domains/types.py +8 -4
- alma/events/__init__.py +75 -0
- alma/events/emitter.py +284 -0
- alma/events/storage_mixin.py +246 -0
- alma/events/types.py +126 -0
- alma/events/webhook.py +425 -0
- alma/exceptions.py +49 -0
- alma/extraction/__init__.py +31 -0
- alma/extraction/auto_learner.py +264 -0
- alma/extraction/extractor.py +420 -0
- alma/graph/__init__.py +81 -0
- alma/graph/backends/__init__.py +18 -0
- alma/graph/backends/memory.py +236 -0
- alma/graph/backends/neo4j.py +417 -0
- alma/graph/base.py +159 -0
- alma/graph/extraction.py +198 -0
- alma/graph/store.py +860 -0
- alma/harness/__init__.py +4 -4
- alma/harness/base.py +18 -9
- alma/harness/domains.py +27 -11
- alma/initializer/__init__.py +37 -0
- alma/initializer/initializer.py +418 -0
- alma/initializer/types.py +250 -0
- alma/integration/__init__.py +9 -9
- alma/integration/claude_agents.py +10 -10
- alma/integration/helena.py +32 -22
- alma/integration/victor.py +57 -33
- alma/learning/__init__.py +27 -27
- alma/learning/forgetting.py +198 -148
- alma/learning/heuristic_extractor.py +40 -24
- alma/learning/protocols.py +62 -14
- alma/learning/validation.py +7 -2
- alma/mcp/__init__.py +4 -4
- alma/mcp/__main__.py +2 -1
- alma/mcp/resources.py +17 -16
- alma/mcp/server.py +102 -44
- alma/mcp/tools.py +174 -37
- alma/progress/__init__.py +3 -3
- alma/progress/tracker.py +26 -20
- alma/progress/types.py +8 -12
- alma/py.typed +0 -0
- alma/retrieval/__init__.py +11 -11
- alma/retrieval/cache.py +20 -21
- alma/retrieval/embeddings.py +4 -4
- alma/retrieval/engine.py +114 -35
- alma/retrieval/scoring.py +73 -63
- alma/session/__init__.py +2 -2
- alma/session/manager.py +5 -5
- alma/session/types.py +5 -4
- alma/storage/__init__.py +41 -0
- alma/storage/azure_cosmos.py +107 -31
- alma/storage/base.py +157 -4
- alma/storage/chroma.py +1443 -0
- alma/storage/file_based.py +56 -20
- alma/storage/pinecone.py +1080 -0
- alma/storage/postgresql.py +1452 -0
- alma/storage/qdrant.py +1306 -0
- alma/storage/sqlite_local.py +376 -31
- alma/types.py +62 -14
- alma_memory-0.5.0.dist-info/METADATA +905 -0
- alma_memory-0.5.0.dist-info/RECORD +76 -0
- {alma_memory-0.3.0.dist-info → alma_memory-0.5.0.dist-info}/WHEEL +1 -1
- alma_memory-0.3.0.dist-info/METADATA +0 -438
- alma_memory-0.3.0.dist-info/RECORD +0 -46
- {alma_memory-0.3.0.dist-info → alma_memory-0.5.0.dist-info}/top_level.txt +0 -0
alma/storage/pinecone.py
ADDED
@@ -0,0 +1,1080 @@
"""
ALMA Pinecone Storage Backend.

Production-ready storage using Pinecone vector database for
native vector similarity search with serverless infrastructure.

Recommended for:
- Cloud-native deployments
- Large-scale vector search workloads
- Serverless architecture preferences
"""

import json
import logging
import os
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional

from alma.storage.base import StorageBackend
from alma.types import (
    AntiPattern,
    DomainKnowledge,
    Heuristic,
    Outcome,
    UserPreference,
)

logger = logging.getLogger(__name__)

# Try to import pinecone
try:
    from pinecone import Pinecone, ServerlessSpec

    PINECONE_AVAILABLE = True
except ImportError:
    PINECONE_AVAILABLE = False
    Pinecone = None  # type: ignore
    ServerlessSpec = None  # type: ignore
    logger.warning(
        "pinecone not installed. Install with: pip install 'alma-memory[pinecone]'"
    )

# Namespace constants for memory types
NAMESPACE_HEURISTICS = "heuristics"
NAMESPACE_OUTCOMES = "outcomes"
NAMESPACE_DOMAIN_KNOWLEDGE = "domain_knowledge"
NAMESPACE_ANTI_PATTERNS = "anti_patterns"
NAMESPACE_PREFERENCES = "preferences"


class PineconeStorage(StorageBackend):
    """
    Pinecone storage backend for ALMA.

    Uses Pinecone's vector database with namespaces for different memory types.
    Supports serverless deployment for automatic scaling.

    Features:
    - One namespace per memory type (heuristics, outcomes, etc.)
    - Embeddings stored as vectors with metadata
    - Efficient vector similarity search
    - Automatic index creation with serverless spec

    Usage:
        storage = PineconeStorage(
            api_key="your-api-key",
            index_name="alma-memory",
            embedding_dim=384,
        )
    """

    def __init__(
        self,
        api_key: str,
        index_name: str = "alma-memory",
        embedding_dim: int = 384,
        cloud: str = "aws",
        region: str = "us-east-1",
        metric: str = "cosine",
    ):
        """
        Initialize Pinecone storage.

        Args:
            api_key: Pinecone API key (required)
            index_name: Name of the Pinecone index (default: alma-memory)
            embedding_dim: Dimension of embedding vectors (default: 384)
            cloud: Cloud provider for serverless (default: aws)
            region: Cloud region for serverless (default: us-east-1)
            metric: Distance metric (default: cosine)
        """
        if not PINECONE_AVAILABLE:
            raise ImportError(
                "pinecone not installed. Install with: pip install 'alma-memory[pinecone]'"
            )

        self.index_name = index_name
        self.embedding_dim = embedding_dim
        self.cloud = cloud
        self.region = region
        self.metric = metric

        # Initialize Pinecone client
        self._pc = Pinecone(api_key=api_key)

        # Create or get index
        self._init_index()

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "PineconeStorage":
        """Create instance from configuration."""
        pinecone_config = config.get("pinecone", {})

        # Support environment variable expansion
        def get_value(key: str, default: Any = None) -> Any:
            value = pinecone_config.get(key, default)
            if (
                isinstance(value, str)
                and value.startswith("${")
                and value.endswith("}")
            ):
                env_var = value[2:-1]
                return os.environ.get(env_var, default)
            return value

        return cls(
            api_key=get_value("api_key", os.environ.get("PINECONE_API_KEY", "")),
            index_name=get_value("index_name", "alma-memory"),
            embedding_dim=int(config.get("embedding_dim", 384)),
            cloud=get_value("cloud", "aws"),
            region=get_value("region", "us-east-1"),
            metric=get_value("metric", "cosine"),
        )

    def _init_index(self):
        """Initialize or get the Pinecone index."""
        existing_indexes = [idx.name for idx in self._pc.list_indexes()]

        if self.index_name not in existing_indexes:
            logger.info(f"Creating Pinecone index: {self.index_name}")
            self._pc.create_index(
                name=self.index_name,
                dimension=self.embedding_dim,
                metric=self.metric,
                spec=ServerlessSpec(cloud=self.cloud, region=self.region),
            )
            logger.info(f"Created index: {self.index_name}")

        self._index = self._pc.Index(self.index_name)

    def _get_zero_vector(self) -> List[float]:
        """Get a zero vector for records without embeddings."""
        return [0.0] * self.embedding_dim

    def _metadata_to_pinecone(self, obj: Any, memory_type: str) -> Dict[str, Any]:
        """Convert a memory object to Pinecone metadata format."""
        # Pinecone metadata must be flat (no nested dicts/lists of dicts)
        # and values must be strings, numbers, booleans, or lists of strings
        if memory_type == NAMESPACE_HEURISTICS:
            return {
                "agent": obj.agent,
                "project_id": obj.project_id,
                "condition": obj.condition,
                "strategy": obj.strategy,
                "confidence": float(obj.confidence),
                "occurrence_count": int(obj.occurrence_count),
                "success_count": int(obj.success_count),
                "last_validated": obj.last_validated.isoformat()
                if obj.last_validated
                else "",
                "created_at": obj.created_at.isoformat() if obj.created_at else "",
                "metadata_json": json.dumps(obj.metadata) if obj.metadata else "{}",
            }
        elif memory_type == NAMESPACE_OUTCOMES:
            return {
                "agent": obj.agent,
                "project_id": obj.project_id,
                "task_type": obj.task_type or "general",
                "task_description": obj.task_description,
                "success": obj.success,
                "strategy_used": obj.strategy_used or "",
                "duration_ms": int(obj.duration_ms) if obj.duration_ms else 0,
                "error_message": obj.error_message or "",
                "user_feedback": obj.user_feedback or "",
                "timestamp": obj.timestamp.isoformat() if obj.timestamp else "",
                "metadata_json": json.dumps(obj.metadata) if obj.metadata else "{}",
            }
        elif memory_type == NAMESPACE_PREFERENCES:
            return {
                "user_id": obj.user_id,
                "category": obj.category or "general",
                "preference": obj.preference,
                "source": obj.source or "unknown",
                "confidence": float(obj.confidence),
                "timestamp": obj.timestamp.isoformat() if obj.timestamp else "",
                "metadata_json": json.dumps(obj.metadata) if obj.metadata else "{}",
            }
        elif memory_type == NAMESPACE_DOMAIN_KNOWLEDGE:
            return {
                "agent": obj.agent,
                "project_id": obj.project_id,
                "domain": obj.domain or "general",
                "fact": obj.fact,
                "source": obj.source or "unknown",
                "confidence": float(obj.confidence),
                "last_verified": obj.last_verified.isoformat()
                if obj.last_verified
                else "",
                "metadata_json": json.dumps(obj.metadata) if obj.metadata else "{}",
            }
        elif memory_type == NAMESPACE_ANTI_PATTERNS:
            return {
                "agent": obj.agent,
                "project_id": obj.project_id,
                "pattern": obj.pattern,
                "why_bad": obj.why_bad or "",
                "better_alternative": obj.better_alternative or "",
                "occurrence_count": int(obj.occurrence_count),
                "last_seen": obj.last_seen.isoformat() if obj.last_seen else "",
                "created_at": obj.created_at.isoformat() if obj.created_at else "",
                "metadata_json": json.dumps(obj.metadata) if obj.metadata else "{}",
            }
        return {}

    def _parse_datetime(self, value: Any) -> Optional[datetime]:
        """Parse datetime from string."""
        if value is None or value == "":
            return None
        if isinstance(value, datetime):
            return value
        try:
            return datetime.fromisoformat(value.replace("Z", "+00:00"))
        except (ValueError, AttributeError):
            return None

    def _metadata_to_heuristic(self, id: str, metadata: Dict[str, Any]) -> Heuristic:
        """Convert Pinecone metadata to Heuristic."""
        return Heuristic(
            id=id,
            agent=metadata.get("agent", ""),
            project_id=metadata.get("project_id", ""),
            condition=metadata.get("condition", ""),
            strategy=metadata.get("strategy", ""),
            confidence=float(metadata.get("confidence", 0.0)),
            occurrence_count=int(metadata.get("occurrence_count", 0)),
            success_count=int(metadata.get("success_count", 0)),
            last_validated=self._parse_datetime(metadata.get("last_validated"))
            or datetime.now(timezone.utc),
            created_at=self._parse_datetime(metadata.get("created_at"))
            or datetime.now(timezone.utc),
            metadata=json.loads(metadata.get("metadata_json", "{}")),
        )

    def _metadata_to_outcome(self, id: str, metadata: Dict[str, Any]) -> Outcome:
        """Convert Pinecone metadata to Outcome."""
        return Outcome(
            id=id,
            agent=metadata.get("agent", ""),
            project_id=metadata.get("project_id", ""),
            task_type=metadata.get("task_type", "general"),
            task_description=metadata.get("task_description", ""),
            success=bool(metadata.get("success", False)),
            strategy_used=metadata.get("strategy_used", ""),
            duration_ms=int(metadata.get("duration_ms", 0)) or None,
            error_message=metadata.get("error_message") or None,
            user_feedback=metadata.get("user_feedback") or None,
            timestamp=self._parse_datetime(metadata.get("timestamp"))
            or datetime.now(timezone.utc),
            metadata=json.loads(metadata.get("metadata_json", "{}")),
        )

    def _metadata_to_preference(
        self, id: str, metadata: Dict[str, Any]
    ) -> UserPreference:
        """Convert Pinecone metadata to UserPreference."""
        return UserPreference(
            id=id,
            user_id=metadata.get("user_id", ""),
            category=metadata.get("category", "general"),
            preference=metadata.get("preference", ""),
            source=metadata.get("source", "unknown"),
            confidence=float(metadata.get("confidence", 1.0)),
            timestamp=self._parse_datetime(metadata.get("timestamp"))
            or datetime.now(timezone.utc),
            metadata=json.loads(metadata.get("metadata_json", "{}")),
        )

    def _metadata_to_domain_knowledge(
        self, id: str, metadata: Dict[str, Any]
    ) -> DomainKnowledge:
        """Convert Pinecone metadata to DomainKnowledge."""
        return DomainKnowledge(
            id=id,
            agent=metadata.get("agent", ""),
            project_id=metadata.get("project_id", ""),
            domain=metadata.get("domain", "general"),
            fact=metadata.get("fact", ""),
            source=metadata.get("source", "unknown"),
            confidence=float(metadata.get("confidence", 1.0)),
            last_verified=self._parse_datetime(metadata.get("last_verified"))
            or datetime.now(timezone.utc),
            metadata=json.loads(metadata.get("metadata_json", "{}")),
        )

    def _metadata_to_anti_pattern(
        self, id: str, metadata: Dict[str, Any]
    ) -> AntiPattern:
        """Convert Pinecone metadata to AntiPattern."""
        return AntiPattern(
            id=id,
            agent=metadata.get("agent", ""),
            project_id=metadata.get("project_id", ""),
            pattern=metadata.get("pattern", ""),
            why_bad=metadata.get("why_bad", ""),
            better_alternative=metadata.get("better_alternative", ""),
            occurrence_count=int(metadata.get("occurrence_count", 1)),
            last_seen=self._parse_datetime(metadata.get("last_seen"))
            or datetime.now(timezone.utc),
            created_at=self._parse_datetime(metadata.get("created_at"))
            or datetime.now(timezone.utc),
            metadata=json.loads(metadata.get("metadata_json", "{}")),
        )

    # ==================== WRITE OPERATIONS ====================

    def save_heuristic(self, heuristic: Heuristic) -> str:
        """Save a heuristic."""
        vector = heuristic.embedding or self._get_zero_vector()
        metadata = self._metadata_to_pinecone(heuristic, NAMESPACE_HEURISTICS)

        self._index.upsert(
            vectors=[{"id": heuristic.id, "values": vector, "metadata": metadata}],
            namespace=NAMESPACE_HEURISTICS,
        )

        logger.debug(f"Saved heuristic: {heuristic.id}")
        return heuristic.id

    def save_outcome(self, outcome: Outcome) -> str:
        """Save an outcome."""
        vector = outcome.embedding or self._get_zero_vector()
        metadata = self._metadata_to_pinecone(outcome, NAMESPACE_OUTCOMES)

        self._index.upsert(
            vectors=[{"id": outcome.id, "values": vector, "metadata": metadata}],
            namespace=NAMESPACE_OUTCOMES,
        )

        logger.debug(f"Saved outcome: {outcome.id}")
        return outcome.id

    def save_user_preference(self, preference: UserPreference) -> str:
        """Save a user preference."""
        # User preferences don't typically have embeddings
        vector = self._get_zero_vector()
        metadata = self._metadata_to_pinecone(preference, NAMESPACE_PREFERENCES)

        self._index.upsert(
            vectors=[{"id": preference.id, "values": vector, "metadata": metadata}],
            namespace=NAMESPACE_PREFERENCES,
        )

        logger.debug(f"Saved preference: {preference.id}")
        return preference.id

    def save_domain_knowledge(self, knowledge: DomainKnowledge) -> str:
        """Save domain knowledge."""
        vector = knowledge.embedding or self._get_zero_vector()
        metadata = self._metadata_to_pinecone(knowledge, NAMESPACE_DOMAIN_KNOWLEDGE)

        self._index.upsert(
            vectors=[{"id": knowledge.id, "values": vector, "metadata": metadata}],
            namespace=NAMESPACE_DOMAIN_KNOWLEDGE,
        )

        logger.debug(f"Saved domain knowledge: {knowledge.id}")
        return knowledge.id

    def save_anti_pattern(self, anti_pattern: AntiPattern) -> str:
        """Save an anti-pattern."""
        vector = anti_pattern.embedding or self._get_zero_vector()
        metadata = self._metadata_to_pinecone(anti_pattern, NAMESPACE_ANTI_PATTERNS)

        self._index.upsert(
            vectors=[{"id": anti_pattern.id, "values": vector, "metadata": metadata}],
            namespace=NAMESPACE_ANTI_PATTERNS,
        )

        logger.debug(f"Saved anti-pattern: {anti_pattern.id}")
        return anti_pattern.id

    # ==================== BATCH WRITE OPERATIONS ====================

    def save_heuristics(self, heuristics: List[Heuristic]) -> List[str]:
        """Save multiple heuristics in a batch."""
        if not heuristics:
            return []

        vectors = []
        for h in heuristics:
            vector = h.embedding or self._get_zero_vector()
            metadata = self._metadata_to_pinecone(h, NAMESPACE_HEURISTICS)
            vectors.append({"id": h.id, "values": vector, "metadata": metadata})

        # Pinecone supports batches of up to 100 vectors
        batch_size = 100
        for i in range(0, len(vectors), batch_size):
            batch = vectors[i : i + batch_size]
            self._index.upsert(vectors=batch, namespace=NAMESPACE_HEURISTICS)

        logger.debug(f"Batch saved {len(heuristics)} heuristics")
        return [h.id for h in heuristics]

    def save_outcomes(self, outcomes: List[Outcome]) -> List[str]:
        """Save multiple outcomes in a batch."""
        if not outcomes:
            return []

        vectors = []
        for o in outcomes:
            vector = o.embedding or self._get_zero_vector()
            metadata = self._metadata_to_pinecone(o, NAMESPACE_OUTCOMES)
            vectors.append({"id": o.id, "values": vector, "metadata": metadata})

        batch_size = 100
        for i in range(0, len(vectors), batch_size):
            batch = vectors[i : i + batch_size]
            self._index.upsert(vectors=batch, namespace=NAMESPACE_OUTCOMES)

        logger.debug(f"Batch saved {len(outcomes)} outcomes")
        return [o.id for o in outcomes]

    def save_domain_knowledge_batch(
        self, knowledge_items: List[DomainKnowledge]
    ) -> List[str]:
        """Save multiple domain knowledge items in a batch."""
        if not knowledge_items:
            return []

        vectors = []
        for k in knowledge_items:
            vector = k.embedding or self._get_zero_vector()
            metadata = self._metadata_to_pinecone(k, NAMESPACE_DOMAIN_KNOWLEDGE)
            vectors.append({"id": k.id, "values": vector, "metadata": metadata})

        batch_size = 100
        for i in range(0, len(vectors), batch_size):
            batch = vectors[i : i + batch_size]
            self._index.upsert(vectors=batch, namespace=NAMESPACE_DOMAIN_KNOWLEDGE)

        logger.debug(f"Batch saved {len(knowledge_items)} domain knowledge items")
        return [k.id for k in knowledge_items]

    # ==================== READ OPERATIONS ====================

    def _build_filter(
        self,
        project_id: Optional[str] = None,
        agent: Optional[str] = None,
        user_id: Optional[str] = None,
        task_type: Optional[str] = None,
        domain: Optional[str] = None,
        category: Optional[str] = None,
        success_only: bool = False,
        min_confidence: float = 0.0,
    ) -> Dict[str, Any]:
        """Build Pinecone metadata filter."""
        conditions = []

        if project_id:
            conditions.append({"project_id": {"$eq": project_id}})

        if agent:
            conditions.append({"agent": {"$eq": agent}})

        if user_id:
            conditions.append({"user_id": {"$eq": user_id}})

        if task_type:
            conditions.append({"task_type": {"$eq": task_type}})

        if domain:
            conditions.append({"domain": {"$eq": domain}})

        if category:
            conditions.append({"category": {"$eq": category}})

        if success_only:
            conditions.append({"success": {"$eq": True}})

        if min_confidence > 0.0:
            conditions.append({"confidence": {"$gte": min_confidence}})

        if not conditions:
            return {}

        if len(conditions) == 1:
            return conditions[0]

        return {"$and": conditions}

    def get_heuristics(
        self,
        project_id: str,
        agent: Optional[str] = None,
        embedding: Optional[List[float]] = None,
        top_k: int = 5,
        min_confidence: float = 0.0,
    ) -> List[Heuristic]:
        """Get heuristics with optional vector search."""
        filter_dict = self._build_filter(
            project_id=project_id,
            agent=agent,
            min_confidence=min_confidence,
        )

        query_vector = embedding or self._get_zero_vector()

        results = self._index.query(
            vector=query_vector,
            top_k=top_k,
            namespace=NAMESPACE_HEURISTICS,
            filter=filter_dict if filter_dict else None,
            include_metadata=True,
        )

        return [
            self._metadata_to_heuristic(match["id"], match.get("metadata", {}))
            for match in results.get("matches", [])
        ]

    def get_outcomes(
        self,
        project_id: str,
        agent: Optional[str] = None,
        task_type: Optional[str] = None,
        embedding: Optional[List[float]] = None,
        top_k: int = 5,
        success_only: bool = False,
    ) -> List[Outcome]:
        """Get outcomes with optional vector search."""
        filter_dict = self._build_filter(
            project_id=project_id,
            agent=agent,
            task_type=task_type,
            success_only=success_only,
        )

        query_vector = embedding or self._get_zero_vector()

        results = self._index.query(
            vector=query_vector,
            top_k=top_k,
            namespace=NAMESPACE_OUTCOMES,
            filter=filter_dict if filter_dict else None,
            include_metadata=True,
        )

        return [
            self._metadata_to_outcome(match["id"], match.get("metadata", {}))
            for match in results.get("matches", [])
        ]

    def get_user_preferences(
        self,
        user_id: str,
        category: Optional[str] = None,
    ) -> List[UserPreference]:
        """Get user preferences."""
        filter_dict = self._build_filter(
            user_id=user_id,
            category=category,
        )

        # For preferences, we use a zero vector query since we filter by metadata
        query_vector = self._get_zero_vector()

        results = self._index.query(
            vector=query_vector,
            top_k=100,  # Get all preferences for user
            namespace=NAMESPACE_PREFERENCES,
            filter=filter_dict if filter_dict else None,
            include_metadata=True,
        )

        return [
            self._metadata_to_preference(match["id"], match.get("metadata", {}))
            for match in results.get("matches", [])
        ]

    def get_domain_knowledge(
        self,
        project_id: str,
        agent: Optional[str] = None,
        domain: Optional[str] = None,
        embedding: Optional[List[float]] = None,
        top_k: int = 5,
    ) -> List[DomainKnowledge]:
        """Get domain knowledge with optional vector search."""
        filter_dict = self._build_filter(
            project_id=project_id,
            agent=agent,
            domain=domain,
        )

        query_vector = embedding or self._get_zero_vector()

        results = self._index.query(
            vector=query_vector,
            top_k=top_k,
            namespace=NAMESPACE_DOMAIN_KNOWLEDGE,
            filter=filter_dict if filter_dict else None,
            include_metadata=True,
        )

        return [
            self._metadata_to_domain_knowledge(match["id"], match.get("metadata", {}))
            for match in results.get("matches", [])
        ]

    def get_anti_patterns(
        self,
        project_id: str,
        agent: Optional[str] = None,
        embedding: Optional[List[float]] = None,
        top_k: int = 5,
    ) -> List[AntiPattern]:
        """Get anti-patterns with optional vector search."""
        filter_dict = self._build_filter(
            project_id=project_id,
            agent=agent,
        )

        query_vector = embedding or self._get_zero_vector()

        results = self._index.query(
            vector=query_vector,
            top_k=top_k,
            namespace=NAMESPACE_ANTI_PATTERNS,
            filter=filter_dict if filter_dict else None,
            include_metadata=True,
        )

        return [
            self._metadata_to_anti_pattern(match["id"], match.get("metadata", {}))
            for match in results.get("matches", [])
        ]

    # ==================== MULTI-AGENT MEMORY SHARING ====================

    def get_heuristics_for_agents(
        self,
        project_id: str,
        agents: List[str],
        embedding: Optional[List[float]] = None,
        top_k: int = 5,
        min_confidence: float = 0.0,
    ) -> List[Heuristic]:
        """Get heuristics from multiple agents using $in filter."""
        if not agents:
            return []

        conditions = [
            {"project_id": {"$eq": project_id}},
            {"agent": {"$in": agents}},
        ]

        if min_confidence > 0.0:
            conditions.append({"confidence": {"$gte": min_confidence}})

        filter_dict = {"$and": conditions}
        query_vector = embedding or self._get_zero_vector()

        results = self._index.query(
            vector=query_vector,
            top_k=top_k * len(agents),
            namespace=NAMESPACE_HEURISTICS,
            filter=filter_dict,
            include_metadata=True,
        )

        return [
            self._metadata_to_heuristic(match["id"], match.get("metadata", {}))
            for match in results.get("matches", [])
        ]

    def get_outcomes_for_agents(
        self,
        project_id: str,
        agents: List[str],
        task_type: Optional[str] = None,
        embedding: Optional[List[float]] = None,
        top_k: int = 5,
        success_only: bool = False,
    ) -> List[Outcome]:
        """Get outcomes from multiple agents using $in filter."""
        if not agents:
            return []

        conditions = [
            {"project_id": {"$eq": project_id}},
            {"agent": {"$in": agents}},
        ]

        if task_type:
            conditions.append({"task_type": {"$eq": task_type}})

        if success_only:
            conditions.append({"success": {"$eq": True}})

        filter_dict = {"$and": conditions}
        query_vector = embedding or self._get_zero_vector()

        results = self._index.query(
            vector=query_vector,
            top_k=top_k * len(agents),
            namespace=NAMESPACE_OUTCOMES,
            filter=filter_dict,
            include_metadata=True,
        )

        return [
            self._metadata_to_outcome(match["id"], match.get("metadata", {}))
            for match in results.get("matches", [])
        ]

    def get_domain_knowledge_for_agents(
        self,
        project_id: str,
        agents: List[str],
        domain: Optional[str] = None,
        embedding: Optional[List[float]] = None,
        top_k: int = 5,
    ) -> List[DomainKnowledge]:
        """Get domain knowledge from multiple agents using $in filter."""
        if not agents:
            return []

        conditions = [
            {"project_id": {"$eq": project_id}},
            {"agent": {"$in": agents}},
        ]

        if domain:
            conditions.append({"domain": {"$eq": domain}})

        filter_dict = {"$and": conditions}
        query_vector = embedding or self._get_zero_vector()

        results = self._index.query(
            vector=query_vector,
            top_k=top_k * len(agents),
            namespace=NAMESPACE_DOMAIN_KNOWLEDGE,
            filter=filter_dict,
            include_metadata=True,
        )

        return [
            self._metadata_to_domain_knowledge(match["id"], match.get("metadata", {}))
            for match in results.get("matches", [])
        ]

    def get_anti_patterns_for_agents(
        self,
        project_id: str,
        agents: List[str],
        embedding: Optional[List[float]] = None,
        top_k: int = 5,
    ) -> List[AntiPattern]:
        """Get anti-patterns from multiple agents using $in filter."""
        if not agents:
            return []

        conditions = [
            {"project_id": {"$eq": project_id}},
            {"agent": {"$in": agents}},
        ]

        filter_dict = {"$and": conditions}
        query_vector = embedding or self._get_zero_vector()

        results = self._index.query(
            vector=query_vector,
            top_k=top_k * len(agents),
            namespace=NAMESPACE_ANTI_PATTERNS,
            filter=filter_dict,
            include_metadata=True,
        )

        return [
            self._metadata_to_anti_pattern(match["id"], match.get("metadata", {}))
            for match in results.get("matches", [])
        ]

    # ==================== UPDATE OPERATIONS ====================

    def update_heuristic(
        self,
        heuristic_id: str,
        updates: Dict[str, Any],
    ) -> bool:
        """Update a heuristic's fields."""
        if not updates:
            return False

        # Fetch existing record
        results = self._index.fetch(ids=[heuristic_id], namespace=NAMESPACE_HEURISTICS)

        if heuristic_id not in results.get("vectors", {}):
            return False

        existing = results["vectors"][heuristic_id]
        metadata = existing.get("metadata", {})

        # Apply updates to metadata
        for key, value in updates.items():
            if key == "metadata":
                metadata["metadata_json"] = json.dumps(value) if value else "{}"
            elif isinstance(value, datetime):
                metadata[key] = value.isoformat()
            else:
                metadata[key] = value

        # Upsert with updated metadata
        self._index.upsert(
            vectors=[
                {
                    "id": heuristic_id,
                    "values": existing.get("values", self._get_zero_vector()),
                    "metadata": metadata,
                }
            ],
            namespace=NAMESPACE_HEURISTICS,
        )

        return True

    def increment_heuristic_occurrence(
        self,
        heuristic_id: str,
        success: bool,
    ) -> bool:
        """Increment heuristic occurrence count."""
        # Fetch existing record
        results = self._index.fetch(ids=[heuristic_id], namespace=NAMESPACE_HEURISTICS)

        if heuristic_id not in results.get("vectors", {}):
            return False

        existing = results["vectors"][heuristic_id]
        metadata = existing.get("metadata", {})

        # Increment counts
        metadata["occurrence_count"] = int(metadata.get("occurrence_count", 0)) + 1
        if success:
            metadata["success_count"] = int(metadata.get("success_count", 0)) + 1
        metadata["last_validated"] = datetime.now(timezone.utc).isoformat()

        # Upsert with updated metadata
        self._index.upsert(
            vectors=[
                {
                    "id": heuristic_id,
                    "values": existing.get("values", self._get_zero_vector()),
                    "metadata": metadata,
                }
            ],
            namespace=NAMESPACE_HEURISTICS,
        )

        return True

    def update_heuristic_confidence(
        self,
        heuristic_id: str,
        new_confidence: float,
    ) -> bool:
        """Update a heuristic's confidence value."""
        return self.update_heuristic(heuristic_id, {"confidence": new_confidence})

    def update_knowledge_confidence(
        self,
        knowledge_id: str,
        new_confidence: float,
    ) -> bool:
        """Update domain knowledge confidence value."""
        # Fetch existing record
        results = self._index.fetch(
            ids=[knowledge_id], namespace=NAMESPACE_DOMAIN_KNOWLEDGE
        )

        if knowledge_id not in results.get("vectors", {}):
            return False

        existing = results["vectors"][knowledge_id]
        metadata = existing.get("metadata", {})
        metadata["confidence"] = new_confidence

        # Upsert with updated metadata
        self._index.upsert(
            vectors=[
                {
                    "id": knowledge_id,
                    "values": existing.get("values", self._get_zero_vector()),
                    "metadata": metadata,
                }
            ],
            namespace=NAMESPACE_DOMAIN_KNOWLEDGE,
        )

        return True

    # ==================== DELETE OPERATIONS ====================

    def delete_heuristic(self, heuristic_id: str) -> bool:
        """Delete a heuristic by ID."""
        try:
            self._index.delete(ids=[heuristic_id], namespace=NAMESPACE_HEURISTICS)
            logger.debug(f"Deleted heuristic: {heuristic_id}")
            return True
        except Exception as e:
            logger.error(f"Failed to delete heuristic {heuristic_id}: {e}")
            return False

    def delete_outcome(self, outcome_id: str) -> bool:
        """Delete an outcome by ID."""
        try:
            self._index.delete(ids=[outcome_id], namespace=NAMESPACE_OUTCOMES)
            logger.debug(f"Deleted outcome: {outcome_id}")
            return True
        except Exception as e:
            logger.error(f"Failed to delete outcome {outcome_id}: {e}")
            return False

    def delete_domain_knowledge(self, knowledge_id: str) -> bool:
        """Delete domain knowledge by ID."""
        try:
            self._index.delete(ids=[knowledge_id], namespace=NAMESPACE_DOMAIN_KNOWLEDGE)
            logger.debug(f"Deleted domain knowledge: {knowledge_id}")
            return True
        except Exception as e:
            logger.error(f"Failed to delete domain knowledge {knowledge_id}: {e}")
            return False

    def delete_anti_pattern(self, anti_pattern_id: str) -> bool:
        """Delete an anti-pattern by ID."""
        try:
            self._index.delete(ids=[anti_pattern_id], namespace=NAMESPACE_ANTI_PATTERNS)
            logger.debug(f"Deleted anti-pattern: {anti_pattern_id}")
            return True
        except Exception as e:
            logger.error(f"Failed to delete anti-pattern {anti_pattern_id}: {e}")
            return False

    def delete_outcomes_older_than(
        self,
        project_id: str,
        older_than: datetime,
        agent: Optional[str] = None,
    ) -> int:
        """Delete old outcomes.

        Note: Pinecone doesn't support bulk delete by filter directly,
        so we query first then delete by IDs.
        """
        filter_dict = self._build_filter(project_id=project_id, agent=agent)
        query_vector = self._get_zero_vector()

        # Query to get all matching IDs
        results = self._index.query(
            vector=query_vector,
            top_k=10000,  # Large number to get all
            namespace=NAMESPACE_OUTCOMES,
            filter=filter_dict if filter_dict else None,
            include_metadata=True,
        )

        older_than_iso = older_than.isoformat()
        ids_to_delete = []

        for match in results.get("matches", []):
            timestamp = match.get("metadata", {}).get("timestamp", "")
            if timestamp and timestamp < older_than_iso:
                ids_to_delete.append(match["id"])

        if ids_to_delete:
            # Delete in batches of 1000
            batch_size = 1000
            for i in range(0, len(ids_to_delete), batch_size):
                batch = ids_to_delete[i : i + batch_size]
                self._index.delete(ids=batch, namespace=NAMESPACE_OUTCOMES)

        deleted = len(ids_to_delete)
        logger.info(f"Deleted {deleted} old outcomes")
        return deleted

    def delete_low_confidence_heuristics(
        self,
        project_id: str,
        below_confidence: float,
        agent: Optional[str] = None,
    ) -> int:
        """Delete low-confidence heuristics."""
        filter_dict = self._build_filter(project_id=project_id, agent=agent)
        query_vector = self._get_zero_vector()

        # Query to get all matching IDs
        results = self._index.query(
            vector=query_vector,
            top_k=10000,
            namespace=NAMESPACE_HEURISTICS,
            filter=filter_dict if filter_dict else None,
            include_metadata=True,
        )

        ids_to_delete = []
        for match in results.get("matches", []):
            confidence = float(match.get("metadata", {}).get("confidence", 0.0))
            if confidence < below_confidence:
                ids_to_delete.append(match["id"])

        if ids_to_delete:
            batch_size = 1000
            for i in range(0, len(ids_to_delete), batch_size):
                batch = ids_to_delete[i : i + batch_size]
                self._index.delete(ids=batch, namespace=NAMESPACE_HEURISTICS)

        deleted = len(ids_to_delete)
        logger.info(f"Deleted {deleted} low-confidence heuristics")
        return deleted

    # ==================== STATS ====================

    def get_stats(
        self,
        project_id: str,
        agent: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Get memory statistics."""
        stats = {
            "project_id": project_id,
            "agent": agent,
            "storage_type": "pinecone",
            "index_name": self.index_name,
        }

        # Get index stats
        try:
            index_stats = self._index.describe_index_stats()

            # Count by namespace
            namespaces = index_stats.get("namespaces", {})

            stats["heuristics_count"] = namespaces.get(NAMESPACE_HEURISTICS, {}).get(
                "vector_count", 0
            )
            stats["outcomes_count"] = namespaces.get(NAMESPACE_OUTCOMES, {}).get(
                "vector_count", 0
            )
            stats["domain_knowledge_count"] = namespaces.get(
                NAMESPACE_DOMAIN_KNOWLEDGE, {}
            ).get("vector_count", 0)
            stats["anti_patterns_count"] = namespaces.get(
                NAMESPACE_ANTI_PATTERNS, {}
            ).get("vector_count", 0)
            stats["preferences_count"] = namespaces.get(NAMESPACE_PREFERENCES, {}).get(
                "vector_count", 0
            )
            stats["total_count"] = index_stats.get("total_vector_count", 0)

        except Exception as e:
            logger.warning(f"Failed to get index stats: {e}")
            stats["error"] = str(e)

        return stats

    def close(self):
        """Close the Pinecone connection (no-op for Pinecone client)."""
        # Pinecone client doesn't require explicit cleanup
        pass
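For orientation, a minimal usage sketch (not part of the package) showing how the backend added above might be wired up. It uses only methods visible in this diff: from_config, get_heuristics, and get_stats. The config keys mirror the ones from_config reads; the project ID, agent name, and zero query vector are illustrative placeholders, and in practice the query embedding would come from ALMA's configured embedding model.

    # Hypothetical usage sketch, exercising only the PineconeStorage API shown above.
    from alma.storage.pinecone import PineconeStorage

    config = {
        "pinecone": {
            "api_key": "${PINECONE_API_KEY}",  # expanded from the environment by from_config
            "index_name": "alma-memory",
            "cloud": "aws",
            "region": "us-east-1",
            "metric": "cosine",
        },
        "embedding_dim": 384,
    }

    storage = PineconeStorage.from_config(config)

    # Metadata-filtered similarity search in the "heuristics" namespace.
    # The zero vector is a placeholder; pass a real 384-dim embedding for ranked results.
    heuristics = storage.get_heuristics(
        project_id="my-project",   # hypothetical project ID
        agent="planner",           # hypothetical agent name
        embedding=[0.0] * 384,
        top_k=5,
        min_confidence=0.5,
    )

    print(f"Retrieved {len(heuristics)} heuristics")
    print(storage.get_stats(project_id="my-project"))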