powermem-0.1.0-py3-none-any.whl
- powermem/__init__.py +103 -0
- powermem/agent/__init__.py +35 -0
- powermem/agent/abstract/__init__.py +22 -0
- powermem/agent/abstract/collaboration.py +259 -0
- powermem/agent/abstract/context.py +187 -0
- powermem/agent/abstract/manager.py +232 -0
- powermem/agent/abstract/permission.py +217 -0
- powermem/agent/abstract/privacy.py +267 -0
- powermem/agent/abstract/scope.py +199 -0
- powermem/agent/agent.py +791 -0
- powermem/agent/components/__init__.py +18 -0
- powermem/agent/components/collaboration_coordinator.py +645 -0
- powermem/agent/components/permission_controller.py +586 -0
- powermem/agent/components/privacy_protector.py +767 -0
- powermem/agent/components/scope_controller.py +685 -0
- powermem/agent/factories/__init__.py +16 -0
- powermem/agent/factories/agent_factory.py +266 -0
- powermem/agent/factories/config_factory.py +308 -0
- powermem/agent/factories/memory_factory.py +229 -0
- powermem/agent/implementations/__init__.py +16 -0
- powermem/agent/implementations/hybrid.py +728 -0
- powermem/agent/implementations/multi_agent.py +1040 -0
- powermem/agent/implementations/multi_user.py +1020 -0
- powermem/agent/types.py +53 -0
- powermem/agent/wrappers/__init__.py +14 -0
- powermem/agent/wrappers/agent_memory_wrapper.py +427 -0
- powermem/agent/wrappers/compatibility_wrapper.py +520 -0
- powermem/config_loader.py +318 -0
- powermem/configs.py +249 -0
- powermem/core/__init__.py +19 -0
- powermem/core/async_memory.py +1493 -0
- powermem/core/audit.py +258 -0
- powermem/core/base.py +165 -0
- powermem/core/memory.py +1567 -0
- powermem/core/setup.py +162 -0
- powermem/core/telemetry.py +215 -0
- powermem/integrations/__init__.py +17 -0
- powermem/integrations/embeddings/__init__.py +13 -0
- powermem/integrations/embeddings/aws_bedrock.py +100 -0
- powermem/integrations/embeddings/azure_openai.py +55 -0
- powermem/integrations/embeddings/base.py +31 -0
- powermem/integrations/embeddings/config/base.py +132 -0
- powermem/integrations/embeddings/configs.py +31 -0
- powermem/integrations/embeddings/factory.py +48 -0
- powermem/integrations/embeddings/gemini.py +39 -0
- powermem/integrations/embeddings/huggingface.py +41 -0
- powermem/integrations/embeddings/langchain.py +35 -0
- powermem/integrations/embeddings/lmstudio.py +29 -0
- powermem/integrations/embeddings/mock.py +11 -0
- powermem/integrations/embeddings/ollama.py +53 -0
- powermem/integrations/embeddings/openai.py +49 -0
- powermem/integrations/embeddings/qwen.py +102 -0
- powermem/integrations/embeddings/together.py +31 -0
- powermem/integrations/embeddings/vertexai.py +54 -0
- powermem/integrations/llm/__init__.py +18 -0
- powermem/integrations/llm/anthropic.py +87 -0
- powermem/integrations/llm/base.py +132 -0
- powermem/integrations/llm/config/anthropic.py +56 -0
- powermem/integrations/llm/config/azure.py +56 -0
- powermem/integrations/llm/config/base.py +62 -0
- powermem/integrations/llm/config/deepseek.py +56 -0
- powermem/integrations/llm/config/ollama.py +56 -0
- powermem/integrations/llm/config/openai.py +79 -0
- powermem/integrations/llm/config/qwen.py +68 -0
- powermem/integrations/llm/config/qwen_asr.py +46 -0
- powermem/integrations/llm/config/vllm.py +56 -0
- powermem/integrations/llm/configs.py +26 -0
- powermem/integrations/llm/deepseek.py +106 -0
- powermem/integrations/llm/factory.py +118 -0
- powermem/integrations/llm/gemini.py +201 -0
- powermem/integrations/llm/langchain.py +65 -0
- powermem/integrations/llm/ollama.py +106 -0
- powermem/integrations/llm/openai.py +166 -0
- powermem/integrations/llm/openai_structured.py +80 -0
- powermem/integrations/llm/qwen.py +207 -0
- powermem/integrations/llm/qwen_asr.py +171 -0
- powermem/integrations/llm/vllm.py +106 -0
- powermem/integrations/rerank/__init__.py +20 -0
- powermem/integrations/rerank/base.py +43 -0
- powermem/integrations/rerank/config/__init__.py +7 -0
- powermem/integrations/rerank/config/base.py +27 -0
- powermem/integrations/rerank/configs.py +23 -0
- powermem/integrations/rerank/factory.py +68 -0
- powermem/integrations/rerank/qwen.py +159 -0
- powermem/intelligence/__init__.py +17 -0
- powermem/intelligence/ebbinghaus_algorithm.py +354 -0
- powermem/intelligence/importance_evaluator.py +361 -0
- powermem/intelligence/intelligent_memory_manager.py +284 -0
- powermem/intelligence/manager.py +148 -0
- powermem/intelligence/plugin.py +229 -0
- powermem/prompts/__init__.py +29 -0
- powermem/prompts/graph/graph_prompts.py +217 -0
- powermem/prompts/graph/graph_tools_prompts.py +469 -0
- powermem/prompts/importance_evaluation.py +246 -0
- powermem/prompts/intelligent_memory_prompts.py +163 -0
- powermem/prompts/templates.py +193 -0
- powermem/storage/__init__.py +14 -0
- powermem/storage/adapter.py +896 -0
- powermem/storage/base.py +109 -0
- powermem/storage/config/base.py +13 -0
- powermem/storage/config/oceanbase.py +58 -0
- powermem/storage/config/pgvector.py +52 -0
- powermem/storage/config/sqlite.py +27 -0
- powermem/storage/configs.py +159 -0
- powermem/storage/factory.py +59 -0
- powermem/storage/migration_manager.py +438 -0
- powermem/storage/oceanbase/__init__.py +8 -0
- powermem/storage/oceanbase/constants.py +162 -0
- powermem/storage/oceanbase/oceanbase.py +1384 -0
- powermem/storage/oceanbase/oceanbase_graph.py +1441 -0
- powermem/storage/pgvector/__init__.py +7 -0
- powermem/storage/pgvector/pgvector.py +420 -0
- powermem/storage/sqlite/__init__.py +0 -0
- powermem/storage/sqlite/sqlite.py +218 -0
- powermem/storage/sqlite/sqlite_vector_store.py +311 -0
- powermem/utils/__init__.py +35 -0
- powermem/utils/utils.py +605 -0
- powermem/version.py +23 -0
- powermem-0.1.0.dist-info/METADATA +187 -0
- powermem-0.1.0.dist-info/RECORD +123 -0
- powermem-0.1.0.dist-info/WHEEL +5 -0
- powermem-0.1.0.dist-info/licenses/LICENSE +206 -0
- powermem-0.1.0.dist-info/top_level.txt +1 -0
powermem/storage/adapter.py
@@ -0,0 +1,896 @@
"""
|
|
2
|
+
Storage adapter for Memory class
|
|
3
|
+
|
|
4
|
+
This module provides an adapter that bridges the VectorStoreBase interface
|
|
5
|
+
with the interface expected by the Memory class.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
import uuid
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
from typing import Any, Dict, List, Optional
|
|
12
|
+
|
|
13
|
+
from powermem.storage.base import VectorStoreBase
|
|
14
|
+
from powermem.utils.utils import serialize_datetime
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class StorageAdapter:
|
|
20
|
+
"""Adapter that bridges VectorStoreBase interface with Memory class expectations."""
|
|
21
|
+
|
|
22
|
+
def __init__(self, vector_store: VectorStoreBase, embedding_service=None):
|
|
23
|
+
"""Initialize the adapter with a vector store and embedding service."""
|
|
24
|
+
self.vector_store = vector_store
|
|
25
|
+
self.embedding_service = embedding_service
|
|
26
|
+
# get collection name from vector store attribute collection_name
|
|
27
|
+
self.collection_name = getattr(vector_store, 'collection_name', 'memories')
|
|
28
|
+
|
|
29
|
+
# Sub stores support (optional, for multi-table routing)
|
|
30
|
+
self.sub_stores: Dict[str, 'SubStoreConfig'] = {}
|
|
31
|
+
self.migration_manager = None
|
|
32
|
+
|
|
33
|
+
# Ensure collection exists (will be created with actual vector size when first vector is added)
|
|
34
|
+
# self.vector_store.create_col(self.collection_name, vector_size=1536, distance="cosine")
|
|
35
|
+
|
|
36
|
+
def add_memory(self, memory_data: Dict[str, Any]) -> int:
|
|
37
|
+
"""Add a memory to the store."""
|
|
38
|
+
# ID will be generated using Snowflake algorithm before insertion
|
|
39
|
+
|
|
40
|
+
# Create vector from content using embedding service
|
|
41
|
+
content = memory_data.get("content", "")
|
|
42
|
+
metadata = memory_data.get("metadata", {})
|
|
43
|
+
|
|
44
|
+
# Route to target store (main or sub store)
|
|
45
|
+
target_store = self._route_to_store(metadata)
|
|
46
|
+
|
|
47
|
+
# Check if embedding is already provided (preferred way)
|
|
48
|
+
vector = memory_data.get("embedding")
|
|
49
|
+
|
|
50
|
+
if vector is None:
|
|
51
|
+
# No embedding provided, generate using embedding service
|
|
52
|
+
if self.embedding_service:
|
|
53
|
+
try:
|
|
54
|
+
vector = self.embedding_service.embed(content, memory_action="add")
|
|
55
|
+
except Exception as e:
|
|
56
|
+
logger.warning(f"Failed to generate embedding, using mock vector: {e}")
|
|
57
|
+
vector = [0.1] * 1536 # Use 1536 dimensions for OceanBase compatibility
|
|
58
|
+
else:
|
|
59
|
+
# No embedding service available, use mock vector
|
|
60
|
+
vector = [0.1] * 1536
|
|
61
|
+
|
|
62
|
+
# Create collection with actual vector size if not exists
|
|
63
|
+
collection_name = getattr(target_store, 'collection_name', self.collection_name)
|
|
64
|
+
if not hasattr(self, '_collection_created'):
|
|
65
|
+
target_store.create_col(collection_name, vector_size=len(vector), distance="cosine")
|
|
66
|
+
self._collection_created = True
|
|
67
|
+
|
|
68
|
+
# Store the memory data as payload - unified format based on OceanBase
|
|
69
|
+
payload = {
|
|
70
|
+
"data": content, # Unified field name for text content
|
|
71
|
+
"user_id": memory_data.get("user_id", ""),
|
|
72
|
+
"agent_id": memory_data.get("agent_id", ""),
|
|
73
|
+
"run_id": memory_data.get("run_id", ""),
|
|
74
|
+
"actor_id": memory_data.get("actor_id", ""),
|
|
75
|
+
"hash": memory_data.get("hash", ""),
|
|
76
|
+
"created_at": serialize_datetime(memory_data.get("created_at", "")),
|
|
77
|
+
"updated_at": serialize_datetime(memory_data.get("updated_at", "")),
|
|
78
|
+
"category": memory_data.get("category", ""),
|
|
79
|
+
"fulltext_content": content, # For full-text search
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
# Add only user-defined metadata (not system fields)
|
|
83
|
+
user_metadata = memory_data.get("metadata", {})
|
|
84
|
+
payload["metadata"] = serialize_datetime(user_metadata) if user_metadata else {}
|
|
85
|
+
|
|
86
|
+
# Add any extra fields (excluding system fields and embedding)
|
|
87
|
+
excluded_fields = ["id", "content", "data", "user_id", "agent_id", "run_id", "metadata", "filters",
|
|
88
|
+
"created_at", "updated_at", "actor_id", "hash", "category", "embedding"]
|
|
89
|
+
for key, value in memory_data.items():
|
|
90
|
+
if key not in excluded_fields:
|
|
91
|
+
payload[key] = serialize_datetime(value)
|
|
92
|
+
|
|
93
|
+
# Insert and get generated Snowflake ID
|
|
94
|
+
generated_ids = target_store.insert([vector], [payload])
|
|
95
|
+
if not generated_ids:
|
|
96
|
+
raise ValueError("Failed to insert memory: no ID returned from vector store")
|
|
97
|
+
memory_id = generated_ids[0] # Get the first (and only) generated Snowflake ID
|
|
98
|
+
return memory_id
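
    # Example (illustrative, not part of the original package source): a
    # typical memory_data dict accepted by add_memory(). "embedding" is
    # optional - when omitted, the configured embedding service is used.
    #
    #   memory_id = adapter.add_memory({
    #       "content": "User prefers dark mode",
    #       "user_id": "u-123",
    #       "agent_id": "assistant",
    #       "metadata": {"category": "preferences"},
    #   })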

    def search_memories(
        self,
        query_embedding: List[float],
        user_id: Optional[str] = None,
        agent_id: Optional[str] = None,
        run_id: Optional[str] = None,
        filters: Optional[Dict[str, Any]] = None,
        limit: int = 30,
        query: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """Search for memories."""
        # Use the provided query embedding
        if query_embedding:
            query_vector = query_embedding
        else:
            # Without a query embedding we cannot search meaningfully
            logger.warning("No query embedding provided for search")
            return []

        # Merge user_id/agent_id/run_id into filters to ensure consistency.
        # This way filters are applied at the database level, avoiding
        # redundant filtering afterwards.
        effective_filters = filters.copy() if filters else {}
        if user_id is not None:
            effective_filters["user_id"] = user_id
        if agent_id is not None:
            effective_filters["agent_id"] = agent_id
        if run_id is not None:
            effective_filters["run_id"] = run_id

        # Route to the target store (main or sub store)
        target_store = self._route_to_store(effective_filters)

        # Unified search - try the OceanBase signature first, fall back to the
        # SQLite one. The query text is passed to enable hybrid search
        # (vector + full-text).
        search_query = query if query else ""
        try:
            # OceanBase signature - the query text enables hybrid search
            results = target_store.search(search_query, vectors=query_vector, limit=limit, filters=effective_filters)
        except TypeError:
            # SQLite signature (expects a list of vectors); pass filters so
            # filtering still happens at the database level
            results = target_store.search(search_query, vectors=[query_vector], limit=limit, filters=effective_filters)

        # Convert results to the unified format
        memories = []
        for result in results:
            # Handle the different result formats
            if hasattr(result, 'payload') and result.payload is not None:
                # Result object with a payload attribute (object or dict payload)
                payload = result.payload
                memory_id = result.id
                # Extract the score - default to 0.0 rather than 1.0 to avoid
                # false high scores. A score should always come back from
                # vector search, but handle the None case gracefully.
                score = getattr(result, 'score', None)
                if score is None:
                    logger.warning(f"Result {memory_id} missing score, using 0.0")
                    score = 0.0
            elif isinstance(result, dict):
                # Direct dict result
                payload = result
                memory_id = result.get("id")
                score = result.get("score")
                if score is None:
                    logger.warning(f"Result {memory_id} missing score, using 0.0")
                    score = 0.0
            else:
                continue

            # Extract the unified fields.
            # Core and promoted keys that should not end up in metadata:
            promoted_payload_keys = ["user_id", "agent_id", "run_id", "actor_id", "role"]
            core_and_promoted_keys = {"data", "hash", "created_at", "updated_at", "id", "metadata", *promoted_payload_keys}

            # Core fields
            content = payload.get("data", "")
            created_at = payload.get("created_at")
            updated_at = payload.get("updated_at")

            # Promoted fields
            promoted_fields = {}
            for key in promoted_payload_keys:
                if key in payload:
                    promoted_fields[key] = payload[key]

            # Extract user metadata from the payload. If the payload contains
            # a nested "metadata" field, use it directly; otherwise collect
            # the remaining non-core fields as metadata.
            if "metadata" in payload:
                user_metadata = payload["metadata"].copy() if payload["metadata"] else {}
            else:
                user_metadata = {k: v for k, v in payload.items() if k not in core_and_promoted_keys}

            # Merge any user-defined top-level payload fields into metadata.
            # Fields like "category" were promoted out of metadata for
            # filtering purposes but should still be visible in the returned
            # metadata.
            for key, value in payload.items():
                if key not in core_and_promoted_keys and key not in user_metadata and value:
                    # Only include non-empty values that aren't already in metadata
                    user_metadata[key] = value

            memory = {
                "id": memory_id,
                "memory": content,
                "created_at": created_at,
                "updated_at": updated_at,
                "score": score,
                **promoted_fields,  # Promoted fields at the top level
                "metadata": user_metadata if user_metadata else {},
            }

            # No filtering needed here - filters were already applied at the
            # database level in vector_store.search(), so every returned
            # result already matches them
            memories.append(memory)

        # The vector store already applied the limit; no need to slice again
        return memories
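
    # Example (illustrative, not part of the original package source): a
    # hybrid search scoped to one user. The query embedding would normally
    # come from the same embedding service used at write time; the
    # memory_action value shown is an assumption (this module itself only
    # passes "add").
    #
    #   hits = adapter.search_memories(
    #       query_embedding=embedder.embed("dark mode", memory_action="search"),
    #       user_id="u-123",
    #       limit=5,
    #       query="dark mode",  # enables hybrid vector + full-text search
    #   )
    #   # each hit: {"id": ..., "memory": ..., "score": ..., "metadata": {...}}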

    def get_memory(
        self,
        memory_id: int,
        user_id: Optional[str] = None,
        agent_id: Optional[str] = None,
    ) -> Optional[Dict[str, Any]]:
        """Get a specific memory by ID."""
        result = self.vector_store.get(memory_id)

        if result and result.payload:
            memory = {
                "id": result.id,
                # The payload stores the text under the unified "data" key
                # (see add_memory); fall back to "content" for older records
                "content": result.payload.get("data", result.payload.get("content", "")),
                "user_id": result.payload.get("user_id"),
                "agent_id": result.payload.get("agent_id"),
                "run_id": result.payload.get("run_id"),
                "metadata": result.payload.get("metadata", {}),
                "created_at": result.payload.get("created_at"),
                "updated_at": result.payload.get("updated_at"),
            }

            # Check access control
            if user_id and memory.get("user_id") != user_id:
                return None
            if agent_id and memory.get("agent_id") != agent_id:
                return None

            return memory

        # Not found in the main store; search the sub stores if any exist
        if self.sub_stores:
            for sub_config in self.sub_stores.values():
                try:
                    result = sub_config.vector_store.get(memory_id)
                    if result and result.payload:
                        memory = {
                            "id": result.id,
                            "content": result.payload.get("data", result.payload.get("content", "")),
                            "user_id": result.payload.get("user_id"),
                            "agent_id": result.payload.get("agent_id"),
                            "run_id": result.payload.get("run_id"),
                            "metadata": result.payload.get("metadata", {}),
                            "created_at": result.payload.get("created_at"),
                            "updated_at": result.payload.get("updated_at"),
                        }

                        # Check access control
                        if user_id and memory.get("user_id") != user_id:
                            continue
                        if agent_id and memory.get("agent_id") != agent_id:
                            continue

                        return memory
                except Exception as e:
                    logger.debug(f"Error searching in sub store {sub_config.name}: {e}")
                    continue

        return None

    def update_memory(
        self,
        memory_id: int,
        update_data: Dict[str, Any],
        user_id: Optional[str] = None,
        agent_id: Optional[str] = None,
    ) -> Optional[Dict[str, Any]]:
        """Update a memory."""
        # First check that the memory exists and the user has access
        # (get_memory returns a dict)
        existing_memory_dict = self.get_memory(memory_id, user_id, agent_id)
        if not existing_memory_dict:
            logger.warning(f"Memory {memory_id} not found or access denied")
            return None

        # Get the raw OutputData object from the vector store to access its payload
        existing_result = self.vector_store.get(memory_id)
        target_store = self.vector_store

        # If not found in the main store, search the sub stores
        if (not existing_result or not existing_result.payload) and self.sub_stores:
            for sub_config in self.sub_stores.values():
                try:
                    sub_result = sub_config.vector_store.get(memory_id)
                    if sub_result and sub_result.payload:
                        # Verify access control matches
                        sub_payload = sub_result.payload
                        if user_id and sub_payload.get("user_id") != user_id:
                            continue
                        if agent_id and sub_payload.get("agent_id") != agent_id:
                            continue
                        existing_result = sub_result
                        target_store = sub_config.vector_store
                        break
                except Exception as e:
                    logger.debug(f"Error searching in sub store {sub_config.name}: {e}")
                    continue

        if not existing_result or not existing_result.payload:
            logger.warning(f"Memory {memory_id} not found in vector store")
            return None

        # Merge update_data into a copy of the existing payload
        existing_payload = existing_result.payload
        updated_payload = existing_payload.copy()

        # Work on a copy of update_data, and pull the embedding out so the raw
        # vector is passed to update() rather than merged into the payload
        update_data = update_data.copy()
        embedding = update_data.pop("embedding", None)

        # Map the content field to "data" in the payload
        if "content" in update_data:
            content = update_data.pop("content")
            updated_payload["data"] = content
            updated_payload["fulltext_content"] = content

        # Serialize datetime objects in update_data before merging
        serialized_update_data = serialize_datetime(update_data)

        # Update the remaining fields
        updated_payload.update(serialized_update_data)

        # Ensure datetime fields are serialized as ISO format strings
        if "updated_at" in updated_payload and isinstance(updated_payload["updated_at"], datetime):
            updated_payload["updated_at"] = updated_payload["updated_at"].isoformat()
        if "created_at" in updated_payload and isinstance(updated_payload["created_at"], datetime):
            updated_payload["created_at"] = updated_payload["created_at"].isoformat()

        # Set updated_at if not provided
        if "updated_at" not in updated_payload:
            updated_payload["updated_at"] = datetime.utcnow().isoformat()

        # Update the vector store with the new payload
        target_store.update(memory_id, vector=embedding, payload=updated_payload)

        return updated_payload
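
    # Example (illustrative, not part of the original package source):
    # updating the text re-maps "content" onto the payload's "data" and
    # "fulltext_content" fields; any other keys are merged into the payload.
    #
    #   adapter.update_memory(
    #       memory_id,
    #       {"content": "User now prefers light mode",
    #        "metadata": {"category": "preferences"}},
    #       user_id="u-123",
    #   )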

    def delete_memory(
        self,
        memory_id: int,
        user_id: Optional[str] = None,
        agent_id: Optional[str] = None,
    ) -> bool:
        """Delete a memory."""
        # Check that the memory exists and the user has access
        existing = self.get_memory(memory_id, user_id, agent_id)
        if not existing:
            return False

        # Try to delete from the main store
        try:
            self.vector_store.delete(memory_id)
            return True
        except Exception as e:
            logger.debug(f"Memory {memory_id} not in main store: {e}")

        # Not in the main store; try the sub stores
        if self.sub_stores:
            for sub_config in self.sub_stores.values():
                try:
                    sub_config.vector_store.delete(memory_id)
                    logger.debug(f"Deleted memory {memory_id} from sub store {sub_config.name}")
                    return True
                except Exception as e:
                    logger.debug(f"Memory {memory_id} not in sub store {sub_config.name}: {e}")
                    continue

        # The memory was visible to get_memory but could not be deleted
        logger.warning(f"Failed to delete memory {memory_id}")
        return False

    def get_all_memories(
        self,
        user_id: Optional[str] = None,
        agent_id: Optional[str] = None,
        run_id: Optional[str] = None,
        limit: int = 100,
        offset: int = 0,
    ) -> List[Dict[str, Any]]:
        """Get all memories with optional filtering."""
        # Build filters for database-level filtering
        filters = {}
        if user_id:
            filters["user_id"] = user_id
        if agent_id:
            filters["agent_id"] = agent_id
        if run_id:
            filters["run_id"] = run_id

        # Get memories from the vector store with filters (if supported).
        # Extra records are requested so the offset can be applied afterwards.
        if filters and hasattr(self.vector_store, 'list'):
            # Database-level filtering via the vector store's list method
            results = self.vector_store.list(filters=filters, limit=limit + offset)
        else:
            # Fallback: fetch without filters and filter in memory below
            results = self.vector_store.list(limit=limit + offset)

        # OceanBase returns [memories]; SQLite/PGVector return memories directly
        if results and isinstance(results[0], list):
            raw_results = results[0]
        else:
            raw_results = results

        # Convert to the expected format and apply the filters
        memories = []
        for result in raw_results:
            # Handle the different result formats
            if hasattr(result, 'payload') and result.payload:
                # Result with a payload attribute (e.g. OceanBase OutputData)
                payload = result.payload
                memory_id = result.id
            elif isinstance(result, dict):
                # Direct dict result (e.g. from SQLite)
                payload = result
                memory_id = result.get("id")
            else:
                continue

            # Convert datetime objects to ISO format strings
            created_at = payload.get("created_at")
            if isinstance(created_at, datetime):
                created_at = created_at.isoformat()

            updated_at = payload.get("updated_at")
            if isinstance(updated_at, datetime):
                updated_at = updated_at.isoformat()

            memory = {
                "id": memory_id,
                "memory": payload.get("data", ""),  # Unified field name, matching search_memories
                "user_id": payload.get("user_id"),
                "agent_id": payload.get("agent_id"),
                "run_id": payload.get("run_id"),
                "metadata": payload.get("metadata", {}),
                "created_at": created_at,
                "updated_at": updated_at,
            }

            # Apply the filters again as a double-check; if they were already
            # applied at the database level, these all pass
            if user_id and memory.get("user_id") != user_id:
                continue
            if agent_id and memory.get("agent_id") != agent_id:
                continue
            if run_id and memory.get("run_id") != run_id:
                continue

            memories.append(memory)

        # Apply offset and limit
        return memories[offset:offset + limit]

    def clear_memories(
        self,
        user_id: Optional[str] = None,
        agent_id: Optional[str] = None,
        run_id: Optional[str] = None,
    ) -> bool:
        """Clear all memories for a user, agent, or run."""
        # Build filters for the database query
        filters = {}
        if user_id:
            filters["user_id"] = user_id
        if agent_id:
            filters["agent_id"] = agent_id
        if run_id:
            filters["run_id"] = run_id

        # Use batch processing to avoid timeouts
        batch_size = 1000
        deleted_count = 0
        failed_count = 0

        while True:
            # Fetch the next batch. Rows deleted in earlier batches no longer
            # exist, so only rows that failed to delete need to be skipped via
            # the offset; offsetting by the deleted count would silently skip
            # surviving rows.
            batch = self.get_all_memories(user_id, agent_id, run_id, limit=batch_size, offset=failed_count)

            # No more records - we're done
            if not batch:
                break

            # Delete each memory in the batch
            for memory in batch:
                try:
                    self.vector_store.delete(memory["id"])
                    deleted_count += 1
                except Exception as e:
                    failed_count += 1
                    logger.warning(f"Failed to delete memory {memory.get('id')}: {e}")

            # Fewer records than batch_size means we've reached the end
            if len(batch) < batch_size:
                break

        logger.info(f"Deleted {deleted_count} memories with filters: {filters}")
        return True

    async def get_all_memories_async(
        self,
        user_id: Optional[str] = None,
        agent_id: Optional[str] = None,
        run_id: Optional[str] = None,
        limit: int = 100,
        offset: int = 0,
    ) -> List[Dict[str, Any]]:
        """Get all memories with optional filtering, asynchronously."""
        return await asyncio.to_thread(self.get_all_memories, user_id, agent_id, run_id, limit, offset)

    async def clear_memories_async(
        self,
        user_id: Optional[str] = None,
        agent_id: Optional[str] = None,
        run_id: Optional[str] = None,
    ) -> bool:
        """Clear all memories for a user, agent, or run, asynchronously."""
        return await asyncio.to_thread(self.clear_memories, user_id, agent_id, run_id)

    async def initialize_async(self):
        """Initialize storage asynchronously."""
        # No-op for now
        pass

    async def add_memory_async(self, memory_data: Dict[str, Any]) -> int:
        """Add a memory to the store asynchronously."""
        return await asyncio.to_thread(self.add_memory, memory_data)

    async def search_memories_async(
        self,
        query_embedding: List[float],
        user_id: Optional[str] = None,
        agent_id: Optional[str] = None,
        run_id: Optional[str] = None,
        filters: Optional[Dict[str, Any]] = None,
        limit: int = 30,
        query: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """Search for memories asynchronously."""
        return await asyncio.to_thread(self.search_memories, query_embedding, user_id, agent_id, run_id, filters, limit, query)

    async def get_memory_async(
        self,
        memory_id: int,
        user_id: Optional[str] = None,
        agent_id: Optional[str] = None,
    ) -> Optional[Dict[str, Any]]:
        """Get a specific memory by ID asynchronously."""
        return await asyncio.to_thread(self.get_memory, memory_id, user_id, agent_id)

    async def delete_memory_async(
        self,
        memory_id: int,
        user_id: Optional[str] = None,
        agent_id: Optional[str] = None,
    ) -> bool:
        """Delete a memory asynchronously."""
        return await asyncio.to_thread(self.delete_memory, memory_id, user_id, agent_id)

    async def update_memory_async(
        self,
        memory_id: int,
        update_data: Dict[str, Any],
        user_id: Optional[str] = None,
        agent_id: Optional[str] = None,
    ) -> Optional[Dict[str, Any]]:
        """Update a memory asynchronously."""
        return await asyncio.to_thread(self.update_memory, memory_id, update_data, user_id, agent_id)

    # ==================== Routing Support Methods ====================

    def _route_to_store(self, filters_or_metadata: Optional[Dict] = None) -> VectorStoreBase:
        """
        Route to the correct storage instance (main or sub store).

        Args:
            filters_or_metadata: Query conditions or memory metadata

        Returns:
            Target VectorStoreBase instance
        """
        # With no sub stores configured, always use the main store
        if not self.sub_stores:
            return self.vector_store

        # Try to find a matching sub store
        if filters_or_metadata:
            for sub_config in self.sub_stores.values():
                # Skip sub stores that are not ready yet (migration in progress)
                if self.migration_manager and not self.migration_manager.is_ready(sub_config.name):
                    continue

                # A sub store matches when every key/value pair of its routing
                # filter is present in filters_or_metadata
                routing_filter = sub_config.routing_filter
                if all(
                    key in filters_or_metadata and filters_or_metadata[key] == value
                    for key, value in routing_filter.items()
                ):
                    logger.debug(f"Routing to sub store: {sub_config.name}")
                    return sub_config.vector_store

        # Default to the main store
        logger.debug("Routing to main store")
        return self.vector_store
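
    # Example (illustrative, not part of the original package source): with a
    # sub store registered under routing_filter={"tenant": "vip"}, a request
    # carrying {"tenant": "vip", "user_id": "u-1"} routes to that sub store
    # (every filter key must be present with an equal value), while
    # {"tenant": "free"} or an empty dict falls through to the main store.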

    def get_target_store_name(self, filters_or_metadata: Optional[Dict] = None) -> str:
        """
        Get the target store name for the given filters/metadata.

        Args:
            filters_or_metadata: Query conditions or memory metadata

        Returns:
            Target storage name
        """
        target_store = self._route_to_store(filters_or_metadata)
        return getattr(target_store, 'collection_name', self.collection_name)

    def is_sub_store_ready(self, store_name: str) -> bool:
        """
        Check whether a sub store is ready (migration completed).

        Args:
            store_name: Sub store name

        Returns:
            True if ready, False otherwise (or if there is no migration manager)
        """
        if self.migration_manager:
            return self.migration_manager.is_ready(store_name)
        return False


# ==================== Sub Store Configuration ====================

class SubStoreConfig:
    """Configuration for a sub store."""

    def __init__(self, name: str, routing_filter: Dict, vector_store: VectorStoreBase, embedding_service=None):
        self.name = name
        self.routing_filter = routing_filter
        self.vector_store = vector_store
        self.embedding_service = embedding_service


class SubStorageAdapter(StorageAdapter):
    """
    Extended storage adapter with sub-store management capabilities.

    This adapter extends the basic StorageAdapter to support multiple sub
    stores with routing based on metadata filters. All CRUD operations are
    inherited from the parent class and automatically support routing.

    This class only contains the sub-store management methods.
    """

    def __init__(self, vector_store: VectorStoreBase, embedding_service=None):
        """
        Initialize the sub-storage adapter.

        Args:
            vector_store: The main vector store instance
            embedding_service: Optional embedding service for generating vectors
        """
        super().__init__(vector_store, embedding_service)

        # Initialize migration status management (database-backed)
        from powermem.storage.migration_manager import SubStoreMigrationManager
        self.migration_manager = SubStoreMigrationManager(vector_store, self.collection_name)

    # ==================== Sub Store Management Methods ====================

    def register_sub_store(
        self,
        store_name: str,
        routing_filter: Dict,
        vector_store: VectorStoreBase,
        embedding_service=None,
    ):
        """
        Register a sub store for routing.

        Args:
            store_name: Name of the sub store
            routing_filter: Dictionary of metadata conditions for routing
            vector_store: Vector store instance for the sub store
            embedding_service: Optional embedding service for this sub store (used for migration)
        """
        sub_config = SubStoreConfig(
            name=store_name,
            routing_filter=routing_filter,
            vector_store=vector_store,
            embedding_service=embedding_service
        )
        self.sub_stores[store_name] = sub_config

        # Register in the migration manager
        if self.migration_manager:
            self.migration_manager.register_sub_store(
                sub_store_name=store_name,
                routing_filter=routing_filter
            )

        logger.info(f"Registered sub store: {store_name} with filter: {routing_filter}")

    def migrate_to_sub_store(
        self,
        store_name: str,
        delete_source: bool = False,
        batch_size: int = 100
    ) -> int:
        """
        Migrate data from the main store to a sub store based on its routing filter.

        Args:
            store_name: Name of the sub store to migrate to
            delete_source: Whether to delete source data after migration
            batch_size: Number of records to process in each batch

        Returns:
            Number of records migrated

        Raises:
            ValueError: If the sub store is not registered or has no embedding service
        """
        if store_name not in self.sub_stores:
            raise ValueError(f"Sub store '{store_name}' not found. Please register it first.")

        sub_config = self.sub_stores[store_name]
        routing_filter = sub_config.routing_filter
        target_store = sub_config.vector_store
        sub_embedding_service = sub_config.embedding_service

        # An embedding service is required to re-embed the data
        if not sub_embedding_service:
            raise ValueError(f"Sub store '{store_name}' does not have an embedding service configured. "
                             "Cannot migrate without re-embedding the data.")

        # Mark the migration as started
        if self.migration_manager:
            self.migration_manager.mark_migrating(store_name, 0)

        try:
            migrated_count = 0

            # Find all memories that match the routing filter
            from powermem.storage.oceanbase.oceanbase import OceanBaseVectorStore

            if isinstance(self.vector_store, OceanBaseVectorStore):
                # OceanBase-specific path: query matching record IDs first, for
                # efficiency. The routing filter values are interpolated into
                # the SQL directly and are assumed to come from trusted
                # configuration, not user input.
                filter_conditions = " AND ".join([
                    f"JSON_EXTRACT(metadata, '$.{key}') = '{value}'"
                    for key, value in routing_filter.items()
                ])

                # Offset past rows that were scanned but remain in the source
                # store, so the loop makes progress whether or not
                # delete_source is set (a fixed LIMIT with no OFFSET would
                # re-read, and re-insert, the same surviving rows forever).
                remaining_offset = 0
                while True:
                    id_query_sql = f"""
                        SELECT id
                        FROM {self.collection_name}
                        WHERE {filter_conditions}
                        LIMIT {batch_size} OFFSET {remaining_offset}
                    """
                    id_results = self.vector_store.execute_sql(id_query_sql)
                    if not id_results:
                        break

                    for id_record in id_results:
                        record_id = id_record['id']
                        removed_from_source = False
                        try:
                            # Retrieve the full record via get()
                            result = self.vector_store.get(record_id)
                            if not result or not result.payload:
                                logger.warning(f"Record {record_id} not found, skipping")
                                continue

                            payload = result.payload.copy()
                            payload['id'] = record_id

                            # Extract the content for re-embedding
                            content = payload.get('data', '')
                            if not content:
                                logger.warning(f"Record {record_id} has no content, skipping")
                                continue

                            # Re-generate the vector with the sub store's embedding service
                            try:
                                vector = sub_embedding_service.embed(content, memory_action="add")
                                logger.debug(f"Re-embedded record {record_id} with dimension {len(vector)}")
                            except Exception as embed_error:
                                logger.error(f"Failed to re-embed record {record_id}: {embed_error}")
                                continue

                            try:
                                target_store.insert([vector], [payload])
                                migrated_count += 1

                                # Delete from the source if requested
                                if delete_source:
                                    self.vector_store.delete(record_id)
                                    removed_from_source = True

                                # Update progress
                                if self.migration_manager and migrated_count % 10 == 0:
                                    self.migration_manager.update_progress(
                                        store_name,
                                        migrated_count,
                                        migrated_count  # Total is unknown in this approach
                                    )
                            except Exception as e:
                                logger.error(f"Error migrating record {record_id}: {e}")
                                continue
                        finally:
                            if not removed_from_source:
                                remaining_offset += 1

                    # Fewer results than batch_size means we're done
                    if len(id_results) < batch_size:
                        break
            else:
                logger.warning(f"Migration not fully supported for {type(self.vector_store).__name__}")

            # Mark the migration as completed
            if self.migration_manager:
                self.migration_manager.mark_completed(store_name, migrated_count)

            logger.info(f"Migration completed: {migrated_count} records migrated to {store_name}")
            return migrated_count

        except Exception as e:
            # Mark the migration as failed
            if self.migration_manager:
                self.migration_manager.mark_failed(store_name, str(e))
            logger.error(f"Migration failed: {e}")
            raise

    def get_migration_status(self, store_name: str) -> Optional[Dict[str, Any]]:
        """
        Get the migration status for a sub store.

        Args:
            store_name: Sub store name

        Returns:
            Migration status dict, or None if not found
        """
        if self.migration_manager:
            return self.migration_manager.get_status(store_name)
        return None

    def list_sub_stores(self) -> List[str]:
        """
        List all registered sub stores.

        Returns:
            List of sub store names
        """
        return list(self.sub_stores.keys())
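
Taken together, the pieces above compose as follows. This is an illustrative sketch, not code from the package: the SQLiteVectorStore and MockEmbedder constructor names and signatures are assumptions (the real ones live in powermem/storage/sqlite/sqlite_vector_store.py and powermem/integrations/embeddings/mock.py), and only the StorageAdapter/SubStorageAdapter calls mirror the module itself.

    from powermem.storage.adapter import SubStorageAdapter

    # Hypothetical constructors - check the actual modules for real signatures
    main_store = SQLiteVectorStore(collection_name="memories")
    vip_store = SQLiteVectorStore(collection_name="memories_vip")
    embedder = MockEmbedder()

    adapter = SubStorageAdapter(main_store, embedding_service=embedder)

    # Route everything tagged tenant=vip to its own table. Until the migration
    # manager reports the sub store ready, reads and writes still hit the main store.
    adapter.register_sub_store("vip", routing_filter={"tenant": "vip"},
                               vector_store=vip_store, embedding_service=embedder)

    memory_id = adapter.add_memory({
        "content": "User prefers dark mode",
        "user_id": "u-123",
        "metadata": {"tenant": "vip"},
    })

    # Move matching rows out of the main store; each record is re-embedded with
    # the sub store's embedding service, and the migration is marked completed
    # on success (after which routing consults migration_manager.is_ready).
    adapter.migrate_to_sub_store("vip", delete_source=True)

    hits = adapter.search_memories(
        # memory_action value assumed; the module itself only uses "add"
        query_embedding=embedder.embed("dark mode", memory_action="search"),
        user_id="u-123",
        filters={"tenant": "vip"},  # matches the routing filter -> sub store
        limit=5,
    )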