iflow-mcp_hanw39_reasoning-bank-mcp 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. iflow_mcp_hanw39_reasoning_bank_mcp-0.2.0.dist-info/METADATA +599 -0
  2. iflow_mcp_hanw39_reasoning_bank_mcp-0.2.0.dist-info/RECORD +55 -0
  3. iflow_mcp_hanw39_reasoning_bank_mcp-0.2.0.dist-info/WHEEL +4 -0
  4. iflow_mcp_hanw39_reasoning_bank_mcp-0.2.0.dist-info/entry_points.txt +2 -0
  5. iflow_mcp_hanw39_reasoning_bank_mcp-0.2.0.dist-info/licenses/LICENSE +21 -0
  6. src/__init__.py +16 -0
  7. src/__main__.py +6 -0
  8. src/config.py +266 -0
  9. src/deduplication/__init__.py +19 -0
  10. src/deduplication/base.py +88 -0
  11. src/deduplication/factory.py +60 -0
  12. src/deduplication/strategies/__init__.py +1 -0
  13. src/deduplication/strategies/semantic_dedup.py +187 -0
  14. src/default_config.yaml +121 -0
  15. src/initializers/__init__.py +50 -0
  16. src/initializers/base.py +196 -0
  17. src/initializers/embedding_initializer.py +22 -0
  18. src/initializers/llm_initializer.py +22 -0
  19. src/initializers/memory_manager_initializer.py +55 -0
  20. src/initializers/retrieval_initializer.py +32 -0
  21. src/initializers/storage_initializer.py +22 -0
  22. src/initializers/tools_initializer.py +48 -0
  23. src/llm/__init__.py +10 -0
  24. src/llm/base.py +61 -0
  25. src/llm/factory.py +75 -0
  26. src/llm/providers/__init__.py +12 -0
  27. src/llm/providers/anthropic.py +62 -0
  28. src/llm/providers/dashscope.py +76 -0
  29. src/llm/providers/openai.py +76 -0
  30. src/merge/__init__.py +22 -0
  31. src/merge/base.py +89 -0
  32. src/merge/factory.py +60 -0
  33. src/merge/strategies/__init__.py +1 -0
  34. src/merge/strategies/llm_merge.py +170 -0
  35. src/merge/strategies/voting_merge.py +108 -0
  36. src/prompts/__init__.py +21 -0
  37. src/prompts/formatters.py +74 -0
  38. src/prompts/templates.py +184 -0
  39. src/retrieval/__init__.py +8 -0
  40. src/retrieval/base.py +37 -0
  41. src/retrieval/factory.py +55 -0
  42. src/retrieval/strategies/__init__.py +8 -0
  43. src/retrieval/strategies/cosine_retrieval.py +47 -0
  44. src/retrieval/strategies/hybrid_retrieval.py +155 -0
  45. src/server.py +306 -0
  46. src/services/__init__.py +5 -0
  47. src/services/memory_manager.py +403 -0
  48. src/storage/__init__.py +45 -0
  49. src/storage/backends/json_backend.py +290 -0
  50. src/storage/base.py +150 -0
  51. src/tools/__init__.py +8 -0
  52. src/tools/extract_memory.py +285 -0
  53. src/tools/retrieve_memory.py +139 -0
  54. src/utils/__init__.py +7 -0
  55. src/utils/similarity.py +54 -0
src/services/memory_manager.py
@@ -0,0 +1,403 @@
+ """
+ Memory Manager Service
+
+ Core service for managing memory deduplication, merging, and cleanup.
+ All operations respect agent_id isolation.
+ """
+
+ from typing import List, Dict, Any, Optional
+ import logging
+ from datetime import datetime
+ from dataclasses import dataclass, field
+ import asyncio
+ from uuid import uuid4
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass
+ class MemoryManagementResult:
+     """Result of memory management operations"""
+     success: bool
+     action: str  # "saved", "skipped_duplicate", "merged", "processed", "error"
+     memory_ids: List[str] = field(default_factory=list)
+     duplicates_found: int = 0
+     merged_count: int = 0
+     message: str = ""
+     metadata: Dict[str, Any] = field(default_factory=dict)
+
+
+ class MemoryManager:
+     """
+     Memory Manager: Orchestrates deduplication, merging, and cleanup.
+
+     Core responsibilities:
+     1. Real-time deduplication on memory creation
+     2. Trigger merging when similar memories accumulate
+     3. Archive original memories after merge
+     4. All operations are agent_id scoped
+     """
+
+     def __init__(
+         self,
+         storage_backend,
+         dedup_strategy,
+         merge_strategy,
+         embedding_provider,
+         llm_provider,
+         config: Dict[str, Any]
+     ):
+         """
+         Initialize MemoryManager.
+
+         Args:
+             storage_backend: Storage backend instance
+             dedup_strategy: Deduplication strategy instance
+             merge_strategy: Merge strategy instance
+             embedding_provider: Embedding provider for new memories
+             llm_provider: LLM provider for merge operations
+             config: Configuration dict
+         """
+         self.storage = storage_backend
+         self.dedup_strategy = dedup_strategy
+         self.merge_strategy = merge_strategy
+         self.embedding_provider = embedding_provider
+         self.llm_provider = llm_provider
+         self.config = config
+
+         # Inject LLM provider into merge strategy if needed
+         if hasattr(merge_strategy, 'set_llm_provider'):
+             merge_strategy.set_llm_provider(llm_provider)
+
+         # Configuration
+         self.manager_config = config.get("memory_manager", {})
+         self.dedup_on_extraction = self.manager_config.get("deduplication", {}).get("on_extraction", True)
+         self.merge_auto_execute = self.manager_config.get("merge", {}).get("auto_execute", True)
+         self.merge_min_similar = self.manager_config.get("merge", {}).get("trigger", {}).get("min_similar_count", 3)
+         self.merge_threshold = self.manager_config.get("merge", {}).get("trigger", {}).get("similarity_threshold", 0.85)
+
+         logger.info(
+             f"MemoryManager initialized: "
+             f"dedup={self.dedup_on_extraction}, "
+             f"auto_merge={self.merge_auto_execute}"
+         )
+
+     async def on_memory_created(
+         self,
+         new_memories: List[Dict[str, Any]],
+         embeddings: Dict[str, Any],
+         agent_id: Optional[str] = None
+     ) -> MemoryManagementResult:
+         """
+         Hook called after memories are extracted but before saving.
+
+         Process:
+         1. Check for duplicates (agent_id scoped)
+         2. Filter out duplicates
+         3. Check if merge should be triggered (agent_id scoped)
+         4. Return filtered memories and merge status
+
+         Args:
+             new_memories: List of newly extracted memories
+             embeddings: Dict mapping memory_id to embedding vector
+             agent_id: Agent ID (CRITICAL for isolation)
+
+         Returns:
+             MemoryManagementResult with filtered memories and actions taken
+         """
+         logger.info(
+             f"MemoryManager.on_memory_created: {len(new_memories)} new memories "
+             f"for agent_id={agent_id}"
+         )
+
+         unique_memories = []
+         duplicate_count = 0
+         merge_triggered = []
+
+         # Step 1: Deduplication check
+         if self.dedup_on_extraction:
+             for mem in new_memories:
+                 embedding = embeddings.get(mem["memory_id"])
+
+                 dedup_result = await self.dedup_strategy.check_duplicate(
+                     memory=mem,
+                     embedding=embedding,
+                     storage_backend=self.storage,
+                     agent_id=agent_id  # CRITICAL: Only check within this agent
+                 )
+
+                 if dedup_result.is_duplicate:
+                     logger.info(
+                         f"Skipping duplicate memory: {mem.get('title', 'N/A')} "
+                         f"(similar to {dedup_result.duplicate_of}, agent_id={agent_id})"
+                     )
+                     duplicate_count += 1
+                 else:
+                     unique_memories.append(mem)
+         else:
+             unique_memories = new_memories
+
+         # Step 2: Check if merge should be triggered
+         if self.merge_auto_execute and unique_memories:
+             for mem in unique_memories:
+                 try:
+                     merge_check = await self._check_merge_opportunity(
+                         memory=mem,
+                         embedding=embeddings.get(mem["memory_id"]),
+                         agent_id=agent_id
+                     )
+
+                     if merge_check:
+                         merge_triggered.append(merge_check)
+                 except Exception as e:
+                     logger.error(f"Error checking merge opportunity: {e}", exc_info=True)
+
+         return MemoryManagementResult(
+             success=True,
+             action="processed",
+             memory_ids=[m["memory_id"] for m in unique_memories],
+             duplicates_found=duplicate_count,
+             merged_count=len(merge_triggered),
+             message=f"Processed {len(unique_memories)} unique memories, "
+                     f"skipped {duplicate_count} duplicates, "
+                     f"triggered {len(merge_triggered)} merges",
+             metadata={
+                 "unique_memories": unique_memories,
+                 "merge_tasks": merge_triggered
+             }
+         )
+
+     async def _check_merge_opportunity(
+         self,
+         memory: Dict[str, Any],
+         embedding: Any,
+         agent_id: Optional[str]
+     ) -> Optional[Dict[str, Any]]:
+         """
+         Check if new memory creates a merge opportunity.
+
+         Logic:
+         1. Find similar memories (agent_id scoped)
+         2. If similar_count >= threshold, trigger merge
+         3. Execute merge in background
+
+         Returns:
+             Merge task info if triggered, None otherwise
+         """
+         if embedding is None:
+             return None
+
+         # Retrieve similar memories within agent_id scope
+         try:
+             retrieval_strategy = self.storage.retrieval_strategy
+             if not retrieval_strategy:
+                 return None
+
+             query_text = memory.get("query", "")
+             similar_results = await retrieval_strategy.retrieve(
+                 query=query_text,
+                 query_embedding=embedding,
+                 top_k=10,  # Check top 10
+                 agent_id=agent_id,  # CRITICAL: Only search within this agent
+                 storage_backend=self.storage
+             )
+
+             # Filter by threshold
+             similar_ids = [
+                 mem_id for mem_id, score in similar_results
+                 if score >= self.merge_threshold
+             ]
+
+             if len(similar_ids) >= self.merge_min_similar:
+                 logger.info(
+                     f"Merge opportunity detected: {len(similar_ids)} similar memories "
+                     f"for agent_id={agent_id}"
+                 )
+
+                 # Fetch full memory objects
+                 similar_memories = []
+                 for mem_id in similar_ids:
+                     mem = await self.storage.get_memory(mem_id)
+                     if mem:
+                         similar_memories.append(mem)
+
+                 # Add the new memory to the group
+                 similar_memories.append(memory)
+
+                 # Check if should merge
+                 should_merge = await self.merge_strategy.should_merge(
+                     similar_memories,
+                     agent_id=agent_id
+                 )
+
+                 if should_merge:
+                     # Execute merge in the background (fire-and-forget;
+                     # errors are caught and logged inside _execute_merge)
+                     asyncio.create_task(
+                         self._execute_merge(similar_memories, agent_id)
+                     )
+
+                     return {
+                         "group_size": len(similar_memories),
+                         "memory_ids": [m["memory_id"] for m in similar_memories]
+                     }
+
+         except Exception as e:
+             logger.error(f"Error in merge opportunity check: {e}", exc_info=True)
+
+         return None
+
+     async def _execute_merge(
+         self,
+         memories: List[Dict[str, Any]],
+         agent_id: Optional[str]
+     ):
+         """
+         Execute merge operation in background.
+
+         Steps:
+         1. Call merge strategy to create merged memory
+         2. Assign memory_id and timestamp
+         3. Generate embedding for merged memory
+         4. Save merged memory
+         5. Archive original memories and delete them from active storage
+         """
+         try:
+             logger.info(
+                 f"Executing merge: {len(memories)} memories for agent_id={agent_id}"
+             )
+
+             # Step 1: Merge
+             merged_data = await self.merge_strategy.merge(memories, agent_id=agent_id)
+
+             # Step 2: Generate memory_id and timestamp
+             merged_memory = {
+                 **merged_data,
+                 "memory_id": f"mem_merged_{uuid4().hex[:16]}",
+                 "timestamp": datetime.now().isoformat(),
+                 "retrieval_count": 0,
+                 "last_retrieved": None
+             }
+
+             # Step 3: Generate embedding
+             query_text = merged_memory.get("query", merged_memory.get("description", ""))
+             embedding = await self.embedding_provider.embed(query_text)
+
+             # Step 4: Save merged memory
+             await self.storage.save_memories(
+                 [merged_memory],
+                 {merged_memory["memory_id"]: embedding}
+             )
+
+             logger.info(
+                 f"Merged memory saved: {merged_memory['memory_id']} "
+                 f"(agent_id={agent_id})"
+             )
+
+             # Step 5: Archive originals (also deletes them from active storage)
+             await self._archive_memories(
+                 memories,
+                 merged_into=merged_memory["memory_id"],
+                 agent_id=agent_id
+             )
+
+             logger.info(
+                 f"Merge completed: {len(memories)} memories -> "
+                 f"{merged_memory['memory_id']} (agent_id={agent_id})"
+             )
+
+         except Exception as e:
+             logger.error(
+                 f"Error executing merge for agent_id={agent_id}: {e}",
+                 exc_info=True
+             )
+
+     async def _archive_memories(
+         self,
+         memories: List[Dict[str, Any]],
+         merged_into: str,
+         agent_id: Optional[str]
+     ):
+         """
+         Archive original memories after merge.
+
+         Args:
+             memories: Original memories to archive
+             merged_into: ID of merged memory
+             agent_id: Agent ID for validation
+         """
+         archived_data = []
+
+         for mem in memories:
+             # Validate agent_id
+             if agent_id and mem.get("agent_id") != agent_id:
+                 logger.warning(
+                     f"Skipping archive: memory {mem['memory_id']} has different agent_id"
+                 )
+                 continue
+
+             archived_mem = {
+                 **mem,
+                 "archived": True,
+                 "archived_at": datetime.now().isoformat(),
+                 "archived_reason": "merged",
+                 "merged_into": merged_into,
+                 "original_retrieval_count": mem.get("retrieval_count", 0)
+             }
+             archived_data.append(archived_mem)
+
+         if archived_data:
+             # Archive to storage
+             await self.storage.archive_memories(archived_data)
+
+             # Delete from active storage
+             memory_ids = [m["memory_id"] for m in archived_data]
+             await self.storage.delete_memories(memory_ids, agent_id=agent_id)
+
+             logger.info(
+                 f"Archived {len(archived_data)} memories for agent_id={agent_id}"
+             )
+
+     async def cleanup_duplicates(
+         self,
+         agent_id: Optional[str] = None,
+         dry_run: bool = True
+     ) -> Dict[str, Any]:
+         """
+         Find and remove duplicate memories (agent_id scoped).
+
+         Args:
+             agent_id: Only cleanup within this agent's memories (None = all agents)
+             dry_run: If True, only report what would be deleted
+
+         Returns:
+             Cleanup report dict
+         """
+         logger.info(
+             f"Running duplicate cleanup for agent_id={agent_id}, dry_run={dry_run}"
+         )
+
+         duplicate_groups = await self.dedup_strategy.find_duplicate_groups(
+             storage_backend=self.storage,
+             agent_id=agent_id
+         )
+
+         to_delete = []
+         to_keep = []
+
+         for group in duplicate_groups:
+             # Keep the first one (arbitrary choice), delete rest
+             to_keep.append(group[0])
+             to_delete.extend(group[1:])
+
+         if not dry_run and to_delete:
+             await self.storage.delete_memories(to_delete, agent_id=agent_id)
+
+         return {
+             "agent_id": agent_id,
+             "duplicate_groups": len(duplicate_groups),
+             "memories_to_delete": len(to_delete),
+             "memories_to_keep": len(to_keep),
+             "dry_run": dry_run,
+             "deleted_ids": [] if dry_run else to_delete
+         }
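
Note: the nested configuration block that MemoryManager.__init__ reads can be reconstructed from the .get() chains above. A minimal sketch as a Python dict, plus a hypothetical call site; the key names and defaults come from the code, while the variable names (manager, memories, vectors) and the agent ID are illustrative assumptions:

config = {
    "memory_manager": {
        "deduplication": {
            "on_extraction": True,  # run the duplicate check in on_memory_created
        },
        "merge": {
            "auto_execute": True,  # allow background merges
            "trigger": {
                "min_similar_count": 3,        # merge once this many similar memories exist
                "similarity_threshold": 0.85,  # minimum retrieval score to count as similar
            },
        },
    },
}

# Hypothetical call site, assuming the collaborators were built by the
# initializers listed in the file table above:
result = await manager.on_memory_created(
    new_memories=memories,  # list of dicts, each carrying a "memory_id"
    embeddings={m["memory_id"]: vec for m, vec in zip(memories, vectors)},
    agent_id="agent-123",   # scopes dedup and merge checks to one agent
)
print(result.action, result.duplicates_found, result.merged_count)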
src/storage/__init__.py
@@ -0,0 +1,45 @@
+ """Storage backend module"""
+ from .base import StorageBackend
+ from .backends.json_backend import JSONStorageBackend
+
+
+ class StorageFactory:
+     """Storage backend factory"""
+
+     _backends = {
+         "json": JSONStorageBackend,
+     }
+
+     @classmethod
+     def create(cls, config: dict) -> StorageBackend:
+         """
+         Create a storage backend instance.
+
+         Args:
+             config: Configuration dict containing a 'backend' key and backend-specific settings
+
+         Returns:
+             A StorageBackend instance
+         """
+         backend_name = config.get("backend", "json")
+
+         if backend_name not in cls._backends:
+             raise ValueError(
+                 f"Unknown storage backend: {backend_name}. "
+                 f"Available backends: {list(cls._backends.keys())}"
+             )
+
+         backend_class = cls._backends[backend_name]
+         return backend_class(config)
+
+     @classmethod
+     def register_backend(cls, name: str, backend_class: type):
+         """Register a new storage backend (plugin mechanism)."""
+         cls._backends[name] = backend_class
+
+
+ __all__ = [
+     "StorageBackend",
+     "JSONStorageBackend",
+     "StorageFactory",
+ ]
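
Note: StorageFactory doubles as a plugin registry. A hedged sketch of the mechanism; InMemoryBackend and the "memory" key are hypothetical, and only register_backend() and create() come from the module above. A real backend should subclass StorageBackend and implement the interface MemoryManager calls (save_memories, get_memory, archive_memories, delete_memories, as seen in memory_manager.py):

from src.storage import StorageFactory

class InMemoryBackend:
    """Hypothetical toy backend; a real one subclasses StorageBackend."""
    def __init__(self, config: dict):
        self._memories: dict = {}  # memory_id -> memory dict

StorageFactory.register_backend("memory", InMemoryBackend)
backend = StorageFactory.create({"backend": "memory"})
assert isinstance(backend, InMemoryBackend)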